Blank lines "between" verbatim sections are now acceptible; This allows code examples...
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
677c8c51 13$VERSION = "1.43_01"; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43 31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43 36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef 39=head2 podchecker()
40
41This function can take a hash of options:
e3237417 42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a 47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417 49
50=back
360aca43 51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
c23d1eb0 56Curious/ambitious users are welcome to propose additional features they wish
57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
58consistent with L<perlpod>.
360aca43 59
7b47f8ec 60The following checks are currently performed:
e3237417 61
62=over 4
63
64=item *
65
48f30392 66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
e3237417 73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
e2c3adef 80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 82
83=item *
84
267d5541 85Check for malformed or non-existing entities C<EE<lt>...E<gt>>.
e3237417 86
87=item *
88
e2c3adef 89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
e3237417 91
92=item *
93
e2c3adef 94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
e3237417 97
98=back
99
e2c3adef 100=head1 DIAGNOSTICS
e3237417 101
e2c3adef 102=head2 Errors
e3237417 103
104=over 4
105
66aff6dd 106=item * empty =headn
107
108A heading (C<=head1> or C<=head2>) without any text? That ain't no
109heading!
110
e2c3adef 111=item * =over on line I<N> without closing =back
e3237417 112
e2c3adef 113The C<=over> command does not have a corresponding C<=back> before the
114next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 115
e2c3adef 116=item * =item without previous =over
e3237417 117
e2c3adef 118=item * =back without previous =over
e3237417 119
e2c3adef 120An C<=item> or C<=back> command has been found outside a
121C<=over>/C<=back> block.
e3237417 122
e2c3adef 123=item * No argument for =begin
e3237417 124
e2c3adef 125A C<=begin> command was found that is not followed by the formatter
126specification.
e3237417 127
e2c3adef 128=item * =end without =begin
e3237417 129
e2c3adef 130A standalone C<=end> command was found.
131
132=item * Nested =begin's
133
66aff6dd 134There were at least two consecutive C<=begin> commands without
e2c3adef 135the corresponding C<=end>. Only one C<=begin> may be active at
136a time.
137
138=item * =for without formatter specification
e3237417 139
e2c3adef 140There is no specification of the formatter after the C<=for> command.
141
142=item * unresolved internal link I<NAME>
143
144The given link to I<NAME> does not have a matching node in the current
267d5541 145POD. This also happened when a single word node name is not enclosed in
e2c3adef 146C<"">.
147
148=item * Unknown command "I<CMD>"
149
150An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d 151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
152C<=for>, C<=pod>, C<=cut>
e2c3adef 153
154=item * Unknown interior-sequence "I<SEQ>"
155
156An invalid markup command has been encountered. Valid are:
157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
159C<ZE<lt>E<gt>>
160
161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
162
163Two nested identical markup commands have been found. Generally this
164does not make sense.
165
166=item * garbled entity I<STRING>
167
66aff6dd 168The I<STRING> found cannot be interpreted as a character entity.
169
170=item * Entity number out of range
171
172An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 173
174=item * malformed link LE<lt>E<gt>
175
176The link found cannot be parsed because it does not conform to the
177syntax described in L<perlpod>.
e3237417 178
66aff6dd 179=item * nonempty ZE<lt>E<gt>
180
181The C<ZE<lt>E<gt>> sequence is supposed to be empty.
182
48f30392 183=item * empty XE<lt>E<gt>
184
185The index entry specified contains nothing but whitespace.
186
66aff6dd 187=item * Spurious text after =pod / =cut
188
189The commands C<=pod> and C<=cut> do not take any arguments.
190
191=item * Spurious character(s) after =back
192
193The C<=back> command does not take any arguments.
194
e3237417 195=back
196
e2c3adef 197=head2 Warnings
e3237417 198
e2c3adef 199These may not necessarily cause trouble, but indicate mediocre style.
200
201=over 4
202
fb8eeed8 203=item * multiple occurrence of link target I<name>
66aff6dd 204
205The POD file has some C<=item> and/or C<=head> commands that have
206the same text. Potential hyperlinks to such a text cannot be unique then.
d5c61f7c 207This warning is printed only with warning level greater than one.
66aff6dd 208
209=item * line containing nothing but whitespace in paragraph
210
211There is some whitespace on a seemingly empty line. POD is very sensitive
212to such things, so this is flagged. B<vi> users switch on the B<list>
213option to avoid this problem.
214
92e3d63a 215=begin _disabled_
216
66aff6dd 217=item * file does not start with =head
218
219The file starts with a different POD directive than head.
220This is most probably something you do not want.
221
92e3d63a 222=end _disabled_
223
66aff6dd 224=item * previous =item has no contents
e2c3adef 225
66aff6dd 226There is a list C<=item> right above the flagged line that has no
227text contents. You probably want to delete empty items.
228
229=item * preceding non-item paragraph(s)
230
231A list introduced by C<=over> starts with a text or verbatim paragraph,
232but continues with C<=item>s. Move the non-item paragraph out of the
233C<=over>/C<=back> block.
234
235=item * =item type mismatch (I<one> vs. I<two>)
236
267d5541 237A list started with e.g. a bullet-like C<=item> and continued with a
66aff6dd 238numbered one. This is obviously inconsistent. For most translators the
239type of the I<first> C<=item> determines the type of the list.
e2c3adef 240
241=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
242
243Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
244can potentially cause errors as they could be misinterpreted as
92e3d63a 245markup commands. This is only printed when the -warnings level is
246greater than 1.
e2c3adef 247
66aff6dd 248=item * Unknown entity
e2c3adef 249
250A character entity was found that does not belong to the standard
66aff6dd 251ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 252
253=item * No items in =over
254
66aff6dd 255The list opened with C<=over> does not contain any items.
e2c3adef 256
257=item * No argument for =item
258
259C<=item> without any parameters is deprecated. It should either be followed
260by C<*> to indicate an unordered list, by a number (optionally followed
261by a dot) to indicate an ordered (numbered) list or simple text for a
262definition list.
263
66aff6dd 264=item * empty section in previous paragraph
265
266The previous section (introduced by a C<=head> command) does not contain
267any text. This usually indicates that something is missing. Note: A
268C<=head1> followed immediately by C<=head2> does not trigger this warning.
269
e2c3adef 270=item * Verbatim paragraph in NAME section
271
272The NAME section (C<=head1 NAME>) should consist of a single paragraph
273with the script/module name, followed by a dash `-' and a very short
274description of what the thing is good for.
275
9c6ed6d7 276=item * =headI<n> without preceding higher level
277
278For example if there is a C<=head2> in the POD file prior to a
279C<=head1>.
280
92e3d63a 281=back
282
283=head2 Hyperlinks
284
267d5541 285There are some warnings with respect to malformed hyperlinks:
92e3d63a 286
287=over 4
288
92e3d63a 289=item * ignoring leading/trailing whitespace in link
290
291There is whitespace at the beginning or the end of the contents of
292LE<lt>...E<gt>.
293
294=item * (section) in '$page' deprecated
295
296There is a section detected in the page name of LE<lt>...E<gt>, e.g.
ad838556 297C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
92e3d63a 298Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
299to expand this to appropriate code. For links to (builtin) functions,
300please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
301
302=item * alternative text/node '%s' contains non-escaped | or /
303
304The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
305Although the hyperlink parser does its best to determine which "/" is
306text and which is a delimiter in case of doubt, one ought to escape
307these literal characters like this:
308
309 / E<sol>
310 | E<verbar>
e2c3adef 311
312=back
e3237417 313
314=head1 RETURN VALUE
315
316B<podchecker> returns the number of POD syntax errors found or -1 if
317there were no POD commands at all found in the file.
318
360aca43 319=head1 EXAMPLES
320
c23d1eb0 321See L</SYNOPSIS>
360aca43 322
48f30392 323=head1 INTERFACE
360aca43 324
48f30392 325While checking, this module collects document properties, e.g. the nodes
326for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
327POD translators can use this feature to syntax-check and get the nodes in
328a first pass before actually starting to convert. This is expensive in terms
329of execution time, but allows for very robust conversions.
360aca43 330
c23d1eb0 331Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
332method to print errors and warnings. The summary output (e.g.
333"Pod syntax OK") has been dropped from the module and has been included in
334B<podchecker> (the script). This allows users of B<Pod::Checker> to
267d5541 335control completely the output behavior. Users of B<podchecker> (the script)
336get the well-known behavior.
c23d1eb0 337
360aca43 338=cut
339
340#############################################################################
341
342use strict;
343#use diagnostics;
344use Carp;
345use Exporter;
346use Pod::Parser;
347
348use vars qw(@ISA @EXPORT);
349@ISA = qw(Pod::Parser);
350@EXPORT = qw(&podchecker);
351
352use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
353
354my %VALID_COMMANDS = (
355 'pod' => 1,
356 'cut' => 1,
357 'head1' => 1,
358 'head2' => 1,
c6b85e5d 359 'head3' => 1,
360 'head4' => 1,
360aca43 361 'over' => 1,
362 'back' => 1,
363 'item' => 1,
364 'for' => 1,
365 'begin' => 1,
366 'end' => 1,
4e592f10 367 'encoding' => '1',
360aca43 368);
369
370my %VALID_SEQUENCES = (
371 'I' => 1,
372 'B' => 1,
373 'S' => 1,
374 'C' => 1,
375 'L' => 1,
376 'F' => 1,
377 'X' => 1,
378 'Z' => 1,
379 'E' => 1,
380);
381
e2c3adef 382# stolen from HTML::Entities
383my %ENTITIES = (
384 # Some normal chars that have special meaning in SGML context
385 amp => '&', # ampersand
386'gt' => '>', # greater than
387'lt' => '<', # less than
388 quot => '"', # double quote
389
390 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
391 AElig => 'Æ', # capital AE diphthong (ligature)
392 Aacute => 'Á', # capital A, acute accent
393 Acirc => 'Â', # capital A, circumflex accent
394 Agrave => 'À', # capital A, grave accent
395 Aring => 'Å', # capital A, ring
396 Atilde => 'Ã', # capital A, tilde
397 Auml => 'Ä', # capital A, dieresis or umlaut mark
398 Ccedil => 'Ç', # capital C, cedilla
399 ETH => 'Ð', # capital Eth, Icelandic
400 Eacute => 'É', # capital E, acute accent
401 Ecirc => 'Ê', # capital E, circumflex accent
402 Egrave => 'È', # capital E, grave accent
403 Euml => 'Ë', # capital E, dieresis or umlaut mark
404 Iacute => 'Í', # capital I, acute accent
405 Icirc => 'Î', # capital I, circumflex accent
406 Igrave => 'Ì', # capital I, grave accent
407 Iuml => 'Ï', # capital I, dieresis or umlaut mark
408 Ntilde => 'Ñ', # capital N, tilde
409 Oacute => 'Ó', # capital O, acute accent
410 Ocirc => 'Ô', # capital O, circumflex accent
411 Ograve => 'Ò', # capital O, grave accent
412 Oslash => 'Ø', # capital O, slash
413 Otilde => 'Õ', # capital O, tilde
414 Ouml => 'Ö', # capital O, dieresis or umlaut mark
415 THORN => 'Þ', # capital THORN, Icelandic
416 Uacute => 'Ú', # capital U, acute accent
417 Ucirc => 'Û', # capital U, circumflex accent
418 Ugrave => 'Ù', # capital U, grave accent
419 Uuml => 'Ü', # capital U, dieresis or umlaut mark
420 Yacute => 'Ý', # capital Y, acute accent
421 aacute => 'á', # small a, acute accent
422 acirc => 'â', # small a, circumflex accent
423 aelig => 'æ', # small ae diphthong (ligature)
424 agrave => 'à', # small a, grave accent
425 aring => 'å', # small a, ring
426 atilde => 'ã', # small a, tilde
427 auml => 'ä', # small a, dieresis or umlaut mark
428 ccedil => 'ç', # small c, cedilla
429 eacute => 'é', # small e, acute accent
430 ecirc => 'ê', # small e, circumflex accent
431 egrave => 'è', # small e, grave accent
432 eth => 'ð', # small eth, Icelandic
433 euml => 'ë', # small e, dieresis or umlaut mark
434 iacute => 'í', # small i, acute accent
435 icirc => 'î', # small i, circumflex accent
436 igrave => 'ì', # small i, grave accent
437 iuml => 'ï', # small i, dieresis or umlaut mark
438 ntilde => 'ñ', # small n, tilde
439 oacute => 'ó', # small o, acute accent
440 ocirc => 'ô', # small o, circumflex accent
441 ograve => 'ò', # small o, grave accent
442 oslash => 'ø', # small o, slash
443 otilde => 'õ', # small o, tilde
444 ouml => 'ö', # small o, dieresis or umlaut mark
445 szlig => 'ß', # small sharp s, German (sz ligature)
446 thorn => 'þ', # small thorn, Icelandic
447 uacute => 'ú', # small u, acute accent
448 ucirc => 'û', # small u, circumflex accent
449 ugrave => 'ù', # small u, grave accent
450 uuml => 'ü', # small u, dieresis or umlaut mark
451 yacute => 'ý', # small y, acute accent
452 yuml => 'ÿ', # small y, dieresis or umlaut mark
453
454 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
455 copy => '©', # copyright sign
456 reg => '®', # registered sign
457 nbsp => "\240", # non breaking space
458
459 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
460 iexcl => '¡',
461 cent => '¢',
462 pound => '£',
463 curren => '¤',
464 yen => '¥',
465 brvbar => '¦',
466 sect => '§',
467 uml => '¨',
468 ordf => 'ª',
469 laquo => '«',
470'not' => '¬', # not is a keyword in perl
471 shy => '­',
472 macr => '¯',
473 deg => '°',
474 plusmn => '±',
475 sup1 => '¹',
476 sup2 => '²',
477 sup3 => '³',
478 acute => '´',
479 micro => 'µ',
480 para => '¶',
481 middot => '·',
482 cedil => '¸',
483 ordm => 'º',
484 raquo => '»',
485 frac14 => '¼',
486 frac12 => '½',
487 frac34 => '¾',
488 iquest => '¿',
489'times' => '×', # times is a keyword in perl
490 divide => '÷',
66aff6dd 491
492# some POD special entities
493 verbar => '|',
494 sol => '/'
e2c3adef 495);
496
360aca43 497##---------------------------------------------------------------------------
498
499##---------------------------------
500## Function definitions begin here
501##---------------------------------
502
e3237417 503sub podchecker( $ ; $ % ) {
504 my ($infile, $outfile, %options) = @_;
360aca43 505 local $_;
506
507 ## Set defaults
508 $infile ||= \*STDIN;
509 $outfile ||= \*STDERR;
510
511 ## Now create a pod checker
e3237417 512 my $checker = new Pod::Checker(%options);
360aca43 513
514 ## Now check the pod document for errors
515 $checker->parse_from_file($infile, $outfile);
48f30392 516
360aca43 517 ## Return the number of errors found
518 return $checker->num_errors();
519}
520
521##---------------------------------------------------------------------------
522
523##-------------------------------
524## Method definitions begin here
525##-------------------------------
526
92e3d63a 527##################################
528
529=over 4
530
531=item C<Pod::Checker-E<gt>new( %options )>
532
533Return a reference to a new Pod::Checker object that inherits from
534Pod::Parser and is used for calling the required methods later. The
535following options are recognized:
536
537C<-warnings =E<gt> num>
538 Print warnings if C<num> is true. The higher the value of C<num>,
539the more warnings are printed. Currently there are only levels 1 and 2.
540
541C<-quiet =E<gt> num>
542 If C<num> is true, do not print any errors/warnings. This is useful
543when Pod::Checker is used to munge POD code into plain text from within
544POD formatters.
545
546=cut
547
66aff6dd 548## sub new {
549## my $this = shift;
550## my $class = ref($this) || $this;
551## my %params = @_;
552## my $self = {%params};
553## bless $self, $class;
554## $self->initialize();
555## return $self;
556## }
360aca43 557
558sub initialize {
559 my $self = shift;
664bb207 560 ## Initialize number of errors, and setup an error function to
561 ## increment this number and then print to the designated output.
562 $self->{_NUM_ERRORS} = 0;
9c6ed6d7 563 $self->{_NUM_WARNINGS} = 0;
92e3d63a 564 $self->{-quiet} ||= 0;
565 # set the error handling subroutine
566 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417 567 $self->{_commands} = 0; # total number of POD commands encountered
568 $self->{_list_stack} = []; # stack for nested lists
569 $self->{_have_begin} = ''; # stores =begin
570 $self->{_links} = []; # stack for internal hyperlinks
571 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 572 $self->{_index} = []; # text in X<>
e2c3adef 573 # print warnings?
e3237417 574 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 575 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 576 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207 577}
578
48f30392 579##################################
580
48f30392 581=item C<$checker-E<gt>poderror( @args )>
582
583=item C<$checker-E<gt>poderror( {%opts}, @args )>
584
585Internal method for printing errors and warnings. If no options are
586given, simply prints "@_". The following options are recognized and used
587to form the output:
588
589 -msg
590
591A message to print prior to C<@args>.
592
593 -line
594
595The line number the error occurred in.
596
597 -file
598
599The file (name) the error occurred in.
600
601 -severity
602
603The error level, should be 'WARNING' or 'ERROR'.
604
605=cut
606
e2c3adef 607# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 608sub poderror {
609 my $self = shift;
610 my %opts = (ref $_[0]) ? %{shift()} : ();
611
612 ## Retrieve options
613 chomp( my $msg = ($opts{-msg} || "")."@_" );
614 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
615 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd 616 unless (exists $opts{-severity}) {
617 ## See if can find severity in message prefix
618 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
619 }
664bb207 620 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
621
e3237417 622 ## Increment error count and print message "
623 ++($self->{_NUM_ERRORS})
624 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
9c6ed6d7 625 ++($self->{_NUM_WARNINGS})
626 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
7b47f8ec 627 unless($self->{-quiet}) {
628 my $out_fh = $self->output_handle() || \*STDERR;
629 print $out_fh ($severity, $msg, $line, $file, "\n")
630 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
631 }
360aca43 632}
633
48f30392 634##################################
635
636=item C<$checker-E<gt>num_errors()>
637
638Set (if argument specified) and retrieve the number of errors found.
639
640=cut
641
360aca43 642sub num_errors {
643 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
644}
645
48f30392 646##################################
647
9c6ed6d7 648=item C<$checker-E<gt>num_warnings()>
649
650Set (if argument specified) and retrieve the number of warnings found.
651
652=cut
653
654sub num_warnings {
655 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
656}
657
658##################################
659
48f30392 660=item C<$checker-E<gt>name()>
661
662Set (if argument specified) and retrieve the canonical name of POD as
663found in the C<=head1 NAME> section.
664
665=cut
666
e2c3adef 667sub name {
668 return (@_ > 1 && $_[1]) ?
669 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
670}
671
48f30392 672##################################
673
674=item C<$checker-E<gt>node()>
675
676Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
677and C<=item>) of the current POD. The nodes are returned in the order of
fb8eeed8 678their occurrence. They consist of plain text, each piece of whitespace is
48f30392 679collapsed to a single blank.
680
681=cut
682
e2c3adef 683sub node {
684 my ($self,$text) = @_;
685 if(defined $text) {
66aff6dd 686 $text =~ s/\s+$//s; # strip trailing whitespace
687 $text =~ s/\s+/ /gs; # collapse whitespace
688 # add node, order important!
e2c3adef 689 push(@{$self->{_nodes}}, $text);
66aff6dd 690 # keep also a uniqueness counter
48f30392 691 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 692 return $text;
693 }
694 @{$self->{_nodes}};
695}
696
48f30392 697##################################
698
699=item C<$checker-E<gt>idx()>
700
701Add (if argument specified) and retrieve the index entries (as defined by
702C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
703of whitespace is collapsed to a single blank.
704
705=cut
706
707# set/return index entries of current POD
708sub idx {
709 my ($self,$text) = @_;
710 if(defined $text) {
711 $text =~ s/\s+$//s; # strip trailing whitespace
712 $text =~ s/\s+/ /gs; # collapse whitespace
713 # add node, order important!
714 push(@{$self->{_index}}, $text);
715 # keep also a uniqueness counter
716 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
717 return $text;
718 }
719 @{$self->{_index}};
720}
721
722##################################
723
724=item C<$checker-E<gt>hyperlink()>
725
726Add (if argument specified) and retrieve the hyperlinks (as defined by
d1be9408 727C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
48f30392 728number and C<Pod::Hyperlink> object.
729
730=back
731
732=cut
733
e2c3adef 734# set/return hyperlinks of the current POD
735sub hyperlink {
736 my $self = shift;
737 if($_[0]) {
738 push(@{$self->{_links}}, $_[0]);
739 return $_[0];
740 }
741 @{$self->{_links}};
742}
743
e3237417 744## overrides for Pod::Parser
745
360aca43 746sub end_pod {
66aff6dd 747 ## Do some final checks and
748 ## print the number of errors found
749 my $self = shift;
750 my $infile = $self->input_file();
66aff6dd 751
752 if(@{$self->{_list_stack}}) {
66aff6dd 753 my $list;
754 while(($list = $self->_close_list('EOF',$infile)) &&
755 $list->indent() ne 'auto') {
756 $self->poderror({ -line => 'EOF', -file => $infile,
757 -severity => 'ERROR', -msg => "=over on line " .
758 $list->start() . " without closing =back" }); #"
759 }
760 }
761
762 # check validity of document internal hyperlinks
763 # first build the node names from the paragraph text
764 my %nodes;
765 foreach($self->node()) {
766 $nodes{$_} = 1;
92e3d63a 767 if(/^(\S+)\s+\S/) {
66aff6dd 768 # we have more than one word. Use the first as a node, too.
769 # This is used heavily in perlfunc.pod
770 $nodes{$1} ||= 2; # derived node
771 }
772 }
92e3d63a 773 foreach($self->idx()) {
774 $nodes{$_} = 3; # index node
775 }
66aff6dd 776 foreach($self->hyperlink()) {
48f30392 777 my ($line,$link) = @$_;
778 # _TODO_ what if there is a link to the page itself by the name,
779 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
780 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
781 my $node = $self->_check_ptree($self->parse_text($link->node(),
782 $line), $line, $infile, 'L');
783 if($node && !$nodes{$node}) {
784 $self->poderror({ -line => $line || '', -file => $infile,
785 -severity => 'ERROR',
786 -msg => "unresolved internal link '$node'"});
787 }
66aff6dd 788 }
789 }
48f30392 790
791 # check the internal nodes for uniqueness. This pertains to
792 # =headX, =item and X<...>
d5c61f7c 793 if($self->{-warnings} && $self->{-warnings}>1) {
794 foreach(grep($self->{_unique_nodes}->{$_} > 1,
795 keys %{$self->{_unique_nodes}})) {
796 $self->poderror({ -line => '-', -file => $infile,
66aff6dd 797 -severity => 'WARNING',
fb8eeed8 798 -msg => "multiple occurrence of link target '$_'"});
d5c61f7c 799 }
66aff6dd 800 }
801
c23d1eb0 802 # no POD found here
803 $self->num_errors(-1) if($self->{_commands} == 0);
360aca43 804}
805
e2c3adef 806# check a POD command directive
360aca43 807sub command {
664bb207 808 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 809 my ($file, $line) = $pod_para->file_line;
360aca43 810 ## Check the command syntax
e3237417 811 my $arg; # this will hold the command argument
664bb207 812 if (! $VALID_COMMANDS{$cmd}) {
813 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 814 -msg => "Unknown command '$cmd'" });
360aca43 815 }
92e3d63a 816 else { # found a valid command
817 $self->{_commands}++; # delete this line if below is enabled again
818
819 ##### following check disabled due to strong request
820 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
821 # $self->poderror({ -line => $line, -file => $file,
822 # -severity => 'WARNING',
823 # -msg => "file does not start with =head" });
824 #}
825
826 # check syntax of particular command
e3237417 827 if($cmd eq 'over') {
e2c3adef 828 # check for argument
829 $arg = $self->interpolate_and_check($paragraph, $line,$file);
830 my $indent = 4; # default
831 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
832 $indent = $1;
e2c3adef 833 }
e3237417 834 # start a new list
66aff6dd 835 $self->_open_list($indent,$line,$file);
e3237417 836 }
837 elsif($cmd eq 'item') {
e2c3adef 838 # are we in a list?
e3237417 839 unless(@{$self->{_list_stack}}) {
840 $self->poderror({ -line => $line, -file => $file,
841 -severity => 'ERROR',
842 -msg => "=item without previous =over" });
e2c3adef 843 # auto-open in case we encounter many more
66aff6dd 844 $self->_open_list('auto',$line,$file);
845 }
846 my $list = $self->{_list_stack}->[0];
847 # check whether the previous item had some contents
848 if(defined $self->{_list_item_contents} &&
849 $self->{_list_item_contents} == 0) {
850 $self->poderror({ -line => $line, -file => $file,
851 -severity => 'WARNING',
852 -msg => "previous =item has no contents" });
853 }
854 if($list->{_has_par}) {
855 $self->poderror({ -line => $line, -file => $file,
856 -severity => 'WARNING',
857 -msg => "preceding non-item paragraph(s)" });
858 delete $list->{_has_par};
e3237417 859 }
e2c3adef 860 # check for argument
861 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 862 if($arg && $arg =~ /(\S+)/) {
863 $arg =~ s/[\s\n]+$//;
864 my $type;
865 if($arg =~ /^[*]\s*(\S*.*)/) {
866 $type = 'bullet';
867 $self->{_list_item_contents} = $1 ? 1 : 0;
868 $arg = $1;
869 }
870 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
871 $type = 'number';
872 $self->{_list_item_contents} = $1 ? 1 : 0;
873 $arg = $1;
874 }
875 else {
876 $type = 'definition';
877 $self->{_list_item_contents} = 1;
878 }
879 my $first = $list->type();
880 if($first && $first ne $type) {
881 $self->poderror({ -line => $line, -file => $file,
882 -severity => 'WARNING',
883 -msg => "=item type mismatch ('$first' vs. '$type')"});
884 }
885 else { # first item
886 $list->type($type);
887 }
888 }
889 else {
e2c3adef 890 $self->poderror({ -line => $line, -file => $file,
891 -severity => 'WARNING',
892 -msg => "No argument for =item" });
893 $arg = ' '; # empty
66aff6dd 894 $self->{_list_item_contents} = 0;
e3237417 895 }
e2c3adef 896 # add this item
66aff6dd 897 $list->item($arg);
e2c3adef 898 # remember this node
899 $self->node($arg);
e3237417 900 }
901 elsif($cmd eq 'back') {
902 # check if we have an open list
903 unless(@{$self->{_list_stack}}) {
904 $self->poderror({ -line => $line, -file => $file,
905 -severity => 'ERROR',
906 -msg => "=back without previous =over" });
907 }
908 else {
909 # check for spurious characters
e2c3adef 910 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 911 if($arg && $arg =~ /\S/) {
912 $self->poderror({ -line => $line, -file => $file,
66aff6dd 913 -severity => 'ERROR',
e3237417 914 -msg => "Spurious character(s) after =back" });
915 }
916 # close list
66aff6dd 917 my $list = $self->_close_list($line,$file);
e3237417 918 # check for empty lists
919 if(!$list->item() && $self->{-warnings}) {
920 $self->poderror({ -line => $line, -file => $file,
921 -severity => 'WARNING',
922 -msg => "No items in =over (at line " .
923 $list->start() . ") / =back list"}); #"
924 }
925 }
926 }
66aff6dd 927 elsif($cmd =~ /^head(\d+)/) {
9c6ed6d7 928 my $hnum = $1;
929 $self->{"_have_head_$hnum"}++; # count head types
930 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
931 $self->poderror({ -line => $line, -file => $file,
932 -severity => 'WARNING',
933 -msg => "=head$hnum without preceding higher level"});
934 }
48f30392 935 # check whether the previous =head section had some contents
66aff6dd 936 if(defined $self->{_commands_in_head} &&
937 $self->{_commands_in_head} == 0 &&
938 defined $self->{_last_head} &&
9c6ed6d7 939 $self->{_last_head} >= $hnum) {
66aff6dd 940 $self->poderror({ -line => $line, -file => $file,
941 -severity => 'WARNING',
942 -msg => "empty section in previous paragraph"});
943 }
944 $self->{_commands_in_head} = -1;
9c6ed6d7 945 $self->{_last_head} = $hnum;
e3237417 946 # check if there is an open list
947 if(@{$self->{_list_stack}}) {
948 my $list;
66aff6dd 949 while(($list = $self->_close_list($line,$file)) &&
950 $list->indent() ne 'auto') {
e3237417 951 $self->poderror({ -line => $line, -file => $file,
952 -severity => 'ERROR',
e2c3adef 953 -msg => "=over on line ". $list->start() .
954 " without closing =back (at $cmd)" });
e3237417 955 }
956 }
957 # remember this node
e2c3adef 958 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 959 $arg =~ s/[\s\n]+$//s;
960 $self->node($arg);
961 unless(length($arg)) {
962 $self->poderror({ -line => $line, -file => $file,
963 -severity => 'ERROR',
964 -msg => "empty =$cmd"});
965 }
e2c3adef 966 if($cmd eq 'head1') {
e2c3adef 967 $self->{_current_head1} = $arg;
968 } else {
969 $self->{_current_head1} = '';
970 }
e3237417 971 }
972 elsif($cmd eq 'begin') {
973 if($self->{_have_begin}) {
974 # already have a begin
975 $self->poderror({ -line => $line, -file => $file,
976 -severity => 'ERROR',
977 -msg => "Nested =begin's (first at line " .
978 $self->{_have_begin} . ")"});
979 }
980 else {
981 # check for argument
e2c3adef 982 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 983 unless($arg && $arg =~ /(\S+)/) {
984 $self->poderror({ -line => $line, -file => $file,
e2c3adef 985 -severity => 'ERROR',
e3237417 986 -msg => "No argument for =begin"});
987 }
988 # remember the =begin
989 $self->{_have_begin} = "$line:$1";
990 }
991 }
992 elsif($cmd eq 'end') {
993 if($self->{_have_begin}) {
994 # close the existing =begin
995 $self->{_have_begin} = '';
996 # check for spurious characters
e2c3adef 997 $arg = $self->interpolate_and_check($paragraph, $line,$file);
998 # the closing argument is optional
999 #if($arg && $arg =~ /\S/) {
1000 # $self->poderror({ -line => $line, -file => $file,
1001 # -severity => 'WARNING',
1002 # -msg => "Spurious character(s) after =end" });
1003 #}
e3237417 1004 }
1005 else {
1006 # don't have a matching =begin
1007 $self->poderror({ -line => $line, -file => $file,
e2c3adef 1008 -severity => 'ERROR',
e3237417 1009 -msg => "=end without =begin" });
1010 }
1011 }
e2c3adef 1012 elsif($cmd eq 'for') {
1013 unless($paragraph =~ /\s*(\S+)\s*/) {
1014 $self->poderror({ -line => $line, -file => $file,
1015 -severity => 'ERROR',
1016 -msg => "=for without formatter specification" });
1017 }
1018 $arg = ''; # do not expand paragraph below
1019 }
66aff6dd 1020 elsif($cmd =~ /^(pod|cut)$/) {
1021 # check for argument
1022 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1023 if($arg && $arg =~ /(\S+)/) {
1024 $self->poderror({ -line => $line, -file => $file,
1025 -severity => 'ERROR',
1026 -msg => "Spurious text after =$cmd"});
1027 }
1028 }
1029 $self->{_commands_in_head}++;
e3237417 1030 ## Check the interior sequences in the command-text
e2c3adef 1031 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1032 unless(defined $arg);
e2c3adef 1033 }
360aca43 1034}
1035
66aff6dd 1036sub _open_list
1037{
1038 my ($self,$indent,$line,$file) = @_;
1039 my $list = Pod::List->new(
1040 -indent => $indent,
1041 -start => $line,
1042 -file => $file);
1043 unshift(@{$self->{_list_stack}}, $list);
1044 undef $self->{_list_item_contents};
1045 $list;
1046}
1047
1048sub _close_list
1049{
1050 my ($self,$line,$file) = @_;
1051 my $list = shift(@{$self->{_list_stack}});
1052 if(defined $self->{_list_item_contents} &&
1053 $self->{_list_item_contents} == 0) {
1054 $self->poderror({ -line => $line, -file => $file,
1055 -severity => 'WARNING',
1056 -msg => "previous =item has no contents" });
1057 }
1058 undef $self->{_list_item_contents};
1059 $list;
1060}
1061
e2c3adef 1062# process a block of some text
1063sub interpolate_and_check {
e3237417 1064 my ($self, $paragraph, $line, $file) = @_;
1065 ## Check the interior sequences in the command-text
1066 # and return the text
1067 $self->_check_ptree(
1068 $self->parse_text($paragraph,$line), $line, $file, '');
1069}
1070
1071sub _check_ptree {
1072 my ($self,$ptree,$line,$file,$nestlist) = @_;
1073 local($_);
1074 my $text = '';
1075 # process each node in the parse tree
1076 foreach(@$ptree) {
1077 # regular text chunk
1078 unless(ref) {
e3237417 1079 # count the unescaped angle brackets
92e3d63a 1080 # complain only when warning level is greater than 1
c23d1eb0 1081 if($self->{-warnings} && $self->{-warnings}>1) {
1082 my $count;
1083 if($count = tr/<>/<>/) {
e3237417 1084 $self->poderror({ -line => $line, -file => $file,
1085 -severity => 'WARNING',
c23d1eb0 1086 -msg => "$count unescaped <> in paragraph" });
1087 }
e3237417 1088 }
c23d1eb0 1089 $text .= $_;
e3237417 1090 next;
1091 }
1092 # have an interior sequence
1093 my $cmd = $_->cmd_name();
1094 my $contents = $_->parse_tree();
1095 ($file,$line) = $_->file_line();
1096 # check for valid tag
1097 if (! $VALID_SEQUENCES{$cmd}) {
1098 $self->poderror({ -line => $line, -file => $file,
1099 -severity => 'ERROR',
66aff6dd 1100 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1101 # expand it anyway
1102 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1103 next;
1104 }
1105 if($nestlist =~ /$cmd/) {
1106 $self->poderror({ -line => $line, -file => $file,
7b47f8ec 1107 -severity => 'WARNING',
e3237417 1108 -msg => "nested commands $cmd<...$cmd<...>...>"});
1109 # _TODO_ should we add the contents anyway?
1110 # expand it anyway, see below
1111 }
1112 if($cmd eq 'E') {
1113 # preserve entities
1114 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1115 $self->poderror({ -line => $line, -file => $file,
1116 -severity => 'ERROR',
1117 -msg => "garbled entity " . $_->raw_text()});
1118 next;
1119 }
e2c3adef 1120 my $ent = $$contents[0];
66aff6dd 1121 my $val;
1122 if($ent =~ /^0x[0-9a-f]+$/i) {
1123 # hexadec entity
1124 $val = hex($ent);
1125 }
1126 elsif($ent =~ /^0\d+$/) {
1127 # octal
1128 $val = oct($ent);
1129 }
1130 elsif($ent =~ /^\d+$/) {
e2c3adef 1131 # numeric entity
66aff6dd 1132 $val = $ent;
1133 }
1134 if(defined $val) {
1135 if($val>0 && $val<256) {
1136 $text .= chr($val);
1137 }
1138 else {
1139 $self->poderror({ -line => $line, -file => $file,
1140 -severity => 'ERROR',
1141 -msg => "Entity number out of range " . $_->raw_text()});
1142 }
e2c3adef 1143 }
1144 elsif($ENTITIES{$ent}) {
1145 # known ISO entity
1146 $text .= $ENTITIES{$ent};
1147 }
1148 else {
1149 $self->poderror({ -line => $line, -file => $file,
1150 -severity => 'WARNING',
66aff6dd 1151 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef 1152 $text .= "E<$ent>";
1153 }
e3237417 1154 }
1155 elsif($cmd eq 'L') {
1156 # try to parse the hyperlink
1157 my $link = Pod::Hyperlink->new($contents->raw_text());
1158 unless(defined $link) {
1159 $self->poderror({ -line => $line, -file => $file,
1160 -severity => 'ERROR',
e2c3adef 1161 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417 1162 next;
1163 }
1164 $link->line($line); # remember line
1165 if($self->{-warnings}) {
1166 foreach my $w ($link->warning()) {
1167 $self->poderror({ -line => $line, -file => $file,
1168 -severity => 'WARNING',
1169 -msg => $w });
1170 }
1171 }
1172 # check the link text
1173 $text .= $self->_check_ptree($self->parse_text($link->text(),
1174 $line), $line, $file, "$nestlist$cmd");
48f30392 1175 # remember link
1176 $self->hyperlink([$line,$link]);
e3237417 1177 }
1178 elsif($cmd =~ /[BCFIS]/) {
1179 # add the guts
1180 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1181 }
66aff6dd 1182 elsif($cmd eq 'Z') {
1183 if(length($contents->raw_text())) {
1184 $self->poderror({ -line => $line, -file => $file,
1185 -severity => 'ERROR',
1186 -msg => "Nonempty Z<>"});
1187 }
1188 }
48f30392 1189 elsif($cmd eq 'X') {
1190 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1191 if($idx =~ /^\s*$/s) {
1192 $self->poderror({ -line => $line, -file => $file,
1193 -severity => 'ERROR',
1194 -msg => "Empty X<>"});
1195 }
1196 else {
1197 # remember this node
1198 $self->idx($idx);
1199 }
1200 }
1201 else {
1202 # not reached
1203 die "internal error";
e3237417 1204 }
1205 }
1206 $text;
1207}
1208
e2c3adef 1209# process a block of verbatim text
360aca43 1210sub verbatim {
66aff6dd 1211 ## Nothing particular to check
e2c3adef 1212 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1213
1214 $self->_preproc_par($paragraph);
1215
e2c3adef 1216 if($self->{_current_head1} eq 'NAME') {
1217 my ($file, $line) = $pod_para->file_line;
1218 $self->poderror({ -line => $line, -file => $file,
1219 -severity => 'WARNING',
1220 -msg => 'Verbatim paragraph in NAME section' });
1221 }
360aca43 1222}
1223
e2c3adef 1224# process a block of regular text
360aca43 1225sub textblock {
1226 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1227 my ($file, $line) = $pod_para->file_line;
e3237417 1228
66aff6dd 1229 $self->_preproc_par($paragraph);
1230
e2c3adef 1231 # skip this paragraph if in a =begin block
1232 unless($self->{_have_begin}) {
1233 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1234 if($self->{_current_head1} eq 'NAME') {
1235 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1236 # this is the canonical name
1237 $self->{-name} = $1 unless(defined $self->{-name});
1238 }
e3237417 1239 }
1240 }
e3237417 1241}
1242
66aff6dd 1243sub _preproc_par
1244{
1245 my $self = shift;
1246 $_[0] =~ s/[\s\n]+$//;
1247 if($_[0]) {
1248 $self->{_commands_in_head}++;
1249 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1250 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1251 $self->{_list_stack}->[0]->{_has_par} = 1;
1252 }
1253 }
1254}
1255
e3237417 12561;
66aff6dd 1257
48f30392 1258__END__
1259
1260=head1 AUTHOR
1261
aaa799f9 1262Please report bugs using L<http://rt.cpan.org>.
1263
48f30392 1264Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
aaa799f9 1265Marek Rouchal E<lt>marekr@cpan.orgE<gt>
48f30392 1266
1267Based on code for B<Pod::Text::pod2text()> written by
1268Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1269
1270=cut
1271