Upgrade to Pod-Parser-1.36.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
1bc4b319 11use strict;
360aca43 12
1bc4b319 13use vars qw($VERSION @ISA @EXPORT %VALID_COMMANDS %VALID_SEQUENCES);
14$VERSION = '1.44_01'; ## Current version of this package
828c4421 15require 5.005; ## requires this Perl version or later
360aca43 16
e2c3adef 17use Pod::ParseUtils; ## for hyperlinks and lists
18
360aca43 19=head1 NAME
20
21Pod::Checker, podchecker() - check pod documents for syntax errors
22
23=head1 SYNOPSIS
24
25 use Pod::Checker;
26
e3237417 27 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 28
e2c3adef 29 my $checker = new Pod::Checker %options;
48f30392 30 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 31
360aca43 32=head1 OPTIONS/ARGUMENTS
33
34C<$filepath> is the input POD to read and C<$outputpath> is
35where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 36indicating a file-path, or else a reference to an open filehandle.
360aca43 37If unspecified, the input-file it defaults to C<\*STDIN>, and
38the output-file defaults to C<\*STDERR>.
39
e2c3adef 40=head2 podchecker()
41
42This function can take a hash of options:
e3237417 43
44=over 4
45
46=item B<-warnings> =E<gt> I<val>
47
92e3d63a 48Turn warnings on/off. I<val> is usually 1 for on, but higher values
49trigger additional warnings. See L<"Warnings">.
e3237417 50
51=back
360aca43 52
53=head1 DESCRIPTION
54
55B<podchecker> will perform syntax checking of Perl5 POD format documentation.
56
c23d1eb0 57Curious/ambitious users are welcome to propose additional features they wish
58to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
59consistent with L<perlpod>.
360aca43 60
7b47f8ec 61The following checks are currently performed:
e3237417 62
63=over 4
64
65=item *
66
48f30392 67Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 68and unterminated interior sequences.
69
70=item *
71
72Check for proper balancing of C<=begin> and C<=end>. The contents of such
73a block are generally ignored, i.e. no syntax checks are performed.
e3237417 74
75=item *
76
77Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
78
79=item *
80
1bc4b319 81Check for same nested interior-sequences (e.g.
e2c3adef 82C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 83
84=item *
85
267d5541 86Check for malformed or non-existing entities C<EE<lt>...E<gt>>.
e3237417 87
88=item *
89
e2c3adef 90Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
91for details.
e3237417 92
93=item *
94
e2c3adef 95Check for unresolved document-internal links. This check may also reveal
96misspelled links that seem to be internal links but should be links
97to something else.
e3237417 98
99=back
100
e2c3adef 101=head1 DIAGNOSTICS
e3237417 102
e2c3adef 103=head2 Errors
e3237417 104
105=over 4
106
66aff6dd 107=item * empty =headn
108
109A heading (C<=head1> or C<=head2>) without any text? That ain't no
110heading!
111
e2c3adef 112=item * =over on line I<N> without closing =back
e3237417 113
e2c3adef 114The C<=over> command does not have a corresponding C<=back> before the
115next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 116
e2c3adef 117=item * =item without previous =over
e3237417 118
e2c3adef 119=item * =back without previous =over
e3237417 120
e2c3adef 121An C<=item> or C<=back> command has been found outside a
122C<=over>/C<=back> block.
e3237417 123
e2c3adef 124=item * No argument for =begin
e3237417 125
e2c3adef 126A C<=begin> command was found that is not followed by the formatter
127specification.
e3237417 128
e2c3adef 129=item * =end without =begin
e3237417 130
e2c3adef 131A standalone C<=end> command was found.
132
133=item * Nested =begin's
134
66aff6dd 135There were at least two consecutive C<=begin> commands without
e2c3adef 136the corresponding C<=end>. Only one C<=begin> may be active at
137a time.
138
139=item * =for without formatter specification
e3237417 140
e2c3adef 141There is no specification of the formatter after the C<=for> command.
142
143=item * unresolved internal link I<NAME>
144
145The given link to I<NAME> does not have a matching node in the current
267d5541 146POD. This also happened when a single word node name is not enclosed in
e2c3adef 147C<"">.
148
149=item * Unknown command "I<CMD>"
150
151An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d 152C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
153C<=for>, C<=pod>, C<=cut>
e2c3adef 154
155=item * Unknown interior-sequence "I<SEQ>"
156
157An invalid markup command has been encountered. Valid are:
1bc4b319 158C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
159C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
e2c3adef 160C<ZE<lt>E<gt>>
161
162=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
163
164Two nested identical markup commands have been found. Generally this
165does not make sense.
166
167=item * garbled entity I<STRING>
168
66aff6dd 169The I<STRING> found cannot be interpreted as a character entity.
170
171=item * Entity number out of range
172
173An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 174
175=item * malformed link LE<lt>E<gt>
176
177The link found cannot be parsed because it does not conform to the
178syntax described in L<perlpod>.
e3237417 179
66aff6dd 180=item * nonempty ZE<lt>E<gt>
181
182The C<ZE<lt>E<gt>> sequence is supposed to be empty.
183
48f30392 184=item * empty XE<lt>E<gt>
185
186The index entry specified contains nothing but whitespace.
187
66aff6dd 188=item * Spurious text after =pod / =cut
189
190The commands C<=pod> and C<=cut> do not take any arguments.
191
192=item * Spurious character(s) after =back
193
194The C<=back> command does not take any arguments.
195
e3237417 196=back
197
e2c3adef 198=head2 Warnings
e3237417 199
e2c3adef 200These may not necessarily cause trouble, but indicate mediocre style.
201
202=over 4
203
fb8eeed8 204=item * multiple occurrence of link target I<name>
66aff6dd 205
206The POD file has some C<=item> and/or C<=head> commands that have
207the same text. Potential hyperlinks to such a text cannot be unique then.
d5c61f7c 208This warning is printed only with warning level greater than one.
66aff6dd 209
210=item * line containing nothing but whitespace in paragraph
211
212There is some whitespace on a seemingly empty line. POD is very sensitive
213to such things, so this is flagged. B<vi> users switch on the B<list>
214option to avoid this problem.
215
92e3d63a 216=begin _disabled_
217
66aff6dd 218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
92e3d63a 223=end _disabled_
224
66aff6dd 225=item * previous =item has no contents
e2c3adef 226
66aff6dd 227There is a list C<=item> right above the flagged line that has no
228text contents. You probably want to delete empty items.
229
230=item * preceding non-item paragraph(s)
231
232A list introduced by C<=over> starts with a text or verbatim paragraph,
233but continues with C<=item>s. Move the non-item paragraph out of the
234C<=over>/C<=back> block.
235
236=item * =item type mismatch (I<one> vs. I<two>)
237
267d5541 238A list started with e.g. a bullet-like C<=item> and continued with a
66aff6dd 239numbered one. This is obviously inconsistent. For most translators the
240type of the I<first> C<=item> determines the type of the list.
e2c3adef 241
242=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
244Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245can potentially cause errors as they could be misinterpreted as
92e3d63a 246markup commands. This is only printed when the -warnings level is
247greater than 1.
e2c3adef 248
66aff6dd 249=item * Unknown entity
e2c3adef 250
251A character entity was found that does not belong to the standard
66aff6dd 252ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 253
254=item * No items in =over
255
66aff6dd 256The list opened with C<=over> does not contain any items.
e2c3adef 257
258=item * No argument for =item
259
260C<=item> without any parameters is deprecated. It should either be followed
261by C<*> to indicate an unordered list, by a number (optionally followed
262by a dot) to indicate an ordered (numbered) list or simple text for a
263definition list.
264
66aff6dd 265=item * empty section in previous paragraph
266
267The previous section (introduced by a C<=head> command) does not contain
1bc4b319 268any text. This usually indicates that something is missing. Note: A
66aff6dd 269C<=head1> followed immediately by C<=head2> does not trigger this warning.
270
e2c3adef 271=item * Verbatim paragraph in NAME section
272
273The NAME section (C<=head1 NAME>) should consist of a single paragraph
274with the script/module name, followed by a dash `-' and a very short
275description of what the thing is good for.
276
9c6ed6d7 277=item * =headI<n> without preceding higher level
278
279For example if there is a C<=head2> in the POD file prior to a
280C<=head1>.
281
92e3d63a 282=back
283
284=head2 Hyperlinks
285
267d5541 286There are some warnings with respect to malformed hyperlinks:
92e3d63a 287
288=over 4
289
92e3d63a 290=item * ignoring leading/trailing whitespace in link
291
1bc4b319 292There is whitespace at the beginning or the end of the contents of
92e3d63a 293LE<lt>...E<gt>.
294
295=item * (section) in '$page' deprecated
296
297There is a section detected in the page name of LE<lt>...E<gt>, e.g.
ad838556 298C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
92e3d63a 299Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
300to expand this to appropriate code. For links to (builtin) functions,
301please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
302
303=item * alternative text/node '%s' contains non-escaped | or /
304
305The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
306Although the hyperlink parser does its best to determine which "/" is
307text and which is a delimiter in case of doubt, one ought to escape
308these literal characters like this:
309
310 / E<sol>
311 | E<verbar>
e2c3adef 312
313=back
e3237417 314
315=head1 RETURN VALUE
316
317B<podchecker> returns the number of POD syntax errors found or -1 if
318there were no POD commands at all found in the file.
319
360aca43 320=head1 EXAMPLES
321
c23d1eb0 322See L</SYNOPSIS>
360aca43 323
48f30392 324=head1 INTERFACE
360aca43 325
48f30392 326While checking, this module collects document properties, e.g. the nodes
327for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
328POD translators can use this feature to syntax-check and get the nodes in
329a first pass before actually starting to convert. This is expensive in terms
330of execution time, but allows for very robust conversions.
360aca43 331
c23d1eb0 332Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
1bc4b319 333method to print errors and warnings. The summary output (e.g.
c23d1eb0 334"Pod syntax OK") has been dropped from the module and has been included in
335B<podchecker> (the script). This allows users of B<Pod::Checker> to
267d5541 336control completely the output behavior. Users of B<podchecker> (the script)
337get the well-known behavior.
c23d1eb0 338
360aca43 339=cut
340
341#############################################################################
342
360aca43 343#use diagnostics;
1bc4b319 344use Carp qw(croak);
360aca43 345use Exporter;
346use Pod::Parser;
347
360aca43 348@ISA = qw(Pod::Parser);
349@EXPORT = qw(&podchecker);
350
360aca43 351my %VALID_COMMANDS = (
352 'pod' => 1,
353 'cut' => 1,
354 'head1' => 1,
355 'head2' => 1,
c6b85e5d 356 'head3' => 1,
357 'head4' => 1,
360aca43 358 'over' => 1,
359 'back' => 1,
360 'item' => 1,
361 'for' => 1,
362 'begin' => 1,
363 'end' => 1,
1bc4b319 364 'encoding' => 1,
360aca43 365);
366
367my %VALID_SEQUENCES = (
368 'I' => 1,
369 'B' => 1,
370 'S' => 1,
371 'C' => 1,
372 'L' => 1,
373 'F' => 1,
374 'X' => 1,
375 'Z' => 1,
376 'E' => 1,
377);
378
e2c3adef 379# stolen from HTML::Entities
380my %ENTITIES = (
381 # Some normal chars that have special meaning in SGML context
1bc4b319 382 amp => '&', # ampersand
e2c3adef 383'gt' => '>', # greater than
384'lt' => '<', # less than
385 quot => '"', # double quote
386
387 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
1bc4b319 388 AElig => 'Æ', # capital AE diphthong (ligature)
389 Aacute => 'Á', # capital A, acute accent
390 Acirc => 'Â', # capital A, circumflex accent
391 Agrave => 'À', # capital A, grave accent
392 Aring => 'Å', # capital A, ring
393 Atilde => 'Ã', # capital A, tilde
394 Auml => 'Ä', # capital A, dieresis or umlaut mark
395 Ccedil => 'Ç', # capital C, cedilla
396 ETH => 'Ð', # capital Eth, Icelandic
397 Eacute => 'É', # capital E, acute accent
398 Ecirc => 'Ê', # capital E, circumflex accent
399 Egrave => 'È', # capital E, grave accent
400 Euml => 'Ë', # capital E, dieresis or umlaut mark
401 Iacute => 'Í', # capital I, acute accent
402 Icirc => 'Î', # capital I, circumflex accent
403 Igrave => 'Ì', # capital I, grave accent
404 Iuml => 'Ï', # capital I, dieresis or umlaut mark
405 Ntilde => 'Ñ', # capital N, tilde
406 Oacute => 'Ó', # capital O, acute accent
407 Ocirc => 'Ô', # capital O, circumflex accent
408 Ograve => 'Ò', # capital O, grave accent
409 Oslash => 'Ø', # capital O, slash
410 Otilde => 'Õ', # capital O, tilde
411 Ouml => 'Ö', # capital O, dieresis or umlaut mark
412 THORN => 'Þ', # capital THORN, Icelandic
413 Uacute => 'Ú', # capital U, acute accent
414 Ucirc => 'Û', # capital U, circumflex accent
415 Ugrave => 'Ù', # capital U, grave accent
416 Uuml => 'Ü', # capital U, dieresis or umlaut mark
417 Yacute => 'Ý', # capital Y, acute accent
418 aacute => 'á', # small a, acute accent
419 acirc => 'â', # small a, circumflex accent
420 aelig => 'æ', # small ae diphthong (ligature)
421 agrave => 'à', # small a, grave accent
422 aring => 'å', # small a, ring
423 atilde => 'ã', # small a, tilde
424 auml => 'ä', # small a, dieresis or umlaut mark
425 ccedil => 'ç', # small c, cedilla
426 eacute => 'é', # small e, acute accent
427 ecirc => 'ê', # small e, circumflex accent
428 egrave => 'è', # small e, grave accent
429 eth => 'ð', # small eth, Icelandic
430 euml => 'ë', # small e, dieresis or umlaut mark
431 iacute => 'í', # small i, acute accent
432 icirc => 'î', # small i, circumflex accent
433 igrave => 'ì', # small i, grave accent
434 iuml => 'ï', # small i, dieresis or umlaut mark
435 ntilde => 'ñ', # small n, tilde
436 oacute => 'ó', # small o, acute accent
437 ocirc => 'ô', # small o, circumflex accent
438 ograve => 'ò', # small o, grave accent
439 oslash => 'ø', # small o, slash
440 otilde => 'õ', # small o, tilde
441 ouml => 'ö', # small o, dieresis or umlaut mark
442 szlig => 'ß', # small sharp s, German (sz ligature)
443 thorn => 'þ', # small thorn, Icelandic
444 uacute => 'ú', # small u, acute accent
445 ucirc => 'û', # small u, circumflex accent
446 ugrave => 'ù', # small u, grave accent
447 uuml => 'ü', # small u, dieresis or umlaut mark
448 yacute => 'ý', # small y, acute accent
449 yuml => 'ÿ', # small y, dieresis or umlaut mark
e2c3adef 450
451 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
452 copy => '©', # copyright sign
453 reg => '®', # registered sign
454 nbsp => "\240", # non breaking space
455
456 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
457 iexcl => '¡',
458 cent => '¢',
459 pound => '£',
460 curren => '¤',
461 yen => '¥',
462 brvbar => '¦',
463 sect => '§',
464 uml => '¨',
465 ordf => 'ª',
466 laquo => '«',
467'not' => '¬', # not is a keyword in perl
468 shy => '­',
469 macr => '¯',
470 deg => '°',
471 plusmn => '±',
472 sup1 => '¹',
473 sup2 => '²',
474 sup3 => '³',
475 acute => '´',
476 micro => 'µ',
477 para => '¶',
478 middot => '·',
479 cedil => '¸',
480 ordm => 'º',
481 raquo => '»',
482 frac14 => '¼',
483 frac12 => '½',
484 frac34 => '¾',
485 iquest => '¿',
486'times' => '×', # times is a keyword in perl
487 divide => '÷',
66aff6dd 488
489# some POD special entities
490 verbar => '|',
491 sol => '/'
e2c3adef 492);
493
360aca43 494##---------------------------------------------------------------------------
495
496##---------------------------------
497## Function definitions begin here
498##---------------------------------
499
1bc4b319 500sub podchecker {
e3237417 501 my ($infile, $outfile, %options) = @_;
360aca43 502 local $_;
503
504 ## Set defaults
505 $infile ||= \*STDIN;
506 $outfile ||= \*STDERR;
507
508 ## Now create a pod checker
e3237417 509 my $checker = new Pod::Checker(%options);
360aca43 510
511 ## Now check the pod document for errors
512 $checker->parse_from_file($infile, $outfile);
48f30392 513
360aca43 514 ## Return the number of errors found
515 return $checker->num_errors();
516}
517
518##---------------------------------------------------------------------------
519
520##-------------------------------
521## Method definitions begin here
522##-------------------------------
523
92e3d63a 524##################################
525
526=over 4
527
528=item C<Pod::Checker-E<gt>new( %options )>
529
530Return a reference to a new Pod::Checker object that inherits from
531Pod::Parser and is used for calling the required methods later. The
532following options are recognized:
533
534C<-warnings =E<gt> num>
535 Print warnings if C<num> is true. The higher the value of C<num>,
536the more warnings are printed. Currently there are only levels 1 and 2.
537
538C<-quiet =E<gt> num>
539 If C<num> is true, do not print any errors/warnings. This is useful
540when Pod::Checker is used to munge POD code into plain text from within
541POD formatters.
542
543=cut
544
66aff6dd 545## sub new {
546## my $this = shift;
547## my $class = ref($this) || $this;
548## my %params = @_;
549## my $self = {%params};
550## bless $self, $class;
551## $self->initialize();
552## return $self;
553## }
360aca43 554
555sub initialize {
556 my $self = shift;
664bb207 557 ## Initialize number of errors, and setup an error function to
558 ## increment this number and then print to the designated output.
559 $self->{_NUM_ERRORS} = 0;
9c6ed6d7 560 $self->{_NUM_WARNINGS} = 0;
92e3d63a 561 $self->{-quiet} ||= 0;
562 # set the error handling subroutine
563 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417 564 $self->{_commands} = 0; # total number of POD commands encountered
565 $self->{_list_stack} = []; # stack for nested lists
566 $self->{_have_begin} = ''; # stores =begin
567 $self->{_links} = []; # stack for internal hyperlinks
568 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 569 $self->{_index} = []; # text in X<>
e2c3adef 570 # print warnings?
e3237417 571 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 572 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 573 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207 574}
575
48f30392 576##################################
577
48f30392 578=item C<$checker-E<gt>poderror( @args )>
579
580=item C<$checker-E<gt>poderror( {%opts}, @args )>
581
582Internal method for printing errors and warnings. If no options are
583given, simply prints "@_". The following options are recognized and used
584to form the output:
585
586 -msg
587
588A message to print prior to C<@args>.
589
590 -line
591
592The line number the error occurred in.
593
594 -file
595
596The file (name) the error occurred in.
597
598 -severity
599
600The error level, should be 'WARNING' or 'ERROR'.
601
602=cut
603
e2c3adef 604# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 605sub poderror {
606 my $self = shift;
607 my %opts = (ref $_[0]) ? %{shift()} : ();
608
609 ## Retrieve options
1bc4b319 610 chomp( my $msg = ($opts{-msg} || '')."@_" );
611 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : '';
612 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : '';
66aff6dd 613 unless (exists $opts{-severity}) {
614 ## See if can find severity in message prefix
615 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
616 }
1bc4b319 617 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : '';
664bb207 618
e3237417 619 ## Increment error count and print message "
1bc4b319 620 ++($self->{_NUM_ERRORS})
e3237417 621 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
9c6ed6d7 622 ++($self->{_NUM_WARNINGS})
623 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
7b47f8ec 624 unless($self->{-quiet}) {
625 my $out_fh = $self->output_handle() || \*STDERR;
626 print $out_fh ($severity, $msg, $line, $file, "\n")
627 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
628 }
360aca43 629}
630
48f30392 631##################################
632
633=item C<$checker-E<gt>num_errors()>
634
635Set (if argument specified) and retrieve the number of errors found.
636
637=cut
638
360aca43 639sub num_errors {
640 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
641}
642
48f30392 643##################################
644
9c6ed6d7 645=item C<$checker-E<gt>num_warnings()>
646
647Set (if argument specified) and retrieve the number of warnings found.
648
649=cut
650
651sub num_warnings {
652 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
653}
654
655##################################
656
48f30392 657=item C<$checker-E<gt>name()>
658
659Set (if argument specified) and retrieve the canonical name of POD as
660found in the C<=head1 NAME> section.
661
662=cut
663
e2c3adef 664sub name {
665 return (@_ > 1 && $_[1]) ?
1bc4b319 666 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
e2c3adef 667}
668
48f30392 669##################################
670
671=item C<$checker-E<gt>node()>
672
673Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
674and C<=item>) of the current POD. The nodes are returned in the order of
fb8eeed8 675their occurrence. They consist of plain text, each piece of whitespace is
48f30392 676collapsed to a single blank.
677
678=cut
679
e2c3adef 680sub node {
681 my ($self,$text) = @_;
682 if(defined $text) {
66aff6dd 683 $text =~ s/\s+$//s; # strip trailing whitespace
684 $text =~ s/\s+/ /gs; # collapse whitespace
685 # add node, order important!
e2c3adef 686 push(@{$self->{_nodes}}, $text);
66aff6dd 687 # keep also a uniqueness counter
48f30392 688 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 689 return $text;
690 }
691 @{$self->{_nodes}};
692}
693
48f30392 694##################################
695
696=item C<$checker-E<gt>idx()>
697
698Add (if argument specified) and retrieve the index entries (as defined by
699C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
700of whitespace is collapsed to a single blank.
701
702=cut
703
704# set/return index entries of current POD
705sub idx {
706 my ($self,$text) = @_;
707 if(defined $text) {
708 $text =~ s/\s+$//s; # strip trailing whitespace
709 $text =~ s/\s+/ /gs; # collapse whitespace
710 # add node, order important!
711 push(@{$self->{_index}}, $text);
712 # keep also a uniqueness counter
713 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
714 return $text;
715 }
716 @{$self->{_index}};
717}
718
719##################################
720
721=item C<$checker-E<gt>hyperlink()>
722
723Add (if argument specified) and retrieve the hyperlinks (as defined by
d1be9408 724C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
48f30392 725number and C<Pod::Hyperlink> object.
726
727=back
728
729=cut
730
e2c3adef 731# set/return hyperlinks of the current POD
732sub hyperlink {
733 my $self = shift;
734 if($_[0]) {
735 push(@{$self->{_links}}, $_[0]);
736 return $_[0];
737 }
738 @{$self->{_links}};
739}
740
e3237417 741## overrides for Pod::Parser
742
360aca43 743sub end_pod {
66aff6dd 744 ## Do some final checks and
745 ## print the number of errors found
746 my $self = shift;
747 my $infile = $self->input_file();
66aff6dd 748
749 if(@{$self->{_list_stack}}) {
66aff6dd 750 my $list;
751 while(($list = $self->_close_list('EOF',$infile)) &&
752 $list->indent() ne 'auto') {
753 $self->poderror({ -line => 'EOF', -file => $infile,
1bc4b319 754 -severity => 'ERROR', -msg => '=over on line ' .
755 $list->start() . ' without closing =back' });
66aff6dd 756 }
757 }
758
759 # check validity of document internal hyperlinks
760 # first build the node names from the paragraph text
761 my %nodes;
762 foreach($self->node()) {
763 $nodes{$_} = 1;
92e3d63a 764 if(/^(\S+)\s+\S/) {
66aff6dd 765 # we have more than one word. Use the first as a node, too.
766 # This is used heavily in perlfunc.pod
767 $nodes{$1} ||= 2; # derived node
768 }
769 }
92e3d63a 770 foreach($self->idx()) {
771 $nodes{$_} = 3; # index node
772 }
66aff6dd 773 foreach($self->hyperlink()) {
48f30392 774 my ($line,$link) = @$_;
775 # _TODO_ what if there is a link to the page itself by the name,
776 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
777 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
778 my $node = $self->_check_ptree($self->parse_text($link->node(),
779 $line), $line, $infile, 'L');
780 if($node && !$nodes{$node}) {
781 $self->poderror({ -line => $line || '', -file => $infile,
782 -severity => 'ERROR',
783 -msg => "unresolved internal link '$node'"});
784 }
66aff6dd 785 }
786 }
48f30392 787
788 # check the internal nodes for uniqueness. This pertains to
789 # =headX, =item and X<...>
d5c61f7c 790 if($self->{-warnings} && $self->{-warnings}>1) {
791 foreach(grep($self->{_unique_nodes}->{$_} > 1,
792 keys %{$self->{_unique_nodes}})) {
793 $self->poderror({ -line => '-', -file => $infile,
66aff6dd 794 -severity => 'WARNING',
fb8eeed8 795 -msg => "multiple occurrence of link target '$_'"});
d5c61f7c 796 }
66aff6dd 797 }
798
c23d1eb0 799 # no POD found here
800 $self->num_errors(-1) if($self->{_commands} == 0);
360aca43 801}
802
e2c3adef 803# check a POD command directive
1bc4b319 804sub command {
664bb207 805 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 806 my ($file, $line) = $pod_para->file_line;
360aca43 807 ## Check the command syntax
e3237417 808 my $arg; # this will hold the command argument
664bb207 809 if (! $VALID_COMMANDS{$cmd}) {
810 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 811 -msg => "Unknown command '$cmd'" });
360aca43 812 }
92e3d63a 813 else { # found a valid command
814 $self->{_commands}++; # delete this line if below is enabled again
815
816 ##### following check disabled due to strong request
817 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
818 # $self->poderror({ -line => $line, -file => $file,
1bc4b319 819 # -severity => 'WARNING',
92e3d63a 820 # -msg => "file does not start with =head" });
821 #}
822
823 # check syntax of particular command
e3237417 824 if($cmd eq 'over') {
e2c3adef 825 # check for argument
826 $arg = $self->interpolate_and_check($paragraph, $line,$file);
827 my $indent = 4; # default
828 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
829 $indent = $1;
e2c3adef 830 }
e3237417 831 # start a new list
66aff6dd 832 $self->_open_list($indent,$line,$file);
e3237417 833 }
834 elsif($cmd eq 'item') {
e2c3adef 835 # are we in a list?
e3237417 836 unless(@{$self->{_list_stack}}) {
837 $self->poderror({ -line => $line, -file => $file,
1bc4b319 838 -severity => 'ERROR',
839 -msg => '=item without previous =over' });
e2c3adef 840 # auto-open in case we encounter many more
66aff6dd 841 $self->_open_list('auto',$line,$file);
842 }
843 my $list = $self->{_list_stack}->[0];
844 # check whether the previous item had some contents
845 if(defined $self->{_list_item_contents} &&
846 $self->{_list_item_contents} == 0) {
847 $self->poderror({ -line => $line, -file => $file,
1bc4b319 848 -severity => 'WARNING',
849 -msg => 'previous =item has no contents' });
66aff6dd 850 }
851 if($list->{_has_par}) {
852 $self->poderror({ -line => $line, -file => $file,
1bc4b319 853 -severity => 'WARNING',
854 -msg => 'preceding non-item paragraph(s)' });
66aff6dd 855 delete $list->{_has_par};
e3237417 856 }
e2c3adef 857 # check for argument
858 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 859 if($arg && $arg =~ /(\S+)/) {
860 $arg =~ s/[\s\n]+$//;
861 my $type;
862 if($arg =~ /^[*]\s*(\S*.*)/) {
863 $type = 'bullet';
864 $self->{_list_item_contents} = $1 ? 1 : 0;
865 $arg = $1;
866 }
4df4f5d0 867 elsif($arg =~ /^\d+\.?\s+(\S*)/) {
66aff6dd 868 $type = 'number';
869 $self->{_list_item_contents} = $1 ? 1 : 0;
870 $arg = $1;
871 }
872 else {
873 $type = 'definition';
874 $self->{_list_item_contents} = 1;
875 }
876 my $first = $list->type();
877 if($first && $first ne $type) {
878 $self->poderror({ -line => $line, -file => $file,
1bc4b319 879 -severity => 'WARNING',
66aff6dd 880 -msg => "=item type mismatch ('$first' vs. '$type')"});
881 }
882 else { # first item
883 $list->type($type);
884 }
885 }
886 else {
e2c3adef 887 $self->poderror({ -line => $line, -file => $file,
1bc4b319 888 -severity => 'WARNING',
889 -msg => 'No argument for =item' });
890 $arg = ' '; # empty
66aff6dd 891 $self->{_list_item_contents} = 0;
e3237417 892 }
e2c3adef 893 # add this item
66aff6dd 894 $list->item($arg);
e2c3adef 895 # remember this node
896 $self->node($arg);
e3237417 897 }
898 elsif($cmd eq 'back') {
899 # check if we have an open list
900 unless(@{$self->{_list_stack}}) {
901 $self->poderror({ -line => $line, -file => $file,
1bc4b319 902 -severity => 'ERROR',
903 -msg => '=back without previous =over' });
e3237417 904 }
905 else {
906 # check for spurious characters
e2c3adef 907 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 908 if($arg && $arg =~ /\S/) {
909 $self->poderror({ -line => $line, -file => $file,
1bc4b319 910 -severity => 'ERROR',
911 -msg => 'Spurious character(s) after =back' });
e3237417 912 }
913 # close list
66aff6dd 914 my $list = $self->_close_list($line,$file);
e3237417 915 # check for empty lists
916 if(!$list->item() && $self->{-warnings}) {
917 $self->poderror({ -line => $line, -file => $file,
1bc4b319 918 -severity => 'WARNING',
919 -msg => 'No items in =over (at line ' .
920 $list->start() . ') / =back list'});
e3237417 921 }
922 }
923 }
66aff6dd 924 elsif($cmd =~ /^head(\d+)/) {
9c6ed6d7 925 my $hnum = $1;
926 $self->{"_have_head_$hnum"}++; # count head types
1bc4b319 927 if($hnum > 1 && !$self->{'_have_head_'.($hnum -1)}) {
9c6ed6d7 928 $self->poderror({ -line => $line, -file => $file,
1bc4b319 929 -severity => 'WARNING',
9c6ed6d7 930 -msg => "=head$hnum without preceding higher level"});
931 }
48f30392 932 # check whether the previous =head section had some contents
66aff6dd 933 if(defined $self->{_commands_in_head} &&
934 $self->{_commands_in_head} == 0 &&
935 defined $self->{_last_head} &&
9c6ed6d7 936 $self->{_last_head} >= $hnum) {
66aff6dd 937 $self->poderror({ -line => $line, -file => $file,
1bc4b319 938 -severity => 'WARNING',
939 -msg => 'empty section in previous paragraph'});
66aff6dd 940 }
941 $self->{_commands_in_head} = -1;
9c6ed6d7 942 $self->{_last_head} = $hnum;
e3237417 943 # check if there is an open list
944 if(@{$self->{_list_stack}}) {
945 my $list;
66aff6dd 946 while(($list = $self->_close_list($line,$file)) &&
947 $list->indent() ne 'auto') {
e3237417 948 $self->poderror({ -line => $line, -file => $file,
1bc4b319 949 -severity => 'ERROR',
950 -msg => '=over on line '. $list->start() .
e2c3adef 951 " without closing =back (at $cmd)" });
e3237417 952 }
953 }
954 # remember this node
e2c3adef 955 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 956 $arg =~ s/[\s\n]+$//s;
957 $self->node($arg);
958 unless(length($arg)) {
959 $self->poderror({ -line => $line, -file => $file,
1bc4b319 960 -severity => 'ERROR',
66aff6dd 961 -msg => "empty =$cmd"});
962 }
e2c3adef 963 if($cmd eq 'head1') {
e2c3adef 964 $self->{_current_head1} = $arg;
965 } else {
966 $self->{_current_head1} = '';
967 }
e3237417 968 }
969 elsif($cmd eq 'begin') {
970 if($self->{_have_begin}) {
971 # already have a begin
972 $self->poderror({ -line => $line, -file => $file,
1bc4b319 973 -severity => 'ERROR',
974 -msg => q{Nested =begin's (first at line } .
975 $self->{_have_begin} . ')'});
e3237417 976 }
977 else {
978 # check for argument
e2c3adef 979 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 980 unless($arg && $arg =~ /(\S+)/) {
981 $self->poderror({ -line => $line, -file => $file,
1bc4b319 982 -severity => 'ERROR',
983 -msg => 'No argument for =begin'});
e3237417 984 }
985 # remember the =begin
986 $self->{_have_begin} = "$line:$1";
987 }
988 }
989 elsif($cmd eq 'end') {
990 if($self->{_have_begin}) {
991 # close the existing =begin
992 $self->{_have_begin} = '';
993 # check for spurious characters
e2c3adef 994 $arg = $self->interpolate_and_check($paragraph, $line,$file);
995 # the closing argument is optional
996 #if($arg && $arg =~ /\S/) {
997 # $self->poderror({ -line => $line, -file => $file,
1bc4b319 998 # -severity => 'WARNING',
e2c3adef 999 # -msg => "Spurious character(s) after =end" });
1000 #}
e3237417 1001 }
1002 else {
1003 # don't have a matching =begin
1004 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1005 -severity => 'ERROR',
1006 -msg => '=end without =begin' });
e3237417 1007 }
1008 }
e2c3adef 1009 elsif($cmd eq 'for') {
1010 unless($paragraph =~ /\s*(\S+)\s*/) {
1011 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1012 -severity => 'ERROR',
1013 -msg => '=for without formatter specification' });
e2c3adef 1014 }
1015 $arg = ''; # do not expand paragraph below
1016 }
66aff6dd 1017 elsif($cmd =~ /^(pod|cut)$/) {
1018 # check for argument
1019 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1020 if($arg && $arg =~ /(\S+)/) {
1021 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1022 -severity => 'ERROR',
66aff6dd 1023 -msg => "Spurious text after =$cmd"});
1024 }
1025 }
1026 $self->{_commands_in_head}++;
e3237417 1027 ## Check the interior sequences in the command-text
e2c3adef 1028 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1029 unless(defined $arg);
e2c3adef 1030 }
360aca43 1031}
1032
66aff6dd 1033sub _open_list
1034{
1035 my ($self,$indent,$line,$file) = @_;
1036 my $list = Pod::List->new(
1037 -indent => $indent,
1038 -start => $line,
1039 -file => $file);
1040 unshift(@{$self->{_list_stack}}, $list);
1041 undef $self->{_list_item_contents};
1042 $list;
1043}
1044
1045sub _close_list
1046{
1047 my ($self,$line,$file) = @_;
1048 my $list = shift(@{$self->{_list_stack}});
1049 if(defined $self->{_list_item_contents} &&
1050 $self->{_list_item_contents} == 0) {
1051 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1052 -severity => 'WARNING',
1053 -msg => 'previous =item has no contents' });
66aff6dd 1054 }
1055 undef $self->{_list_item_contents};
1056 $list;
1057}
1058
e2c3adef 1059# process a block of some text
1060sub interpolate_and_check {
e3237417 1061 my ($self, $paragraph, $line, $file) = @_;
1062 ## Check the interior sequences in the command-text
1063 # and return the text
1064 $self->_check_ptree(
1065 $self->parse_text($paragraph,$line), $line, $file, '');
1066}
1067
1068sub _check_ptree {
1069 my ($self,$ptree,$line,$file,$nestlist) = @_;
1070 local($_);
1071 my $text = '';
1072 # process each node in the parse tree
1073 foreach(@$ptree) {
1074 # regular text chunk
1075 unless(ref) {
e3237417 1076 # count the unescaped angle brackets
92e3d63a 1077 # complain only when warning level is greater than 1
c23d1eb0 1078 if($self->{-warnings} && $self->{-warnings}>1) {
1079 my $count;
1080 if($count = tr/<>/<>/) {
e3237417 1081 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1082 -severity => 'WARNING',
c23d1eb0 1083 -msg => "$count unescaped <> in paragraph" });
1084 }
e3237417 1085 }
c23d1eb0 1086 $text .= $_;
e3237417 1087 next;
1088 }
1089 # have an interior sequence
1090 my $cmd = $_->cmd_name();
1091 my $contents = $_->parse_tree();
1092 ($file,$line) = $_->file_line();
1093 # check for valid tag
1094 if (! $VALID_SEQUENCES{$cmd}) {
1095 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1096 -severity => 'ERROR',
66aff6dd 1097 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1098 # expand it anyway
1099 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1100 next;
1101 }
1102 if($nestlist =~ /$cmd/) {
1103 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1104 -severity => 'WARNING',
e3237417 1105 -msg => "nested commands $cmd<...$cmd<...>...>"});
1106 # _TODO_ should we add the contents anyway?
1107 # expand it anyway, see below
1108 }
1109 if($cmd eq 'E') {
1110 # preserve entities
1111 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1112 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1113 -severity => 'ERROR',
1114 -msg => 'garbled entity ' . $_->raw_text()});
e3237417 1115 next;
1116 }
e2c3adef 1117 my $ent = $$contents[0];
66aff6dd 1118 my $val;
1119 if($ent =~ /^0x[0-9a-f]+$/i) {
1120 # hexadec entity
1121 $val = hex($ent);
1122 }
1123 elsif($ent =~ /^0\d+$/) {
1124 # octal
1125 $val = oct($ent);
1126 }
1127 elsif($ent =~ /^\d+$/) {
e2c3adef 1128 # numeric entity
66aff6dd 1129 $val = $ent;
1130 }
1131 if(defined $val) {
1132 if($val>0 && $val<256) {
1133 $text .= chr($val);
1134 }
1135 else {
1136 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1137 -severity => 'ERROR',
1138 -msg => 'Entity number out of range ' . $_->raw_text()});
66aff6dd 1139 }
e2c3adef 1140 }
1141 elsif($ENTITIES{$ent}) {
1142 # known ISO entity
1143 $text .= $ENTITIES{$ent};
1144 }
1145 else {
1146 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1147 -severity => 'WARNING',
1148 -msg => 'Unknown entity ' . $_->raw_text()});
e2c3adef 1149 $text .= "E<$ent>";
1150 }
e3237417 1151 }
1152 elsif($cmd eq 'L') {
1153 # try to parse the hyperlink
1154 my $link = Pod::Hyperlink->new($contents->raw_text());
1155 unless(defined $link) {
1156 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1157 -severity => 'ERROR',
1158 -msg => 'malformed link ' . $_->raw_text() ." : $@"});
e3237417 1159 next;
1160 }
1161 $link->line($line); # remember line
1162 if($self->{-warnings}) {
1163 foreach my $w ($link->warning()) {
1164 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1165 -severity => 'WARNING',
e3237417 1166 -msg => $w });
1167 }
1168 }
1169 # check the link text
1170 $text .= $self->_check_ptree($self->parse_text($link->text(),
1171 $line), $line, $file, "$nestlist$cmd");
48f30392 1172 # remember link
1173 $self->hyperlink([$line,$link]);
e3237417 1174 }
1175 elsif($cmd =~ /[BCFIS]/) {
1176 # add the guts
1177 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1178 }
66aff6dd 1179 elsif($cmd eq 'Z') {
1180 if(length($contents->raw_text())) {
1181 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1182 -severity => 'ERROR',
1183 -msg => 'Nonempty Z<>'});
66aff6dd 1184 }
1185 }
48f30392 1186 elsif($cmd eq 'X') {
1187 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1188 if($idx =~ /^\s*$/s) {
1189 $self->poderror({ -line => $line, -file => $file,
1bc4b319 1190 -severity => 'ERROR',
1191 -msg => 'Empty X<>'});
48f30392 1192 }
1193 else {
1194 # remember this node
1195 $self->idx($idx);
1196 }
1197 }
1198 else {
1199 # not reached
1bc4b319 1200 croak 'internal error';
e3237417 1201 }
1202 }
1203 $text;
1204}
1205
e2c3adef 1206# process a block of verbatim text
1bc4b319 1207sub verbatim {
66aff6dd 1208 ## Nothing particular to check
e2c3adef 1209 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1210
1211 $self->_preproc_par($paragraph);
1212
e2c3adef 1213 if($self->{_current_head1} eq 'NAME') {
1214 my ($file, $line) = $pod_para->file_line;
1215 $self->poderror({ -line => $line, -file => $file,
1216 -severity => 'WARNING',
1217 -msg => 'Verbatim paragraph in NAME section' });
1218 }
360aca43 1219}
1220
e2c3adef 1221# process a block of regular text
1bc4b319 1222sub textblock {
360aca43 1223 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1224 my ($file, $line) = $pod_para->file_line;
e3237417 1225
66aff6dd 1226 $self->_preproc_par($paragraph);
1227
e2c3adef 1228 # skip this paragraph if in a =begin block
1229 unless($self->{_have_begin}) {
1230 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1231 if($self->{_current_head1} eq 'NAME') {
1232 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1233 # this is the canonical name
1234 $self->{-name} = $1 unless(defined $self->{-name});
1235 }
e3237417 1236 }
1237 }
e3237417 1238}
1239
66aff6dd 1240sub _preproc_par
1241{
1242 my $self = shift;
1243 $_[0] =~ s/[\s\n]+$//;
1244 if($_[0]) {
1245 $self->{_commands_in_head}++;
1246 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1247 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1248 $self->{_list_stack}->[0]->{_has_par} = 1;
1249 }
1250 }
1251}
1252
e3237417 12531;
66aff6dd 1254
48f30392 1255__END__
1256
1257=head1 AUTHOR
1258
aaa799f9 1259Please report bugs using L<http://rt.cpan.org>.
1260
48f30392 1261Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
aaa799f9 1262Marek Rouchal E<lt>marekr@cpan.orgE<gt>
48f30392 1263
1264Based on code for B<Pod::Text::pod2text()> written by
1265Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1266
1267=cut
1268