Typos in *.p[lm]
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
7d8277e2 13$VERSION = 1.42; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43 31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43 36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef 39=head2 podchecker()
40
41This function can take a hash of options:
e3237417 42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a 47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417 49
50=back
360aca43 51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
c23d1eb0 56Curious/ambitious users are welcome to propose additional features they wish
57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
58consistent with L<perlpod>.
360aca43 59
48f30392 60The following checks are currently preformed:
e3237417 61
62=over 4
63
64=item *
65
48f30392 66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
e3237417 73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
e2c3adef 80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 82
83=item *
84
e2c3adef 85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417 86
87=item *
88
e2c3adef 89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
e3237417 91
92=item *
93
e2c3adef 94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
e3237417 97
98=back
99
e2c3adef 100=head1 DIAGNOSTICS
e3237417 101
e2c3adef 102=head2 Errors
e3237417 103
104=over 4
105
66aff6dd 106=item * empty =headn
107
108A heading (C<=head1> or C<=head2>) without any text? That ain't no
109heading!
110
e2c3adef 111=item * =over on line I<N> without closing =back
e3237417 112
e2c3adef 113The C<=over> command does not have a corresponding C<=back> before the
114next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 115
e2c3adef 116=item * =item without previous =over
e3237417 117
e2c3adef 118=item * =back without previous =over
e3237417 119
e2c3adef 120An C<=item> or C<=back> command has been found outside a
121C<=over>/C<=back> block.
e3237417 122
e2c3adef 123=item * No argument for =begin
e3237417 124
e2c3adef 125A C<=begin> command was found that is not followed by the formatter
126specification.
e3237417 127
e2c3adef 128=item * =end without =begin
e3237417 129
e2c3adef 130A standalone C<=end> command was found.
131
132=item * Nested =begin's
133
66aff6dd 134There were at least two consecutive C<=begin> commands without
e2c3adef 135the corresponding C<=end>. Only one C<=begin> may be active at
136a time.
137
138=item * =for without formatter specification
e3237417 139
e2c3adef 140There is no specification of the formatter after the C<=for> command.
141
142=item * unresolved internal link I<NAME>
143
144The given link to I<NAME> does not have a matching node in the current
3c4b39be 145POD. This also happens when a single word node name is not enclosed in
e2c3adef 146C<"">.
147
148=item * Unknown command "I<CMD>"
149
150An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d 151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
152C<=for>, C<=pod>, C<=cut>
e2c3adef 153
154=item * Unknown interior-sequence "I<SEQ>"
155
156An invalid markup command has been encountered. Valid are:
157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
159C<ZE<lt>E<gt>>
160
161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
162
163Two nested identical markup commands have been found. Generally this
164does not make sense.
165
166=item * garbled entity I<STRING>
167
66aff6dd 168The I<STRING> found cannot be interpreted as a character entity.
169
170=item * Entity number out of range
171
172An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 173
174=item * malformed link LE<lt>E<gt>
175
176The link found cannot be parsed because it does not conform to the
177syntax described in L<perlpod>.
e3237417 178
66aff6dd 179=item * nonempty ZE<lt>E<gt>
180
181The C<ZE<lt>E<gt>> sequence is supposed to be empty.
182
48f30392 183=item * empty XE<lt>E<gt>
184
185The index entry specified contains nothing but whitespace.
186
66aff6dd 187=item * Spurious text after =pod / =cut
188
189The commands C<=pod> and C<=cut> do not take any arguments.
190
191=item * Spurious character(s) after =back
192
193The C<=back> command does not take any arguments.
194
e3237417 195=back
196
e2c3adef 197=head2 Warnings
e3237417 198
e2c3adef 199These may not necessarily cause trouble, but indicate mediocre style.
200
201=over 4
202
fb8eeed8 203=item * multiple occurrence of link target I<name>
66aff6dd 204
205The POD file has some C<=item> and/or C<=head> commands that have
206the same text. Potential hyperlinks to such a text cannot be unique then.
d5c61f7c 207This warning is printed only with warning level greater than one.
66aff6dd 208
209=item * line containing nothing but whitespace in paragraph
210
211There is some whitespace on a seemingly empty line. POD is very sensitive
212to such things, so this is flagged. B<vi> users switch on the B<list>
213option to avoid this problem.
214
92e3d63a 215=begin _disabled_
216
66aff6dd 217=item * file does not start with =head
218
219The file starts with a different POD directive than head.
220This is most probably something you do not want.
221
92e3d63a 222=end _disabled_
223
66aff6dd 224=item * previous =item has no contents
e2c3adef 225
66aff6dd 226There is a list C<=item> right above the flagged line that has no
227text contents. You probably want to delete empty items.
228
229=item * preceding non-item paragraph(s)
230
231A list introduced by C<=over> starts with a text or verbatim paragraph,
232but continues with C<=item>s. Move the non-item paragraph out of the
233C<=over>/C<=back> block.
234
235=item * =item type mismatch (I<one> vs. I<two>)
236
3c4b39be 237A list started with e.g. a bulleted C<=item> and continued with a
66aff6dd 238numbered one. This is obviously inconsistent. For most translators the
239type of the I<first> C<=item> determines the type of the list.
e2c3adef 240
241=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
242
243Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
244can potentially cause errors as they could be misinterpreted as
92e3d63a 245markup commands. This is only printed when the -warnings level is
246greater than 1.
e2c3adef 247
66aff6dd 248=item * Unknown entity
e2c3adef 249
250A character entity was found that does not belong to the standard
66aff6dd 251ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 252
253=item * No items in =over
254
66aff6dd 255The list opened with C<=over> does not contain any items.
e2c3adef 256
257=item * No argument for =item
258
259C<=item> without any parameters is deprecated. It should either be followed
260by C<*> to indicate an unordered list, by a number (optionally followed
261by a dot) to indicate an ordered (numbered) list or simple text for a
262definition list.
263
66aff6dd 264=item * empty section in previous paragraph
265
266The previous section (introduced by a C<=head> command) does not contain
267any text. This usually indicates that something is missing. Note: A
268C<=head1> followed immediately by C<=head2> does not trigger this warning.
269
e2c3adef 270=item * Verbatim paragraph in NAME section
271
272The NAME section (C<=head1 NAME>) should consist of a single paragraph
273with the script/module name, followed by a dash `-' and a very short
274description of what the thing is good for.
275
9c6ed6d7 276=item * =headI<n> without preceding higher level
277
278For example if there is a C<=head2> in the POD file prior to a
279C<=head1>.
280
92e3d63a 281=back
282
283=head2 Hyperlinks
284
285There are some warnings wrt. malformed hyperlinks.
286
287=over 4
288
92e3d63a 289=item * ignoring leading/trailing whitespace in link
290
291There is whitespace at the beginning or the end of the contents of
292LE<lt>...E<gt>.
293
294=item * (section) in '$page' deprecated
295
296There is a section detected in the page name of LE<lt>...E<gt>, e.g.
ad838556 297C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
92e3d63a 298Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
299to expand this to appropriate code. For links to (builtin) functions,
300please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
301
302=item * alternative text/node '%s' contains non-escaped | or /
303
304The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
305Although the hyperlink parser does its best to determine which "/" is
306text and which is a delimiter in case of doubt, one ought to escape
307these literal characters like this:
308
309 / E<sol>
310 | E<verbar>
e2c3adef 311
312=back
e3237417 313
314=head1 RETURN VALUE
315
316B<podchecker> returns the number of POD syntax errors found or -1 if
317there were no POD commands at all found in the file.
318
360aca43 319=head1 EXAMPLES
320
c23d1eb0 321See L</SYNOPSIS>
360aca43 322
48f30392 323=head1 INTERFACE
360aca43 324
48f30392 325While checking, this module collects document properties, e.g. the nodes
326for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
327POD translators can use this feature to syntax-check and get the nodes in
328a first pass before actually starting to convert. This is expensive in terms
329of execution time, but allows for very robust conversions.
360aca43 330
c23d1eb0 331Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
332method to print errors and warnings. The summary output (e.g.
333"Pod syntax OK") has been dropped from the module and has been included in
334B<podchecker> (the script). This allows users of B<Pod::Checker> to
335control completely the output behaviour. Users of B<podchecker> (the script)
336get the well-known behaviour.
337
360aca43 338=cut
339
340#############################################################################
341
342use strict;
343#use diagnostics;
344use Carp;
345use Exporter;
346use Pod::Parser;
347
348use vars qw(@ISA @EXPORT);
349@ISA = qw(Pod::Parser);
350@EXPORT = qw(&podchecker);
351
352use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
353
354my %VALID_COMMANDS = (
355 'pod' => 1,
356 'cut' => 1,
357 'head1' => 1,
358 'head2' => 1,
c6b85e5d 359 'head3' => 1,
360 'head4' => 1,
360aca43 361 'over' => 1,
362 'back' => 1,
363 'item' => 1,
364 'for' => 1,
365 'begin' => 1,
366 'end' => 1,
367);
368
369my %VALID_SEQUENCES = (
370 'I' => 1,
371 'B' => 1,
372 'S' => 1,
373 'C' => 1,
374 'L' => 1,
375 'F' => 1,
376 'X' => 1,
377 'Z' => 1,
378 'E' => 1,
379);
380
e2c3adef 381# stolen from HTML::Entities
382my %ENTITIES = (
383 # Some normal chars that have special meaning in SGML context
384 amp => '&', # ampersand
385'gt' => '>', # greater than
386'lt' => '<', # less than
387 quot => '"', # double quote
388
389 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
390 AElig => 'Æ', # capital AE diphthong (ligature)
391 Aacute => 'Á', # capital A, acute accent
392 Acirc => 'Â', # capital A, circumflex accent
393 Agrave => 'À', # capital A, grave accent
394 Aring => 'Å', # capital A, ring
395 Atilde => 'Ã', # capital A, tilde
396 Auml => 'Ä', # capital A, dieresis or umlaut mark
397 Ccedil => 'Ç', # capital C, cedilla
398 ETH => 'Ð', # capital Eth, Icelandic
399 Eacute => 'É', # capital E, acute accent
400 Ecirc => 'Ê', # capital E, circumflex accent
401 Egrave => 'È', # capital E, grave accent
402 Euml => 'Ë', # capital E, dieresis or umlaut mark
403 Iacute => 'Í', # capital I, acute accent
404 Icirc => 'Î', # capital I, circumflex accent
405 Igrave => 'Ì', # capital I, grave accent
406 Iuml => 'Ï', # capital I, dieresis or umlaut mark
407 Ntilde => 'Ñ', # capital N, tilde
408 Oacute => 'Ó', # capital O, acute accent
409 Ocirc => 'Ô', # capital O, circumflex accent
410 Ograve => 'Ò', # capital O, grave accent
411 Oslash => 'Ø', # capital O, slash
412 Otilde => 'Õ', # capital O, tilde
413 Ouml => 'Ö', # capital O, dieresis or umlaut mark
414 THORN => 'Þ', # capital THORN, Icelandic
415 Uacute => 'Ú', # capital U, acute accent
416 Ucirc => 'Û', # capital U, circumflex accent
417 Ugrave => 'Ù', # capital U, grave accent
418 Uuml => 'Ü', # capital U, dieresis or umlaut mark
419 Yacute => 'Ý', # capital Y, acute accent
420 aacute => 'á', # small a, acute accent
421 acirc => 'â', # small a, circumflex accent
422 aelig => 'æ', # small ae diphthong (ligature)
423 agrave => 'à', # small a, grave accent
424 aring => 'å', # small a, ring
425 atilde => 'ã', # small a, tilde
426 auml => 'ä', # small a, dieresis or umlaut mark
427 ccedil => 'ç', # small c, cedilla
428 eacute => 'é', # small e, acute accent
429 ecirc => 'ê', # small e, circumflex accent
430 egrave => 'è', # small e, grave accent
431 eth => 'ð', # small eth, Icelandic
432 euml => 'ë', # small e, dieresis or umlaut mark
433 iacute => 'í', # small i, acute accent
434 icirc => 'î', # small i, circumflex accent
435 igrave => 'ì', # small i, grave accent
436 iuml => 'ï', # small i, dieresis or umlaut mark
437 ntilde => 'ñ', # small n, tilde
438 oacute => 'ó', # small o, acute accent
439 ocirc => 'ô', # small o, circumflex accent
440 ograve => 'ò', # small o, grave accent
441 oslash => 'ø', # small o, slash
442 otilde => 'õ', # small o, tilde
443 ouml => 'ö', # small o, dieresis or umlaut mark
444 szlig => 'ß', # small sharp s, German (sz ligature)
445 thorn => 'þ', # small thorn, Icelandic
446 uacute => 'ú', # small u, acute accent
447 ucirc => 'û', # small u, circumflex accent
448 ugrave => 'ù', # small u, grave accent
449 uuml => 'ü', # small u, dieresis or umlaut mark
450 yacute => 'ý', # small y, acute accent
451 yuml => 'ÿ', # small y, dieresis or umlaut mark
452
453 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
454 copy => '©', # copyright sign
455 reg => '®', # registered sign
456 nbsp => "\240", # non breaking space
457
458 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
459 iexcl => '¡',
460 cent => '¢',
461 pound => '£',
462 curren => '¤',
463 yen => '¥',
464 brvbar => '¦',
465 sect => '§',
466 uml => '¨',
467 ordf => 'ª',
468 laquo => '«',
469'not' => '¬', # not is a keyword in perl
470 shy => '­',
471 macr => '¯',
472 deg => '°',
473 plusmn => '±',
474 sup1 => '¹',
475 sup2 => '²',
476 sup3 => '³',
477 acute => '´',
478 micro => 'µ',
479 para => '¶',
480 middot => '·',
481 cedil => '¸',
482 ordm => 'º',
483 raquo => '»',
484 frac14 => '¼',
485 frac12 => '½',
486 frac34 => '¾',
487 iquest => '¿',
488'times' => '×', # times is a keyword in perl
489 divide => '÷',
66aff6dd 490
491# some POD special entities
492 verbar => '|',
493 sol => '/'
e2c3adef 494);
495
360aca43 496##---------------------------------------------------------------------------
497
498##---------------------------------
499## Function definitions begin here
500##---------------------------------
501
e3237417 502sub podchecker( $ ; $ % ) {
503 my ($infile, $outfile, %options) = @_;
360aca43 504 local $_;
505
506 ## Set defaults
507 $infile ||= \*STDIN;
508 $outfile ||= \*STDERR;
509
510 ## Now create a pod checker
e3237417 511 my $checker = new Pod::Checker(%options);
360aca43 512
513 ## Now check the pod document for errors
514 $checker->parse_from_file($infile, $outfile);
48f30392 515
360aca43 516 ## Return the number of errors found
517 return $checker->num_errors();
518}
519
520##---------------------------------------------------------------------------
521
522##-------------------------------
523## Method definitions begin here
524##-------------------------------
525
92e3d63a 526##################################
527
528=over 4
529
530=item C<Pod::Checker-E<gt>new( %options )>
531
532Return a reference to a new Pod::Checker object that inherits from
533Pod::Parser and is used for calling the required methods later. The
534following options are recognized:
535
536C<-warnings =E<gt> num>
537 Print warnings if C<num> is true. The higher the value of C<num>,
538the more warnings are printed. Currently there are only levels 1 and 2.
539
540C<-quiet =E<gt> num>
541 If C<num> is true, do not print any errors/warnings. This is useful
542when Pod::Checker is used to munge POD code into plain text from within
543POD formatters.
544
545=cut
546
66aff6dd 547## sub new {
548## my $this = shift;
549## my $class = ref($this) || $this;
550## my %params = @_;
551## my $self = {%params};
552## bless $self, $class;
553## $self->initialize();
554## return $self;
555## }
360aca43 556
557sub initialize {
558 my $self = shift;
664bb207 559 ## Initialize number of errors, and setup an error function to
560 ## increment this number and then print to the designated output.
561 $self->{_NUM_ERRORS} = 0;
9c6ed6d7 562 $self->{_NUM_WARNINGS} = 0;
92e3d63a 563 $self->{-quiet} ||= 0;
564 # set the error handling subroutine
565 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417 566 $self->{_commands} = 0; # total number of POD commands encountered
567 $self->{_list_stack} = []; # stack for nested lists
568 $self->{_have_begin} = ''; # stores =begin
569 $self->{_links} = []; # stack for internal hyperlinks
570 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 571 $self->{_index} = []; # text in X<>
e2c3adef 572 # print warnings?
e3237417 573 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 574 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 575 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207 576}
577
48f30392 578##################################
579
48f30392 580=item C<$checker-E<gt>poderror( @args )>
581
582=item C<$checker-E<gt>poderror( {%opts}, @args )>
583
584Internal method for printing errors and warnings. If no options are
585given, simply prints "@_". The following options are recognized and used
586to form the output:
587
588 -msg
589
590A message to print prior to C<@args>.
591
592 -line
593
594The line number the error occurred in.
595
596 -file
597
598The file (name) the error occurred in.
599
600 -severity
601
602The error level, should be 'WARNING' or 'ERROR'.
603
604=cut
605
e2c3adef 606# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 607sub poderror {
608 my $self = shift;
609 my %opts = (ref $_[0]) ? %{shift()} : ();
610
611 ## Retrieve options
612 chomp( my $msg = ($opts{-msg} || "")."@_" );
613 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
614 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd 615 unless (exists $opts{-severity}) {
616 ## See if can find severity in message prefix
617 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
618 }
664bb207 619 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
620
e3237417 621 ## Increment error count and print message "
622 ++($self->{_NUM_ERRORS})
623 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
9c6ed6d7 624 ++($self->{_NUM_WARNINGS})
625 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
92e3d63a 626 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef 627 print $out_fh ($severity, $msg, $line, $file, "\n")
628 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43 629}
630
48f30392 631##################################
632
633=item C<$checker-E<gt>num_errors()>
634
635Set (if argument specified) and retrieve the number of errors found.
636
637=cut
638
360aca43 639sub num_errors {
640 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
641}
642
48f30392 643##################################
644
9c6ed6d7 645=item C<$checker-E<gt>num_warnings()>
646
647Set (if argument specified) and retrieve the number of warnings found.
648
649=cut
650
651sub num_warnings {
652 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
653}
654
655##################################
656
48f30392 657=item C<$checker-E<gt>name()>
658
659Set (if argument specified) and retrieve the canonical name of POD as
660found in the C<=head1 NAME> section.
661
662=cut
663
e2c3adef 664sub name {
665 return (@_ > 1 && $_[1]) ?
666 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
667}
668
48f30392 669##################################
670
671=item C<$checker-E<gt>node()>
672
673Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
674and C<=item>) of the current POD. The nodes are returned in the order of
fb8eeed8 675their occurrence. They consist of plain text, each piece of whitespace is
48f30392 676collapsed to a single blank.
677
678=cut
679
e2c3adef 680sub node {
681 my ($self,$text) = @_;
682 if(defined $text) {
66aff6dd 683 $text =~ s/\s+$//s; # strip trailing whitespace
684 $text =~ s/\s+/ /gs; # collapse whitespace
685 # add node, order important!
e2c3adef 686 push(@{$self->{_nodes}}, $text);
66aff6dd 687 # keep also a uniqueness counter
48f30392 688 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 689 return $text;
690 }
691 @{$self->{_nodes}};
692}
693
48f30392 694##################################
695
696=item C<$checker-E<gt>idx()>
697
698Add (if argument specified) and retrieve the index entries (as defined by
699C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
700of whitespace is collapsed to a single blank.
701
702=cut
703
704# set/return index entries of current POD
705sub idx {
706 my ($self,$text) = @_;
707 if(defined $text) {
708 $text =~ s/\s+$//s; # strip trailing whitespace
709 $text =~ s/\s+/ /gs; # collapse whitespace
710 # add node, order important!
711 push(@{$self->{_index}}, $text);
712 # keep also a uniqueness counter
713 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
714 return $text;
715 }
716 @{$self->{_index}};
717}
718
719##################################
720
721=item C<$checker-E<gt>hyperlink()>
722
723Add (if argument specified) and retrieve the hyperlinks (as defined by
d1be9408 724C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
48f30392 725number and C<Pod::Hyperlink> object.
726
727=back
728
729=cut
730
e2c3adef 731# set/return hyperlinks of the current POD
732sub hyperlink {
733 my $self = shift;
734 if($_[0]) {
735 push(@{$self->{_links}}, $_[0]);
736 return $_[0];
737 }
738 @{$self->{_links}};
739}
740
e3237417 741## overrides for Pod::Parser
742
360aca43 743sub end_pod {
66aff6dd 744 ## Do some final checks and
745 ## print the number of errors found
746 my $self = shift;
747 my $infile = $self->input_file();
66aff6dd 748
749 if(@{$self->{_list_stack}}) {
66aff6dd 750 my $list;
751 while(($list = $self->_close_list('EOF',$infile)) &&
752 $list->indent() ne 'auto') {
753 $self->poderror({ -line => 'EOF', -file => $infile,
754 -severity => 'ERROR', -msg => "=over on line " .
755 $list->start() . " without closing =back" }); #"
756 }
757 }
758
759 # check validity of document internal hyperlinks
760 # first build the node names from the paragraph text
761 my %nodes;
762 foreach($self->node()) {
763 $nodes{$_} = 1;
92e3d63a 764 if(/^(\S+)\s+\S/) {
66aff6dd 765 # we have more than one word. Use the first as a node, too.
766 # This is used heavily in perlfunc.pod
767 $nodes{$1} ||= 2; # derived node
768 }
769 }
92e3d63a 770 foreach($self->idx()) {
771 $nodes{$_} = 3; # index node
772 }
66aff6dd 773 foreach($self->hyperlink()) {
48f30392 774 my ($line,$link) = @$_;
775 # _TODO_ what if there is a link to the page itself by the name,
776 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
777 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
778 my $node = $self->_check_ptree($self->parse_text($link->node(),
779 $line), $line, $infile, 'L');
780 if($node && !$nodes{$node}) {
781 $self->poderror({ -line => $line || '', -file => $infile,
782 -severity => 'ERROR',
783 -msg => "unresolved internal link '$node'"});
784 }
66aff6dd 785 }
786 }
48f30392 787
788 # check the internal nodes for uniqueness. This pertains to
789 # =headX, =item and X<...>
d5c61f7c 790 if($self->{-warnings} && $self->{-warnings}>1) {
791 foreach(grep($self->{_unique_nodes}->{$_} > 1,
792 keys %{$self->{_unique_nodes}})) {
793 $self->poderror({ -line => '-', -file => $infile,
66aff6dd 794 -severity => 'WARNING',
fb8eeed8 795 -msg => "multiple occurrence of link target '$_'"});
d5c61f7c 796 }
66aff6dd 797 }
798
c23d1eb0 799 # no POD found here
800 $self->num_errors(-1) if($self->{_commands} == 0);
360aca43 801}
802
e2c3adef 803# check a POD command directive
360aca43 804sub command {
664bb207 805 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 806 my ($file, $line) = $pod_para->file_line;
360aca43 807 ## Check the command syntax
e3237417 808 my $arg; # this will hold the command argument
664bb207 809 if (! $VALID_COMMANDS{$cmd}) {
810 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 811 -msg => "Unknown command '$cmd'" });
360aca43 812 }
92e3d63a 813 else { # found a valid command
814 $self->{_commands}++; # delete this line if below is enabled again
815
816 ##### following check disabled due to strong request
817 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
818 # $self->poderror({ -line => $line, -file => $file,
819 # -severity => 'WARNING',
820 # -msg => "file does not start with =head" });
821 #}
822
823 # check syntax of particular command
e3237417 824 if($cmd eq 'over') {
e2c3adef 825 # check for argument
826 $arg = $self->interpolate_and_check($paragraph, $line,$file);
827 my $indent = 4; # default
828 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
829 $indent = $1;
e2c3adef 830 }
e3237417 831 # start a new list
66aff6dd 832 $self->_open_list($indent,$line,$file);
e3237417 833 }
834 elsif($cmd eq 'item') {
e2c3adef 835 # are we in a list?
e3237417 836 unless(@{$self->{_list_stack}}) {
837 $self->poderror({ -line => $line, -file => $file,
838 -severity => 'ERROR',
839 -msg => "=item without previous =over" });
e2c3adef 840 # auto-open in case we encounter many more
66aff6dd 841 $self->_open_list('auto',$line,$file);
842 }
843 my $list = $self->{_list_stack}->[0];
844 # check whether the previous item had some contents
845 if(defined $self->{_list_item_contents} &&
846 $self->{_list_item_contents} == 0) {
847 $self->poderror({ -line => $line, -file => $file,
848 -severity => 'WARNING',
849 -msg => "previous =item has no contents" });
850 }
851 if($list->{_has_par}) {
852 $self->poderror({ -line => $line, -file => $file,
853 -severity => 'WARNING',
854 -msg => "preceding non-item paragraph(s)" });
855 delete $list->{_has_par};
e3237417 856 }
e2c3adef 857 # check for argument
858 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 859 if($arg && $arg =~ /(\S+)/) {
860 $arg =~ s/[\s\n]+$//;
861 my $type;
862 if($arg =~ /^[*]\s*(\S*.*)/) {
863 $type = 'bullet';
864 $self->{_list_item_contents} = $1 ? 1 : 0;
865 $arg = $1;
866 }
867 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
868 $type = 'number';
869 $self->{_list_item_contents} = $1 ? 1 : 0;
870 $arg = $1;
871 }
872 else {
873 $type = 'definition';
874 $self->{_list_item_contents} = 1;
875 }
876 my $first = $list->type();
877 if($first && $first ne $type) {
878 $self->poderror({ -line => $line, -file => $file,
879 -severity => 'WARNING',
880 -msg => "=item type mismatch ('$first' vs. '$type')"});
881 }
882 else { # first item
883 $list->type($type);
884 }
885 }
886 else {
e2c3adef 887 $self->poderror({ -line => $line, -file => $file,
888 -severity => 'WARNING',
889 -msg => "No argument for =item" });
890 $arg = ' '; # empty
66aff6dd 891 $self->{_list_item_contents} = 0;
e3237417 892 }
e2c3adef 893 # add this item
66aff6dd 894 $list->item($arg);
e2c3adef 895 # remember this node
896 $self->node($arg);
e3237417 897 }
898 elsif($cmd eq 'back') {
899 # check if we have an open list
900 unless(@{$self->{_list_stack}}) {
901 $self->poderror({ -line => $line, -file => $file,
902 -severity => 'ERROR',
903 -msg => "=back without previous =over" });
904 }
905 else {
906 # check for spurious characters
e2c3adef 907 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 908 if($arg && $arg =~ /\S/) {
909 $self->poderror({ -line => $line, -file => $file,
66aff6dd 910 -severity => 'ERROR',
e3237417 911 -msg => "Spurious character(s) after =back" });
912 }
913 # close list
66aff6dd 914 my $list = $self->_close_list($line,$file);
e3237417 915 # check for empty lists
916 if(!$list->item() && $self->{-warnings}) {
917 $self->poderror({ -line => $line, -file => $file,
918 -severity => 'WARNING',
919 -msg => "No items in =over (at line " .
920 $list->start() . ") / =back list"}); #"
921 }
922 }
923 }
66aff6dd 924 elsif($cmd =~ /^head(\d+)/) {
9c6ed6d7 925 my $hnum = $1;
926 $self->{"_have_head_$hnum"}++; # count head types
927 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
928 $self->poderror({ -line => $line, -file => $file,
929 -severity => 'WARNING',
930 -msg => "=head$hnum without preceding higher level"});
931 }
48f30392 932 # check whether the previous =head section had some contents
66aff6dd 933 if(defined $self->{_commands_in_head} &&
934 $self->{_commands_in_head} == 0 &&
935 defined $self->{_last_head} &&
9c6ed6d7 936 $self->{_last_head} >= $hnum) {
66aff6dd 937 $self->poderror({ -line => $line, -file => $file,
938 -severity => 'WARNING',
939 -msg => "empty section in previous paragraph"});
940 }
941 $self->{_commands_in_head} = -1;
9c6ed6d7 942 $self->{_last_head} = $hnum;
e3237417 943 # check if there is an open list
944 if(@{$self->{_list_stack}}) {
945 my $list;
66aff6dd 946 while(($list = $self->_close_list($line,$file)) &&
947 $list->indent() ne 'auto') {
e3237417 948 $self->poderror({ -line => $line, -file => $file,
949 -severity => 'ERROR',
e2c3adef 950 -msg => "=over on line ". $list->start() .
951 " without closing =back (at $cmd)" });
e3237417 952 }
953 }
954 # remember this node
e2c3adef 955 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 956 $arg =~ s/[\s\n]+$//s;
957 $self->node($arg);
958 unless(length($arg)) {
959 $self->poderror({ -line => $line, -file => $file,
960 -severity => 'ERROR',
961 -msg => "empty =$cmd"});
962 }
e2c3adef 963 if($cmd eq 'head1') {
e2c3adef 964 $self->{_current_head1} = $arg;
965 } else {
966 $self->{_current_head1} = '';
967 }
e3237417 968 }
969 elsif($cmd eq 'begin') {
970 if($self->{_have_begin}) {
971 # already have a begin
972 $self->poderror({ -line => $line, -file => $file,
973 -severity => 'ERROR',
974 -msg => "Nested =begin's (first at line " .
975 $self->{_have_begin} . ")"});
976 }
977 else {
978 # check for argument
e2c3adef 979 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 980 unless($arg && $arg =~ /(\S+)/) {
981 $self->poderror({ -line => $line, -file => $file,
e2c3adef 982 -severity => 'ERROR',
e3237417 983 -msg => "No argument for =begin"});
984 }
985 # remember the =begin
986 $self->{_have_begin} = "$line:$1";
987 }
988 }
989 elsif($cmd eq 'end') {
990 if($self->{_have_begin}) {
991 # close the existing =begin
992 $self->{_have_begin} = '';
993 # check for spurious characters
e2c3adef 994 $arg = $self->interpolate_and_check($paragraph, $line,$file);
995 # the closing argument is optional
996 #if($arg && $arg =~ /\S/) {
997 # $self->poderror({ -line => $line, -file => $file,
998 # -severity => 'WARNING',
999 # -msg => "Spurious character(s) after =end" });
1000 #}
e3237417 1001 }
1002 else {
1003 # don't have a matching =begin
1004 $self->poderror({ -line => $line, -file => $file,
e2c3adef 1005 -severity => 'ERROR',
e3237417 1006 -msg => "=end without =begin" });
1007 }
1008 }
e2c3adef 1009 elsif($cmd eq 'for') {
1010 unless($paragraph =~ /\s*(\S+)\s*/) {
1011 $self->poderror({ -line => $line, -file => $file,
1012 -severity => 'ERROR',
1013 -msg => "=for without formatter specification" });
1014 }
1015 $arg = ''; # do not expand paragraph below
1016 }
66aff6dd 1017 elsif($cmd =~ /^(pod|cut)$/) {
1018 # check for argument
1019 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1020 if($arg && $arg =~ /(\S+)/) {
1021 $self->poderror({ -line => $line, -file => $file,
1022 -severity => 'ERROR',
1023 -msg => "Spurious text after =$cmd"});
1024 }
1025 }
1026 $self->{_commands_in_head}++;
e3237417 1027 ## Check the interior sequences in the command-text
e2c3adef 1028 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1029 unless(defined $arg);
e2c3adef 1030 }
360aca43 1031}
1032
66aff6dd 1033sub _open_list
1034{
1035 my ($self,$indent,$line,$file) = @_;
1036 my $list = Pod::List->new(
1037 -indent => $indent,
1038 -start => $line,
1039 -file => $file);
1040 unshift(@{$self->{_list_stack}}, $list);
1041 undef $self->{_list_item_contents};
1042 $list;
1043}
1044
1045sub _close_list
1046{
1047 my ($self,$line,$file) = @_;
1048 my $list = shift(@{$self->{_list_stack}});
1049 if(defined $self->{_list_item_contents} &&
1050 $self->{_list_item_contents} == 0) {
1051 $self->poderror({ -line => $line, -file => $file,
1052 -severity => 'WARNING',
1053 -msg => "previous =item has no contents" });
1054 }
1055 undef $self->{_list_item_contents};
1056 $list;
1057}
1058
e2c3adef 1059# process a block of some text
1060sub interpolate_and_check {
e3237417 1061 my ($self, $paragraph, $line, $file) = @_;
1062 ## Check the interior sequences in the command-text
1063 # and return the text
1064 $self->_check_ptree(
1065 $self->parse_text($paragraph,$line), $line, $file, '');
1066}
1067
1068sub _check_ptree {
1069 my ($self,$ptree,$line,$file,$nestlist) = @_;
1070 local($_);
1071 my $text = '';
1072 # process each node in the parse tree
1073 foreach(@$ptree) {
1074 # regular text chunk
1075 unless(ref) {
e3237417 1076 # count the unescaped angle brackets
92e3d63a 1077 # complain only when warning level is greater than 1
c23d1eb0 1078 if($self->{-warnings} && $self->{-warnings}>1) {
1079 my $count;
1080 if($count = tr/<>/<>/) {
e3237417 1081 $self->poderror({ -line => $line, -file => $file,
1082 -severity => 'WARNING',
c23d1eb0 1083 -msg => "$count unescaped <> in paragraph" });
1084 }
e3237417 1085 }
c23d1eb0 1086 $text .= $_;
e3237417 1087 next;
1088 }
1089 # have an interior sequence
1090 my $cmd = $_->cmd_name();
1091 my $contents = $_->parse_tree();
1092 ($file,$line) = $_->file_line();
1093 # check for valid tag
1094 if (! $VALID_SEQUENCES{$cmd}) {
1095 $self->poderror({ -line => $line, -file => $file,
1096 -severity => 'ERROR',
66aff6dd 1097 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1098 # expand it anyway
1099 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1100 next;
1101 }
1102 if($nestlist =~ /$cmd/) {
1103 $self->poderror({ -line => $line, -file => $file,
1104 -severity => 'ERROR',
1105 -msg => "nested commands $cmd<...$cmd<...>...>"});
1106 # _TODO_ should we add the contents anyway?
1107 # expand it anyway, see below
1108 }
1109 if($cmd eq 'E') {
1110 # preserve entities
1111 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1112 $self->poderror({ -line => $line, -file => $file,
1113 -severity => 'ERROR',
1114 -msg => "garbled entity " . $_->raw_text()});
1115 next;
1116 }
e2c3adef 1117 my $ent = $$contents[0];
66aff6dd 1118 my $val;
1119 if($ent =~ /^0x[0-9a-f]+$/i) {
1120 # hexadec entity
1121 $val = hex($ent);
1122 }
1123 elsif($ent =~ /^0\d+$/) {
1124 # octal
1125 $val = oct($ent);
1126 }
1127 elsif($ent =~ /^\d+$/) {
e2c3adef 1128 # numeric entity
66aff6dd 1129 $val = $ent;
1130 }
1131 if(defined $val) {
1132 if($val>0 && $val<256) {
1133 $text .= chr($val);
1134 }
1135 else {
1136 $self->poderror({ -line => $line, -file => $file,
1137 -severity => 'ERROR',
1138 -msg => "Entity number out of range " . $_->raw_text()});
1139 }
e2c3adef 1140 }
1141 elsif($ENTITIES{$ent}) {
1142 # known ISO entity
1143 $text .= $ENTITIES{$ent};
1144 }
1145 else {
1146 $self->poderror({ -line => $line, -file => $file,
1147 -severity => 'WARNING',
66aff6dd 1148 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef 1149 $text .= "E<$ent>";
1150 }
e3237417 1151 }
1152 elsif($cmd eq 'L') {
1153 # try to parse the hyperlink
1154 my $link = Pod::Hyperlink->new($contents->raw_text());
1155 unless(defined $link) {
1156 $self->poderror({ -line => $line, -file => $file,
1157 -severity => 'ERROR',
e2c3adef 1158 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417 1159 next;
1160 }
1161 $link->line($line); # remember line
1162 if($self->{-warnings}) {
1163 foreach my $w ($link->warning()) {
1164 $self->poderror({ -line => $line, -file => $file,
1165 -severity => 'WARNING',
1166 -msg => $w });
1167 }
1168 }
1169 # check the link text
1170 $text .= $self->_check_ptree($self->parse_text($link->text(),
1171 $line), $line, $file, "$nestlist$cmd");
48f30392 1172 # remember link
1173 $self->hyperlink([$line,$link]);
e3237417 1174 }
1175 elsif($cmd =~ /[BCFIS]/) {
1176 # add the guts
1177 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1178 }
66aff6dd 1179 elsif($cmd eq 'Z') {
1180 if(length($contents->raw_text())) {
1181 $self->poderror({ -line => $line, -file => $file,
1182 -severity => 'ERROR',
1183 -msg => "Nonempty Z<>"});
1184 }
1185 }
48f30392 1186 elsif($cmd eq 'X') {
1187 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1188 if($idx =~ /^\s*$/s) {
1189 $self->poderror({ -line => $line, -file => $file,
1190 -severity => 'ERROR',
1191 -msg => "Empty X<>"});
1192 }
1193 else {
1194 # remember this node
1195 $self->idx($idx);
1196 }
1197 }
1198 else {
1199 # not reached
1200 die "internal error";
e3237417 1201 }
1202 }
1203 $text;
1204}
1205
e2c3adef 1206# process a block of verbatim text
360aca43 1207sub verbatim {
66aff6dd 1208 ## Nothing particular to check
e2c3adef 1209 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1210
1211 $self->_preproc_par($paragraph);
1212
e2c3adef 1213 if($self->{_current_head1} eq 'NAME') {
1214 my ($file, $line) = $pod_para->file_line;
1215 $self->poderror({ -line => $line, -file => $file,
1216 -severity => 'WARNING',
1217 -msg => 'Verbatim paragraph in NAME section' });
1218 }
360aca43 1219}
1220
e2c3adef 1221# process a block of regular text
360aca43 1222sub textblock {
1223 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1224 my ($file, $line) = $pod_para->file_line;
e3237417 1225
66aff6dd 1226 $self->_preproc_par($paragraph);
1227
e2c3adef 1228 # skip this paragraph if in a =begin block
1229 unless($self->{_have_begin}) {
1230 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1231 if($self->{_current_head1} eq 'NAME') {
1232 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1233 # this is the canonical name
1234 $self->{-name} = $1 unless(defined $self->{-name});
1235 }
e3237417 1236 }
1237 }
e3237417 1238}
1239
66aff6dd 1240sub _preproc_par
1241{
1242 my $self = shift;
1243 $_[0] =~ s/[\s\n]+$//;
1244 if($_[0]) {
1245 $self->{_commands_in_head}++;
1246 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1247 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1248 $self->{_list_stack}->[0]->{_has_par} = 1;
1249 }
1250 }
1251}
1252
e3237417 12531;
66aff6dd 1254
48f30392 1255__END__
1256
1257=head1 AUTHOR
1258
aaa799f9 1259Please report bugs using L<http://rt.cpan.org>.
1260
48f30392 1261Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
aaa799f9 1262Marek Rouchal E<lt>marekr@cpan.orgE<gt>
48f30392 1263
1264Based on code for B<Pod::Text::pod2text()> written by
1265Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1266
1267=cut
1268