Integrate with Sarathy.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
664bb207 4# Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
48f30392 13$VERSION = 1.098; ## Current version of this package
360aca43 14require 5.004; ## requires this Perl version or later
15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43 31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43 36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef 39=head2 podchecker()
40
41This function can take a hash of options:
e3237417 42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
47Turn warnings on/off. See L<"Warnings">.
48
49=back
360aca43 50
51=head1 DESCRIPTION
52
53B<podchecker> will perform syntax checking of Perl5 POD format documentation.
54
e2c3adef 55I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43 56
57It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef 58additional features they wish to see in B<Pod::Checker> and B<podchecker>
59and verify that the checks are consistent with L<perlpod>.
360aca43 60
48f30392 61The following checks are currently preformed:
e3237417 62
63=over 4
64
65=item *
66
48f30392 67Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 68and unterminated interior sequences.
69
70=item *
71
72Check for proper balancing of C<=begin> and C<=end>. The contents of such
73a block are generally ignored, i.e. no syntax checks are performed.
e3237417 74
75=item *
76
77Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
78
79=item *
80
e2c3adef 81Check for same nested interior-sequences (e.g.
82C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 83
84=item *
85
e2c3adef 86Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417 87
88=item *
89
e2c3adef 90Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
91for details.
e3237417 92
93=item *
94
e2c3adef 95Check for unresolved document-internal links. This check may also reveal
96misspelled links that seem to be internal links but should be links
97to something else.
e3237417 98
99=back
100
e2c3adef 101=head1 DIAGNOSTICS
e3237417 102
e2c3adef 103=head2 Errors
e3237417 104
105=over 4
106
66aff6dd 107=item * empty =headn
108
109A heading (C<=head1> or C<=head2>) without any text? That ain't no
110heading!
111
e2c3adef 112=item * =over on line I<N> without closing =back
e3237417 113
e2c3adef 114The C<=over> command does not have a corresponding C<=back> before the
115next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 116
e2c3adef 117=item * =item without previous =over
e3237417 118
e2c3adef 119=item * =back without previous =over
e3237417 120
e2c3adef 121An C<=item> or C<=back> command has been found outside a
122C<=over>/C<=back> block.
e3237417 123
e2c3adef 124=item * No argument for =begin
e3237417 125
e2c3adef 126A C<=begin> command was found that is not followed by the formatter
127specification.
e3237417 128
e2c3adef 129=item * =end without =begin
e3237417 130
e2c3adef 131A standalone C<=end> command was found.
132
133=item * Nested =begin's
134
66aff6dd 135There were at least two consecutive C<=begin> commands without
e2c3adef 136the corresponding C<=end>. Only one C<=begin> may be active at
137a time.
138
139=item * =for without formatter specification
e3237417 140
e2c3adef 141There is no specification of the formatter after the C<=for> command.
142
143=item * unresolved internal link I<NAME>
144
145The given link to I<NAME> does not have a matching node in the current
146POD. This also happend when a single word node name is not enclosed in
147C<"">.
148
149=item * Unknown command "I<CMD>"
150
151An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
152C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>,
153C<=cut>
154
155=item * Unknown interior-sequence "I<SEQ>"
156
157An invalid markup command has been encountered. Valid are:
158C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
159C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
160C<ZE<lt>E<gt>>
161
162=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
163
164Two nested identical markup commands have been found. Generally this
165does not make sense.
166
167=item * garbled entity I<STRING>
168
66aff6dd 169The I<STRING> found cannot be interpreted as a character entity.
170
171=item * Entity number out of range
172
173An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 174
175=item * malformed link LE<lt>E<gt>
176
177The link found cannot be parsed because it does not conform to the
178syntax described in L<perlpod>.
e3237417 179
66aff6dd 180=item * nonempty ZE<lt>E<gt>
181
182The C<ZE<lt>E<gt>> sequence is supposed to be empty.
183
48f30392 184=item * empty XE<lt>E<gt>
185
186The index entry specified contains nothing but whitespace.
187
66aff6dd 188=item * Spurious text after =pod / =cut
189
190The commands C<=pod> and C<=cut> do not take any arguments.
191
192=item * Spurious character(s) after =back
193
194The C<=back> command does not take any arguments.
195
e3237417 196=back
197
e2c3adef 198=head2 Warnings
e3237417 199
e2c3adef 200These may not necessarily cause trouble, but indicate mediocre style.
201
202=over 4
203
66aff6dd 204=item * multiple occurence of link target I<name>
205
206The POD file has some C<=item> and/or C<=head> commands that have
207the same text. Potential hyperlinks to such a text cannot be unique then.
208
209=item * line containing nothing but whitespace in paragraph
210
211There is some whitespace on a seemingly empty line. POD is very sensitive
212to such things, so this is flagged. B<vi> users switch on the B<list>
213option to avoid this problem.
214
215=item * file does not start with =head
216
217The file starts with a different POD directive than head.
218This is most probably something you do not want.
219
e2c3adef 220=item * No numeric argument for =over
221
222The C<=over> command is supposed to have a numeric argument (the
223indentation).
224
66aff6dd 225=item * previous =item has no contents
e2c3adef 226
66aff6dd 227There is a list C<=item> right above the flagged line that has no
228text contents. You probably want to delete empty items.
229
230=item * preceding non-item paragraph(s)
231
232A list introduced by C<=over> starts with a text or verbatim paragraph,
233but continues with C<=item>s. Move the non-item paragraph out of the
234C<=over>/C<=back> block.
235
236=item * =item type mismatch (I<one> vs. I<two>)
237
238A list started with e.g. a bulletted C<=item> and continued with a
239numbered one. This is obviously inconsistent. For most translators the
240type of the I<first> C<=item> determines the type of the list.
e2c3adef 241
242=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
244Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245can potentially cause errors as they could be misinterpreted as
246markup commands.
247
66aff6dd 248=item * Unknown entity
e2c3adef 249
250A character entity was found that does not belong to the standard
66aff6dd 251ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 252
253=item * No items in =over
254
66aff6dd 255The list opened with C<=over> does not contain any items.
e2c3adef 256
257=item * No argument for =item
258
259C<=item> without any parameters is deprecated. It should either be followed
260by C<*> to indicate an unordered list, by a number (optionally followed
261by a dot) to indicate an ordered (numbered) list or simple text for a
262definition list.
263
66aff6dd 264=item * empty section in previous paragraph
265
266The previous section (introduced by a C<=head> command) does not contain
267any text. This usually indicates that something is missing. Note: A
268C<=head1> followed immediately by C<=head2> does not trigger this warning.
269
e2c3adef 270=item * Verbatim paragraph in NAME section
271
272The NAME section (C<=head1 NAME>) should consist of a single paragraph
273with the script/module name, followed by a dash `-' and a very short
274description of what the thing is good for.
275
276=item * Hyperlinks
277
278There are some warnings wrt. hyperlinks:
279Leading/trailing whitespace, newlines in hyperlinks,
280brackets C<()>.
281
282=back
e3237417 283
284=head1 RETURN VALUE
285
286B<podchecker> returns the number of POD syntax errors found or -1 if
287there were no POD commands at all found in the file.
288
360aca43 289=head1 EXAMPLES
290
291I<[T.B.D.]>
292
48f30392 293=head1 INTERFACE
360aca43 294
48f30392 295While checking, this module collects document properties, e.g. the nodes
296for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
297POD translators can use this feature to syntax-check and get the nodes in
298a first pass before actually starting to convert. This is expensive in terms
299of execution time, but allows for very robust conversions.
360aca43 300
301=cut
302
303#############################################################################
304
305use strict;
306#use diagnostics;
307use Carp;
308use Exporter;
309use Pod::Parser;
310
311use vars qw(@ISA @EXPORT);
312@ISA = qw(Pod::Parser);
313@EXPORT = qw(&podchecker);
314
315use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
316
317my %VALID_COMMANDS = (
318 'pod' => 1,
319 'cut' => 1,
320 'head1' => 1,
321 'head2' => 1,
322 'over' => 1,
323 'back' => 1,
324 'item' => 1,
325 'for' => 1,
326 'begin' => 1,
327 'end' => 1,
328);
329
330my %VALID_SEQUENCES = (
331 'I' => 1,
332 'B' => 1,
333 'S' => 1,
334 'C' => 1,
335 'L' => 1,
336 'F' => 1,
337 'X' => 1,
338 'Z' => 1,
339 'E' => 1,
340);
341
e2c3adef 342# stolen from HTML::Entities
343my %ENTITIES = (
344 # Some normal chars that have special meaning in SGML context
345 amp => '&', # ampersand
346'gt' => '>', # greater than
347'lt' => '<', # less than
348 quot => '"', # double quote
349
350 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
351 AElig => 'Æ', # capital AE diphthong (ligature)
352 Aacute => 'Á', # capital A, acute accent
353 Acirc => 'Â', # capital A, circumflex accent
354 Agrave => 'À', # capital A, grave accent
355 Aring => 'Å', # capital A, ring
356 Atilde => 'Ã', # capital A, tilde
357 Auml => 'Ä', # capital A, dieresis or umlaut mark
358 Ccedil => 'Ç', # capital C, cedilla
359 ETH => 'Ð', # capital Eth, Icelandic
360 Eacute => 'É', # capital E, acute accent
361 Ecirc => 'Ê', # capital E, circumflex accent
362 Egrave => 'È', # capital E, grave accent
363 Euml => 'Ë', # capital E, dieresis or umlaut mark
364 Iacute => 'Í', # capital I, acute accent
365 Icirc => 'Î', # capital I, circumflex accent
366 Igrave => 'Ì', # capital I, grave accent
367 Iuml => 'Ï', # capital I, dieresis or umlaut mark
368 Ntilde => 'Ñ', # capital N, tilde
369 Oacute => 'Ó', # capital O, acute accent
370 Ocirc => 'Ô', # capital O, circumflex accent
371 Ograve => 'Ò', # capital O, grave accent
372 Oslash => 'Ø', # capital O, slash
373 Otilde => 'Õ', # capital O, tilde
374 Ouml => 'Ö', # capital O, dieresis or umlaut mark
375 THORN => 'Þ', # capital THORN, Icelandic
376 Uacute => 'Ú', # capital U, acute accent
377 Ucirc => 'Û', # capital U, circumflex accent
378 Ugrave => 'Ù', # capital U, grave accent
379 Uuml => 'Ü', # capital U, dieresis or umlaut mark
380 Yacute => 'Ý', # capital Y, acute accent
381 aacute => 'á', # small a, acute accent
382 acirc => 'â', # small a, circumflex accent
383 aelig => 'æ', # small ae diphthong (ligature)
384 agrave => 'à', # small a, grave accent
385 aring => 'å', # small a, ring
386 atilde => 'ã', # small a, tilde
387 auml => 'ä', # small a, dieresis or umlaut mark
388 ccedil => 'ç', # small c, cedilla
389 eacute => 'é', # small e, acute accent
390 ecirc => 'ê', # small e, circumflex accent
391 egrave => 'è', # small e, grave accent
392 eth => 'ð', # small eth, Icelandic
393 euml => 'ë', # small e, dieresis or umlaut mark
394 iacute => 'í', # small i, acute accent
395 icirc => 'î', # small i, circumflex accent
396 igrave => 'ì', # small i, grave accent
397 iuml => 'ï', # small i, dieresis or umlaut mark
398 ntilde => 'ñ', # small n, tilde
399 oacute => 'ó', # small o, acute accent
400 ocirc => 'ô', # small o, circumflex accent
401 ograve => 'ò', # small o, grave accent
402 oslash => 'ø', # small o, slash
403 otilde => 'õ', # small o, tilde
404 ouml => 'ö', # small o, dieresis or umlaut mark
405 szlig => 'ß', # small sharp s, German (sz ligature)
406 thorn => 'þ', # small thorn, Icelandic
407 uacute => 'ú', # small u, acute accent
408 ucirc => 'û', # small u, circumflex accent
409 ugrave => 'ù', # small u, grave accent
410 uuml => 'ü', # small u, dieresis or umlaut mark
411 yacute => 'ý', # small y, acute accent
412 yuml => 'ÿ', # small y, dieresis or umlaut mark
413
414 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
415 copy => '©', # copyright sign
416 reg => '®', # registered sign
417 nbsp => "\240", # non breaking space
418
419 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
420 iexcl => '¡',
421 cent => '¢',
422 pound => '£',
423 curren => '¤',
424 yen => '¥',
425 brvbar => '¦',
426 sect => '§',
427 uml => '¨',
428 ordf => 'ª',
429 laquo => '«',
430'not' => '¬', # not is a keyword in perl
431 shy => '­',
432 macr => '¯',
433 deg => '°',
434 plusmn => '±',
435 sup1 => '¹',
436 sup2 => '²',
437 sup3 => '³',
438 acute => '´',
439 micro => 'µ',
440 para => '¶',
441 middot => '·',
442 cedil => '¸',
443 ordm => 'º',
444 raquo => '»',
445 frac14 => '¼',
446 frac12 => '½',
447 frac34 => '¾',
448 iquest => '¿',
449'times' => '×', # times is a keyword in perl
450 divide => '÷',
66aff6dd 451
452# some POD special entities
453 verbar => '|',
454 sol => '/'
e2c3adef 455);
456
360aca43 457##---------------------------------------------------------------------------
458
459##---------------------------------
460## Function definitions begin here
461##---------------------------------
462
e3237417 463sub podchecker( $ ; $ % ) {
464 my ($infile, $outfile, %options) = @_;
360aca43 465 local $_;
466
467 ## Set defaults
468 $infile ||= \*STDIN;
469 $outfile ||= \*STDERR;
470
471 ## Now create a pod checker
e3237417 472 my $checker = new Pod::Checker(%options);
66aff6dd 473 $checker->parseopts(-process_cut_cmd => 1);
360aca43 474
475 ## Now check the pod document for errors
476 $checker->parse_from_file($infile, $outfile);
48f30392 477
360aca43 478 ## Return the number of errors found
479 return $checker->num_errors();
480}
481
482##---------------------------------------------------------------------------
483
484##-------------------------------
485## Method definitions begin here
486##-------------------------------
487
66aff6dd 488## sub new {
489## my $this = shift;
490## my $class = ref($this) || $this;
491## my %params = @_;
492## my $self = {%params};
493## bless $self, $class;
494## $self->initialize();
495## return $self;
496## }
360aca43 497
498sub initialize {
499 my $self = shift;
664bb207 500 ## Initialize number of errors, and setup an error function to
501 ## increment this number and then print to the designated output.
502 $self->{_NUM_ERRORS} = 0;
e2c3adef 503 $self->errorsub('poderror'); # set the error handling subroutine
e3237417 504 $self->{_commands} = 0; # total number of POD commands encountered
505 $self->{_list_stack} = []; # stack for nested lists
506 $self->{_have_begin} = ''; # stores =begin
507 $self->{_links} = []; # stack for internal hyperlinks
508 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 509 $self->{_index} = []; # text in X<>
e2c3adef 510 # print warnings?
e3237417 511 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 512 $self->{_current_head1} = ''; # the current =head1 block
664bb207 513}
514
48f30392 515##################################
516
517=over 4
518
519=item C<$checker-E<gt>poderror( @args )>
520
521=item C<$checker-E<gt>poderror( {%opts}, @args )>
522
523Internal method for printing errors and warnings. If no options are
524given, simply prints "@_". The following options are recognized and used
525to form the output:
526
527 -msg
528
529A message to print prior to C<@args>.
530
531 -line
532
533The line number the error occurred in.
534
535 -file
536
537The file (name) the error occurred in.
538
539 -severity
540
541The error level, should be 'WARNING' or 'ERROR'.
542
543=cut
544
e2c3adef 545# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 546sub poderror {
547 my $self = shift;
548 my %opts = (ref $_[0]) ? %{shift()} : ();
549
550 ## Retrieve options
551 chomp( my $msg = ($opts{-msg} || "")."@_" );
552 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
553 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd 554 unless (exists $opts{-severity}) {
555 ## See if can find severity in message prefix
556 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
557 }
664bb207 558 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
559
e3237417 560 ## Increment error count and print message "
561 ++($self->{_NUM_ERRORS})
562 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
664bb207 563 my $out_fh = $self->output_handle();
e2c3adef 564 print $out_fh ($severity, $msg, $line, $file, "\n")
565 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43 566}
567
48f30392 568##################################
569
570=item C<$checker-E<gt>num_errors()>
571
572Set (if argument specified) and retrieve the number of errors found.
573
574=cut
575
360aca43 576sub num_errors {
577 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
578}
579
48f30392 580##################################
581
582=item C<$checker-E<gt>name()>
583
584Set (if argument specified) and retrieve the canonical name of POD as
585found in the C<=head1 NAME> section.
586
587=cut
588
e2c3adef 589sub name {
590 return (@_ > 1 && $_[1]) ?
591 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
592}
593
48f30392 594##################################
595
596=item C<$checker-E<gt>node()>
597
598Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
599and C<=item>) of the current POD. The nodes are returned in the order of
600their occurence. They consist of plain text, each piece of whitespace is
601collapsed to a single blank.
602
603=cut
604
e2c3adef 605sub node {
606 my ($self,$text) = @_;
607 if(defined $text) {
66aff6dd 608 $text =~ s/\s+$//s; # strip trailing whitespace
609 $text =~ s/\s+/ /gs; # collapse whitespace
610 # add node, order important!
e2c3adef 611 push(@{$self->{_nodes}}, $text);
66aff6dd 612 # keep also a uniqueness counter
48f30392 613 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 614 return $text;
615 }
616 @{$self->{_nodes}};
617}
618
48f30392 619##################################
620
621=item C<$checker-E<gt>idx()>
622
623Add (if argument specified) and retrieve the index entries (as defined by
624C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
625of whitespace is collapsed to a single blank.
626
627=cut
628
629# set/return index entries of current POD
630sub idx {
631 my ($self,$text) = @_;
632 if(defined $text) {
633 $text =~ s/\s+$//s; # strip trailing whitespace
634 $text =~ s/\s+/ /gs; # collapse whitespace
635 # add node, order important!
636 push(@{$self->{_index}}, $text);
637 # keep also a uniqueness counter
638 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
639 return $text;
640 }
641 @{$self->{_index}};
642}
643
644##################################
645
646=item C<$checker-E<gt>hyperlink()>
647
648Add (if argument specified) and retrieve the hyperlinks (as defined by
649C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line
650number and C<Pod::Hyperlink> object.
651
652=back
653
654=cut
655
e2c3adef 656# set/return hyperlinks of the current POD
657sub hyperlink {
658 my $self = shift;
659 if($_[0]) {
660 push(@{$self->{_links}}, $_[0]);
661 return $_[0];
662 }
663 @{$self->{_links}};
664}
665
e3237417 666## overrides for Pod::Parser
667
360aca43 668sub end_pod {
66aff6dd 669 ## Do some final checks and
670 ## print the number of errors found
671 my $self = shift;
672 my $infile = $self->input_file();
673 my $out_fh = $self->output_handle();
674
675 if(@{$self->{_list_stack}}) {
676 # _TODO_ display, but don't count them for now
677 my $list;
678 while(($list = $self->_close_list('EOF',$infile)) &&
679 $list->indent() ne 'auto') {
680 $self->poderror({ -line => 'EOF', -file => $infile,
681 -severity => 'ERROR', -msg => "=over on line " .
682 $list->start() . " without closing =back" }); #"
683 }
684 }
685
686 # check validity of document internal hyperlinks
687 # first build the node names from the paragraph text
688 my %nodes;
689 foreach($self->node()) {
690 $nodes{$_} = 1;
691 if(/^(\S+)\s+/) {
692 # we have more than one word. Use the first as a node, too.
693 # This is used heavily in perlfunc.pod
694 $nodes{$1} ||= 2; # derived node
695 }
696 }
697 foreach($self->hyperlink()) {
48f30392 698 my ($line,$link) = @$_;
699 # _TODO_ what if there is a link to the page itself by the name,
700 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
701 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
702 my $node = $self->_check_ptree($self->parse_text($link->node(),
703 $line), $line, $infile, 'L');
704 if($node && !$nodes{$node}) {
705 $self->poderror({ -line => $line || '', -file => $infile,
706 -severity => 'ERROR',
707 -msg => "unresolved internal link '$node'"});
708 }
66aff6dd 709 }
710 }
48f30392 711
712 # check the internal nodes for uniqueness. This pertains to
713 # =headX, =item and X<...>
66aff6dd 714 foreach(grep($self->{_unique_nodes}->{$_} > 1,
715 keys %{$self->{_unique_nodes}})) {
716 $self->poderror({ -line => '-', -file => $infile,
717 -severity => 'WARNING',
718 -msg => "multiple occurence of link target '$_'"});
719 }
720
721 ## Print the number of errors found
722 my $num_errors = $self->num_errors();
723 if ($num_errors > 0) {
724 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 725 ($num_errors == 1) ? "error" : "errors");
66aff6dd 726 }
727 elsif($self->{_commands} == 0) {
728 print $out_fh "$infile does not contain any pod commands.\n";
729 $self->num_errors(-1);
730 }
731 else {
732 print $out_fh "$infile pod syntax OK.\n";
733 }
360aca43 734}
735
e2c3adef 736# check a POD command directive
360aca43 737sub command {
664bb207 738 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 739 my ($file, $line) = $pod_para->file_line;
360aca43 740 ## Check the command syntax
e3237417 741 my $arg; # this will hold the command argument
664bb207 742 if (! $VALID_COMMANDS{$cmd}) {
743 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 744 -msg => "Unknown command '$cmd'" });
360aca43 745 }
746 else {
66aff6dd 747 # found a valid command
748 if(!$self->{_commands}++ && $cmd !~ /^head/) {
749 $self->poderror({ -line => $line, -file => $file,
750 -severity => 'WARNING',
751 -msg => "file does not start with =head" });
752 }
e3237417 753 ## check syntax of particular command
754 if($cmd eq 'over') {
e2c3adef 755 # check for argument
756 $arg = $self->interpolate_and_check($paragraph, $line,$file);
757 my $indent = 4; # default
758 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
759 $indent = $1;
760 } else {
761 $self->poderror({ -line => $line, -file => $file,
762 -severity => 'WARNING',
763 -msg => "No numeric argument for =over"});
764 }
e3237417 765 # start a new list
66aff6dd 766 $self->_open_list($indent,$line,$file);
e3237417 767 }
768 elsif($cmd eq 'item') {
e2c3adef 769 # are we in a list?
e3237417 770 unless(@{$self->{_list_stack}}) {
771 $self->poderror({ -line => $line, -file => $file,
772 -severity => 'ERROR',
773 -msg => "=item without previous =over" });
e2c3adef 774 # auto-open in case we encounter many more
66aff6dd 775 $self->_open_list('auto',$line,$file);
776 }
777 my $list = $self->{_list_stack}->[0];
778 # check whether the previous item had some contents
779 if(defined $self->{_list_item_contents} &&
780 $self->{_list_item_contents} == 0) {
781 $self->poderror({ -line => $line, -file => $file,
782 -severity => 'WARNING',
783 -msg => "previous =item has no contents" });
784 }
785 if($list->{_has_par}) {
786 $self->poderror({ -line => $line, -file => $file,
787 -severity => 'WARNING',
788 -msg => "preceding non-item paragraph(s)" });
789 delete $list->{_has_par};
e3237417 790 }
e2c3adef 791 # check for argument
792 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 793 if($arg && $arg =~ /(\S+)/) {
794 $arg =~ s/[\s\n]+$//;
795 my $type;
796 if($arg =~ /^[*]\s*(\S*.*)/) {
797 $type = 'bullet';
798 $self->{_list_item_contents} = $1 ? 1 : 0;
799 $arg = $1;
800 }
801 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
802 $type = 'number';
803 $self->{_list_item_contents} = $1 ? 1 : 0;
804 $arg = $1;
805 }
806 else {
807 $type = 'definition';
808 $self->{_list_item_contents} = 1;
809 }
810 my $first = $list->type();
811 if($first && $first ne $type) {
812 $self->poderror({ -line => $line, -file => $file,
813 -severity => 'WARNING',
814 -msg => "=item type mismatch ('$first' vs. '$type')"});
815 }
816 else { # first item
817 $list->type($type);
818 }
819 }
820 else {
e2c3adef 821 $self->poderror({ -line => $line, -file => $file,
822 -severity => 'WARNING',
823 -msg => "No argument for =item" });
824 $arg = ' '; # empty
66aff6dd 825 $self->{_list_item_contents} = 0;
e3237417 826 }
e2c3adef 827 # add this item
66aff6dd 828 $list->item($arg);
e2c3adef 829 # remember this node
830 $self->node($arg);
e3237417 831 }
832 elsif($cmd eq 'back') {
833 # check if we have an open list
834 unless(@{$self->{_list_stack}}) {
835 $self->poderror({ -line => $line, -file => $file,
836 -severity => 'ERROR',
837 -msg => "=back without previous =over" });
838 }
839 else {
840 # check for spurious characters
e2c3adef 841 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 842 if($arg && $arg =~ /\S/) {
843 $self->poderror({ -line => $line, -file => $file,
66aff6dd 844 -severity => 'ERROR',
e3237417 845 -msg => "Spurious character(s) after =back" });
846 }
847 # close list
66aff6dd 848 my $list = $self->_close_list($line,$file);
e3237417 849 # check for empty lists
850 if(!$list->item() && $self->{-warnings}) {
851 $self->poderror({ -line => $line, -file => $file,
852 -severity => 'WARNING',
853 -msg => "No items in =over (at line " .
854 $list->start() . ") / =back list"}); #"
855 }
856 }
857 }
66aff6dd 858 elsif($cmd =~ /^head(\d+)/) {
48f30392 859 # check whether the previous =head section had some contents
66aff6dd 860 if(defined $self->{_commands_in_head} &&
861 $self->{_commands_in_head} == 0 &&
862 defined $self->{_last_head} &&
863 $self->{_last_head} >= $1) {
864 $self->poderror({ -line => $line, -file => $file,
865 -severity => 'WARNING',
866 -msg => "empty section in previous paragraph"});
867 }
868 $self->{_commands_in_head} = -1;
869 $self->{_last_head} = $1;
e3237417 870 # check if there is an open list
871 if(@{$self->{_list_stack}}) {
872 my $list;
66aff6dd 873 while(($list = $self->_close_list($line,$file)) &&
874 $list->indent() ne 'auto') {
e3237417 875 $self->poderror({ -line => $line, -file => $file,
876 -severity => 'ERROR',
e2c3adef 877 -msg => "=over on line ". $list->start() .
878 " without closing =back (at $cmd)" });
e3237417 879 }
880 }
881 # remember this node
e2c3adef 882 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 883 $arg =~ s/[\s\n]+$//s;
884 $self->node($arg);
885 unless(length($arg)) {
886 $self->poderror({ -line => $line, -file => $file,
887 -severity => 'ERROR',
888 -msg => "empty =$cmd"});
889 }
e2c3adef 890 if($cmd eq 'head1') {
e2c3adef 891 $self->{_current_head1} = $arg;
892 } else {
893 $self->{_current_head1} = '';
894 }
e3237417 895 }
896 elsif($cmd eq 'begin') {
897 if($self->{_have_begin}) {
898 # already have a begin
899 $self->poderror({ -line => $line, -file => $file,
900 -severity => 'ERROR',
901 -msg => "Nested =begin's (first at line " .
902 $self->{_have_begin} . ")"});
903 }
904 else {
905 # check for argument
e2c3adef 906 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 907 unless($arg && $arg =~ /(\S+)/) {
908 $self->poderror({ -line => $line, -file => $file,
e2c3adef 909 -severity => 'ERROR',
e3237417 910 -msg => "No argument for =begin"});
911 }
912 # remember the =begin
913 $self->{_have_begin} = "$line:$1";
914 }
915 }
916 elsif($cmd eq 'end') {
917 if($self->{_have_begin}) {
918 # close the existing =begin
919 $self->{_have_begin} = '';
920 # check for spurious characters
e2c3adef 921 $arg = $self->interpolate_and_check($paragraph, $line,$file);
922 # the closing argument is optional
923 #if($arg && $arg =~ /\S/) {
924 # $self->poderror({ -line => $line, -file => $file,
925 # -severity => 'WARNING',
926 # -msg => "Spurious character(s) after =end" });
927 #}
e3237417 928 }
929 else {
930 # don't have a matching =begin
931 $self->poderror({ -line => $line, -file => $file,
e2c3adef 932 -severity => 'ERROR',
e3237417 933 -msg => "=end without =begin" });
934 }
935 }
e2c3adef 936 elsif($cmd eq 'for') {
937 unless($paragraph =~ /\s*(\S+)\s*/) {
938 $self->poderror({ -line => $line, -file => $file,
939 -severity => 'ERROR',
940 -msg => "=for without formatter specification" });
941 }
942 $arg = ''; # do not expand paragraph below
943 }
66aff6dd 944 elsif($cmd =~ /^(pod|cut)$/) {
945 # check for argument
946 $arg = $self->interpolate_and_check($paragraph, $line,$file);
947 if($arg && $arg =~ /(\S+)/) {
948 $self->poderror({ -line => $line, -file => $file,
949 -severity => 'ERROR',
950 -msg => "Spurious text after =$cmd"});
951 }
952 }
953 $self->{_commands_in_head}++;
e3237417 954 ## Check the interior sequences in the command-text
e2c3adef 955 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 956 unless(defined $arg);
e2c3adef 957 }
360aca43 958}
959
66aff6dd 960sub _open_list
961{
962 my ($self,$indent,$line,$file) = @_;
963 my $list = Pod::List->new(
964 -indent => $indent,
965 -start => $line,
966 -file => $file);
967 unshift(@{$self->{_list_stack}}, $list);
968 undef $self->{_list_item_contents};
969 $list;
970}
971
972sub _close_list
973{
974 my ($self,$line,$file) = @_;
975 my $list = shift(@{$self->{_list_stack}});
976 if(defined $self->{_list_item_contents} &&
977 $self->{_list_item_contents} == 0) {
978 $self->poderror({ -line => $line, -file => $file,
979 -severity => 'WARNING',
980 -msg => "previous =item has no contents" });
981 }
982 undef $self->{_list_item_contents};
983 $list;
984}
985
e2c3adef 986# process a block of some text
987sub interpolate_and_check {
e3237417 988 my ($self, $paragraph, $line, $file) = @_;
989 ## Check the interior sequences in the command-text
990 # and return the text
991 $self->_check_ptree(
992 $self->parse_text($paragraph,$line), $line, $file, '');
993}
994
995sub _check_ptree {
996 my ($self,$ptree,$line,$file,$nestlist) = @_;
997 local($_);
998 my $text = '';
999 # process each node in the parse tree
1000 foreach(@$ptree) {
1001 # regular text chunk
1002 unless(ref) {
1003 my $count;
1004 # count the unescaped angle brackets
1005 my $i = $_;
e2c3adef 1006 if($count = $i =~ tr/<>/<>/) {
e3237417 1007 $self->poderror({ -line => $line, -file => $file,
1008 -severity => 'WARNING',
e2c3adef 1009 -msg => "$count unescaped <> in paragraph" })
1010 if($self->{-warnings});
e3237417 1011 }
1012 $text .= $i;
1013 next;
1014 }
1015 # have an interior sequence
1016 my $cmd = $_->cmd_name();
1017 my $contents = $_->parse_tree();
1018 ($file,$line) = $_->file_line();
1019 # check for valid tag
1020 if (! $VALID_SEQUENCES{$cmd}) {
1021 $self->poderror({ -line => $line, -file => $file,
1022 -severity => 'ERROR',
66aff6dd 1023 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1024 # expand it anyway
1025 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1026 next;
1027 }
1028 if($nestlist =~ /$cmd/) {
1029 $self->poderror({ -line => $line, -file => $file,
1030 -severity => 'ERROR',
1031 -msg => "nested commands $cmd<...$cmd<...>...>"});
1032 # _TODO_ should we add the contents anyway?
1033 # expand it anyway, see below
1034 }
1035 if($cmd eq 'E') {
1036 # preserve entities
1037 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1038 $self->poderror({ -line => $line, -file => $file,
1039 -severity => 'ERROR',
1040 -msg => "garbled entity " . $_->raw_text()});
1041 next;
1042 }
e2c3adef 1043 my $ent = $$contents[0];
66aff6dd 1044 my $val;
1045 if($ent =~ /^0x[0-9a-f]+$/i) {
1046 # hexadec entity
1047 $val = hex($ent);
1048 }
1049 elsif($ent =~ /^0\d+$/) {
1050 # octal
1051 $val = oct($ent);
1052 }
1053 elsif($ent =~ /^\d+$/) {
e2c3adef 1054 # numeric entity
66aff6dd 1055 $val = $ent;
1056 }
1057 if(defined $val) {
1058 if($val>0 && $val<256) {
1059 $text .= chr($val);
1060 }
1061 else {
1062 $self->poderror({ -line => $line, -file => $file,
1063 -severity => 'ERROR',
1064 -msg => "Entity number out of range " . $_->raw_text()});
1065 }
e2c3adef 1066 }
1067 elsif($ENTITIES{$ent}) {
1068 # known ISO entity
1069 $text .= $ENTITIES{$ent};
1070 }
1071 else {
1072 $self->poderror({ -line => $line, -file => $file,
1073 -severity => 'WARNING',
66aff6dd 1074 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef 1075 $text .= "E<$ent>";
1076 }
e3237417 1077 }
1078 elsif($cmd eq 'L') {
1079 # try to parse the hyperlink
1080 my $link = Pod::Hyperlink->new($contents->raw_text());
1081 unless(defined $link) {
1082 $self->poderror({ -line => $line, -file => $file,
1083 -severity => 'ERROR',
e2c3adef 1084 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417 1085 next;
1086 }
1087 $link->line($line); # remember line
1088 if($self->{-warnings}) {
1089 foreach my $w ($link->warning()) {
1090 $self->poderror({ -line => $line, -file => $file,
1091 -severity => 'WARNING',
1092 -msg => $w });
1093 }
1094 }
1095 # check the link text
1096 $text .= $self->_check_ptree($self->parse_text($link->text(),
1097 $line), $line, $file, "$nestlist$cmd");
48f30392 1098 # remember link
1099 $self->hyperlink([$line,$link]);
e3237417 1100 }
1101 elsif($cmd =~ /[BCFIS]/) {
1102 # add the guts
1103 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1104 }
66aff6dd 1105 elsif($cmd eq 'Z') {
1106 if(length($contents->raw_text())) {
1107 $self->poderror({ -line => $line, -file => $file,
1108 -severity => 'ERROR',
1109 -msg => "Nonempty Z<>"});
1110 }
1111 }
48f30392 1112 elsif($cmd eq 'X') {
1113 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1114 if($idx =~ /^\s*$/s) {
1115 $self->poderror({ -line => $line, -file => $file,
1116 -severity => 'ERROR',
1117 -msg => "Empty X<>"});
1118 }
1119 else {
1120 # remember this node
1121 $self->idx($idx);
1122 }
1123 }
1124 else {
1125 # not reached
1126 die "internal error";
e3237417 1127 }
1128 }
1129 $text;
1130}
1131
e2c3adef 1132# process a block of verbatim text
360aca43 1133sub verbatim {
66aff6dd 1134 ## Nothing particular to check
e2c3adef 1135 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1136
1137 $self->_preproc_par($paragraph);
1138
e2c3adef 1139 if($self->{_current_head1} eq 'NAME') {
1140 my ($file, $line) = $pod_para->file_line;
1141 $self->poderror({ -line => $line, -file => $file,
1142 -severity => 'WARNING',
1143 -msg => 'Verbatim paragraph in NAME section' });
1144 }
360aca43 1145}
1146
e2c3adef 1147# process a block of regular text
360aca43 1148sub textblock {
1149 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1150 my ($file, $line) = $pod_para->file_line;
e3237417 1151
66aff6dd 1152 $self->_preproc_par($paragraph);
1153
e2c3adef 1154 # skip this paragraph if in a =begin block
1155 unless($self->{_have_begin}) {
1156 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1157 if($self->{_current_head1} eq 'NAME') {
1158 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1159 # this is the canonical name
1160 $self->{-name} = $1 unless(defined $self->{-name});
1161 }
e3237417 1162 }
1163 }
e3237417 1164}
1165
66aff6dd 1166sub _preproc_par
1167{
1168 my $self = shift;
1169 $_[0] =~ s/[\s\n]+$//;
1170 if($_[0]) {
1171 $self->{_commands_in_head}++;
1172 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1173 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1174 $self->{_list_stack}->[0]->{_has_par} = 1;
1175 }
1176 }
1177}
1178
e3237417 11791;
66aff6dd 1180
48f30392 1181__END__
1182
1183=head1 AUTHOR
1184
1185Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1186Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1187
1188Based on code for B<Pod::Text::pod2text()> written by
1189Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1190
1191=cut
1192