Integrate with Sarathy.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
664bb207 4# Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
48f30392 13$VERSION = 1.098; ## Current version of this package
360aca43 14require 5.004; ## requires this Perl version or later
15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43 31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43 36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef 39=head2 podchecker()
40
41This function can take a hash of options:
e3237417 42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
47Turn warnings on/off. See L<"Warnings">.
48
49=back
360aca43 50
51=head1 DESCRIPTION
52
53B<podchecker> will perform syntax checking of Perl5 POD format documentation.
54
e2c3adef 55I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43 56
57It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef 58additional features they wish to see in B<Pod::Checker> and B<podchecker>
59and verify that the checks are consistent with L<perlpod>.
360aca43 60
48f30392 61The following checks are currently preformed:
e3237417 62
63=over 4
64
65=item *
66
48f30392 67Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 68and unterminated interior sequences.
69
70=item *
71
72Check for proper balancing of C<=begin> and C<=end>. The contents of such
73a block are generally ignored, i.e. no syntax checks are performed.
e3237417 74
75=item *
76
77Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
78
79=item *
80
e2c3adef 81Check for same nested interior-sequences (e.g.
82C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 83
84=item *
85
e2c3adef 86Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417 87
88=item *
89
e2c3adef 90Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
91for details.
e3237417 92
93=item *
94
e2c3adef 95Check for unresolved document-internal links. This check may also reveal
96misspelled links that seem to be internal links but should be links
97to something else.
e3237417 98
99=back
100
e2c3adef 101=head1 DIAGNOSTICS
e3237417 102
e2c3adef 103=head2 Errors
e3237417 104
105=over 4
106
66aff6dd 107=item * empty =headn
108
109A heading (C<=head1> or C<=head2>) without any text? That ain't no
110heading!
111
e2c3adef 112=item * =over on line I<N> without closing =back
e3237417 113
e2c3adef 114The C<=over> command does not have a corresponding C<=back> before the
115next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 116
e2c3adef 117=item * =item without previous =over
e3237417 118
e2c3adef 119=item * =back without previous =over
e3237417 120
e2c3adef 121An C<=item> or C<=back> command has been found outside a
122C<=over>/C<=back> block.
e3237417 123
e2c3adef 124=item * No argument for =begin
e3237417 125
e2c3adef 126A C<=begin> command was found that is not followed by the formatter
127specification.
e3237417 128
e2c3adef 129=item * =end without =begin
e3237417 130
e2c3adef 131A standalone C<=end> command was found.
132
133=item * Nested =begin's
134
66aff6dd 135There were at least two consecutive C<=begin> commands without
e2c3adef 136the corresponding C<=end>. Only one C<=begin> may be active at
137a time.
138
139=item * =for without formatter specification
e3237417 140
e2c3adef 141There is no specification of the formatter after the C<=for> command.
142
143=item * unresolved internal link I<NAME>
144
145The given link to I<NAME> does not have a matching node in the current
146POD. This also happend when a single word node name is not enclosed in
147C<"">.
148
149=item * Unknown command "I<CMD>"
150
151An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
152C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>,
153C<=cut>
154
155=item * Unknown interior-sequence "I<SEQ>"
156
157An invalid markup command has been encountered. Valid are:
158C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
159C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
160C<ZE<lt>E<gt>>
161
162=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
163
164Two nested identical markup commands have been found. Generally this
165does not make sense.
166
167=item * garbled entity I<STRING>
168
66aff6dd 169The I<STRING> found cannot be interpreted as a character entity.
170
171=item * Entity number out of range
172
173An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 174
175=item * malformed link LE<lt>E<gt>
176
177The link found cannot be parsed because it does not conform to the
178syntax described in L<perlpod>.
e3237417 179
66aff6dd 180=item * nonempty ZE<lt>E<gt>
181
182The C<ZE<lt>E<gt>> sequence is supposed to be empty.
183
48f30392 184=item * empty XE<lt>E<gt>
185
186The index entry specified contains nothing but whitespace.
187
66aff6dd 188=item * Spurious text after =pod / =cut
189
190The commands C<=pod> and C<=cut> do not take any arguments.
191
192=item * Spurious character(s) after =back
193
194The C<=back> command does not take any arguments.
195
e3237417 196=back
197
e2c3adef 198=head2 Warnings
e3237417 199
e2c3adef 200These may not necessarily cause trouble, but indicate mediocre style.
201
202=over 4
203
66aff6dd 204=item * multiple occurence of link target I<name>
205
206The POD file has some C<=item> and/or C<=head> commands that have
207the same text. Potential hyperlinks to such a text cannot be unique then.
208
209=item * line containing nothing but whitespace in paragraph
210
211There is some whitespace on a seemingly empty line. POD is very sensitive
212to such things, so this is flagged. B<vi> users switch on the B<list>
213option to avoid this problem.
214
215=item * file does not start with =head
216
217The file starts with a different POD directive than head.
218This is most probably something you do not want.
219
e2c3adef 220=item * No numeric argument for =over
221
222The C<=over> command is supposed to have a numeric argument (the
223indentation).
224
66aff6dd 225=item * previous =item has no contents
e2c3adef 226
66aff6dd 227There is a list C<=item> right above the flagged line that has no
228text contents. You probably want to delete empty items.
229
230=item * preceding non-item paragraph(s)
231
232A list introduced by C<=over> starts with a text or verbatim paragraph,
233but continues with C<=item>s. Move the non-item paragraph out of the
234C<=over>/C<=back> block.
235
236=item * =item type mismatch (I<one> vs. I<two>)
237
238A list started with e.g. a bulletted C<=item> and continued with a
239numbered one. This is obviously inconsistent. For most translators the
240type of the I<first> C<=item> determines the type of the list.
e2c3adef 241
242=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
244Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245can potentially cause errors as they could be misinterpreted as
246markup commands.
247
66aff6dd 248=item * Unknown entity
e2c3adef 249
250A character entity was found that does not belong to the standard
66aff6dd 251ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 252
253=item * No items in =over
254
66aff6dd 255The list opened with C<=over> does not contain any items.
e2c3adef 256
257=item * No argument for =item
258
259C<=item> without any parameters is deprecated. It should either be followed
260by C<*> to indicate an unordered list, by a number (optionally followed
261by a dot) to indicate an ordered (numbered) list or simple text for a
262definition list.
263
66aff6dd 264=item * empty section in previous paragraph
265
266The previous section (introduced by a C<=head> command) does not contain
267any text. This usually indicates that something is missing. Note: A
268C<=head1> followed immediately by C<=head2> does not trigger this warning.
269
e2c3adef 270=item * Verbatim paragraph in NAME section
271
272The NAME section (C<=head1 NAME>) should consist of a single paragraph
273with the script/module name, followed by a dash `-' and a very short
274description of what the thing is good for.
275
276=item * Hyperlinks
277
278There are some warnings wrt. hyperlinks:
279Leading/trailing whitespace, newlines in hyperlinks,
280brackets C<()>.
281
282=back
e3237417 283
284=head1 RETURN VALUE
285
286B<podchecker> returns the number of POD syntax errors found or -1 if
287there were no POD commands at all found in the file.
288
360aca43 289=head1 EXAMPLES
290
291I<[T.B.D.]>
292
48f30392 293=head1 INTERFACE
360aca43 294
48f30392 295While checking, this module collects document properties, e.g. the nodes
296for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
297POD translators can use this feature to syntax-check and get the nodes in
298a first pass before actually starting to convert. This is expensive in terms
299of execution time, but allows for very robust conversions.
360aca43 300
301=cut
302
303#############################################################################
304
305use strict;
306#use diagnostics;
307use Carp;
308use Exporter;
309use Pod::Parser;
f0963acb 310require VMS::Filespec if $^O eq 'VMS';
360aca43 311
312use vars qw(@ISA @EXPORT);
313@ISA = qw(Pod::Parser);
314@EXPORT = qw(&podchecker);
315
316use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
317
318my %VALID_COMMANDS = (
319 'pod' => 1,
320 'cut' => 1,
321 'head1' => 1,
322 'head2' => 1,
323 'over' => 1,
324 'back' => 1,
325 'item' => 1,
326 'for' => 1,
327 'begin' => 1,
328 'end' => 1,
329);
330
331my %VALID_SEQUENCES = (
332 'I' => 1,
333 'B' => 1,
334 'S' => 1,
335 'C' => 1,
336 'L' => 1,
337 'F' => 1,
338 'X' => 1,
339 'Z' => 1,
340 'E' => 1,
341);
342
e2c3adef 343# stolen from HTML::Entities
344my %ENTITIES = (
345 # Some normal chars that have special meaning in SGML context
346 amp => '&', # ampersand
347'gt' => '>', # greater than
348'lt' => '<', # less than
349 quot => '"', # double quote
350
351 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
352 AElig => 'Æ', # capital AE diphthong (ligature)
353 Aacute => 'Á', # capital A, acute accent
354 Acirc => 'Â', # capital A, circumflex accent
355 Agrave => 'À', # capital A, grave accent
356 Aring => 'Å', # capital A, ring
357 Atilde => 'Ã', # capital A, tilde
358 Auml => 'Ä', # capital A, dieresis or umlaut mark
359 Ccedil => 'Ç', # capital C, cedilla
360 ETH => 'Ð', # capital Eth, Icelandic
361 Eacute => 'É', # capital E, acute accent
362 Ecirc => 'Ê', # capital E, circumflex accent
363 Egrave => 'È', # capital E, grave accent
364 Euml => 'Ë', # capital E, dieresis or umlaut mark
365 Iacute => 'Í', # capital I, acute accent
366 Icirc => 'Î', # capital I, circumflex accent
367 Igrave => 'Ì', # capital I, grave accent
368 Iuml => 'Ï', # capital I, dieresis or umlaut mark
369 Ntilde => 'Ñ', # capital N, tilde
370 Oacute => 'Ó', # capital O, acute accent
371 Ocirc => 'Ô', # capital O, circumflex accent
372 Ograve => 'Ò', # capital O, grave accent
373 Oslash => 'Ø', # capital O, slash
374 Otilde => 'Õ', # capital O, tilde
375 Ouml => 'Ö', # capital O, dieresis or umlaut mark
376 THORN => 'Þ', # capital THORN, Icelandic
377 Uacute => 'Ú', # capital U, acute accent
378 Ucirc => 'Û', # capital U, circumflex accent
379 Ugrave => 'Ù', # capital U, grave accent
380 Uuml => 'Ü', # capital U, dieresis or umlaut mark
381 Yacute => 'Ý', # capital Y, acute accent
382 aacute => 'á', # small a, acute accent
383 acirc => 'â', # small a, circumflex accent
384 aelig => 'æ', # small ae diphthong (ligature)
385 agrave => 'à', # small a, grave accent
386 aring => 'å', # small a, ring
387 atilde => 'ã', # small a, tilde
388 auml => 'ä', # small a, dieresis or umlaut mark
389 ccedil => 'ç', # small c, cedilla
390 eacute => 'é', # small e, acute accent
391 ecirc => 'ê', # small e, circumflex accent
392 egrave => 'è', # small e, grave accent
393 eth => 'ð', # small eth, Icelandic
394 euml => 'ë', # small e, dieresis or umlaut mark
395 iacute => 'í', # small i, acute accent
396 icirc => 'î', # small i, circumflex accent
397 igrave => 'ì', # small i, grave accent
398 iuml => 'ï', # small i, dieresis or umlaut mark
399 ntilde => 'ñ', # small n, tilde
400 oacute => 'ó', # small o, acute accent
401 ocirc => 'ô', # small o, circumflex accent
402 ograve => 'ò', # small o, grave accent
403 oslash => 'ø', # small o, slash
404 otilde => 'õ', # small o, tilde
405 ouml => 'ö', # small o, dieresis or umlaut mark
406 szlig => 'ß', # small sharp s, German (sz ligature)
407 thorn => 'þ', # small thorn, Icelandic
408 uacute => 'ú', # small u, acute accent
409 ucirc => 'û', # small u, circumflex accent
410 ugrave => 'ù', # small u, grave accent
411 uuml => 'ü', # small u, dieresis or umlaut mark
412 yacute => 'ý', # small y, acute accent
413 yuml => 'ÿ', # small y, dieresis or umlaut mark
414
415 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
416 copy => '©', # copyright sign
417 reg => '®', # registered sign
418 nbsp => "\240", # non breaking space
419
420 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
421 iexcl => '¡',
422 cent => '¢',
423 pound => '£',
424 curren => '¤',
425 yen => '¥',
426 brvbar => '¦',
427 sect => '§',
428 uml => '¨',
429 ordf => 'ª',
430 laquo => '«',
431'not' => '¬', # not is a keyword in perl
432 shy => '­',
433 macr => '¯',
434 deg => '°',
435 plusmn => '±',
436 sup1 => '¹',
437 sup2 => '²',
438 sup3 => '³',
439 acute => '´',
440 micro => 'µ',
441 para => '¶',
442 middot => '·',
443 cedil => '¸',
444 ordm => 'º',
445 raquo => '»',
446 frac14 => '¼',
447 frac12 => '½',
448 frac34 => '¾',
449 iquest => '¿',
450'times' => '×', # times is a keyword in perl
451 divide => '÷',
66aff6dd 452
453# some POD special entities
454 verbar => '|',
455 sol => '/'
e2c3adef 456);
457
360aca43 458##---------------------------------------------------------------------------
459
460##---------------------------------
461## Function definitions begin here
462##---------------------------------
463
e3237417 464sub podchecker( $ ; $ % ) {
465 my ($infile, $outfile, %options) = @_;
360aca43 466 local $_;
467
468 ## Set defaults
469 $infile ||= \*STDIN;
470 $outfile ||= \*STDERR;
471
472 ## Now create a pod checker
e3237417 473 my $checker = new Pod::Checker(%options);
a5317591 474 $checker->parseopts(-process_cut_cmd => 1, -warnings => 1);
360aca43 475
476 ## Now check the pod document for errors
477 $checker->parse_from_file($infile, $outfile);
48f30392 478
360aca43 479 ## Return the number of errors found
480 return $checker->num_errors();
481}
482
483##---------------------------------------------------------------------------
484
485##-------------------------------
486## Method definitions begin here
487##-------------------------------
488
66aff6dd 489## sub new {
490## my $this = shift;
491## my $class = ref($this) || $this;
492## my %params = @_;
493## my $self = {%params};
494## bless $self, $class;
495## $self->initialize();
496## return $self;
497## }
360aca43 498
499sub initialize {
500 my $self = shift;
664bb207 501 ## Initialize number of errors, and setup an error function to
502 ## increment this number and then print to the designated output.
503 $self->{_NUM_ERRORS} = 0;
e2c3adef 504 $self->errorsub('poderror'); # set the error handling subroutine
e3237417 505 $self->{_commands} = 0; # total number of POD commands encountered
506 $self->{_list_stack} = []; # stack for nested lists
507 $self->{_have_begin} = ''; # stores =begin
508 $self->{_links} = []; # stack for internal hyperlinks
509 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 510 $self->{_index} = []; # text in X<>
e2c3adef 511 # print warnings?
e3237417 512 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 513 $self->{_current_head1} = ''; # the current =head1 block
664bb207 514}
515
48f30392 516##################################
517
518=over 4
519
520=item C<$checker-E<gt>poderror( @args )>
521
522=item C<$checker-E<gt>poderror( {%opts}, @args )>
523
524Internal method for printing errors and warnings. If no options are
525given, simply prints "@_". The following options are recognized and used
526to form the output:
527
528 -msg
529
530A message to print prior to C<@args>.
531
532 -line
533
534The line number the error occurred in.
535
536 -file
537
538The file (name) the error occurred in.
539
540 -severity
541
542The error level, should be 'WARNING' or 'ERROR'.
543
544=cut
545
e2c3adef 546# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 547sub poderror {
548 my $self = shift;
549 my %opts = (ref $_[0]) ? %{shift()} : ();
f0963acb 550 $opts{-file} = VMS::Filespec::unixify($opts{-file}) if (exists($opts{-file}) && $^O eq 'VMS');
664bb207 551
552 ## Retrieve options
553 chomp( my $msg = ($opts{-msg} || "")."@_" );
554 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
555 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd 556 unless (exists $opts{-severity}) {
557 ## See if can find severity in message prefix
558 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
559 }
664bb207 560 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
561
e3237417 562 ## Increment error count and print message "
563 ++($self->{_NUM_ERRORS})
564 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
664bb207 565 my $out_fh = $self->output_handle();
e2c3adef 566 print $out_fh ($severity, $msg, $line, $file, "\n")
567 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43 568}
569
48f30392 570##################################
571
572=item C<$checker-E<gt>num_errors()>
573
574Set (if argument specified) and retrieve the number of errors found.
575
576=cut
577
360aca43 578sub num_errors {
579 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
580}
581
48f30392 582##################################
583
584=item C<$checker-E<gt>name()>
585
586Set (if argument specified) and retrieve the canonical name of POD as
587found in the C<=head1 NAME> section.
588
589=cut
590
e2c3adef 591sub name {
592 return (@_ > 1 && $_[1]) ?
593 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
594}
595
48f30392 596##################################
597
598=item C<$checker-E<gt>node()>
599
600Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
601and C<=item>) of the current POD. The nodes are returned in the order of
602their occurence. They consist of plain text, each piece of whitespace is
603collapsed to a single blank.
604
605=cut
606
e2c3adef 607sub node {
608 my ($self,$text) = @_;
609 if(defined $text) {
66aff6dd 610 $text =~ s/\s+$//s; # strip trailing whitespace
611 $text =~ s/\s+/ /gs; # collapse whitespace
612 # add node, order important!
e2c3adef 613 push(@{$self->{_nodes}}, $text);
66aff6dd 614 # keep also a uniqueness counter
48f30392 615 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 616 return $text;
617 }
618 @{$self->{_nodes}};
619}
620
48f30392 621##################################
622
623=item C<$checker-E<gt>idx()>
624
625Add (if argument specified) and retrieve the index entries (as defined by
626C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
627of whitespace is collapsed to a single blank.
628
629=cut
630
631# set/return index entries of current POD
632sub idx {
633 my ($self,$text) = @_;
634 if(defined $text) {
635 $text =~ s/\s+$//s; # strip trailing whitespace
636 $text =~ s/\s+/ /gs; # collapse whitespace
637 # add node, order important!
638 push(@{$self->{_index}}, $text);
639 # keep also a uniqueness counter
640 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
641 return $text;
642 }
643 @{$self->{_index}};
644}
645
646##################################
647
648=item C<$checker-E<gt>hyperlink()>
649
650Add (if argument specified) and retrieve the hyperlinks (as defined by
651C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line
652number and C<Pod::Hyperlink> object.
653
654=back
655
656=cut
657
e2c3adef 658# set/return hyperlinks of the current POD
659sub hyperlink {
660 my $self = shift;
661 if($_[0]) {
662 push(@{$self->{_links}}, $_[0]);
663 return $_[0];
664 }
665 @{$self->{_links}};
666}
667
e3237417 668## overrides for Pod::Parser
669
360aca43 670sub end_pod {
66aff6dd 671 ## Do some final checks and
672 ## print the number of errors found
673 my $self = shift;
674 my $infile = $self->input_file();
f0963acb 675 $infile = VMS::Filespec::unixify($infile) if $^O eq 'VMS';
66aff6dd 676 my $out_fh = $self->output_handle();
677
678 if(@{$self->{_list_stack}}) {
679 # _TODO_ display, but don't count them for now
680 my $list;
681 while(($list = $self->_close_list('EOF',$infile)) &&
682 $list->indent() ne 'auto') {
683 $self->poderror({ -line => 'EOF', -file => $infile,
684 -severity => 'ERROR', -msg => "=over on line " .
685 $list->start() . " without closing =back" }); #"
686 }
687 }
688
689 # check validity of document internal hyperlinks
690 # first build the node names from the paragraph text
691 my %nodes;
692 foreach($self->node()) {
693 $nodes{$_} = 1;
694 if(/^(\S+)\s+/) {
695 # we have more than one word. Use the first as a node, too.
696 # This is used heavily in perlfunc.pod
697 $nodes{$1} ||= 2; # derived node
698 }
699 }
700 foreach($self->hyperlink()) {
48f30392 701 my ($line,$link) = @$_;
702 # _TODO_ what if there is a link to the page itself by the name,
703 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
704 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
705 my $node = $self->_check_ptree($self->parse_text($link->node(),
706 $line), $line, $infile, 'L');
707 if($node && !$nodes{$node}) {
708 $self->poderror({ -line => $line || '', -file => $infile,
709 -severity => 'ERROR',
710 -msg => "unresolved internal link '$node'"});
711 }
66aff6dd 712 }
713 }
48f30392 714
715 # check the internal nodes for uniqueness. This pertains to
716 # =headX, =item and X<...>
66aff6dd 717 foreach(grep($self->{_unique_nodes}->{$_} > 1,
718 keys %{$self->{_unique_nodes}})) {
719 $self->poderror({ -line => '-', -file => $infile,
720 -severity => 'WARNING',
721 -msg => "multiple occurence of link target '$_'"});
722 }
723
724 ## Print the number of errors found
725 my $num_errors = $self->num_errors();
726 if ($num_errors > 0) {
727 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 728 ($num_errors == 1) ? "error" : "errors");
66aff6dd 729 }
730 elsif($self->{_commands} == 0) {
731 print $out_fh "$infile does not contain any pod commands.\n";
732 $self->num_errors(-1);
733 }
734 else {
735 print $out_fh "$infile pod syntax OK.\n";
736 }
360aca43 737}
738
e2c3adef 739# check a POD command directive
360aca43 740sub command {
664bb207 741 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 742 my ($file, $line) = $pod_para->file_line;
360aca43 743 ## Check the command syntax
e3237417 744 my $arg; # this will hold the command argument
664bb207 745 if (! $VALID_COMMANDS{$cmd}) {
746 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 747 -msg => "Unknown command '$cmd'" });
360aca43 748 }
749 else {
66aff6dd 750 # found a valid command
751 if(!$self->{_commands}++ && $cmd !~ /^head/) {
752 $self->poderror({ -line => $line, -file => $file,
753 -severity => 'WARNING',
754 -msg => "file does not start with =head" });
755 }
e3237417 756 ## check syntax of particular command
757 if($cmd eq 'over') {
e2c3adef 758 # check for argument
759 $arg = $self->interpolate_and_check($paragraph, $line,$file);
760 my $indent = 4; # default
761 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
762 $indent = $1;
763 } else {
764 $self->poderror({ -line => $line, -file => $file,
765 -severity => 'WARNING',
766 -msg => "No numeric argument for =over"});
767 }
e3237417 768 # start a new list
66aff6dd 769 $self->_open_list($indent,$line,$file);
e3237417 770 }
771 elsif($cmd eq 'item') {
e2c3adef 772 # are we in a list?
e3237417 773 unless(@{$self->{_list_stack}}) {
774 $self->poderror({ -line => $line, -file => $file,
775 -severity => 'ERROR',
776 -msg => "=item without previous =over" });
e2c3adef 777 # auto-open in case we encounter many more
66aff6dd 778 $self->_open_list('auto',$line,$file);
779 }
780 my $list = $self->{_list_stack}->[0];
781 # check whether the previous item had some contents
782 if(defined $self->{_list_item_contents} &&
783 $self->{_list_item_contents} == 0) {
784 $self->poderror({ -line => $line, -file => $file,
785 -severity => 'WARNING',
786 -msg => "previous =item has no contents" });
787 }
788 if($list->{_has_par}) {
789 $self->poderror({ -line => $line, -file => $file,
790 -severity => 'WARNING',
791 -msg => "preceding non-item paragraph(s)" });
792 delete $list->{_has_par};
e3237417 793 }
e2c3adef 794 # check for argument
795 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 796 if($arg && $arg =~ /(\S+)/) {
797 $arg =~ s/[\s\n]+$//;
798 my $type;
799 if($arg =~ /^[*]\s*(\S*.*)/) {
800 $type = 'bullet';
801 $self->{_list_item_contents} = $1 ? 1 : 0;
802 $arg = $1;
803 }
804 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
805 $type = 'number';
806 $self->{_list_item_contents} = $1 ? 1 : 0;
807 $arg = $1;
808 }
809 else {
810 $type = 'definition';
811 $self->{_list_item_contents} = 1;
812 }
813 my $first = $list->type();
814 if($first && $first ne $type) {
815 $self->poderror({ -line => $line, -file => $file,
816 -severity => 'WARNING',
817 -msg => "=item type mismatch ('$first' vs. '$type')"});
818 }
819 else { # first item
820 $list->type($type);
821 }
822 }
823 else {
e2c3adef 824 $self->poderror({ -line => $line, -file => $file,
825 -severity => 'WARNING',
826 -msg => "No argument for =item" });
827 $arg = ' '; # empty
66aff6dd 828 $self->{_list_item_contents} = 0;
e3237417 829 }
e2c3adef 830 # add this item
66aff6dd 831 $list->item($arg);
e2c3adef 832 # remember this node
833 $self->node($arg);
e3237417 834 }
835 elsif($cmd eq 'back') {
836 # check if we have an open list
837 unless(@{$self->{_list_stack}}) {
838 $self->poderror({ -line => $line, -file => $file,
839 -severity => 'ERROR',
840 -msg => "=back without previous =over" });
841 }
842 else {
843 # check for spurious characters
e2c3adef 844 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 845 if($arg && $arg =~ /\S/) {
846 $self->poderror({ -line => $line, -file => $file,
66aff6dd 847 -severity => 'ERROR',
e3237417 848 -msg => "Spurious character(s) after =back" });
849 }
850 # close list
66aff6dd 851 my $list = $self->_close_list($line,$file);
e3237417 852 # check for empty lists
853 if(!$list->item() && $self->{-warnings}) {
854 $self->poderror({ -line => $line, -file => $file,
855 -severity => 'WARNING',
856 -msg => "No items in =over (at line " .
857 $list->start() . ") / =back list"}); #"
858 }
859 }
860 }
66aff6dd 861 elsif($cmd =~ /^head(\d+)/) {
48f30392 862 # check whether the previous =head section had some contents
66aff6dd 863 if(defined $self->{_commands_in_head} &&
864 $self->{_commands_in_head} == 0 &&
865 defined $self->{_last_head} &&
866 $self->{_last_head} >= $1) {
867 $self->poderror({ -line => $line, -file => $file,
868 -severity => 'WARNING',
869 -msg => "empty section in previous paragraph"});
870 }
871 $self->{_commands_in_head} = -1;
872 $self->{_last_head} = $1;
e3237417 873 # check if there is an open list
874 if(@{$self->{_list_stack}}) {
875 my $list;
66aff6dd 876 while(($list = $self->_close_list($line,$file)) &&
877 $list->indent() ne 'auto') {
e3237417 878 $self->poderror({ -line => $line, -file => $file,
879 -severity => 'ERROR',
e2c3adef 880 -msg => "=over on line ". $list->start() .
881 " without closing =back (at $cmd)" });
e3237417 882 }
883 }
884 # remember this node
e2c3adef 885 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 886 $arg =~ s/[\s\n]+$//s;
887 $self->node($arg);
888 unless(length($arg)) {
889 $self->poderror({ -line => $line, -file => $file,
890 -severity => 'ERROR',
891 -msg => "empty =$cmd"});
892 }
e2c3adef 893 if($cmd eq 'head1') {
e2c3adef 894 $self->{_current_head1} = $arg;
895 } else {
896 $self->{_current_head1} = '';
897 }
e3237417 898 }
899 elsif($cmd eq 'begin') {
900 if($self->{_have_begin}) {
901 # already have a begin
902 $self->poderror({ -line => $line, -file => $file,
903 -severity => 'ERROR',
904 -msg => "Nested =begin's (first at line " .
905 $self->{_have_begin} . ")"});
906 }
907 else {
908 # check for argument
e2c3adef 909 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 910 unless($arg && $arg =~ /(\S+)/) {
911 $self->poderror({ -line => $line, -file => $file,
e2c3adef 912 -severity => 'ERROR',
e3237417 913 -msg => "No argument for =begin"});
914 }
915 # remember the =begin
916 $self->{_have_begin} = "$line:$1";
917 }
918 }
919 elsif($cmd eq 'end') {
920 if($self->{_have_begin}) {
921 # close the existing =begin
922 $self->{_have_begin} = '';
923 # check for spurious characters
e2c3adef 924 $arg = $self->interpolate_and_check($paragraph, $line,$file);
925 # the closing argument is optional
926 #if($arg && $arg =~ /\S/) {
927 # $self->poderror({ -line => $line, -file => $file,
928 # -severity => 'WARNING',
929 # -msg => "Spurious character(s) after =end" });
930 #}
e3237417 931 }
932 else {
933 # don't have a matching =begin
934 $self->poderror({ -line => $line, -file => $file,
e2c3adef 935 -severity => 'ERROR',
e3237417 936 -msg => "=end without =begin" });
937 }
938 }
e2c3adef 939 elsif($cmd eq 'for') {
940 unless($paragraph =~ /\s*(\S+)\s*/) {
941 $self->poderror({ -line => $line, -file => $file,
942 -severity => 'ERROR',
943 -msg => "=for without formatter specification" });
944 }
945 $arg = ''; # do not expand paragraph below
946 }
66aff6dd 947 elsif($cmd =~ /^(pod|cut)$/) {
948 # check for argument
949 $arg = $self->interpolate_and_check($paragraph, $line,$file);
950 if($arg && $arg =~ /(\S+)/) {
951 $self->poderror({ -line => $line, -file => $file,
952 -severity => 'ERROR',
953 -msg => "Spurious text after =$cmd"});
954 }
955 }
956 $self->{_commands_in_head}++;
e3237417 957 ## Check the interior sequences in the command-text
e2c3adef 958 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 959 unless(defined $arg);
e2c3adef 960 }
360aca43 961}
962
66aff6dd 963sub _open_list
964{
965 my ($self,$indent,$line,$file) = @_;
966 my $list = Pod::List->new(
967 -indent => $indent,
968 -start => $line,
969 -file => $file);
970 unshift(@{$self->{_list_stack}}, $list);
971 undef $self->{_list_item_contents};
972 $list;
973}
974
975sub _close_list
976{
977 my ($self,$line,$file) = @_;
978 my $list = shift(@{$self->{_list_stack}});
979 if(defined $self->{_list_item_contents} &&
980 $self->{_list_item_contents} == 0) {
981 $self->poderror({ -line => $line, -file => $file,
982 -severity => 'WARNING',
983 -msg => "previous =item has no contents" });
984 }
985 undef $self->{_list_item_contents};
986 $list;
987}
988
e2c3adef 989# process a block of some text
990sub interpolate_and_check {
e3237417 991 my ($self, $paragraph, $line, $file) = @_;
992 ## Check the interior sequences in the command-text
993 # and return the text
994 $self->_check_ptree(
995 $self->parse_text($paragraph,$line), $line, $file, '');
996}
997
998sub _check_ptree {
999 my ($self,$ptree,$line,$file,$nestlist) = @_;
1000 local($_);
1001 my $text = '';
1002 # process each node in the parse tree
1003 foreach(@$ptree) {
1004 # regular text chunk
1005 unless(ref) {
1006 my $count;
1007 # count the unescaped angle brackets
1008 my $i = $_;
e2c3adef 1009 if($count = $i =~ tr/<>/<>/) {
e3237417 1010 $self->poderror({ -line => $line, -file => $file,
1011 -severity => 'WARNING',
e2c3adef 1012 -msg => "$count unescaped <> in paragraph" })
1013 if($self->{-warnings});
e3237417 1014 }
1015 $text .= $i;
1016 next;
1017 }
1018 # have an interior sequence
1019 my $cmd = $_->cmd_name();
1020 my $contents = $_->parse_tree();
1021 ($file,$line) = $_->file_line();
1022 # check for valid tag
1023 if (! $VALID_SEQUENCES{$cmd}) {
1024 $self->poderror({ -line => $line, -file => $file,
1025 -severity => 'ERROR',
66aff6dd 1026 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1027 # expand it anyway
1028 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1029 next;
1030 }
1031 if($nestlist =~ /$cmd/) {
1032 $self->poderror({ -line => $line, -file => $file,
1033 -severity => 'ERROR',
1034 -msg => "nested commands $cmd<...$cmd<...>...>"});
1035 # _TODO_ should we add the contents anyway?
1036 # expand it anyway, see below
1037 }
1038 if($cmd eq 'E') {
1039 # preserve entities
1040 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1041 $self->poderror({ -line => $line, -file => $file,
1042 -severity => 'ERROR',
1043 -msg => "garbled entity " . $_->raw_text()});
1044 next;
1045 }
e2c3adef 1046 my $ent = $$contents[0];
66aff6dd 1047 my $val;
1048 if($ent =~ /^0x[0-9a-f]+$/i) {
1049 # hexadec entity
1050 $val = hex($ent);
1051 }
1052 elsif($ent =~ /^0\d+$/) {
1053 # octal
1054 $val = oct($ent);
1055 }
1056 elsif($ent =~ /^\d+$/) {
e2c3adef 1057 # numeric entity
66aff6dd 1058 $val = $ent;
1059 }
1060 if(defined $val) {
1061 if($val>0 && $val<256) {
1062 $text .= chr($val);
1063 }
1064 else {
1065 $self->poderror({ -line => $line, -file => $file,
1066 -severity => 'ERROR',
1067 -msg => "Entity number out of range " . $_->raw_text()});
1068 }
e2c3adef 1069 }
1070 elsif($ENTITIES{$ent}) {
1071 # known ISO entity
1072 $text .= $ENTITIES{$ent};
1073 }
1074 else {
1075 $self->poderror({ -line => $line, -file => $file,
1076 -severity => 'WARNING',
66aff6dd 1077 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef 1078 $text .= "E<$ent>";
1079 }
e3237417 1080 }
1081 elsif($cmd eq 'L') {
1082 # try to parse the hyperlink
1083 my $link = Pod::Hyperlink->new($contents->raw_text());
1084 unless(defined $link) {
1085 $self->poderror({ -line => $line, -file => $file,
1086 -severity => 'ERROR',
e2c3adef 1087 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417 1088 next;
1089 }
1090 $link->line($line); # remember line
1091 if($self->{-warnings}) {
1092 foreach my $w ($link->warning()) {
1093 $self->poderror({ -line => $line, -file => $file,
1094 -severity => 'WARNING',
1095 -msg => $w });
1096 }
1097 }
1098 # check the link text
1099 $text .= $self->_check_ptree($self->parse_text($link->text(),
1100 $line), $line, $file, "$nestlist$cmd");
48f30392 1101 # remember link
1102 $self->hyperlink([$line,$link]);
e3237417 1103 }
1104 elsif($cmd =~ /[BCFIS]/) {
1105 # add the guts
1106 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1107 }
66aff6dd 1108 elsif($cmd eq 'Z') {
1109 if(length($contents->raw_text())) {
1110 $self->poderror({ -line => $line, -file => $file,
1111 -severity => 'ERROR',
1112 -msg => "Nonempty Z<>"});
1113 }
1114 }
48f30392 1115 elsif($cmd eq 'X') {
1116 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1117 if($idx =~ /^\s*$/s) {
1118 $self->poderror({ -line => $line, -file => $file,
1119 -severity => 'ERROR',
1120 -msg => "Empty X<>"});
1121 }
1122 else {
1123 # remember this node
1124 $self->idx($idx);
1125 }
1126 }
1127 else {
1128 # not reached
1129 die "internal error";
e3237417 1130 }
1131 }
1132 $text;
1133}
1134
e2c3adef 1135# process a block of verbatim text
360aca43 1136sub verbatim {
66aff6dd 1137 ## Nothing particular to check
e2c3adef 1138 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1139
1140 $self->_preproc_par($paragraph);
1141
e2c3adef 1142 if($self->{_current_head1} eq 'NAME') {
1143 my ($file, $line) = $pod_para->file_line;
1144 $self->poderror({ -line => $line, -file => $file,
1145 -severity => 'WARNING',
1146 -msg => 'Verbatim paragraph in NAME section' });
1147 }
360aca43 1148}
1149
e2c3adef 1150# process a block of regular text
360aca43 1151sub textblock {
1152 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1153 my ($file, $line) = $pod_para->file_line;
e3237417 1154
66aff6dd 1155 $self->_preproc_par($paragraph);
1156
e2c3adef 1157 # skip this paragraph if in a =begin block
1158 unless($self->{_have_begin}) {
1159 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1160 if($self->{_current_head1} eq 'NAME') {
1161 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1162 # this is the canonical name
1163 $self->{-name} = $1 unless(defined $self->{-name});
1164 }
e3237417 1165 }
1166 }
e3237417 1167}
1168
66aff6dd 1169sub _preproc_par
1170{
1171 my $self = shift;
1172 $_[0] =~ s/[\s\n]+$//;
1173 if($_[0]) {
1174 $self->{_commands_in_head}++;
1175 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1176 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1177 $self->{_list_stack}->[0]->{_has_par} = 1;
1178 }
1179 }
1180}
1181
e3237417 11821;
66aff6dd 1183
48f30392 1184__END__
1185
1186=head1 AUTHOR
1187
1188Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1189Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1190
1191Based on code for B<Pod::Text::pod2text()> written by
1192Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1193
1194=cut
1195