1 #############################################################################
2 # Pod/Checker.pm -- check pod documents for syntax errors
4 # Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved.
5 # This file is part of "PodParser". PodParser is free software;
6 # you can redistribute it and/or modify it under the same terms
8 #############################################################################
12 use vars qw($VERSION);
13 $VERSION = 1.090; ## Current version of this package
14 require 5.004; ## requires this Perl version or later
18 Pod::Checker, podchecker() - check pod documents for syntax errors
24 $syntax_okay = podchecker($filepath, $outputpath, %options);
26 =head1 OPTIONS/ARGUMENTS
28 C<$filepath> is the input POD to read and C<$outputpath> is
29 where to write POD syntax error messages. Either argument may be a scalar
30 indcating a file-path, or else a reference to an open filehandle.
31 If unspecified, the input-file it defaults to C<\*STDIN>, and
32 the output-file defaults to C<\*STDERR>.
38 =item B<-warnings> =E<gt> I<val>
40 Turn warnings on/off. See L<"Warnings">.
46 B<podchecker> will perform syntax checking of Perl5 POD format documentation.
48 I<NOTE THAT THIS MODULE IS CURRENTLY IN THE INITIAL DEVELOPMENT STAGE!>
49 As of this writing, all it does is check for unknown '=xxxx' commands,
50 unknown 'X<...>' interior-sequences, and unterminated interior sequences.
52 It is hoped that curious/ambitious user will help flesh out and add the
53 additional features they wish to see in B<Pod::Checker> and B<podchecker>.
55 The following additional checks are preformed:
61 Check for proper balancing of C<=begin> and C<=end>.
65 Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
69 Check for same nested interior-sequences (e.g. C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
73 Check for malformed entities.
77 Check for correct syntax of hyperlinks C<LE<lt>E<gt>>. See L<perlpod> for
82 Check for unresolved document-internal links.
88 The following warnings are printed. These may not necessarily cause trouble,
89 but indicate mediocre style.
95 Spurious characters after C<=back> and C<=end>.
99 Unescaped C<E<lt>> and C<E<gt>> in the text.
103 Missing arguments for C<=begin> and C<=over>.
107 Empty C<=over> / C<=back> list.
111 Hyperlinks: leading/trailing whitespace, brackets C<()> in the page name.
121 B<podchecker> returns the number of POD syntax errors found or -1 if
122 there were no POD commands at all found in the file.
130 Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
131 Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
133 Based on code for B<Pod::Text::pod2text()> written by
134 Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
138 #############################################################################
146 use vars qw(@ISA @EXPORT);
147 @ISA = qw(Pod::Parser);
148 @EXPORT = qw(&podchecker);
150 use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
152 my %VALID_COMMANDS = (
165 my %VALID_SEQUENCES = (
177 ##---------------------------------------------------------------------------
179 ##---------------------------------
180 ## Function definitions begin here
181 ##---------------------------------
183 sub podchecker( $ ; $ % ) {
184 my ($infile, $outfile, %options) = @_;
189 $outfile ||= \*STDERR;
191 ## Now create a pod checker
192 my $checker = new Pod::Checker(%options);
194 ## Now check the pod document for errors
195 $checker->parse_from_file($infile, $outfile);
197 ## Return the number of errors found
198 return $checker->num_errors();
201 ##---------------------------------------------------------------------------
203 ##-------------------------------
204 ## Method definitions begin here
205 ##-------------------------------
209 my $class = ref($this) || $this;
211 my $self = {%params};
219 ## Initialize number of errors, and setup an error function to
220 ## increment this number and then print to the designated output.
221 $self->{_NUM_ERRORS} = 0;
222 $self->errorsub('poderror');
223 $self->{_commands} = 0; # total number of POD commands encountered
224 $self->{_list_stack} = []; # stack for nested lists
225 $self->{_have_begin} = ''; # stores =begin
226 $self->{_links} = []; # stack for internal hyperlinks
227 $self->{_nodes} = []; # stack for =head/=item nodes
228 $self->{-warnings} = 1 unless(defined $self->{-warnings});
231 ## Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
234 my %opts = (ref $_[0]) ? %{shift()} : ();
237 chomp( my $msg = ($opts{-msg} || "")."@_" );
238 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
239 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
240 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
242 ## Increment error count and print message "
243 ++($self->{_NUM_ERRORS})
244 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
245 my $out_fh = $self->output_handle();
246 print $out_fh ($severity, $msg, $line, $file, "\n");
250 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
253 ## overrides for Pod::Parser
256 ## Do some final checks and
257 ## print the number of errors found
259 my $infile = $self->input_file();
260 my $out_fh = $self->output_handle();
262 if(@{$self->{_list_stack}}) {
263 # _TODO_ display, but don't count them for now
265 while($list = shift(@{$self->{_list_stack}})) {
266 $self->poderror({ -line => 'EOF', -file => $infile,
267 -severity => 'ERROR', -msg => "=over on line " .
268 $list->start() . " without closing =back" }); #"
272 # check validity of document internal hyperlinks
273 # first build the node names from the paragraph text
275 foreach($self->node()) {
276 #print "Have node: +$_+\n";
279 # we have more than one word. Use the first as a node, too.
280 # This is used heavily in perlfunc.pod
281 $nodes{$1} ||= 2; # derived node
284 foreach($self->hyperlink()) {
285 #print "Seek node: +$_+\n";
287 s/^(\d+):// && ($line = $1);
288 if($_ && !$nodes{$_}) {
289 $self->poderror({ -line => $line, -file => $infile,
290 -severity => 'ERROR',
291 -msg => "unresolved internal link `$_'"});
295 ## Print the number of errors found
296 my $num_errors = $self->num_errors();
297 if ($num_errors > 0) {
298 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
299 ($num_errors == 1) ? "error" : "errors");
301 elsif($self->{_commands} == 0) {
302 print $out_fh "$infile does not contain any pod commands.\n";
303 $self->num_errors(-1);
306 print $out_fh "$infile pod syntax OK.\n";
311 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
312 my ($file, $line) = $pod_para->file_line;
313 ## Check the command syntax
314 my $arg; # this will hold the command argument
315 if (! $VALID_COMMANDS{$cmd}) {
316 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
317 -msg => "Unknown command \"$cmd\"" });
320 $self->{_commands}++; # found a valid command
321 ## check syntax of particular command
324 unshift(@{$self->{_list_stack}},
326 -indent => $paragraph,
330 elsif($cmd eq 'item') {
331 unless(@{$self->{_list_stack}}) {
332 $self->poderror({ -line => $line, -file => $file,
333 -severity => 'ERROR',
334 -msg => "=item without previous =over" });
338 $arg = $self->_interpolate_and_check($paragraph, $line, $file);
339 unless($arg && $arg =~ /(\S+)/) {
340 $self->poderror({ -line => $line, -file => $file,
341 -severity => 'WARNING',
342 -msg => "No argument for =item" });
345 $self->{_list_stack}[0]->item($arg || '');
347 $self->node($arg) if($arg);
350 elsif($cmd eq 'back') {
351 # check if we have an open list
352 unless(@{$self->{_list_stack}}) {
353 $self->poderror({ -line => $line, -file => $file,
354 -severity => 'ERROR',
355 -msg => "=back without previous =over" });
358 # check for spurious characters
359 $arg = $self->_interpolate_and_check($paragraph, $line,$file);
360 if($arg && $arg =~ /\S/) {
361 $self->poderror({ -line => $line, -file => $file,
362 -severity => 'WARNING',
363 -msg => "Spurious character(s) after =back" });
366 my $list = shift @{$self->{_list_stack}};
367 # check for empty lists
368 if(!$list->item() && $self->{-warnings}) {
369 $self->poderror({ -line => $line, -file => $file,
370 -severity => 'WARNING',
371 -msg => "No items in =over (at line " .
372 $list->start() . ") / =back list"}); #"
376 elsif($cmd =~ /^head/) {
377 # check if there is an open list
378 if(@{$self->{_list_stack}}) {
380 while($list = shift(@{$self->{_list_stack}})) {
381 $self->poderror({ -line => $line, -file => $file,
382 -severity => 'ERROR',
383 -msg => "unclosed =over (line ". $list->start() .
388 $arg = $self->_interpolate_and_check($paragraph, $line,$file);
389 $self->node($arg) if($arg);
391 elsif($cmd eq 'begin') {
392 if($self->{_have_begin}) {
393 # already have a begin
394 $self->poderror({ -line => $line, -file => $file,
395 -severity => 'ERROR',
396 -msg => "Nested =begin's (first at line " .
397 $self->{_have_begin} . ")"});
401 $arg = $self->_interpolate_and_check($paragraph, $line,$file);
402 unless($arg && $arg =~ /(\S+)/) {
403 $self->poderror({ -line => $line, -file => $file,
404 -severity => 'WARNING',
405 -msg => "No argument for =begin"});
407 # remember the =begin
408 $self->{_have_begin} = "$line:$1";
411 elsif($cmd eq 'end') {
412 if($self->{_have_begin}) {
413 # close the existing =begin
414 $self->{_have_begin} = '';
415 # check for spurious characters
416 $arg = $self->_interpolate_and_check($paragraph, $line,$file);
417 if($arg && $arg =~ /\S/) {
418 $self->poderror({ -line => $line, -file => $file,
419 -severity => 'WARNING',
420 -msg => "Spurious character(s) after =end" });
424 # don't have a matching =begin
425 $self->poderror({ -line => $line, -file => $file,
426 -severity => 'WARNING',
427 -msg => "=end without =begin" });
431 ## Check the interior sequences in the command-text
432 $self->_interpolate_and_check($paragraph, $line,$file)
433 unless(defined $arg);
436 sub _interpolate_and_check {
437 my ($self, $paragraph, $line, $file) = @_;
438 ## Check the interior sequences in the command-text
439 # and return the text
441 $self->parse_text($paragraph,$line), $line, $file, '');
445 my ($self,$ptree,$line,$file,$nestlist) = @_;
448 # process each node in the parse tree
453 # count the unescaped angle brackets
455 if($count = $i =~ s/[<>]/$self->expand_unescaped_bracket($&)/ge) {
456 $self->poderror({ -line => $line, -file => $file,
457 -severity => 'WARNING',
458 -msg => "$count unescaped <>" });
463 # have an interior sequence
464 my $cmd = $_->cmd_name();
465 my $contents = $_->parse_tree();
466 ($file,$line) = $_->file_line();
467 # check for valid tag
468 if (! $VALID_SEQUENCES{$cmd}) {
469 $self->poderror({ -line => $line, -file => $file,
470 -severity => 'ERROR',
471 -msg => qq(Unknown interior-sequence "$cmd")});
473 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
476 if($nestlist =~ /$cmd/) {
477 $self->poderror({ -line => $line, -file => $file,
478 -severity => 'ERROR',
479 -msg => "nested commands $cmd<...$cmd<...>...>"});
480 # _TODO_ should we add the contents anyway?
481 # expand it anyway, see below
485 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
486 $self->poderror({ -line => $line, -file => $file,
487 -severity => 'ERROR',
488 -msg => "garbled entity " . $_->raw_text()});
491 $text .= $self->expand_entity($$contents[0]);
494 # try to parse the hyperlink
495 my $link = Pod::Hyperlink->new($contents->raw_text());
496 unless(defined $link) {
497 $self->poderror({ -line => $line, -file => $file,
498 -severity => 'ERROR',
499 -msg => "malformed link L<>: $@"});
502 $link->line($line); # remember line
503 if($self->{-warnings}) {
504 foreach my $w ($link->warning()) {
505 $self->poderror({ -line => $line, -file => $file,
506 -severity => 'WARNING',
510 # check the link text
511 $text .= $self->_check_ptree($self->parse_text($link->text(),
512 $line), $line, $file, "$nestlist$cmd");
514 $node = $self->_check_ptree($self->parse_text($link->node(),
515 $line), $line, $file, "$nestlist$cmd")
517 # store internal link
518 # _TODO_ what if there is a link to the page itself by the name,
519 # e.g. Tk::Pod : L<Tk::Pod/"DESCRIPTION">
520 $self->hyperlink("$line:$node") if($node && !$link->page());
522 elsif($cmd =~ /[BCFIS]/) {
524 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
527 # check, but add nothing to $text (X<>, Z<>)
528 $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
534 # default method - just return it
535 sub expand_unescaped_bracket {
536 my ($self,$bracket) = @_;
542 my ($self,$entity) = @_;
546 # _TODO_ overloadable methods for BC..Z<...> expansion
550 ## my ($self, $paragraph, $line_num, $pod_para) = @_;
554 my ($self, $paragraph, $line_num, $pod_para) = @_;
555 my ($file, $line) = $pod_para->file_line;
556 $self->_interpolate_and_check($paragraph, $line,$file);
559 # set/return nodes of the current POD
561 my ($self,$text) = @_;
563 $text =~ s/[\s\n]+$//; # strip trailing whitespace
565 push(@{$self->{_nodes}}, $text);
571 # set/return hyperlinks of the current POD
575 push(@{$self->{_links}}, $_[0]);
581 #-----------------------------------------------------------------------------
584 # class to hold POD list info (=over, =item, =back)
585 #-----------------------------------------------------------------------------
593 my $class = ref($this) || $this;
595 my $self = {%params};
603 $self->{-file} ||= 'unknown';
604 $self->{-start} ||= 'unknown';
605 $self->{-indent} ||= 4; # perlpod: "should be the default"
606 $self->{_items} = [];
609 # The POD file name the list appears in
611 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
614 # The line in the file the node appears
616 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
621 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
624 # The individual =items of this list
626 my ($self,$item) = @_;
628 push(@{$self->{_items}}, $item);
632 return @{$self->{_items}};
636 #-----------------------------------------------------------------------------
639 # class to hold hyperlinks (L<>)
640 #-----------------------------------------------------------------------------
642 package Pod::Hyperlink;
646 Pod::Hyperlink - class for manipulation of POD hyperlinks
650 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
654 The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
655 C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
656 different parts of a POD hyperlink.
664 The B<new()> method can either be passed a set of key/value pairs or a single
665 scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
666 of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
667 failure, the error message is stored in C<$@>.
671 This method can be used to (re)parse a (new) hyperlink. The result is stored
672 in the current object.
674 =item markup($on,$off,$pageon,$pageoff)
676 The result of this method is a string the represents the textual value of the
677 link, but with included arbitrary markers that highlight the active portion
678 of the link. This will mainly be used by POD translators and saves the
679 effort of determining which words have to be highlighted. Examples: Depending
680 on the type of link, the following text will be returned, the C<*> represent
681 the places where the section/item specific on/off markers will be placed
682 (link to a specific node) and C<+> for the pageon/pageoff markers (link to the
686 the *$|* entry in the +perlvar+ manpage
687 the section on *OPTIONS* in the +perldoc+ manpage
688 the section on *DESCRIPTION* elsewhere in this document
690 This method is read-only.
694 This method returns the textual representation of the hyperlink as above,
695 but without markers (read only).
699 After parsing, this method returns any warnings ecountered during the
704 This method sets or returns the POD page this link points to.
708 As above, but the destination node text of the link.
712 The node type, either C<section> or C<item>.
716 Sets or returns an alternative text specified in the link.
720 Just simple slots for storing information about the line and the file
721 the link was incountered in. Has to be filled in manually.
727 Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
728 a lot of things from L<pod2man> and L<pod2roff>.
736 my $class = ref($this) || $this;
742 # called with a list of parameters
746 # called with L<> contents
747 return undef unless($self->parse($_[0]));
755 $self->{-line} ||= 'undef';
756 $self->{-file} ||= 'undef';
757 $self->{-page} ||= '';
758 $self->{-node} ||= '';
759 $self->{-alttext} ||= '';
760 $self->{-type} ||= 'undef';
761 $self->{_warnings} = [];
762 $self->_construct_text();
768 # syntax check the link and extract destination
769 my ($alttext,$page,$section,$item) = ('','','','');
771 # strip leading/trailing whitespace
773 $self->warning("ignoring leading whitespace in link");
776 $self->warning("ignoring trailing whitespace in link");
779 # collapse newlines with whitespace
782 # extract alternative text
783 if(s!^([^|/"\n]*)[|]!!) {
787 if(s!^([^|/"\s]*)(?=/|$)!!) {
791 if(s!^/?"([^"\n]+)"$!!) { # e.g. L</"blah blah">
799 if(s!^([^|"\s\n/][^"\n/]*)$!!) { # e.g. L<lah di dah>
802 # now there should be nothing left
804 _invalid_link("garbled entry (spurious characters `$_')");
807 elsif(!(length($page) || length($section) || length($item))) {
808 _invalid_link("empty link");
811 elsif($alttext =~ /[<>]/) {
812 _invalid_link("alternative text contains < or >");
815 else { # no errors so far
816 if($page =~ /[(]\d\w*[)]$/) {
817 $self->warning("brackets in `$page'");
818 $page = $`; # strip that extension
820 if($page =~ /^(\s*)(\S+)(\s*)/ && (length($1) || length($3))) {
821 $self->warning("whitespace in `$page'");
822 $page = $2; # strip that extension
826 $self->node($section || $item); # _TODO_ do not distinguish for now
827 $self->alttext($alttext);
828 $self->type($item ? 'item' : 'section');
832 sub _construct_text {
834 my $alttext = $self->alttext();
835 my $type = $self->type();
836 my $section = $self->node();
837 my $page = $self->page();
839 $alttext ? $alttext : (
841 $type eq 'item' ? 'the ' . $section . ' entry' :
842 'the section on ' . $section ) .
843 ($page ? ($section ? ' in ':''). 'the ' . $page . ' manpage' :
844 'elsewhere in this document');
845 # for being marked up later
847 $alttext ? '<SECTON>' . $alttext . '<SECTOFF>' : (
849 $type eq 'item' ? 'the <SECTON>' . $section . '<SECTOFF> entry' :
850 'the section on <SECTON>' . $section . '<SECTOFF>' ) .
851 ($page ? ($section ? ' in ':'') . 'the <PAGEON>' .
852 $page . '<PAGEOFF> manpage' :
853 ' elsewhere in this document');
858 my ($self,$on,$off,$pageon,$pageoff) = @_;
863 $_[0]->_construct_text;
864 my $str = $self->{_markup};
865 $str =~ s/<SECTON>/$on/;
866 $str =~ s/<SECTOFF>/$off/;
867 $str =~ s/<PAGEON>/$pageon/;
868 $str =~ s/<PAGEOFF>/$pageoff/;
872 # The complete link's text
874 $_[0]->_construct_text();
878 # The POD page the link appears on
882 push(@{$self->{_warnings}}, @_);
885 return @{$self->{_warnings}};
888 # The POD file name the link appears in
890 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
893 # The line in the file the link appears
895 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
898 # The POD page the link appears on
900 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
903 # The link destination
905 return (@_ > 1) ? ($_[0]->{-node} = $_[1]) : $_[0]->{-node};
908 # Potential alternative text
910 return (@_ > 1) ? ($_[0]->{-alttext} = $_[1]) : $_[0]->{-alttext};
915 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
921 #eval { die "$msg\n" };
923 $@ = $msg; # this seems to work, too!