X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FPod%2FChecker.pm;h=e899f59031d0ca6e0126ca229458d30188cce8a7;hb=4e592f10f45677b2800948f688180c0f1ee587b4;hp=c661c7527e7a93607b7a715a7889565b6aabdf7e;hpb=e2c3adefd8c31a020997b83179ab5ab417e7e4ac;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Pod/Checker.pm b/lib/Pod/Checker.pm index c661c75..e899f59 100644 --- a/lib/Pod/Checker.pm +++ b/lib/Pod/Checker.pm @@ -1,7 +1,7 @@ ############################################################################# # Pod/Checker.pm -- check pod documents for syntax errors # -# Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved. +# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -10,8 +10,8 @@ package Pod::Checker; use vars qw($VERSION); -$VERSION = 1.096; ## Current version of this package -require 5.004; ## requires this Perl version or later +$VERSION = 1.4301; ## Current version of this package +require 5.005; ## requires this Perl version or later use Pod::ParseUtils; ## for hyperlinks and lists @@ -26,6 +26,7 @@ Pod::Checker, podchecker() - check pod documents for syntax errors $syntax_okay = podchecker($filepath, $outputpath, %options); my $checker = new Pod::Checker %options; + $checker->parse_from_file($filepath, \*STDERR); =head1 OPTIONS/ARGUMENTS @@ -43,7 +44,8 @@ This function can take a hash of options: =item B<-warnings> =E I -Turn warnings on/off. See L<"Warnings">. +Turn warnings on/off. I is usually 1 for on, but higher values +trigger additional warnings. See L<"Warnings">. =back @@ -51,19 +53,17 @@ Turn warnings on/off. See L<"Warnings">. B will perform syntax checking of Perl5 POD format documentation. -I +Curious/ambitious users are welcome to propose additional features they wish +to see in B and B and verify that the checks are +consistent with L. -It is hoped that curious/ambitious user will help flesh out and add the -additional features they wish to see in B and B -and verify that the checks are consistent with L. - -The following checks are preformed: +The following checks are currently performed: =over 4 =item * -Unknown '=xxxx' commands, unknown 'X<...>' interior-sequences, +Unknown '=xxxx' commands, unknown 'XE...E' interior-sequences, and unterminated interior sequences. =item * @@ -82,7 +82,7 @@ C...LE...E...E>). =item * -Check for malformed or nonexisting entities C...E>. +Check for malformed or non-existing entities C...E>. =item * @@ -97,20 +97,17 @@ to something else. =back -=head2 Additional Features - -While checking, this module collects document properties, e.g. the nodes -for hyperlinks (C<=headX>, C<=item>). POD translators can use this feature -to syntax-check and get the nodes in a first pass before actually starting -to convert. This is expensive in terms of execution time, but allows for -very robust conversions. - =head1 DIAGNOSTICS =head2 Errors =over 4 +=item * empty =headn + +A heading (C<=head1> or C<=head2>) without any text? That ain't no +heading! + =item * =over on line I without closing =back The C<=over> command does not have a corresponding C<=back> before the @@ -134,7 +131,7 @@ A standalone C<=end> command was found. =item * Nested =begin's -There were at least two concecutive C<=begin> commands without +There were at least two consecutive C<=begin> commands without the corresponding C<=end>. Only one C<=begin> may be active at a time. @@ -145,14 +142,14 @@ There is no specification of the formatter after the C<=for> command. =item * unresolved internal link I The given link to I does not have a matching node in the current -POD. This also happend when a single word node name is not enclosed in +POD. This also happened when a single word node name is not enclosed in C<"">. =item * Unknown command "I" An invalid POD command has been found. Valid are C<=head1>, C<=head2>, -C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>, -C<=cut> +C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, +C<=for>, C<=pod>, C<=cut> =item * Unknown interior-sequence "I" @@ -168,13 +165,33 @@ does not make sense. =item * garbled entity I -The I found cannot be interpreted as an character entity. +The I found cannot be interpreted as a character entity. + +=item * Entity number out of range + +An entity specified by number (dec, hex, oct) is out of range (1-255). =item * malformed link LEE The link found cannot be parsed because it does not conform to the syntax described in L. +=item * nonempty ZEE + +The CE> sequence is supposed to be empty. + +=item * empty XEE + +The index entry specified contains nothing but whitespace. + +=item * Spurious text after =pod / =cut + +The commands C<=pod> and C<=cut> do not take any arguments. + +=item * Spurious character(s) after =back + +The C<=back> command does not take any arguments. + =back =head2 Warnings @@ -183,29 +200,59 @@ These may not necessarily cause trouble, but indicate mediocre style. =over 4 -=item * No numeric argument for =over +=item * multiple occurrence of link target I -The C<=over> command is supposed to have a numeric argument (the -indentation). +The POD file has some C<=item> and/or C<=head> commands that have +the same text. Potential hyperlinks to such a text cannot be unique then. +This warning is printed only with warning level greater than one. -=item * Spurious character(s) after =back +=item * line containing nothing but whitespace in paragraph -The C<=back> command does not take any arguments. +There is some whitespace on a seemingly empty line. POD is very sensitive +to such things, so this is flagged. B users switch on the B +option to avoid this problem. + +=begin _disabled_ + +=item * file does not start with =head + +The file starts with a different POD directive than head. +This is most probably something you do not want. + +=end _disabled_ + +=item * previous =item has no contents + +There is a list C<=item> right above the flagged line that has no +text contents. You probably want to delete empty items. + +=item * preceding non-item paragraph(s) + +A list introduced by C<=over> starts with a text or verbatim paragraph, +but continues with C<=item>s. Move the non-item paragraph out of the +C<=over>/C<=back> block. + +=item * =item type mismatch (I vs. I) + +A list started with e.g. a bullet-like C<=item> and continued with a +numbered one. This is obviously inconsistent. For most translators the +type of the I C<=item> determines the type of the list. =item * I unescaped CE> in paragraph Angle brackets not written as CltE> and CgtE> can potentially cause errors as they could be misinterpreted as -markup commands. +markup commands. This is only printed when the -warnings level is +greater than 1. -=item * Non-standard entity +=item * Unknown entity A character entity was found that does not belong to the standard -ISO set. +ISO set or the POD specials C and C. =item * No items in =over -The list does not contain any items. +The list opened with C<=over> does not contain any items. =item * No argument for =item @@ -214,17 +261,53 @@ by C<*> to indicate an unordered list, by a number (optionally followed by a dot) to indicate an ordered (numbered) list or simple text for a definition list. +=item * empty section in previous paragraph + +The previous section (introduced by a C<=head> command) does not contain +any text. This usually indicates that something is missing. Note: A +C<=head1> followed immediately by C<=head2> does not trigger this warning. + =item * Verbatim paragraph in NAME section The NAME section (C<=head1 NAME>) should consist of a single paragraph with the script/module name, followed by a dash `-' and a very short description of what the thing is good for. -=item * Hyperlinks +=item * =headI without preceding higher level -There are some warnings wrt. hyperlinks: -Leading/trailing whitespace, newlines in hyperlinks, -brackets C<()>. +For example if there is a C<=head2> in the POD file prior to a +C<=head1>. + +=back + +=head2 Hyperlinks + +There are some warnings with respect to malformed hyperlinks: + +=over 4 + +=item * ignoring leading/trailing whitespace in link + +There is whitespace at the beginning or the end of the contents of +LE...E. + +=item * (section) in '$page' deprecated + +There is a section detected in the page name of LE...E, e.g. +Cpasswd(2)E>. POD hyperlinks may point to POD documents only. +Please write Cpasswd(2)E> instead. Some formatters are able +to expand this to appropriate code. For links to (builtin) functions, +please say Cperlfunc/mkdirE>, without (). + +=item * alternative text/node '%s' contains non-escaped | or / + +The characters C<|> and C are special in the LE...E context. +Although the hyperlink parser does its best to determine which "/" is +text and which is a delimiter in case of doubt, one ought to escape +these literal characters like this: + + / E + | E =back @@ -235,15 +318,22 @@ there were no POD commands at all found in the file. =head1 EXAMPLES -I<[T.B.D.]> +See L -=head1 AUTHOR +=head1 INTERFACE -Brad Appleton Ebradapp@enteract.comE (initial version), -Marek Rouchal Emarek@saftsack.fs.uni-bayreuth.deE - -Based on code for B written by -Tom Christiansen Etchrist@mox.perl.comE +While checking, this module collects document properties, e.g. the nodes +for hyperlinks (C<=headX>, C<=item>) and index entries (CE>). +POD translators can use this feature to syntax-check and get the nodes in +a first pass before actually starting to convert. This is expensive in terms +of execution time, but allows for very robust conversions. + +Since PodParser-1.24 the B module uses only the B +method to print errors and warnings. The summary output (e.g. +"Pod syntax OK") has been dropped from the module and has been included in +B (the script). This allows users of B to +control completely the output behavior. Users of B (the script) +get the well-known behavior. =cut @@ -266,12 +356,15 @@ my %VALID_COMMANDS = ( 'cut' => 1, 'head1' => 1, 'head2' => 1, + 'head3' => 1, + 'head4' => 1, 'over' => 1, 'back' => 1, 'item' => 1, 'for' => 1, 'begin' => 1, 'end' => 1, + 'encoding' => '1', ); my %VALID_SEQUENCES = ( @@ -395,6 +488,10 @@ my %ENTITIES = ( iquest => '¿', 'times' => '×', # times is a keyword in perl divide => '÷', + +# some POD special entities + verbar => '|', + sol => '/' ); ##--------------------------------------------------------------------------- @@ -416,7 +513,7 @@ sub podchecker( $ ; $ % ) { ## Now check the pod document for errors $checker->parse_from_file($infile, $outfile); - + ## Return the number of errors found return $checker->num_errors(); } @@ -427,32 +524,86 @@ sub podchecker( $ ; $ % ) { ## Method definitions begin here ##------------------------------- -sub new { - my $this = shift; - my $class = ref($this) || $this; - my %params = @_; - my $self = {%params}; - bless $self, $class; - $self->initialize(); - return $self; -} +################################## + +=over 4 + +=item Cnew( %options )> + +Return a reference to a new Pod::Checker object that inherits from +Pod::Parser and is used for calling the required methods later. The +following options are recognized: + +C<-warnings =E num> + Print warnings if C is true. The higher the value of C, +the more warnings are printed. Currently there are only levels 1 and 2. + +C<-quiet =E num> + If C is true, do not print any errors/warnings. This is useful +when Pod::Checker is used to munge POD code into plain text from within +POD formatters. + +=cut + +## sub new { +## my $this = shift; +## my $class = ref($this) || $this; +## my %params = @_; +## my $self = {%params}; +## bless $self, $class; +## $self->initialize(); +## return $self; +## } sub initialize { my $self = shift; ## Initialize number of errors, and setup an error function to ## increment this number and then print to the designated output. $self->{_NUM_ERRORS} = 0; - $self->errorsub('poderror'); # set the error handling subroutine + $self->{_NUM_WARNINGS} = 0; + $self->{-quiet} ||= 0; + # set the error handling subroutine + $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror'); $self->{_commands} = 0; # total number of POD commands encountered $self->{_list_stack} = []; # stack for nested lists $self->{_have_begin} = ''; # stores =begin $self->{_links} = []; # stack for internal hyperlinks $self->{_nodes} = []; # stack for =head/=item nodes + $self->{_index} = []; # text in X<> # print warnings? $self->{-warnings} = 1 unless(defined $self->{-warnings}); $self->{_current_head1} = ''; # the current =head1 block + $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); } +################################## + +=item C<$checker-Epoderror( @args )> + +=item C<$checker-Epoderror( {%opts}, @args )> + +Internal method for printing errors and warnings. If no options are +given, simply prints "@_". The following options are recognized and used +to form the output: + + -msg + +A message to print prior to C<@args>. + + -line + +The line number the error occurred in. + + -file + +The file (name) the error occurred in. + + -severity + +The error level, should be 'WARNING' or 'ERROR'. + +=cut + # Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) sub poderror { my $self = shift; @@ -462,39 +613,124 @@ sub poderror { chomp( my $msg = ($opts{-msg} || "")."@_" ); my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; + unless (exists $opts{-severity}) { + ## See if can find severity in message prefix + $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); + } my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; ## Increment error count and print message " ++($self->{_NUM_ERRORS}) if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); - my $out_fh = $self->output_handle(); - print $out_fh ($severity, $msg, $line, $file, "\n") - if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); + ++($self->{_NUM_WARNINGS}) + if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING')); + unless($self->{-quiet}) { + my $out_fh = $self->output_handle() || \*STDERR; + print $out_fh ($severity, $msg, $line, $file, "\n") + if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); + } } -# set/retrieve the number of errors found +################################## + +=item C<$checker-Enum_errors()> + +Set (if argument specified) and retrieve the number of errors found. + +=cut + sub num_errors { return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; } -# set and/or retrieve canonical name of POD +################################## + +=item C<$checker-Enum_warnings()> + +Set (if argument specified) and retrieve the number of warnings found. + +=cut + +sub num_warnings { + return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS}; +} + +################################## + +=item C<$checker-Ename()> + +Set (if argument specified) and retrieve the canonical name of POD as +found in the C<=head1 NAME> section. + +=cut + sub name { return (@_ > 1 && $_[1]) ? ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; } -# set/return nodes of the current POD +################################## + +=item C<$checker-Enode()> + +Add (if argument specified) and retrieve the nodes (as defined by C<=headX> +and C<=item>) of the current POD. The nodes are returned in the order of +their occurrence. They consist of plain text, each piece of whitespace is +collapsed to a single blank. + +=cut + sub node { my ($self,$text) = @_; if(defined $text) { - $text =~ s/[\s\n]+$//; # strip trailing whitespace - # add node + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! push(@{$self->{_nodes}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); return $text; } @{$self->{_nodes}}; } +################################## + +=item C<$checker-Eidx()> + +Add (if argument specified) and retrieve the index entries (as defined by +CE>) of the current POD. They consist of plain text, each piece +of whitespace is collapsed to a single blank. + +=cut + +# set/return index entries of current POD +sub idx { + my ($self,$text) = @_; + if(defined $text) { + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! + push(@{$self->{_index}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); + return $text; + } + @{$self->{_index}}; +} + +################################## + +=item C<$checker-Ehyperlink()> + +Add (if argument specified) and retrieve the hyperlinks (as defined by +CE>) of the current POD. They consist of a 2-item array: line +number and C object. + +=back + +=cut + # set/return hyperlinks of the current POD sub hyperlink { my $self = shift; @@ -508,56 +744,63 @@ sub hyperlink { ## overrides for Pod::Parser sub end_pod { - ## Do some final checks and - ## print the number of errors found - my $self = shift; - my $infile = $self->input_file(); - my $out_fh = $self->output_handle(); - - if(@{$self->{_list_stack}}) { - # _TODO_ display, but don't count them for now - my $list; - while($list = shift(@{$self->{_list_stack}})) { - $self->poderror({ -line => 'EOF', -file => $infile, - -severity => 'ERROR', -msg => "=over on line " . - $list->start() . " without closing =back" }); #" - } - } - - # check validity of document internal hyperlinks - # first build the node names from the paragraph text - my %nodes; - foreach($self->node()) { - $nodes{$_} = 1; - if(/^(\S+)\s+/) { - # we have more than one word. Use the first as a node, too. - # This is used heavily in perlfunc.pod - $nodes{$1} ||= 2; # derived node - } - } - foreach($self->hyperlink()) { - my $line = ''; - s/^(\d+):// && ($line = $1); - if($_ && !$nodes{$_}) { - $self->poderror({ -line => $line, -file => $infile, - -severity => 'ERROR', - -msg => "unresolved internal link `$_'"}); - } - } - - ## Print the number of errors found - my $num_errors = $self->num_errors(); - if ($num_errors > 0) { - printf $out_fh ("$infile has $num_errors pod syntax %s.\n", - ($num_errors == 1) ? "error" : "errors"); - } - elsif($self->{_commands} == 0) { - print $out_fh "$infile does not contain any pod commands.\n"; - $self->num_errors(-1); - } - else { - print $out_fh "$infile pod syntax OK.\n"; - } + ## Do some final checks and + ## print the number of errors found + my $self = shift; + my $infile = $self->input_file(); + + if(@{$self->{_list_stack}}) { + my $list; + while(($list = $self->_close_list('EOF',$infile)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => 'EOF', -file => $infile, + -severity => 'ERROR', -msg => "=over on line " . + $list->start() . " without closing =back" }); #" + } + } + + # check validity of document internal hyperlinks + # first build the node names from the paragraph text + my %nodes; + foreach($self->node()) { + $nodes{$_} = 1; + if(/^(\S+)\s+\S/) { + # we have more than one word. Use the first as a node, too. + # This is used heavily in perlfunc.pod + $nodes{$1} ||= 2; # derived node + } + } + foreach($self->idx()) { + $nodes{$_} = 3; # index node + } + foreach($self->hyperlink()) { + my ($line,$link) = @$_; + # _TODO_ what if there is a link to the page itself by the name, + # e.g. in Tk::Pod : L + if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { + my $node = $self->_check_ptree($self->parse_text($link->node(), + $line), $line, $infile, 'L'); + if($node && !$nodes{$node}) { + $self->poderror({ -line => $line || '', -file => $infile, + -severity => 'ERROR', + -msg => "unresolved internal link '$node'"}); + } + } + } + + # check the internal nodes for uniqueness. This pertains to + # =headX, =item and X<...> + if($self->{-warnings} && $self->{-warnings}>1) { + foreach(grep($self->{_unique_nodes}->{$_} > 1, + keys %{$self->{_unique_nodes}})) { + $self->poderror({ -line => '-', -file => $infile, + -severity => 'WARNING', + -msg => "multiple occurrence of link target '$_'"}); + } + } + + # no POD found here + $self->num_errors(-1) if($self->{_commands} == 0); } # check a POD command directive @@ -568,27 +811,28 @@ sub command { my $arg; # this will hold the command argument if (! $VALID_COMMANDS{$cmd}) { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', - -msg => "Unknown command \"$cmd\"" }); + -msg => "Unknown command '$cmd'" }); } - else { - $self->{_commands}++; # found a valid command - ## check syntax of particular command + else { # found a valid command + $self->{_commands}++; # delete this line if below is enabled again + + ##### following check disabled due to strong request + #if(!$self->{_commands}++ && $cmd !~ /^head/) { + # $self->poderror({ -line => $line, -file => $file, + # -severity => 'WARNING', + # -msg => "file does not start with =head" }); + #} + + # check syntax of particular command if($cmd eq 'over') { # check for argument $arg = $self->interpolate_and_check($paragraph, $line,$file); my $indent = 4; # default if($arg && $arg =~ /^\s*(\d+)\s*$/) { $indent = $1; - } else { - $self->poderror({ -line => $line, -file => $file, - -severity => 'WARNING', - -msg => "No numeric argument for =over"}); } # start a new list - unshift(@{$self->{_list_stack}}, Pod::List->new( - -indent => $indent, - -start => $line, - -file => $file)); + $self->_open_list($indent,$line,$file); } elsif($cmd eq 'item') { # are we in a list? @@ -597,22 +841,60 @@ sub command { -severity => 'ERROR', -msg => "=item without previous =over" }); # auto-open in case we encounter many more - unshift(@{$self->{_list_stack}}, - Pod::List->new( - -indent => 'auto', - -start => $line, - -file => $file)); + $self->_open_list('auto',$line,$file); + } + my $list = $self->{_list_stack}->[0]; + # check whether the previous item had some contents + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + if($list->{_has_par}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "preceding non-item paragraph(s)" }); + delete $list->{_has_par}; } # check for argument $arg = $self->interpolate_and_check($paragraph, $line, $file); - unless($arg && $arg =~ /(\S+)/) { + if($arg && $arg =~ /(\S+)/) { + $arg =~ s/[\s\n]+$//; + my $type; + if($arg =~ /^[*]\s*(\S*.*)/) { + $type = 'bullet'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + elsif($arg =~ /^\d+\.?\s*(\S*)/) { + $type = 'number'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + else { + $type = 'definition'; + $self->{_list_item_contents} = 1; + } + my $first = $list->type(); + if($first && $first ne $type) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=item type mismatch ('$first' vs. '$type')"}); + } + else { # first item + $list->type($type); + } + } + else { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', -msg => "No argument for =item" }); $arg = ' '; # empty + $self->{_list_item_contents} = 0; } # add this item - $self->{_list_stack}[0]->item($arg); + $list->item($arg); # remember this node $self->node($arg); } @@ -628,11 +910,11 @@ sub command { $arg = $self->interpolate_and_check($paragraph, $line,$file); if($arg && $arg =~ /\S/) { $self->poderror({ -line => $line, -file => $file, - -severity => 'WARNING', + -severity => 'ERROR', -msg => "Spurious character(s) after =back" }); } # close list - my $list = shift @{$self->{_list_stack}}; + my $list = $self->_close_list($line,$file); # check for empty lists if(!$list->item() && $self->{-warnings}) { $self->poderror({ -line => $line, -file => $file, @@ -642,11 +924,30 @@ sub command { } } } - elsif($cmd =~ /^head/) { + elsif($cmd =~ /^head(\d+)/) { + my $hnum = $1; + $self->{"_have_head_$hnum"}++; # count head types + if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=head$hnum without preceding higher level"}); + } + # check whether the previous =head section had some contents + if(defined $self->{_commands_in_head} && + $self->{_commands_in_head} == 0 && + defined $self->{_last_head} && + $self->{_last_head} >= $hnum) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "empty section in previous paragraph"}); + } + $self->{_commands_in_head} = -1; + $self->{_last_head} = $hnum; # check if there is an open list if(@{$self->{_list_stack}}) { my $list; - while($list = shift(@{$self->{_list_stack}})) { + while(($list = $self->_close_list($line,$file)) && + $list->indent() ne 'auto') { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', -msg => "=over on line ". $list->start() . @@ -655,9 +956,14 @@ sub command { } # remember this node $arg = $self->interpolate_and_check($paragraph, $line,$file); - $self->node($arg) if($arg); + $arg =~ s/[\s\n]+$//s; + $self->node($arg); + unless(length($arg)) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "empty =$cmd"}); + } if($cmd eq 'head1') { - $arg =~ s/[\s\n]+$//; $self->{_current_head1} = $arg; } else { $self->{_current_head1} = ''; @@ -711,12 +1017,48 @@ sub command { } $arg = ''; # do not expand paragraph below } + elsif($cmd =~ /^(pod|cut)$/) { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Spurious text after =$cmd"}); + } + } + $self->{_commands_in_head}++; ## Check the interior sequences in the command-text $self->interpolate_and_check($paragraph, $line,$file) unless(defined $arg); } } +sub _open_list +{ + my ($self,$indent,$line,$file) = @_; + my $list = Pod::List->new( + -indent => $indent, + -start => $line, + -file => $file); + unshift(@{$self->{_list_stack}}, $list); + undef $self->{_list_item_contents}; + $list; +} + +sub _close_list +{ + my ($self,$line,$file) = @_; + my $list = shift(@{$self->{_list_stack}}); + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + undef $self->{_list_item_contents}; + $list; +} + # process a block of some text sub interpolate_and_check { my ($self, $paragraph, $line, $file) = @_; @@ -734,16 +1076,17 @@ sub _check_ptree { foreach(@$ptree) { # regular text chunk unless(ref) { - my $count; # count the unescaped angle brackets - my $i = $_; - if($count = $i =~ tr/<>/<>/) { + # complain only when warning level is greater than 1 + if($self->{-warnings} && $self->{-warnings}>1) { + my $count; + if($count = tr/<>/<>/) { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', - -msg => "$count unescaped <> in paragraph" }) - if($self->{-warnings}); + -msg => "$count unescaped <> in paragraph" }); + } } - $text .= $i; + $text .= $_; next; } # have an interior sequence @@ -754,14 +1097,14 @@ sub _check_ptree { if (! $VALID_SEQUENCES{$cmd}) { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', - -msg => qq(Unknown interior-sequence "$cmd")}); + -msg => qq(Unknown interior-sequence '$cmd')}); # expand it anyway $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); next; } if($nestlist =~ /$cmd/) { $self->poderror({ -line => $line, -file => $file, - -severity => 'ERROR', + -severity => 'WARNING', -msg => "nested commands $cmd<...$cmd<...>...>"}); # _TODO_ should we add the contents anyway? # expand it anyway, see below @@ -775,9 +1118,28 @@ sub _check_ptree { next; } my $ent = $$contents[0]; - if($ent =~ /^\d+$/) { + my $val; + if($ent =~ /^0x[0-9a-f]+$/i) { + # hexadec entity + $val = hex($ent); + } + elsif($ent =~ /^0\d+$/) { + # octal + $val = oct($ent); + } + elsif($ent =~ /^\d+$/) { # numeric entity - $text .= chr($ent); + $val = $ent; + } + if(defined $val) { + if($val>0 && $val<256) { + $text .= chr($val); + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Entity number out of range " . $_->raw_text()}); + } } elsif($ENTITIES{$ent}) { # known ISO entity @@ -786,7 +1148,7 @@ sub _check_ptree { else { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', - -msg => "Non-standard entity " . $_->raw_text()}); + -msg => "Unknown entity " . $_->raw_text()}); $text .= "E<$ent>"; } } @@ -810,34 +1172,47 @@ sub _check_ptree { # check the link text $text .= $self->_check_ptree($self->parse_text($link->text(), $line), $line, $file, "$nestlist$cmd"); - my $node = ''; - # remember internal link - # _TODO_ what if there is a link to the page itself by the name, - # e.g. in Tk::Pod : L - if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { - $node = $self->_check_ptree($self->parse_text($link->node(), - $line), $line, $file, "$nestlist$cmd"); - $self->hyperlink("$line:$node") if($node); - } + # remember link + $self->hyperlink([$line,$link]); } elsif($cmd =~ /[BCFIS]/) { # add the guts $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); } + elsif($cmd eq 'Z') { + if(length($contents->raw_text())) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Nonempty Z<>"}); + } + } + elsif($cmd eq 'X') { + my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + if($idx =~ /^\s*$/s) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Empty X<>"}); + } + else { + # remember this node + $self->idx($idx); + } + } else { - # check, but add nothing to $text (X<>, Z<>) - $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + # not reached + die "internal error"; } } $text; } -# _TODO_ overloadable methods for BC..Z<...> expansion? - # process a block of verbatim text sub verbatim { - ## Nothing to check + ## Nothing particular to check my ($self, $paragraph, $line_num, $pod_para) = @_; + + $self->_preproc_par($paragraph); + if($self->{_current_head1} eq 'NAME') { my ($file, $line) = $pod_para->file_line; $self->poderror({ -line => $line, -file => $file, @@ -851,6 +1226,8 @@ sub textblock { my ($self, $paragraph, $line_num, $pod_para) = @_; my ($file, $line) = $pod_para->file_line; + $self->_preproc_par($paragraph); + # skip this paragraph if in a =begin block unless($self->{_have_begin}) { my $block = $self->interpolate_and_check($paragraph, $line,$file); @@ -863,4 +1240,32 @@ sub textblock { } } +sub _preproc_par +{ + my $self = shift; + $_[0] =~ s/[\s\n]+$//; + if($_[0]) { + $self->{_commands_in_head}++; + $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); + if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { + $self->{_list_stack}->[0]->{_has_par} = 1; + } + } +} + 1; + +__END__ + +=head1 AUTHOR + +Please report bugs using L. + +Brad Appleton Ebradapp@enteract.comE (initial version), +Marek Rouchal Emarekr@cpan.orgE + +Based on code for B written by +Tom Christiansen Etchrist@mox.perl.comE + +=cut +