From: Gurusamy Sarathy Date: Tue, 22 Feb 2000 10:45:54 +0000 (+0000) Subject: PodParser-1.093 update (from Brad Appleton's site) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=66aff6dd6356ffc29ccae7b7f1ff2e3f01929b1f;p=p5sagit%2Fp5-mst-13.2.git PodParser-1.093 update (from Brad Appleton's site) p4raw-id: //depot/perl@5198 --- diff --git a/lib/Pod/Checker.pm b/lib/Pod/Checker.pm index c661c75..b5f980b 100644 --- a/lib/Pod/Checker.pm +++ b/lib/Pod/Checker.pm @@ -10,7 +10,7 @@ package Pod::Checker; use vars qw($VERSION); -$VERSION = 1.096; ## Current version of this package +$VERSION = 1.097; ## Current version of this package require 5.004; ## requires this Perl version or later use Pod::ParseUtils; ## for hyperlinks and lists @@ -111,6 +111,11 @@ very robust conversions. =over 4 +=item * empty =headn + +A heading (C<=head1> or C<=head2>) without any text? That ain't no +heading! + =item * =over on line I without closing =back The C<=over> command does not have a corresponding C<=back> before the @@ -134,7 +139,7 @@ A standalone C<=end> command was found. =item * Nested =begin's -There were at least two concecutive C<=begin> commands without +There were at least two consecutive C<=begin> commands without the corresponding C<=end>. Only one C<=begin> may be active at a time. @@ -168,13 +173,29 @@ does not make sense. =item * garbled entity I -The I found cannot be interpreted as an character entity. +The I found cannot be interpreted as a character entity. + +=item * Entity number out of range + +An entity specified by number (dec, hex, oct) is out of range (1-255). =item * malformed link LEE The link found cannot be parsed because it does not conform to the syntax described in L. +=item * nonempty ZEE + +The CE> sequence is supposed to be empty. + +=item * Spurious text after =pod / =cut + +The commands C<=pod> and C<=cut> do not take any arguments. + +=item * Spurious character(s) after =back + +The C<=back> command does not take any arguments. + =back =head2 Warnings @@ -183,14 +204,43 @@ These may not necessarily cause trouble, but indicate mediocre style. =over 4 +=item * multiple occurence of link target I + +The POD file has some C<=item> and/or C<=head> commands that have +the same text. Potential hyperlinks to such a text cannot be unique then. + +=item * line containing nothing but whitespace in paragraph + +There is some whitespace on a seemingly empty line. POD is very sensitive +to such things, so this is flagged. B users switch on the B +option to avoid this problem. + +=item * file does not start with =head + +The file starts with a different POD directive than head. +This is most probably something you do not want. + =item * No numeric argument for =over The C<=over> command is supposed to have a numeric argument (the indentation). -=item * Spurious character(s) after =back +=item * previous =item has no contents -The C<=back> command does not take any arguments. +There is a list C<=item> right above the flagged line that has no +text contents. You probably want to delete empty items. + +=item * preceding non-item paragraph(s) + +A list introduced by C<=over> starts with a text or verbatim paragraph, +but continues with C<=item>s. Move the non-item paragraph out of the +C<=over>/C<=back> block. + +=item * =item type mismatch (I vs. I) + +A list started with e.g. a bulletted C<=item> and continued with a +numbered one. This is obviously inconsistent. For most translators the +type of the I C<=item> determines the type of the list. =item * I unescaped CE> in paragraph @@ -198,14 +248,14 @@ Angle brackets not written as CltE> and CgtE> can potentially cause errors as they could be misinterpreted as markup commands. -=item * Non-standard entity +=item * Unknown entity A character entity was found that does not belong to the standard -ISO set. +ISO set or the POD specials C and C. =item * No items in =over -The list does not contain any items. +The list opened with C<=over> does not contain any items. =item * No argument for =item @@ -214,6 +264,12 @@ by C<*> to indicate an unordered list, by a number (optionally followed by a dot) to indicate an ordered (numbered) list or simple text for a definition list. +=item * empty section in previous paragraph + +The previous section (introduced by a C<=head> command) does not contain +any text. This usually indicates that something is missing. Note: A +C<=head1> followed immediately by C<=head2> does not trigger this warning. + =item * Verbatim paragraph in NAME section The NAME section (C<=head1 NAME>) should consist of a single paragraph @@ -395,6 +451,10 @@ my %ENTITIES = ( iquest => '¿', 'times' => '×', # times is a keyword in perl divide => '÷', + +# some POD special entities + verbar => '|', + sol => '/' ); ##--------------------------------------------------------------------------- @@ -413,6 +473,7 @@ sub podchecker( $ ; $ % ) { ## Now create a pod checker my $checker = new Pod::Checker(%options); + $checker->parseopts(-process_cut_cmd => 1); ## Now check the pod document for errors $checker->parse_from_file($infile, $outfile); @@ -427,15 +488,15 @@ sub podchecker( $ ; $ % ) { ## Method definitions begin here ##------------------------------- -sub new { - my $this = shift; - my $class = ref($this) || $this; - my %params = @_; - my $self = {%params}; - bless $self, $class; - $self->initialize(); - return $self; -} +## sub new { +## my $this = shift; +## my $class = ref($this) || $this; +## my %params = @_; +## my $self = {%params}; +## bless $self, $class; +## $self->initialize(); +## return $self; +## } sub initialize { my $self = shift; @@ -462,6 +523,10 @@ sub poderror { chomp( my $msg = ($opts{-msg} || "")."@_" ); my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; + unless (exists $opts{-severity}) { + ## See if can find severity in message prefix + $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); + } my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; ## Increment error count and print message " @@ -487,9 +552,12 @@ sub name { sub node { my ($self,$text) = @_; if(defined $text) { - $text =~ s/[\s\n]+$//; # strip trailing whitespace - # add node + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! push(@{$self->{_nodes}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++; return $text; } @{$self->{_nodes}}; @@ -508,56 +576,63 @@ sub hyperlink { ## overrides for Pod::Parser sub end_pod { - ## Do some final checks and - ## print the number of errors found - my $self = shift; - my $infile = $self->input_file(); - my $out_fh = $self->output_handle(); - - if(@{$self->{_list_stack}}) { - # _TODO_ display, but don't count them for now - my $list; - while($list = shift(@{$self->{_list_stack}})) { - $self->poderror({ -line => 'EOF', -file => $infile, - -severity => 'ERROR', -msg => "=over on line " . - $list->start() . " without closing =back" }); #" - } - } - - # check validity of document internal hyperlinks - # first build the node names from the paragraph text - my %nodes; - foreach($self->node()) { - $nodes{$_} = 1; - if(/^(\S+)\s+/) { - # we have more than one word. Use the first as a node, too. - # This is used heavily in perlfunc.pod - $nodes{$1} ||= 2; # derived node - } - } - foreach($self->hyperlink()) { - my $line = ''; - s/^(\d+):// && ($line = $1); - if($_ && !$nodes{$_}) { - $self->poderror({ -line => $line, -file => $infile, - -severity => 'ERROR', - -msg => "unresolved internal link `$_'"}); - } - } - - ## Print the number of errors found - my $num_errors = $self->num_errors(); - if ($num_errors > 0) { - printf $out_fh ("$infile has $num_errors pod syntax %s.\n", + ## Do some final checks and + ## print the number of errors found + my $self = shift; + my $infile = $self->input_file(); + my $out_fh = $self->output_handle(); + + if(@{$self->{_list_stack}}) { + # _TODO_ display, but don't count them for now + my $list; + while(($list = $self->_close_list('EOF',$infile)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => 'EOF', -file => $infile, + -severity => 'ERROR', -msg => "=over on line " . + $list->start() . " without closing =back" }); #" + } + } + + # check validity of document internal hyperlinks + # first build the node names from the paragraph text + my %nodes; + foreach($self->node()) { + $nodes{$_} = 1; + if(/^(\S+)\s+/) { + # we have more than one word. Use the first as a node, too. + # This is used heavily in perlfunc.pod + $nodes{$1} ||= 2; # derived node + } + } + foreach($self->hyperlink()) { + my $line = ''; + s/^(\d+):// && ($line = $1); + if($_ && !$nodes{$_}) { + $self->poderror({ -line => $line, -file => $infile, + -severity => 'ERROR', + -msg => "unresolved internal link '$_'"}); + } + } + foreach(grep($self->{_unique_nodes}->{$_} > 1, + keys %{$self->{_unique_nodes}})) { + $self->poderror({ -line => '-', -file => $infile, + -severity => 'WARNING', + -msg => "multiple occurence of link target '$_'"}); + } + + ## Print the number of errors found + my $num_errors = $self->num_errors(); + if ($num_errors > 0) { + printf $out_fh ("$infile has $num_errors pod syntax %s.\n", ($num_errors == 1) ? "error" : "errors"); - } - elsif($self->{_commands} == 0) { - print $out_fh "$infile does not contain any pod commands.\n"; - $self->num_errors(-1); - } - else { - print $out_fh "$infile pod syntax OK.\n"; - } + } + elsif($self->{_commands} == 0) { + print $out_fh "$infile does not contain any pod commands.\n"; + $self->num_errors(-1); + } + else { + print $out_fh "$infile pod syntax OK.\n"; + } } # check a POD command directive @@ -568,10 +643,15 @@ sub command { my $arg; # this will hold the command argument if (! $VALID_COMMANDS{$cmd}) { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', - -msg => "Unknown command \"$cmd\"" }); + -msg => "Unknown command '$cmd'" }); } else { - $self->{_commands}++; # found a valid command + # found a valid command + if(!$self->{_commands}++ && $cmd !~ /^head/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "file does not start with =head" }); + } ## check syntax of particular command if($cmd eq 'over') { # check for argument @@ -585,10 +665,7 @@ sub command { -msg => "No numeric argument for =over"}); } # start a new list - unshift(@{$self->{_list_stack}}, Pod::List->new( - -indent => $indent, - -start => $line, - -file => $file)); + $self->_open_list($indent,$line,$file); } elsif($cmd eq 'item') { # are we in a list? @@ -597,22 +674,60 @@ sub command { -severity => 'ERROR', -msg => "=item without previous =over" }); # auto-open in case we encounter many more - unshift(@{$self->{_list_stack}}, - Pod::List->new( - -indent => 'auto', - -start => $line, - -file => $file)); + $self->_open_list('auto',$line,$file); + } + my $list = $self->{_list_stack}->[0]; + # check whether the previous item had some contents + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + if($list->{_has_par}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "preceding non-item paragraph(s)" }); + delete $list->{_has_par}; } # check for argument $arg = $self->interpolate_and_check($paragraph, $line, $file); - unless($arg && $arg =~ /(\S+)/) { + if($arg && $arg =~ /(\S+)/) { + $arg =~ s/[\s\n]+$//; + my $type; + if($arg =~ /^[*]\s*(\S*.*)/) { + $type = 'bullet'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + elsif($arg =~ /^\d+\.?\s*(\S*)/) { + $type = 'number'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + else { + $type = 'definition'; + $self->{_list_item_contents} = 1; + } + my $first = $list->type(); + if($first && $first ne $type) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=item type mismatch ('$first' vs. '$type')"}); + } + else { # first item + $list->type($type); + } + } + else { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', -msg => "No argument for =item" }); $arg = ' '; # empty + $self->{_list_item_contents} = 0; } # add this item - $self->{_list_stack}[0]->item($arg); + $list->item($arg); # remember this node $self->node($arg); } @@ -628,11 +743,11 @@ sub command { $arg = $self->interpolate_and_check($paragraph, $line,$file); if($arg && $arg =~ /\S/) { $self->poderror({ -line => $line, -file => $file, - -severity => 'WARNING', + -severity => 'ERROR', -msg => "Spurious character(s) after =back" }); } # close list - my $list = shift @{$self->{_list_stack}}; + my $list = $self->_close_list($line,$file); # check for empty lists if(!$list->item() && $self->{-warnings}) { $self->poderror({ -line => $line, -file => $file, @@ -642,11 +757,22 @@ sub command { } } } - elsif($cmd =~ /^head/) { + elsif($cmd =~ /^head(\d+)/) { + if(defined $self->{_commands_in_head} && + $self->{_commands_in_head} == 0 && + defined $self->{_last_head} && + $self->{_last_head} >= $1) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "empty section in previous paragraph"}); + } + $self->{_commands_in_head} = -1; + $self->{_last_head} = $1; # check if there is an open list if(@{$self->{_list_stack}}) { my $list; - while($list = shift(@{$self->{_list_stack}})) { + while(($list = $self->_close_list($line,$file)) && + $list->indent() ne 'auto') { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', -msg => "=over on line ". $list->start() . @@ -655,9 +781,14 @@ sub command { } # remember this node $arg = $self->interpolate_and_check($paragraph, $line,$file); - $self->node($arg) if($arg); + $arg =~ s/[\s\n]+$//s; + $self->node($arg); + unless(length($arg)) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "empty =$cmd"}); + } if($cmd eq 'head1') { - $arg =~ s/[\s\n]+$//; $self->{_current_head1} = $arg; } else { $self->{_current_head1} = ''; @@ -711,12 +842,48 @@ sub command { } $arg = ''; # do not expand paragraph below } + elsif($cmd =~ /^(pod|cut)$/) { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Spurious text after =$cmd"}); + } + } + $self->{_commands_in_head}++; ## Check the interior sequences in the command-text $self->interpolate_and_check($paragraph, $line,$file) unless(defined $arg); } } +sub _open_list +{ + my ($self,$indent,$line,$file) = @_; + my $list = Pod::List->new( + -indent => $indent, + -start => $line, + -file => $file); + unshift(@{$self->{_list_stack}}, $list); + undef $self->{_list_item_contents}; + $list; +} + +sub _close_list +{ + my ($self,$line,$file) = @_; + my $list = shift(@{$self->{_list_stack}}); + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + undef $self->{_list_item_contents}; + $list; +} + # process a block of some text sub interpolate_and_check { my ($self, $paragraph, $line, $file) = @_; @@ -754,7 +921,7 @@ sub _check_ptree { if (! $VALID_SEQUENCES{$cmd}) { $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', - -msg => qq(Unknown interior-sequence "$cmd")}); + -msg => qq(Unknown interior-sequence '$cmd')}); # expand it anyway $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); next; @@ -775,9 +942,28 @@ sub _check_ptree { next; } my $ent = $$contents[0]; - if($ent =~ /^\d+$/) { + my $val; + if($ent =~ /^0x[0-9a-f]+$/i) { + # hexadec entity + $val = hex($ent); + } + elsif($ent =~ /^0\d+$/) { + # octal + $val = oct($ent); + } + elsif($ent =~ /^\d+$/) { # numeric entity - $text .= chr($ent); + $val = $ent; + } + if(defined $val) { + if($val>0 && $val<256) { + $text .= chr($val); + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Entity number out of range " . $_->raw_text()}); + } } elsif($ENTITIES{$ent}) { # known ISO entity @@ -786,7 +972,7 @@ sub _check_ptree { else { $self->poderror({ -line => $line, -file => $file, -severity => 'WARNING', - -msg => "Non-standard entity " . $_->raw_text()}); + -msg => "Unknown entity " . $_->raw_text()}); $text .= "E<$ent>"; } } @@ -824,8 +1010,15 @@ sub _check_ptree { # add the guts $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); } - else { - # check, but add nothing to $text (X<>, Z<>) + elsif($cmd eq 'Z') { + if(length($contents->raw_text())) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Nonempty Z<>"}); + } + } + else { # X<> + # check, but add nothing to $text $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); } } @@ -836,8 +1029,11 @@ sub _check_ptree { # process a block of verbatim text sub verbatim { - ## Nothing to check + ## Nothing particular to check my ($self, $paragraph, $line_num, $pod_para) = @_; + + $self->_preproc_par($paragraph); + if($self->{_current_head1} eq 'NAME') { my ($file, $line) = $pod_para->file_line; $self->poderror({ -line => $line, -file => $file, @@ -851,6 +1047,8 @@ sub textblock { my ($self, $paragraph, $line_num, $pod_para) = @_; my ($file, $line) = $pod_para->file_line; + $self->_preproc_par($paragraph); + # skip this paragraph if in a =begin block unless($self->{_have_begin}) { my $block = $self->interpolate_and_check($paragraph, $line,$file); @@ -863,4 +1061,18 @@ sub textblock { } } +sub _preproc_par +{ + my $self = shift; + $_[0] =~ s/[\s\n]+$//; + if($_[0]) { + $self->{_commands_in_head}++; + $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); + if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { + $self->{_list_stack}->[0]->{_has_par} = 1; + } + } +} + 1; + diff --git a/lib/Pod/Find.pm b/lib/Pod/Find.pm index 399bbba..038b090 100644 --- a/lib/Pod/Find.pm +++ b/lib/Pod/Find.pm @@ -3,7 +3,8 @@ # # Author: Marek Rouchal # -# borrowing code from Nick Ing-Simmon's PodToHtml +# Copyright (C) 1999-2000 by Marek Rouchal (and borrowing code +# from Nick Ing-Simmon's PodToHtml). All rights reserved. # This file is part of "PodParser". Pod::Find is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -12,8 +13,8 @@ package Pod::Find; use vars qw($VERSION); -$VERSION = 0.10; ## Current version of this package -require 5.005; ## requires this Perl version or later +$VERSION = 0.11; ## Current version of this package +require 5.004; ## requires this Perl version or later ############################################################################# @@ -113,7 +114,9 @@ use vars qw(@ISA @EXPORT_OK $VERSION); # package global variables my $SIMPLIFY_RX; -# return a hash of the +# return a hash of the POD files found +# first argument may be a hashref (options), +# rest is a list of directories to search recursively sub pod_find { my %opts; @@ -145,7 +148,8 @@ sub pod_find # * remove e.g. 5.00503 # * remove pod/ if followed by *.pod (e.g. in pod/perlfunc.pod) $SIMPLIFY_RX = - qr!^(?i:site_perl/|$Config::Config{archname}/|\d+\.\d+([_.]?\d+)?/|pod/(?=.*?\.pod$))*!o; + qq!^(?i:site_perl/|\Q$Config::Config{archname}\E/|\\d+\\.\\d+([_.]?\\d+)?/|pod/(?=.*?\\.pod\$))*!; + } my %dirs_visited; @@ -166,7 +170,7 @@ sub pod_find } next; } - my $root_rx = qr!^\Q$try\E/!; + my $root_rx = qq!^\Q$try\E/!; File::Find::find( sub { my $item = $File::Find::name; if(-d) { diff --git a/lib/Pod/InputObjects.pm b/lib/Pod/InputObjects.pm index 1432895..174759a 100644 --- a/lib/Pod/InputObjects.pm +++ b/lib/Pod/InputObjects.pm @@ -2,7 +2,7 @@ # Pod/InputObjects.pm -- package which defines objects for input streams # and paragraphs and commands when parsing POD docs. # -# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved. +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -11,7 +11,7 @@ package Pod::InputObjects; use vars qw($VERSION); -$VERSION = 1.090; ## Current version of this package +$VERSION = 1.093; ## Current version of this package require 5.004; ## requires this Perl version or later ############################################################################# diff --git a/lib/Pod/ParseUtils.pm b/lib/Pod/ParseUtils.pm index a66e8f5..2b3734f 100644 --- a/lib/Pod/ParseUtils.pm +++ b/lib/Pod/ParseUtils.pm @@ -1,7 +1,7 @@ ############################################################################# # Pod/ParseUtils.pm -- helpers for POD parsing and conversion # -# Copyright (C) 1999 by Marek Rouchal. All rights reserved. +# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -305,32 +305,38 @@ sub parse { #warn "DEBUG: link=$_\n"; # only page - if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) { - $page = $1 . $2; + # problem: a lot of people use (), or (1) or the like to indicate + # man page sections. But this collides with L that is supposed + # to point to an internal funtion... + # I would like the following better, here and below: + #if(m!^(\w+(?:::\w+)*)$!) { + my $page_rx = '[\w.]+(?:::[\w.]+)*'; + if(m!^($page_rx)$!o) { + $page = $1; $type = 'page'; } - # alttext, page and section - elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) { - ($alttext, $page, $node) = ($1, $2 . $3, $4); + # alttext, page and "section" + elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { + ($alttext, $page, $node) = ($1, $2, $3); $type = 'section'; } - # page and section - elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) { - ($page, $node) = ($1 . $2, $3); + # page and "section" + elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) { + ($page, $node) = ($1, $2); $type = 'section'; } # page and item - elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) { - ($page, $node) = ($1 . $2, $3); + elsif(m!^($page_rx)\s*/\s*(.+)$!o) { + ($page, $node) = ($1, $2); $type = 'item'; } - # only section - elsif(m!^(?:/\s*|)"(.+)"$!) { + # only "section" + elsif(m!^/?"(.+)"$!) { $node = $1; $type = 'section'; } # only item - elsif(m!^/(.+)$!) { + elsif(m!^\s*/(.+)$!) { $node = $1; $type = 'item'; } @@ -340,16 +346,16 @@ sub parse { $type = 'hyperlink'; } # alttext, page and item - elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) { - ($alttext, $page, $node) = ($1, $2 . $3, $4); + elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { + ($alttext, $page, $node) = ($1, $2, $3); $type = 'item'; } # alttext and page - elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) { - ($alttext, $page) = ($1, $2 . $3); + elsif(m!^(.+?)\s*[|]\s*($page_rx)$!o) { + ($alttext, $page) = ($1, $2); $type = 'page'; } - # alttext and section + # alttext and "section" elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { ($alttext, $node) = ($1,$2); $type = 'section'; @@ -368,9 +374,17 @@ sub parse { $node = $_; $type = 'item'; } + # collapse whitespace in nodes + $node =~ s/\s+/ /gs; - if($page =~ /[(]\w*[)]$/) { - $self->warning("section in `$page' deprecated"); + #if($page =~ /[(]\w*[)]$/) { + # $self->warning("section in '$page' deprecated"); + #} + if($node =~ m:[|/]:) { + $self->warning("node '$node' contains non-escaped | or /"); + } + if($alttext =~ m:[|/]:) { + $self->warning("alternative text '$node' contains non-escaped | or /"); } $self->{-page} = $page; $self->{-node} = $node; @@ -559,18 +573,24 @@ sub link { my $self = shift; my $link = $self->page() || ''; if($self->node()) { + my $node = $self->node(); + $text =~ s/\|/E/g; + $text =~ s:/:E:g; if($self->type() eq 'section') { - $link .= ($link ? '/' : '') . '"' . $self->node() . '"'; + $link .= ($link ? '/' : '') . '"' . $node . '"'; } elsif($self->type() eq 'hyperlink') { $link = $self->node(); } else { # item - $link .= '/' . $self->node(); + $link .= '/' . $node; } } if($self->alttext()) { - $link = $self->alttext() . '|' . $link; + my $text = $self->alttext(); + $text =~ s/\|/E/g; + $text =~ s:/:E:g; + $link = "$text|$link"; } $link; } diff --git a/lib/Pod/Parser.pm b/lib/Pod/Parser.pm index c727142..bafabba 100644 --- a/lib/Pod/Parser.pm +++ b/lib/Pod/Parser.pm @@ -1,7 +1,7 @@ ############################################################################# # Pod/Parser.pm -- package which defines a base class for parsing POD docs. # -# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved. +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -10,7 +10,7 @@ package Pod::Parser; use vars qw($VERSION); -$VERSION = 1.091; ## Current version of this package +$VERSION = 1.093; ## Current version of this package require 5.004; ## requires this Perl version or later ############################################################################# @@ -55,9 +55,9 @@ Pod::Parser - base class for creating POD filters and translators sub interior_sequence { my ($parser, $seq_command, $seq_argument) = @_; ## Expand an interior sequence; sample actions might be: - return "*$seq_argument*" if ($seq_command = 'B'); - return "`$seq_argument'" if ($seq_command = 'C'); - return "_${seq_argument}_'" if ($seq_command = 'I'); + return "*$seq_argument*" if ($seq_command eq 'B'); + return "`$seq_argument'" if ($seq_command eq 'C'); + return "_${seq_argument}_'" if ($seq_command eq 'I'); ## ... other sequence commands and their resulting text } @@ -142,8 +142,8 @@ For the most part, the B base class should be able to do most of the input parsing for you and leave you free to worry about how to intepret the commands and translate the result. -Note that all we have described here in this quick overview is -the simplest most straightforward use of B to do stream-based +Note that all we have described here in this quick overview is the +simplest most straightforward use of B to do stream-based parsing. It is also possible to use the B function to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">. @@ -599,7 +599,7 @@ Please note that the B method is invoked I the B method. After all (possibly preprocessed) lines in a paragraph have been assembled together and either it has been determined that the paragraph is part of the POD documentation from one -of the selected sections or the C<-want_nonPODs> option is true, +of the selected sections or the C<-want_nonPODs> option is true, then B is invoked. The base class implementation of this method returns the given text. @@ -718,13 +718,6 @@ is a reference to the parse-tree object. =cut -## This global regex is used to see if the text before a '>' inside -## an interior sequence looks like '-' or '=', but not '--', '==', -## '!=', '$-', '$=' or <>= -use vars qw( $ARROW_RE ); -$ARROW_RE = join('', qw{ (?: [^-+*/=!&|%^x.<>$]= | [^-$]- )$ }); -#$ARROW_RE = qr/(?:[^-+*/=!&|%^x.<>$]+=|[^-$]+-)$/; ## 5.005+ only! - sub parse_text { my $self = shift; local $_ = ''; @@ -738,7 +731,7 @@ sub parse_text { my $text = shift; my $line = shift; my $file = $self->input_file(); - my ($cmd, $prev) = ('', ''); + my $cmd = ""; ## Convert method calls into closures, for our convenience my $xseq_sub = $expand_seq; @@ -757,7 +750,7 @@ sub parse_text { ref $xseq_sub or $xseq_sub = sub { shift()->$expand_seq(@_) }; ref $xtext_sub or $xtext_sub = sub { shift()->$expand_text(@_) }; ref $xptree_sub or $xptree_sub = sub { shift()->$expand_ptree(@_) }; - + ## Keep track of the "current" interior sequence, and maintain a stack ## of "in progress" sequences. ## @@ -769,52 +762,82 @@ sub parse_text { ## my $seq = Pod::ParseTree->new(); my @seq_stack = ($seq); + my ($ldelim, $rdelim) = ('', ''); ## Iterate over all sequence starts/stops, newlines, & text ## (NOTE: split with capturing parens keeps the delimiters) $_ = $text; - for ( split /([A-Z]<|>|\n)/ ) { - ## Keep track of line count - ++$line if ($_ eq "\n"); - ## Look for the beginning of a sequence - if ( /^([A-Z])(<)$/ ) { + my @tokens = split /([A-Z]<(?:<+\s+)?)/; + while ( @tokens ) { + $_ = shift @tokens; + ## Look for the beginning of a sequencd + if ( /^([A-Z])(<(?:<+\s+)?)$/ ) { ## Push a new sequence onto the stack of those "in-progress" + ($cmd, $ldelim) = ($1, $2); $seq = Pod::InteriorSequence->new( - -name => ($cmd = $1), - -ldelim => $2, -rdelim => '', - -file => $file, -line => $line + -name => $cmd, + -ldelim => $ldelim, -rdelim => '', + -file => $file, -line => $line ); + $ldelim =~ s/\s+$//, ($rdelim = $ldelim) =~ tr//; (@seq_stack > 1) and $seq->nested($seq_stack[-1]); push @seq_stack, $seq; } - ## Look for sequence ending (preclude '->' and '=>' inside C<...>) - elsif ( (@seq_stack > 1) and - /^>$/ and ($cmd ne 'C' or $prev !~ /$ARROW_RE/o) ) - { - ## End of current sequence, record terminating delimiter - $seq->rdelim($_); - ## Pop it off the stack of "in progress" sequences - pop @seq_stack; - ## Append result to its parent in current parse tree - $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq); - ## Remember the current cmd-name - $cmd = (@seq_stack > 1) ? $seq_stack[-1]->name : ''; + ## Look for sequence ending + elsif ( @seq_stack > 1 ) { + ## Make sure we match the right kind of closing delimiter + my ($seq_end, $post_seq) = ("", ""); + if ( ($ldelim eq '<' and /\A(.*?)(>)/s) + or /\A(.*?)(\s+$rdelim)/s ) + { + ## Found end-of-sequence, capture the interior and the + ## closing the delimiter, and put the rest back on the + ## token-list + $post_seq = substr($_, length($1) + length($2)); + ($_, $seq_end) = ($1, $2); + (length $post_seq) and unshift @tokens, $post_seq; + } + if (length) { + ## In the middle of a sequence, append this text to it, and + ## dont forget to "expand" it if that's what the caller wanted + $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); + $_ .= $seq_end; + } + if (length $seq_end) { + ## End of current sequence, record terminating delimiter + $seq->rdelim($seq_end); + ## Pop it off the stack of "in progress" sequences + pop @seq_stack; + ## Append result to its parent in current parse tree + $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) + : $seq); + ## Remember the current cmd-name and left-delimiter + $cmd = (@seq_stack > 1) ? $seq_stack[-1]->name : ''; + $ldelim = (@seq_stack > 1) ? $seq_stack[-1]->ldelim : ''; + $ldelim =~ s/\s+$//, ($rdelim = $ldelim) =~ tr//; + } } elsif (length) { ## In the middle of a sequence, append this text to it, and ## dont forget to "expand" it if that's what the caller wanted $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); } - ## Remember the "current" sequence and the previously seen token - ($seq, $prev) = ( $seq_stack[-1], $_ ); + ## Keep track of line count + $line += tr/\n//; + ## Remember the "current" sequence + $seq = $seq_stack[-1]; } ## Handle unterminated sequences my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef; while (@seq_stack > 1) { ($cmd, $file, $line) = ($seq->name, $seq->file_line); + $ldelim = $seq->ldelim; + ($rdelim = $ldelim) =~ tr//; + $rdelim =~ s/^(\S+)(\s*)$/$2$1/; pop @seq_stack; - my $errmsg = "** Unterminated $cmd<...> at $file line $line\n"; + my $errmsg = "*** WARNING: unterminated ${cmd}${ldelim}...${rdelim}". + " at line $line in file $file\n"; (ref $errorsub) and &{$errorsub}($errmsg) or (defined $errorsub) and $self->$errorsub($errmsg) or warn($errmsg); @@ -1034,9 +1057,20 @@ sub parse_from_filehandle { ++$plines; } - ## See of this line is blank and ends the current paragraph. + ## See if this line is blank and ends the current paragraph. ## If it isnt, then keep iterating until it is. - next unless (($textline =~ /^\s*$/) && (length $paragraph)); + next unless (($textline =~ /^(\s*)$/) && (length $paragraph)); + + ## Issue a warning about any non-empty blank lines + if ( length($1) > 1 ) { + my $errorsub = $self->errorsub(); + my $file = $self->input_file(); + my $errmsg = "*** WARNING: line containing nothing but whitespace". + " in paragraph at line $nlines in file $file\n"; + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $self->$errorsub($errmsg) + or warn($errmsg); + } ## Now process the paragraph parse_paragraph($self, $paragraph, ($nlines - $plines) + 1); diff --git a/lib/Pod/Select.pm b/lib/Pod/Select.pm index 94ded86..150dfca 100644 --- a/lib/Pod/Select.pm +++ b/lib/Pod/Select.pm @@ -1,7 +1,7 @@ ############################################################################# # Pod/Select.pm -- function to select portions of POD docs # -# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved. +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -10,7 +10,7 @@ package Pod::Select; use vars qw($VERSION); -$VERSION = 1.090; ## Current version of this package +$VERSION = 1.093; ## Current version of this package require 5.004; ## requires this Perl version or later ############################################################################# diff --git a/lib/Pod/Usage.pm b/lib/Pod/Usage.pm index aa3a009..9f01a52 100644 --- a/lib/Pod/Usage.pm +++ b/lib/Pod/Usage.pm @@ -1,7 +1,7 @@ ############################################################################# # Pod/Usage.pm -- print usage messages for the running script. # -# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved. +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. @@ -10,7 +10,7 @@ package Pod::Usage; use vars qw($VERSION); -$VERSION = 1.090; ## Current version of this package +$VERSION = 1.093; ## Current version of this package require 5.004; ## requires this Perl version or later =head1 NAME diff --git a/t/pod/poderrs.t b/t/pod/poderrs.t index e27130c..bec2a19 100755 --- a/t/pod/poderrs.t +++ b/t/pod/poderrs.t @@ -36,6 +36,11 @@ Camps is very, entertaining. And they say we'll have some fun if it stops raining! +Okay, now use a non-empty blank line to terminate a paragraph and make +sure we get a warning. + +The above blank line contains tabs and spaces only + =head1 Additional tests =head2 item without over diff --git a/t/pod/poderrs.xr b/t/pod/poderrs.xr index 157d130..038cf4f 100644 --- a/t/pod/poderrs.xr +++ b/t/pod/poderrs.xr @@ -1,32 +1,33 @@ -*** ERROR: Unknown command "unknown1" at line 21 in file pod/poderrs.t -*** ERROR: Unknown interior-sequence "Q" at line 25 in file pod/poderrs.t -*** ERROR: Unknown interior-sequence "A" at line 26 in file pod/poderrs.t -*** ERROR: Unknown interior-sequence "Y" at line 27 in file pod/poderrs.t -*** ERROR: Unknown interior-sequence "V" at line 27 in file pod/poderrs.t -** Unterminated B<...> at pod/poderrs.t line 31 -** Unterminated I<...> at pod/poderrs.t line 30 -** Unterminated C<...> at pod/poderrs.t line 33 -*** ERROR: =item without previous =over at line 43 in file pod/poderrs.t -*** ERROR: =over on line 43 without closing =back (at head2) at line 45 in file pod/poderrs.t -*** ERROR: =back without previous =over at line 47 in file pod/poderrs.t -*** ERROR: =over on line 51 without closing =back (at head2) at line 55 in file pod/poderrs.t -*** ERROR: =end without =begin at line 57 in file pod/poderrs.t -*** ERROR: Nested =begin's (first at line 61:html) at line 63 in file pod/poderrs.t -*** ERROR: =end without =begin at line 67 in file pod/poderrs.t -*** ERROR: nested commands C<...C<...>...> at line 71 in file pod/poderrs.t -*** ERROR: garbled entity E at line 75 in file pod/poderrs.t -*** ERROR: garbled entity E> at line 76 in file pod/poderrs.t -*** ERROR: garbled entity E> at line 77 in file pod/poderrs.t -*** WARNING: collapsing newlines to blanks at line 87 in file pod/poderrs.t -*** ERROR: malformed link L<> : empty link at line 89 in file pod/poderrs.t -*** WARNING: section in `passwd(5)' deprecated at line 94 in file pod/poderrs.t -*** WARNING: ignoring leading whitespace in link at line 95 in file pod/poderrs.t -*** WARNING: ignoring trailing whitespace in link at line 95 in file pod/poderrs.t -*** WARNING: Spurious character(s) after =back at line 101 in file pod/poderrs.t -*** WARNING: No items in =over (at line 109) / =back list at line 111 in file pod/poderrs.t -*** WARNING: 2 unescaped <> in paragraph at line 113 in file pod/poderrs.t -*** ERROR: unresolved internal link `begin or begin' at line 81 in file pod/poderrs.t -*** ERROR: unresolved internal link `end with begin' at line 82 in file pod/poderrs.t -*** ERROR: unresolved internal link `OoPs' at line 83 in file pod/poderrs.t -*** ERROR: unresolved internal link `abc def' at line 87 in file pod/poderrs.t -pod/poderrs.t has 24 pod syntax errors. +*** ERROR: Unknown command 'unknown1' at line 21 in file pod/poderrs.t +*** ERROR: Unknown interior-sequence 'Q' at line 25 in file pod/poderrs.t +*** ERROR: Unknown interior-sequence 'A' at line 26 in file pod/poderrs.t +*** ERROR: Unknown interior-sequence 'Y' at line 27 in file pod/poderrs.t +*** ERROR: Unknown interior-sequence 'V' at line 27 in file pod/poderrs.t +*** WARNING: unterminated B<...> at line 31 in file pod/poderrs.t +*** WARNING: unterminated I<...> at line 30 in file pod/poderrs.t +*** WARNING: unterminated C<...> at line 33 in file pod/poderrs.t +*** WARNING: line containing nothing but whitespace in paragraph at line 41 in file pod/poderrs.t +*** ERROR: =item without previous =over at line 48 in file pod/poderrs.t +*** ERROR: =back without previous =over at line 52 in file pod/poderrs.t +*** ERROR: =over on line 56 without closing =back (at head2) at line 60 in file pod/poderrs.t +*** ERROR: =end without =begin at line 62 in file pod/poderrs.t +*** ERROR: Nested =begin's (first at line 66:html) at line 68 in file pod/poderrs.t +*** ERROR: =end without =begin at line 72 in file pod/poderrs.t +*** ERROR: nested commands C<...C<...>...> at line 76 in file pod/poderrs.t +*** ERROR: garbled entity E at line 80 in file pod/poderrs.t +*** ERROR: garbled entity E> at line 81 in file pod/poderrs.t +*** ERROR: garbled entity E> at line 82 in file pod/poderrs.t +*** WARNING: collapsing newlines to blanks at line 92 in file pod/poderrs.t +*** ERROR: malformed link L<> : empty link at line 94 in file pod/poderrs.t +*** WARNING: section in 'passwd(5)' deprecated at line 99 in file pod/poderrs.t +*** WARNING: ignoring leading whitespace in link at line 100 in file pod/poderrs.t +*** WARNING: ignoring trailing whitespace in link at line 100 in file pod/poderrs.t +*** ERROR: Spurious character(s) after =back at line 106 in file pod/poderrs.t +*** WARNING: No items in =over (at line 114) / =back list at line 116 in file pod/poderrs.t +*** WARNING: 2 unescaped <> in paragraph at line 118 in file pod/poderrs.t +*** ERROR: unresolved internal link 'begin or begin' at line 86 in file pod/poderrs.t +*** ERROR: unresolved internal link 'end with begin' at line 87 in file pod/poderrs.t +*** ERROR: unresolved internal link 'OoPs' at line 88 in file pod/poderrs.t +*** ERROR: unresolved internal link 'abc def' at line 92 in file pod/poderrs.t +*** WARNING: multiple occurence of link target 'oops' at line - in file pod/poderrs.t +pod/poderrs.t has 21 pod syntax errors. diff --git a/t/pod/special_seqs.t b/t/pod/special_seqs.t index 572fb8c..b8af57e 100755 --- a/t/pod/special_seqs.t +++ b/t/pod/special_seqs.t @@ -17,10 +17,16 @@ __END__ =pod This is a test to see if I can do not only C<$self> and C, but -also C<$self->method()> and C<$self->{FIELDNAME}> and C<{FOO=>BAR}> without -resorting to escape sequences. +also C<< $self->method() >> and C<< $self->{FIELDNAME} >> and +C<< $Foo <=> $Bar >> without resorting to escape sequences. If +I want to refer to the right-shift operator I can do something +like C<<< $x >> 3 >>> or even C<<<< $y >> 5 >>>>. -Now for the grand finale of C<$self->method()->{FIELDNAME} = {FOO=>BAR}>. +Now for the grand finale of C<< $self->method()->{FIELDNAME} = {FOO=>BAR} >>. +And I also want to make sure that newlines work like this +C<<< +$self->{FOOBAR} >> 3 and [$b => $a]->[$a <=> $b] +>>> Of course I should still be able to do all this I escape sequences too: C<$self-Emethod()> and C<$self-E{FIELDNAME}> and C<{FOO=EBAR}>. @@ -29,4 +35,9 @@ Dont forget C<$self-Emethod()-E{FIELDNAME} = {FOO=EBAR}>. And make sure that C<0> works too! +Now, if I use << or >> as my delimiters, then I have to use whitespace. +So things like C<<$self->method()>> and C<<$self->{FIELDNAME}>> wont end +up doing what you might expect since the first > will still terminate +the first < seen. + =cut diff --git a/t/pod/special_seqs.xr b/t/pod/special_seqs.xr index fc06593..a07f4cf 100644 --- a/t/pod/special_seqs.xr +++ b/t/pod/special_seqs.xr @@ -1,8 +1,12 @@ This is a test to see if I can do not only `$self' and `method()', but - also `$self->method()' and `$self->{FIELDNAME}' and `{FOO=>BAR}' without - resorting to escape sequences. + also `$self->method()' and `$self->{FIELDNAME}' and `$Foo <=> $Bar' + without resorting to escape sequences. If I want to refer to the + right-shift operator I can do something like `$x >> 3' or even `$y >> + 5'. Now for the grand finale of `$self->method()->{FIELDNAME} = {FOO=>BAR}'. + And I also want to make sure that newlines work like this + `$self->{FOOBAR} >> 3 and [$b => $a]->[$a <=> $b]' Of course I should still be able to do all this *with* escape sequences too: `$self->method()' and `$self->{FIELDNAME}' and `{FOO=>BAR}'. @@ -11,3 +15,8 @@ And make sure that `0' works too! + Now, if I use << or >> as my delimiters, then I have to use whitespace. + So things like `<$self-'method()>> and `<$self-'{FIELDNAME}>> wont end + up doing what you might expect since the first > will still terminate + the first < seen. +