[perl #35847] File::Find not performing as documented

[p5sagit/p5-mst-13.2.git] / lib / Pod / Parser.pm
diff --git a/lib/Pod/Parser.pm b/lib/Pod/Parser.pm

index 8ef5a59..a5fde84 100644 (file)
--- a/lib/Pod/Parser.pm
+++ b/lib/Pod/Parser.pm
@@ -1,7 +1,7 @@
 #############################################################################
 # Pod/Parser.pm -- package which defines a base class for parsing POD docs.
 #
-# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved.
+# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
 # This file is part of "PodParser". PodParser is free software;
 # you can redistribute it and/or modify it under the same terms
 # as Perl itself.
@@ -10,8 +10,8 @@
 package Pod::Parser;
 
 use vars qw($VERSION);
-$VERSION = 1.085;  ## Current version of this package
-require  5.004;    ## requires this Perl version or later
+$VERSION = 1.32;  ## Current version of this package
+require  5.005;    ## requires this Perl version or later
 
 #############################################################################
 
@@ -55,9 +55,9 @@ Pod::Parser - base class for creating POD filters and translators
     sub interior_sequence { 
         my ($parser, $seq_command, $seq_argument) = @_;
         ## Expand an interior sequence; sample actions might be:
-        return "*$seq_argument*"     if ($seq_command = 'B');
-        return "`$seq_argument'"     if ($seq_command = 'C');
-        return "_${seq_argument}_'"  if ($seq_command = 'I');
+        return "*$seq_argument*"     if ($seq_command eq 'B');
+        return "`$seq_argument'"     if ($seq_command eq 'C');
+        return "_${seq_argument}_'"  if ($seq_command eq 'I');
         ## ... other sequence commands and their resulting text
     }
 
@@ -71,7 +71,7 @@ Pod::Parser - base class for creating POD filters and translators
 
 =head1 REQUIRES
 
-perl5.004, Pod::InputObjects, Exporter, Carp
+perl5.005, Pod::InputObjects, Exporter, Symbol, Carp
 
 =head1 EXPORTS
 
@@ -142,8 +142,8 @@ For the most part, the B<Pod::Parser> base class should be able to
 do most of the input parsing for you and leave you free to worry about
 how to intepret the commands and translate the result.
 
-Note that all we have described here in this quick overview overview is
-the simplest most striaghtforward use of B<Pod::Parser> to do stream-based
+Note that all we have described here in this quick overview is the
+simplest most straightforward use of B<Pod::Parser> to do stream-based
 parsing. It is also possible to use the B<Pod::Parser::parse_text> function
 to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">.
 
@@ -151,7 +151,7 @@ to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">.
 
 A I<parse-option> is simply a named option of B<Pod::Parser> with a
 value that corresponds to a certain specified behavior. These various
-behaviors of B<Pod::Parser> may be enabled/disabled by setting or
+behaviors of B<Pod::Parser> may be enabled/disabled by setting
 or unsetting one or more I<parse-options> using the B<parseopts()> method.
 The set of currently accepted parse-options is as follows:
 
@@ -164,7 +164,7 @@ the POD sections of the input. Input paragraphs that are not part
 of the POD-format documentation are not made available to the caller
 (not even using B<preprocess_paragraph()>). Setting this option to a
 non-empty, non-zero value will allow B<preprocess_paragraph()> to see
-non-POD sectioins of the input as well as POD sections. The B<cutting()>
+non-POD sections of the input as well as POD sections. The B<cutting()>
 method can be used to determine if the corresponding paragraph is a POD
 paragraph, or some other input paragraph.
 
@@ -172,7 +172,7 @@ paragraph, or some other input paragraph.
 
 Normally (by default) B<Pod::Parser> handles the C<=cut> POD directive
 by itself and does not pass it on to the caller for processing. Setting
-this option to non-empty, non-zero value will cause B<Pod::Parser> to
+this option to a non-empty, non-zero value will cause B<Pod::Parser> to
 pass the C<=cut> directive to the caller just like any other POD command
 (and hence it may be processed by the B<command()> method).
 
@@ -181,6 +181,15 @@ B<Pod::Parser> will still interpret the C<=cut> directive to mean that
 to capture the actual C<=cut> paragraph itself for whatever purpose
 it desires.
 
+=item B<-warnings> (default: unset)
+
+Normally (by default) B<Pod::Parser> recognizes a bare minimum of
+pod syntax errors and warnings and issues diagnostic messages
+for errors, but not for warnings. (Use B<Pod::Checker> to do more
+thorough checking of POD syntax.) Setting this option to a non-empty,
+non-zero value will cause B<Pod::Parser> to issue diagnostics for
+the few warnings it recognizes as well as the errors.
+
 =back
 
 Please see L<"parseopts()"> for a complete description of the interface
@@ -196,6 +205,12 @@ use strict;
 use Pod::InputObjects;
 use Carp;
 use Exporter;
+BEGIN {
+   if ($] < 5.6) {
+      require Symbol;
+      import Symbol;
+   }
+}
 @ISA = qw(Exporter);
 
 ## These "variables" are used as local "glob aliases" for performance
@@ -587,18 +602,20 @@ The value returned should correspond to the new text to use in its
 place If the empty string is returned or an undefined value is
 returned, then the given C<$text> is ignored (not processed).
 
-This method is invoked after gathering up all thelines in a paragraph
+This method is invoked after gathering up all the lines in a paragraph
+and after determining the cutting state of the paragraph,
 but before trying to further parse or interpret them. After
 B<preprocess_paragraph()> returns, the current cutting state (which
 is returned by C<$self-E<gt>cutting()>) is examined. If it evaluates
-to false then input text (including the given C<$text>) is cut (not
+to true then input text (including the given C<$text>) is cut (not
 processed) until the next POD directive is encountered.
 
 Please note that the B<preprocess_line()> method is invoked I<before>
 the B<preprocess_paragraph()> method. After all (possibly preprocessed)
-lines in a paragraph have been assembled together and it has been
+lines in a paragraph have been assembled together and either it has been
 determined that the paragraph is part of the POD documentation from one
-of the selected sections, then B<preprocess_paragraph()> is invoked.
+of the selected sections or the C<-want_nonPODs> option is true,
+then B<preprocess_paragraph()> is invoked.
 
 The base class implementation of this method returns the given text.
 
@@ -630,7 +647,7 @@ their functionality.
 
 This method is useful if you need to perform your own interpolation 
 of interior sequences and can't rely upon B<interpolate> to expand
-them in simple bottom-up order order.
+them in simple bottom-up order.
 
 The parameter C<$text> is a string or block of text to be parsed
 for interior sequences; and the parameter C<$line_num> is the
@@ -716,13 +733,6 @@ is a reference to the parse-tree object.
 
 =cut
 
-## This global regex is used to see if the text before a '>' inside
-## an interior sequence looks like '-' or '=', but not '--', '==',
-## '!=', '$-', '$=' or <<op>>=
-use vars qw( $ARROW_RE );
-$ARROW_RE = join('', qw{ (?: [^-+*/=!&|%^x.<>$]= | [^-$]- )$ });
-#$ARROW_RE = qr/(?:[^-+*/=!&|%^x.<>$]+=|[^-$]+-)$/;  ## 5.005+ only!
-
 sub parse_text {
     my $self = shift;
     local $_ = '';
@@ -736,7 +746,7 @@ sub parse_text {
     my $text = shift;
     my $line = shift;
     my $file = $self->input_file();
-    my ($cmd, $prev)  = ('', '');
+    my $cmd  = "";
 
     ## Convert method calls into closures, for our convenience
     my $xseq_sub   = $expand_seq;
@@ -755,7 +765,7 @@ sub parse_text {
     ref $xseq_sub    or  $xseq_sub   = sub { shift()->$expand_seq(@_) };
     ref $xtext_sub   or  $xtext_sub  = sub { shift()->$expand_text(@_) };
     ref $xptree_sub  or  $xptree_sub = sub { shift()->$expand_ptree(@_) };
-    
+
     ## Keep track of the "current" interior sequence, and maintain a stack
     ## of "in progress" sequences.
     ##
@@ -767,52 +777,88 @@ sub parse_text {
     ##
     my $seq       = Pod::ParseTree->new();
     my @seq_stack = ($seq);
+    my ($ldelim, $rdelim) = ('', '');
 
-    ## Iterate over all sequence starts/stops, newlines, & text
-    ## (NOTE: split with capturing parens keeps the delimiters)
+    ## Iterate over all sequence starts text (NOTE: split with
+    ## capturing parens keeps the delimiters)
     $_ = $text;
-    for ( split /([A-Z]<|>|\n)/ ) {
-        ## Keep track of line count
-        ++$line  if ($_ eq "\n");
+    my @tokens = split /([A-Z]<(?:<+\s)?)/;
+    while ( @tokens ) {
+        $_ = shift @tokens;
         ## Look for the beginning of a sequence
-        if ( /^([A-Z])(<)$/ ) {
+        if ( /^([A-Z])(<(?:<+\s)?)$/ ) {
             ## Push a new sequence onto the stack of those "in-progress"
+            my $ldelim_orig;
+            ($cmd, $ldelim_orig) = ($1, $2);
+            ($ldelim = $ldelim_orig) =~ s/\s+$//;
+            ($rdelim = $ldelim) =~ tr/</>/;
             $seq = Pod::InteriorSequence->new(
-                       -name   => ($cmd = $1),
-                       -ldelim => $2,     -rdelim => '',
-                       -file   => $file,  -line   => $line
+                       -name   => $cmd,
+                       -ldelim => $ldelim_orig,  -rdelim => $rdelim,
+                       -file   => $file,    -line   => $line
                    );
             (@seq_stack > 1)  and  $seq->nested($seq_stack[-1]);
             push @seq_stack, $seq;
         }
-        ## Look for sequence ending (preclude '->' and '=>' inside C<...>)
-        elsif ( (@seq_stack > 1)  and
-                /^>$/ and ($cmd ne 'C' or $prev !~ /$ARROW_RE/o) )
-        {
-            ## End of current sequence, record terminating delimiter
-            $seq->rdelim($_);
-            ## Pop it off the stack of "in progress" sequences
-            pop @seq_stack;
-            ## Append result to its parent in current parse tree
-            $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq);
-            ## Remember the current cmd-name
-            $cmd = (@seq_stack > 1) ? $seq_stack[-1]->name : '';
+        ## Look for sequence ending
+        elsif ( @seq_stack > 1 ) {
+            ## Make sure we match the right kind of closing delimiter
+            my ($seq_end, $post_seq) = ("", "");
+            if ( ($ldelim eq '<'   and  /\A(.*?)(>)/s)
+                 or  /\A(.*?)(\s+$rdelim)/s )
+            {
+                ## Found end-of-sequence, capture the interior and the
+                ## closing the delimiter, and put the rest back on the
+                ## token-list
+                $post_seq = substr($_, length($1) + length($2));
+                ($_, $seq_end) = ($1, $2);
+                (length $post_seq)  and  unshift @tokens, $post_seq;
+            }
+            if (length) {
+                ## In the middle of a sequence, append this text to it, and
+                ## dont forget to "expand" it if that's what the caller wanted
+                $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_);
+                $_ .= $seq_end;
+            }
+            if (length $seq_end) {
+                ## End of current sequence, record terminating delimiter
+                $seq->rdelim($seq_end);
+                ## Pop it off the stack of "in progress" sequences
+                pop @seq_stack;
+                ## Append result to its parent in current parse tree
+                $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq)
+                                                   : $seq);
+                ## Remember the current cmd-name and left-delimiter
+                if(@seq_stack > 1) {
+                    $cmd = $seq_stack[-1]->name;
+                    $ldelim = $seq_stack[-1]->ldelim;
+                    $rdelim = $seq_stack[-1]->rdelim;
+                } else {
+                    $cmd = $ldelim = $rdelim = '';
+                }
+            }
         }
         elsif (length) {
             ## In the middle of a sequence, append this text to it, and
             ## dont forget to "expand" it if that's what the caller wanted
             $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_);
         }
-        ## Remember the "current" sequence and the previously seen token
-        ($seq, $prev) = ( $seq_stack[-1], $_ );
+        ## Keep track of line count
+        $line += tr/\n//;
+        ## Remember the "current" sequence
+        $seq = $seq_stack[-1];
     }
 
     ## Handle unterminated sequences
     my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef;
     while (@seq_stack > 1) {
        ($cmd, $file, $line) = ($seq->name, $seq->file_line);
+       $ldelim  = $seq->ldelim;
+       ($rdelim = $ldelim) =~ tr/</>/;
+       $rdelim  =~ s/^(\S+)(\s*)$/$2$1/;
        pop @seq_stack;
-       my $errmsg = "** Unterminated $cmd<...> at $file line $line\n";
+       my $errmsg = "*** ERROR: unterminated ${cmd}${ldelim}...${rdelim}".
+                    " at line $line in file $file\n";
        (ref $errorsub) and &{$errorsub}($errmsg)
            or (defined $errorsub) and $self->$errorsub($errmsg)
                or  warn($errmsg);
@@ -876,17 +922,16 @@ sub parse_paragraph {
     local $_;
 
     ## See if we want to preprocess nonPOD paragraphs as well as POD ones.
-    my $wantNonPods = $myOpts{'-want_nonPODs'} || 0;
+    my $wantNonPods = $myOpts{'-want_nonPODs'};
+
+    ## Update cutting status
+    $myData{_CUTTING} = 0 if $text =~ /^={1,2}\S/;
 
     ## Perform any desired preprocessing if we wanted it this early
     $wantNonPods  and  $text = $self->preprocess_paragraph($text, $line_num);
 
-    ## This is the end of a non-empty paragraph
     ## Ignore up until next POD directive if we are cutting
-    if ($myData{_CUTTING}) {
-       return  unless ($text =~ /^={1,2}\S/);
-       $myData{_CUTTING} = 0;
-    }
+    return if $myData{_CUTTING};
 
     ## Now we know this is block of text in a POD section!
 
@@ -921,8 +966,7 @@ sub parse_paragraph {
         ## and whatever sequence of characters was used to separate them
         $pfx = $1;
         $_ = substr($text, length $pfx);
-        $sep = /(\s+)(?=\S)/ ? $1 : '';
-        ($cmd, $text) = split(" ", $_, 2);
+        ($cmd, $sep, $text) = split /(\s+)/, $_, 2; 
         ## If this is a "cut" directive then we dont need to do anything
         ## except return to "cutting" mode.
         if ($cmd eq 'cut') {
@@ -1001,6 +1045,8 @@ sub parse_from_filehandle {
     my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : ();
     my ($in_fh, $out_fh) = @_;
     $in_fh = \*STDIN  unless ($in_fh);
+    local *myData = $self;  ## alias to avoid deref-ing overhead
+    local *myOpts = ($myData{_PARSEOPTS} ||= {});  ## get parse-options
     local $_;
 
     ## Put this stream at the top of the stack and do beginning-of-input
@@ -1020,7 +1066,6 @@ sub parse_from_filehandle {
     while (defined ($textline = $tied_fh ? <$in_fh> : $in_fh->getline)) {
         $textline = $self->preprocess_line($textline, ++$nlines);
         next  unless ((defined $textline)  &&  (length $textline));
-        $_ = $paragraph;  ## save previous contents
 
         if ((! length $paragraph) && ($textline =~ /^==/)) {
             ## '==' denotes a one-line command paragraph
@@ -1033,9 +1078,21 @@ sub parse_from_filehandle {
             ++$plines;
         }
 
-        ## See of this line is blank and ends the current paragraph.
+        ## See if this line is blank and ends the current paragraph.
         ## If it isnt, then keep iterating until it is.
-        next unless (($textline =~ /^\s*$/) && (length $paragraph));
+        next unless (($textline =~ /^([^\S\r\n]*)[\r\n]*$/)
+                                     && (length $paragraph));
+
+        ## Issue a warning about any non-empty blank lines
+        if (length($1) > 0 and $myOpts{'-warnings'} and ! $myData{_CUTTING}) {
+            my $errorsub = $self->errorsub();
+            my $file = $self->input_file();
+            my $errmsg = "*** WARNING: line containing nothing but whitespace".
+                         " in paragraph at line $nlines in file $file\n";
+            (ref $errorsub) and &{$errorsub}($errmsg)
+                or (defined $errorsub) and $self->$errorsub($errmsg)
+                    or  warn($errmsg);
+        }
 
         ## Now process the paragraph
         parse_paragraph($self, $paragraph, ($nlines - $plines) + 1);
@@ -1088,6 +1145,8 @@ performed). If the special output filename ">&STDERR" is given then the
 STDERR filehandle is used for output (and no open or close is
 performed). If no output filehandle is currently in use and no output
 filename is specified, then "-" is implied.
+Alternatively, an L<IO::String> object is also accepted as an output
+file handle.
 
 This method does I<not> usually need to be overridden by subclasses.
 
@@ -1097,24 +1156,29 @@ sub parse_from_file {
     my $self = shift;
     my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : ();
     my ($infile, $outfile) = @_;
-    my ($in_fh,  $out_fh);
+    my ($in_fh,  $out_fh) = (gensym(), gensym())  if ($] < 5.006);
     my ($close_input, $close_output) = (0, 0);
     local *myData = $self;
-    local $_;
+    local *_;
 
     ## Is $infile a filename or a (possibly implied) filehandle
-    $infile  = '-'  unless ((defined $infile)  && (length $infile));
-    if (($infile  eq '-') || ($infile =~ /^<&(STDIN|0)$/i)) {
-        ## Not a filename, just a string implying STDIN
-        $myData{_INFILE} = "<standard input>";
-        $in_fh = \*STDIN;
-    }
-    elsif (ref $infile) {
+    if (defined $infile && ref $infile) {
+        if (ref($infile) =~ /^(SCALAR|ARRAY|HASH|CODE|REF)$/) {
+            croak "Input from $1 reference not supported!\n";
+        }
         ## Must be a filehandle-ref (or else assume its a ref to an object
         ## that supports the common IO read operations).
         $myData{_INFILE} = ${$infile};
         $in_fh = $infile;
     }
+    elsif (!defined($infile) || !length($infile) || ($infile eq '-')
+        || ($infile =~ /^<&(?:STDIN|0)$/i))
+    {
+        ## Not a filename, just a string implying STDIN
+        $infile ||= '-';
+        $myData{_INFILE} = "<standard input>";
+        $in_fh = \*STDIN;
+    }
     else {
         ## We have a filename, open it for reading
         $myData{_INFILE} = $infile;
@@ -1128,37 +1192,54 @@ sub parse_from_file {
     ## the entire document (but *not* if this is an included file). We
     ## determine this by seeing if the input stream stack has been set-up
     ## already
-    ## 
-    unless ((defined $outfile) && (length $outfile)) {
-        (defined $myData{_TOP_STREAM}) && ($out_fh  = $myData{_OUTPUT})
-                                       || ($outfile = '-');
-    }
-    ## Is $outfile a filename or a (possibly implied) filehandle
-    if ((defined $outfile) && (length $outfile)) {
-        if (($outfile  eq '-') || ($outfile =~ /^>&?(?:STDOUT|1)$/i)) {
-            ## Not a filename, just a string implying STDOUT
-            $myData{_OUTFILE} = "<standard output>";
-            $out_fh  = \*STDOUT;
+
+    ## Is $outfile a filename, a (possibly implied) filehandle, maybe a ref?
+    if (ref $outfile) {
+        ## we need to check for ref() first, as other checks involve reading
+        if (ref($outfile) =~ /^(ARRAY|HASH|CODE)$/) {
+            croak "Output to $1 reference not supported!\n";
         }
-        elsif ($outfile =~ /^>&(STDERR|2)$/i) {
-            ## Not a filename, just a string implying STDERR
-            $myData{_OUTFILE} = "<standard error>";
-            $out_fh  = \*STDERR;
+        elsif (ref($outfile) eq 'SCALAR') {
+#           # NOTE: IO::String isn't a part of the perl distribution,
+#           #       so probably we shouldn't support this case...
+#           require IO::String;
+#           $myData{_OUTFILE} = "$outfile";
+#           $out_fh = IO::String->new($outfile);
+            croak "Output to SCALAR reference not supported!\n";
         }
-        elsif (ref $outfile) {
+        else {
             ## Must be a filehandle-ref (or else assume its a ref to an
             ## object that supports the common IO write operations).
-            $myData{_OUTFILE} = ${$outfile};;
+            $myData{_OUTFILE} = ${$outfile};
             $out_fh = $outfile;
         }
+    }
+    elsif (!defined($outfile) || !length($outfile) || ($outfile eq '-')
+        || ($outfile =~ /^>&?(?:STDOUT|1)$/i))
+    {
+        if (defined $myData{_TOP_STREAM}) {
+            $out_fh = $myData{_OUTPUT};
+        }
         else {
-            ## We have a filename, open it for writing
-            $myData{_OUTFILE} = $outfile;
-            open($out_fh, "> $outfile")  or
-                 croak "Can't open $outfile for writing: $!\n";
-            $close_output = 1;
+            ## Not a filename, just a string implying STDOUT
+            $outfile ||= '-';
+            $myData{_OUTFILE} = "<standard output>";
+            $out_fh  = \*STDOUT;
         }
     }
+    elsif ($outfile =~ /^>&(STDERR|2)$/i) {
+        ## Not a filename, just a string implying STDERR
+        $myData{_OUTFILE} = "<standard error>";
+        $out_fh  = \*STDERR;
+    }
+    else {
+        ## We have a filename, open it for writing
+        $myData{_OUTFILE} = $outfile;
+        (-d $outfile) and croak "$outfile is a directory, not POD input!\n";
+        open($out_fh, "> $outfile")  or
+             croak "Can't open $outfile for writing: $!\n";
+        $close_output = 1;
+    }
 
     ## Whew! That was a lot of work to set up reasonably/robust behavior
     ## in the case of a non-filename for reading and writing. Now we just
@@ -1196,7 +1277,7 @@ builtin is used to issue error messages (this is the default behavior).
             my $errorsub = $parser->errorsub()
             my $errmsg = "This is an error message!\n"
             (ref $errorsub) and &{$errorsub}($errmsg)
-                or (defined $errmsg) and $parser->$errorsub($errmsg)
+                or (defined $errorsub) and $parser->$errorsub($errmsg)
                     or  warn($errmsg);
 
 Returns a method name, or else a reference to the user-supplied subroutine
@@ -1256,7 +1337,7 @@ key/value pairs and the specified parse-option names are set to the
 given values. Any unspecified parse-options are unaffected.
 
             ## Set them back to the default
-            $parser->parseopts(-process_cut_cmd => 0);
+            $parser->parseopts(-warnings => 0);
 
 When passed a single hash-ref, B<parseopts> uses that hash to completely
 reset the existing parse-options, all previous parse-option values
@@ -1265,7 +1346,7 @@ are lost.
             ## Reset all options to default 
             $parser->parseopts( { } );
 
-See L<"PARSING OPTIONS"> for more for the name and meaning of each
+See L<"PARSING OPTIONS"> for more information on the name and meaning of each
 parse-option currently recognized.
 
 =cut
@@ -1535,7 +1616,7 @@ markup languages like HTML and XML) then you may need to take the
 tree-based approach. Rather than doing everything in one pass and
 calling the B<interpolate()> method to expand sequences into text, it
 may be desirable to instead create a parse-tree using the B<parse_text()>
-method to return a tree-like structure which may contain an ordered list
+method to return a tree-like structure which may contain an ordered
 list of children (each of which may be a text-string, or a similar
 tree-like structure).
 
@@ -1705,6 +1786,8 @@ causing any namespace clashes due to multiple inheritance.
 
 =head1 AUTHOR
 
+Please report bugs using L<http://rt.cpan.org>.
+
 Brad Appleton E<lt>bradapp@enteract.comE<gt>
 
 Based on code for B<Pod::Text> written by
@@ -1713,3 +1796,4 @@ Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
 =cut
 
 1;
+# vim: ts=4 sw=4 et