[p5sagit/p5-mst-13.2.git] / cpan / Pod-Simple / lib / Pod / Simple / PullParser.pm


require 5;
package Pod::Simple::PullParser;
$VERSION = '3.14';
use Pod::Simple ();
BEGIN {@ISA = ('Pod::Simple')}

use strict;
use Carp ();

use Pod::Simple::PullParserStartToken;
use Pod::Simple::PullParserEndToken;
use Pod::Simple::PullParserTextToken;

BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }

__PACKAGE__->_accessorize(
  'source_fh',         # the filehandle we're reading from
  'source_scalar_ref', # the scalarref we're reading from
  'source_arrayref',   # the arrayref we're reading from
);

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#
#  And here is how we implement a pull-parser on top of a push-parser...

sub filter {
  my($self, $source) = @_;
  $self = $self->new unless ref $self;

  $source = *STDIN{IO} unless defined $source;
  $self->set_source($source);
  $self->output_fh(*STDOUT{IO});

  $self->run; # define run() in a subclass if you want to use filter()!
  return $self;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

sub parse_string_document {
  my $this = shift;
  $this->set_source(\ $_[0]);
  $this->run;
}

sub parse_file {
  my($this, $filename) = @_;
  $this->set_source($filename);
  $this->run;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  In case anyone tries to use them:

sub run {
  use Carp ();
  if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
    Carp::croak "You can call run() only on subclasses of "
     . __PACKAGE__;
  } else {
    Carp::croak join '',
      "You can't call run() because ",
      ref($_[0]) || $_[0], " didn't define a run() method";
  }
}

sub parse_lines {
  use Carp ();
  Carp::croak "Use set_source with ", __PACKAGE__,
    " and subclasses, not parse_lines";
}

sub parse_line {
  use Carp ();
  Carp::croak "Use set_source with ", __PACKAGE__,
    " and subclasses, not parse_line";
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);
  die "Couldn't construct for $class" unless $self;

  $self->{'token_buffer'} ||= [];
  $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
  $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
  $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';

  DEBUG > 1 and print "New pullparser object: $self\n";

  return $self;
}

# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

sub get_token {
  my $self = shift;
  DEBUG > 1 and print "\nget_token starting up on $self.\n";
  DEBUG > 2 and print " Items in token-buffer (",
   scalar( @{ $self->{'token_buffer'} } ) ,
   ") :\n", map(
     "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
   ),
   @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
   "\n"
  ;

  until( @{ $self->{'token_buffer'} } ) {
    DEBUG > 3 and print "I need to get something into my empty token buffer...\n";
    if($self->{'source_dead'}) {
      DEBUG and print "$self 's source is dead.\n";
      push @{ $self->{'token_buffer'} }, undef;
    } elsif(exists $self->{'source_fh'}) {
      my @lines;
      my $fh = $self->{'source_fh'}
       || Carp::croak('You have to call set_source before you can call get_token');
       
      DEBUG and print "$self 's source is filehandle $fh.\n";
      # Read those many lines at a time
      for(my $i = Pod::Simple::MANY_LINES; $i--;) {
        DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";
        local $/ = $Pod::Simple::NL;
        push @lines, scalar(<$fh>); # readline
        DEBUG > 3 and print "  Line is: ",
          defined($lines[-1]) ? $lines[-1] : "<undef>\n";
        unless( defined $lines[-1] ) {
          DEBUG and print "That's it for that source fh!  Killing.\n";
          delete $self->{'source_fh'}; # so it can be GC'd
          last;
        }
         # but pass thru the undef, which will set source_dead to true

        # TODO: look to see if $lines[-1] is =encoding, and if so,
        # do horribly magic things

      }
      
      if(DEBUG > 8) {
        print "* I've gotten ", scalar(@lines), " lines:\n";
        foreach my $l (@lines) {
          if(defined $l) {
            print "  line {$l}\n";
          } else {
            print "  line undef\n";
          }
        }
        print "* end of ", scalar(@lines), " lines\n";
      }

      $self->SUPER::parse_lines(@lines);
      
    } elsif(exists $self->{'source_arrayref'}) {
      DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",
       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";

      DEBUG > 3 and print "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
      $self->SUPER::parse_lines(
        splice @{ $self->{'source_arrayref'} },
        0,
        Pod::Simple::MANY_LINES
      );
      unless( @{ $self->{'source_arrayref'} } ) {
        DEBUG and print "That's it for that source arrayref!  Killing.\n";
        $self->SUPER::parse_lines(undef);
        delete $self->{'source_arrayref'}; # so it can be GC'd
      }
       # to make sure that an undef is always sent to signal end-of-stream

    } elsif(exists $self->{'source_scalar_ref'}) {

      DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
        length(${ $self->{'source_scalar_ref'} }) -
        (pos(${ $self->{'source_scalar_ref'} }) || 0),
        " characters left to parse.\n";

      DEBUG > 3 and print " Fetching a line from source-string...\n";
      if( ${ $self->{'source_scalar_ref'} } =~
        m/([^\n\r]*)((?:\r?\n)?)/g
      ) {
        #print(">> $1\n"),
        $self->SUPER::parse_lines($1)
         if length($1) or length($2)
          or pos(     ${ $self->{'source_scalar_ref'} })
           != length( ${ $self->{'source_scalar_ref'} });
         # I.e., unless it's a zero-length "empty line" at the very
         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
      } else { # that's the end.  Byebye
        $self->SUPER::parse_lines(undef);
        delete $self->{'source_scalar_ref'};
        DEBUG and print "That's it for that source scalarref!  Killing.\n";
      }

      
    } else {
      die "What source??";
    }
  }
  DEBUG and print "get_token about to return ",
   Pod::Simple::pretty( @{$self->{'token_buffer'}}
     ? $self->{'token_buffer'}[-1] : undef
   ), "\n";
  return shift @{$self->{'token_buffer'}}; # that's an undef if empty
}

sub unget_token {
  my $self = shift;
  DEBUG and print "Ungetting ", scalar(@_), " tokens: ",
   @_ ? "@_\n" : "().\n";
  foreach my $t (@_) {
    Carp::croak "Can't unget that, because it's not a token -- it's undef!"
     unless defined $t;
    Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
     unless ref $t;
    Carp::croak "Can't unget $t, because it's not a token object!"
     unless UNIVERSAL::can($t, 'type');
  }
  
  unshift @{$self->{'token_buffer'}}, @_;
  DEBUG > 1 and print "Token buffer now has ",
   scalar(@{$self->{'token_buffer'}}), " items in it.\n";
  return;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# $self->{'source_filename'} = $source;

sub set_source {
  my $self = shift @_;
  return $self->{'source_fh'} unless @_;
  my $handle;
  if(!defined $_[0]) {
    Carp::croak("Can't use empty-string as a source for set_source");
  } elsif(ref(\( $_[0] )) eq 'GLOB') {
    $self->{'source_filename'} = '' . ($handle = $_[0]);
    DEBUG and print "$self 's source is glob $_[0]\n";
    # and fall thru   
  } elsif(ref( $_[0] ) eq 'SCALAR') {
    $self->{'source_scalar_ref'} = $_[0];
    DEBUG and print "$self 's source is scalar ref $_[0]\n";
    return;
  } elsif(ref( $_[0] ) eq 'ARRAY') {
    $self->{'source_arrayref'} = $_[0];
    DEBUG and print "$self 's source is array ref $_[0]\n";
    return;
  } elsif(ref $_[0]) {
    $self->{'source_filename'} = '' . ($handle = $_[0]);
    DEBUG and print "$self 's source is fh-obj $_[0]\n";
  } elsif(!length $_[0]) {
    Carp::croak("Can't use empty-string as a source for set_source");
  } else {  # It's a filename!
    DEBUG and print "$self 's source is filename $_[0]\n";
    {
      local *PODSOURCE;
      open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
      $handle = *PODSOURCE{IO};
    }
    $self->{'source_filename'} = $_[0];
    DEBUG and print "  Its name is $_[0].\n";

    # TODO: file-discipline things here!
  }

  $self->{'source_fh'} = $handle;
  DEBUG and print "  Its handle is $handle\n";
  return 1;
}

# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

sub get_title_short {  shift->get_short_title(@_)  } # alias

sub get_short_title {
  my $title = shift->get_title(@_);
  $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
    # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
  return $title;
}

sub get_title       { shift->_get_titled_section(
  'NAME', max_token => 50, desperate => 1, @_)
}
sub get_version     { shift->_get_titled_section(
   'VERSION',
    max_token => 400,
    accept_verbatim => 1,
    max_content_length => 3_000,
   @_,
  );
}
sub get_description { shift->_get_titled_section(
   'DESCRIPTION',
    max_token => 400,
    max_content_length => 3_000,
   @_,
) }

sub get_authors     { shift->get_author(@_) }  # a harmless alias

sub get_author      {
  my $this = shift;
  # Max_token is so high because these are
  #  typically at the end of the document:
  $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
  $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
}

#--------------------------------------------------------------------------

sub _get_titled_section {
  # Based on a get_title originally contributed by Graham Barr
  my($self, $titlename, %options) = (@_);
  
  my $max_token            = delete $options{'max_token'};
  my $desperate_for_title  = delete $options{'desperate'};
  my $accept_verbatim      = delete $options{'accept_verbatim'};
  my $max_content_length   = delete $options{'max_content_length'};
  my $nocase               = delete $options{'nocase'};
  $max_content_length = 120 unless defined $max_content_length;

  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
    . join " ", map "[$_]", sort keys %options
  )
   if keys %options;

  my %content_containers;
  $content_containers{'Para'} = 1;
  if($accept_verbatim) {
    $content_containers{'Verbatim'} = 1;
    $content_containers{'VerbatimFormatted'} = 1;
  }

  my $token_count = 0;
  my $title;
  my @to_unget;
  my $state = 0;
  my $depth = 0;

  Carp::croak "What kind of titlename is \"$titlename\"?!" unless
   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
  my $titlename_re = quotemeta($titlename);

  my $head1_text_content;
  my $para_text_content;

  while(
    ++$token_count <= ($max_token || 1_000_000)
    and defined(my $token = $self->get_token)
  ) {
    push @to_unget, $token;

    if ($state == 0) { # seeking =head1
      if( $token->is_start and $token->tagname eq 'head1' ) {
        DEBUG and print "  Found head1.  Seeking content...\n";
        ++$state;
        $head1_text_content = '';
      }
    }

    elsif($state == 1) { # accumulating text until end of head1
      if( $token->is_text ) {
        DEBUG and print "   Adding \"", $token->text, "\" to head1-content.\n";
        $head1_text_content .= $token->text;
      } elsif( $token->is_end and $token->tagname eq 'head1' ) {
        DEBUG and print "  Found end of head1.  Considering content...\n";
        $head1_text_content = uc $head1_text_content if $nocase;
        if($head1_text_content eq $titlename
          or $head1_text_content =~ m/\($titlename_re\)/s
          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
        ) {
          DEBUG and print "  Yup, it was $titlename.  Seeking next para-content...\n";
          ++$state;
        } elsif(
          $desperate_for_title
           # if we're so desperate we'll take the first
           #  =head1's content as a title
          and $head1_text_content =~ m/\S/
          and $head1_text_content !~ m/^[ A-Z]+$/s
          and $head1_text_content !~
            m/\((?:
             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
            )\)/sx
            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
          and ($max_content_length
            ? (length($head1_text_content) <= $max_content_length) # sanity
            : 1)
        ) {
          DEBUG and print "  It looks titular: \"$head1_text_content\".\n",
            "\n  Using that.\n";
          $title = $head1_text_content;
          last;
        } else {
          --$state;
          DEBUG and print "  Didn't look titular ($head1_text_content).\n",
            "\n  Dropping back to seeking-head1-content mode...\n";
        }
      }
    }
    
    elsif($state == 2) {
      # seeking start of para (which must immediately follow)
      if($token->is_start and $content_containers{ $token->tagname }) {
        DEBUG and print "  Found start of Para.  Accumulating content...\n";
        $para_text_content = '';
        ++$state;
      } else {
        DEBUG and print
         "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
        $state = 0;
      }
    }
    
    elsif($state == 3) {
      # accumulating text until end of Para
      if( $token->is_text ) {
        DEBUG and print "   Adding \"", $token->text, "\" to para-content.\n";
        $para_text_content .= $token->text;
        # and keep looking
        
      } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
        DEBUG and print "  Found end of Para.  Considering content: ",
          $para_text_content, "\n";

        if( $para_text_content =~ m/\S/
          and ($max_content_length
           ? (length($para_text_content) <= $max_content_length)
           : 1)
        ) {
          # Some minimal sanity constraints, I think.
          DEBUG and print "  It looks contentworthy, I guess.  Using it.\n";
          $title = $para_text_content;
          last;
        } else {
          DEBUG and print "  Doesn't look at all contentworthy!\n  Giving up.\n";
          undef $title;
          last;
        }
      }
    }
    
    else {
      die "IMPOSSIBLE STATE $state!\n";  # should never happen
    }
    
  }
  
  # Put it all back!
  $self->unget_token(@to_unget);
  
  if(DEBUG) {
    if(defined $title) { print "  Returing title <$title>\n" }
    else { print "Returning title <>\n" }
  }
  
  return '' unless defined $title;
  $title =~ s/^\s+//;
  return $title;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#
#  Methods that actually do work at parse-time:

sub _handle_element_start {
  my $self = shift;   # leaving ($element_name, $attr_hash_r)
  DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
  
  push @{ $self->{'token_buffer'} },
       $self->{'start_token_class'}->new(@_);
  return;
}

sub _handle_text {
  my $self = shift;   # leaving ($text)
  DEBUG > 2 and print "== $_[0]\n";
  push @{ $self->{'token_buffer'} },
       $self->{'text_token_class'}->new(@_);
  return;
}

sub _handle_element_end {
  my $self = shift;   # leaving ($element_name);
  DEBUG > 2 and print "-- $_[0]\n";
  push @{ $self->{'token_buffer'} }, 
       $self->{'end_token_class'}->new(@_);
  return;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

1;


__END__

=head1 NAME

Pod::Simple::PullParser -- a pull-parser interface to parsing Pod

=head1 SYNOPSIS

 my $parser = SomePodProcessor->new;
 $parser->set_source( "whatever.pod" );
 $parser->run;

Or:

 my $parser = SomePodProcessor->new;
 $parser->set_source( $some_filehandle_object );
 $parser->run;

Or:

 my $parser = SomePodProcessor->new;
 $parser->set_source( \$document_source );
 $parser->run;

Or:

 my $parser = SomePodProcessor->new;
 $parser->set_source( \@document_lines );
 $parser->run;

And elsewhere:

 require 5;
 package SomePodProcessor;
 use strict;
 use base qw(Pod::Simple::PullParser);
 
 sub run {
   my $self = shift;
  Token:
   while(my $token = $self->get_token) {
     ...process each token...
   }
 }

=head1 DESCRIPTION

This class is for using Pod::Simple to build a Pod processor -- but
one that uses an interface based on a stream of token objects,
instead of based on events.

This is a subclass of L<Pod::Simple> and inherits all its methods.

A subclass of Pod::Simple::PullParser should define a C<run> method
that calls C<< $token = $parser->get_token >> to pull tokens.

See the source for Pod::Simple::RTF for an example of a formatter
that uses Pod::Simple::PullParser.

=head1 METHODS

=over

=item my $token = $parser->get_token

This returns the next token object (which will be of a subclass of
L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
the end of the document.

=item $parser->unget_token( $token )

=item $parser->unget_token( $token1, $token2, ... )

This restores the token object(s) to the front of the parser stream.

=back

The source has to be set before you can parse anything.  The lowest-level
way is to call C<set_source>:

=over

=item $parser->set_source( $filename )

=item $parser->set_source( $filehandle_object )

=item $parser->set_source( \$document_source )

=item $parser->set_source( \@document_lines )

=back

Or you can call these methods, which Pod::Simple::PullParser has defined
to work just like Pod::Simple's same-named methods:

=over

=item $parser->parse_file(...)

=item $parser->parse_string_document(...)

=item $parser->filter(...)

=item $parser->parse_from_file(...)

=back

For those to work, the Pod-processing subclass of
Pod::Simple::PullParser has to have defined a $parser->run method --
so it is advised that all Pod::Simple::PullParser subclasses do so.
See the Synopsis above, or the source for Pod::Simple::RTF.

Authors of formatter subclasses might find these methods useful to
call on a parser object that you haven't started pulling tokens
from yet:

=over

=item my $title_string = $parser->get_title

This tries to get the title string out of $parser, by getting some tokens,
and scanning them for the title, and then ungetting them so that you can
process the token-stream from the beginning.

For example, suppose you have a document that starts out:

  =head1 NAME
  
  Hoo::Boy::Wowza -- Stuff B<wow> yeah!

$parser->get_title on that document will return "Hoo::Boy::Wowza --
Stuff wow yeah!". If the document starts with:

  =head1 Name
  
  Hoo::Boy::W00t -- Stuff B<w00t> yeah!

Then you'll need to pass the C<nocase> option in order to recognize "Name":

  $parser->get_title(nocase => 1);

In cases where get_title can't find the title, it will return empty-string
("").

=item my $title_string = $parser->get_short_title

This is just like get_title, except that it returns just the modulename, if
the title seems to be of the form "SomeModuleName -- description".

For example, suppose you have a document that starts out:

  =head1 NAME
  
  Hoo::Boy::Wowza -- Stuff B<wow> yeah!

then $parser->get_short_title on that document will return
"Hoo::Boy::Wowza".

But if the document starts out:

  =head1 NAME
  
  Hooboy, stuff B<wow> yeah!

then $parser->get_short_title on that document will return "Hooboy,
stuff wow yeah!". If the document starts with:

  =head1 Name
  
  Hoo::Boy::W00t -- Stuff B<w00t> yeah!

Then you'll need to pass the C<nocase> option in order to recognize "Name":

  $parser->get_short_title(nocase => 1);

If the title can't be found, then get_short_title returns empty-string
("").

=item $author_name   = $parser->get_author

This works like get_title except that it returns the contents of the
"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
section, pass the C<nocase> otpion:

  $parser->get_author(nocase => 1);

(This method tolerates "AUTHORS" instead of "AUTHOR" too.)

=item $description_name = $parser->get_description

This works like get_title except that it returns the contents of the
"=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
section, pass the C<nocase> otpion:

  $parser->get_description(nocase => 1);

=item $version_block = $parser->get_version

This works like get_title except that it returns the contents of
the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
return the module's C<$VERSION>!! To recognize a
"=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> otpion:

  $parser->get_version(nocase => 1);

=back

=head1 NOTE

You don't actually I<have> to define a C<run> method.  If you're
writing a Pod-formatter class, you should define a C<run> just so
that users can call C<parse_file> etc, but you don't I<have> to.

And if you're not writing a formatter class, but are instead just
writing a program that does something simple with a Pod::PullParser
object (and not an object of a subclass), then there's no reason to
bother subclassing to add a C<run> method.

=head1 SEE ALSO

L<Pod::Simple>

L<Pod::Simple::PullParserToken> -- and its subclasses
L<Pod::Simple::PullParserStartToken>,
L<Pod::Simple::PullParserTextToken>, and
L<Pod::Simple::PullParserEndToken>.

L<HTML::TokeParser>, which inspired this.

=head1 SUPPORT

Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.

This module is managed in an open GitHub repository,
L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/theory/pod-simple.git> and send patches!

Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.

=head1 COPYRIGHT AND DISCLAIMERS

Copyright (c) 2002 Sean M. Burke.

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.

=head1 AUTHOR

Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.

Pod::Simple is maintained by:

=over

=item * Allison Randal C<allison@perl.org>

=item * Hans Dieter Pearcey C<hdp@cpan.org>

=item * David E. Wheeler C<dwheeler@cpan.org>

=back

=cut
JUNK:

sub _old_get_title {  # some witchery in here
  my $self = $_[0];
  my $title;
  my @to_unget;

  while(1) {
    push @to_unget, $self->get_token;
    unless(defined $to_unget[-1]) { # whoops, short doc!
      pop @to_unget;
      last;
    }

    DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n";

    (DEBUG and print "Too much in the buffer.\n"),
     last if @to_unget > 25; # sanity
    
    my $pattern = '';
    if( #$to_unget[-1]->type eq 'end'
        #and $to_unget[-1]->tagname eq 'Para'
        #and
        ($pattern = join('',
         map {;
            ($_->type eq 'start') ? ("<" . $_->tagname .">")
          : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
          : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
          : "BLORP"
         } @to_unget
       )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
    ) {
      # Whee, it fits the pattern
      DEBUG and print "Seems to match =head1 NAME pattern.\n";
      $title = '';
      foreach my $t (reverse @to_unget) {
        last if $t->type eq 'start' and $t->tagname eq 'Para';
        $title = $t->text . $title if $t->type eq 'text';
      }
      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
      last;

    } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
      and !( $1 eq '1' and $2 eq 'NAME' )
    ) {
      # Well, it fits a fallback pattern
      DEBUG and print "Seems to match NAMEless pattern.\n";
      $title = '';
      foreach my $t (reverse @to_unget) {
        last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
        $title = $t->text . $title if $t->type eq 'text';
      }
      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
      last;
      
    } else {
      DEBUG and $pattern and print "Leading pattern: $pattern\n";
    }
  }
  
  # Put it all back:
  $self->unget_token(@to_unget);
  
  if(DEBUG) {
    if(defined $title) { print "  Returing title <$title>\n" }
    else { print "Returning title <>\n" }
  }
  
  return '' unless defined $title;
  return $title;
}