cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm

   1
   2 require 5;
   3 package Pod::Simple::PullParser;
   4 $VERSION = '2.02';
   5 use Pod::Simple ();
   6 BEGIN {@ISA = ('Pod::Simple')}
   7
   8 use strict;
   9 use Carp ();
  10
  11 use Pod::Simple::PullParserStartToken;
  12 use Pod::Simple::PullParserEndToken;
  13 use Pod::Simple::PullParserTextToken;
  14
  15 BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
  16
  17 __PACKAGE__->_accessorize(
  18   'source_fh',         # the filehandle we're reading from
  19   'source_scalar_ref', # the scalarref we're reading from
  20   'source_arrayref',   # the arrayref we're reading from
  21 );
  22
  23 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  24 #
  25 #  And here is how we implement a pull-parser on top of a push-parser...
  26
  27 sub filter {
  28   my($self, $source) = @_;
  29   $self = $self->new unless ref $self;
  30
  31   $source = *STDIN{IO} unless defined $source;
  32   $self->set_source($source);
  33   $self->output_fh(*STDOUT{IO});
  34
  35   $self->run; # define run() in a subclass if you want to use filter()!
  36   return $self;
  37 }
  38
  39 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  40
  41 sub parse_string_document {
  42   my $this = shift;
  43   $this->set_source(\ $_[0]);
  44   $this->run;
  45 }
  46
  47 sub parse_file {
  48   my($this, $filename) = @_;
  49   $this->set_source($filename);
  50   $this->run;
  51 }
  52
  53 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  54 #  In case anyone tries to use them:
  55
  56 sub run {
  57   use Carp ();
  58   if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
  59     Carp::croak "You can call run() only on subclasses of "
  60      . __PACKAGE__;
  61   } else {
  62     Carp::croak join '',
  63       "You can't call run() because ",
  64       ref($_[0]) || $_[0], " didn't define a run() method";
  65   }
  66 }
  67
  68 sub parse_lines {
  69   use Carp ();
  70   Carp::croak "Use set_source with ", __PACKAGE__,
  71     " and subclasses, not parse_lines";
  72 }
  73
  74 sub parse_line {
  75   use Carp ();
  76   Carp::croak "Use set_source with ", __PACKAGE__,
  77     " and subclasses, not parse_line";
  78 }
  79
  80 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  81
  82 sub new {
  83   my $class = shift;
  84   my $self = $class->SUPER::new(@_);
  85   die "Couldn't construct for $class" unless $self;
  86
  87   $self->{'token_buffer'} ||= [];
  88   $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
  89   $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
  90   $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';
  91
  92   DEBUG > 1 and print "New pullparser object: $self\n";
  93
  94   return $self;
  95 }
  96
  97 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
  98
  99 sub get_token {
 100   my $self = shift;
 101   DEBUG > 1 and print "\nget_token starting up on $self.\n";
 102   DEBUG > 2 and print " Items in token-buffer (",
 103    scalar( @{ $self->{'token_buffer'} } ) ,
 104    ") :\n", map(
 105      "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
 106    ),
 107    @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
 108    "\n"
 109   ;
 110
 111   until( @{ $self->{'token_buffer'} } ) {
 112     DEBUG > 3 and print "I need to get something into my empty token buffer...\n";
 113     if($self->{'source_dead'}) {
 114       DEBUG and print "$self 's source is dead.\n";
 115       push @{ $self->{'token_buffer'} }, undef;
 116     } elsif(exists $self->{'source_fh'}) {
 117       my @lines;
 118       my $fh = $self->{'source_fh'}
 119        || Carp::croak('You have to call set_source before you can call get_token');
 120
 121       DEBUG and print "$self 's source is filehandle $fh.\n";
 122       # Read those many lines at a time
 123       for(my $i = Pod::Simple::MANY_LINES; $i--;) {
 124         DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";
 125         local $/ = $Pod::Simple::NL;
 126         push @lines, scalar(<$fh>); # readline
 127         DEBUG > 3 and print "  Line is: ",
 128           defined($lines[-1]) ? $lines[-1] : "<undef>\n";
 129         unless( defined $lines[-1] ) {
 130           DEBUG and print "That's it for that source fh!  Killing.\n";
 131           delete $self->{'source_fh'}; # so it can be GC'd
 132           last;
 133         }
 134          # but pass thru the undef, which will set source_dead to true
 135
 136         # TODO: look to see if $lines[-1] is =encoding, and if so,
 137         # do horribly magic things
 138
 139       }
 140
 141       if(DEBUG > 8) {
 142         print "* I've gotten ", scalar(@lines), " lines:\n";
 143         foreach my $l (@lines) {
 144           if(defined $l) {
 145             print "  line {$l}\n";
 146           } else {
 147             print "  line undef\n";
 148           }
 149         }
 150         print "* end of ", scalar(@lines), " lines\n";
 151       }
 152
 153       $self->SUPER::parse_lines(@lines);
 154
 155     } elsif(exists $self->{'source_arrayref'}) {
 156       DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",
 157        scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
 158
 159       DEBUG > 3 and print "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
 160       $self->SUPER::parse_lines(
 161         splice @{ $self->{'source_arrayref'} },
 162         0,
 163         Pod::Simple::MANY_LINES
 164       );
 165       unless( @{ $self->{'source_arrayref'} } ) {
 166         DEBUG and print "That's it for that source arrayref!  Killing.\n";
 167         $self->SUPER::parse_lines(undef);
 168         delete $self->{'source_arrayref'}; # so it can be GC'd
 169       }
 170        # to make sure that an undef is always sent to signal end-of-stream
 171
 172     } elsif(exists $self->{'source_scalar_ref'}) {
 173
 174       DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
 175         length(${ $self->{'source_scalar_ref'} }) -
 176         (pos(${ $self->{'source_scalar_ref'} }) || 0),
 177         " characters left to parse.\n";
 178
 179       DEBUG > 3 and print " Fetching a line from source-string...\n";
 180       if( ${ $self->{'source_scalar_ref'} } =~
 181         m/([^\n\r]*)((?:\r?\n)?)/g
 182       ) {
 183         #print(">> $1\n"),
 184         $self->SUPER::parse_lines($1)
 185          if length($1) or length($2)
 186           or pos(     ${ $self->{'source_scalar_ref'} })
 187            != length( ${ $self->{'source_scalar_ref'} });
 188          # I.e., unless it's a zero-length "empty line" at the very
 189          #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
 190       } else { # that's the end.  Byebye
 191         $self->SUPER::parse_lines(undef);
 192         delete $self->{'source_scalar_ref'};
 193         DEBUG and print "That's it for that source scalarref!  Killing.\n";
 194       }
 195
 196
 197     } else {
 198       die "What source??";
 199     }
 200   }
 201   DEBUG and print "get_token about to return ",
 202    Pod::Simple::pretty( @{$self->{'token_buffer'}}
 203      ? $self->{'token_buffer'}[-1] : undef
 204    ), "\n";
 205   return shift @{$self->{'token_buffer'}}; # that's an undef if empty
 206 }
 207
 208 use UNIVERSAL ();
 209 sub unget_token {
 210   my $self = shift;
 211   DEBUG and print "Ungetting ", scalar(@_), " tokens: ",
 212    @_ ? "@_\n" : "().\n";
 213   foreach my $t (@_) {
 214     Carp::croak "Can't unget that, because it's not a token -- it's undef!"
 215      unless defined $t;
 216     Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
 217      unless ref $t;
 218     Carp::croak "Can't unget $t, because it's not a token object!"
 219      unless UNIVERSAL::can($t, 'type');
 220   }
 221
 222   unshift @{$self->{'token_buffer'}}, @_;
 223   DEBUG > 1 and print "Token buffer now has ",
 224    scalar(@{$self->{'token_buffer'}}), " items in it.\n";
 225   return;
 226 }
 227
 228 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 229
 230 # $self->{'source_filename'} = $source;
 231
 232 sub set_source {
 233   my $self = shift @_;
 234   return $self->{'source_fh'} unless @_;
 235   my $handle;
 236   if(!defined $_[0]) {
 237     Carp::croak("Can't use empty-string as a source for set_source");
 238   } elsif(ref(\( $_[0] )) eq 'GLOB') {
 239     $self->{'source_filename'} = '' . ($handle = $_[0]);
 240     DEBUG and print "$self 's source is glob $_[0]\n";
 241     # and fall thru
 242   } elsif(ref( $_[0] ) eq 'SCALAR') {
 243     $self->{'source_scalar_ref'} = $_[0];
 244     DEBUG and print "$self 's source is scalar ref $_[0]\n";
 245     return;
 246   } elsif(ref( $_[0] ) eq 'ARRAY') {
 247     $self->{'source_arrayref'} = $_[0];
 248     DEBUG and print "$self 's source is array ref $_[0]\n";
 249     return;
 250   } elsif(ref $_[0]) {
 251     $self->{'source_filename'} = '' . ($handle = $_[0]);
 252     DEBUG and print "$self 's source is fh-obj $_[0]\n";
 253   } elsif(!length $_[0]) {
 254     Carp::croak("Can't use empty-string as a source for set_source");
 255   } else {  # It's a filename!
 256     DEBUG and print "$self 's source is filename $_[0]\n";
 257     {
 258       local *PODSOURCE;
 259       open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
 260       $handle = *PODSOURCE{IO};
 261     }
 262     $self->{'source_filename'} = $_[0];
 263     DEBUG and print "  Its name is $_[0].\n";
 264
 265     # TODO: file-discipline things here!
 266   }
 267
 268   $self->{'source_fh'} = $handle;
 269   DEBUG and print "  Its handle is $handle\n";
 270   return 1;
 271 }
 272
 273 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 274
 275 sub get_title_short {  shift->get_short_title(@_)  } # alias
 276
 277 sub get_short_title {
 278   my $title = shift->get_title(@_);
 279   $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
 280     # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
 281   return $title;
 282 }
 283
 284 sub get_title       { shift->_get_titled_section(
 285   'NAME', max_token => 50, desperate => 1, @_)
 286 }
 287 sub get_version     { shift->_get_titled_section(
 288    'VERSION',
 289     max_token => 400,
 290     accept_verbatim => 1,
 291     max_content_length => 3_000,
 292    @_,
 293   );
 294 }
 295 sub get_description { shift->_get_titled_section(
 296    'DESCRIPTION',
 297     max_token => 400,
 298     max_content_length => 3_000,
 299    @_,
 300 ) }
 301
 302 sub get_authors     { shift->get_author(@_) }  # a harmless alias
 303
 304 sub get_author      {
 305   my $this = shift;
 306   # Max_token is so high because these are
 307   #  typically at the end of the document:
 308   $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
 309   $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
 310 }
 311
 312 #--------------------------------------------------------------------------
 313
 314 sub _get_titled_section {
 315   # Based on a get_title originally contributed by Graham Barr
 316   my($self, $titlename, %options) = (@_);
 317
 318   my $max_token            = delete $options{'max_token'};
 319   my $desperate_for_title  = delete $options{'desperate'};
 320   my $accept_verbatim      = delete $options{'accept_verbatim'};
 321   my $max_content_length   = delete $options{'max_content_length'};
 322   $max_content_length = 120 unless defined $max_content_length;
 323
 324   Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
 325     . join " ", map "[$_]", sort keys %options
 326   )
 327    if keys %options;
 328
 329   my %content_containers;
 330   $content_containers{'Para'} = 1;
 331   if($accept_verbatim) {
 332     $content_containers{'Verbatim'} = 1;
 333     $content_containers{'VerbatimFormatted'} = 1;
 334   }
 335
 336   my $token_count = 0;
 337   my $title;
 338   my @to_unget;
 339   my $state = 0;
 340   my $depth = 0;
 341
 342   Carp::croak "What kind of titlename is \"$titlename\"?!" unless
 343    defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
 344   my $titlename_re = quotemeta($titlename);
 345
 346   my $head1_text_content;
 347   my $para_text_content;
 348
 349   while(
 350     ++$token_count <= ($max_token || 1_000_000)
 351     and defined(my $token = $self->get_token)
 352   ) {
 353     push @to_unget, $token;
 354
 355     if ($state == 0) { # seeking =head1
 356       if( $token->is_start and $token->tagname eq 'head1' ) {
 357         DEBUG and print "  Found head1.  Seeking content...\n";
 358         ++$state;
 359         $head1_text_content = '';
 360       }
 361     }
 362
 363     elsif($state == 1) { # accumulating text until end of head1
 364       if( $token->is_text ) {
 365         DEBUG and print "   Adding \"", $token->text, "\" to head1-content.\n";
 366         $head1_text_content .= $token->text;
 367       } elsif( $token->is_end and $token->tagname eq 'head1' ) {
 368         DEBUG and print "  Found end of head1.  Considering content...\n";
 369         if($head1_text_content eq $titlename
 370           or $head1_text_content =~ m/\($titlename_re\)/s
 371           # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
 372         ) {
 373           DEBUG and print "  Yup, it was $titlename.  Seeking next para-content...\n";
 374           ++$state;
 375         } elsif(
 376           $desperate_for_title
 377            # if we're so desperate we'll take the first
 378            #  =head1's content as a title
 379           and $head1_text_content =~ m/\S/
 380           and $head1_text_content !~ m/^[ A-Z]+$/s
 381           and $head1_text_content !~
 382             m/\((?:
 383              NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
 384              | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
 385              | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
 386             )\)/sx
 387             # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
 388           and ($max_content_length
 389             ? (length($head1_text_content) <= $max_content_length) # sanity
 390             : 1)
 391         ) {
 392           DEBUG and print "  It looks titular: \"$head1_text_content\".\n",
 393             "\n  Using that.\n";
 394           $title = $head1_text_content;
 395           last;
 396         } else {
 397           --$state;
 398           DEBUG and print "  Didn't look titular ($head1_text_content).\n",
 399             "\n  Dropping back to seeking-head1-content mode...\n";
 400         }
 401       }
 402     }
 403
 404     elsif($state == 2) {
 405       # seeking start of para (which must immediately follow)
 406       if($token->is_start and $content_containers{ $token->tagname }) {
 407         DEBUG and print "  Found start of Para.  Accumulating content...\n";
 408         $para_text_content = '';
 409         ++$state;
 410       } else {
 411         DEBUG and print
 412          "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
 413         $state = 0;
 414       }
 415     }
 416
 417     elsif($state == 3) {
 418       # accumulating text until end of Para
 419       if( $token->is_text ) {
 420         DEBUG and print "   Adding \"", $token->text, "\" to para-content.\n";
 421         $para_text_content .= $token->text;
 422         # and keep looking
 423
 424       } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
 425         DEBUG and print "  Found end of Para.  Considering content: ",
 426           $para_text_content, "\n";
 427
 428         if( $para_text_content =~ m/\S/
 429           and ($max_content_length
 430            ? (length($para_text_content) <= $max_content_length)
 431            : 1)
 432         ) {
 433           # Some minimal sanity constraints, I think.
 434           DEBUG and print "  It looks contentworthy, I guess.  Using it.\n";
 435           $title = $para_text_content;
 436           last;
 437         } else {
 438           DEBUG and print "  Doesn't look at all contentworthy!\n  Giving up.\n";
 439           undef $title;
 440           last;
 441         }
 442       }
 443     }
 444
 445     else {
 446       die "IMPOSSIBLE STATE $state!\n";  # should never happen
 447     }
 448
 449   }
 450
 451   # Put it all back!
 452   $self->unget_token(@to_unget);
 453
 454   if(DEBUG) {
 455     if(defined $title) { print "  Returing title <$title>\n" }
 456     else { print "Returning title <>\n" }
 457   }
 458
 459   return '' unless defined $title;
 460   $title =~ s/^\s+//;
 461   return $title;
 462 }
 463
 464 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 465 #
 466 #  Methods that actually do work at parse-time:
 467
 468 sub _handle_element_start {
 469   my $self = shift;   # leaving ($element_name, $attr_hash_r)
 470   DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
 471
 472   push @{ $self->{'token_buffer'} },
 473        $self->{'start_token_class'}->new(@_);
 474   return;
 475 }
 476
 477 sub _handle_text {
 478   my $self = shift;   # leaving ($text)
 479   DEBUG > 2 and print "== $_[0]\n";
 480   push @{ $self->{'token_buffer'} },
 481        $self->{'text_token_class'}->new(@_);
 482   return;
 483 }
 484
 485 sub _handle_element_end {
 486   my $self = shift;   # leaving ($element_name);
 487   DEBUG > 2 and print "-- $_[0]\n";
 488   push @{ $self->{'token_buffer'} },
 489        $self->{'end_token_class'}->new(@_);
 490   return;
 491 }
 492
 493 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 494
 495 1;
 496
 497
 498 __END__
 499
 500 =head1 NAME
 501
 502 Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
 503
 504 =head1 SYNOPSIS
 505
 506  my $parser = SomePodProcessor->new;
 507  $parser->set_source( "whatever.pod" );
 508  $parser->run;
 509
 510 Or:
 511
 512  my $parser = SomePodProcessor->new;
 513  $parser->set_source( $some_filehandle_object );
 514  $parser->run;
 515
 516 Or:
 517
 518  my $parser = SomePodProcessor->new;
 519  $parser->set_source( \$document_source );
 520  $parser->run;
 521
 522 Or:
 523
 524  my $parser = SomePodProcessor->new;
 525  $parser->set_source( \@document_lines );
 526  $parser->run;
 527
 528 And elsewhere:
 529
 530  require 5;
 531  package SomePodProcessor;
 532  use strict;
 533  use base qw(Pod::Simple::PullParser);
 534
 535  sub run {
 536    my $self = shift;
 537   Token:
 538    while(my $token = $self->get_token) {
 539      ...process each token...
 540    }
 541  }
 542
 543 =head1 DESCRIPTION
 544
 545 This class is for using Pod::Simple to build a Pod processor -- but
 546 one that uses an interface based on a stream of token objects,
 547 instead of based on events.
 548
 549 This is a subclass of L<Pod::Simple> and inherits all its methods.
 550
 551 A subclass of Pod::Simple::PullParser should define a C<run> method
 552 that calls C<< $token = $parser->get_token >> to pull tokens.
 553
 554 See the source for Pod::Simple::RTF for an example of a formatter
 555 that uses Pod::Simple::PullParser.
 556
 557 =head1 METHODS
 558
 559 =over
 560
 561 =item my $token = $parser->get_token
 562
 563 This returns the next token object (which will be of a subclass of
 564 L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
 565 the end of the document.
 566
 567 =item $parser->unget_token( $token )
 568
 569 =item $parser->unget_token( $token1, $token2, ... )
 570
 571 This restores the token object(s) to the front of the parser stream.
 572
 573 =back
 574
 575 The source has to be set before you can parse anything.  The lowest-level
 576 way is to call C<set_source>:
 577
 578 =over
 579
 580 =item $parser->set_source( $filename )
 581
 582 =item $parser->set_source( $filehandle_object )
 583
 584 =item $parser->set_source( \$document_source )
 585
 586 =item $parser->set_source( \@document_lines )
 587
 588 =back
 589
 590 Or you can call these methods, which Pod::Simple::PullParser has defined
 591 to work just like Pod::Simple's same-named methods:
 592
 593 =over
 594
 595 =item $parser->parse_file(...)
 596
 597 =item $parser->parse_string_document(...)
 598
 599 =item $parser->filter(...)
 600
 601 =item $parser->parse_from_file(...)
 602
 603 =back
 604
 605 For those to work, the Pod-processing subclass of
 606 Pod::Simple::PullParser has to have defined a $parser->run method --
 607 so it is advised that all Pod::Simple::PullParser subclasses do so.
 608 See the Synopsis above, or the source for Pod::Simple::RTF.
 609
 610 Authors of formatter subclasses might find these methods useful to
 611 call on a parser object that you haven't started pulling tokens
 612 from yet:
 613
 614 =over
 615
 616 =item my $title_string = $parser->get_title
 617
 618 This tries to get the title string out of $parser, by getting some tokens,
 619 and scanning them for the title, and then ungetting them so that you can
 620 process the token-stream from the beginning.
 621
 622 For example, suppose you have a document that starts out:
 623
 624   =head1 NAME
 625
 626   Hoo::Boy::Wowza -- Stuff B<wow> yeah!
 627
 628 $parser->get_title on that document will return "Hoo::Boy::Wowza --
 629 Stuff wow yeah!".
 630
 631 In cases where get_title can't find the title, it will return empty-string
 632 ("").
 633
 634 =item my $title_string = $parser->get_short_title
 635
 636 This is just like get_title, except that it returns just the modulename, if
 637 the title seems to be of the form "SomeModuleName -- description".
 638
 639 For example, suppose you have a document that starts out:
 640
 641   =head1 NAME
 642
 643   Hoo::Boy::Wowza -- Stuff B<wow> yeah!
 644
 645 then $parser->get_short_title on that document will return
 646 "Hoo::Boy::Wowza".
 647
 648 But if the document starts out:
 649
 650   =head1 NAME
 651
 652   Hooboy, stuff B<wow> yeah!
 653
 654 then $parser->get_short_title on that document will return "Hooboy,
 655 stuff wow yeah!".
 656
 657 If the title can't be found, then get_short_title returns empty-string
 658 ("").
 659
 660 =item $author_name   = $parser->get_author
 661
 662 This works like get_title except that it returns the contents of the
 663 "=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
 664 isn't terribly long.
 665
 666 (This method tolerates "AUTHORS" instead of "AUTHOR" too.)
 667
 668 =item $description_name = $parser->get_description
 669
 670 This works like get_title except that it returns the contents of the
 671 "=head1 PARAGRAPH\n\nParagraph...\n" section, assuming that that section
 672 isn't terribly long.
 673
 674 =item $version_block = $parser->get_version
 675
 676 This works like get_title except that it returns the contents of
 677 the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
 678 return the module's C<$VERSION>!!
 679
 680
 681 =back
 682
 683 =head1 NOTE
 684
 685 You don't actually I<have> to define a C<run> method.  If you're
 686 writing a Pod-formatter class, you should define a C<run> just so
 687 that users can call C<parse_file> etc, but you don't I<have> to.
 688
 689 And if you're not writing a formatter class, but are instead just
 690 writing a program that does something simple with a Pod::PullParser
 691 object (and not an object of a subclass), then there's no reason to
 692 bother subclassing to add a C<run> method.
 693
 694 =head1 SEE ALSO
 695
 696 L<Pod::Simple>
 697
 698 L<Pod::Simple::PullParserToken> -- and its subclasses
 699 L<Pod::Simple::PullParserStartToken>,
 700 L<Pod::Simple::PullParserTextToken>, and
 701 L<Pod::Simple::PullParserEndToken>.
 702
 703 L<HTML::TokeParser>, which inspired this.
 704
 705 =head1 COPYRIGHT AND DISCLAIMERS
 706
 707 Copyright (c) 2002 Sean M. Burke.  All rights reserved.
 708
 709 This library is free software; you can redistribute it and/or modify it
 710 under the same terms as Perl itself.
 711
 712 This program is distributed in the hope that it will be useful, but
 713 without any warranty; without even the implied warranty of
 714 merchantability or fitness for a particular purpose.
 715
 716 =head1 AUTHOR
 717
 718 Sean M. Burke C<sburke@cpan.org>
 719
 720 =cut
 721
 722
 723
 724 JUNK:
 725
 726 sub _old_get_title {  # some witchery in here
 727   my $self = $_[0];
 728   my $title;
 729   my @to_unget;
 730
 731   while(1) {
 732     push @to_unget, $self->get_token;
 733     unless(defined $to_unget[-1]) { # whoops, short doc!
 734       pop @to_unget;
 735       last;
 736     }
 737
 738     DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n";
 739
 740     (DEBUG and print "Too much in the buffer.\n"),
 741      last if @to_unget > 25; # sanity
 742
 743     my $pattern = '';
 744     if( #$to_unget[-1]->type eq 'end'
 745         #and $to_unget[-1]->tagname eq 'Para'
 746         #and
 747         ($pattern = join('',
 748          map {;
 749             ($_->type eq 'start') ? ("<" . $_->tagname .">")
 750           : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
 751           : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
 752           : "BLORP"
 753          } @to_unget
 754        )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
 755     ) {
 756       # Whee, it fits the pattern
 757       DEBUG and print "Seems to match =head1 NAME pattern.\n";
 758       $title = '';
 759       foreach my $t (reverse @to_unget) {
 760         last if $t->type eq 'start' and $t->tagname eq 'Para';
 761         $title = $t->text . $title if $t->type eq 'text';
 762       }
 763       undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
 764       last;
 765
 766     } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
 767       and !( $1 eq '1' and $2 eq 'NAME' )
 768     ) {
 769       # Well, it fits a fallback pattern
 770       DEBUG and print "Seems to match NAMEless pattern.\n";
 771       $title = '';
 772       foreach my $t (reverse @to_unget) {
 773         last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
 774         $title = $t->text . $title if $t->type eq 'text';
 775       }
 776       undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
 777       last;
 778
 779     } else {
 780       DEBUG and $pattern and print "Leading pattern: $pattern\n";
 781     }
 782   }
 783
 784   # Put it all back:
 785   $self->unget_token(@to_unget);
 786
 787   if(DEBUG) {
 788     if(defined $title) { print "  Returing title <$title>\n" }
 789     else { print "Returning title <>\n" }
 790   }
 791
 792   return '' unless defined $title;
 793   return $title;
 794 }
 795