cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm

   1
   2 require 5;
   3 package Pod::Simple::PullParser;
   4 $VERSION = '3.14';
   5 use Pod::Simple ();
   6 BEGIN {@ISA = ('Pod::Simple')}
   7
   8 use strict;
   9 use Carp ();
  10
  11 use Pod::Simple::PullParserStartToken;
  12 use Pod::Simple::PullParserEndToken;
  13 use Pod::Simple::PullParserTextToken;
  14
  15 BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
  16
  17 __PACKAGE__->_accessorize(
  18   'source_fh',         # the filehandle we're reading from
  19   'source_scalar_ref', # the scalarref we're reading from
  20   'source_arrayref',   # the arrayref we're reading from
  21 );
  22
  23 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  24 #
  25 #  And here is how we implement a pull-parser on top of a push-parser...
  26
  27 sub filter {
  28   my($self, $source) = @_;
  29   $self = $self->new unless ref $self;
  30
  31   $source = *STDIN{IO} unless defined $source;
  32   $self->set_source($source);
  33   $self->output_fh(*STDOUT{IO});
  34
  35   $self->run; # define run() in a subclass if you want to use filter()!
  36   return $self;
  37 }
  38
  39 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  40
  41 sub parse_string_document {
  42   my $this = shift;
  43   $this->set_source(\ $_[0]);
  44   $this->run;
  45 }
  46
  47 sub parse_file {
  48   my($this, $filename) = @_;
  49   $this->set_source($filename);
  50   $this->run;
  51 }
  52
  53 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  54 #  In case anyone tries to use them:
  55
  56 sub run {
  57   use Carp ();
  58   if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
  59     Carp::croak "You can call run() only on subclasses of "
  60      . __PACKAGE__;
  61   } else {
  62     Carp::croak join '',
  63       "You can't call run() because ",
  64       ref($_[0]) || $_[0], " didn't define a run() method";
  65   }
  66 }
  67
  68 sub parse_lines {
  69   use Carp ();
  70   Carp::croak "Use set_source with ", __PACKAGE__,
  71     " and subclasses, not parse_lines";
  72 }
  73
  74 sub parse_line {
  75   use Carp ();
  76   Carp::croak "Use set_source with ", __PACKAGE__,
  77     " and subclasses, not parse_line";
  78 }
  79
  80 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  81
  82 sub new {
  83   my $class = shift;
  84   my $self = $class->SUPER::new(@_);
  85   die "Couldn't construct for $class" unless $self;
  86
  87   $self->{'token_buffer'} ||= [];
  88   $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
  89   $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
  90   $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';
  91
  92   DEBUG > 1 and print "New pullparser object: $self\n";
  93
  94   return $self;
  95 }
  96
  97 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
  98
  99 sub get_token {
 100   my $self = shift;
 101   DEBUG > 1 and print "\nget_token starting up on $self.\n";
 102   DEBUG > 2 and print " Items in token-buffer (",
 103    scalar( @{ $self->{'token_buffer'} } ) ,
 104    ") :\n", map(
 105      "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
 106    ),
 107    @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
 108    "\n"
 109   ;
 110
 111   until( @{ $self->{'token_buffer'} } ) {
 112     DEBUG > 3 and print "I need to get something into my empty token buffer...\n";
 113     if($self->{'source_dead'}) {
 114       DEBUG and print "$self 's source is dead.\n";
 115       push @{ $self->{'token_buffer'} }, undef;
 116     } elsif(exists $self->{'source_fh'}) {
 117       my @lines;
 118       my $fh = $self->{'source_fh'}
 119        || Carp::croak('You have to call set_source before you can call get_token');
 120
 121       DEBUG and print "$self 's source is filehandle $fh.\n";
 122       # Read those many lines at a time
 123       for(my $i = Pod::Simple::MANY_LINES; $i--;) {
 124         DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";
 125         local $/ = $Pod::Simple::NL;
 126         push @lines, scalar(<$fh>); # readline
 127         DEBUG > 3 and print "  Line is: ",
 128           defined($lines[-1]) ? $lines[-1] : "<undef>\n";
 129         unless( defined $lines[-1] ) {
 130           DEBUG and print "That's it for that source fh!  Killing.\n";
 131           delete $self->{'source_fh'}; # so it can be GC'd
 132           last;
 133         }
 134          # but pass thru the undef, which will set source_dead to true
 135
 136         # TODO: look to see if $lines[-1] is =encoding, and if so,
 137         # do horribly magic things
 138
 139       }
 140
 141       if(DEBUG > 8) {
 142         print "* I've gotten ", scalar(@lines), " lines:\n";
 143         foreach my $l (@lines) {
 144           if(defined $l) {
 145             print "  line {$l}\n";
 146           } else {
 147             print "  line undef\n";
 148           }
 149         }
 150         print "* end of ", scalar(@lines), " lines\n";
 151       }
 152
 153       $self->SUPER::parse_lines(@lines);
 154
 155     } elsif(exists $self->{'source_arrayref'}) {
 156       DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",
 157        scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
 158
 159       DEBUG > 3 and print "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
 160       $self->SUPER::parse_lines(
 161         splice @{ $self->{'source_arrayref'} },
 162         0,
 163         Pod::Simple::MANY_LINES
 164       );
 165       unless( @{ $self->{'source_arrayref'} } ) {
 166         DEBUG and print "That's it for that source arrayref!  Killing.\n";
 167         $self->SUPER::parse_lines(undef);
 168         delete $self->{'source_arrayref'}; # so it can be GC'd
 169       }
 170        # to make sure that an undef is always sent to signal end-of-stream
 171
 172     } elsif(exists $self->{'source_scalar_ref'}) {
 173
 174       DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
 175         length(${ $self->{'source_scalar_ref'} }) -
 176         (pos(${ $self->{'source_scalar_ref'} }) || 0),
 177         " characters left to parse.\n";
 178
 179       DEBUG > 3 and print " Fetching a line from source-string...\n";
 180       if( ${ $self->{'source_scalar_ref'} } =~
 181         m/([^\n\r]*)((?:\r?\n)?)/g
 182       ) {
 183         #print(">> $1\n"),
 184         $self->SUPER::parse_lines($1)
 185          if length($1) or length($2)
 186           or pos(     ${ $self->{'source_scalar_ref'} })
 187            != length( ${ $self->{'source_scalar_ref'} });
 188          # I.e., unless it's a zero-length "empty line" at the very
 189          #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
 190       } else { # that's the end.  Byebye
 191         $self->SUPER::parse_lines(undef);
 192         delete $self->{'source_scalar_ref'};
 193         DEBUG and print "That's it for that source scalarref!  Killing.\n";
 194       }
 195
 196
 197     } else {
 198       die "What source??";
 199     }
 200   }
 201   DEBUG and print "get_token about to return ",
 202    Pod::Simple::pretty( @{$self->{'token_buffer'}}
 203      ? $self->{'token_buffer'}[-1] : undef
 204    ), "\n";
 205   return shift @{$self->{'token_buffer'}}; # that's an undef if empty
 206 }
 207
 208 sub unget_token {
 209   my $self = shift;
 210   DEBUG and print "Ungetting ", scalar(@_), " tokens: ",
 211    @_ ? "@_\n" : "().\n";
 212   foreach my $t (@_) {
 213     Carp::croak "Can't unget that, because it's not a token -- it's undef!"
 214      unless defined $t;
 215     Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
 216      unless ref $t;
 217     Carp::croak "Can't unget $t, because it's not a token object!"
 218      unless UNIVERSAL::can($t, 'type');
 219   }
 220
 221   unshift @{$self->{'token_buffer'}}, @_;
 222   DEBUG > 1 and print "Token buffer now has ",
 223    scalar(@{$self->{'token_buffer'}}), " items in it.\n";
 224   return;
 225 }
 226
 227 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 228
 229 # $self->{'source_filename'} = $source;
 230
 231 sub set_source {
 232   my $self = shift @_;
 233   return $self->{'source_fh'} unless @_;
 234   my $handle;
 235   if(!defined $_[0]) {
 236     Carp::croak("Can't use empty-string as a source for set_source");
 237   } elsif(ref(\( $_[0] )) eq 'GLOB') {
 238     $self->{'source_filename'} = '' . ($handle = $_[0]);
 239     DEBUG and print "$self 's source is glob $_[0]\n";
 240     # and fall thru
 241   } elsif(ref( $_[0] ) eq 'SCALAR') {
 242     $self->{'source_scalar_ref'} = $_[0];
 243     DEBUG and print "$self 's source is scalar ref $_[0]\n";
 244     return;
 245   } elsif(ref( $_[0] ) eq 'ARRAY') {
 246     $self->{'source_arrayref'} = $_[0];
 247     DEBUG and print "$self 's source is array ref $_[0]\n";
 248     return;
 249   } elsif(ref $_[0]) {
 250     $self->{'source_filename'} = '' . ($handle = $_[0]);
 251     DEBUG and print "$self 's source is fh-obj $_[0]\n";
 252   } elsif(!length $_[0]) {
 253     Carp::croak("Can't use empty-string as a source for set_source");
 254   } else {  # It's a filename!
 255     DEBUG and print "$self 's source is filename $_[0]\n";
 256     {
 257       local *PODSOURCE;
 258       open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
 259       $handle = *PODSOURCE{IO};
 260     }
 261     $self->{'source_filename'} = $_[0];
 262     DEBUG and print "  Its name is $_[0].\n";
 263
 264     # TODO: file-discipline things here!
 265   }
 266
 267   $self->{'source_fh'} = $handle;
 268   DEBUG and print "  Its handle is $handle\n";
 269   return 1;
 270 }
 271
 272 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 273
 274 sub get_title_short {  shift->get_short_title(@_)  } # alias
 275
 276 sub get_short_title {
 277   my $title = shift->get_title(@_);
 278   $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
 279     # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
 280   return $title;
 281 }
 282
 283 sub get_title       { shift->_get_titled_section(
 284   'NAME', max_token => 50, desperate => 1, @_)
 285 }
 286 sub get_version     { shift->_get_titled_section(
 287    'VERSION',
 288     max_token => 400,
 289     accept_verbatim => 1,
 290     max_content_length => 3_000,
 291    @_,
 292   );
 293 }
 294 sub get_description { shift->_get_titled_section(
 295    'DESCRIPTION',
 296     max_token => 400,
 297     max_content_length => 3_000,
 298    @_,
 299 ) }
 300
 301 sub get_authors     { shift->get_author(@_) }  # a harmless alias
 302
 303 sub get_author      {
 304   my $this = shift;
 305   # Max_token is so high because these are
 306   #  typically at the end of the document:
 307   $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
 308   $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
 309 }
 310
 311 #--------------------------------------------------------------------------
 312
 313 sub _get_titled_section {
 314   # Based on a get_title originally contributed by Graham Barr
 315   my($self, $titlename, %options) = (@_);
 316
 317   my $max_token            = delete $options{'max_token'};
 318   my $desperate_for_title  = delete $options{'desperate'};
 319   my $accept_verbatim      = delete $options{'accept_verbatim'};
 320   my $max_content_length   = delete $options{'max_content_length'};
 321   my $nocase               = delete $options{'nocase'};
 322   $max_content_length = 120 unless defined $max_content_length;
 323
 324   Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
 325     . join " ", map "[$_]", sort keys %options
 326   )
 327    if keys %options;
 328
 329   my %content_containers;
 330   $content_containers{'Para'} = 1;
 331   if($accept_verbatim) {
 332     $content_containers{'Verbatim'} = 1;
 333     $content_containers{'VerbatimFormatted'} = 1;
 334   }
 335
 336   my $token_count = 0;
 337   my $title;
 338   my @to_unget;
 339   my $state = 0;
 340   my $depth = 0;
 341
 342   Carp::croak "What kind of titlename is \"$titlename\"?!" unless
 343    defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
 344   my $titlename_re = quotemeta($titlename);
 345
 346   my $head1_text_content;
 347   my $para_text_content;
 348
 349   while(
 350     ++$token_count <= ($max_token || 1_000_000)
 351     and defined(my $token = $self->get_token)
 352   ) {
 353     push @to_unget, $token;
 354
 355     if ($state == 0) { # seeking =head1
 356       if( $token->is_start and $token->tagname eq 'head1' ) {
 357         DEBUG and print "  Found head1.  Seeking content...\n";
 358         ++$state;
 359         $head1_text_content = '';
 360       }
 361     }
 362
 363     elsif($state == 1) { # accumulating text until end of head1
 364       if( $token->is_text ) {
 365         DEBUG and print "   Adding \"", $token->text, "\" to head1-content.\n";
 366         $head1_text_content .= $token->text;
 367       } elsif( $token->is_end and $token->tagname eq 'head1' ) {
 368         DEBUG and print "  Found end of head1.  Considering content...\n";
 369         $head1_text_content = uc $head1_text_content if $nocase;
 370         if($head1_text_content eq $titlename
 371           or $head1_text_content =~ m/\($titlename_re\)/s
 372           # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
 373         ) {
 374           DEBUG and print "  Yup, it was $titlename.  Seeking next para-content...\n";
 375           ++$state;
 376         } elsif(
 377           $desperate_for_title
 378            # if we're so desperate we'll take the first
 379            #  =head1's content as a title
 380           and $head1_text_content =~ m/\S/
 381           and $head1_text_content !~ m/^[ A-Z]+$/s
 382           and $head1_text_content !~
 383             m/\((?:
 384              NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
 385              | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
 386              | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
 387             )\)/sx
 388             # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
 389           and ($max_content_length
 390             ? (length($head1_text_content) <= $max_content_length) # sanity
 391             : 1)
 392         ) {
 393           DEBUG and print "  It looks titular: \"$head1_text_content\".\n",
 394             "\n  Using that.\n";
 395           $title = $head1_text_content;
 396           last;
 397         } else {
 398           --$state;
 399           DEBUG and print "  Didn't look titular ($head1_text_content).\n",
 400             "\n  Dropping back to seeking-head1-content mode...\n";
 401         }
 402       }
 403     }
 404
 405     elsif($state == 2) {
 406       # seeking start of para (which must immediately follow)
 407       if($token->is_start and $content_containers{ $token->tagname }) {
 408         DEBUG and print "  Found start of Para.  Accumulating content...\n";
 409         $para_text_content = '';
 410         ++$state;
 411       } else {
 412         DEBUG and print
 413          "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
 414         $state = 0;
 415       }
 416     }
 417
 418     elsif($state == 3) {
 419       # accumulating text until end of Para
 420       if( $token->is_text ) {
 421         DEBUG and print "   Adding \"", $token->text, "\" to para-content.\n";
 422         $para_text_content .= $token->text;
 423         # and keep looking
 424
 425       } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
 426         DEBUG and print "  Found end of Para.  Considering content: ",
 427           $para_text_content, "\n";
 428
 429         if( $para_text_content =~ m/\S/
 430           and ($max_content_length
 431            ? (length($para_text_content) <= $max_content_length)
 432            : 1)
 433         ) {
 434           # Some minimal sanity constraints, I think.
 435           DEBUG and print "  It looks contentworthy, I guess.  Using it.\n";
 436           $title = $para_text_content;
 437           last;
 438         } else {
 439           DEBUG and print "  Doesn't look at all contentworthy!\n  Giving up.\n";
 440           undef $title;
 441           last;
 442         }
 443       }
 444     }
 445
 446     else {
 447       die "IMPOSSIBLE STATE $state!\n";  # should never happen
 448     }
 449
 450   }
 451
 452   # Put it all back!
 453   $self->unget_token(@to_unget);
 454
 455   if(DEBUG) {
 456     if(defined $title) { print "  Returing title <$title>\n" }
 457     else { print "Returning title <>\n" }
 458   }
 459
 460   return '' unless defined $title;
 461   $title =~ s/^\s+//;
 462   return $title;
 463 }
 464
 465 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 466 #
 467 #  Methods that actually do work at parse-time:
 468
 469 sub _handle_element_start {
 470   my $self = shift;   # leaving ($element_name, $attr_hash_r)
 471   DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
 472
 473   push @{ $self->{'token_buffer'} },
 474        $self->{'start_token_class'}->new(@_);
 475   return;
 476 }
 477
 478 sub _handle_text {
 479   my $self = shift;   # leaving ($text)
 480   DEBUG > 2 and print "== $_[0]\n";
 481   push @{ $self->{'token_buffer'} },
 482        $self->{'text_token_class'}->new(@_);
 483   return;
 484 }
 485
 486 sub _handle_element_end {
 487   my $self = shift;   # leaving ($element_name);
 488   DEBUG > 2 and print "-- $_[0]\n";
 489   push @{ $self->{'token_buffer'} },
 490        $self->{'end_token_class'}->new(@_);
 491   return;
 492 }
 493
 494 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 495
 496 1;
 497
 498
 499 __END__
 500
 501 =head1 NAME
 502
 503 Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
 504
 505 =head1 SYNOPSIS
 506
 507  my $parser = SomePodProcessor->new;
 508  $parser->set_source( "whatever.pod" );
 509  $parser->run;
 510
 511 Or:
 512
 513  my $parser = SomePodProcessor->new;
 514  $parser->set_source( $some_filehandle_object );
 515  $parser->run;
 516
 517 Or:
 518
 519  my $parser = SomePodProcessor->new;
 520  $parser->set_source( \$document_source );
 521  $parser->run;
 522
 523 Or:
 524
 525  my $parser = SomePodProcessor->new;
 526  $parser->set_source( \@document_lines );
 527  $parser->run;
 528
 529 And elsewhere:
 530
 531  require 5;
 532  package SomePodProcessor;
 533  use strict;
 534  use base qw(Pod::Simple::PullParser);
 535
 536  sub run {
 537    my $self = shift;
 538   Token:
 539    while(my $token = $self->get_token) {
 540      ...process each token...
 541    }
 542  }
 543
 544 =head1 DESCRIPTION
 545
 546 This class is for using Pod::Simple to build a Pod processor -- but
 547 one that uses an interface based on a stream of token objects,
 548 instead of based on events.
 549
 550 This is a subclass of L<Pod::Simple> and inherits all its methods.
 551
 552 A subclass of Pod::Simple::PullParser should define a C<run> method
 553 that calls C<< $token = $parser->get_token >> to pull tokens.
 554
 555 See the source for Pod::Simple::RTF for an example of a formatter
 556 that uses Pod::Simple::PullParser.
 557
 558 =head1 METHODS
 559
 560 =over
 561
 562 =item my $token = $parser->get_token
 563
 564 This returns the next token object (which will be of a subclass of
 565 L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
 566 the end of the document.
 567
 568 =item $parser->unget_token( $token )
 569
 570 =item $parser->unget_token( $token1, $token2, ... )
 571
 572 This restores the token object(s) to the front of the parser stream.
 573
 574 =back
 575
 576 The source has to be set before you can parse anything.  The lowest-level
 577 way is to call C<set_source>:
 578
 579 =over
 580
 581 =item $parser->set_source( $filename )
 582
 583 =item $parser->set_source( $filehandle_object )
 584
 585 =item $parser->set_source( \$document_source )
 586
 587 =item $parser->set_source( \@document_lines )
 588
 589 =back
 590
 591 Or you can call these methods, which Pod::Simple::PullParser has defined
 592 to work just like Pod::Simple's same-named methods:
 593
 594 =over
 595
 596 =item $parser->parse_file(...)
 597
 598 =item $parser->parse_string_document(...)
 599
 600 =item $parser->filter(...)
 601
 602 =item $parser->parse_from_file(...)
 603
 604 =back
 605
 606 For those to work, the Pod-processing subclass of
 607 Pod::Simple::PullParser has to have defined a $parser->run method --
 608 so it is advised that all Pod::Simple::PullParser subclasses do so.
 609 See the Synopsis above, or the source for Pod::Simple::RTF.
 610
 611 Authors of formatter subclasses might find these methods useful to
 612 call on a parser object that you haven't started pulling tokens
 613 from yet:
 614
 615 =over
 616
 617 =item my $title_string = $parser->get_title
 618
 619 This tries to get the title string out of $parser, by getting some tokens,
 620 and scanning them for the title, and then ungetting them so that you can
 621 process the token-stream from the beginning.
 622
 623 For example, suppose you have a document that starts out:
 624
 625   =head1 NAME
 626
 627   Hoo::Boy::Wowza -- Stuff B<wow> yeah!
 628
 629 $parser->get_title on that document will return "Hoo::Boy::Wowza --
 630 Stuff wow yeah!". If the document starts with:
 631
 632   =head1 Name
 633
 634   Hoo::Boy::W00t -- Stuff B<w00t> yeah!
 635
 636 Then you'll need to pass the C<nocase> option in order to recognize "Name":
 637
 638   $parser->get_title(nocase => 1);
 639
 640 In cases where get_title can't find the title, it will return empty-string
 641 ("").
 642
 643 =item my $title_string = $parser->get_short_title
 644
 645 This is just like get_title, except that it returns just the modulename, if
 646 the title seems to be of the form "SomeModuleName -- description".
 647
 648 For example, suppose you have a document that starts out:
 649
 650   =head1 NAME
 651
 652   Hoo::Boy::Wowza -- Stuff B<wow> yeah!
 653
 654 then $parser->get_short_title on that document will return
 655 "Hoo::Boy::Wowza".
 656
 657 But if the document starts out:
 658
 659   =head1 NAME
 660
 661   Hooboy, stuff B<wow> yeah!
 662
 663 then $parser->get_short_title on that document will return "Hooboy,
 664 stuff wow yeah!". If the document starts with:
 665
 666   =head1 Name
 667
 668   Hoo::Boy::W00t -- Stuff B<w00t> yeah!
 669
 670 Then you'll need to pass the C<nocase> option in order to recognize "Name":
 671
 672   $parser->get_short_title(nocase => 1);
 673
 674 If the title can't be found, then get_short_title returns empty-string
 675 ("").
 676
 677 =item $author_name   = $parser->get_author
 678
 679 This works like get_title except that it returns the contents of the
 680 "=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
 681 isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
 682 section, pass the C<nocase> otpion:
 683
 684   $parser->get_author(nocase => 1);
 685
 686 (This method tolerates "AUTHORS" instead of "AUTHOR" too.)
 687
 688 =item $description_name = $parser->get_description
 689
 690 This works like get_title except that it returns the contents of the
 691 "=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
 692 isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
 693 section, pass the C<nocase> otpion:
 694
 695   $parser->get_description(nocase => 1);
 696
 697 =item $version_block = $parser->get_version
 698
 699 This works like get_title except that it returns the contents of
 700 the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
 701 return the module's C<$VERSION>!! To recognize a
 702 "=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> otpion:
 703
 704   $parser->get_version(nocase => 1);
 705
 706 =back
 707
 708 =head1 NOTE
 709
 710 You don't actually I<have> to define a C<run> method.  If you're
 711 writing a Pod-formatter class, you should define a C<run> just so
 712 that users can call C<parse_file> etc, but you don't I<have> to.
 713
 714 And if you're not writing a formatter class, but are instead just
 715 writing a program that does something simple with a Pod::PullParser
 716 object (and not an object of a subclass), then there's no reason to
 717 bother subclassing to add a C<run> method.
 718
 719 =head1 SEE ALSO
 720
 721 L<Pod::Simple>
 722
 723 L<Pod::Simple::PullParserToken> -- and its subclasses
 724 L<Pod::Simple::PullParserStartToken>,
 725 L<Pod::Simple::PullParserTextToken>, and
 726 L<Pod::Simple::PullParserEndToken>.
 727
 728 L<HTML::TokeParser>, which inspired this.
 729
 730 =head1 SUPPORT
 731
 732 Questions or discussion about POD and Pod::Simple should be sent to the
 733 pod-people@perl.org mail list. Send an empty email to
 734 pod-people-subscribe@perl.org to subscribe.
 735
 736 This module is managed in an open GitHub repository,
 737 L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
 738 to clone L<git://github.com/theory/pod-simple.git> and send patches!
 739
 740 Patches against Pod::Simple are welcome. Please send bug reports to
 741 <bug-pod-simple@rt.cpan.org>.
 742
 743 =head1 COPYRIGHT AND DISCLAIMERS
 744
 745 Copyright (c) 2002 Sean M. Burke.
 746
 747 This library is free software; you can redistribute it and/or modify it
 748 under the same terms as Perl itself.
 749
 750 This program is distributed in the hope that it will be useful, but
 751 without any warranty; without even the implied warranty of
 752 merchantability or fitness for a particular purpose.
 753
 754 =head1 AUTHOR
 755
 756 Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
 757 But don't bother him, he's retired.
 758
 759 Pod::Simple is maintained by:
 760
 761 =over
 762
 763 =item * Allison Randal C<allison@perl.org>
 764
 765 =item * Hans Dieter Pearcey C<hdp@cpan.org>
 766
 767 =item * David E. Wheeler C<dwheeler@cpan.org>
 768
 769 =back
 770
 771 =cut
 772 JUNK:
 773
 774 sub _old_get_title {  # some witchery in here
 775   my $self = $_[0];
 776   my $title;
 777   my @to_unget;
 778
 779   while(1) {
 780     push @to_unget, $self->get_token;
 781     unless(defined $to_unget[-1]) { # whoops, short doc!
 782       pop @to_unget;
 783       last;
 784     }
 785
 786     DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n";
 787
 788     (DEBUG and print "Too much in the buffer.\n"),
 789      last if @to_unget > 25; # sanity
 790
 791     my $pattern = '';
 792     if( #$to_unget[-1]->type eq 'end'
 793         #and $to_unget[-1]->tagname eq 'Para'
 794         #and
 795         ($pattern = join('',
 796          map {;
 797             ($_->type eq 'start') ? ("<" . $_->tagname .">")
 798           : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
 799           : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
 800           : "BLORP"
 801          } @to_unget
 802        )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
 803     ) {
 804       # Whee, it fits the pattern
 805       DEBUG and print "Seems to match =head1 NAME pattern.\n";
 806       $title = '';
 807       foreach my $t (reverse @to_unget) {
 808         last if $t->type eq 'start' and $t->tagname eq 'Para';
 809         $title = $t->text . $title if $t->type eq 'text';
 810       }
 811       undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
 812       last;
 813
 814     } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
 815       and !( $1 eq '1' and $2 eq 'NAME' )
 816     ) {
 817       # Well, it fits a fallback pattern
 818       DEBUG and print "Seems to match NAMEless pattern.\n";
 819       $title = '';
 820       foreach my $t (reverse @to_unget) {
 821         last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
 822         $title = $t->text . $title if $t->type eq 'text';
 823       }
 824       undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
 825       last;
 826
 827     } else {
 828       DEBUG and $pattern and print "Leading pattern: $pattern\n";
 829     }
 830   }
 831
 832   # Put it all back:
 833   $self->unget_token(@to_unget);
 834
 835   if(DEBUG) {
 836     if(defined $title) { print "  Returing title <$title>\n" }
 837     else { print "Returning title <>\n" }
 838   }
 839
 840   return '' unless defined $title;
 841   return $title;
 842 }
 843