Commit | Line | Data |
351625bd |
1 | |
2 | require 5; |
3 | package Pod::Simple::PullParser; |
4 | $VERSION = '2.02'; |
5 | use Pod::Simple (); |
6 | BEGIN {@ISA = ('Pod::Simple')} |
7 | |
8 | use strict; |
9 | use Carp (); |
10 | |
11 | use Pod::Simple::PullParserStartToken; |
12 | use Pod::Simple::PullParserEndToken; |
13 | use Pod::Simple::PullParserTextToken; |
14 | |
15 | BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } |
16 | |
17 | __PACKAGE__->_accessorize( |
18 | 'source_fh', # the filehandle we're reading from |
19 | 'source_scalar_ref', # the scalarref we're reading from |
20 | 'source_arrayref', # the arrayref we're reading from |
21 | ); |
22 | |
23 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
24 | # |
25 | # And here is how we implement a pull-parser on top of a push-parser... |
26 | |
27 | sub filter { |
28 | my($self, $source) = @_; |
29 | $self = $self->new unless ref $self; |
30 | |
31 | $source = *STDIN{IO} unless defined $source; |
32 | $self->set_source($source); |
33 | $self->output_fh(*STDOUT{IO}); |
34 | |
35 | $self->run; # define run() in a subclass if you want to use filter()! |
36 | return $self; |
37 | } |
38 | |
39 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
40 | |
41 | sub parse_string_document { |
42 | my $this = shift; |
43 | $this->set_source(\ $_[0]); |
44 | $this->run; |
45 | } |
46 | |
47 | sub parse_file { |
48 | my($this, $filename) = @_; |
49 | $this->set_source($filename); |
50 | $this->run; |
51 | } |
52 | |
53 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
54 | # In case anyone tries to use them: |
55 | |
56 | sub run { |
57 | use Carp (); |
58 | if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed! |
59 | Carp::croak "You can call run() only on subclasses of " |
60 | . __PACKAGE__; |
61 | } else { |
62 | Carp::croak join '', |
63 | "You can't call run() because ", |
64 | ref($_[0]) || $_[0], " didn't define a run() method"; |
65 | } |
66 | } |
67 | |
68 | sub parse_lines { |
69 | use Carp (); |
70 | Carp::croak "Use set_source with ", __PACKAGE__, |
71 | " and subclasses, not parse_lines"; |
72 | } |
73 | |
74 | sub parse_line { |
75 | use Carp (); |
76 | Carp::croak "Use set_source with ", __PACKAGE__, |
77 | " and subclasses, not parse_line"; |
78 | } |
79 | |
80 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
81 | |
82 | sub new { |
83 | my $class = shift; |
84 | my $self = $class->SUPER::new(@_); |
85 | die "Couldn't construct for $class" unless $self; |
86 | |
87 | $self->{'token_buffer'} ||= []; |
88 | $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken'; |
89 | $self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken'; |
90 | $self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken'; |
91 | |
92 | DEBUG > 1 and print "New pullparser object: $self\n"; |
93 | |
94 | return $self; |
95 | } |
96 | |
97 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
98 | |
99 | sub get_token { |
100 | my $self = shift; |
101 | DEBUG > 1 and print "\nget_token starting up on $self.\n"; |
102 | DEBUG > 2 and print " Items in token-buffer (", |
103 | scalar( @{ $self->{'token_buffer'} } ) , |
104 | ") :\n", map( |
105 | " " . $_->dump . "\n", @{ $self->{'token_buffer'} } |
106 | ), |
107 | @{ $self->{'token_buffer'} } ? '' : ' (no tokens)', |
108 | "\n" |
109 | ; |
110 | |
111 | until( @{ $self->{'token_buffer'} } ) { |
112 | DEBUG > 3 and print "I need to get something into my empty token buffer...\n"; |
113 | if($self->{'source_dead'}) { |
114 | DEBUG and print "$self 's source is dead.\n"; |
115 | push @{ $self->{'token_buffer'} }, undef; |
116 | } elsif(exists $self->{'source_fh'}) { |
117 | my @lines; |
118 | my $fh = $self->{'source_fh'} |
119 | || Carp::croak('You have to call set_source before you can call get_token'); |
120 | |
121 | DEBUG and print "$self 's source is filehandle $fh.\n"; |
122 | # Read those many lines at a time |
123 | for(my $i = Pod::Simple::MANY_LINES; $i--;) { |
124 | DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n"; |
125 | local $/ = $Pod::Simple::NL; |
126 | push @lines, scalar(<$fh>); # readline |
127 | DEBUG > 3 and print " Line is: ", |
128 | defined($lines[-1]) ? $lines[-1] : "<undef>\n"; |
129 | unless( defined $lines[-1] ) { |
130 | DEBUG and print "That's it for that source fh! Killing.\n"; |
131 | delete $self->{'source_fh'}; # so it can be GC'd |
132 | last; |
133 | } |
134 | # but pass thru the undef, which will set source_dead to true |
135 | |
136 | # TODO: look to see if $lines[-1] is =encoding, and if so, |
137 | # do horribly magic things |
138 | |
139 | } |
140 | |
141 | if(DEBUG > 8) { |
142 | print "* I've gotten ", scalar(@lines), " lines:\n"; |
143 | foreach my $l (@lines) { |
144 | if(defined $l) { |
145 | print " line {$l}\n"; |
146 | } else { |
147 | print " line undef\n"; |
148 | } |
149 | } |
150 | print "* end of ", scalar(@lines), " lines\n"; |
151 | } |
152 | |
153 | $self->SUPER::parse_lines(@lines); |
154 | |
155 | } elsif(exists $self->{'source_arrayref'}) { |
156 | DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ", |
157 | scalar(@{$self->{'source_arrayref'}}), " items left in it.\n"; |
158 | |
159 | DEBUG > 3 and print " Fetching ", Pod::Simple::MANY_LINES, " lines.\n"; |
160 | $self->SUPER::parse_lines( |
161 | splice @{ $self->{'source_arrayref'} }, |
162 | 0, |
163 | Pod::Simple::MANY_LINES |
164 | ); |
165 | unless( @{ $self->{'source_arrayref'} } ) { |
166 | DEBUG and print "That's it for that source arrayref! Killing.\n"; |
167 | $self->SUPER::parse_lines(undef); |
168 | delete $self->{'source_arrayref'}; # so it can be GC'd |
169 | } |
170 | # to make sure that an undef is always sent to signal end-of-stream |
171 | |
172 | } elsif(exists $self->{'source_scalar_ref'}) { |
173 | |
174 | DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ", |
175 | length(${ $self->{'source_scalar_ref'} }) - |
176 | (pos(${ $self->{'source_scalar_ref'} }) || 0), |
177 | " characters left to parse.\n"; |
178 | |
179 | DEBUG > 3 and print " Fetching a line from source-string...\n"; |
180 | if( ${ $self->{'source_scalar_ref'} } =~ |
181 | m/([^\n\r]*)((?:\r?\n)?)/g |
182 | ) { |
183 | #print(">> $1\n"), |
184 | $self->SUPER::parse_lines($1) |
185 | if length($1) or length($2) |
186 | or pos( ${ $self->{'source_scalar_ref'} }) |
187 | != length( ${ $self->{'source_scalar_ref'} }); |
188 | # I.e., unless it's a zero-length "empty line" at the very |
189 | # end of "foo\nbar\n" (i.e., between the \n and the EOS). |
190 | } else { # that's the end. Byebye |
191 | $self->SUPER::parse_lines(undef); |
192 | delete $self->{'source_scalar_ref'}; |
193 | DEBUG and print "That's it for that source scalarref! Killing.\n"; |
194 | } |
195 | |
196 | |
197 | } else { |
198 | die "What source??"; |
199 | } |
200 | } |
201 | DEBUG and print "get_token about to return ", |
202 | Pod::Simple::pretty( @{$self->{'token_buffer'}} |
203 | ? $self->{'token_buffer'}[-1] : undef |
204 | ), "\n"; |
205 | return shift @{$self->{'token_buffer'}}; # that's an undef if empty |
206 | } |
207 | |
208 | use UNIVERSAL (); |
209 | sub unget_token { |
210 | my $self = shift; |
211 | DEBUG and print "Ungetting ", scalar(@_), " tokens: ", |
212 | @_ ? "@_\n" : "().\n"; |
213 | foreach my $t (@_) { |
214 | Carp::croak "Can't unget that, because it's not a token -- it's undef!" |
215 | unless defined $t; |
216 | Carp::croak "Can't unget $t, because it's not a token -- it's a string!" |
217 | unless ref $t; |
218 | Carp::croak "Can't unget $t, because it's not a token object!" |
219 | unless UNIVERSAL::can($t, 'type'); |
220 | } |
221 | |
222 | unshift @{$self->{'token_buffer'}}, @_; |
223 | DEBUG > 1 and print "Token buffer now has ", |
224 | scalar(@{$self->{'token_buffer'}}), " items in it.\n"; |
225 | return; |
226 | } |
227 | |
228 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
229 | |
230 | # $self->{'source_filename'} = $source; |
231 | |
232 | sub set_source { |
233 | my $self = shift @_; |
234 | return $self->{'source_fh'} unless @_; |
235 | my $handle; |
236 | if(!defined $_[0]) { |
237 | Carp::croak("Can't use empty-string as a source for set_source"); |
238 | } elsif(ref(\( $_[0] )) eq 'GLOB') { |
239 | $self->{'source_filename'} = '' . ($handle = $_[0]); |
240 | DEBUG and print "$self 's source is glob $_[0]\n"; |
241 | # and fall thru |
242 | } elsif(ref( $_[0] ) eq 'SCALAR') { |
243 | $self->{'source_scalar_ref'} = $_[0]; |
244 | DEBUG and print "$self 's source is scalar ref $_[0]\n"; |
245 | return; |
246 | } elsif(ref( $_[0] ) eq 'ARRAY') { |
247 | $self->{'source_arrayref'} = $_[0]; |
248 | DEBUG and print "$self 's source is array ref $_[0]\n"; |
249 | return; |
250 | } elsif(ref $_[0]) { |
251 | $self->{'source_filename'} = '' . ($handle = $_[0]); |
252 | DEBUG and print "$self 's source is fh-obj $_[0]\n"; |
253 | } elsif(!length $_[0]) { |
254 | Carp::croak("Can't use empty-string as a source for set_source"); |
255 | } else { # It's a filename! |
256 | DEBUG and print "$self 's source is filename $_[0]\n"; |
257 | { |
258 | local *PODSOURCE; |
259 | open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!"; |
260 | $handle = *PODSOURCE{IO}; |
261 | } |
262 | $self->{'source_filename'} = $_[0]; |
263 | DEBUG and print " Its name is $_[0].\n"; |
264 | |
265 | # TODO: file-discipline things here! |
266 | } |
267 | |
268 | $self->{'source_fh'} = $handle; |
269 | DEBUG and print " Its handle is $handle\n"; |
270 | return 1; |
271 | } |
272 | |
273 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
274 | |
275 | sub get_title_short { shift->get_short_title(@_) } # alias |
276 | |
277 | sub get_short_title { |
278 | my $title = shift->get_title(@_); |
279 | $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s; |
280 | # turn "Foo::Bar -- bars for your foo" into "Foo::Bar" |
281 | return $title; |
282 | } |
283 | |
284 | sub get_title { shift->_get_titled_section( |
285 | 'NAME', max_token => 50, desperate => 1, @_) |
286 | } |
287 | sub get_version { shift->_get_titled_section( |
288 | 'VERSION', |
289 | max_token => 400, |
290 | accept_verbatim => 1, |
291 | max_content_length => 3_000, |
292 | @_, |
293 | ); |
294 | } |
295 | sub get_description { shift->_get_titled_section( |
296 | 'DESCRIPTION', |
297 | max_token => 400, |
298 | max_content_length => 3_000, |
299 | @_, |
300 | ) } |
301 | |
302 | sub get_authors { shift->get_author(@_) } # a harmless alias |
303 | |
304 | sub get_author { |
305 | my $this = shift; |
306 | # Max_token is so high because these are |
307 | # typically at the end of the document: |
308 | $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) || |
309 | $this->_get_titled_section('AUTHORS', max_token => 10_000, @_); |
310 | } |
311 | |
312 | #-------------------------------------------------------------------------- |
313 | |
314 | sub _get_titled_section { |
315 | # Based on a get_title originally contributed by Graham Barr |
316 | my($self, $titlename, %options) = (@_); |
317 | |
318 | my $max_token = delete $options{'max_token'}; |
319 | my $desperate_for_title = delete $options{'desperate'}; |
320 | my $accept_verbatim = delete $options{'accept_verbatim'}; |
321 | my $max_content_length = delete $options{'max_content_length'}; |
322 | $max_content_length = 120 unless defined $max_content_length; |
323 | |
324 | Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ") |
325 | . join " ", map "[$_]", sort keys %options |
326 | ) |
327 | if keys %options; |
328 | |
329 | my %content_containers; |
330 | $content_containers{'Para'} = 1; |
331 | if($accept_verbatim) { |
332 | $content_containers{'Verbatim'} = 1; |
333 | $content_containers{'VerbatimFormatted'} = 1; |
334 | } |
335 | |
336 | my $token_count = 0; |
337 | my $title; |
338 | my @to_unget; |
339 | my $state = 0; |
340 | my $depth = 0; |
341 | |
342 | Carp::croak "What kind of titlename is \"$titlename\"?!" unless |
343 | defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity |
344 | my $titlename_re = quotemeta($titlename); |
345 | |
346 | my $head1_text_content; |
347 | my $para_text_content; |
348 | |
349 | while( |
350 | ++$token_count <= ($max_token || 1_000_000) |
351 | and defined(my $token = $self->get_token) |
352 | ) { |
353 | push @to_unget, $token; |
354 | |
355 | if ($state == 0) { # seeking =head1 |
356 | if( $token->is_start and $token->tagname eq 'head1' ) { |
357 | DEBUG and print " Found head1. Seeking content...\n"; |
358 | ++$state; |
359 | $head1_text_content = ''; |
360 | } |
361 | } |
362 | |
363 | elsif($state == 1) { # accumulating text until end of head1 |
364 | if( $token->is_text ) { |
365 | DEBUG and print " Adding \"", $token->text, "\" to head1-content.\n"; |
366 | $head1_text_content .= $token->text; |
367 | } elsif( $token->is_end and $token->tagname eq 'head1' ) { |
368 | DEBUG and print " Found end of head1. Considering content...\n"; |
369 | if($head1_text_content eq $titlename |
370 | or $head1_text_content =~ m/\($titlename_re\)/s |
371 | # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n |
372 | ) { |
373 | DEBUG and print " Yup, it was $titlename. Seeking next para-content...\n"; |
374 | ++$state; |
375 | } elsif( |
376 | $desperate_for_title |
377 | # if we're so desperate we'll take the first |
378 | # =head1's content as a title |
379 | and $head1_text_content =~ m/\S/ |
380 | and $head1_text_content !~ m/^[ A-Z]+$/s |
381 | and $head1_text_content !~ |
382 | m/\((?: |
383 | NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS |
384 | | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS? |
385 | | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT |
386 | )\)/sx |
387 | # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION) |
388 | and ($max_content_length |
389 | ? (length($head1_text_content) <= $max_content_length) # sanity |
390 | : 1) |
391 | ) { |
392 | DEBUG and print " It looks titular: \"$head1_text_content\".\n", |
393 | "\n Using that.\n"; |
394 | $title = $head1_text_content; |
395 | last; |
396 | } else { |
397 | --$state; |
398 | DEBUG and print " Didn't look titular ($head1_text_content).\n", |
399 | "\n Dropping back to seeking-head1-content mode...\n"; |
400 | } |
401 | } |
402 | } |
403 | |
404 | elsif($state == 2) { |
405 | # seeking start of para (which must immediately follow) |
406 | if($token->is_start and $content_containers{ $token->tagname }) { |
407 | DEBUG and print " Found start of Para. Accumulating content...\n"; |
408 | $para_text_content = ''; |
409 | ++$state; |
410 | } else { |
411 | DEBUG and print |
412 | " Didn't see an immediately subsequent start-Para. Reseeking H1\n"; |
413 | $state = 0; |
414 | } |
415 | } |
416 | |
417 | elsif($state == 3) { |
418 | # accumulating text until end of Para |
419 | if( $token->is_text ) { |
420 | DEBUG and print " Adding \"", $token->text, "\" to para-content.\n"; |
421 | $para_text_content .= $token->text; |
422 | # and keep looking |
423 | |
424 | } elsif( $token->is_end and $content_containers{ $token->tagname } ) { |
425 | DEBUG and print " Found end of Para. Considering content: ", |
426 | $para_text_content, "\n"; |
427 | |
428 | if( $para_text_content =~ m/\S/ |
429 | and ($max_content_length |
430 | ? (length($para_text_content) <= $max_content_length) |
431 | : 1) |
432 | ) { |
433 | # Some minimal sanity constraints, I think. |
434 | DEBUG and print " It looks contentworthy, I guess. Using it.\n"; |
435 | $title = $para_text_content; |
436 | last; |
437 | } else { |
438 | DEBUG and print " Doesn't look at all contentworthy!\n Giving up.\n"; |
439 | undef $title; |
440 | last; |
441 | } |
442 | } |
443 | } |
444 | |
445 | else { |
446 | die "IMPOSSIBLE STATE $state!\n"; # should never happen |
447 | } |
448 | |
449 | } |
450 | |
451 | # Put it all back! |
452 | $self->unget_token(@to_unget); |
453 | |
454 | if(DEBUG) { |
455 | if(defined $title) { print " Returing title <$title>\n" } |
456 | else { print "Returning title <>\n" } |
457 | } |
458 | |
459 | return '' unless defined $title; |
460 | $title =~ s/^\s+//; |
461 | return $title; |
462 | } |
463 | |
464 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
465 | # |
466 | # Methods that actually do work at parse-time: |
467 | |
468 | sub _handle_element_start { |
469 | my $self = shift; # leaving ($element_name, $attr_hash_r) |
470 | DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n"; |
471 | |
472 | push @{ $self->{'token_buffer'} }, |
473 | $self->{'start_token_class'}->new(@_); |
474 | return; |
475 | } |
476 | |
477 | sub _handle_text { |
478 | my $self = shift; # leaving ($text) |
479 | DEBUG > 2 and print "== $_[0]\n"; |
480 | push @{ $self->{'token_buffer'} }, |
481 | $self->{'text_token_class'}->new(@_); |
482 | return; |
483 | } |
484 | |
485 | sub _handle_element_end { |
486 | my $self = shift; # leaving ($element_name); |
487 | DEBUG > 2 and print "-- $_[0]\n"; |
488 | push @{ $self->{'token_buffer'} }, |
489 | $self->{'end_token_class'}->new(@_); |
490 | return; |
491 | } |
492 | |
493 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
494 | |
495 | 1; |
496 | |
497 | |
498 | __END__ |
499 | |
500 | =head1 NAME |
501 | |
502 | Pod::Simple::PullParser -- a pull-parser interface to parsing Pod |
503 | |
504 | =head1 SYNOPSIS |
505 | |
506 | my $parser = SomePodProcessor->new; |
507 | $parser->set_source( "whatever.pod" ); |
508 | $parser->run; |
509 | |
510 | Or: |
511 | |
512 | my $parser = SomePodProcessor->new; |
513 | $parser->set_source( $some_filehandle_object ); |
514 | $parser->run; |
515 | |
516 | Or: |
517 | |
518 | my $parser = SomePodProcessor->new; |
519 | $parser->set_source( \$document_source ); |
520 | $parser->run; |
521 | |
522 | Or: |
523 | |
524 | my $parser = SomePodProcessor->new; |
525 | $parser->set_source( \@document_lines ); |
526 | $parser->run; |
527 | |
528 | And elsewhere: |
529 | |
530 | require 5; |
531 | package SomePodProcessor; |
532 | use strict; |
533 | use base qw(Pod::Simple::PullParser); |
534 | |
535 | sub run { |
536 | my $self = shift; |
537 | Token: |
538 | while(my $token = $self->get_token) { |
539 | ...process each token... |
540 | } |
541 | } |
542 | |
543 | =head1 DESCRIPTION |
544 | |
545 | This class is for using Pod::Simple to build a Pod processor -- but |
546 | one that uses an interface based on a stream of token objects, |
547 | instead of based on events. |
548 | |
549 | This is a subclass of L<Pod::Simple> and inherits all its methods. |
550 | |
551 | A subclass of Pod::Simple::PullParser should define a C<run> method |
552 | that calls C<< $token = $parser->get_token >> to pull tokens. |
553 | |
554 | See the source for Pod::Simple::RTF for an example of a formatter |
555 | that uses Pod::Simple::PullParser. |
556 | |
557 | =head1 METHODS |
558 | |
559 | =over |
560 | |
561 | =item my $token = $parser->get_token |
562 | |
563 | This returns the next token object (which will be of a subclass of |
564 | L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit |
565 | the end of the document. |
566 | |
567 | =item $parser->unget_token( $token ) |
568 | |
569 | =item $parser->unget_token( $token1, $token2, ... ) |
570 | |
571 | This restores the token object(s) to the front of the parser stream. |
572 | |
573 | =back |
574 | |
575 | The source has to be set before you can parse anything. The lowest-level |
576 | way is to call C<set_source>: |
577 | |
578 | =over |
579 | |
580 | =item $parser->set_source( $filename ) |
581 | |
582 | =item $parser->set_source( $filehandle_object ) |
583 | |
584 | =item $parser->set_source( \$document_source ) |
585 | |
586 | =item $parser->set_source( \@document_lines ) |
587 | |
588 | =back |
589 | |
590 | Or you can call these methods, which Pod::Simple::PullParser has defined |
591 | to work just like Pod::Simple's same-named methods: |
592 | |
593 | =over |
594 | |
595 | =item $parser->parse_file(...) |
596 | |
597 | =item $parser->parse_string_document(...) |
598 | |
599 | =item $parser->filter(...) |
600 | |
601 | =item $parser->parse_from_file(...) |
602 | |
603 | =back |
604 | |
605 | For those to work, the Pod-processing subclass of |
606 | Pod::Simple::PullParser has to have defined a $parser->run method -- |
607 | so it is advised that all Pod::Simple::PullParser subclasses do so. |
608 | See the Synopsis above, or the source for Pod::Simple::RTF. |
609 | |
610 | Authors of formatter subclasses might find these methods useful to |
611 | call on a parser object that you haven't started pulling tokens |
612 | from yet: |
613 | |
614 | =over |
615 | |
616 | =item my $title_string = $parser->get_title |
617 | |
618 | This tries to get the title string out of $parser, by getting some tokens, |
619 | and scanning them for the title, and then ungetting them so that you can |
620 | process the token-stream from the beginning. |
621 | |
622 | For example, suppose you have a document that starts out: |
623 | |
624 | =head1 NAME |
625 | |
626 | Hoo::Boy::Wowza -- Stuff B<wow> yeah! |
627 | |
628 | $parser->get_title on that document will return "Hoo::Boy::Wowza -- |
629 | Stuff wow yeah!". |
630 | |
631 | In cases where get_title can't find the title, it will return empty-string |
632 | (""). |
633 | |
634 | =item my $title_string = $parser->get_short_title |
635 | |
636 | This is just like get_title, except that it returns just the modulename, if |
637 | the title seems to be of the form "SomeModuleName -- description". |
638 | |
639 | For example, suppose you have a document that starts out: |
640 | |
641 | =head1 NAME |
642 | |
643 | Hoo::Boy::Wowza -- Stuff B<wow> yeah! |
644 | |
645 | then $parser->get_short_title on that document will return |
646 | "Hoo::Boy::Wowza". |
647 | |
648 | But if the document starts out: |
649 | |
650 | =head1 NAME |
651 | |
652 | Hooboy, stuff B<wow> yeah! |
653 | |
654 | then $parser->get_short_title on that document will return "Hooboy, |
655 | stuff wow yeah!". |
656 | |
657 | If the title can't be found, then get_short_title returns empty-string |
658 | (""). |
659 | |
660 | =item $author_name = $parser->get_author |
661 | |
662 | This works like get_title except that it returns the contents of the |
663 | "=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section |
664 | isn't terribly long. |
665 | |
666 | (This method tolerates "AUTHORS" instead of "AUTHOR" too.) |
667 | |
668 | =item $description_name = $parser->get_description |
669 | |
670 | This works like get_title except that it returns the contents of the |
671 | "=head1 PARAGRAPH\n\nParagraph...\n" section, assuming that that section |
672 | isn't terribly long. |
673 | |
674 | =item $version_block = $parser->get_version |
675 | |
676 | This works like get_title except that it returns the contents of |
677 | the "=head1 VERSION\n\n[BIG BLOCK]\n" block. Note that this does NOT |
678 | return the module's C<$VERSION>!! |
679 | |
680 | |
681 | =back |
682 | |
683 | =head1 NOTE |
684 | |
685 | You don't actually I<have> to define a C<run> method. If you're |
686 | writing a Pod-formatter class, you should define a C<run> just so |
687 | that users can call C<parse_file> etc, but you don't I<have> to. |
688 | |
689 | And if you're not writing a formatter class, but are instead just |
690 | writing a program that does something simple with a Pod::PullParser |
691 | object (and not an object of a subclass), then there's no reason to |
692 | bother subclassing to add a C<run> method. |
693 | |
694 | =head1 SEE ALSO |
695 | |
696 | L<Pod::Simple> |
697 | |
698 | L<Pod::Simple::PullParserToken> -- and its subclasses |
699 | L<Pod::Simple::PullParserStartToken>, |
700 | L<Pod::Simple::PullParserTextToken>, and |
701 | L<Pod::Simple::PullParserEndToken>. |
702 | |
703 | L<HTML::TokeParser>, which inspired this. |
704 | |
705 | =head1 COPYRIGHT AND DISCLAIMERS |
706 | |
707 | Copyright (c) 2002 Sean M. Burke. All rights reserved. |
708 | |
709 | This library is free software; you can redistribute it and/or modify it |
710 | under the same terms as Perl itself. |
711 | |
712 | This program is distributed in the hope that it will be useful, but |
713 | without any warranty; without even the implied warranty of |
714 | merchantability or fitness for a particular purpose. |
715 | |
716 | =head1 AUTHOR |
717 | |
718 | Sean M. Burke C<sburke@cpan.org> |
719 | |
720 | =cut |
721 | |
722 | |
723 | |
724 | JUNK: |
725 | |
726 | sub _old_get_title { # some witchery in here |
727 | my $self = $_[0]; |
728 | my $title; |
729 | my @to_unget; |
730 | |
731 | while(1) { |
732 | push @to_unget, $self->get_token; |
733 | unless(defined $to_unget[-1]) { # whoops, short doc! |
734 | pop @to_unget; |
735 | last; |
736 | } |
737 | |
738 | DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n"; |
739 | |
740 | (DEBUG and print "Too much in the buffer.\n"), |
741 | last if @to_unget > 25; # sanity |
742 | |
743 | my $pattern = ''; |
744 | if( #$to_unget[-1]->type eq 'end' |
745 | #and $to_unget[-1]->tagname eq 'Para' |
746 | #and |
747 | ($pattern = join('', |
748 | map {; |
749 | ($_->type eq 'start') ? ("<" . $_->tagname .">") |
750 | : ($_->type eq 'end' ) ? ("</". $_->tagname .">") |
751 | : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X') |
752 | : "BLORP" |
753 | } @to_unget |
754 | )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s |
755 | ) { |
756 | # Whee, it fits the pattern |
757 | DEBUG and print "Seems to match =head1 NAME pattern.\n"; |
758 | $title = ''; |
759 | foreach my $t (reverse @to_unget) { |
760 | last if $t->type eq 'start' and $t->tagname eq 'Para'; |
761 | $title = $t->text . $title if $t->type eq 'text'; |
762 | } |
763 | undef $title if $title =~ m<^\s*$>; # make sure it's contentful! |
764 | last; |
765 | |
766 | } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$} |
767 | and !( $1 eq '1' and $2 eq 'NAME' ) |
768 | ) { |
769 | # Well, it fits a fallback pattern |
770 | DEBUG and print "Seems to match NAMEless pattern.\n"; |
771 | $title = ''; |
772 | foreach my $t (reverse @to_unget) { |
773 | last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s; |
774 | $title = $t->text . $title if $t->type eq 'text'; |
775 | } |
776 | undef $title if $title =~ m<^\s*$>; # make sure it's contentful! |
777 | last; |
778 | |
779 | } else { |
780 | DEBUG and $pattern and print "Leading pattern: $pattern\n"; |
781 | } |
782 | } |
783 | |
784 | # Put it all back: |
785 | $self->unget_token(@to_unget); |
786 | |
787 | if(DEBUG) { |
788 | if(defined $title) { print " Returing title <$title>\n" } |
789 | else { print "Returning title <>\n" } |
790 | } |
791 | |
792 | return '' unless defined $title; |
793 | return $title; |
794 | } |
795 | |