Commit | Line | Data |
351625bd |
1 | |
2 | require 5; |
3 | package Pod::Simple::PullParser; |
316e9929 |
4 | $VERSION = '3.14'; |
351625bd |
5 | use Pod::Simple (); |
6 | BEGIN {@ISA = ('Pod::Simple')} |
7 | |
8 | use strict; |
9 | use Carp (); |
10 | |
11 | use Pod::Simple::PullParserStartToken; |
12 | use Pod::Simple::PullParserEndToken; |
13 | use Pod::Simple::PullParserTextToken; |
14 | |
15 | BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } |
16 | |
17 | __PACKAGE__->_accessorize( |
18 | 'source_fh', # the filehandle we're reading from |
19 | 'source_scalar_ref', # the scalarref we're reading from |
20 | 'source_arrayref', # the arrayref we're reading from |
21 | ); |
22 | |
23 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
24 | # |
25 | # And here is how we implement a pull-parser on top of a push-parser... |
26 | |
27 | sub filter { |
28 | my($self, $source) = @_; |
29 | $self = $self->new unless ref $self; |
30 | |
31 | $source = *STDIN{IO} unless defined $source; |
32 | $self->set_source($source); |
33 | $self->output_fh(*STDOUT{IO}); |
34 | |
35 | $self->run; # define run() in a subclass if you want to use filter()! |
36 | return $self; |
37 | } |
38 | |
39 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
40 | |
41 | sub parse_string_document { |
42 | my $this = shift; |
43 | $this->set_source(\ $_[0]); |
44 | $this->run; |
45 | } |
46 | |
47 | sub parse_file { |
48 | my($this, $filename) = @_; |
49 | $this->set_source($filename); |
50 | $this->run; |
51 | } |
52 | |
53 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
54 | # In case anyone tries to use them: |
55 | |
56 | sub run { |
57 | use Carp (); |
58 | if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed! |
59 | Carp::croak "You can call run() only on subclasses of " |
60 | . __PACKAGE__; |
61 | } else { |
62 | Carp::croak join '', |
63 | "You can't call run() because ", |
64 | ref($_[0]) || $_[0], " didn't define a run() method"; |
65 | } |
66 | } |
67 | |
68 | sub parse_lines { |
69 | use Carp (); |
70 | Carp::croak "Use set_source with ", __PACKAGE__, |
71 | " and subclasses, not parse_lines"; |
72 | } |
73 | |
74 | sub parse_line { |
75 | use Carp (); |
76 | Carp::croak "Use set_source with ", __PACKAGE__, |
77 | " and subclasses, not parse_line"; |
78 | } |
79 | |
80 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
81 | |
82 | sub new { |
83 | my $class = shift; |
84 | my $self = $class->SUPER::new(@_); |
85 | die "Couldn't construct for $class" unless $self; |
86 | |
87 | $self->{'token_buffer'} ||= []; |
88 | $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken'; |
89 | $self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken'; |
90 | $self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken'; |
91 | |
92 | DEBUG > 1 and print "New pullparser object: $self\n"; |
93 | |
94 | return $self; |
95 | } |
96 | |
97 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
98 | |
99 | sub get_token { |
100 | my $self = shift; |
101 | DEBUG > 1 and print "\nget_token starting up on $self.\n"; |
102 | DEBUG > 2 and print " Items in token-buffer (", |
103 | scalar( @{ $self->{'token_buffer'} } ) , |
104 | ") :\n", map( |
105 | " " . $_->dump . "\n", @{ $self->{'token_buffer'} } |
106 | ), |
107 | @{ $self->{'token_buffer'} } ? '' : ' (no tokens)', |
108 | "\n" |
109 | ; |
110 | |
111 | until( @{ $self->{'token_buffer'} } ) { |
112 | DEBUG > 3 and print "I need to get something into my empty token buffer...\n"; |
113 | if($self->{'source_dead'}) { |
114 | DEBUG and print "$self 's source is dead.\n"; |
115 | push @{ $self->{'token_buffer'} }, undef; |
116 | } elsif(exists $self->{'source_fh'}) { |
117 | my @lines; |
118 | my $fh = $self->{'source_fh'} |
119 | || Carp::croak('You have to call set_source before you can call get_token'); |
120 | |
121 | DEBUG and print "$self 's source is filehandle $fh.\n"; |
122 | # Read those many lines at a time |
123 | for(my $i = Pod::Simple::MANY_LINES; $i--;) { |
124 | DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n"; |
125 | local $/ = $Pod::Simple::NL; |
126 | push @lines, scalar(<$fh>); # readline |
127 | DEBUG > 3 and print " Line is: ", |
128 | defined($lines[-1]) ? $lines[-1] : "<undef>\n"; |
129 | unless( defined $lines[-1] ) { |
130 | DEBUG and print "That's it for that source fh! Killing.\n"; |
131 | delete $self->{'source_fh'}; # so it can be GC'd |
132 | last; |
133 | } |
134 | # but pass thru the undef, which will set source_dead to true |
135 | |
136 | # TODO: look to see if $lines[-1] is =encoding, and if so, |
137 | # do horribly magic things |
138 | |
139 | } |
140 | |
141 | if(DEBUG > 8) { |
142 | print "* I've gotten ", scalar(@lines), " lines:\n"; |
143 | foreach my $l (@lines) { |
144 | if(defined $l) { |
145 | print " line {$l}\n"; |
146 | } else { |
147 | print " line undef\n"; |
148 | } |
149 | } |
150 | print "* end of ", scalar(@lines), " lines\n"; |
151 | } |
152 | |
153 | $self->SUPER::parse_lines(@lines); |
154 | |
155 | } elsif(exists $self->{'source_arrayref'}) { |
156 | DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ", |
157 | scalar(@{$self->{'source_arrayref'}}), " items left in it.\n"; |
158 | |
159 | DEBUG > 3 and print " Fetching ", Pod::Simple::MANY_LINES, " lines.\n"; |
160 | $self->SUPER::parse_lines( |
161 | splice @{ $self->{'source_arrayref'} }, |
162 | 0, |
163 | Pod::Simple::MANY_LINES |
164 | ); |
165 | unless( @{ $self->{'source_arrayref'} } ) { |
166 | DEBUG and print "That's it for that source arrayref! Killing.\n"; |
167 | $self->SUPER::parse_lines(undef); |
168 | delete $self->{'source_arrayref'}; # so it can be GC'd |
169 | } |
170 | # to make sure that an undef is always sent to signal end-of-stream |
171 | |
172 | } elsif(exists $self->{'source_scalar_ref'}) { |
173 | |
174 | DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ", |
175 | length(${ $self->{'source_scalar_ref'} }) - |
176 | (pos(${ $self->{'source_scalar_ref'} }) || 0), |
177 | " characters left to parse.\n"; |
178 | |
179 | DEBUG > 3 and print " Fetching a line from source-string...\n"; |
180 | if( ${ $self->{'source_scalar_ref'} } =~ |
181 | m/([^\n\r]*)((?:\r?\n)?)/g |
182 | ) { |
183 | #print(">> $1\n"), |
184 | $self->SUPER::parse_lines($1) |
185 | if length($1) or length($2) |
186 | or pos( ${ $self->{'source_scalar_ref'} }) |
187 | != length( ${ $self->{'source_scalar_ref'} }); |
188 | # I.e., unless it's a zero-length "empty line" at the very |
189 | # end of "foo\nbar\n" (i.e., between the \n and the EOS). |
190 | } else { # that's the end. Byebye |
191 | $self->SUPER::parse_lines(undef); |
192 | delete $self->{'source_scalar_ref'}; |
193 | DEBUG and print "That's it for that source scalarref! Killing.\n"; |
194 | } |
195 | |
196 | |
197 | } else { |
198 | die "What source??"; |
199 | } |
200 | } |
201 | DEBUG and print "get_token about to return ", |
202 | Pod::Simple::pretty( @{$self->{'token_buffer'}} |
203 | ? $self->{'token_buffer'}[-1] : undef |
204 | ), "\n"; |
205 | return shift @{$self->{'token_buffer'}}; # that's an undef if empty |
206 | } |
207 | |
351625bd |
208 | sub unget_token { |
209 | my $self = shift; |
210 | DEBUG and print "Ungetting ", scalar(@_), " tokens: ", |
211 | @_ ? "@_\n" : "().\n"; |
212 | foreach my $t (@_) { |
213 | Carp::croak "Can't unget that, because it's not a token -- it's undef!" |
214 | unless defined $t; |
215 | Carp::croak "Can't unget $t, because it's not a token -- it's a string!" |
216 | unless ref $t; |
217 | Carp::croak "Can't unget $t, because it's not a token object!" |
218 | unless UNIVERSAL::can($t, 'type'); |
219 | } |
220 | |
221 | unshift @{$self->{'token_buffer'}}, @_; |
222 | DEBUG > 1 and print "Token buffer now has ", |
223 | scalar(@{$self->{'token_buffer'}}), " items in it.\n"; |
224 | return; |
225 | } |
226 | |
227 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
228 | |
229 | # $self->{'source_filename'} = $source; |
230 | |
231 | sub set_source { |
232 | my $self = shift @_; |
233 | return $self->{'source_fh'} unless @_; |
234 | my $handle; |
235 | if(!defined $_[0]) { |
236 | Carp::croak("Can't use empty-string as a source for set_source"); |
237 | } elsif(ref(\( $_[0] )) eq 'GLOB') { |
238 | $self->{'source_filename'} = '' . ($handle = $_[0]); |
239 | DEBUG and print "$self 's source is glob $_[0]\n"; |
240 | # and fall thru |
241 | } elsif(ref( $_[0] ) eq 'SCALAR') { |
242 | $self->{'source_scalar_ref'} = $_[0]; |
243 | DEBUG and print "$self 's source is scalar ref $_[0]\n"; |
244 | return; |
245 | } elsif(ref( $_[0] ) eq 'ARRAY') { |
246 | $self->{'source_arrayref'} = $_[0]; |
247 | DEBUG and print "$self 's source is array ref $_[0]\n"; |
248 | return; |
249 | } elsif(ref $_[0]) { |
250 | $self->{'source_filename'} = '' . ($handle = $_[0]); |
251 | DEBUG and print "$self 's source is fh-obj $_[0]\n"; |
252 | } elsif(!length $_[0]) { |
253 | Carp::croak("Can't use empty-string as a source for set_source"); |
254 | } else { # It's a filename! |
255 | DEBUG and print "$self 's source is filename $_[0]\n"; |
256 | { |
257 | local *PODSOURCE; |
258 | open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!"; |
259 | $handle = *PODSOURCE{IO}; |
260 | } |
261 | $self->{'source_filename'} = $_[0]; |
262 | DEBUG and print " Its name is $_[0].\n"; |
263 | |
264 | # TODO: file-discipline things here! |
265 | } |
266 | |
267 | $self->{'source_fh'} = $handle; |
268 | DEBUG and print " Its handle is $handle\n"; |
269 | return 1; |
270 | } |
271 | |
272 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
273 | |
274 | sub get_title_short { shift->get_short_title(@_) } # alias |
275 | |
276 | sub get_short_title { |
277 | my $title = shift->get_title(@_); |
278 | $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s; |
279 | # turn "Foo::Bar -- bars for your foo" into "Foo::Bar" |
280 | return $title; |
281 | } |
282 | |
283 | sub get_title { shift->_get_titled_section( |
284 | 'NAME', max_token => 50, desperate => 1, @_) |
285 | } |
286 | sub get_version { shift->_get_titled_section( |
287 | 'VERSION', |
288 | max_token => 400, |
289 | accept_verbatim => 1, |
290 | max_content_length => 3_000, |
291 | @_, |
292 | ); |
293 | } |
294 | sub get_description { shift->_get_titled_section( |
295 | 'DESCRIPTION', |
296 | max_token => 400, |
297 | max_content_length => 3_000, |
298 | @_, |
299 | ) } |
300 | |
301 | sub get_authors { shift->get_author(@_) } # a harmless alias |
302 | |
303 | sub get_author { |
304 | my $this = shift; |
305 | # Max_token is so high because these are |
306 | # typically at the end of the document: |
307 | $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) || |
308 | $this->_get_titled_section('AUTHORS', max_token => 10_000, @_); |
309 | } |
310 | |
311 | #-------------------------------------------------------------------------- |
312 | |
313 | sub _get_titled_section { |
314 | # Based on a get_title originally contributed by Graham Barr |
315 | my($self, $titlename, %options) = (@_); |
316 | |
317 | my $max_token = delete $options{'max_token'}; |
318 | my $desperate_for_title = delete $options{'desperate'}; |
319 | my $accept_verbatim = delete $options{'accept_verbatim'}; |
320 | my $max_content_length = delete $options{'max_content_length'}; |
9d65762f |
321 | my $nocase = delete $options{'nocase'}; |
351625bd |
322 | $max_content_length = 120 unless defined $max_content_length; |
323 | |
324 | Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ") |
325 | . join " ", map "[$_]", sort keys %options |
326 | ) |
327 | if keys %options; |
328 | |
329 | my %content_containers; |
330 | $content_containers{'Para'} = 1; |
331 | if($accept_verbatim) { |
332 | $content_containers{'Verbatim'} = 1; |
333 | $content_containers{'VerbatimFormatted'} = 1; |
334 | } |
335 | |
336 | my $token_count = 0; |
337 | my $title; |
338 | my @to_unget; |
339 | my $state = 0; |
340 | my $depth = 0; |
341 | |
342 | Carp::croak "What kind of titlename is \"$titlename\"?!" unless |
343 | defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity |
344 | my $titlename_re = quotemeta($titlename); |
345 | |
346 | my $head1_text_content; |
347 | my $para_text_content; |
348 | |
349 | while( |
350 | ++$token_count <= ($max_token || 1_000_000) |
351 | and defined(my $token = $self->get_token) |
352 | ) { |
353 | push @to_unget, $token; |
354 | |
355 | if ($state == 0) { # seeking =head1 |
356 | if( $token->is_start and $token->tagname eq 'head1' ) { |
357 | DEBUG and print " Found head1. Seeking content...\n"; |
358 | ++$state; |
359 | $head1_text_content = ''; |
360 | } |
361 | } |
362 | |
363 | elsif($state == 1) { # accumulating text until end of head1 |
364 | if( $token->is_text ) { |
365 | DEBUG and print " Adding \"", $token->text, "\" to head1-content.\n"; |
366 | $head1_text_content .= $token->text; |
367 | } elsif( $token->is_end and $token->tagname eq 'head1' ) { |
368 | DEBUG and print " Found end of head1. Considering content...\n"; |
9d65762f |
369 | $head1_text_content = uc $head1_text_content if $nocase; |
351625bd |
370 | if($head1_text_content eq $titlename |
371 | or $head1_text_content =~ m/\($titlename_re\)/s |
372 | # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n |
373 | ) { |
374 | DEBUG and print " Yup, it was $titlename. Seeking next para-content...\n"; |
375 | ++$state; |
376 | } elsif( |
377 | $desperate_for_title |
378 | # if we're so desperate we'll take the first |
379 | # =head1's content as a title |
380 | and $head1_text_content =~ m/\S/ |
381 | and $head1_text_content !~ m/^[ A-Z]+$/s |
382 | and $head1_text_content !~ |
383 | m/\((?: |
384 | NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS |
385 | | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS? |
386 | | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT |
387 | )\)/sx |
388 | # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION) |
389 | and ($max_content_length |
390 | ? (length($head1_text_content) <= $max_content_length) # sanity |
391 | : 1) |
392 | ) { |
393 | DEBUG and print " It looks titular: \"$head1_text_content\".\n", |
394 | "\n Using that.\n"; |
395 | $title = $head1_text_content; |
396 | last; |
397 | } else { |
398 | --$state; |
399 | DEBUG and print " Didn't look titular ($head1_text_content).\n", |
400 | "\n Dropping back to seeking-head1-content mode...\n"; |
401 | } |
402 | } |
403 | } |
404 | |
405 | elsif($state == 2) { |
406 | # seeking start of para (which must immediately follow) |
407 | if($token->is_start and $content_containers{ $token->tagname }) { |
408 | DEBUG and print " Found start of Para. Accumulating content...\n"; |
409 | $para_text_content = ''; |
410 | ++$state; |
411 | } else { |
412 | DEBUG and print |
413 | " Didn't see an immediately subsequent start-Para. Reseeking H1\n"; |
414 | $state = 0; |
415 | } |
416 | } |
417 | |
418 | elsif($state == 3) { |
419 | # accumulating text until end of Para |
420 | if( $token->is_text ) { |
421 | DEBUG and print " Adding \"", $token->text, "\" to para-content.\n"; |
422 | $para_text_content .= $token->text; |
423 | # and keep looking |
424 | |
425 | } elsif( $token->is_end and $content_containers{ $token->tagname } ) { |
426 | DEBUG and print " Found end of Para. Considering content: ", |
427 | $para_text_content, "\n"; |
428 | |
429 | if( $para_text_content =~ m/\S/ |
430 | and ($max_content_length |
431 | ? (length($para_text_content) <= $max_content_length) |
432 | : 1) |
433 | ) { |
434 | # Some minimal sanity constraints, I think. |
435 | DEBUG and print " It looks contentworthy, I guess. Using it.\n"; |
436 | $title = $para_text_content; |
437 | last; |
438 | } else { |
439 | DEBUG and print " Doesn't look at all contentworthy!\n Giving up.\n"; |
440 | undef $title; |
441 | last; |
442 | } |
443 | } |
444 | } |
445 | |
446 | else { |
447 | die "IMPOSSIBLE STATE $state!\n"; # should never happen |
448 | } |
449 | |
450 | } |
451 | |
452 | # Put it all back! |
453 | $self->unget_token(@to_unget); |
454 | |
455 | if(DEBUG) { |
456 | if(defined $title) { print " Returing title <$title>\n" } |
457 | else { print "Returning title <>\n" } |
458 | } |
459 | |
460 | return '' unless defined $title; |
461 | $title =~ s/^\s+//; |
462 | return $title; |
463 | } |
464 | |
465 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
466 | # |
467 | # Methods that actually do work at parse-time: |
468 | |
469 | sub _handle_element_start { |
470 | my $self = shift; # leaving ($element_name, $attr_hash_r) |
471 | DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n"; |
472 | |
473 | push @{ $self->{'token_buffer'} }, |
474 | $self->{'start_token_class'}->new(@_); |
475 | return; |
476 | } |
477 | |
478 | sub _handle_text { |
479 | my $self = shift; # leaving ($text) |
480 | DEBUG > 2 and print "== $_[0]\n"; |
481 | push @{ $self->{'token_buffer'} }, |
482 | $self->{'text_token_class'}->new(@_); |
483 | return; |
484 | } |
485 | |
486 | sub _handle_element_end { |
487 | my $self = shift; # leaving ($element_name); |
488 | DEBUG > 2 and print "-- $_[0]\n"; |
489 | push @{ $self->{'token_buffer'} }, |
490 | $self->{'end_token_class'}->new(@_); |
491 | return; |
492 | } |
493 | |
494 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
495 | |
496 | 1; |
497 | |
498 | |
499 | __END__ |
500 | |
501 | =head1 NAME |
502 | |
503 | Pod::Simple::PullParser -- a pull-parser interface to parsing Pod |
504 | |
505 | =head1 SYNOPSIS |
506 | |
507 | my $parser = SomePodProcessor->new; |
508 | $parser->set_source( "whatever.pod" ); |
509 | $parser->run; |
510 | |
511 | Or: |
512 | |
513 | my $parser = SomePodProcessor->new; |
514 | $parser->set_source( $some_filehandle_object ); |
515 | $parser->run; |
516 | |
517 | Or: |
518 | |
519 | my $parser = SomePodProcessor->new; |
520 | $parser->set_source( \$document_source ); |
521 | $parser->run; |
522 | |
523 | Or: |
524 | |
525 | my $parser = SomePodProcessor->new; |
526 | $parser->set_source( \@document_lines ); |
527 | $parser->run; |
528 | |
529 | And elsewhere: |
530 | |
531 | require 5; |
532 | package SomePodProcessor; |
533 | use strict; |
534 | use base qw(Pod::Simple::PullParser); |
535 | |
536 | sub run { |
537 | my $self = shift; |
538 | Token: |
539 | while(my $token = $self->get_token) { |
540 | ...process each token... |
541 | } |
542 | } |
543 | |
544 | =head1 DESCRIPTION |
545 | |
546 | This class is for using Pod::Simple to build a Pod processor -- but |
547 | one that uses an interface based on a stream of token objects, |
548 | instead of based on events. |
549 | |
550 | This is a subclass of L<Pod::Simple> and inherits all its methods. |
551 | |
552 | A subclass of Pod::Simple::PullParser should define a C<run> method |
553 | that calls C<< $token = $parser->get_token >> to pull tokens. |
554 | |
555 | See the source for Pod::Simple::RTF for an example of a formatter |
556 | that uses Pod::Simple::PullParser. |
557 | |
558 | =head1 METHODS |
559 | |
560 | =over |
561 | |
562 | =item my $token = $parser->get_token |
563 | |
564 | This returns the next token object (which will be of a subclass of |
565 | L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit |
566 | the end of the document. |
567 | |
568 | =item $parser->unget_token( $token ) |
569 | |
570 | =item $parser->unget_token( $token1, $token2, ... ) |
571 | |
572 | This restores the token object(s) to the front of the parser stream. |
573 | |
574 | =back |
575 | |
576 | The source has to be set before you can parse anything. The lowest-level |
577 | way is to call C<set_source>: |
578 | |
579 | =over |
580 | |
581 | =item $parser->set_source( $filename ) |
582 | |
583 | =item $parser->set_source( $filehandle_object ) |
584 | |
585 | =item $parser->set_source( \$document_source ) |
586 | |
587 | =item $parser->set_source( \@document_lines ) |
588 | |
589 | =back |
590 | |
591 | Or you can call these methods, which Pod::Simple::PullParser has defined |
592 | to work just like Pod::Simple's same-named methods: |
593 | |
594 | =over |
595 | |
596 | =item $parser->parse_file(...) |
597 | |
598 | =item $parser->parse_string_document(...) |
599 | |
600 | =item $parser->filter(...) |
601 | |
602 | =item $parser->parse_from_file(...) |
603 | |
604 | =back |
605 | |
606 | For those to work, the Pod-processing subclass of |
607 | Pod::Simple::PullParser has to have defined a $parser->run method -- |
608 | so it is advised that all Pod::Simple::PullParser subclasses do so. |
609 | See the Synopsis above, or the source for Pod::Simple::RTF. |
610 | |
611 | Authors of formatter subclasses might find these methods useful to |
612 | call on a parser object that you haven't started pulling tokens |
613 | from yet: |
614 | |
615 | =over |
616 | |
617 | =item my $title_string = $parser->get_title |
618 | |
619 | This tries to get the title string out of $parser, by getting some tokens, |
620 | and scanning them for the title, and then ungetting them so that you can |
621 | process the token-stream from the beginning. |
622 | |
623 | For example, suppose you have a document that starts out: |
624 | |
625 | =head1 NAME |
626 | |
627 | Hoo::Boy::Wowza -- Stuff B<wow> yeah! |
628 | |
629 | $parser->get_title on that document will return "Hoo::Boy::Wowza -- |
9d65762f |
630 | Stuff wow yeah!". If the document starts with: |
631 | |
632 | =head1 Name |
633 | |
634 | Hoo::Boy::W00t -- Stuff B<w00t> yeah! |
635 | |
636 | Then you'll need to pass the C<nocase> option in order to recognize "Name": |
637 | |
638 | $parser->get_title(nocase => 1); |
351625bd |
639 | |
640 | In cases where get_title can't find the title, it will return empty-string |
641 | (""). |
642 | |
643 | =item my $title_string = $parser->get_short_title |
644 | |
645 | This is just like get_title, except that it returns just the modulename, if |
646 | the title seems to be of the form "SomeModuleName -- description". |
647 | |
648 | For example, suppose you have a document that starts out: |
649 | |
650 | =head1 NAME |
651 | |
652 | Hoo::Boy::Wowza -- Stuff B<wow> yeah! |
653 | |
654 | then $parser->get_short_title on that document will return |
655 | "Hoo::Boy::Wowza". |
656 | |
657 | But if the document starts out: |
658 | |
659 | =head1 NAME |
660 | |
661 | Hooboy, stuff B<wow> yeah! |
662 | |
663 | then $parser->get_short_title on that document will return "Hooboy, |
9d65762f |
664 | stuff wow yeah!". If the document starts with: |
665 | |
666 | =head1 Name |
667 | |
668 | Hoo::Boy::W00t -- Stuff B<w00t> yeah! |
669 | |
670 | Then you'll need to pass the C<nocase> option in order to recognize "Name": |
671 | |
672 | $parser->get_short_title(nocase => 1); |
351625bd |
673 | |
674 | If the title can't be found, then get_short_title returns empty-string |
675 | (""). |
676 | |
677 | =item $author_name = $parser->get_author |
678 | |
679 | This works like get_title except that it returns the contents of the |
680 | "=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section |
9d65762f |
681 | isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n" |
682 | section, pass the C<nocase> otpion: |
683 | |
684 | $parser->get_author(nocase => 1); |
351625bd |
685 | |
686 | (This method tolerates "AUTHORS" instead of "AUTHOR" too.) |
687 | |
688 | =item $description_name = $parser->get_description |
689 | |
690 | This works like get_title except that it returns the contents of the |
9d65762f |
691 | "=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section |
692 | isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n" |
693 | section, pass the C<nocase> otpion: |
694 | |
695 | $parser->get_description(nocase => 1); |
351625bd |
696 | |
697 | =item $version_block = $parser->get_version |
698 | |
699 | This works like get_title except that it returns the contents of |
700 | the "=head1 VERSION\n\n[BIG BLOCK]\n" block. Note that this does NOT |
9d65762f |
701 | return the module's C<$VERSION>!! To recognize a |
702 | "=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> otpion: |
351625bd |
703 | |
9d65762f |
704 | $parser->get_version(nocase => 1); |
351625bd |
705 | |
706 | =back |
707 | |
708 | =head1 NOTE |
709 | |
710 | You don't actually I<have> to define a C<run> method. If you're |
711 | writing a Pod-formatter class, you should define a C<run> just so |
712 | that users can call C<parse_file> etc, but you don't I<have> to. |
713 | |
714 | And if you're not writing a formatter class, but are instead just |
715 | writing a program that does something simple with a Pod::PullParser |
716 | object (and not an object of a subclass), then there's no reason to |
717 | bother subclassing to add a C<run> method. |
718 | |
719 | =head1 SEE ALSO |
720 | |
721 | L<Pod::Simple> |
722 | |
723 | L<Pod::Simple::PullParserToken> -- and its subclasses |
724 | L<Pod::Simple::PullParserStartToken>, |
725 | L<Pod::Simple::PullParserTextToken>, and |
726 | L<Pod::Simple::PullParserEndToken>. |
727 | |
728 | L<HTML::TokeParser>, which inspired this. |
729 | |
a242eeb4 |
730 | =head1 SUPPORT |
731 | |
732 | Questions or discussion about POD and Pod::Simple should be sent to the |
733 | pod-people@perl.org mail list. Send an empty email to |
734 | pod-people-subscribe@perl.org to subscribe. |
735 | |
736 | This module is managed in an open GitHub repository, |
737 | L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or |
738 | to clone L<git://github.com/theory/pod-simple.git> and send patches! |
739 | |
740 | Patches against Pod::Simple are welcome. Please send bug reports to |
741 | <bug-pod-simple@rt.cpan.org>. |
742 | |
351625bd |
743 | =head1 COPYRIGHT AND DISCLAIMERS |
744 | |
433cf6b4 |
745 | Copyright (c) 2002 Sean M. Burke. |
351625bd |
746 | |
747 | This library is free software; you can redistribute it and/or modify it |
748 | under the same terms as Perl itself. |
749 | |
750 | This program is distributed in the hope that it will be useful, but |
751 | without any warranty; without even the implied warranty of |
752 | merchantability or fitness for a particular purpose. |
753 | |
754 | =head1 AUTHOR |
755 | |
a242eeb4 |
756 | Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. |
757 | But don't bother him, he's retired. |
351625bd |
758 | |
a242eeb4 |
759 | Pod::Simple is maintained by: |
351625bd |
760 | |
a242eeb4 |
761 | =over |
351625bd |
762 | |
a242eeb4 |
763 | =item * Allison Randal C<allison@perl.org> |
351625bd |
764 | |
a242eeb4 |
765 | =item * Hans Dieter Pearcey C<hdp@cpan.org> |
766 | |
767 | =item * David E. Wheeler C<dwheeler@cpan.org> |
768 | |
769 | =back |
770 | |
771 | =cut |
351625bd |
772 | JUNK: |
773 | |
774 | sub _old_get_title { # some witchery in here |
775 | my $self = $_[0]; |
776 | my $title; |
777 | my @to_unget; |
778 | |
779 | while(1) { |
780 | push @to_unget, $self->get_token; |
781 | unless(defined $to_unget[-1]) { # whoops, short doc! |
782 | pop @to_unget; |
783 | last; |
784 | } |
785 | |
786 | DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n"; |
787 | |
788 | (DEBUG and print "Too much in the buffer.\n"), |
789 | last if @to_unget > 25; # sanity |
790 | |
791 | my $pattern = ''; |
792 | if( #$to_unget[-1]->type eq 'end' |
793 | #and $to_unget[-1]->tagname eq 'Para' |
794 | #and |
795 | ($pattern = join('', |
796 | map {; |
797 | ($_->type eq 'start') ? ("<" . $_->tagname .">") |
798 | : ($_->type eq 'end' ) ? ("</". $_->tagname .">") |
799 | : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X') |
800 | : "BLORP" |
801 | } @to_unget |
802 | )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s |
803 | ) { |
804 | # Whee, it fits the pattern |
805 | DEBUG and print "Seems to match =head1 NAME pattern.\n"; |
806 | $title = ''; |
807 | foreach my $t (reverse @to_unget) { |
808 | last if $t->type eq 'start' and $t->tagname eq 'Para'; |
809 | $title = $t->text . $title if $t->type eq 'text'; |
810 | } |
811 | undef $title if $title =~ m<^\s*$>; # make sure it's contentful! |
812 | last; |
813 | |
814 | } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$} |
815 | and !( $1 eq '1' and $2 eq 'NAME' ) |
816 | ) { |
817 | # Well, it fits a fallback pattern |
818 | DEBUG and print "Seems to match NAMEless pattern.\n"; |
819 | $title = ''; |
820 | foreach my $t (reverse @to_unget) { |
821 | last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s; |
822 | $title = $t->text . $title if $t->type eq 'text'; |
823 | } |
824 | undef $title if $title =~ m<^\s*$>; # make sure it's contentful! |
825 | last; |
826 | |
827 | } else { |
828 | DEBUG and $pattern and print "Leading pattern: $pattern\n"; |
829 | } |
830 | } |
831 | |
832 | # Put it all back: |
833 | $self->unget_token(@to_unget); |
834 | |
835 | if(DEBUG) { |
836 | if(defined $title) { print " Returing title <$title>\n" } |
837 | else { print "Returning title <>\n" } |
838 | } |
839 | |
840 | return '' unless defined $title; |
841 | return $title; |
842 | } |
843 | |