2 package Pod::Simple::BlackBox;
4 # "What's in the box?" "Pain."
6 ###########################################################################
8 # This is where all the scary things happen: parsing lines into
9 # paragraphs; and then into directives, verbatims, and then also
10 # turning formatting sequences into treelets.
12 # Are you really sure you want to read this code?
14 #-----------------------------------------------------------------------------
16 # The basic job of this module, Pod::Simple::BlackBox, is to do the dirty work
17 # of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18 # to call the proper callbacks on the treelets.
20 # Every node in a treelet is a ['name', {attrhash}, ...children...]
25 #use constant DEBUG => 7;
28 *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
31 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# parse_line(@lines): convenience alias. Shifts the invocant off @_ and calls
# parse_lines on it with the remaining arguments unchanged.
33 sub parse_line { shift->parse_lines(@_) } # alias
35 # - - - Turn back now! Run away! - - -
# parse_lines(@lines): feed source lines to the parser, one element per line.
# An undef element signals end-of-stream.
# What the visible code does with each line:
#   - outside a pod block, the line is handed to $code_handler (if set) along
#     with the current line count and the parser object;
#   - a "=cut" line inside pod is handed to $cut_handler the same way;
#   - other pod lines are gathered into paragraph records on $self->{'paras'}
#     ('~Verbatim', '~Para', '~end', or a directive name such as '=head1'),
#     and _ponder_paragraph_buffer is called each time a new paragraph starts.
# NOTE(review): several lines of this sub (the invocant unpacking, some
# else-arms and closing braces) are not visible in this listing.
37 sub parse_lines { # Usage: $parser->parse_lines(@lines)
38 # an undef means end-of-stream
41 my $code_handler = $self->{'code_handler'};
42 my $cut_handler = $self->{'cut_handler'};
43 $self->{'line_count'} ||= 0;
48 print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
51 print "# About to parse lines: ",
52 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
54 my $paras = ($self->{'paras'} ||= []);
55 # paragraph buffer. Because we need to defer processing of =over
56 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
59 $self->{'pod_para_count'} ||= 0;
62 foreach my $source_line (@_) {
63 if( $self->{'source_dead'} ) {
64 DEBUG > 4 and print "# Source is dead.\n";
68 unless( defined $source_line ) {
69 DEBUG > 4 and print "# Undef-line seen.\n";
71 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
72 push @$paras, $paras->[-1], $paras->[-1];
73 # So that it definitely fills the buffer.
74 $self->{'source_dead'} = 1;
75 $self->_ponder_paragraph_buffer;
# Post-increment: the test is true when the stored count was already non-zero;
# otherwise this is treated as the first line and gets the BOM checks below.
80 if( $self->{'line_count'}++ ) {
81 ($line = $source_line) =~ tr/\n\r//d;
82 # If we don't have two vars, we'll end up with that there
83 # tr/// modding the (potentially read-only) original source line!
86 DEBUG > 2 and print "First line: [$source_line]\n";
88 if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
89 DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n";
90 $self->_handle_encoding_line( "=encode utf8" );
93 } elsif( $line =~ s/^\xFE\xFF//s ) {
94 DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
96 $self->{'line_count'},
97 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
103 # TODO: implement somehow?
105 } elsif( $line =~ s/^\xFF\xFE//s ) {
106 DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
108 $self->{'line_count'},
109 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
115 # TODO: implement somehow?
118 DEBUG > 2 and print "First line is BOM-less.\n";
119 ($line = $source_line) =~ tr/\n\r//d;
124 DEBUG > 5 and print "# Parsing line: [$line]\n";
126 if(!$self->{'in_pod'}) {
127 if($line =~ m/^=([a-zA-Z]+)/s) {
130 $self->{'line_count'},
131 "=cut found outside a pod block. Skipping to next block."
134 ## Before there were errata sections in the world, it was
135 ## least-pessimal to abort processing the file. But now we can
136 ## just barrel on thru (but still not start a pod block).
142 $self->{'in_pod'} = $self->{'start_of_pod_block'}
143 = $self->{'last_was_blank'} = 1;
144 # And fall thru to the pod-mode block further down
147 DEBUG > 5 and print "# It's a code-line.\n";
148 $code_handler->(map $_, $line, $self->{'line_count'}, $self)
150 # Note: this may cause code to be processed out of order relative
151 # to pods, but in order relative to cuts.
153 # Note also that we haven't yet applied the transcoding to $line
154 # by the time we call $code_handler!
156 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
157 # That RE is from perlsyn, section "Plain Old Comments (Not!)",
158 #$fname = $2 if defined $2;
159 #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
160 DEBUG > 1 and print "# Setting nextline to $1\n";
# Store N-1 so that the post-increment on the next source line yields N.
161 $self->{'line_count'} = $1 - 1;
168 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
169 # Else we're in pod mode:
171 # Apply any necessary transcoding:
# The transcoder is called for its side effect only (return value discarded);
# presumably it rewrites $line in place -- confirm against Pod::Simple::Transcode.
172 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
174 # HERE WE CATCH =encoding EARLY!
175 if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
176 $line = $self->_handle_encoding_line( $line );
179 if($line =~ m/^=cut/s) {
180 # here ends the pod block, and therefore the previous pod para
181 DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
182 $self->{'in_pod'} = 0;
183 # ++$self->{'pod_para_count'};
184 $self->_ponder_paragraph_buffer();
185 # by now it's safe to consider the previous paragraph as done.
186 $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
189 # TODO: add to docs: Note: this may cause cuts to be processed out
190 # of order relative to pods, but in order relative to code.
192 } elsif($line =~ m/^\s*$/s) { # it's a blank line
193 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
194 DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
195 push @{$paras->[-1]}, $line;
196 } # otherwise it's not interesting
198 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
199 DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
202 $self->{'last_was_blank'} = 1;
204 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
206 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
207 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
208 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
209 # Note that in "=head1 foo", the WS is lost.
210 # Example: ['=head1', {'start_line' => 123}, ' foo']
212 ++$self->{'pod_para_count'};
214 $self->_ponder_paragraph_buffer();
215 # by now it's safe to consider the previous paragraph as done.
217 push @$paras, $new; # the new incipient paragraph
218 DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
220 } elsif($line =~ m/^\s/s) {
# An indented line after a blank: extend the verbatim paragraph already at the
# end of the buffer if there is one, otherwise start a new '~Verbatim' record.
222 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
223 DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
224 push @{$paras->[-1]}, $line;
226 ++$self->{'pod_para_count'};
227 $self->_ponder_paragraph_buffer();
228 # by now it's safe to consider the previous paragraph as done.
229 DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
230 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
233 ++$self->{'pod_para_count'};
234 $self->_ponder_paragraph_buffer();
235 # by now it's safe to consider the previous paragraph as done.
236 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
237 DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
239 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
242 # It's a non-blank line /continuing/ the current para
244 DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
245 push @{$paras->[-1]}, $line;
248 die "Continuing a paragraph but \@\$paras is empty?";
250 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
253 } # ends the big foreach loop over the source lines
255 DEBUG > 1 and print(pretty(@$paras), "\n");
259 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# _handle_encoding_line($line): act on an "=encoding NAME" line as soon as it
# is read.
# Returns $line untouched when it does not match /^=encoding\s+(\S+)\s*$/.
# Otherwise the request text is pushed onto $self->{'encoding_command_reqs'},
# the resulting $enc_error value is pushed onto
# $self->{'encoding_command_statuses'}, and the literal string
# '=encoding ALREADYDONE' is returned in place of the line.
# NOTE(review): the declarations of $e, $orig, $norm_e, $enc_error and others
# are not visible in this listing.
261 sub _handle_encoding_line {
262 my($self, $line) = @_;
264 # The point of this routine is to set $self->{'_transcoder'} as indicated.
266 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
267 DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
271 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
275 # Cf. perldoc Encode and perldoc Encode::Supported
277 require Pod::Simple::Transcode;
# An encoding was already chosen earlier: compare normalised names so that a
# repeat of the same encoding is tolerated and a different one is an error.
279 if( $self->{'encoding'} ) {
280 my $norm_current = $self->{'encoding'};
282 foreach my $that ($norm_current, $norm_e) {
286 if($norm_current eq $norm_e) {
287 DEBUG > 1 and print "The '=encoding $orig' line is ",
288 "redundant. ($norm_current eq $norm_e). Ignoring.\n";
290 # But that doesn't necessarily mean that the earlier one went okay
292 $enc_error = "Encoding is already set to " . $self->{'encoding'};
293 DEBUG > 1 and print $enc_error;
296 # OK, let's turn on the encoding
298 DEBUG > 1 and print " Setting encoding to $e\n";
299 $self->{'encoding'} = $e;
304 DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
306 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
308 die($enc_error = "WHAT? _transcoder is already set?!")
309 if $self->{'_transcoder'}; # should never happen
# (Pod::Simple::Transcode was already required above; this repeat is harmless.)
310 require Pod::Simple::Transcode;
311 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
# Try the new transcoder on a few sample strings; the $@ check below suggests
# this runs inside an eval whose wrapper lines are not visible here -- confirm.
313 my @x = ('', "abc", "123");
314 $self->{'_transcoder'}->(@x);
316 $@ && die( $enc_error =
317 "Really unexpected error setting up encoding $e: $@\nAborting"
321 my @supported = Pod::Simple::Transcode::->all_encodings;
323 # Note unsupported, and complain
324 DEBUG and print " Encoding [$e] is unsupported.",
325 "\nSupporteds: @supported\n";
328 # Look for a near match:
332 foreach my $enc (@supported) {
335 next unless $n eq $norm;
336 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
339 my $encmodver = Pod::Simple::Transcode::->encmodver;
340 $enc_error = join '' =>
341 "This document probably does not appear as it should, because its ",
342 "\"=encoding $e\" line calls for an unsupported encoding.",
343 $suggestion, " [$encmodver\'s supported encodings are: @supported]"
346 $self->scream( $self->{'line_count'}, $enc_error );
# Record the outcome for this request; _handle_encoding_second_level reads the
# last entry later to decide whether to report a problem.
348 push @{ $self->{'encoding_command_statuses'} }, $enc_error;
350 return '=encoding ALREADYDONE';
353 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# _handle_encoding_second_level($para): second pass over an =encoding
# paragraph, used only for error reporting.
# If the paragraph content is the marker 'ALREADYDONE', the last entry of
# $self->{'encoding_command_statuses'} is checked and, when true, reported via
# whine together with the last entry of $self->{'encoding_command_reqs'}.
# Any other content is reported as invalid =encoding syntax.
355 sub _handle_encoding_second_level {
356 # By the time this is called, the encoding (if well formed) will already
357 # have been acted on.
358 my($self, $para) = @_;
# NOTE(review): @x is not declared in the visible lines; presumably it is a
# copy of @$para so that the splice leaves the paragraph intact -- confirm.
360 my $content = join ' ', splice @x, 2;
361 $content =~ s/^\s+//s;
362 $content =~ s/\s+$//s;
364 DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
366 if($content eq 'ALREADYDONE') {
367 # It's already been handled. Check for errors.
368 if(! $self->{'encoding_command_statuses'} ) {
369 DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
370 } elsif( $self->{'encoding_command_statuses'}[-1] ) {
371 $self->whine( $para->[1]{'start_line'},
372 sprintf "Couldn't do %s: %s",
373 $self->{'encoding_command_reqs' }[-1],
374 $self->{'encoding_command_statuses'}[-1],
377 DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
381 # Otherwise it's a syntax error
382 $self->whine( $para->[1]{'start_line'},
383 "Invalid =encoding syntax: $content"
390 #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
# Errata generator: builds the paragraph records for a "POD ERRORS" section
# from $self->{'errata'}, a hash keyed by line number whose values are lists
# of messages.
# NOTE(review): the `sub` line is not visible in this listing; this is
# presumably the body of _gen_errata (called from _ponder_doc_end) -- confirm.
393 my $m = -321; # sentinel start_line stamped on every generated paragraph
397 # Return 0 or more fake-o paragraphs explaining the accumulated
398 # errors on this document.
400 return() unless $self->{'errata'} and keys %{$self->{'errata'}};
# One '=item' per affected line, in ascending numeric order, followed by one
# '~Para' per message recorded for that line.
404 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
406 ['=item', {'start_line' => $m}, "Around line $line:"],
407 map( ['~Para', {'start_line' => $m, '~cooked' => 1},
408 #['~Top', {'start_line' => $m},
412 @{$self->{'errata'}{$line}}
417 # TODO: report of unknown entities? unrenderable characters?
# Fixed framing around the items: heading, explanatory paragraph, =over ... =back.
420 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
421 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
424 'The above document had some coding errors, which are explained below:'
427 ['=over', {'start_line' => $m, 'errata' => 1}, ''],
431 ['=back', {'start_line' => $m, 'errata' => 1}, ''],
434 DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
441 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
443 ##############################################################################
445 ## stop reading now stop reading now stop reading now stop reading now stop
447 ## HERE IT BECOMES REALLY SCARY
449 ## stop reading now stop reading now stop reading now stop reading now stop
451 ##############################################################################
# _ponder_paragraph_buffer(): drain $self->{'paras'}, turning each buffered
# paragraph record into parser events.
# Outline of the visible logic:
#   - on first use, fire the 'Document' start event;
#   - stop early while the only buffered paragraph is an =over, ~Verbatim or
#     =item, because those need to see what follows them;
#   - hand =for / =begin / =end / ~end / =pod / =over / =back to their
#     _ponder_* helpers;
#   - skip everything else while any open region is flagged '~ignore';
#   - classify the remainder as Plain, Verbatim or Data (taking enclosing =for
#     regions into account), run the matching _ponder_* helper, and pass the
#     result to _traverse_treelet_bit.
# NOTE(review): many lines (the invocant unpacking, the main loop header,
# several whine calls, else-arms and closing braces) are not visible in this
# listing.
453 sub _ponder_paragraph_buffer {
455 # Para-token types as found in the buffer.
456 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
457 # =over, =back, =item
458 # and the null =pod (to be complained about if over one line)
460 # "~data" paragraphs are something we generate at this level, depending on
461 # a currently open =over region
463 # Events fired: Begin and end for:
464 # directivename (like head1 .. head4), item, extend,
465 # for (from =begin...=end, =for),
466 # over-bullet, over-number, over-text, over-block,
467 # item-bullet, item-number, item-text,
469 # Data, Para, Verbatim
470 # B, C, longdirname (TODO -- wha?), etc. for all directives
475 return unless @{$paras = $self->{'paras'}};
476 my $curr_open = ($self->{'curr_open'} ||= []);
480 DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
482 # We have something in our buffer. So apparently the document has started.
483 unless($self->{'doc_has_started'}) {
484 $self->{'doc_has_started'} = 1;
486 my $starting_contentless;
487 $starting_contentless =
490 and @$paras and ! grep $_->[0] ne '~end', @$paras
491 # i.e., if the paras is all ~ends
494 DEBUG and print "# Starting ",
495 $starting_contentless ? 'contentless' : 'contentful',
499 $self->_handle_element_start(
500 ($scratch = 'Document'),
502 'start_line' => $paras->[0][1]{'start_line'},
503 $starting_contentless ? ( 'contentless' => 1 ) : (),
508 my($para, $para_type);
510 last if @$paras == 1 and
511 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
512 or $paras->[0][0] eq '=item' )
514 # Those're the three kinds of paragraphs that require lookahead.
515 # Actually, an "=item Foo" inside an <over type=text> region
516 # and any =item inside an <over type=block> region (rare)
517 # don't require any lookahead, but all others (bullets
520 # TODO: winge about many kinds of directives in non-resolving =for regions?
521 # TODO: many? like what? =head1 etc?
523 $para = shift @$paras;
524 $para_type = $para->[0];
526 DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
527 $self->_dump_curr_open(), ")\n";
# Region-control paragraphs are dispatched before the ignore check below; a
# true return from a helper moves straight on to the next buffered paragraph.
529 if($para_type eq '=for') {
530 next if $self->_ponder_for($para,$curr_open,$paras);
532 } elsif($para_type eq '=begin') {
533 next if $self->_ponder_begin($para,$curr_open,$paras);
535 } elsif($para_type eq '=end') {
536 next if $self->_ponder_end($para,$curr_open,$paras);
538 } elsif($para_type eq '~end') { # The virtual end-document signal
539 next if $self->_ponder_doc_end($para,$curr_open,$paras);
543 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
544 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# Any open region flagged '~ignore' suppresses this paragraph.
545 if(grep $_->[1]{'~ignore'}, @$curr_open) {
547 print "Skipping $para_type paragraph because in ignore mode.\n";
550 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
551 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
553 if($para_type eq '=pod') {
554 $self->_ponder_pod($para,$curr_open,$paras);
556 } elsif($para_type eq '=over') {
557 next if $self->_ponder_over($para,$curr_open,$paras);
559 } elsif($para_type eq '=back') {
560 next if $self->_ponder_back($para,$curr_open,$paras);
564 # All non-magical codes!!!
566 # Here we start using $para_type for our own twisted purposes, to
567 # mean how it should get treated, not as what the element name
570 DEBUG > 1 and print "Pondering non-magical $para_type\n";
574 # Enforce some =headN discipline
575 if($para_type =~ m/^=head\d$/s
576 and ! $self->{'accept_heads_anywhere'}
578 and $curr_open->[-1][0] eq '=over'
580 DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
582 $para->[1]{'start_line'},
583 "You forgot a '=back' before '$para_type'"
# Re-queue a synthetic =back followed by this heading, so that the open list
# is closed before the heading is processed.
585 unshift @$paras, ['=back', {}, ''], $para; # close the =over
590 if($para_type eq '=item') {
593 unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
595 $para->[1]{'start_line'},
596 "'=item' outside of any '=over'"
599 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
606 my $over_type = $over->[1]{'~type'};
610 die "Typeless over in stack, starting at line "
611 . $over->[1]{'start_line'};
613 } elsif($over_type eq 'block') {
# Complain only once per =over region; '~bitched_about' remembers that we did.
614 unless($curr_open->[-1][1]{'~bitched_about'}) {
615 $curr_open->[-1][1]{'~bitched_about'} = 1;
617 $curr_open->[-1][1]{'start_line'},
618 "You can't have =items (as at line "
619 . $para->[1]{'start_line'}
620 . ") unless the first thing after the =over is an =item"
623 # Just turn it into a paragraph and reconsider it
624 $para->[0] = '~Para';
625 unshift @$paras, $para;
628 } elsif($over_type eq 'text') {
629 my $item_type = $self->_get_item_type($para);
630 # That kills the content of the item if it's a number or bullet.
631 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
633 if($item_type eq 'text') {
634 # Nothing special needs doing for 'text'
635 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
636 die "Unknown item type $item_type"
637 unless $item_type eq 'number' or $item_type eq 'bullet';
638 # Undo our clobbering:
639 push @$para, $para->[1]{'~orig_content'};
640 delete $para->[1]{'number'};
641 # Only a PROPER item-number element is allowed
642 # to have a number attribute.
644 die "Unhandled item type $item_type"; # should never happen
647 # =item-text thingies don't need any assimilation, it seems.
649 } elsif($over_type eq 'number') {
650 my $item_type = $self->_get_item_type($para);
651 # That kills the content of the item if it's a number or bullet.
652 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
# Each item in a numbered list bumps the region's own '~counter'; the result is
# the number this item is expected to carry.
654 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
656 if($item_type eq 'bullet') {
657 # Hm, it's not numeric. Correct for this.
658 $para->[1]{'number'} = $expected_value;
660 $para->[1]{'start_line'},
661 "Expected '=item $expected_value'"
663 push @$para, $para->[1]{'~orig_content'};
664 # restore the bullet, blocking the assimilation of next para
666 } elsif($item_type eq 'text') {
667 # Hm, it's not numeric. Correct for this.
668 $para->[1]{'number'} = $expected_value;
670 $para->[1]{'start_line'},
671 "Expected '=item $expected_value'"
673 # Text content will still be there and will block next ~Para
675 } elsif($item_type ne 'number') {
676 die "Unknown item type $item_type"; # should never happen
678 } elsif($expected_value == $para->[1]{'number'}) {
679 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
682 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
683 " instead of the expected value of $expected_value\n";
685 $para->[1]{'start_line'},
686 "You have '=item " . $para->[1]{'number'} .
687 "' instead of the expected '=item $expected_value'"
689 $para->[1]{'number'} = $expected_value; # correcting!!
693 # For the cases where we /didn't/ push to @$para
694 if($paras->[0][0] eq '~Para') {
695 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
# Move the next paragraph's content (everything after its name and attribute
# hash) into this item, removing that paragraph from the buffer.
696 push @$para, splice @{shift @$paras},2;
698 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
699 push @$para, ''; # Just so it's not contentless
704 } elsif($over_type eq 'bullet') {
705 my $item_type = $self->_get_item_type($para);
706 # That kills the content of the item if it's a number or bullet.
707 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
709 if($item_type eq 'bullet') {
712 if( $para->[1]{'~_freaky_para_hack'} ) {
713 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
714 push @$para, delete $para->[1]{'~_freaky_para_hack'};
717 } elsif($item_type eq 'number') {
719 $para->[1]{'start_line'},
722 push @$para, $para->[1]{'~orig_content'};
723 # and block assimilation of the next paragraph
724 delete $para->[1]{'number'};
725 # Only a PROPER item-number element is allowed
726 # to have a number attribute.
727 } elsif($item_type eq 'text') {
729 $para->[1]{'start_line'},
732 # But doesn't need processing. But it'll block assimilation
735 die "Unhandled item type $item_type"; # should never happen
739 # For the cases where we /didn't/ push to @$para
740 if($paras->[0][0] eq '~Para') {
741 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
742 push @$para, splice @{shift @$paras},2;
744 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
745 push @$para, ''; # Just so it's not contentless
750 die "Unhandled =over type \"$over_type\"?";
# The element name becomes '=item-<over type>' and the item is then treated
# as a Plain paragraph.
754 $para_type = 'Plain';
755 $para->[0] .= '-' . $over_type;
756 # Whew. Now fall thru and process it.
759 } elsif($para_type eq '=extend') {
760 # Well, might as well implement it here.
761 $self->_ponder_extend($para);
763 } elsif($para_type eq '=encoding') {
764 # Not actually acted on here, but we catch errors here.
765 $self->_handle_encoding_second_level($para);
768 } elsif($para_type eq '~Verbatim') {
769 $para->[0] = 'Verbatim';
770 $para_type = '?Verbatim';
771 } elsif($para_type eq '~Para') {
773 $para_type = '?Plain';
774 } elsif($para_type eq 'Data') {
776 $para_type = '?Data';
777 } elsif( $para_type =~ s/^=//s
778 and defined( $para_type = $self->{'accept_directives'}{$para_type} )
780 DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
782 # An unknown directive!
783 DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
784 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
787 $para->[1]{'start_line'},
788 "Unknown directive: $para->[0]"
791 # And maybe treat it as text instead of just letting it go?
# A leading '?' marks a provisional type (set above for ~Verbatim, ~Para and
# Data) whose final treatment depends on the enclosing =for regions.
795 if($para_type =~ s/^\?//s) {
796 if(! @$curr_open) { # usual case
797 DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
799 my @fors = grep $_->[0] eq '=for', @$curr_open;
800 DEBUG > 1 and print "Containing fors: ",
801 join(',', map $_->[1]{'target'}, @fors), "\n";
804 DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
806 #} elsif(grep $_->[1]{'~resolve'}, @fors) {
807 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
808 } elsif( $fors[-1][1]{'~resolve'} ) {
809 # Look to the immediately containing for
811 if($para_type eq 'Data') {
812 DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
814 $para_type = 'Plain';
816 DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
819 DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
820 $para->[0] = $para_type = 'Data';
825 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
826 if($para_type eq 'Plain') {
827 $self->_ponder_Plain($para);
828 } elsif($para_type eq 'Verbatim') {
829 $self->_ponder_Verbatim($para);
830 } elsif($para_type eq 'Data') {
831 $self->_ponder_Data($para);
833 die "\$para type is $para_type -- how did that happen?";
837 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Drop the leading '~' or '=' marker so that the bare element name remains.
838 $para->[0] =~ s/^[~=]//s;
840 DEBUG and print "\n", pretty($para), "\n";
842 # traverse the treelet (which might well be just one string scalar)
843 $self->{'content_seen'} ||= 1;
844 $self->_traverse_treelet_bit(@$para);
851 ###########################################################################
852 # The sub-ponderers...
# Handles a "=for TARGET ..." paragraph by recasting it as a begin/end pair.
# NOTE(review): the `sub` line is not visible in this listing; the argument
# list and messages match the _ponder_for call in _ponder_paragraph_buffer --
# confirm.
857 my ($self,$para,$curr_open,$paras) = @_;
859 # Fake it out as a begin/end
862 if(grep $_->[1]{'~ignore'}, @$curr_open) {
863 DEBUG > 1 and print "Ignoring ignorable =for\n";
# Scan the content elements (index 2 onward) and peel the first
# whitespace-delimited word off the first element that has one.
867 for(my $i = 2; $i < @$para; ++$i) {
868 if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
873 unless(defined $target) {
875 $para->[1]{'start_line'},
876 "=for without a target?"
881 print "Faking out a =for $target as a =begin $target / =end $target\n";
# Both synthetic paragraphs carry '~really' => '=for' in their attribute hash.
887 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
892 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
# Handles a "=begin TARGET [title]" paragraph: parses the target spec, decides
# whether the region is to be ignored and whether its contents are resolved,
# pushes the paragraph onto @$curr_open as a '=for' region, and fires a 'for'
# start event unless the region is ignored.
# NOTE(review): the `sub` line is not visible in this listing; presumably this
# is _ponder_begin, matching the call in _ponder_paragraph_buffer -- confirm.
901 my ($self,$para,$curr_open,$paras) = @_;
902 my $content = join ' ', splice @$para, 2;
903 $content =~ s/^\s+//s;
904 $content =~ s/\s+$//s;
905 unless(length($content)) {
907 $para->[1]{'start_line'},
908 "=begin without a target?"
910 DEBUG and print "Ignoring targetless =begin\n";
# First word is the target spec; anything after it is kept as the title.
914 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
915 $para->[1]{'title'} = $title if ($title);
916 $para->[1]{'target'} = $target; # without any ':'
917 $content = $target; # strip off the title
# Normalise ':!' to '!:' so the two prefixes can be stripped in a fixed order.
919 $content =~ s/^:!/!:/s;
920 my $neg; # whether this is a negation-match
921 $neg = 1 if $content =~ s/^!//s;
922 my $to_resolve; # whether to process formatting codes
923 $to_resolve = 1 if $content =~ s/^://s;
925 my $dont_ignore; # whether this target matches us
# The spec may list several comma-separated target names; the first one found
# in $self->{'accept_targets'} wins.
927 foreach my $target_name (
928 split(',', $content, -1),
932 print " Considering whether =begin $content matches $target_name\n";
933 next unless $self->{'accept_targets'}{$target_name};
936 print " It DOES match the acceptable target $target_name!\n";
938 if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
940 $para->[1]{'target_matching'} = $target_name;
941 last; # stop looking at other target names
947 delete $para->[1]{'target_matching'};
948 DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
951 $para->[1]{'target_matching'} = '!';
952 DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
956 $para->[0] = '=for'; # Just what we happen to call these, internally
957 $para->[1]{'~really'} ||= '=begin';
958 $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
959 $para->[1]{'~resolve'} = $to_resolve || 0;
961 DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
962 "ignore contents of this region\n";
963 DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
964 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
965 DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
# The paragraph itself is reused as the stack entry for the open region.
967 push @$curr_open, $para;
968 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
969 DEBUG > 1 and print "Ignoring ignorable =begin\n";
971 $self->{'content_seen'} ||= 1;
972 $self->_handle_element_start((my $scratch='for'), $para->[1]);
# Handles an "=end TARGET" paragraph: validates the target against the
# innermost open region and, when it matches, fires a 'for' end event unless
# an ignored region is open.
# NOTE(review): the `sub` line is not visible in this listing; presumably this
# is _ponder_end, matching the call in _ponder_paragraph_buffer -- confirm.
979 my ($self,$para,$curr_open,$paras) = @_;
980 my $content = join ' ', splice @$para, 2;
981 $content =~ s/^\s+//s;
982 $content =~ s/\s+$//s;
983 DEBUG and print "Ogling '=end $content' directive\n";
985 unless(length($content)) {
987 $para->[1]{'start_line'},
# When a =for region is open, the message suggests the target that was expected.
988 "'=end' without a target?" . (
989 ( @$curr_open and $curr_open->[-1][0] eq '=for' )
990 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
994 DEBUG and print "Ignoring targetless =end\n";
998 unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1000 $para->[1]{'start_line'},
1001 "'=end $content' is invalid. (Stack: "
1002 . $self->_dump_curr_open() . ')'
1004 DEBUG and print "Ignoring mistargetted =end $content\n";
1008 unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1010 $para->[1]{'start_line'},
1011 "=end $content without matching =begin. (Stack: "
1012 . $self->_dump_curr_open() . ')'
1014 DEBUG and print "Ignoring mistargetted =end $content\n";
# The =end target must equal the stored target of the innermost open region.
1018 unless($content eq $curr_open->[-1][1]{'target'}) {
1020 $para->[1]{'start_line'},
1021 "=end $content doesn't match =begin "
1022 . $curr_open->[-1][1]{'target'}
1024 . $self->_dump_curr_open() . ')'
1026 DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1030 # Else it's okay to close...
1031 if(grep $_->[1]{'~ignore'}, @$curr_open) {
1032 DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1033 # And that may be because of this to-be-closed =for region, or some
1034 # other one, but it doesn't matter.
# Overwrite the open region's start_line with that of this =end paragraph.
1036 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1039 $self->{'content_seen'} ||= 1;
1040 $self->_handle_element_end( my $scratch = 'for' );
1042 DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
# _ponder_doc_end($para, $curr_open, $paras): handle the virtual '~end'
# paragraph.
# Visible steps, in order:
#   1. if regions are still open, queue closers for them (from
#      _closers_for_all_curr_open) and re-queue exactly two '~end' markers;
#   2. once only (guarded by $self->{'~tried_gen_errata'}), queue the errata
#      paragraphs from _gen_errata and return 1 to loop again;
#   3. otherwise empty the buffer, fire the 'Document' end event and return 1.
# NOTE(review): some return statements and closing braces are not visible in
# this listing.
1048 sub _ponder_doc_end {
1049 my ($self,$para,$curr_open,$paras) = @_;
1050 if(@$curr_open) { # Deal with things left open
1051 DEBUG and print "Stack is nonempty at end-document: (",
1052 $self->_dump_curr_open(), ")\n";
1054 DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1055 unshift @$paras, $self->_closers_for_all_curr_open;
1056 # Make sure there is exactly one ~end in the parastack, at the end:
1057 @$paras = grep $_->[0] ne '~end', @$paras;
1058 push @$paras, $para, $para;
1059 # We need two -- once for the next cycle where we
1060 # generate errata, and then another to be at the end
1061 # when that loops back around to process the errata.
1065 DEBUG and print "Okay, stack is empty now.\n";
1068 # Try generating errata section, if applicable
1069 unless($self->{'~tried_gen_errata'}) {
1070 $self->{'~tried_gen_errata'} = 1;
1071 my @extras = $self->_gen_errata();
1073 unshift @$paras, @extras;
1074 DEBUG and print "Generated errata... relooping...\n";
1075 return 1; # I.e., loop around again to process these fake-o paragraphs
1079 splice @$paras; # Well, that's that for this paragraph buffer.
1080 DEBUG and print "Throwing end-document event.\n";
1082 $self->_handle_element_end( my $scratch = 'Document' );
1083 return 1; # Hasta la byebye
# Handles a "=pod" paragraph: its content is never used, and the visible
# message complains when the directive has content lines.
# NOTE(review): the `sub` line, the whine call opener and its guarding
# condition are not visible in this listing; presumably this is _ponder_pod --
# confirm.
1087 my ($self,$para,$curr_open,$paras) = @_;
1089 $para->[1]{'start_line'},
1090 "=pod directives shouldn't be over one line long! Ignoring all "
# @$para holds the name, the attribute hash, then content lines -- hence "- 2".
1091 . (@$para - 2) . " lines of content"
1093 # Content is always ignored.
# Handles an "=over [indent]" paragraph: looks at the NEXT buffered paragraph
# to choose the list type, records it as '~type' on the =over, pushes the
# =over onto @$curr_open, validates the indent and fires an
# 'over-<type>' start event.
# NOTE(review): the `sub` line is not visible in this listing; presumably this
# is _ponder_over, matching the call in _ponder_paragraph_buffer -- confirm.
1098 my ($self,$para,$curr_open,$paras) = @_;
1099 return 1 unless @$paras;
1102 if($paras->[0][0] eq '=item') { # most common case
1103 $list_type = $self->_get_initial_item_type($paras->[0]);
1105 } elsif($paras->[0][0] eq '=back') {
1106 # Ignore empty lists. TODO: make this an option?
1110 } elsif($paras->[0][0] eq '~end') {
1112 $para->[1]{'start_line'},
1113 "=over is the last thing in the document?!"
1115 return 1; # But feh, ignore it.
# Anything else following the =over makes it a 'block' region.
1117 $list_type = 'block';
1119 $para->[1]{'~type'} = $list_type;
1120 push @$curr_open, $para;
1121 # yes, we reuse the paragraph as a stack item
1123 my $content = join ' ', splice @$para, 2;
# Indent handling: empty content means the default of 4; a number (optionally
# with a fractional part, e.g. "4", "4.5" or ".5") is taken as given; other
# cases visible below fall back to 4 after a complaint.
1125 if($content =~ m/^\s*$/s) {
1126 $para->[1]{'indent'} = 4;
1127 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
1129 $para->[1]{'indent'} = $1;
1132 $para->[1]{'start_line'},
1133 "Can't have a 0 in =over $content"
1135 $para->[1]{'indent'} = 4;
1139 $para->[1]{'start_line'},
1140 "=over should be: '=over' or '=over positive_number'"
1142 $para->[1]{'indent'} = 4;
1144 DEBUG > 1 and print "=over found of type $list_type\n";
1146 $self->{'content_seen'} ||= 1;
1147 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
# Handles a "=back" paragraph: complains about any parameters, then closes the
# innermost =over region by popping it from @$curr_open and firing the
# matching 'over-<type>' end event. A =back with no open =over is reported and
# ignored.
# NOTE(review): the `sub` line is not visible in this listing; presumably this
# is _ponder_back, matching the call in _ponder_paragraph_buffer -- confirm.
1153 my ($self,$para,$curr_open,$paras) = @_;
1154 # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1156 my $content = join ' ', splice @$para, 2;
1157 if($content =~ m/\S/) {
1159 $para->[1]{'start_line'},
1160 "=back doesn't take any parameters, but you said =back $content"
1164 if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1165 DEBUG > 1 and print "=back happily closes matching =over\n";
1166 # Expected case: we're closing the most recently opened thing
1167 #my $over = pop @$curr_open;
1168 $self->{'content_seen'} ||= 1;
# The pop both removes the region from the stack and supplies its '~type' for
# the end-event name.
1169 $self->_handle_element_end( my $scratch =
1170 'over-' . ( (pop @$curr_open)->[1]{'~type'} )
1173 DEBUG > 1 and print "=back found without a matching =over. Stack: (",
1174 join(', ', map $_->[0], @$curr_open), ").\n";
1176 $para->[1]{'start_line'},
1177 '=back without =over'
1179 return 1; # and ignore it
# Handle an '=item' paragraph according to the type of the '=over' region it
# sits in.
# NOTE(review): the enclosing `sub` line is not visible in this excerpt;
# presumably this is the body of _ponder_item -- confirm.
#   $self      - the parser object
#   $para      - the paragraph, ['=item', {attrs}, ...content...]
#   $curr_open - arrayref used as the stack of currently open regions
#   $paras     - the pending paragraph buffer; a following '~Para' may be
#                pulled out of it, and $para may be pushed back onto its front
# On the paths that reach the last line, the paragraph name gets the list
# type appended ('=item' . '-' . type).
1184 my ($self,$para,$curr_open,$paras) = @_;
# An =item is only acceptable when the innermost open region is an '=over';
# $over is set to that region as a side effect of the test.
1186 unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
1188 $para->[1]{'start_line'},
1189 "'=item' outside of any '=over'"
# A stand-in '=over' paragraph carrying this item's start line.
# NOTE(review): what is done with it is not visible here -- presumably it is
# queued ahead of the item so that the item gets reconsidered; confirm.
1192 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
# The list type was recorded on the '=over' element under '~type'.
1199 my $over_type = $over->[1]{'~type'};
# An '=over' on the stack without a type is treated as an internal error.
1203 die "Typeless over in stack, starting at line "
1204 . $over->[1]{'start_line'};
# --- 'block' lists: =item is not allowed here -------------------------------
1206 } elsif($over_type eq 'block') {
# Complain only once per '=over'; the flag lives on the '=over' element.
1207 unless($curr_open->[-1][1]{'~bitched_about'}) {
1208 $curr_open->[-1][1]{'~bitched_about'} = 1;
1210 $curr_open->[-1][1]{'start_line'},
1211 "You can't have =items (as at line "
1212 . $para->[1]{'start_line'}
1213 . ") unless the first thing after the =over is an =item"
1216 # Just turn it into a paragraph and reconsider it
1217 $para->[0] = '~Para';
1218 unshift @$paras, $para;
# --- 'text' lists -----------------------------------------------------------
1221 } elsif($over_type eq 'text') {
1222 my $item_type = $self->_get_item_type($para);
1223 # That kills the content of the item if it's a number or bullet.
# NOTE(review): this prints the paragraph name ($para->[0]), not $item_type.
1224 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1226 if($item_type eq 'text') {
1227 # Nothing special needs doing for 'text'
1228 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
# NOTE(review): this die can never fire -- the elsif condition just above
# already guarantees that $item_type is 'number' or 'bullet'.
1229 die "Unknown item type $item_type"
1230 unless $item_type eq 'number' or $item_type eq 'bullet';
1231 # Undo our clobbering:
1232 push @$para, $para->[1]{'~orig_content'};
1233 delete $para->[1]{'number'};
1234 # Only a PROPER item-number element is allowed
1235 # to have a number attribute.
1237 die "Unhandled item type $item_type"; # should never happen
1240 # =item-text thingies don't need any assimilation, it seems.
# --- 'number' lists ---------------------------------------------------------
1242 } elsif($over_type eq 'number') {
1243 my $item_type = $self->_get_item_type($para);
1244 # That kills the content of the item if it's a number or bullet.
1245 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
# The running item count is kept on the '=over' element under '~counter';
# incrementing it here yields the number this item ought to have.
1247 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1249 if($item_type eq 'bullet') {
1250 # Hm, it's not numeric. Correct for this.
1251 $para->[1]{'number'} = $expected_value;
1253 $para->[1]{'start_line'},
1254 "Expected '=item $expected_value'"
1256 push @$para, $para->[1]{'~orig_content'};
1257 # restore the bullet, blocking the assimilation of next para
1259 } elsif($item_type eq 'text') {
1260 # Hm, it's not numeric. Correct for this.
1261 $para->[1]{'number'} = $expected_value;
1263 $para->[1]{'start_line'},
1264 "Expected '=item $expected_value'"
1266 # Text content will still be there and will block next ~Para
1268 } elsif($item_type ne 'number') {
1269 die "Unknown item type $item_type"; # should never happen
# From here on the item is numeric: either it carries the expected number...
1271 } elsif($expected_value == $para->[1]{'number'}) {
1272 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
# ...or it does not, in which case it is reported and overwritten.
1275 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1276 " instead of the expected value of $expected_value\n";
1278 $para->[1]{'start_line'},
1279 "You have '=item " . $para->[1]{'number'} .
1280 "' instead of the expected '=item $expected_value'"
1282 $para->[1]{'number'} = $expected_value; # correcting!!
1286 # For the cases where we /didn't/ push to @$para
# If the next buffered paragraph is a plain '~Para', take it out of the
# buffer and move its content (everything after its attribute hash) into
# this item.
1287 if($paras->[0][0] eq '~Para') {
1288 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1289 push @$para, splice @{shift @$paras},2;
1291 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1292 push @$para, ''; # Just so it's not contentless
# --- 'bullet' lists ---------------------------------------------------------
1297 } elsif($over_type eq 'bullet') {
1298 my $item_type = $self->_get_item_type($para);
1299 # That kills the content of the item if it's a number or bullet.
1300 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1302 if($item_type eq 'bullet') {
# If a '~_freaky_para_hack' attribute is present, remove it from the
# attributes and append its value as this item's content.
1305 if( $para->[1]{'~_freaky_para_hack'} ) {
1306 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1307 push @$para, delete $para->[1]{'~_freaky_para_hack'};
1310 } elsif($item_type eq 'number') {
1312 $para->[1]{'start_line'},
1313 "Expected '=item *'"
1315 push @$para, $para->[1]{'~orig_content'};
1316 # and block assimilation of the next paragraph
1317 delete $para->[1]{'number'};
1318 # Only a PROPER item-number element is allowed
1319 # to have a number attribute.
1320 } elsif($item_type eq 'text') {
1322 $para->[1]{'start_line'},
1323 "Expected '=item *'"
1325 # But doesn't need processing. But it'll block assimilation
1328 die "Unhandled item type $item_type"; # should never happen
1332 # For the cases where we /didn't/ push to @$para
# Same assimilation of a following '~Para' as in the 'number' branch.
1333 if($paras->[0][0] eq '~Para') {
1334 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1335 push @$para, splice @{shift @$paras},2;
1337 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1338 push @$para, ''; # Just so it's not contentless
1343 die "Unhandled =over type \"$over_type\"?";
# Rename the element after the enclosing list type, e.g. '=item-bullet'.
1346 $para->[0] .= '-' . $over_type;
# Give a paragraph "plain" treatment: turn its raw text into a treelet.
# NOTE(review): the enclosing `sub` line is not visible in this excerpt;
# presumably this is the body of _ponder_Plain -- confirm.
#   $self - the parser object
#   $para - the paragraph, [name, {attrs}, ...text lines...]
1352 my ($self,$para) = @_;
1353 DEBUG and print " giving plain treatment...\n";
# Skip the work when the paragraph has no content (nothing after the
# attribute hash, or just one empty string) or is flagged '~cooked'.
1354 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1355 or $para->[1]{'~cooked'}
# The text lines are removed from the paragraph, joined with newlines and
# handed to _make_treelet together with the paragraph's start line.
1358 @{$self->_make_treelet(
1359 join("\n", splice(@$para, 2)),
1360 $para->[1]{'start_line'}
1363 # Empty paragraphs don't need a treelet for any reason I can see.
1364 # And precooked paragraphs already have a treelet.
# Cook a Verbatim paragraph in place.
#
#   $self - the parser; consulted for strip_verbatim_indent, the
#           'accept_codes' table and the 'codes_in_verbatim' flag
#   $para - ['Verbatim', {attrs}, line, line, ...]; rewritten in place
#
# Steps:
#   1. mark the paragraph as xml:space="preserve";
#   2. work out the indent to strip (a string, or a code ref that is called
#      with an arrayref holding only the paragraph's lines and returns it);
#   3. strip that indent from every line (skipped when VerbatimFormatted is
#      an accepted code) and expand tabs to eight-column tab stops;
#   4. finish according to mode: VerbatimFormatted handling, formatting
#      codes inside verbatim, or a plain newline-joined single string.
sub _ponder_Verbatim {
  my ($self,$para) = @_;
  DEBUG and print " giving verbatim treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  my $indent = $self->strip_verbatim_indent;
  if ($indent && ref $indent eq 'CODE') {
      # Hide the name and the attribute hash so that the callback sees
      # nothing but the lines, then put them back.
      my @shifted = (shift @{$para}, shift @{$para});
      $indent = $indent->($para);
      unshift @{$para}, @shifted;
  }

  # Indentation is left alone when the paragraph is going to be handed to
  # _verbatim_format below.
  my $strip_indent = $indent
    && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});

  for(my $i = 2; $i < @$para; $i++) {
    foreach my $line ($para->[$i]) { # just for aliasing
      # Strip indentation.  The indent is a literal string, not a pattern,
      # so it must be quoted: a bare \E (with no \Q before it) quotes
      # nothing and would let characters such as "." or "*" in the indent
      # act as regex metacharacters.
      $line =~ s/^\Q$indent\E// if $strip_indent;

      # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
      # tabs are at every EIGHTH column.  For portability, it has to be
      # one setting everywhere, and 8th wins.
      # Each pass expands the first run of tabs; repeat until none is left.
      while( $line =~
        s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
      ) {}

      # TODO: whinge about (or otherwise treat) unindented or overlong lines
    }
  }

  # Now the VerbatimFormatted hoodoo...
  if( $self->{'accept_codes'} and
      $self->{'accept_codes'}{'VerbatimFormatted'}
  ) {
    while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
     # Kill any number of terminal newlines
    $self->_verbatim_format($para);
  } elsif ($self->{'codes_in_verbatim'}) {
    # Parse formatting codes, keeping whitespace as it is.
    push @$para,
    @{$self->_make_treelet(
      join("\n", splice(@$para, 2)),
      $para->[1]{'start_line'}, $para->[1]{'xml:space'}
    )};
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  } else {
    # Plain verbatim: one string, lines joined by newlines.
    push @$para, join "\n", splice(@$para, 2) if @$para > 3;
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  }
  return;
}
# Give a paragraph "data" treatment: keep its text untouched.
# NOTE(review): the enclosing `sub` line is not visible in this excerpt;
# presumably this is the body of _ponder_Data -- confirm.
#   $self - the parser object
#   $para - the paragraph, [name, {attrs}, ...text lines...]
1420 my ($self,$para) = @_;
1421 DEBUG and print " giving data treatment...\n";
1422 $para->[1]{'xml:space'} = 'preserve';
# When there is more than one line, replace them with a single string in
# which the lines are joined by newlines.
1423 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1430 ###########################################################################
# Walk one treelet node and emit events for it and everything beneath it.
# Called as _traverse_treelet_bit($self, $name, $attrs, @children).
1432 sub _traverse_treelet_bit { # for use only by the routine above
# Take the parser and the element name off the argument list; the attribute
# hash and the children are left in @_.
1433 my($self, $name) = splice @_,0,2;
# The handler is given a copy of the name ($scratch) rather than $name
# itself, together with the attribute hash shifted off @_.
1436 $self->_handle_element_start(($scratch=$name), shift @_);
# What remains in @_ are the children.
1438 foreach my $x (@_) {
# One child is handled by recursing with its elements as the new argument
# list, the other is passed to the text handler.
# NOTE(review): the test choosing between the two is not visible here;
# presumably references recurse and plain strings are text -- confirm.
1440 &_traverse_treelet_bit($self, @$x);
1442 $self->_handle_text($x);
1446 $self->_handle_element_end($scratch=$name);
1450 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Build fake paragraphs that would close every region that is still open,
# collecting them in @closers.
1452 sub _closers_for_all_curr_open {
# Bails out of the sub (via the `return` inside the list expression) when
# there is no 'curr_open' stack at all.
1455 foreach my $still_open (@{ $self->{'curr_open'} || return }) {
# Work on a copy of the element and of its attribute hash, so the entry on
# the stack itself is left untouched.
1456 my @copy = @$still_open;
1457 $copy[1] = {%{ $copy[1] }};
1458 #$copy[1]{'start_line'} = -1;
# Only '=for' and '=over' regions are known here; anything else dies.
# NOTE(review): what each of the two branches does is not visible in this
# excerpt.
1459 if($copy[0] eq '=for') {
1461 } elsif($copy[0] eq '=over') {
1464 die "I don't know how to auto-close an open $copy[0] region";
# Make sure the fake paragraph has a content element: use the 'target'
# attribute, or an empty string when there is none.
1467 unless( @copy > 2 ) {
1468 push @copy, $copy[1]{'target'};
1469 $copy[-1] = '' unless defined $copy[-1];
1470 # since =over's don't have targets
1473 DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
# unshift, not push: @closers ends up in the reverse of the stack order.
1474 unshift @closers, \@copy;
1479 #--------------------------------------------------------------------------
# Turn a Verbatim paragraph into a VerbatimFormatted one, in place.
#
#   $self - the parser (not consulted)
#   $p    - ['Verbatim', {attrs}, line, line, ...]; rewritten in place
#
# A "formatty" line starts with "#:" in its first two columns and is
# otherwise made of spaces, "^" (bold), "/" (italic) and "%" (bold italic).
# It describes the line directly above it, column by column.  Each such
# pair of lines is replaced by a run of text strings and
# ['VerbatimB'|'VerbatimI'|'VerbatimBI', {}, text] nodes.  Afterwards the
# paragraph is renamed to 'VerbatimFormatted', neighbouring text strings
# are merged, and the newline at the end of the last text token is removed.
sub _verbatim_format {
  my($self, $p) = @_;

  for my $i (2 .. $#$p) { # work forwards over the lines
    DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
    $p->[$i] .= "\n";
     # Unlike with simple Verbatim blocks, we don't end up just doing
     # a join("\n", ...) on the contents, so we have to append a
     # newline to every line, and then nix the last one later.
  }

  if( DEBUG > 4 ) {
    print "<<\n";
    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
      print "_verbatim_format $i: $p->[$i]";
    }
    print ">>\n";
  }

  for(my $i = $#$p; $i > 2; $i--) {
    # work backwards over the lines, except the first (#2)

    # look at a formatty line preceding a nonformatty one
    DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
    my ($marks) = $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s;
    unless( defined $marks ) {
      DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
      next;
    }

    DEBUG > 5 and print " It's a formatty line. ",
     "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
    if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
      DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
      next;
    }
    DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";

    # A formatty line has to have #: in the first two columns, and uses
    # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
    # Example:
    #   What do you want?  i like pie. [or whatever]
    # #:^^^^^^^^^^^^^^^^^              /////////////

    DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";

    # Two columns of padding stand in for the "#:" marker, so that every
    # formatting character keeps the column of the text it applies to.
    my $formatting = '  ' . $marks;
    $formatting =~ s/\s+$//s; # nix trailing whitespace
    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
      splice @$p,$i,1; # remove this line
      $i--; # don't consider next line
      next;
    }

    if( length($formatting) >= length($p->[$i-1]) ) {
      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
    } else {
      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
    }
    # Make $formatting and the previous line be exactly the same length,
    # with $formatting having a " " as the last character.

    DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";

    # Cut the text line into pieces, one per run of identical formatting
    # characters; pos($formatting) is the end of the current run.
    my @new_line;
    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
      my $piece =
        substr($p->[$i-1], pos($formatting)-length($1), length($1));
      if($2) {
        # A run of spaces: plain text.
        push @new_line, $piece;
      } else {
        push @new_line, [
          (
            $3 ? 'VerbatimB'  :
            $4 ? 'VerbatimI'  :
            $5 ? 'VerbatimBI' : die("Should never get called")
          ), {},
          $piece
        ];
      }
    }
    my @nixed =
      splice @$p, $i-1, 2, @new_line; # replace myself and the next line
    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";

    DEBUG > 6 and print "New version of the above line is these tokens (",
      scalar(@new_line), "):",
      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
    $i--; # So the next line we scrutinize is the line before the one
          #  that we just went and formatted
  }

  $p->[0] = 'VerbatimFormatted';

  # Collapse adjacent text nodes, just for kicks.
  # Work forwards over the tokens except for the last.  The loop has to run
  # while $i is BELOW the last index; testing "$i > $#$p" is false on entry
  # for every paragraph that has content, so nothing would ever be merged.
  my $at = 2;
  while( $at < $#$p ) {
    if( !ref($p->[$at]) and !ref($p->[$at + 1]) ) {
      DEBUG > 5 and print "_verbatim_format merges {$p->[$at]} and {$p->[$at+1]}\n";
      $p->[$at] .= splice @$p, $at+1, 1; # merge, and look at this slot again
    } else {
      $at++;
    }
  }

  # Now look for the last text token, and remove the terminal newline
  for( my $i = $#$p; $i >= 2; $i-- ) {
    # work backwards over the tokens, even the first
    if( !ref($p->[$i]) ) {
      if($p->[$i] =~ s/\n$//s) {
        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
      } else {
        DEBUG > 5 and print
         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
      }
      last; # we only want the next one
    }
  }

  return;
}
1613 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Tokenize one paragraph's text and build a treelet from it.
#   $self           - the parser; its 'preserve_whitespace' and
#                     'fullstop_space_harden' settings are consulted
#   $para           - the paragraph text, as one string
#   $start_line     - stored on the '~Top' node and used for diagnostics
#   $preserve_space - true to leave whitespace as it is
1616 sub _treelet_from_formatting_codes {
1617 # Given a paragraph, returns a treelet. Full of scary tokenizing code.
1618 # Like [ '~Top', {'start_line' => $start_line},
1620 # [ 'B', {}, "pie" ],
1624 my($self, $para, $start_line, $preserve_space) = @_;
1626 my $treelet = ['~Top', {'start_line' => $start_line},];
# Whitespace is normalized unless the caller or the parser asks for it to
# be preserved.
1628 unless ($preserve_space || $self->{'preserve_whitespace'}) {
1629 $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'};
1631 $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1636 # The only apparent problem with the above code is that a code such as
1637 # N<< >> whose content is a run of several whitespace characters ends up
# holding a single space.  But then, word wrapping does that too!  So don't
# do that!
# @lineage is the stack of nodes currently being filled; its last element
# receives whatever is parsed next (see the pushes onto $lineage[-1] below).
1640 my @lineage = ($treelet);
1642 DEBUG > 4 and print "Paragraph:\n$para\n\n";
1644 # Here begins our frightening tokenizer RE. The following regex matches
1645 # text in four main parts:
1647 # * Start-codes. The first alternative matches C< or C<<, the latter
1648 # followed by some whitespace. $1 will hold the entire start code
1649 # (including any space following a multiple-angle-bracket delimiter),
1650 # and $2 will hold only the additional brackets past the first in a
1651 # multiple-bracket delimiter. length($2) + 1 will be the number of
1652 # closing brackets we have to find.
1654 # * Closing brackets. Match some amount of whitespace followed by
1655 # multiple close brackets. The logic to see if this closes anything
1656 # is down below. Note that in order to parse C<< >> correctly, we
1657 # have to use look-behind (?<=\s\s), since the match of the starting
1658 # code will have consumed the whitespace.
1660 # * A single closing bracket, to close a simple code like C<>.
1662 # * Something that isn't a start or end code. We have to be careful
1663 # about accepting whitespace, since perlpodspec says that any whitespace
1664 # before a multiple-bracket closing delimiter should be ignored.
1669 # Match starting codes, including the whitespace following a
1670 # multiple-delimiter start code. $1 gets the whole start code and
1671 # $2 gets all but one of the <s in the multiple-bracket case.
1672 ([A-Z]<(?:(<+)\s+)?)
1674 # Match multiple-bracket end codes. $3 gets the whitespace that
1675 # should be discarded before an end bracket but kept in other cases
1676 # and $4 gets the end brackets themselves.
1677 (\s+|(?<=\s\s))(>{2,})
1679 (\s?>) # $5: simple end-codes
1681 ( # $6: stuff containing no start-codes or end-codes
1689 # whitespace is ok, but we don't want to eat the whitespace before
1690 # a multiple-bracket end code.
1691 # NOTE: we may still have problems with e.g. S<< >>
# @stack runs parallel to the open codes: for a multiple-bracket code it
# holds the number of closing brackets required (length($2) + 1), and a
# false value stands for a simple code (see the `! $stack[-1]` tests below).
1700 DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
# --- start code with multiple brackets ---
1703 DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1704 # signal that we're looking for simple unless we're in complex.
1706 # We're in complex already. It's just stuff.
1707 DEBUG > 4 and print " It's just stuff.\n";
1708 push @{ $lineage[-1] }, $1;
1710 # length of the necessary complex end-code string
1711 push @stack, length($2) + 1;
# Open a new node named after the code letter and hang it on its parent.
1712 push @lineage, [ substr($1,0,1), {}, ]; # new node object
1713 push @{ $lineage[-2] }, $lineage[-1];
# --- start code with a single bracket ---
1716 DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1718 # We're in complex already. It's just stuff.
1719 DEBUG > 4 and print " It's just stuff.\n";
1720 push @{ $lineage[-1] }, $1;
1722 # signal that we're looking for simple.
1724 push @lineage, [ substr($1,0,1), {}, ]; # new node object
1725 push @{ $lineage[-2] }, $lineage[-1];
# --- two or more closing brackets ---
1728 } elsif(defined $4) {
1729 DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1730 # This is where it gets messy...
1732 # We saw " >>>>" but needed nothing. This is ALL just stuff then.
1733 DEBUG > 4 and print " But it's really just stuff.\n";
1734 push @{ $lineage[-1] }, $3, $4;
1736 } elsif(!$stack[-1]) {
1737 # We saw " >>>>" but needed only ">". Back pos up.
1738 DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1739 push @{ $lineage[-1] }, $3; # That was a for-real space, too.
# Rewind so that all but the first ">" are tokenized again.
1740 pos($para) = pos($para) - length($4) + 1;
1741 } elsif($stack[-1] == length($4)) {
1742 # We found " >>>>", and it was exactly what we needed. Commonest case.
1743 DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1744 } elsif($stack[-1] < length($4)) {
1745 # We saw " >>>>" but needed only " >>". Back pos up.
1746 DEBUG > 4 and print " And that's more than we needed to close complex.\n";
# Rewind so that the brackets beyond the required number are tokenized again.
1747 pos($para) = pos($para) - length($4) + $stack[-1];
1749 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1750 DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1751 push @{ $lineage[-1] }, $3, $4;
1754 #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
# A node holding only its name and attribute hash gets an empty string.
1756 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1757 # Keep the element from being childless
# --- a single closing bracket, possibly preceded by one whitespace char ---
1762 } elsif(defined $5) {
1763 DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1765 if(@stack and ! $stack[-1]) {
1766 # We're indeed expecting a simple end-code
1767 DEBUG > 4 and print " It's indeed an end-code.\n";
1769 if(length($5) == 2) { # There was a space there: " >"
1770 push @{ $lineage[-1] }, ' ';
1771 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1772 push @{ $lineage[-1] }, ''; # keep it from being really childless
1778 DEBUG > 4 and print " It's just stuff.\n";
1779 push @{ $lineage[-1] }, $5;
# --- ordinary text ---
1782 } elsif(defined $6) {
1783 DEBUG > 3 and print "Found stuff \"$6\"\n";
1784 push @{ $lineage[-1] }, $6;
1787 # should never ever ever ever happen
1788 DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1789 die "SPORK 512512!";
# Anything left on @stack is a code that was opened but never closed.
1793 if(@stack) { # Uhoh, some sequences weren't closed.
1796 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
# Take the unclosed node and its bracket count off their stacks, and rebuild
# a source-like rendering of the code ($x) for the warning.
1799 my $code = (pop @lineage)->[0];
1800 my $ender_length = pop @stack;
1803 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1805 $x = $code . "<$x>";
1808 DEBUG > 1 and print "Unterminated $x sequence\n";
1809 $self->whine($start_line,
1810 "Unterminated $x sequence",
1817 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Method form of stringify_lol: the invocant is ignored and the first real
# argument (the treelet / list-of-lists) is passed on.
1819 sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1820 return stringify_lol($_[1]);
# Returns the string that _stringify_lol accumulates for the given
# list-of-lists; the result starts out as the empty string.
1823 sub stringify_lol { # function: stringify_lol($lol)
1824 my $string_form = '';
# The accumulator is passed by reference so the recursion can append to it.
1825 _stringify_lol( $_[0] => \$string_form );
1826 return $string_form;
# Recursive worker behind stringify_lol.
# NOTE(review): the line unpacking the arguments is not visible in this
# excerpt; from the calls, they are the node ($lol) and a reference to the
# output string ($to).
1829 sub _stringify_lol { # the real recursor
# Children start at index 2; elements 0 and 1 of the node are skipped.
1832 for(my $i = 2; $i < @$lol; ++$i) {
# A child that is an array reference (blessed or not) is a nested node.
1833 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1834 _stringify_lol( $lol->[$i], $to); # recurse!
1842 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Debugging aid describing the parser's stack of open regions.
1844 sub _dump_curr_open { # return a string representation of the stack
1845 my $curr_open = $_[0]{'curr_open'};
# NOTE(review): an undefined 'curr_open' would make the dereference below
# fail under strict refs -- confirm callers always set it.
1847 return '[empty]' unless @$curr_open;
# For this kind of entry the description is the '~really' attribute (or
# '=over' when that is not set), a space, and the 'target' attribute.
# NOTE(review): the condition selecting this case is not visible here.
1851 ? ( ($_->[1]{'~really'} || '=over')
1852 . ' ' . $_->[1]{'target'})
1859 ###########################################################################
# Entries mapping a control character to the backslash escape it is shown as.
# NOTE(review): the table these belong to is not visible in this excerpt;
# presumably it is the %pretty_form hash that pretty() consults -- confirm.
1861 "\a" => '\a', # ding!
1868 "\n" => '\n', # probably overrides one of either \cm or \cj
# Render a list of values as Perl-like source text, for debugging output.
# Array references come out as "[ ... ]", scalar references with a leading
# backslash, hash references as "{key=>value, ...}" with sorted keys, and
# the empty string as ''.  Other strings have their unusual characters
# escaped.
1877 sub pretty { # adopted from Class::Classless
1878 # Not the most brilliant routine, but passable.
1879 # Don't give it a cyclic data structure!
# Work on a copy so the caller's values are not modified by the
# substitutions further down.
1880 my @stuff = @_; # copy
# Containers are rendered by recursing on their contents.
1888 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1889 $x = "[ " . pretty(@$_) . " ]" ;
1891 } elsif(ref($_) eq 'SCALAR') {
1892 $x = "\\" . pretty($$_) ;
1894 } elsif(ref($_) eq 'HASH') {
1896 $x = "{" . join(", ",
1897 map(pretty($_) . '=>' . pretty($hr->{$_}),
1898 sort keys %$hr ) ) . "}" ;
1900 } elsif(!length($_)) { q{''} # empty string
# Values that look like plain numbers are recognized next.
# NOTE(review): what is done with them is not visible in this excerpt.
1902 $_ eq '0' # very common case
1904 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1905 and $_ ne '-0' # the strange case that that RE lets thru
# On an ASCII platform, every character outside the listed ranges is
# replaced by its %pretty_form entry or, failing that, by \x{...} with the
# character's code in hex.
1909 if( chr(65) eq 'A' ) {
1910 s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1911 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1912 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1914 # We're in some crazy non-ASCII world!
# Elsewhere, everything except ASCII letters and digits is escaped that way.
1915 s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1916 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1917 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1922 # $out =~ s/\n */ /g if length($out) < 75;
1926 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1928 # A rather unsubtle method of blowing away all the state information
1929 # from a parser object so it can be reused. Provided as a utility for
1930 # backward compatibility in Pod::Man, etc. but not recommended for
# NOTE(review): the rest of this sentence, the `sub` line and the loop body
# are not visible in this excerpt; presumably this is reinit() and each
# listed key is removed from the parser hash -- confirm.
# The names below are the per-document state keys that get reset.
1935 foreach (qw(source_dead source_filename doc_has_started
1936 start_of_pod_block content_seen last_was_blank paras curr_open
1937 line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
1944 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@