Commit | Line | Data |
351625bd |
1 | |
2 | package Pod::Simple::BlackBox; |
3 | # |
4 | # "What's in the box?" "Pain." |
5 | # |
6 | ########################################################################### |
7 | # |
8 | # This is where all the scary things happen: parsing lines into |
9 | # paragraphs; and then into directives, verbatims, and then also |
10 | # turning formatting sequences into treelets. |
11 | # |
12 | # Are you really sure you want to read this code? |
13 | # |
14 | #----------------------------------------------------------------------------- |
15 | # |
16 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work |
17 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and |
18 | # to call the proper callbacks on the treelets. |
19 | # |
20 | # Every node in a treelet is a ['name', {attrhash}, ...children...] |
21 | |
22 | use integer; # vroom! |
23 | use strict; |
24 | use Carp (); |
25 | BEGIN { |
26 | require Pod::Simple; |
27 | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG |
28 | } |
29 | |
30 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
31 | |
32 | sub parse_line { shift->parse_lines(@_) } # alias |
33 | |
34 | # - - - Turn back now! Run away! - - - |
35 | |
36 | sub parse_lines { # Usage: $parser->parse_lines(@lines) |
37 | # an undef means end-of-stream |
38 | my $self = shift; |
39 | |
40 | my $code_handler = $self->{'code_handler'}; |
41 | my $cut_handler = $self->{'cut_handler'}; |
42 | $self->{'line_count'} ||= 0; |
43 | |
44 | my $scratch; |
45 | |
46 | DEBUG > 4 and |
47 | print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; |
48 | |
49 | DEBUG > 5 and |
50 | print "# About to parse lines: ", |
51 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; |
52 | |
53 | my $paras = ($self->{'paras'} ||= []); |
54 | # paragraph buffer. Because we need to defer processing of =over |
55 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer |
56 | # to process this. |
57 | |
58 | $self->{'pod_para_count'} ||= 0; |
59 | |
60 | my $line; |
61 | foreach my $source_line (@_) { |
62 | if( $self->{'source_dead'} ) { |
63 | DEBUG > 4 and print "# Source is dead.\n"; |
64 | last; |
65 | } |
66 | |
67 | unless( defined $source_line ) { |
68 | DEBUG > 4 and print "# Undef-line seen.\n"; |
69 | |
70 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; |
71 | push @$paras, $paras->[-1], $paras->[-1]; |
72 | # So that it definitely fills the buffer. |
73 | $self->{'source_dead'} = 1; |
74 | $self->_ponder_paragraph_buffer; |
75 | next; |
76 | } |
77 | |
78 | |
79 | if( $self->{'line_count'}++ ) { |
80 | ($line = $source_line) =~ tr/\n\r//d; |
81 | # If we don't have two vars, we'll end up with that there |
82 | # tr/// modding the (potentially read-only) original source line! |
83 | |
84 | } else { |
85 | DEBUG > 2 and print "First line: [$source_line]\n"; |
86 | |
87 | if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { |
88 | DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n"; |
89 | $self->_handle_encoding_line( "=encode utf8" ); |
90 | $line =~ tr/\n\r//d; |
91 | |
92 | } elsif( $line =~ s/^\xFE\xFF//s ) { |
93 | DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; |
94 | $self->scream( |
95 | $self->{'line_count'}, |
96 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." |
97 | ); |
98 | splice @_; |
99 | push @_, undef; |
100 | next; |
101 | |
102 | # TODO: implement somehow? |
103 | |
104 | } elsif( $line =~ s/^\xFF\xFE//s ) { |
105 | DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; |
106 | $self->scream( |
107 | $self->{'line_count'}, |
108 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." |
109 | ); |
110 | splice @_; |
111 | push @_, undef; |
112 | next; |
113 | |
114 | # TODO: implement somehow? |
115 | |
116 | } else { |
117 | DEBUG > 2 and print "First line is BOM-less.\n"; |
118 | ($line = $source_line) =~ tr/\n\r//d; |
119 | } |
120 | } |
121 | |
122 | |
123 | DEBUG > 5 and print "# Parsing line: [$line]\n"; |
124 | |
125 | if(!$self->{'in_pod'}) { |
126 | if($line =~ m/^=([a-zA-Z]+)/s) { |
127 | if($1 eq 'cut') { |
128 | $self->scream( |
129 | $self->{'line_count'}, |
130 | "=cut found outside a pod block. Skipping to next block." |
131 | ); |
132 | |
133 | ## Before there were errata sections in the world, it was |
134 | ## least-pessimal to abort processing the file. But now we can |
135 | ## just barrel on thru (but still not start a pod block). |
136 | #splice @_; |
137 | #push @_, undef; |
138 | |
139 | next; |
140 | } else { |
141 | $self->{'in_pod'} = $self->{'start_of_pod_block'} |
142 | = $self->{'last_was_blank'} = 1; |
143 | # And fall thru to the pod-mode block further down |
144 | } |
145 | } else { |
146 | DEBUG > 5 and print "# It's a code-line.\n"; |
147 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) |
148 | if $code_handler; |
149 | # Note: this may cause code to be processed out of order relative |
150 | # to pods, but in order relative to cuts. |
151 | |
152 | # Note also that we haven't yet applied the transcoding to $line |
153 | # by time we call $code_handler! |
154 | |
155 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { |
156 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", |
157 | #$fname = $2 if defined $2; |
158 | #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; |
159 | DEBUG > 1 and print "# Setting nextline to $1\n"; |
160 | $self->{'line_count'} = $1 - 1; |
161 | } |
162 | |
163 | next; |
164 | } |
165 | } |
166 | |
167 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . |
168 | # Else we're in pod mode: |
169 | |
170 | # Apply any necessary transcoding: |
171 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); |
172 | |
173 | # HERE WE CATCH =encoding EARLY! |
174 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { |
175 | $line = $self->_handle_encoding_line( $line ); |
176 | } |
177 | |
178 | if($line =~ m/^=cut/s) { |
179 | # here ends the pod block, and therefore the previous pod para |
180 | DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; |
181 | $self->{'in_pod'} = 0; |
182 | # ++$self->{'pod_para_count'}; |
183 | $self->_ponder_paragraph_buffer(); |
184 | # by now it's safe to consider the previous paragraph as done. |
185 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) |
186 | if $cut_handler; |
187 | |
188 | # TODO: add to docs: Note: this may cause cuts to be processed out |
189 | # of order relative to pods, but in order relative to code. |
190 | |
191 | } elsif($line =~ m/^\s*$/s) { # it's a blank line |
192 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { |
193 | DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; |
194 | push @{$paras->[-1]}, $line; |
195 | } # otherwise it's not interesting |
196 | |
197 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { |
198 | DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; |
199 | } |
200 | |
201 | $self->{'last_was_blank'} = 1; |
202 | |
203 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... |
204 | |
205 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { |
206 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS |
207 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; |
208 | # Note that in "=head1 foo", the WS is lost. |
209 | # Example: ['=head1', {'start_line' => 123}, ' foo'] |
210 | |
211 | ++$self->{'pod_para_count'}; |
212 | |
213 | $self->_ponder_paragraph_buffer(); |
214 | # by now it's safe to consider the previous paragraph as done. |
215 | |
216 | push @$paras, $new; # the new incipient paragraph |
217 | DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; |
218 | |
219 | } elsif($line =~ m/^\s/s) { |
220 | |
221 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { |
222 | DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; |
223 | push @{$paras->[-1]}, $line; |
224 | } else { |
225 | ++$self->{'pod_para_count'}; |
226 | $self->_ponder_paragraph_buffer(); |
227 | # by now it's safe to consider the previous paragraph as done. |
228 | DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; |
229 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; |
230 | } |
231 | } else { |
232 | ++$self->{'pod_para_count'}; |
233 | $self->_ponder_paragraph_buffer(); |
234 | # by now it's safe to consider the previous paragraph as done. |
235 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; |
236 | DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; |
237 | } |
238 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; |
239 | |
240 | } else { |
241 | # It's a non-blank line /continuing/ the current para |
242 | if(@$paras) { |
243 | DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; |
244 | push @{$paras->[-1]}, $line; |
245 | } else { |
246 | # Unexpected case! |
247 | die "Continuing a paragraph but \@\$paras is empty?"; |
248 | } |
249 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; |
250 | } |
251 | |
252 | } # ends the big while loop |
253 | |
254 | DEBUG > 1 and print(pretty(@$paras), "\n"); |
255 | return $self; |
256 | } |
257 | |
258 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
259 | |
260 | sub _handle_encoding_line { |
261 | my($self, $line) = @_; |
262 | |
263 | # The point of this routine is to set $self->{'_transcoder'} as indicated. |
264 | |
265 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; |
266 | DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; |
267 | |
268 | my $e = $1; |
269 | my $orig = $e; |
270 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; |
271 | |
272 | my $enc_error; |
273 | |
274 | # Cf. perldoc Encode and perldoc Encode::Supported |
275 | |
276 | require Pod::Simple::Transcode; |
277 | |
278 | if( $self->{'encoding'} ) { |
279 | my $norm_current = $self->{'encoding'}; |
280 | my $norm_e = $e; |
281 | foreach my $that ($norm_current, $norm_e) { |
282 | $that = lc($that); |
283 | $that =~ s/[-_]//g; |
284 | } |
285 | if($norm_current eq $norm_e) { |
286 | DEBUG > 1 and print "The '=encoding $orig' line is ", |
287 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; |
288 | $enc_error = ''; |
289 | # But that doesn't necessarily mean that the earlier one went okay |
290 | } else { |
291 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; |
292 | DEBUG > 1 and print $enc_error; |
293 | } |
294 | } elsif ( |
295 | # OK, let's turn on the encoding |
296 | do { |
297 | DEBUG > 1 and print " Setting encoding to $e\n"; |
298 | $self->{'encoding'} = $e; |
299 | 1; |
300 | } |
301 | and $e eq 'HACKRAW' |
302 | ) { |
303 | DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; |
304 | |
305 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { |
306 | |
307 | die($enc_error = "WHAT? _transcoder is already set?!") |
308 | if $self->{'_transcoder'}; # should never happen |
309 | require Pod::Simple::Transcode; |
310 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); |
311 | eval { |
312 | my @x = ('', "abc", "123"); |
313 | $self->{'_transcoder'}->(@x); |
314 | }; |
315 | $@ && die( $enc_error = |
316 | "Really unexpected error setting up encoding $e: $@\nAborting" |
317 | ); |
318 | |
319 | } else { |
320 | my @supported = Pod::Simple::Transcode::->all_encodings; |
321 | |
322 | # Note unsupported, and complain |
323 | DEBUG and print " Encoding [$e] is unsupported.", |
324 | "\nSupporteds: @supported\n"; |
325 | my $suggestion = ''; |
326 | |
327 | # Look for a near match: |
328 | my $norm = lc($e); |
329 | $norm =~ tr[-_][]d; |
330 | my $n; |
331 | foreach my $enc (@supported) { |
332 | $n = lc($enc); |
333 | $n =~ tr[-_][]d; |
334 | next unless $n eq $norm; |
335 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; |
336 | last; |
337 | } |
338 | my $encmodver = Pod::Simple::Transcode::->encmodver; |
339 | $enc_error = join '' => |
340 | "This document probably does not appear as it should, because its ", |
341 | "\"=encoding $e\" line calls for an unsupported encoding.", |
342 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" |
343 | ; |
344 | |
345 | $self->scream( $self->{'line_count'}, $enc_error ); |
346 | } |
347 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; |
348 | |
349 | return '=encoding ALREADYDONE'; |
350 | } |
351 | |
352 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
353 | |
354 | sub _handle_encoding_second_level { |
355 | # By time this is called, the encoding (if well formed) will already |
356 | # have been acted one. |
357 | my($self, $para) = @_; |
358 | my @x = @$para; |
359 | my $content = join ' ', splice @x, 2; |
360 | $content =~ s/^\s+//s; |
361 | $content =~ s/\s+$//s; |
362 | |
363 | DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; |
364 | |
365 | if($content eq 'ALREADYDONE') { |
366 | # It's already been handled. Check for errors. |
367 | if(! $self->{'encoding_command_statuses'} ) { |
368 | DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; |
369 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { |
370 | $self->whine( $para->[1]{'start_line'}, |
371 | sprintf "Couldn't do %s: %s", |
372 | $self->{'encoding_command_reqs' }[-1], |
373 | $self->{'encoding_command_statuses'}[-1], |
374 | ); |
375 | } else { |
376 | DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; |
377 | } |
378 | |
379 | } else { |
380 | # Otherwise it's a syntax error |
381 | $self->whine( $para->[1]{'start_line'}, |
382 | "Invalid =encoding syntax: $content" |
383 | ); |
384 | } |
385 | |
386 | return; |
387 | } |
388 | |
389 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` |
390 | |
391 | { |
392 | my $m = -321; # magic line number |
393 | |
394 | sub _gen_errata { |
395 | my $self = $_[0]; |
396 | # Return 0 or more fake-o paragraphs explaining the accumulated |
397 | # errors on this document. |
398 | |
399 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; |
400 | |
401 | my @out; |
402 | |
403 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { |
404 | push @out, |
405 | ['=item', {'start_line' => $m}, "Around line $line:"], |
406 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, |
407 | #['~Top', {'start_line' => $m}, |
408 | $_ |
409 | #] |
410 | ], |
411 | @{$self->{'errata'}{$line}} |
412 | ) |
413 | ; |
414 | } |
415 | |
416 | # TODO: report of unknown entities? unrenderable characters? |
417 | |
418 | unshift @out, |
419 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], |
420 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, |
421 | "Hey! ", |
422 | ['B', {}, |
423 | 'The above document had some coding errors, which are explained below:' |
424 | ] |
425 | ], |
426 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], |
427 | ; |
428 | |
429 | push @out, |
430 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], |
431 | ; |
432 | |
433 | DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; |
434 | |
435 | return @out; |
436 | } |
437 | |
438 | } |
439 | |
440 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
441 | |
442 | ############################################################################## |
443 | ## |
444 | ## stop reading now stop reading now stop reading now stop reading now stop |
445 | ## |
446 | ## HERE IT BECOMES REALLY SCARY |
447 | ## |
448 | ## stop reading now stop reading now stop reading now stop reading now stop |
449 | ## |
450 | ############################################################################## |
451 | |
452 | sub _ponder_paragraph_buffer { |
453 | |
454 | # Para-token types as found in the buffer. |
455 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, |
456 | # =over, =back, =item |
457 | # and the null =pod (to be complained about if over one line) |
458 | # |
459 | # "~data" paragraphs are something we generate at this level, depending on |
460 | # a currently open =over region |
461 | |
462 | # Events fired: Begin and end for: |
463 | # directivename (like head1 .. head4), item, extend, |
464 | # for (from =begin...=end, =for), |
465 | # over-bullet, over-number, over-text, over-block, |
466 | # item-bullet, item-number, item-text, |
467 | # Document, |
468 | # Data, Para, Verbatim |
469 | # B, C, longdirname (TODO -- wha?), etc. for all directives |
470 | # |
471 | |
472 | my $self = $_[0]; |
473 | my $paras; |
474 | return unless @{$paras = $self->{'paras'}}; |
475 | my $curr_open = ($self->{'curr_open'} ||= []); |
476 | |
477 | my $scratch; |
478 | |
479 | DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; |
480 | |
481 | # We have something in our buffer. So apparently the document has started. |
482 | unless($self->{'doc_has_started'}) { |
483 | $self->{'doc_has_started'} = 1; |
484 | |
485 | my $starting_contentless; |
486 | $starting_contentless = |
487 | ( |
488 | !@$curr_open |
489 | and @$paras and ! grep $_->[0] ne '~end', @$paras |
490 | # i.e., if the paras is all ~ends |
491 | ) |
492 | ; |
493 | DEBUG and print "# Starting ", |
494 | $starting_contentless ? 'contentless' : 'contentful', |
495 | " document\n" |
496 | ; |
497 | |
498 | $self->_handle_element_start( |
499 | ($scratch = 'Document'), |
500 | { |
501 | 'start_line' => $paras->[0][1]{'start_line'}, |
502 | $starting_contentless ? ( 'contentless' => 1 ) : (), |
503 | }, |
504 | ); |
505 | } |
506 | |
507 | my($para, $para_type); |
508 | while(@$paras) { |
509 | last if @$paras == 1 and |
510 | ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' |
511 | or $paras->[0][0] eq '=item' ) |
512 | ; |
513 | # Those're the three kinds of paragraphs that require lookahead. |
514 | # Actually, an "=item Foo" inside an <over type=text> region |
515 | # and any =item inside an <over type=block> region (rare) |
516 | # don't require any lookahead, but all others (bullets |
517 | # and numbers) do. |
518 | |
519 | # TODO: winge about many kinds of directives in non-resolving =for regions? |
520 | # TODO: many? like what? =head1 etc? |
521 | |
522 | $para = shift @$paras; |
523 | $para_type = $para->[0]; |
524 | |
525 | DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", |
526 | $self->_dump_curr_open(), ")\n"; |
527 | |
528 | if($para_type eq '=for') { |
529 | next if $self->_ponder_for($para,$curr_open,$paras); |
530 | |
531 | } elsif($para_type eq '=begin') { |
532 | next if $self->_ponder_begin($para,$curr_open,$paras); |
533 | |
534 | } elsif($para_type eq '=end') { |
535 | next if $self->_ponder_end($para,$curr_open,$paras); |
536 | |
537 | } elsif($para_type eq '~end') { # The virtual end-document signal |
538 | next if $self->_ponder_doc_end($para,$curr_open,$paras); |
539 | } |
540 | |
541 | |
542 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
543 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
544 | if(grep $_->[1]{'~ignore'}, @$curr_open) { |
545 | DEBUG > 1 and |
546 | print "Skipping $para_type paragraph because in ignore mode.\n"; |
547 | next; |
548 | } |
549 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
550 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
551 | |
552 | if($para_type eq '=pod') { |
553 | $self->_ponder_pod($para,$curr_open,$paras); |
554 | |
555 | } elsif($para_type eq '=over') { |
556 | next if $self->_ponder_over($para,$curr_open,$paras); |
557 | |
558 | } elsif($para_type eq '=back') { |
559 | next if $self->_ponder_back($para,$curr_open,$paras); |
560 | |
561 | } else { |
562 | |
563 | # All non-magical codes!!! |
564 | |
565 | # Here we start using $para_type for our own twisted purposes, to |
566 | # mean how it should get treated, not as what the element name |
567 | # should be. |
568 | |
569 | DEBUG > 1 and print "Pondering non-magical $para_type\n"; |
570 | |
571 | my $i; |
572 | |
573 | # Enforce some =headN discipline |
574 | if($para_type =~ m/^=head\d$/s |
575 | and ! $self->{'accept_heads_anywhere'} |
576 | and @$curr_open |
577 | and $curr_open->[-1][0] eq '=over' |
578 | ) { |
579 | DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; |
580 | $self->whine( |
581 | $para->[1]{'start_line'}, |
582 | "You forgot a '=back' before '$para_type'" |
583 | ); |
584 | unshift @$paras, ['=back', {}, ''], $para; # close the =over |
585 | next; |
586 | } |
587 | |
588 | |
589 | if($para_type eq '=item') { |
590 | |
591 | my $over; |
592 | unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { |
593 | $self->whine( |
594 | $para->[1]{'start_line'}, |
595 | "'=item' outside of any '=over'" |
596 | ); |
597 | unshift @$paras, |
598 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], |
599 | $para |
600 | ; |
601 | next; |
602 | } |
603 | |
604 | |
605 | my $over_type = $over->[1]{'~type'}; |
606 | |
607 | if(!$over_type) { |
608 | # Shouldn't happen1 |
609 | die "Typeless over in stack, starting at line " |
610 | . $over->[1]{'start_line'}; |
611 | |
612 | } elsif($over_type eq 'block') { |
613 | unless($curr_open->[-1][1]{'~bitched_about'}) { |
614 | $curr_open->[-1][1]{'~bitched_about'} = 1; |
615 | $self->whine( |
616 | $curr_open->[-1][1]{'start_line'}, |
617 | "You can't have =items (as at line " |
618 | . $para->[1]{'start_line'} |
619 | . ") unless the first thing after the =over is an =item" |
620 | ); |
621 | } |
622 | # Just turn it into a paragraph and reconsider it |
623 | $para->[0] = '~Para'; |
624 | unshift @$paras, $para; |
625 | next; |
626 | |
627 | } elsif($over_type eq 'text') { |
628 | my $item_type = $self->_get_item_type($para); |
629 | # That kills the content of the item if it's a number or bullet. |
630 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
631 | |
632 | if($item_type eq 'text') { |
633 | # Nothing special needs doing for 'text' |
634 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { |
635 | die "Unknown item type $item_type" |
636 | unless $item_type eq 'number' or $item_type eq 'bullet'; |
637 | # Undo our clobbering: |
638 | push @$para, $para->[1]{'~orig_content'}; |
639 | delete $para->[1]{'number'}; |
640 | # Only a PROPER item-number element is allowed |
641 | # to have a number attribute. |
642 | } else { |
643 | die "Unhandled item type $item_type"; # should never happen |
644 | } |
645 | |
646 | # =item-text thingies don't need any assimilation, it seems. |
647 | |
648 | } elsif($over_type eq 'number') { |
649 | my $item_type = $self->_get_item_type($para); |
650 | # That kills the content of the item if it's a number or bullet. |
651 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
652 | |
653 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; |
654 | |
655 | if($item_type eq 'bullet') { |
656 | # Hm, it's not numeric. Correct for this. |
657 | $para->[1]{'number'} = $expected_value; |
658 | $self->whine( |
659 | $para->[1]{'start_line'}, |
660 | "Expected '=item $expected_value'" |
661 | ); |
662 | push @$para, $para->[1]{'~orig_content'}; |
663 | # restore the bullet, blocking the assimilation of next para |
664 | |
665 | } elsif($item_type eq 'text') { |
666 | # Hm, it's not numeric. Correct for this. |
667 | $para->[1]{'number'} = $expected_value; |
668 | $self->whine( |
669 | $para->[1]{'start_line'}, |
670 | "Expected '=item $expected_value'" |
671 | ); |
672 | # Text content will still be there and will block next ~Para |
673 | |
674 | } elsif($item_type ne 'number') { |
675 | die "Unknown item type $item_type"; # should never happen |
676 | |
677 | } elsif($expected_value == $para->[1]{'number'}) { |
678 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; |
679 | |
680 | } else { |
681 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, |
682 | " instead of the expected value of $expected_value\n"; |
683 | $self->whine( |
684 | $para->[1]{'start_line'}, |
685 | "You have '=item " . $para->[1]{'number'} . |
686 | "' instead of the expected '=item $expected_value'" |
687 | ); |
688 | $para->[1]{'number'} = $expected_value; # correcting!! |
689 | } |
690 | |
691 | if(@$para == 2) { |
692 | # For the cases where we /didn't/ push to @$para |
693 | if($paras->[0][0] eq '~Para') { |
694 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; |
695 | push @$para, splice @{shift @$paras},2; |
696 | } else { |
697 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; |
698 | push @$para, ''; # Just so it's not contentless |
699 | } |
700 | } |
701 | |
702 | |
703 | } elsif($over_type eq 'bullet') { |
704 | my $item_type = $self->_get_item_type($para); |
705 | # That kills the content of the item if it's a number or bullet. |
706 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
707 | |
708 | if($item_type eq 'bullet') { |
709 | # as expected! |
710 | |
711 | if( $para->[1]{'~_freaky_para_hack'} ) { |
712 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; |
713 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; |
714 | } |
715 | |
716 | } elsif($item_type eq 'number') { |
717 | $self->whine( |
718 | $para->[1]{'start_line'}, |
719 | "Expected '=item *'" |
720 | ); |
721 | push @$para, $para->[1]{'~orig_content'}; |
722 | # and block assimilation of the next paragraph |
723 | delete $para->[1]{'number'}; |
724 | # Only a PROPER item-number element is allowed |
725 | # to have a number attribute. |
726 | } elsif($item_type eq 'text') { |
727 | $self->whine( |
728 | $para->[1]{'start_line'}, |
729 | "Expected '=item *'" |
730 | ); |
731 | # But doesn't need processing. But it'll block assimilation |
732 | # of the next para. |
733 | } else { |
734 | die "Unhandled item type $item_type"; # should never happen |
735 | } |
736 | |
737 | if(@$para == 2) { |
738 | # For the cases where we /didn't/ push to @$para |
739 | if($paras->[0][0] eq '~Para') { |
740 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; |
741 | push @$para, splice @{shift @$paras},2; |
742 | } else { |
743 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; |
744 | push @$para, ''; # Just so it's not contentless |
745 | } |
746 | } |
747 | |
748 | } else { |
749 | die "Unhandled =over type \"$over_type\"?"; |
750 | # Shouldn't happen! |
751 | } |
752 | |
753 | $para_type = 'Plain'; |
754 | $para->[0] .= '-' . $over_type; |
755 | # Whew. Now fall thru and process it. |
756 | |
757 | |
758 | } elsif($para_type eq '=extend') { |
759 | # Well, might as well implement it here. |
760 | $self->_ponder_extend($para); |
761 | next; # and skip |
762 | } elsif($para_type eq '=encoding') { |
763 | # Not actually acted on here, but we catch errors here. |
764 | $self->_handle_encoding_second_level($para); |
765 | |
766 | next; # and skip |
767 | } elsif($para_type eq '~Verbatim') { |
768 | $para->[0] = 'Verbatim'; |
769 | $para_type = '?Verbatim'; |
770 | } elsif($para_type eq '~Para') { |
771 | $para->[0] = 'Para'; |
772 | $para_type = '?Plain'; |
773 | } elsif($para_type eq 'Data') { |
774 | $para->[0] = 'Data'; |
775 | $para_type = '?Data'; |
776 | } elsif( $para_type =~ s/^=//s |
777 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) |
778 | ) { |
779 | DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; |
780 | } else { |
781 | # An unknown directive! |
782 | DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", |
783 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) |
784 | ; |
785 | $self->whine( |
786 | $para->[1]{'start_line'}, |
787 | "Unknown directive: $para->[0]" |
788 | ); |
789 | |
790 | # And maybe treat it as text instead of just letting it go? |
791 | next; |
792 | } |
793 | |
794 | if($para_type =~ s/^\?//s) { |
795 | if(! @$curr_open) { # usual case |
796 | DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; |
797 | } else { |
798 | my @fors = grep $_->[0] eq '=for', @$curr_open; |
799 | DEBUG > 1 and print "Containing fors: ", |
800 | join(',', map $_->[1]{'target'}, @fors), "\n"; |
801 | |
802 | if(! @fors) { |
803 | DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; |
804 | |
805 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { |
806 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { |
807 | } elsif( $fors[-1][1]{'~resolve'} ) { |
808 | # Look to the immediately containing for |
809 | |
810 | if($para_type eq 'Data') { |
811 | DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; |
812 | $para->[0] = 'Para'; |
813 | $para_type = 'Plain'; |
814 | } else { |
815 | DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; |
816 | } |
817 | } else { |
818 | DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; |
819 | $para->[0] = $para_type = 'Data'; |
820 | } |
821 | } |
822 | } |
823 | |
824 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
825 | if($para_type eq 'Plain') { |
826 | $self->_ponder_Plain($para); |
827 | } elsif($para_type eq 'Verbatim') { |
828 | $self->_ponder_Verbatim($para); |
829 | } elsif($para_type eq 'Data') { |
830 | $self->_ponder_Data($para); |
831 | } else { |
832 | die "\$para type is $para_type -- how did that happen?"; |
833 | # Shouldn't happen. |
834 | } |
835 | |
836 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
837 | $para->[0] =~ s/^[~=]//s; |
838 | |
839 | DEBUG and print "\n", pretty($para), "\n"; |
840 | |
841 | # traverse the treelet (which might well be just one string scalar) |
842 | $self->{'content_seen'} ||= 1; |
843 | $self->_traverse_treelet_bit(@$para); |
844 | } |
845 | } |
846 | |
847 | return; |
848 | } |
849 | |
850 | ########################################################################### |
851 | # The sub-ponderers... |
852 | |
853 | |
854 | |
855 | sub _ponder_for { |
856 | my ($self,$para,$curr_open,$paras) = @_; |
857 | |
858 | # Fake it out as a begin/end |
859 | my $target; |
860 | |
861 | if(grep $_->[1]{'~ignore'}, @$curr_open) { |
862 | DEBUG > 1 and print "Ignoring ignorable =for\n"; |
863 | return 1; |
864 | } |
865 | |
866 | for(my $i = 2; $i < @$para; ++$i) { |
867 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { |
868 | $target = $1; |
869 | last; |
870 | } |
871 | } |
872 | unless(defined $target) { |
873 | $self->whine( |
874 | $para->[1]{'start_line'}, |
875 | "=for without a target?" |
876 | ); |
877 | return 1; |
878 | } |
879 | DEBUG > 1 and |
880 | print "Faking out a =for $target as a =begin $target / =end $target\n"; |
881 | |
882 | $para->[0] = 'Data'; |
883 | |
884 | unshift @$paras, |
885 | ['=begin', |
886 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, |
887 | $target, |
888 | ], |
889 | $para, |
890 | ['=end', |
891 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, |
892 | $target, |
893 | ], |
894 | ; |
895 | |
896 | return 1; |
897 | } |
898 | |
899 | sub _ponder_begin { |
900 | my ($self,$para,$curr_open,$paras) = @_; |
901 | my $content = join ' ', splice @$para, 2; |
902 | $content =~ s/^\s+//s; |
903 | $content =~ s/\s+$//s; |
904 | unless(length($content)) { |
905 | $self->whine( |
906 | $para->[1]{'start_line'}, |
907 | "=begin without a target?" |
908 | ); |
909 | DEBUG and print "Ignoring targetless =begin\n"; |
910 | return 1; |
911 | } |
912 | |
69473a20 |
913 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; |
914 | $para->[1]{'title'} = $title if ($title); |
915 | $para->[1]{'target'} = $target; # without any ':' |
916 | $content = $target; # strip off the title |
351625bd |
917 | |
918 | $content =~ s/^:!/!:/s; |
919 | my $neg; # whether this is a negation-match |
920 | $neg = 1 if $content =~ s/^!//s; |
921 | my $to_resolve; # whether to process formatting codes |
922 | $to_resolve = 1 if $content =~ s/^://s; |
923 | |
924 | my $dont_ignore; # whether this target matches us |
925 | |
926 | foreach my $target_name ( |
927 | split(',', $content, -1), |
928 | $neg ? () : '*' |
929 | ) { |
930 | DEBUG > 2 and |
931 | print " Considering whether =begin $content matches $target_name\n"; |
932 | next unless $self->{'accept_targets'}{$target_name}; |
933 | |
934 | DEBUG > 2 and |
935 | print " It DOES match the acceptable target $target_name!\n"; |
936 | $to_resolve = 1 |
937 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; |
938 | $dont_ignore = 1; |
939 | $para->[1]{'target_matching'} = $target_name; |
940 | last; # stop looking at other target names |
941 | } |
942 | |
943 | if($neg) { |
944 | if( $dont_ignore ) { |
945 | $dont_ignore = ''; |
946 | delete $para->[1]{'target_matching'}; |
947 | DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; |
948 | } else { |
949 | $dont_ignore = 1; |
950 | $para->[1]{'target_matching'} = '!'; |
951 | DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; |
952 | } |
953 | } |
954 | |
955 | $para->[0] = '=for'; # Just what we happen to call these, internally |
956 | $para->[1]{'~really'} ||= '=begin'; |
957 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; |
958 | $para->[1]{'~resolve'} = $to_resolve || 0; |
959 | |
960 | DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', |
961 | "ignore contents of this region\n"; |
962 | DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", |
963 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; |
964 | DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; |
965 | |
966 | push @$curr_open, $para; |
967 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { |
968 | DEBUG > 1 and print "Ignoring ignorable =begin\n"; |
969 | } else { |
970 | $self->{'content_seen'} ||= 1; |
971 | $self->_handle_element_start((my $scratch='for'), $para->[1]); |
972 | } |
973 | |
974 | return 1; |
975 | } |
976 | |
977 | sub _ponder_end { |
978 | my ($self,$para,$curr_open,$paras) = @_; |
979 | my $content = join ' ', splice @$para, 2; |
980 | $content =~ s/^\s+//s; |
981 | $content =~ s/\s+$//s; |
982 | DEBUG and print "Ogling '=end $content' directive\n"; |
983 | |
984 | unless(length($content)) { |
985 | $self->whine( |
986 | $para->[1]{'start_line'}, |
987 | "'=end' without a target?" . ( |
988 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) |
989 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) |
990 | : '' |
991 | ) |
992 | ); |
993 | DEBUG and print "Ignoring targetless =end\n"; |
994 | return 1; |
995 | } |
996 | |
997 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word |
998 | $self->whine( |
999 | $para->[1]{'start_line'}, |
1000 | "'=end $content' is invalid. (Stack: " |
1001 | . $self->_dump_curr_open() . ')' |
1002 | ); |
1003 | DEBUG and print "Ignoring mistargetted =end $content\n"; |
1004 | return 1; |
1005 | } |
1006 | |
1007 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { |
1008 | $self->whine( |
1009 | $para->[1]{'start_line'}, |
1010 | "=end $content without matching =begin. (Stack: " |
1011 | . $self->_dump_curr_open() . ')' |
1012 | ); |
1013 | DEBUG and print "Ignoring mistargetted =end $content\n"; |
1014 | return 1; |
1015 | } |
1016 | |
1017 | unless($content eq $curr_open->[-1][1]{'target'}) { |
1018 | $self->whine( |
1019 | $para->[1]{'start_line'}, |
1020 | "=end $content doesn't match =begin " |
1021 | . $curr_open->[-1][1]{'target'} |
1022 | . ". (Stack: " |
1023 | . $self->_dump_curr_open() . ')' |
1024 | ); |
1025 | DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; |
1026 | return 1; |
1027 | } |
1028 | |
1029 | # Else it's okay to close... |
1030 | if(grep $_->[1]{'~ignore'}, @$curr_open) { |
1031 | DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; |
1032 | # And that may be because of this to-be-closed =for region, or some |
1033 | # other one, but it doesn't matter. |
1034 | } else { |
1035 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; |
1036 | # what's that for? |
1037 | |
1038 | $self->{'content_seen'} ||= 1; |
1039 | $self->_handle_element_end( my $scratch = 'for' ); |
1040 | } |
1041 | DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; |
1042 | pop @$curr_open; |
1043 | |
1044 | return 1; |
1045 | } |
1046 | |
1047 | sub _ponder_doc_end { |
1048 | my ($self,$para,$curr_open,$paras) = @_; |
1049 | if(@$curr_open) { # Deal with things left open |
1050 | DEBUG and print "Stack is nonempty at end-document: (", |
1051 | $self->_dump_curr_open(), ")\n"; |
1052 | |
1053 | DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; |
1054 | unshift @$paras, $self->_closers_for_all_curr_open; |
1055 | # Make sure there is exactly one ~end in the parastack, at the end: |
1056 | @$paras = grep $_->[0] ne '~end', @$paras; |
1057 | push @$paras, $para, $para; |
1058 | # We need two -- once for the next cycle where we |
1059 | # generate errata, and then another to be at the end |
1060 | # when that loop back around to process the errata. |
1061 | return 1; |
1062 | |
1063 | } else { |
1064 | DEBUG and print "Okay, stack is empty now.\n"; |
1065 | } |
1066 | |
1067 | # Try generating errata section, if applicable |
1068 | unless($self->{'~tried_gen_errata'}) { |
1069 | $self->{'~tried_gen_errata'} = 1; |
1070 | my @extras = $self->_gen_errata(); |
1071 | if(@extras) { |
1072 | unshift @$paras, @extras; |
1073 | DEBUG and print "Generated errata... relooping...\n"; |
1074 | return 1; # I.e., loop around again to process these fake-o paragraphs |
1075 | } |
1076 | } |
1077 | |
1078 | splice @$paras; # Well, that's that for this paragraph buffer. |
1079 | DEBUG and print "Throwing end-document event.\n"; |
1080 | |
1081 | $self->_handle_element_end( my $scratch = 'Document' ); |
1082 | return 1; # Hasta la byebye |
1083 | } |
1084 | |
1085 | sub _ponder_pod { |
1086 | my ($self,$para,$curr_open,$paras) = @_; |
1087 | $self->whine( |
1088 | $para->[1]{'start_line'}, |
1089 | "=pod directives shouldn't be over one line long! Ignoring all " |
1090 | . (@$para - 2) . " lines of content" |
1091 | ) if @$para > 3; |
1092 | # Content is always ignored. |
1093 | return; |
1094 | } |
1095 | |
1096 | sub _ponder_over { |
1097 | my ($self,$para,$curr_open,$paras) = @_; |
1098 | return 1 unless @$paras; |
1099 | my $list_type; |
1100 | |
1101 | if($paras->[0][0] eq '=item') { # most common case |
1102 | $list_type = $self->_get_initial_item_type($paras->[0]); |
1103 | |
1104 | } elsif($paras->[0][0] eq '=back') { |
1105 | # Ignore empty lists. TODO: make this an option? |
1106 | shift @$paras; |
1107 | return 1; |
1108 | |
1109 | } elsif($paras->[0][0] eq '~end') { |
1110 | $self->whine( |
1111 | $para->[1]{'start_line'}, |
1112 | "=over is the last thing in the document?!" |
1113 | ); |
1114 | return 1; # But feh, ignore it. |
1115 | } else { |
1116 | $list_type = 'block'; |
1117 | } |
1118 | $para->[1]{'~type'} = $list_type; |
1119 | push @$curr_open, $para; |
1120 | # yes, we reuse the paragraph as a stack item |
1121 | |
1122 | my $content = join ' ', splice @$para, 2; |
1123 | my $overness; |
1124 | if($content =~ m/^\s*$/s) { |
1125 | $para->[1]{'indent'} = 4; |
1126 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { |
1127 | no integer; |
1128 | $para->[1]{'indent'} = $1; |
1129 | if($1 == 0) { |
1130 | $self->whine( |
1131 | $para->[1]{'start_line'}, |
1132 | "Can't have a 0 in =over $content" |
1133 | ); |
1134 | $para->[1]{'indent'} = 4; |
1135 | } |
1136 | } else { |
1137 | $self->whine( |
1138 | $para->[1]{'start_line'}, |
1139 | "=over should be: '=over' or '=over positive_number'" |
1140 | ); |
1141 | $para->[1]{'indent'} = 4; |
1142 | } |
1143 | DEBUG > 1 and print "=over found of type $list_type\n"; |
1144 | |
1145 | $self->{'content_seen'} ||= 1; |
1146 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); |
1147 | |
1148 | return; |
1149 | } |
1150 | |
1151 | sub _ponder_back { |
1152 | my ($self,$para,$curr_open,$paras) = @_; |
1153 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? |
1154 | |
1155 | my $content = join ' ', splice @$para, 2; |
1156 | if($content =~ m/\S/) { |
1157 | $self->whine( |
1158 | $para->[1]{'start_line'}, |
1159 | "=back doesn't take any parameters, but you said =back $content" |
1160 | ); |
1161 | } |
1162 | |
1163 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { |
1164 | DEBUG > 1 and print "=back happily closes matching =over\n"; |
1165 | # Expected case: we're closing the most recently opened thing |
1166 | #my $over = pop @$curr_open; |
1167 | $self->{'content_seen'} ||= 1; |
1168 | $self->_handle_element_end( my $scratch = |
1169 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ) |
1170 | ); |
1171 | } else { |
1172 | DEBUG > 1 and print "=back found without a matching =over. Stack: (", |
1173 | join(', ', map $_->[0], @$curr_open), ").\n"; |
1174 | $self->whine( |
1175 | $para->[1]{'start_line'}, |
1176 | '=back without =over' |
1177 | ); |
1178 | return 1; # and ignore it |
1179 | } |
1180 | } |
1181 | |
1182 | sub _ponder_item { |
1183 | my ($self,$para,$curr_open,$paras) = @_; |
1184 | my $over; |
1185 | unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { |
1186 | $self->whine( |
1187 | $para->[1]{'start_line'}, |
1188 | "'=item' outside of any '=over'" |
1189 | ); |
1190 | unshift @$paras, |
1191 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], |
1192 | $para |
1193 | ; |
1194 | return 1; |
1195 | } |
1196 | |
1197 | |
1198 | my $over_type = $over->[1]{'~type'}; |
1199 | |
1200 | if(!$over_type) { |
1201 | # Shouldn't happen1 |
1202 | die "Typeless over in stack, starting at line " |
1203 | . $over->[1]{'start_line'}; |
1204 | |
1205 | } elsif($over_type eq 'block') { |
1206 | unless($curr_open->[-1][1]{'~bitched_about'}) { |
1207 | $curr_open->[-1][1]{'~bitched_about'} = 1; |
1208 | $self->whine( |
1209 | $curr_open->[-1][1]{'start_line'}, |
1210 | "You can't have =items (as at line " |
1211 | . $para->[1]{'start_line'} |
1212 | . ") unless the first thing after the =over is an =item" |
1213 | ); |
1214 | } |
1215 | # Just turn it into a paragraph and reconsider it |
1216 | $para->[0] = '~Para'; |
1217 | unshift @$paras, $para; |
1218 | return 1; |
1219 | |
1220 | } elsif($over_type eq 'text') { |
1221 | my $item_type = $self->_get_item_type($para); |
1222 | # That kills the content of the item if it's a number or bullet. |
1223 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
1224 | |
1225 | if($item_type eq 'text') { |
1226 | # Nothing special needs doing for 'text' |
1227 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { |
1228 | die "Unknown item type $item_type" |
1229 | unless $item_type eq 'number' or $item_type eq 'bullet'; |
1230 | # Undo our clobbering: |
1231 | push @$para, $para->[1]{'~orig_content'}; |
1232 | delete $para->[1]{'number'}; |
1233 | # Only a PROPER item-number element is allowed |
1234 | # to have a number attribute. |
1235 | } else { |
1236 | die "Unhandled item type $item_type"; # should never happen |
1237 | } |
1238 | |
1239 | # =item-text thingies don't need any assimilation, it seems. |
1240 | |
1241 | } elsif($over_type eq 'number') { |
1242 | my $item_type = $self->_get_item_type($para); |
1243 | # That kills the content of the item if it's a number or bullet. |
1244 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
1245 | |
1246 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; |
1247 | |
1248 | if($item_type eq 'bullet') { |
1249 | # Hm, it's not numeric. Correct for this. |
1250 | $para->[1]{'number'} = $expected_value; |
1251 | $self->whine( |
1252 | $para->[1]{'start_line'}, |
1253 | "Expected '=item $expected_value'" |
1254 | ); |
1255 | push @$para, $para->[1]{'~orig_content'}; |
1256 | # restore the bullet, blocking the assimilation of next para |
1257 | |
1258 | } elsif($item_type eq 'text') { |
1259 | # Hm, it's not numeric. Correct for this. |
1260 | $para->[1]{'number'} = $expected_value; |
1261 | $self->whine( |
1262 | $para->[1]{'start_line'}, |
1263 | "Expected '=item $expected_value'" |
1264 | ); |
1265 | # Text content will still be there and will block next ~Para |
1266 | |
1267 | } elsif($item_type ne 'number') { |
1268 | die "Unknown item type $item_type"; # should never happen |
1269 | |
1270 | } elsif($expected_value == $para->[1]{'number'}) { |
1271 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; |
1272 | |
1273 | } else { |
1274 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, |
1275 | " instead of the expected value of $expected_value\n"; |
1276 | $self->whine( |
1277 | $para->[1]{'start_line'}, |
1278 | "You have '=item " . $para->[1]{'number'} . |
1279 | "' instead of the expected '=item $expected_value'" |
1280 | ); |
1281 | $para->[1]{'number'} = $expected_value; # correcting!! |
1282 | } |
1283 | |
1284 | if(@$para == 2) { |
1285 | # For the cases where we /didn't/ push to @$para |
1286 | if($paras->[0][0] eq '~Para') { |
1287 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; |
1288 | push @$para, splice @{shift @$paras},2; |
1289 | } else { |
1290 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; |
1291 | push @$para, ''; # Just so it's not contentless |
1292 | } |
1293 | } |
1294 | |
1295 | |
1296 | } elsif($over_type eq 'bullet') { |
1297 | my $item_type = $self->_get_item_type($para); |
1298 | # That kills the content of the item if it's a number or bullet. |
1299 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; |
1300 | |
1301 | if($item_type eq 'bullet') { |
1302 | # as expected! |
1303 | |
1304 | if( $para->[1]{'~_freaky_para_hack'} ) { |
1305 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; |
1306 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; |
1307 | } |
1308 | |
1309 | } elsif($item_type eq 'number') { |
1310 | $self->whine( |
1311 | $para->[1]{'start_line'}, |
1312 | "Expected '=item *'" |
1313 | ); |
1314 | push @$para, $para->[1]{'~orig_content'}; |
1315 | # and block assimilation of the next paragraph |
1316 | delete $para->[1]{'number'}; |
1317 | # Only a PROPER item-number element is allowed |
1318 | # to have a number attribute. |
1319 | } elsif($item_type eq 'text') { |
1320 | $self->whine( |
1321 | $para->[1]{'start_line'}, |
1322 | "Expected '=item *'" |
1323 | ); |
1324 | # But doesn't need processing. But it'll block assimilation |
1325 | # of the next para. |
1326 | } else { |
1327 | die "Unhandled item type $item_type"; # should never happen |
1328 | } |
1329 | |
1330 | if(@$para == 2) { |
1331 | # For the cases where we /didn't/ push to @$para |
1332 | if($paras->[0][0] eq '~Para') { |
1333 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; |
1334 | push @$para, splice @{shift @$paras},2; |
1335 | } else { |
1336 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; |
1337 | push @$para, ''; # Just so it's not contentless |
1338 | } |
1339 | } |
1340 | |
1341 | } else { |
1342 | die "Unhandled =over type \"$over_type\"?"; |
1343 | # Shouldn't happen! |
1344 | } |
1345 | $para->[0] .= '-' . $over_type; |
1346 | |
1347 | return; |
1348 | } |
1349 | |
1350 | sub _ponder_Plain { |
1351 | my ($self,$para) = @_; |
1352 | DEBUG and print " giving plain treatment...\n"; |
1353 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) |
1354 | or $para->[1]{'~cooked'} |
1355 | ) { |
1356 | push @$para, |
1357 | @{$self->_make_treelet( |
1358 | join("\n", splice(@$para, 2)), |
1359 | $para->[1]{'start_line'} |
1360 | )}; |
1361 | } |
1362 | # Empty paragraphs don't need a treelet for any reason I can see. |
1363 | # And precooked paragraphs already have a treelet. |
1364 | return; |
1365 | } |
1366 | |
1367 | sub _ponder_Verbatim { |
1368 | my ($self,$para) = @_; |
1369 | DEBUG and print " giving verbatim treatment...\n"; |
1370 | |
1371 | $para->[1]{'xml:space'} = 'preserve'; |
1372 | for(my $i = 2; $i < @$para; $i++) { |
1373 | foreach my $line ($para->[$i]) { # just for aliasing |
1374 | while( $line =~ |
1375 | # Sort of adapted from Text::Tabs -- yes, it's hardwired in that |
1376 | # tabs are at every EIGHTH column. For portability, it has to be |
1377 | # one setting everywhere, and 8th wins. |
1378 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e |
1379 | ) {} |
1380 | |
1381 | # TODO: whinge about (or otherwise treat) unindented or overlong lines |
1382 | |
1383 | } |
1384 | } |
1385 | |
1386 | # Now the VerbatimFormatted hoodoo... |
1387 | if( $self->{'accept_codes'} and |
1388 | $self->{'accept_codes'}{'VerbatimFormatted'} |
1389 | ) { |
1390 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } |
1391 | # Kill any number of terminal newlines |
1392 | $self->_verbatim_format($para); |
1393 | } elsif ($self->{'codes_in_verbatim'}) { |
1394 | push @$para, |
1395 | @{$self->_make_treelet( |
1396 | join("\n", splice(@$para, 2)), |
1397 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} |
1398 | )}; |
1399 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines |
1400 | } else { |
1401 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; |
1402 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines |
1403 | } |
1404 | return; |
1405 | } |
1406 | |
1407 | sub _ponder_Data { |
1408 | my ($self,$para) = @_; |
1409 | DEBUG and print " giving data treatment...\n"; |
1410 | $para->[1]{'xml:space'} = 'preserve'; |
1411 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; |
1412 | return; |
1413 | } |
1414 | |
1415 | |
1416 | |
1417 | |
1418 | ########################################################################### |
1419 | |
1420 | sub _traverse_treelet_bit { # for use only by the routine above |
1421 | my($self, $name) = splice @_,0,2; |
1422 | |
1423 | my $scratch; |
1424 | $self->_handle_element_start(($scratch=$name), shift @_); |
1425 | |
1426 | foreach my $x (@_) { |
1427 | if(ref($x)) { |
1428 | &_traverse_treelet_bit($self, @$x); |
1429 | } else { |
1430 | $self->_handle_text($x); |
1431 | } |
1432 | } |
1433 | |
1434 | $self->_handle_element_end($scratch=$name); |
1435 | return; |
1436 | } |
1437 | |
1438 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
1439 | |
1440 | sub _closers_for_all_curr_open { |
1441 | my $self = $_[0]; |
1442 | my @closers; |
1443 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { |
1444 | my @copy = @$still_open; |
1445 | $copy[1] = {%{ $copy[1] }}; |
1446 | #$copy[1]{'start_line'} = -1; |
1447 | if($copy[0] eq '=for') { |
1448 | $copy[0] = '=end'; |
1449 | } elsif($copy[0] eq '=over') { |
1450 | $copy[0] = '=back'; |
1451 | } else { |
1452 | die "I don't know how to auto-close an open $copy[0] region"; |
1453 | } |
1454 | |
1455 | unless( @copy > 2 ) { |
1456 | push @copy, $copy[1]{'target'}; |
1457 | $copy[-1] = '' unless defined $copy[-1]; |
1458 | # since =over's don't have targets |
1459 | } |
1460 | |
1461 | DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; |
1462 | unshift @closers, \@copy; |
1463 | } |
1464 | return @closers; |
1465 | } |
1466 | |
1467 | #-------------------------------------------------------------------------- |
1468 | |
1469 | sub _verbatim_format { |
1470 | my($it, $p) = @_; |
1471 | |
1472 | my $formatting; |
1473 | |
1474 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines |
1475 | DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; |
1476 | $p->[$i] .= "\n"; |
1477 | # Unlike with simple Verbatim blocks, we don't end up just doing |
1478 | # a join("\n", ...) on the contents, so we have to append a |
1479 | # newline to ever line, and then nix the last one later. |
1480 | } |
1481 | |
1482 | if( DEBUG > 4 ) { |
1483 | print "<<\n"; |
1484 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines |
1485 | print "_verbatim_format $i: $p->[$i]"; |
1486 | } |
1487 | print ">>\n"; |
1488 | } |
1489 | |
1490 | for(my $i = $#$p; $i > 2; $i--) { |
1491 | # work backwards over the lines, except the first (#2) |
1492 | |
1493 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s |
1494 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; |
1495 | # look at a formatty line preceding a nonformatty one |
1496 | DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; |
1497 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { |
1498 | DEBUG > 5 and print " It's a formatty line. ", |
1499 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; |
1500 | |
1501 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { |
1502 | DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; |
1503 | next; |
1504 | } else { |
1505 | DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; |
1506 | } |
1507 | } else { |
1508 | DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; |
1509 | next; |
1510 | } |
1511 | |
1512 | # A formatty line has to have #: in the first two columns, and uses |
1513 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. |
1514 | # Example: |
1515 | # What do you want? i like pie. [or whatever] |
1516 | # #:^^^^^^^^^^^^^^^^^ ///////////// |
1517 | |
1518 | |
1519 | DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; |
1520 | |
1521 | $formatting = ' ' . $1; |
1522 | $formatting =~ s/\s+$//s; # nix trailing whitespace |
1523 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op |
1524 | splice @$p,$i,1; # remove this line |
1525 | $i--; # don't consider next line |
1526 | next; |
1527 | } |
1528 | |
1529 | if( length($formatting) >= length($p->[$i-1]) ) { |
1530 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; |
1531 | } else { |
1532 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); |
1533 | } |
1534 | # Make $formatting and the previous line be exactly the same length, |
1535 | # with $formatting having a " " as the last character. |
1536 | |
1537 | DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; |
1538 | |
1539 | |
1540 | my @new_line; |
1541 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { |
1542 | #print "Format matches $1\n"; |
1543 | |
1544 | if($2) { |
1545 | #print "SKIPPING <$2>\n"; |
1546 | push @new_line, |
1547 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); |
1548 | } else { |
1549 | #print "SNARING $+\n"; |
1550 | push @new_line, [ |
1551 | ( |
1552 | $3 ? 'VerbatimB' : |
1553 | $4 ? 'VerbatimI' : |
1554 | $5 ? 'VerbatimBI' : die("Should never get called") |
1555 | ), {}, |
1556 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) |
1557 | ]; |
1558 | #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; |
1559 | } |
1560 | } |
1561 | my @nixed = |
1562 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line |
1563 | DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; |
1564 | |
1565 | DEBUG > 6 and print "New version of the above line is these tokens (", |
1566 | scalar(@new_line), "):", |
1567 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; |
1568 | $i--; # So the next line we scrutinize is the line before the one |
1569 | # that we just went and formatted |
1570 | } |
1571 | |
1572 | $p->[0] = 'VerbatimFormatted'; |
1573 | |
1574 | # Collapse adjacent text nodes, just for kicks. |
1575 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last |
1576 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { |
1577 | DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; |
1578 | $p->[$i] .= splice @$p, $i+1, 1; # merge |
1579 | --$i; # and back up |
1580 | } |
1581 | } |
1582 | |
1583 | # Now look for the last text token, and remove the terminal newline |
1584 | for( my $i = $#$p; $i >= 2; $i-- ) { |
1585 | # work backwards over the tokens, even the first |
1586 | if( !ref($p->[$i]) ) { |
1587 | if($p->[$i] =~ s/\n$//s) { |
1588 | DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; |
1589 | } else { |
1590 | DEBUG > 5 and print |
1591 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; |
1592 | } |
1593 | last; # we only want the next one |
1594 | } |
1595 | } |
1596 | |
1597 | return; |
1598 | } |
1599 | |
1600 | |
1601 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
1602 | |
1603 | |
1604 | sub _treelet_from_formatting_codes { |
1605 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. |
1606 | # Like [ '~Top', {'start_line' => $start_line}, |
1607 | # "I like ", |
1608 | # [ 'B', {}, "pie" ], |
1609 | # "!" |
1610 | # ] |
1611 | |
1612 | my($self, $para, $start_line, $preserve_space) = @_; |
1613 | |
1614 | my $treelet = ['~Top', {'start_line' => $start_line},]; |
1615 | |
1616 | unless ($preserve_space || $self->{'preserve_whitespace'}) { |
1617 | $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'}; |
1618 | |
1619 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. |
1620 | $para =~ s/ $//; |
1621 | $para =~ s/^ //; |
1622 | } |
1623 | |
1624 | # Only apparent problem the above code is that N<< >> turns into |
1625 | # N<< >>. But then, word wrapping does that too! So don't do that! |
1626 | |
1627 | my @stack; |
1628 | my @lineage = ($treelet); |
1629 | |
1630 | DEBUG > 4 and print "Paragraph:\n$para\n\n"; |
1631 | |
1632 | # Here begins our frightening tokenizer RE. The following regex matches |
1633 | # text in four main parts: |
1634 | # |
1635 | # * Start-codes. The first alternative matches C< or C<<, the latter |
1636 | # followed by some whitespace. $1 will hold the entire start code |
1637 | # (including any space following a multiple-angle-bracket delimiter), |
1638 | # and $2 will hold only the additional brackets past the first in a |
1639 | # multiple-bracket delimiter. length($2) + 1 will be the number of |
1640 | # closing brackets we have to find. |
1641 | # |
1642 | # * Closing brackets. Match some amount of whitespace followed by |
1643 | # multiple close brackets. The logic to see if this closes anything |
1644 | # is down below. Note that in order to parse C<< >> correctly, we |
1645 | # have to use look-behind (?<=\s\s), since the match of the starting |
1646 | # code will have consumed the whitespace. |
1647 | # |
1648 | # * A single closing bracket, to close a simple code like C<>. |
1649 | # |
1650 | # * Something that isn't a start or end code. We have to be careful |
1651 | # about accepting whitespace, since perlpodspec says that any whitespace |
1652 | # before a multiple-bracket closing delimiter should be ignored. |
1653 | # |
1654 | while($para =~ |
1655 | m/\G |
1656 | (?: |
1657 | # Match starting codes, including the whitespace following a |
1658 | # multiple-delimiter start code. $1 gets the whole start code and |
1659 | # $2 gets all but one of the <s in the multiple-bracket case. |
1660 | ([A-Z]<(?:(<+)\s+)?) |
1661 | | |
1662 | # Match multiple-bracket end codes. $3 gets the whitespace that |
1663 | # should be discarded before an end bracket but kept in other cases |
1664 | # and $4 gets the end brackets themselves. |
1665 | (\s+|(?<=\s\s))(>{2,}) |
1666 | | |
1667 | (\s?>) # $5: simple end-codes |
1668 | | |
1669 | ( # $6: stuff containing no start-codes or end-codes |
1670 | (?: |
6669d9b5 |
1671 | [^A-Z\s>] |
351625bd |
1672 | | |
1673 | (?: |
1674 | [A-Z](?!<) |
1675 | ) |
1676 | | |
69473a20 |
1677 | # whitespace is ok, but we don't want to eat the whitespace before |
1678 | # a multiple-bracket end code. |
1679 | # NOTE: we may still have problems with e.g. S<< >> |
351625bd |
1680 | (?: |
69473a20 |
1681 | \s(?!\s*>{2,}) |
351625bd |
1682 | ) |
1683 | )+ |
1684 | ) |
1685 | ) |
1686 | /xgo |
1687 | ) { |
1688 | DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; |
1689 | if(defined $1) { |
1690 | if(defined $2) { |
1691 | DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; |
1692 | push @stack, length($2) + 1; |
1693 | # length of the necessary complex end-code string |
1694 | } else { |
1695 | DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; |
1696 | push @stack, 0; # signal that we're looking for simple |
1697 | } |
1698 | push @lineage, [ substr($1,0,1), {}, ]; # new node object |
1699 | push @{ $lineage[-2] }, $lineage[-1]; |
1700 | |
1701 | } elsif(defined $4) { |
1702 | DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; |
1703 | # This is where it gets messy... |
1704 | if(! @stack) { |
1705 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. |
1706 | DEBUG > 4 and print " But it's really just stuff.\n"; |
1707 | push @{ $lineage[-1] }, $3, $4; |
1708 | next; |
1709 | } elsif(!$stack[-1]) { |
1710 | # We saw " >>>>" but needed only ">". Back pos up. |
1711 | DEBUG > 4 and print " And that's more than we needed to close simple.\n"; |
1712 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. |
1713 | pos($para) = pos($para) - length($4) + 1; |
1714 | } elsif($stack[-1] == length($4)) { |
1715 | # We found " >>>>", and it was exactly what we needed. Commonest case. |
1716 | DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; |
1717 | } elsif($stack[-1] < length($4)) { |
1718 | # We saw " >>>>" but needed only " >>". Back pos up. |
1719 | DEBUG > 4 and print " And that's more than we needed to close complex.\n"; |
1720 | pos($para) = pos($para) - length($4) + $stack[-1]; |
1721 | } else { |
1722 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! |
1723 | DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; |
1724 | push @{ $lineage[-1] }, $3, $4; |
1725 | next; |
1726 | } |
1727 | #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; |
1728 | |
1729 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; |
1730 | # Keep the element from being childless |
1731 | |
1732 | pop @stack; |
1733 | pop @lineage; |
1734 | |
1735 | } elsif(defined $5) { |
1736 | DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n"; |
1737 | |
1738 | if(@stack and ! $stack[-1]) { |
1739 | # We're indeed expecting a simple end-code |
1740 | DEBUG > 4 and print " It's indeed an end-code.\n"; |
1741 | |
1742 | if(length($5) == 2) { # There was a space there: " >" |
1743 | push @{ $lineage[-1] }, ' '; |
1744 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element |
1745 | push @{ $lineage[-1] }, ''; # keep it from being really childless |
1746 | } |
1747 | |
1748 | pop @stack; |
1749 | pop @lineage; |
1750 | } else { |
1751 | DEBUG > 4 and print " It's just stuff.\n"; |
1752 | push @{ $lineage[-1] }, $5; |
1753 | } |
1754 | |
1755 | } elsif(defined $6) { |
1756 | DEBUG > 3 and print "Found stuff \"$6\"\n"; |
1757 | push @{ $lineage[-1] }, $6; |
1758 | |
1759 | } else { |
1760 | # should never ever ever ever happen |
1761 | DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; |
1762 | die "SPORK 512512!"; |
1763 | } |
1764 | } |
1765 | |
1766 | if(@stack) { # Uhoh, some sequences weren't closed. |
1767 | my $x= "..."; |
1768 | while(@stack) { |
1769 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; |
1770 | # Hmmmmm! |
1771 | |
1772 | my $code = (pop @lineage)->[0]; |
1773 | my $ender_length = pop @stack; |
1774 | if($ender_length) { |
1775 | --$ender_length; |
1776 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); |
1777 | } else { |
1778 | $x = $code . "<$x>"; |
1779 | } |
1780 | } |
1781 | DEBUG > 1 and print "Unterminated $x sequence\n"; |
1782 | $self->whine($start_line, |
1783 | "Unterminated $x sequence", |
1784 | ); |
1785 | } |
1786 | |
1787 | return $treelet; |
1788 | } |
1789 | |
1790 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
1791 | |
1792 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) |
1793 | return stringify_lol($_[1]); |
1794 | } |
1795 | |
1796 | sub stringify_lol { # function: stringify_lol($lol) |
1797 | my $string_form = ''; |
1798 | _stringify_lol( $_[0] => \$string_form ); |
1799 | return $string_form; |
1800 | } |
1801 | |
1802 | sub _stringify_lol { # the real recursor |
1803 | my($lol, $to) = @_; |
1804 | use UNIVERSAL (); |
1805 | for(my $i = 2; $i < @$lol; ++$i) { |
1806 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { |
1807 | _stringify_lol( $lol->[$i], $to); # recurse! |
1808 | } else { |
1809 | $$to .= $lol->[$i]; |
1810 | } |
1811 | } |
1812 | return; |
1813 | } |
1814 | |
1815 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
1816 | |
1817 | sub _dump_curr_open { # return a string representation of the stack |
1818 | my $curr_open = $_[0]{'curr_open'}; |
1819 | |
1820 | return '[empty]' unless @$curr_open; |
1821 | return join '; ', |
1822 | map {; |
1823 | ($_->[0] eq '=for') |
1824 | ? ( ($_->[1]{'~really'} || '=over') |
1825 | . ' ' . $_->[1]{'target'}) |
1826 | : $_->[0] |
1827 | } |
1828 | @$curr_open |
1829 | ; |
1830 | } |
1831 | |
1832 | ########################################################################### |
1833 | my %pretty_form = ( |
1834 | "\a" => '\a', # ding! |
1835 | "\b" => '\b', # BS |
1836 | "\e" => '\e', # ESC |
1837 | "\f" => '\f', # FF |
1838 | "\t" => '\t', # tab |
1839 | "\cm" => '\cm', |
1840 | "\cj" => '\cj', |
1841 | "\n" => '\n', # probably overrides one of either \cm or \cj |
1842 | '"' => '\"', |
1843 | '\\' => '\\\\', |
1844 | '$' => '\\$', |
1845 | '@' => '\\@', |
1846 | '%' => '\\%', |
1847 | '#' => '\\#', |
1848 | ); |
1849 | |
1850 | sub pretty { # adopted from Class::Classless |
1851 | # Not the most brilliant routine, but passable. |
1852 | # Don't give it a cyclic data structure! |
1853 | my @stuff = @_; # copy |
1854 | my $x; |
1855 | my $out = |
1856 | # join ",\n" . |
1857 | join ", ", |
1858 | map {; |
1859 | if(!defined($_)) { |
1860 | "undef"; |
1861 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { |
1862 | $x = "[ " . pretty(@$_) . " ]" ; |
1863 | $x; |
1864 | } elsif(ref($_) eq 'SCALAR') { |
1865 | $x = "\\" . pretty($$_) ; |
1866 | $x; |
1867 | } elsif(ref($_) eq 'HASH') { |
1868 | my $hr = $_; |
1869 | $x = "{" . join(", ", |
1870 | map(pretty($_) . '=>' . pretty($hr->{$_}), |
1871 | sort keys %$hr ) ) . "}" ; |
1872 | $x; |
1873 | } elsif(!length($_)) { q{''} # empty string |
1874 | } elsif( |
1875 | $_ eq '0' # very common case |
1876 | or( |
1877 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s |
1878 | and $_ ne '-0' # the strange case that that RE lets thru |
1879 | ) |
1880 | ) { $_; |
1881 | } else { |
1882 | if( chr(65) eq 'A' ) { |
1883 | s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> |
1884 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; |
1885 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; |
1886 | } else { |
1887 | # We're in some crazy non-ASCII world! |
1888 | s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> |
1889 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; |
1890 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; |
1891 | } |
1892 | qq{"$_"}; |
1893 | } |
1894 | } @stuff; |
1895 | # $out =~ s/\n */ /g if length($out) < 75; |
1896 | return $out; |
1897 | } |
1898 | |
1899 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
9ea6f39e |
1900 | |
1901 | # A rather unsubtle method of blowing away all the state information |
1902 | # from a parser object so it can be reused. Provided as a utility for |
1903 | # backward compatibilty in Pod::Man, etc. but not recommended for |
1904 | # general use. |
1905 | |
1906 | sub reinit { |
1907 | my $self = shift; |
1908 | foreach (qw(source_dead source_filename doc_has_started |
1909 | start_of_pod_block content_seen last_was_blank paras curr_open |
1910 | line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen |
1911 | Title)) { |
1912 | |
1913 | delete $self->{$_}; |
1914 | } |
1915 | } |
1916 | |
1917 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
351625bd |
1918 | 1; |
1919 | |