cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm

   1
   2 package Pod::Simple::BlackBox;
   3 #
   4 # "What's in the box?"  "Pain."
   5 #
   6 ###########################################################################
   7 #
   8 # This is where all the scary things happen: parsing lines into
   9 #  paragraphs; and then into directives, verbatims, and then also
  10 #  turning formatting sequences into treelets.
  11 #
  12 # Are you really sure you want to read this code?
  13 #
  14 #-----------------------------------------------------------------------------
  15 #
  16 # The basic work of this module Pod::Simple::BlackBox is doing the dirty work
  17 # of parsing Pod into treelets (generally one per non-verbatim paragraph), and
  18 # to call the proper callbacks on the treelets.
  19 #
  20 # Every node in a treelet is a ['name', {attrhash}, ...children...]
  21
  22 use integer; # vroom!
  23 use strict;
  24 use Carp ();
  25 use vars qw($VERSION );
  26 $VERSION = '3.13';
  27 #use constant DEBUG => 7;
  28 BEGIN {
  29   require Pod::Simple;
  30   *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
  31 }
  32
  33 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  34
  35 sub parse_line { shift->parse_lines(@_) } # alias
  36
  37 # - - -  Turn back now!  Run away!  - - -
  38
  39 sub parse_lines {             # Usage: $parser->parse_lines(@lines)
  40   # an undef means end-of-stream
  41   my $self = shift;
  42
  43   my $code_handler = $self->{'code_handler'};
  44   my $cut_handler  = $self->{'cut_handler'};
  45   $self->{'line_count'} ||= 0;
  46
  47   my $scratch;
  48
  49   DEBUG > 4 and
  50    print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
  51
  52   DEBUG > 5 and
  53    print "#  About to parse lines: ",
  54      join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
  55
  56   my $paras = ($self->{'paras'} ||= []);
  57    # paragraph buffer.  Because we need to defer processing of =over
  58    # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
  59    # to process this.
  60
  61   $self->{'pod_para_count'} ||= 0;
  62
  63   my $line;
  64   foreach my $source_line (@_) {
  65     if( $self->{'source_dead'} ) {
  66       DEBUG > 4 and print "# Source is dead.\n";
  67       last;
  68     }
  69
  70     unless( defined $source_line ) {
  71       DEBUG > 4 and print "# Undef-line seen.\n";
  72
  73       push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
  74       push @$paras, $paras->[-1], $paras->[-1];
  75        # So that it definitely fills the buffer.
  76       $self->{'source_dead'} = 1;
  77       $self->_ponder_paragraph_buffer;
  78       next;
  79     }
  80
  81
  82     if( $self->{'line_count'}++ ) {
  83       ($line = $source_line) =~ tr/\n\r//d;
  84        # If we don't have two vars, we'll end up with that there
  85        # tr/// modding the (potentially read-only) original source line!
  86
  87     } else {
  88       DEBUG > 2 and print "First line: [$source_line]\n";
  89
  90       if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
  91         DEBUG and print "UTF-8 BOM seen.  Faking a '=encode utf8'.\n";
  92         $self->_handle_encoding_line( "=encode utf8" );
  93         $line =~ tr/\n\r//d;
  94
  95       } elsif( $line =~ s/^\xFE\xFF//s ) {
  96         DEBUG and print "Big-endian UTF-16 BOM seen.  Aborting parsing.\n";
  97         $self->scream(
  98           $self->{'line_count'},
  99           "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
 100         );
 101         splice @_;
 102         push @_, undef;
 103         next;
 104
 105         # TODO: implement somehow?
 106
 107       } elsif( $line =~ s/^\xFF\xFE//s ) {
 108         DEBUG and print "Little-endian UTF-16 BOM seen.  Aborting parsing.\n";
 109         $self->scream(
 110           $self->{'line_count'},
 111           "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
 112         );
 113         splice @_;
 114         push @_, undef;
 115         next;
 116
 117         # TODO: implement somehow?
 118
 119       } else {
 120         DEBUG > 2 and print "First line is BOM-less.\n";
 121         ($line = $source_line) =~ tr/\n\r//d;
 122       }
 123     }
 124
 125
 126     DEBUG > 5 and print "# Parsing line: [$line]\n";
 127
 128     if(!$self->{'in_pod'}) {
 129       if($line =~ m/^=([a-zA-Z]+)/s) {
 130         if($1 eq 'cut') {
 131           $self->scream(
 132             $self->{'line_count'},
 133             "=cut found outside a pod block.  Skipping to next block."
 134           );
 135
 136           ## Before there were errata sections in the world, it was
 137           ## least-pessimal to abort processing the file.  But now we can
 138           ## just barrel on thru (but still not start a pod block).
 139           #splice @_;
 140           #push @_, undef;
 141
 142           next;
 143         } else {
 144           $self->{'in_pod'} = $self->{'start_of_pod_block'}
 145                             = $self->{'last_was_blank'}     = 1;
 146           # And fall thru to the pod-mode block further down
 147         }
 148       } else {
 149         DEBUG > 5 and print "# It's a code-line.\n";
 150         $code_handler->(map $_, $line, $self->{'line_count'}, $self)
 151          if $code_handler;
 152         # Note: this may cause code to be processed out of order relative
 153         #  to pods, but in order relative to cuts.
 154
 155         # Note also that we haven't yet applied the transcoding to $line
 156         #  by time we call $code_handler!
 157
 158         if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
 159           # That RE is from perlsyn, section "Plain Old Comments (Not!)",
 160           #$fname = $2 if defined $2;
 161           #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
 162           DEBUG > 1 and print "# Setting nextline to $1\n";
 163           $self->{'line_count'} = $1 - 1;
 164         }
 165
 166         next;
 167       }
 168     }
 169
 170     # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 171     # Else we're in pod mode:
 172
 173     # Apply any necessary transcoding:
 174     $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
 175
 176     # HERE WE CATCH =encoding EARLY!
 177     if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
 178       $line = $self->_handle_encoding_line( $line );
 179     }
 180
 181     if($line =~ m/^=cut/s) {
 182       # here ends the pod block, and therefore the previous pod para
 183       DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
 184       $self->{'in_pod'} = 0;
 185       # ++$self->{'pod_para_count'};
 186       $self->_ponder_paragraph_buffer();
 187        # by now it's safe to consider the previous paragraph as done.
 188       $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
 189        if $cut_handler;
 190
 191       # TODO: add to docs: Note: this may cause cuts to be processed out
 192       #  of order relative to pods, but in order relative to code.
 193
 194     } elsif($line =~ m/^\s*$/s) {  # it's a blank line
 195       if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
 196         DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
 197         push @{$paras->[-1]}, $line;
 198       }  # otherwise it's not interesting
 199
 200       if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
 201         DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
 202       }
 203
 204       $self->{'last_was_blank'} = 1;
 205
 206     } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...
 207
 208       if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
 209         # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
 210         my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
 211          # Note that in "=head1 foo", the WS is lost.
 212          # Example: ['=head1', {'start_line' => 123}, ' foo']
 213
 214         ++$self->{'pod_para_count'};
 215
 216         $self->_ponder_paragraph_buffer();
 217          # by now it's safe to consider the previous paragraph as done.
 218
 219         push @$paras, $new; # the new incipient paragraph
 220         DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
 221
 222       } elsif($line =~ m/^\s/s) {
 223
 224         if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
 225           DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
 226           push @{$paras->[-1]}, $line;
 227         } else {
 228           ++$self->{'pod_para_count'};
 229           $self->_ponder_paragraph_buffer();
 230            # by now it's safe to consider the previous paragraph as done.
 231           DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
 232           push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
 233         }
 234       } else {
 235         ++$self->{'pod_para_count'};
 236         $self->_ponder_paragraph_buffer();
 237          # by now it's safe to consider the previous paragraph as done.
 238         push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
 239         DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
 240       }
 241       $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
 242
 243     } else {
 244       # It's a non-blank line /continuing/ the current para
 245       if(@$paras) {
 246         DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
 247         push @{$paras->[-1]}, $line;
 248       } else {
 249         # Unexpected case!
 250         die "Continuing a paragraph but \@\$paras is empty?";
 251       }
 252       $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
 253     }
 254
 255   } # ends the big while loop
 256
 257   DEBUG > 1 and print(pretty(@$paras), "\n");
 258   return $self;
 259 }
 260
 261 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 262
 263 sub _handle_encoding_line {
 264   my($self, $line) = @_;
 265
 266   # The point of this routine is to set $self->{'_transcoder'} as indicated.
 267
 268   return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
 269   DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
 270
 271   my $e    = $1;
 272   my $orig = $e;
 273   push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
 274
 275   my $enc_error;
 276
 277   # Cf.   perldoc Encode   and   perldoc Encode::Supported
 278
 279   require Pod::Simple::Transcode;
 280
 281   if( $self->{'encoding'} ) {
 282     my $norm_current = $self->{'encoding'};
 283     my $norm_e = $e;
 284     foreach my $that ($norm_current, $norm_e) {
 285       $that =  lc($that);
 286       $that =~ s/[-_]//g;
 287     }
 288     if($norm_current eq $norm_e) {
 289       DEBUG > 1 and print "The '=encoding $orig' line is ",
 290        "redundant.  ($norm_current eq $norm_e).  Ignoring.\n";
 291       $enc_error = '';
 292        # But that doesn't necessarily mean that the earlier one went okay
 293     } else {
 294       $enc_error = "Encoding is already set to " . $self->{'encoding'};
 295       DEBUG > 1 and print $enc_error;
 296     }
 297   } elsif (
 298     # OK, let's turn on the encoding
 299     do {
 300       DEBUG > 1 and print " Setting encoding to $e\n";
 301       $self->{'encoding'} = $e;
 302       1;
 303     }
 304     and $e eq 'HACKRAW'
 305   ) {
 306     DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
 307
 308   } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
 309
 310     die($enc_error = "WHAT? _transcoder is already set?!")
 311      if $self->{'_transcoder'};   # should never happen
 312     require Pod::Simple::Transcode;
 313     $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
 314     eval {
 315       my @x = ('', "abc", "123");
 316       $self->{'_transcoder'}->(@x);
 317     };
 318     $@ && die( $enc_error =
 319       "Really unexpected error setting up encoding $e: $@\nAborting"
 320     );
 321
 322   } else {
 323     my @supported = Pod::Simple::Transcode::->all_encodings;
 324
 325     # Note unsupported, and complain
 326     DEBUG and print " Encoding [$e] is unsupported.",
 327       "\nSupporteds: @supported\n";
 328     my $suggestion = '';
 329
 330     # Look for a near match:
 331     my $norm = lc($e);
 332     $norm =~ tr[-_][]d;
 333     my $n;
 334     foreach my $enc (@supported) {
 335       $n = lc($enc);
 336       $n =~ tr[-_][]d;
 337       next unless $n eq $norm;
 338       $suggestion = "  (Maybe \"$e\" should be \"$enc\"?)";
 339       last;
 340     }
 341     my $encmodver = Pod::Simple::Transcode::->encmodver;
 342     $enc_error = join '' =>
 343       "This document probably does not appear as it should, because its ",
 344       "\"=encoding $e\" line calls for an unsupported encoding.",
 345       $suggestion, "  [$encmodver\'s supported encodings are: @supported]"
 346     ;
 347
 348     $self->scream( $self->{'line_count'}, $enc_error );
 349   }
 350   push @{ $self->{'encoding_command_statuses'} }, $enc_error;
 351
 352   return '=encoding ALREADYDONE';
 353 }
 354
 355 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 356
 357 sub _handle_encoding_second_level {
 358   # By time this is called, the encoding (if well formed) will already
 359   #  have been acted one.
 360   my($self, $para) = @_;
 361   my @x = @$para;
 362   my $content = join ' ', splice @x, 2;
 363   $content =~ s/^\s+//s;
 364   $content =~ s/\s+$//s;
 365
 366   DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
 367
 368   if($content eq 'ALREADYDONE') {
 369     # It's already been handled.  Check for errors.
 370     if(! $self->{'encoding_command_statuses'} ) {
 371       DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
 372     } elsif( $self->{'encoding_command_statuses'}[-1] ) {
 373       $self->whine( $para->[1]{'start_line'},
 374         sprintf "Couldn't do %s: %s",
 375           $self->{'encoding_command_reqs'  }[-1],
 376           $self->{'encoding_command_statuses'}[-1],
 377       );
 378     } else {
 379       DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
 380     }
 381
 382   } else {
 383     # Otherwise it's a syntax error
 384     $self->whine( $para->[1]{'start_line'},
 385       "Invalid =encoding syntax: $content"
 386     );
 387   }
 388
 389   return;
 390 }
 391
 392 #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
 393
 394 {
 395 my $m = -321;   # magic line number
 396
 397 sub _gen_errata {
 398   my $self = $_[0];
 399   # Return 0 or more fake-o paragraphs explaining the accumulated
 400   #  errors on this document.
 401
 402   return() unless $self->{'errata'} and keys %{$self->{'errata'}};
 403
 404   my @out;
 405
 406   foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
 407     push @out,
 408       ['=item', {'start_line' => $m}, "Around line $line:"],
 409       map( ['~Para', {'start_line' => $m, '~cooked' => 1},
 410         #['~Top', {'start_line' => $m},
 411         $_
 412         #]
 413         ],
 414         @{$self->{'errata'}{$line}}
 415       )
 416     ;
 417   }
 418
 419   # TODO: report of unknown entities? unrenderable characters?
 420
 421   unshift @out,
 422     ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
 423     ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
 424      "Hey! ",
 425      ['B', {},
 426       'The above document had some coding errors, which are explained below:'
 427      ]
 428     ],
 429     ['=over',  {'start_line' => $m, 'errata' => 1}, ''],
 430   ;
 431
 432   push @out,
 433     ['=back',  {'start_line' => $m, 'errata' => 1}, ''],
 434   ;
 435
 436   DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
 437
 438   return @out;
 439 }
 440
 441 }
 442
 443 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 444
 445 ##############################################################################
 446 ##
 447 ##  stop reading now stop reading now stop reading now stop reading now stop
 448 ##
 449 ##                         HERE IT BECOMES REALLY SCARY
 450 ##
 451 ##  stop reading now stop reading now stop reading now stop reading now stop
 452 ##
 453 ##############################################################################
 454
 455 sub _ponder_paragraph_buffer {
 456
 457   # Para-token types as found in the buffer.
 458   #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
 459   #   =over, =back, =item
 460   #   and the null =pod (to be complained about if over one line)
 461   #
 462   # "~data" paragraphs are something we generate at this level, depending on
 463   # a currently open =over region
 464
 465   # Events fired:  Begin and end for:
 466   #                   directivename (like head1 .. head4), item, extend,
 467   #                   for (from =begin...=end, =for),
 468   #                   over-bullet, over-number, over-text, over-block,
 469   #                   item-bullet, item-number, item-text,
 470   #                   Document,
 471   #                   Data, Para, Verbatim
 472   #                   B, C, longdirname (TODO -- wha?), etc. for all directives
 473   #
 474
 475   my $self = $_[0];
 476   my $paras;
 477   return unless @{$paras = $self->{'paras'}};
 478   my $curr_open = ($self->{'curr_open'} ||= []);
 479
 480   my $scratch;
 481
 482   DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
 483
 484   # We have something in our buffer.  So apparently the document has started.
 485   unless($self->{'doc_has_started'}) {
 486     $self->{'doc_has_started'} = 1;
 487
 488     my $starting_contentless;
 489     $starting_contentless =
 490      (
 491        !@$curr_open
 492        and @$paras and ! grep $_->[0] ne '~end', @$paras
 493         # i.e., if the paras is all ~ends
 494      )
 495     ;
 496     DEBUG and print "# Starting ",
 497       $starting_contentless ? 'contentless' : 'contentful',
 498       " document\n"
 499     ;
 500
 501     $self->_handle_element_start(
 502       ($scratch = 'Document'),
 503       {
 504         'start_line' => $paras->[0][1]{'start_line'},
 505         $starting_contentless ? ( 'contentless' => 1 ) : (),
 506       },
 507     );
 508   }
 509
 510   my($para, $para_type);
 511   while(@$paras) {
 512     last if @$paras == 1 and
 513       ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
 514         or $paras->[0][0] eq '=item' )
 515     ;
 516     # Those're the three kinds of paragraphs that require lookahead.
 517     #   Actually, an "=item Foo" inside an <over type=text> region
 518     #   and any =item inside an <over type=block> region (rare)
 519     #   don't require any lookahead, but all others (bullets
 520     #   and numbers) do.
 521
 522 # TODO: winge about many kinds of directives in non-resolving =for regions?
 523 # TODO: many?  like what?  =head1 etc?
 524
 525     $para = shift @$paras;
 526     $para_type = $para->[0];
 527
 528     DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
 529       $self->_dump_curr_open(), ")\n";
 530
 531     if($para_type eq '=for') {
 532       next if $self->_ponder_for($para,$curr_open,$paras);
 533
 534     } elsif($para_type eq '=begin') {
 535       next if $self->_ponder_begin($para,$curr_open,$paras);
 536
 537     } elsif($para_type eq '=end') {
 538       next if $self->_ponder_end($para,$curr_open,$paras);
 539
 540     } elsif($para_type eq '~end') { # The virtual end-document signal
 541       next if $self->_ponder_doc_end($para,$curr_open,$paras);
 542     }
 543
 544
 545     # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 546     #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 547     if(grep $_->[1]{'~ignore'}, @$curr_open) {
 548       DEBUG > 1 and
 549        print "Skipping $para_type paragraph because in ignore mode.\n";
 550       next;
 551     }
 552     #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 553     # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
 554
 555     if($para_type eq '=pod') {
 556       $self->_ponder_pod($para,$curr_open,$paras);
 557
 558     } elsif($para_type eq '=over') {
 559       next if $self->_ponder_over($para,$curr_open,$paras);
 560
 561     } elsif($para_type eq '=back') {
 562       next if $self->_ponder_back($para,$curr_open,$paras);
 563
 564     } else {
 565
 566       # All non-magical codes!!!
 567
 568       # Here we start using $para_type for our own twisted purposes, to
 569       #  mean how it should get treated, not as what the element name
 570       #  should be.
 571
 572       DEBUG > 1 and print "Pondering non-magical $para_type\n";
 573
 574       my $i;
 575
 576       # Enforce some =headN discipline
 577       if($para_type =~ m/^=head\d$/s
 578          and ! $self->{'accept_heads_anywhere'}
 579          and @$curr_open
 580          and $curr_open->[-1][0] eq '=over'
 581       ) {
 582         DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
 583         $self->whine(
 584           $para->[1]{'start_line'},
 585           "You forgot a '=back' before '$para_type'"
 586         );
 587         unshift @$paras, ['=back', {}, ''], $para;   # close the =over
 588         next;
 589       }
 590
 591
 592       if($para_type eq '=item') {
 593
 594         my $over;
 595         unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
 596           $self->whine(
 597             $para->[1]{'start_line'},
 598             "'=item' outside of any '=over'"
 599           );
 600           unshift @$paras,
 601             ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
 602             $para
 603           ;
 604           next;
 605         }
 606
 607
 608         my $over_type = $over->[1]{'~type'};
 609
 610         if(!$over_type) {
 611           # Shouldn't happen1
 612           die "Typeless over in stack, starting at line "
 613            . $over->[1]{'start_line'};
 614
 615         } elsif($over_type eq 'block') {
 616           unless($curr_open->[-1][1]{'~bitched_about'}) {
 617             $curr_open->[-1][1]{'~bitched_about'} = 1;
 618             $self->whine(
 619               $curr_open->[-1][1]{'start_line'},
 620               "You can't have =items (as at line "
 621               . $para->[1]{'start_line'}
 622               . ") unless the first thing after the =over is an =item"
 623             );
 624           }
 625           # Just turn it into a paragraph and reconsider it
 626           $para->[0] = '~Para';
 627           unshift @$paras, $para;
 628           next;
 629
 630         } elsif($over_type eq 'text') {
 631           my $item_type = $self->_get_item_type($para);
 632             # That kills the content of the item if it's a number or bullet.
 633           DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
 634
 635           if($item_type eq 'text') {
 636             # Nothing special needs doing for 'text'
 637           } elsif($item_type eq 'number' or $item_type eq 'bullet') {
 638             die "Unknown item type $item_type"
 639              unless $item_type eq 'number' or $item_type eq 'bullet';
 640             # Undo our clobbering:
 641             push @$para, $para->[1]{'~orig_content'};
 642             delete $para->[1]{'number'};
 643              # Only a PROPER item-number element is allowed
 644              #  to have a number attribute.
 645           } else {
 646             die "Unhandled item type $item_type"; # should never happen
 647           }
 648
 649           # =item-text thingies don't need any assimilation, it seems.
 650
 651         } elsif($over_type eq 'number') {
 652           my $item_type = $self->_get_item_type($para);
 653             # That kills the content of the item if it's a number or bullet.
 654           DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
 655
 656           my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
 657
 658           if($item_type eq 'bullet') {
 659             # Hm, it's not numeric.  Correct for this.
 660             $para->[1]{'number'} = $expected_value;
 661             $self->whine(
 662               $para->[1]{'start_line'},
 663               "Expected '=item $expected_value'"
 664             );
 665             push @$para, $para->[1]{'~orig_content'};
 666               # restore the bullet, blocking the assimilation of next para
 667
 668           } elsif($item_type eq 'text') {
 669             # Hm, it's not numeric.  Correct for this.
 670             $para->[1]{'number'} = $expected_value;
 671             $self->whine(
 672               $para->[1]{'start_line'},
 673               "Expected '=item $expected_value'"
 674             );
 675             # Text content will still be there and will block next ~Para
 676
 677           } elsif($item_type ne 'number') {
 678             die "Unknown item type $item_type"; # should never happen
 679
 680           } elsif($expected_value == $para->[1]{'number'}) {
 681             DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
 682
 683           } else {
 684             DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
 685              " instead of the expected value of $expected_value\n";
 686             $self->whine(
 687               $para->[1]{'start_line'},
 688               "You have '=item " . $para->[1]{'number'} .
 689               "' instead of the expected '=item $expected_value'"
 690             );
 691             $para->[1]{'number'} = $expected_value;  # correcting!!
 692           }
 693
 694           if(@$para == 2) {
 695             # For the cases where we /didn't/ push to @$para
 696             if($paras->[0][0] eq '~Para') {
 697               DEBUG and print "Assimilating following ~Para content into $over_type item\n";
 698               push @$para, splice @{shift @$paras},2;
 699             } else {
 700               DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
 701               push @$para, '';  # Just so it's not contentless
 702             }
 703           }
 704
 705
 706         } elsif($over_type eq 'bullet') {
 707           my $item_type = $self->_get_item_type($para);
 708             # That kills the content of the item if it's a number or bullet.
 709           DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
 710
 711           if($item_type eq 'bullet') {
 712             # as expected!
 713
 714             if( $para->[1]{'~_freaky_para_hack'} ) {
 715               DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
 716               push @$para, delete $para->[1]{'~_freaky_para_hack'};
 717             }
 718
 719           } elsif($item_type eq 'number') {
 720             $self->whine(
 721               $para->[1]{'start_line'},
 722               "Expected '=item *'"
 723             );
 724             push @$para, $para->[1]{'~orig_content'};
 725              # and block assimilation of the next paragraph
 726             delete $para->[1]{'number'};
 727              # Only a PROPER item-number element is allowed
 728              #  to have a number attribute.
 729           } elsif($item_type eq 'text') {
 730             $self->whine(
 731               $para->[1]{'start_line'},
 732               "Expected '=item *'"
 733             );
 734              # But doesn't need processing.  But it'll block assimilation
 735              #  of the next para.
 736           } else {
 737             die "Unhandled item type $item_type"; # should never happen
 738           }
 739
 740           if(@$para == 2) {
 741             # For the cases where we /didn't/ push to @$para
 742             if($paras->[0][0] eq '~Para') {
 743               DEBUG and print "Assimilating following ~Para content into $over_type item\n";
 744               push @$para, splice @{shift @$paras},2;
 745             } else {
 746               DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
 747               push @$para, '';  # Just so it's not contentless
 748             }
 749           }
 750
 751         } else {
 752           die "Unhandled =over type \"$over_type\"?";
 753           # Shouldn't happen!
 754         }
 755
 756         $para_type = 'Plain';
 757         $para->[0] .= '-' . $over_type;
 758         # Whew.  Now fall thru and process it.
 759
 760
 761       } elsif($para_type eq '=extend') {
 762         # Well, might as well implement it here.
 763         $self->_ponder_extend($para);
 764         next;  # and skip
 765       } elsif($para_type eq '=encoding') {
 766         # Not actually acted on here, but we catch errors here.
 767         $self->_handle_encoding_second_level($para);
 768
 769         next;  # and skip
 770       } elsif($para_type eq '~Verbatim') {
 771         $para->[0] = 'Verbatim';
 772         $para_type = '?Verbatim';
 773       } elsif($para_type eq '~Para') {
 774         $para->[0] = 'Para';
 775         $para_type = '?Plain';
 776       } elsif($para_type eq 'Data') {
 777         $para->[0] = 'Data';
 778         $para_type = '?Data';
 779       } elsif( $para_type =~ s/^=//s
 780         and defined( $para_type = $self->{'accept_directives'}{$para_type} )
 781       ) {
 782         DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
 783       } else {
 784         # An unknown directive!
 785         DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
 786          $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
 787         ;
 788         $self->whine(
 789           $para->[1]{'start_line'},
 790           "Unknown directive: $para->[0]"
 791         );
 792
 793         # And maybe treat it as text instead of just letting it go?
 794         next;
 795       }
 796
 797       if($para_type =~ s/^\?//s) {
 798         if(! @$curr_open) {  # usual case
 799           DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
 800         } else {
 801           my @fors = grep $_->[0] eq '=for', @$curr_open;
 802           DEBUG > 1 and print "Containing fors: ",
 803             join(',', map $_->[1]{'target'}, @fors), "\n";
 804
 805           if(! @fors) {
 806             DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
 807
 808           #} elsif(grep $_->[1]{'~resolve'}, @fors) {
 809           #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
 810           } elsif( $fors[-1][1]{'~resolve'} ) {
 811             # Look to the immediately containing for
 812
 813             if($para_type eq 'Data') {
 814               DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
 815               $para->[0] = 'Para';
 816               $para_type = 'Plain';
 817             } else {
 818               DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
 819             }
 820           } else {
 821             DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
 822             $para->[0] = $para_type = 'Data';
 823           }
 824         }
 825       }
 826
 827       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 828       if($para_type eq 'Plain') {
 829         $self->_ponder_Plain($para);
 830       } elsif($para_type eq 'Verbatim') {
 831         $self->_ponder_Verbatim($para);
 832       } elsif($para_type eq 'Data') {
 833         $self->_ponder_Data($para);
 834       } else {
 835         die "\$para type is $para_type -- how did that happen?";
 836         # Shouldn't happen.
 837       }
 838
 839       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 840       $para->[0] =~ s/^[~=]//s;
 841
 842       DEBUG and print "\n", pretty($para), "\n";
 843
 844       # traverse the treelet (which might well be just one string scalar)
 845       $self->{'content_seen'} ||= 1;
 846       $self->_traverse_treelet_bit(@$para);
 847     }
 848   }
 849
 850   return;
 851 }
 852
 853 ###########################################################################
 854 # The sub-ponderers...
 855
 856
 857
 858 sub _ponder_for {
 859   my ($self,$para,$curr_open,$paras) = @_;
 860
 861   # Fake it out as a begin/end
 862   my $target;
 863
 864   if(grep $_->[1]{'~ignore'}, @$curr_open) {
 865     DEBUG > 1 and print "Ignoring ignorable =for\n";
 866     return 1;
 867   }
 868
 869   for(my $i = 2; $i < @$para; ++$i) {
 870     if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
 871       $target = $1;
 872       last;
 873     }
 874   }
 875   unless(defined $target) {
 876     $self->whine(
 877       $para->[1]{'start_line'},
 878       "=for without a target?"
 879     );
 880     return 1;
 881   }
 882   DEBUG > 1 and
 883    print "Faking out a =for $target as a =begin $target / =end $target\n";
 884
 885   $para->[0] = 'Data';
 886
 887   unshift @$paras,
 888     ['=begin',
 889       {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
 890       $target,
 891     ],
 892     $para,
 893     ['=end',
 894       {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
 895       $target,
 896     ],
 897   ;
 898
 899   return 1;
 900 }
 901
 902 sub _ponder_begin {
 903   my ($self,$para,$curr_open,$paras) = @_;
 904   my $content = join ' ', splice @$para, 2;
 905   $content =~ s/^\s+//s;
 906   $content =~ s/\s+$//s;
 907   unless(length($content)) {
 908     $self->whine(
 909       $para->[1]{'start_line'},
 910       "=begin without a target?"
 911     );
 912     DEBUG and print "Ignoring targetless =begin\n";
 913     return 1;
 914   }
 915
 916   my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
 917   $para->[1]{'title'} = $title if ($title);
 918   $para->[1]{'target'} = $target;  # without any ':'
 919   $content = $target; # strip off the title
 920
 921   $content =~ s/^:!/!:/s;
 922   my $neg;  # whether this is a negation-match
 923   $neg = 1        if $content =~ s/^!//s;
 924   my $to_resolve;  # whether to process formatting codes
 925   $to_resolve = 1 if $content =~ s/^://s;
 926
 927   my $dont_ignore; # whether this target matches us
 928
 929   foreach my $target_name (
 930     split(',', $content, -1),
 931     $neg ? () : '*'
 932   ) {
 933     DEBUG > 2 and
 934      print " Considering whether =begin $content matches $target_name\n";
 935     next unless $self->{'accept_targets'}{$target_name};
 936
 937     DEBUG > 2 and
 938      print "  It DOES match the acceptable target $target_name!\n";
 939     $to_resolve = 1
 940       if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
 941     $dont_ignore = 1;
 942     $para->[1]{'target_matching'} = $target_name;
 943     last; # stop looking at other target names
 944   }
 945
 946   if($neg) {
 947     if( $dont_ignore ) {
 948       $dont_ignore = '';
 949       delete $para->[1]{'target_matching'};
 950       DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
 951     } else {
 952       $dont_ignore = 1;
 953       $para->[1]{'target_matching'} = '!';
 954       DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
 955     }
 956   }
 957
 958   $para->[0] = '=for';  # Just what we happen to call these, internally
 959   $para->[1]{'~really'} ||= '=begin';
 960   $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
 961   $para->[1]{'~resolve'}  = $to_resolve || 0;
 962
 963   DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
 964     "ignore contents of this region\n";
 965   DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
 966     ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
 967   DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
 968
 969   push @$curr_open, $para;
 970   if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
 971     DEBUG > 1 and print "Ignoring ignorable =begin\n";
 972   } else {
 973     $self->{'content_seen'} ||= 1;
 974     $self->_handle_element_start((my $scratch='for'), $para->[1]);
 975   }
 976
 977   return 1;
 978 }
 979
 980 sub _ponder_end {
 981   my ($self,$para,$curr_open,$paras) = @_;
 982   my $content = join ' ', splice @$para, 2;
 983   $content =~ s/^\s+//s;
 984   $content =~ s/\s+$//s;
 985   DEBUG and print "Ogling '=end $content' directive\n";
 986
 987   unless(length($content)) {
 988     $self->whine(
 989       $para->[1]{'start_line'},
 990       "'=end' without a target?" . (
 991         ( @$curr_open and $curr_open->[-1][0] eq '=for' )
 992         ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
 993         : ''
 994       )
 995     );
 996     DEBUG and print "Ignoring targetless =end\n";
 997     return 1;
 998   }
 999
1000   unless($content =~ m/^\S+$/) {  # i.e., unless it's one word
1001     $self->whine(
1002       $para->[1]{'start_line'},
1003       "'=end $content' is invalid.  (Stack: "
1004       . $self->_dump_curr_open() . ')'
1005     );
1006     DEBUG and print "Ignoring mistargetted =end $content\n";
1007     return 1;
1008   }
1009
1010   unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1011     $self->whine(
1012       $para->[1]{'start_line'},
1013       "=end $content without matching =begin.  (Stack: "
1014       . $self->_dump_curr_open() . ')'
1015     );
1016     DEBUG and print "Ignoring mistargetted =end $content\n";
1017     return 1;
1018   }
1019
1020   unless($content eq $curr_open->[-1][1]{'target'}) {
1021     $self->whine(
1022       $para->[1]{'start_line'},
1023       "=end $content doesn't match =begin "
1024       . $curr_open->[-1][1]{'target'}
1025       . ".  (Stack: "
1026       . $self->_dump_curr_open() . ')'
1027     );
1028     DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1029     return 1;
1030   }
1031
1032   # Else it's okay to close...
1033   if(grep $_->[1]{'~ignore'}, @$curr_open) {
1034     DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1035     # And that may be because of this to-be-closed =for region, or some
1036     #  other one, but it doesn't matter.
1037   } else {
1038     $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1039       # what's that for?
1040
1041     $self->{'content_seen'} ||= 1;
1042     $self->_handle_element_end( my $scratch = 'for' );
1043   }
1044   DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1045   pop @$curr_open;
1046
1047   return 1;
1048 }
1049
1050 sub _ponder_doc_end {
1051   my ($self,$para,$curr_open,$paras) = @_;
1052   if(@$curr_open) { # Deal with things left open
1053     DEBUG and print "Stack is nonempty at end-document: (",
1054       $self->_dump_curr_open(), ")\n";
1055
1056     DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1057     unshift @$paras, $self->_closers_for_all_curr_open;
1058     # Make sure there is exactly one ~end in the parastack, at the end:
1059     @$paras = grep $_->[0] ne '~end', @$paras;
1060     push @$paras, $para, $para;
1061      # We need two -- once for the next cycle where we
1062      #  generate errata, and then another to be at the end
1063      #  when that loop back around to process the errata.
1064     return 1;
1065
1066   } else {
1067     DEBUG and print "Okay, stack is empty now.\n";
1068   }
1069
1070   # Try generating errata section, if applicable
1071   unless($self->{'~tried_gen_errata'}) {
1072     $self->{'~tried_gen_errata'} = 1;
1073     my @extras = $self->_gen_errata();
1074     if(@extras) {
1075       unshift @$paras, @extras;
1076       DEBUG and print "Generated errata... relooping...\n";
1077       return 1;  # I.e., loop around again to process these fake-o paragraphs
1078     }
1079   }
1080
1081   splice @$paras; # Well, that's that for this paragraph buffer.
1082   DEBUG and print "Throwing end-document event.\n";
1083
1084   $self->_handle_element_end( my $scratch = 'Document' );
1085   return 1; # Hasta la byebye
1086 }
1087
1088 sub _ponder_pod {
1089   my ($self,$para,$curr_open,$paras) = @_;
1090   $self->whine(
1091     $para->[1]{'start_line'},
1092     "=pod directives shouldn't be over one line long!  Ignoring all "
1093      . (@$para - 2) . " lines of content"
1094   ) if @$para > 3;
1095   # Content is always ignored.
1096   return;
1097 }
1098
1099 sub _ponder_over {
1100   my ($self,$para,$curr_open,$paras) = @_;
1101   return 1 unless @$paras;
1102   my $list_type;
1103
1104   if($paras->[0][0] eq '=item') { # most common case
1105     $list_type = $self->_get_initial_item_type($paras->[0]);
1106
1107   } elsif($paras->[0][0] eq '=back') {
1108     # Ignore empty lists.  TODO: make this an option?
1109     shift @$paras;
1110     return 1;
1111
1112   } elsif($paras->[0][0] eq '~end') {
1113     $self->whine(
1114       $para->[1]{'start_line'},
1115       "=over is the last thing in the document?!"
1116     );
1117     return 1; # But feh, ignore it.
1118   } else {
1119     $list_type = 'block';
1120   }
1121   $para->[1]{'~type'} = $list_type;
1122   push @$curr_open, $para;
1123    # yes, we reuse the paragraph as a stack item
1124
1125   my $content = join ' ', splice @$para, 2;
1126   my $overness;
1127   if($content =~ m/^\s*$/s) {
1128     $para->[1]{'indent'} = 4;
1129   } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
1130     no integer;
1131     $para->[1]{'indent'} = $1;
1132     if($1 == 0) {
1133       $self->whine(
1134         $para->[1]{'start_line'},
1135         "Can't have a 0 in =over $content"
1136       );
1137       $para->[1]{'indent'} = 4;
1138     }
1139   } else {
1140     $self->whine(
1141       $para->[1]{'start_line'},
1142       "=over should be: '=over' or '=over positive_number'"
1143     );
1144     $para->[1]{'indent'} = 4;
1145   }
1146   DEBUG > 1 and print "=over found of type $list_type\n";
1147
1148   $self->{'content_seen'} ||= 1;
1149   $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1150
1151   return;
1152 }
1153
1154 sub _ponder_back {
1155   my ($self,$para,$curr_open,$paras) = @_;
1156   # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1157
1158   my $content = join ' ', splice @$para, 2;
1159   if($content =~ m/\S/) {
1160     $self->whine(
1161       $para->[1]{'start_line'},
1162       "=back doesn't take any parameters, but you said =back $content"
1163     );
1164   }
1165
1166   if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1167     DEBUG > 1 and print "=back happily closes matching =over\n";
1168     # Expected case: we're closing the most recently opened thing
1169     #my $over = pop @$curr_open;
1170     $self->{'content_seen'} ||= 1;
1171     $self->_handle_element_end( my $scratch =
1172       'over-' . ( (pop @$curr_open)->[1]{'~type'} )
1173     );
1174   } else {
1175     DEBUG > 1 and print "=back found without a matching =over.  Stack: (",
1176         join(', ', map $_->[0], @$curr_open), ").\n";
1177     $self->whine(
1178       $para->[1]{'start_line'},
1179       '=back without =over'
1180     );
1181     return 1; # and ignore it
1182   }
1183 }
1184
1185 sub _ponder_item {
1186   my ($self,$para,$curr_open,$paras) = @_;
1187   my $over;
1188   unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
1189     $self->whine(
1190       $para->[1]{'start_line'},
1191       "'=item' outside of any '=over'"
1192     );
1193     unshift @$paras,
1194       ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1195       $para
1196     ;
1197     return 1;
1198   }
1199
1200
1201   my $over_type = $over->[1]{'~type'};
1202
1203   if(!$over_type) {
1204     # Shouldn't happen1
1205     die "Typeless over in stack, starting at line "
1206      . $over->[1]{'start_line'};
1207
1208   } elsif($over_type eq 'block') {
1209     unless($curr_open->[-1][1]{'~bitched_about'}) {
1210       $curr_open->[-1][1]{'~bitched_about'} = 1;
1211       $self->whine(
1212         $curr_open->[-1][1]{'start_line'},
1213         "You can't have =items (as at line "
1214         . $para->[1]{'start_line'}
1215         . ") unless the first thing after the =over is an =item"
1216       );
1217     }
1218     # Just turn it into a paragraph and reconsider it
1219     $para->[0] = '~Para';
1220     unshift @$paras, $para;
1221     return 1;
1222
1223   } elsif($over_type eq 'text') {
1224     my $item_type = $self->_get_item_type($para);
1225       # That kills the content of the item if it's a number or bullet.
1226     DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1227
1228     if($item_type eq 'text') {
1229       # Nothing special needs doing for 'text'
1230     } elsif($item_type eq 'number' or $item_type eq 'bullet') {
1231       die "Unknown item type $item_type"
1232        unless $item_type eq 'number' or $item_type eq 'bullet';
1233       # Undo our clobbering:
1234       push @$para, $para->[1]{'~orig_content'};
1235       delete $para->[1]{'number'};
1236        # Only a PROPER item-number element is allowed
1237        #  to have a number attribute.
1238     } else {
1239       die "Unhandled item type $item_type"; # should never happen
1240     }
1241
1242     # =item-text thingies don't need any assimilation, it seems.
1243
1244   } elsif($over_type eq 'number') {
1245     my $item_type = $self->_get_item_type($para);
1246       # That kills the content of the item if it's a number or bullet.
1247     DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1248
1249     my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1250
1251     if($item_type eq 'bullet') {
1252       # Hm, it's not numeric.  Correct for this.
1253       $para->[1]{'number'} = $expected_value;
1254       $self->whine(
1255         $para->[1]{'start_line'},
1256         "Expected '=item $expected_value'"
1257       );
1258       push @$para, $para->[1]{'~orig_content'};
1259         # restore the bullet, blocking the assimilation of next para
1260
1261     } elsif($item_type eq 'text') {
1262       # Hm, it's not numeric.  Correct for this.
1263       $para->[1]{'number'} = $expected_value;
1264       $self->whine(
1265         $para->[1]{'start_line'},
1266         "Expected '=item $expected_value'"
1267       );
1268       # Text content will still be there and will block next ~Para
1269
1270     } elsif($item_type ne 'number') {
1271       die "Unknown item type $item_type"; # should never happen
1272
1273     } elsif($expected_value == $para->[1]{'number'}) {
1274       DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1275
1276     } else {
1277       DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1278        " instead of the expected value of $expected_value\n";
1279       $self->whine(
1280         $para->[1]{'start_line'},
1281         "You have '=item " . $para->[1]{'number'} .
1282         "' instead of the expected '=item $expected_value'"
1283       );
1284       $para->[1]{'number'} = $expected_value;  # correcting!!
1285     }
1286
1287     if(@$para == 2) {
1288       # For the cases where we /didn't/ push to @$para
1289       if($paras->[0][0] eq '~Para') {
1290         DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1291         push @$para, splice @{shift @$paras},2;
1292       } else {
1293         DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1294         push @$para, '';  # Just so it's not contentless
1295       }
1296     }
1297
1298
1299   } elsif($over_type eq 'bullet') {
1300     my $item_type = $self->_get_item_type($para);
1301       # That kills the content of the item if it's a number or bullet.
1302     DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1303
1304     if($item_type eq 'bullet') {
1305       # as expected!
1306
1307       if( $para->[1]{'~_freaky_para_hack'} ) {
1308         DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1309         push @$para, delete $para->[1]{'~_freaky_para_hack'};
1310       }
1311
1312     } elsif($item_type eq 'number') {
1313       $self->whine(
1314         $para->[1]{'start_line'},
1315         "Expected '=item *'"
1316       );
1317       push @$para, $para->[1]{'~orig_content'};
1318        # and block assimilation of the next paragraph
1319       delete $para->[1]{'number'};
1320        # Only a PROPER item-number element is allowed
1321        #  to have a number attribute.
1322     } elsif($item_type eq 'text') {
1323       $self->whine(
1324         $para->[1]{'start_line'},
1325         "Expected '=item *'"
1326       );
1327        # But doesn't need processing.  But it'll block assimilation
1328        #  of the next para.
1329     } else {
1330       die "Unhandled item type $item_type"; # should never happen
1331     }
1332
1333     if(@$para == 2) {
1334       # For the cases where we /didn't/ push to @$para
1335       if($paras->[0][0] eq '~Para') {
1336         DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1337         push @$para, splice @{shift @$paras},2;
1338       } else {
1339         DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1340         push @$para, '';  # Just so it's not contentless
1341       }
1342     }
1343
1344   } else {
1345     die "Unhandled =over type \"$over_type\"?";
1346     # Shouldn't happen!
1347   }
1348   $para->[0] .= '-' . $over_type;
1349
1350   return;
1351 }
1352
1353 sub _ponder_Plain {
1354   my ($self,$para) = @_;
1355   DEBUG and print " giving plain treatment...\n";
1356   unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1357     or $para->[1]{'~cooked'}
1358   ) {
1359     push @$para,
1360     @{$self->_make_treelet(
1361       join("\n", splice(@$para, 2)),
1362       $para->[1]{'start_line'}
1363     )};
1364   }
1365   # Empty paragraphs don't need a treelet for any reason I can see.
1366   # And precooked paragraphs already have a treelet.
1367   return;
1368 }
1369
1370 sub _ponder_Verbatim {
1371   my ($self,$para) = @_;
1372   DEBUG and print " giving verbatim treatment...\n";
1373
1374   $para->[1]{'xml:space'} = 'preserve';
1375
1376   my $indent = $self->strip_verbatim_indent;
1377   if ($indent && ref $indent eq 'CODE') {
1378       my @shifted = (shift @{$para}, shift @{$para});
1379       $indent = $indent->($para);
1380       unshift @{$para}, @shifted;
1381   }
1382
1383   for(my $i = 2; $i < @$para; $i++) {
1384     foreach my $line ($para->[$i]) { # just for aliasing
1385       # Strip indentation.
1386       $line =~ s/^\E$indent// if $indent
1387           && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1388       while( $line =~
1389         # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1390         # tabs are at every EIGHTH column.  For portability, it has to be
1391         # one setting everywhere, and 8th wins.
1392         s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1393       ) {}
1394
1395       # TODO: whinge about (or otherwise treat) unindented or overlong lines
1396
1397     }
1398   }
1399
1400   # Now the VerbatimFormatted hoodoo...
1401   if( $self->{'accept_codes'} and
1402       $self->{'accept_codes'}{'VerbatimFormatted'}
1403   ) {
1404     while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1405      # Kill any number of terminal newlines
1406     $self->_verbatim_format($para);
1407   } elsif ($self->{'codes_in_verbatim'}) {
1408     push @$para,
1409     @{$self->_make_treelet(
1410       join("\n", splice(@$para, 2)),
1411       $para->[1]{'start_line'}, $para->[1]{'xml:space'}
1412     )};
1413     $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1414   } else {
1415     push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1416     $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1417   }
1418   return;
1419 }
1420
1421 sub _ponder_Data {
1422   my ($self,$para) = @_;
1423   DEBUG and print " giving data treatment...\n";
1424   $para->[1]{'xml:space'} = 'preserve';
1425   push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1426   return;
1427 }
1428
1429
1430
1431
1432 ###########################################################################
1433
1434 sub _traverse_treelet_bit {  # for use only by the routine above
1435   my($self, $name) = splice @_,0,2;
1436
1437   my $scratch;
1438   $self->_handle_element_start(($scratch=$name), shift @_);
1439
1440   foreach my $x (@_) {
1441     if(ref($x)) {
1442       &_traverse_treelet_bit($self, @$x);
1443     } else {
1444       $self->_handle_text($x);
1445     }
1446   }
1447
1448   $self->_handle_element_end($scratch=$name);
1449   return;
1450 }
1451
1452 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1453
1454 sub _closers_for_all_curr_open {
1455   my $self = $_[0];
1456   my @closers;
1457   foreach my $still_open (@{  $self->{'curr_open'} || return  }) {
1458     my @copy = @$still_open;
1459     $copy[1] = {%{ $copy[1] }};
1460     #$copy[1]{'start_line'} = -1;
1461     if($copy[0] eq '=for') {
1462       $copy[0] = '=end';
1463     } elsif($copy[0] eq '=over') {
1464       $copy[0] = '=back';
1465     } else {
1466       die "I don't know how to auto-close an open $copy[0] region";
1467     }
1468
1469     unless( @copy > 2 ) {
1470       push @copy, $copy[1]{'target'};
1471       $copy[-1] = '' unless defined $copy[-1];
1472        # since =over's don't have targets
1473     }
1474
1475     DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1476     unshift @closers, \@copy;
1477   }
1478   return @closers;
1479 }
1480
1481 #--------------------------------------------------------------------------
1482
1483 sub _verbatim_format {
1484   my($it, $p) = @_;
1485
1486   my $formatting;
1487
1488   for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1489     DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1490     $p->[$i] .= "\n";
1491      # Unlike with simple Verbatim blocks, we don't end up just doing
1492      # a join("\n", ...) on the contents, so we have to append a
1493      # newline to ever line, and then nix the last one later.
1494   }
1495
1496   if( DEBUG > 4 ) {
1497     print "<<\n";
1498     for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1499       print "_verbatim_format $i: $p->[$i]";
1500     }
1501     print ">>\n";
1502   }
1503
1504   for(my $i = $#$p; $i > 2; $i--) {
1505     # work backwards over the lines, except the first (#2)
1506
1507     #next unless $p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s
1508     #        and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1509      # look at a formatty line preceding a nonformatty one
1510     DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1511     if($p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s) {
1512       DEBUG > 5 and print "  It's a formatty line.  ",
1513        "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1514
1515       if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1516         DEBUG > 5 and print "  Previous line is formatty!  Skipping this one.\n";
1517         next;
1518       } else {
1519         DEBUG > 5 and print "  Previous line is non-formatty!  Yay!\n";
1520       }
1521     } else {
1522       DEBUG > 5 and print "  It's not a formatty line.  Ignoring\n";
1523       next;
1524     }
1525
1526     # A formatty line has to have #: in the first two columns, and uses
1527     # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1528     # Example:
1529     #   What do you want?  i like pie. [or whatever]
1530     # #:^^^^^^^^^^^^^^^^^              /////////////
1531
1532
1533     DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1534
1535     $formatting = '  ' . $1;
1536     $formatting =~ s/\s+$//s; # nix trailing whitespace
1537     unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1538       splice @$p,$i,1; # remove this line
1539       $i--; # don't consider next line
1540       next;
1541     }
1542
1543     if( length($formatting) >= length($p->[$i-1]) ) {
1544       $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1545     } else {
1546       $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1547     }
1548     # Make $formatting and the previous line be exactly the same length,
1549     # with $formatting having a " " as the last character.
1550
1551     DEBUG > 4 and print "Formatting <$formatting>    on <", $p->[$i-1], ">\n";
1552
1553
1554     my @new_line;
1555     while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1556       #print "Format matches $1\n";
1557
1558       if($2) {
1559         #print "SKIPPING <$2>\n";
1560         push @new_line,
1561           substr($p->[$i-1], pos($formatting)-length($1), length($1));
1562       } else {
1563         #print "SNARING $+\n";
1564         push @new_line, [
1565           (
1566             $3 ? 'VerbatimB'  :
1567             $4 ? 'VerbatimI'  :
1568             $5 ? 'VerbatimBI' : die("Should never get called")
1569           ), {},
1570           substr($p->[$i-1], pos($formatting)-length($1), length($1))
1571         ];
1572         #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1573       }
1574     }
1575     my @nixed =
1576       splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1577     DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1578
1579     DEBUG > 6 and print "New version of the above line is these tokens (",
1580       scalar(@new_line), "):",
1581       map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1582     $i--; # So the next line we scrutinize is the line before the one
1583           #  that we just went and formatted
1584   }
1585
1586   $p->[0] = 'VerbatimFormatted';
1587
1588   # Collapse adjacent text nodes, just for kicks.
1589   for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1590     if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1591       DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1592       $p->[$i] .= splice @$p, $i+1, 1; # merge
1593       --$i;  # and back up
1594     }
1595   }
1596
1597   # Now look for the last text token, and remove the terminal newline
1598   for( my $i = $#$p; $i >= 2; $i-- ) {
1599     # work backwards over the tokens, even the first
1600     if( !ref($p->[$i]) ) {
1601       if($p->[$i] =~ s/\n$//s) {
1602         DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1603       } else {
1604         DEBUG > 5 and print
1605          "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1606       }
1607       last; # we only want the next one
1608     }
1609   }
1610
1611   return;
1612 }
1613
1614
1615 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1616
1617
1618 sub _treelet_from_formatting_codes {
1619   # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
1620   #  Like [ '~Top', {'start_line' => $start_line},
1621   #            "I like ",
1622   #            [ 'B', {}, "pie" ],
1623   #            "!"
1624   #       ]
1625
1626   my($self, $para, $start_line, $preserve_space) = @_;
1627
1628   my $treelet = ['~Top', {'start_line' => $start_line},];
1629
1630   unless ($preserve_space || $self->{'preserve_whitespace'}) {
1631     $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1632     $para =~ s/ $//;
1633     $para =~ s/^ //;
1634   }
1635
1636   # Only apparent problem the above code is that N<<  >> turns into
1637   # N<< >>.  But then, word wrapping does that too!  So don't do that!
1638
1639   my @stack;
1640   my @lineage = ($treelet);
1641
1642   DEBUG > 4 and print "Paragraph:\n$para\n\n";
1643
1644   # Here begins our frightening tokenizer RE.  The following regex matches
1645   # text in four main parts:
1646   #
1647   #  * Start-codes.  The first alternative matches C< or C<<, the latter
1648   #    followed by some whitespace.  $1 will hold the entire start code
1649   #    (including any space following a multiple-angle-bracket delimiter),
1650   #    and $2 will hold only the additional brackets past the first in a
1651   #    multiple-bracket delimiter.  length($2) + 1 will be the number of
1652   #    closing brackets we have to find.
1653   #
1654   #  * Closing brackets.  Match some amount of whitespace followed by
1655   #    multiple close brackets.  The logic to see if this closes anything
1656   #    is down below.  Note that in order to parse C<<  >> correctly, we
1657   #    have to use look-behind (?<=\s\s), since the match of the starting
1658   #    code will have consumed the whitespace.
1659   #
1660   #  * A single closing bracket, to close a simple code like C<>.
1661   #
1662   #  * Something that isn't a start or end code.  We have to be careful
1663   #    about accepting whitespace, since perlpodspec says that any whitespace
1664   #    before a multiple-bracket closing delimiter should be ignored.
1665   #
1666   while($para =~
1667     m/\G
1668       (?:
1669         # Match starting codes, including the whitespace following a
1670         # multiple-delimiter start code.  $1 gets the whole start code and
1671         # $2 gets all but one of the <s in the multiple-bracket case.
1672         ([A-Z]<(?:(<+)\s+)?)
1673         |
1674         # Match multiple-bracket end codes.  $3 gets the whitespace that
1675         # should be discarded before an end bracket but kept in other cases
1676         # and $4 gets the end brackets themselves.
1677         (\s+|(?<=\s\s))(>{2,})
1678         |
1679         (\s?>)          # $5: simple end-codes
1680         |
1681         (               # $6: stuff containing no start-codes or end-codes
1682           (?:
1683             [^A-Z\s>]
1684             |
1685             (?:
1686               [A-Z](?!<)
1687             )
1688             |
1689             # whitespace is ok, but we don't want to eat the whitespace before
1690             # a multiple-bracket end code.
1691             # NOTE: we may still have problems with e.g. S<<    >>
1692             (?:
1693               \s(?!\s*>{2,})
1694             )
1695           )+
1696         )
1697       )
1698     /xgo
1699   ) {
1700     DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1701     if(defined $1) {
1702       if(defined $2) {
1703         DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1704         # signal that we're looking for simple unless we're in complex.
1705         if ($stack[-1]) {
1706             # We're in complex already. It's just stuff.
1707             DEBUG > 4 and print " It's just stuff.\n";
1708             push @{ $lineage[-1] }, $1;
1709         } else {
1710             # length of the necessary complex end-code string
1711             push @stack, length($2) + 1;
1712             push @lineage, [ substr($1,0,1), {}, ];  # new node object
1713             push @{ $lineage[-2] }, $lineage[-1];
1714         }
1715       } else {
1716         DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1717         if ($stack[-1]) {
1718             # We're in complex already. It's just stuff.
1719             DEBUG > 4 and print " It's just stuff.\n";
1720             push @{ $lineage[-1] }, $1;
1721         } else {
1722             # signal that we're looking for simple.
1723             push @stack, 0;
1724             push @lineage, [ substr($1,0,1), {}, ];  # new node object
1725             push @{ $lineage[-2] }, $lineage[-1];
1726         }
1727       }
1728     } elsif(defined $4) {
1729       DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1730       # This is where it gets messy...
1731       if(! @stack) {
1732         # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
1733         DEBUG > 4 and print " But it's really just stuff.\n";
1734         push @{ $lineage[-1] }, $3, $4;
1735         next;
1736       } elsif(!$stack[-1]) {
1737         # We saw " >>>>" but needed only ">".  Back pos up.
1738         DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1739         push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1740         pos($para) = pos($para) - length($4) + 1;
1741       } elsif($stack[-1] == length($4)) {
1742         # We found " >>>>", and it was exactly what we needed.  Commonest case.
1743         DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1744       } elsif($stack[-1] < length($4)) {
1745         # We saw " >>>>" but needed only " >>".  Back pos up.
1746         DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1747         pos($para) = pos($para) - length($4) + $stack[-1];
1748       } else {
1749         # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
1750         DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1751         push @{ $lineage[-1] }, $3, $4;
1752         next;
1753       }
1754       #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1755
1756       push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1757       # Keep the element from being childless
1758
1759       pop @stack;
1760       pop @lineage;
1761
1762     } elsif(defined $5) {
1763       DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1764
1765       if(@stack and ! $stack[-1]) {
1766         # We're indeed expecting a simple end-code
1767         DEBUG > 4 and print " It's indeed an end-code.\n";
1768
1769         if(length($5) == 2) { # There was a space there: " >"
1770           push @{ $lineage[-1] }, ' ';
1771         } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1772           push @{ $lineage[-1] }, ''; # keep it from being really childless
1773         }
1774
1775         pop @stack;
1776         pop @lineage;
1777       } else {
1778         DEBUG > 4 and print " It's just stuff.\n";
1779         push @{ $lineage[-1] }, $5;
1780       }
1781
1782     } elsif(defined $6) {
1783       DEBUG > 3 and print "Found stuff \"$6\"\n";
1784       push @{ $lineage[-1] }, $6;
1785
1786     } else {
1787       # should never ever ever ever happen
1788       DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1789       die "SPORK 512512!";
1790     }
1791   }
1792
1793   if(@stack) { # Uhoh, some sequences weren't closed.
1794     my $x= "...";
1795     while(@stack) {
1796       push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1797       # Hmmmmm!
1798
1799       my $code         = (pop @lineage)->[0];
1800       my $ender_length =  pop @stack;
1801       if($ender_length) {
1802         --$ender_length;
1803         $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1804       } else {
1805         $x = $code . "<$x>";
1806       }
1807     }
1808     DEBUG > 1 and print "Unterminated $x sequence\n";
1809     $self->whine($start_line,
1810       "Unterminated $x sequence",
1811     );
1812   }
1813
1814   return $treelet;
1815 }
1816
1817 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1818
1819 sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
1820   return stringify_lol($_[1]);
1821 }
1822
1823 sub stringify_lol {  # function: stringify_lol($lol)
1824   my $string_form = '';
1825   _stringify_lol( $_[0] => \$string_form );
1826   return $string_form;
1827 }
1828
1829 sub _stringify_lol {  # the real recursor
1830   my($lol, $to) = @_;
1831   use UNIVERSAL ();
1832   for(my $i = 2; $i < @$lol; ++$i) {
1833     if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1834       _stringify_lol( $lol->[$i], $to);  # recurse!
1835     } else {
1836       $$to .= $lol->[$i];
1837     }
1838   }
1839   return;
1840 }
1841
1842 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1843
1844 sub _dump_curr_open { # return a string representation of the stack
1845   my $curr_open = $_[0]{'curr_open'};
1846
1847   return '[empty]' unless @$curr_open;
1848   return join '; ',
1849     map {;
1850            ($_->[0] eq '=for')
1851              ? ( ($_->[1]{'~really'} || '=over')
1852                . ' ' . $_->[1]{'target'})
1853              : $_->[0]
1854         }
1855     @$curr_open
1856   ;
1857 }
1858
1859 ###########################################################################
1860 my %pretty_form = (
1861   "\a" => '\a', # ding!
1862   "\b" => '\b', # BS
1863   "\e" => '\e', # ESC
1864   "\f" => '\f', # FF
1865   "\t" => '\t', # tab
1866   "\cm" => '\cm',
1867   "\cj" => '\cj',
1868   "\n" => '\n', # probably overrides one of either \cm or \cj
1869   '"' => '\"',
1870   '\\' => '\\\\',
1871   '$' => '\\$',
1872   '@' => '\\@',
1873   '%' => '\\%',
1874   '#' => '\\#',
1875 );
1876
1877 sub pretty { # adopted from Class::Classless
1878   # Not the most brilliant routine, but passable.
1879   # Don't give it a cyclic data structure!
1880   my @stuff = @_; # copy
1881   my $x;
1882   my $out =
1883     # join ",\n" .
1884     join ", ",
1885     map {;
1886     if(!defined($_)) {
1887       "undef";
1888     } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1889       $x = "[ " . pretty(@$_) . " ]" ;
1890       $x;
1891     } elsif(ref($_) eq 'SCALAR') {
1892       $x = "\\" . pretty($$_) ;
1893       $x;
1894     } elsif(ref($_) eq 'HASH') {
1895       my $hr = $_;
1896       $x = "{" . join(", ",
1897         map(pretty($_) . '=>' . pretty($hr->{$_}),
1898             sort keys %$hr ) ) . "}" ;
1899       $x;
1900     } elsif(!length($_)) { q{''} # empty string
1901     } elsif(
1902       $_ eq '0' # very common case
1903       or(
1904          m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1905          and $_ ne '-0' # the strange case that that RE lets thru
1906       )
1907     ) { $_;
1908     } else {
1909       if( chr(65) eq 'A' ) {
1910         s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1911          #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1912          <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1913       } else {
1914         # We're in some crazy non-ASCII world!
1915         s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1916          #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1917          <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1918       }
1919       qq{"$_"};
1920     }
1921   } @stuff;
1922   # $out =~ s/\n */ /g if length($out) < 75;
1923   return $out;
1924 }
1925
1926 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1927
1928 # A rather unsubtle method of blowing away all the state information
1929 # from a parser object so it can be reused. Provided as a utility for
1930 # backward compatibilty in Pod::Man, etc. but not recommended for
1931 # general use.
1932
1933 sub reinit {
1934   my $self = shift;
1935   foreach (qw(source_dead source_filename doc_has_started
1936 start_of_pod_block content_seen last_was_blank paras curr_open
1937 line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
1938 Title)) {
1939
1940     delete $self->{$_};
1941   }
1942 }
1943
1944 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1945 1;
1946