Upgrade to podlators 1.13.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5 4
5use vars qw($VERSION @ISA @EXPORT);
d6a466d7 6$VERSION = 1.04;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
54310121 9
10use Carp;
0e4548d5 11use Config;
12use Cwd;
13use File::Spec::Unix;
14use Getopt::Long;
54310121 15
3ec07288 16use locale; # make \w work right in non-ASCII lands
17
54310121 18=head1 NAME
19
7b8d334a 20Pod::Html - module to convert pod files to HTML
54310121 21
22=head1 SYNOPSIS
23
7b8d334a 24 use Pod::Html;
54310121 25 pod2html([options]);
26
27=head1 DESCRIPTION
28
29Converts files from pod format (see L<perlpod>) to HTML format. It
30can automatically generate indexes and cross-references, and it keeps
31a cache of things it knows how to cross-reference.
32
33=head1 ARGUMENTS
34
35Pod::Html takes the following arguments:
36
37=over 4
38
0e4548d5 39=item backlink
40
41 --backlink="Back to Top"
42
43Adds "Back to Top" links in front of every HEAD1 heading (except for
44the first). By default, no backlink are being generated.
45
b42210d7 46=item cachedir
47
48 --cachedir=name
49
50Creates the item and directory caches in the given directory.
51
0e4548d5 52=item css
53
54 --css=stylesheet
55
56Specify the URL of a cascading style sheet.
57
58=item flush
59
60 --flush
61
62Flushes the item and directory caches.
63
64=item header
65
66 --header
67 --noheader
68
69Creates header and footer blocks containing the text of the NAME
70section. By default, no headers are being generated.
71
54310121 72=item help
73
74 --help
75
76Displays the usage message.
77
5a039dd3 78=item htmldir
79
80 --htmldir=name
81
82Sets the directory in which the resulting HTML file is placed. This
29f227c9 83is used to generate relative links to other files. Not passing this
84causes all links to be absolute, since this is the value that tells
85Pod::Html the root of the documentation tree.
5a039dd3 86
54310121 87=item htmlroot
88
89 --htmlroot=name
90
91Sets the base URL for the HTML files. When cross-references are made,
92the HTML root is prepended to the URL.
93
0e4548d5 94=item index
95
96 --index
97 --noindex
98
99Generate an index at the top of the HTML file. This is the default
100behaviour.
101
54310121 102=item infile
103
104 --infile=name
105
106Specify the pod file to convert. Input is taken from STDIN if no
107infile is specified.
108
54310121 109=item libpods
110
111 --libpods=name:...:name
112
113List of page names (eg, "perlfunc") which contain linkable C<=item>s.
114
115=item netscape
116
117 --netscape
0e4548d5 118 --nonetscape
54310121 119
0e4548d5 120Use Netscape HTML directives when applicable. By default, they will
121B<not> be used.
54310121 122
0e4548d5 123=item outfile
54310121 124
0e4548d5 125 --outfile=name
54310121 126
0e4548d5 127Specify the HTML file to create. Output goes to STDOUT if no outfile
128is specified.
54310121 129
0e4548d5 130=item podpath
54310121 131
0e4548d5 132 --podpath=name:...:name
54310121 133
0e4548d5 134Specify which subdirectories of the podroot contain pod files whose
135HTML converted forms can be linked-to in cross-references.
54310121 136
0e4548d5 137=item podroot
54310121 138
0e4548d5 139 --podroot=name
54310121 140
0e4548d5 141Specify the base directory for finding library pods.
54310121 142
0e4548d5 143=item quiet
54310121 144
0e4548d5 145 --quiet
146 --noquiet
54310121 147
0e4548d5 148Don't display I<mostly harmless> warning messages. These messages
149will be displayed by default. But this is not the same as C<verbose>
150mode.
54310121 151
0e4548d5 152=item recurse
54310121 153
0e4548d5 154 --recurse
54310121 155 --norecurse
156
0e4548d5 157Recurse into subdirectories specified in podpath (default behaviour).
54310121 158
159=item title
160
161 --title=title
162
163Specify the title of the resulting HTML file.
164
165=item verbose
166
167 --verbose
0e4548d5 168 --noverbose
54310121 169
0e4548d5 170Display progress messages. By default, they won't be displayed.
34db337b 171
54310121 172=back
173
174=head1 EXAMPLE
175
176 pod2html("pod2html",
177 "--podpath=lib:ext:pod:vms",
178 "--podroot=/usr/src/perl",
179 "--htmlroot=/perl/nmanual",
180 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
181 "--recurse",
182 "--infile=foo.pod",
183 "--outfile=/perl/nmanual/foo.html");
184
34db337b 185=head1 ENVIRONMENT
186
187Uses $Config{pod2html} to setup default options.
188
54310121 189=head1 AUTHOR
190
191Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
192
54310121 193=head1 SEE ALSO
194
195L<perlpod>
196
197=head1 COPYRIGHT
198
199This program is distributed under the Artistic License.
200
201=cut
202
b42210d7 203my $cachedir = "."; # The directory to which item and directory
204 # caches will be written.
1f763251 205my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
206my $dircache = "pod2htmd$cache_ext";
207my $itemcache = "pod2htmi$cache_ext";
54310121 208
209my @begin_stack = (); # begin/end stack
210
29f227c9 211my @libpods = (); # files to search for links from C<> directives
212my $htmlroot = "/"; # http-server base directory from which all
54310121 213 # relative paths in $podpath stem.
5a039dd3 214my $htmldir = ""; # The directory to which the html pages
215 # will (eventually) be written.
54310121 216my $htmlfile = ""; # write to stdout by default
29f227c9 217my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 218 # refer to this file. This is only used
219 # to make relative urls that point to
220 # other files.
54310121 221my $podfile = ""; # read from stdin by default
222my @podpath = (); # list of directories containing library pods.
223my $podroot = "."; # filesystem base directory from which all
224 # relative paths in $podpath stem.
34db337b 225my $css = ''; # Cascading style sheet
54310121 226my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 227my $quiet = 0; # not quiet by default
54310121 228my $verbose = 0; # not verbose by default
229my $doindex = 1; # non-zero if we should generate an index
0e4548d5 230my $backlink = ''; # text for "back to top" links
54310121 231my $listlevel = 0; # current list depth
2a28b791 232my @listend = (); # the text to use to end the list.
233my $after_lpar = 0; # set to true after a par in an =item
54310121 234my $ignore = 1; # whether or not to format text. we don't
235 # format text until we hit our first pod
236 # directive.
237
238my %items_named = (); # for the multiples of the same item in perlfunc
239my @items_seen = ();
240my $netscape = 0; # whether or not to use netscape directives.
241my $title; # title to give the pod(s)
34db337b 242my $header = 0; # produce block header/footer
54310121 243my $top = 1; # true if we are at the top of the doc. used
244 # to prevent the first <HR> directive.
245my $paragraph; # which paragraph we're processing (used
246 # for error messages)
2a28b791 247my $ptQuote = 0; # status of double-quote conversion
54310121 248my %pages = (); # associative array used to find the location
249 # of pages referenced by L<> links.
250my %sections = (); # sections within this page
251my %items = (); # associative array used to find the location
252 # of =item directives referenced by C<> links
2a28b791 253my %local_items = (); # local items - avoid destruction of %items
39e571d4 254my $Is83; # is dos with short filenames (8.3)
255
54310121 256sub init_globals {
04f720db 257$dircache = "pod2htmd$cache_ext";
258$itemcache = "pod2htmi$cache_ext";
54310121 259
260@begin_stack = (); # begin/end stack
261
262@libpods = (); # files to search for links from C<> directives
263$htmlroot = "/"; # http-server base directory from which all
264 # relative paths in $podpath stem.
67398a75 265$htmldir = ""; # The directory to which the html pages
266 # will (eventually) be written.
54310121 267$htmlfile = ""; # write to stdout by default
268$podfile = ""; # read from stdin by default
269@podpath = (); # list of directories containing library pods.
270$podroot = "."; # filesystem base directory from which all
271 # relative paths in $podpath stem.
34db337b 272$css = ''; # Cascading style sheet
54310121 273$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 274$quiet = 0; # not quiet by default
54310121 275$verbose = 0; # not verbose by default
276$doindex = 1; # non-zero if we should generate an index
0e4548d5 277$backlink = ''; # text for "back to top" links
54310121 278$listlevel = 0; # current list depth
2a28b791 279@listend = (); # the text to use to end the list.
280$after_lpar = 0; # set to true after a par in an =item
54310121 281$ignore = 1; # whether or not to format text. we don't
282 # format text until we hit our first pod
283 # directive.
284
285@items_seen = ();
286%items_named = ();
287$netscape = 0; # whether or not to use netscape directives.
34db337b 288$header = 0; # produce block header/footer
54310121 289$title = ''; # title to give the pod(s)
290$top = 1; # true if we are at the top of the doc. used
291 # to prevent the first <HR> directive.
292$paragraph = ''; # which paragraph we're processing (used
293 # for error messages)
54310121 294%sections = (); # sections within this page
3e3baf6d 295
296# These are not reinitialised here but are kept as a cache.
297# See get_cache and related cache management code.
298#%pages = (); # associative array used to find the location
299 # of pages referenced by L<> links.
300#%items = (); # associative array used to find the location
54310121 301 # of =item directives referenced by C<> links
2a28b791 302%local_items = ();
39e571d4 303$Is83=$^O eq 'dos';
54310121 304}
305
2a28b791 306#
307# clean_data: global clean-up of pod data
308#
309sub clean_data($){
310 my( $dataref ) = @_;
bf202ccd 311 for my $i ( 0..$#{$dataref} ) {
2a28b791 312 ${$dataref}[$i] =~ s/\s+\Z//;
313
314 # have a look for all-space lines
4135c0a0 315 if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
2a28b791 316 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
317 splice( @$dataref, $i, 1, @chunks );
318 }
319 }
320}
321
322
54310121 323sub pod2html {
324 local(@ARGV) = @_;
325 local($/);
326 local $_;
327
328 init_globals();
329
39e571d4 330 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
331
54310121 332 # cache of %pages and %items from last time we ran pod2html
54310121 333
334 #undef $opt_help if defined $opt_help;
335
336 # parse the command-line parameters
337 parse_command_line();
338
339 # set some variables to their default values if necessary
340 local *POD;
341 unless (@ARGV && $ARGV[0]) {
342 $podfile = "-" unless $podfile; # stdin
343 open(POD, "<$podfile")
344 || die "$0: cannot open $podfile file for input: $!\n";
345 } else {
346 $podfile = $ARGV[0]; # XXX: might be more filenames
347 *POD = *ARGV;
348 }
349 $htmlfile = "-" unless $htmlfile; # stdout
350 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 351 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9 352 if ( $htmlroot eq ''
353 && defined( $htmldir )
354 && $htmldir ne ''
355 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
356 )
5a039dd3 357 {
29f227c9 358 # Set the 'base' url for this file, so that we can use it
359 # as the location from which to calculate relative links
360 # to other files. If this is '', then absolute links will
361 # be used throughout.
362 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 363 }
54310121 364
365 # read the pod a paragraph at a time
366 warn "Scanning for sections in input file(s)\n" if $verbose;
367 $/ = "";
368 my @poddata = <POD>;
369 close(POD);
2a28b791 370 clean_data( \@poddata );
54310121 371
372 # scan the pod for =head[1-6] directives and build an index
373 my $index = scan_headings(\%sections, @poddata);
374
3e3baf6d 375 unless($index) {
31e56455 376 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 377 }
378
54310121 379 # open the output file
380 open(HTML, ">$htmlfile")
381 || die "$0: cannot open $htmlfile file for output: $!\n";
382
d011ffae 383 # put a title in the HTML file if one wasn't specified
384 if ($title eq '') {
385 TITLE_SEARCH: {
bf202ccd 386 for my $i ( 0..$#poddata ) {
d011ffae 387 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
388 for my $para ( @poddata[$i, $i+1] ) {
389 last TITLE_SEARCH
390 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
391 }
392 }
54310121 393
d011ffae 394 }
395 }
396 }
fe6f1558 397 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d 398 # probably a split pod so take first =head[12] as title
bf202ccd 399 for my $i ( 0..$#poddata ) {
3e3baf6d 400 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
401 }
402 warn "adopted '$title' as title for $podfile\n"
403 if $verbose and $title;
404 }
7b8d334a 405 if ($title) {
406 $title =~ s/\s*\(.*\)//;
407 } else {
34db337b 408 warn "$0: no title for $podfile" unless $quiet;
fe6f1558 409 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 410 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 411 warn "using $title" if $verbose;
54310121 412 }
34db337b 413 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
414 $csslink =~ s,\\,/,g;
415 $csslink =~ s,(/.):,$1|,;
416
417 my $block = $header ? <<END_OF_BLOCK : '';
418<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
419<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
420<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
421</TD></TR>
422</TABLE>
423END_OF_BLOCK
424
54310121 425 print HTML <<END_OF_HEAD;
7b8d334a 426<HTML>
427<HEAD>
34db337b 428<TITLE>$title</TITLE>$csslink
7b8d334a 429</HEAD>
54310121 430
7b8d334a 431<BODY>
34db337b 432$block
54310121 433END_OF_HEAD
434
3e3baf6d 435 # load/reload/validate/cache %pages and %items
436 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 437
438 # scan the pod for =item directives
2a28b791 439 scan_items( \%local_items, "", @poddata);
54310121 440
441 # put an index at the top of the file. note, if $doindex is 0 we
442 # still generate an index, but surround it with an html comment.
443 # that way some other program can extract it if desired.
444 $index =~ s/--+/-/g;
2a28b791 445 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 446 print HTML "<!-- INDEX BEGIN -->\n";
447 print HTML "<!--\n" unless $doindex;
448 print HTML $index;
449 print HTML "-->\n" unless $doindex;
450 print HTML "<!-- INDEX END -->\n\n";
31e56455 451 print HTML "<HR>\n" if $doindex and $index;
54310121 452
453 # now convert this file
2a28b791 454 my $after_item; # set to true after an =item
455 warn "Converting input file $podfile\n" if $verbose;
456 foreach my $i (0..$#poddata){
457 $ptQuote = 0; # status of quote conversion
458
54310121 459 $_ = $poddata[$i];
460 $paragraph = $i+1;
461 if (/^(=.*)/s) { # is it a pod directive?
462 $ignore = 0;
2a28b791 463 $after_item = 0;
54310121 464 $_ = $1;
465 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
466 process_begin($1, $2);
467 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
468 process_end($1, $2);
469 } elsif (/^=cut/) { # =cut
470 process_cut();
471 } elsif (/^=pod/) { # =pod
472 process_pod();
473 } else {
474 next if @begin_stack && $begin_stack[-1] ne 'html';
475
7b8d334a 476 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 477 process_head( $1, $2, $doindex && $index );
478 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
479 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 480 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791 481 process_item( $1 );
482 $after_item = 1;
54310121 483 } elsif (/^=over\s*(.*)/) { # =over N
484 process_over();
485 } elsif (/^=back/) { # =back
486 process_back();
a45bd81d 487 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 488 process_for($1,$2);
489 } else {
490 /^=(\S*)\s*/;
491 warn "$0: $podfile: unknown pod directive '$1' in "
492 . "paragraph $paragraph. ignoring.\n";
493 }
494 }
495 $top = 0;
496 }
497 else {
498 next if $ignore;
499 next if @begin_stack && $begin_stack[-1] ne 'html';
adbecb01 500 print HTML and next if @begin_stack && $begin_stack[-1] eq 'html';
54310121 501 my $text = $_;
2a28b791 502 if( $text =~ /\A\s+/ ){
503 process_pre( \$text );
504 print HTML "<PRE>\n$text</PRE>\n";
505
506 } else {
507 process_text( \$text );
508
509 # experimental: check for a paragraph where all lines
510 # have some ...\t...\t...\n pattern
511 if( $text =~ /\t/ ){
512 my @lines = split( "\n", $text );
513 if( @lines > 1 ){
514 my $all = 2;
515 foreach my $line ( @lines ){
516 if( $line =~ /\S/ && $line !~ /\t/ ){
517 $all--;
518 last if $all == 0;
519 }
520 }
521 if( $all > 0 ){
522 $text =~ s/\t+/<TD>/g;
523 $text =~ s/^/<TR><TD>/gm;
524 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
525 $text . '</TABLE>';
526 }
527 }
528 }
529 ## end of experimental
530
531 if( $after_item ){
532 print HTML "$text\n";
533 $after_lpar = 1;
534 } else {
535 print HTML "<P>$text</P>\n";
536 }
537 }
538 $after_item = 0;
54310121 539 }
540 }
541
542 # finish off any pending directives
543 finish_list();
2a28b791 544
545 # link to page index
0e4548d5 546 print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
547 if $doindex and $index and $backlink;
2a28b791 548
54310121 549 print HTML <<END_OF_TAIL;
34db337b 550$block
7b8d334a 551</BODY>
54310121 552
7b8d334a 553</HTML>
54310121 554END_OF_TAIL
555
556 # close the html file
557 close(HTML);
558
559 warn "Finished\n" if $verbose;
560}
561
562##############################################################################
563
564my $usage; # see below
565sub usage {
566 my $podfile = shift;
567 warn "$0: $podfile: @_\n" if @_;
568 die $usage;
569}
570
571$usage =<<END_OF_USAGE;
572Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
573 --podpath=<name>:...:<name> --podroot=<name>
574 --libpods=<name>:...:<name> --recurse --verbose --index
b42210d7 575 --netscape --norecurse --noindex --cachedir=<name>
54310121 576
0e4548d5 577 --backlink - set text for "back to top" links (default: none).
b42210d7 578 --cachedir - directory for the item and directory cache files.
0e4548d5 579 --css - stylesheet URL
580 --flush - flushes the item and directory caches.
581 --[no]header - produce block header/footer (default is no headers).
582 --help - prints this message.
583 --htmldir - directory for resulting HTML files.
584 --htmlroot - http-server base directory from which all relative paths
585 in podpath stem (default is /).
586 --[no]index - generate an index at the top of the resulting html
587 (default behaviour).
588 --infile - filename for the pod to convert (input taken from stdin
589 by default).
590 --libpods - colon-separated list of pages to search for =item pod
591 directives in as targets of C<> and implicit links (empty
592 by default). note, these are not filenames, but rather
593 page names like those that appear in L<> links.
594 --[no]netscape - will use netscape html directives when applicable.
595 (default is not to use them).
596 --outfile - filename for the resulting html file (output sent to
597 stdout by default).
598 --podpath - colon-separated list of directories containing library
599 pods (empty by default).
600 --podroot - filesystem base directory from which all relative paths
601 in podpath stem (default is .).
602 --[no]quiet - supress some benign warning messages (default is off).
603 --[no]recurse - recurse on those subdirectories listed in podpath
604 (default behaviour).
605 --title - title that will appear in resulting html file.
606 --[no]verbose - self-explanatory (off by default).
54310121 607
608END_OF_USAGE
609
610sub parse_command_line {
b42210d7 611 my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
612 $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
613 $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
614 $opt_recurse,$opt_title,$opt_verbose);
0e4548d5 615
34db337b 616 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 617 my $result = GetOptions(
0e4548d5 618 'backlink=s' => \$opt_backlink,
b42210d7 619 'cachedir=s' => \$opt_cachedir,
0e4548d5 620 'css=s' => \$opt_css,
29f227c9 621 'flush' => \$opt_flush,
0e4548d5 622 'header!' => \$opt_header,
29f227c9 623 'help' => \$opt_help,
624 'htmldir=s' => \$opt_htmldir,
54310121 625 'htmlroot=s' => \$opt_htmlroot,
29f227c9 626 'index!' => \$opt_index,
54310121 627 'infile=s' => \$opt_infile,
628 'libpods=s' => \$opt_libpods,
629 'netscape!' => \$opt_netscape,
630 'outfile=s' => \$opt_outfile,
631 'podpath=s' => \$opt_podpath,
632 'podroot=s' => \$opt_podroot,
0e4548d5 633 'quiet!' => \$opt_quiet,
54310121 634 'recurse!' => \$opt_recurse,
635 'title=s' => \$opt_title,
0e4548d5 636 'verbose!' => \$opt_verbose,
54310121 637 );
638 usage("-", "invalid parameters") if not $result;
639
640 usage("-") if defined $opt_help; # see if the user asked for help
641 $opt_help = ""; # just to make -w shut-up.
642
54310121 643 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
644 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
645
0e4548d5 646 $backlink = $opt_backlink if defined $opt_backlink;
b42210d7 647 $cachedir = $opt_cachedir if defined $opt_cachedir;
0e4548d5 648 $css = $opt_css if defined $opt_css;
649 $header = $opt_header if defined $opt_header;
650 $htmldir = $opt_htmldir if defined $opt_htmldir;
651 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
652 $doindex = $opt_index if defined $opt_index;
653 $podfile = $opt_infile if defined $opt_infile;
654 $netscape = $opt_netscape if defined $opt_netscape;
655 $htmlfile = $opt_outfile if defined $opt_outfile;
656 $podroot = $opt_podroot if defined $opt_podroot;
657 $quiet = $opt_quiet if defined $opt_quiet;
658 $recurse = $opt_recurse if defined $opt_recurse;
659 $title = $opt_title if defined $opt_title;
660 $verbose = $opt_verbose if defined $opt_verbose;
661
54310121 662 warn "Flushing item and directory caches\n"
663 if $opt_verbose && defined $opt_flush;
b42210d7 664 $dircache = "$cachedir/pod2htmd$cache_ext";
665 $itemcache = "$cachedir/pod2htmi$cache_ext";
54310121 666 unlink($dircache, $itemcache) if defined $opt_flush;
54310121 667}
668
3e3baf6d 669
670my $saved_cache_key;
671
672sub get_cache {
673 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
674 my @cache_key_args = @_;
675
676 # A first-level cache:
677 # Don't bother reading the cache files if they still apply
678 # and haven't changed since we last read them.
679
680 my $this_cache_key = cache_key(@cache_key_args);
681
682 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
683
684 # load the cache of %pages and %items if possible. $tests will be
685 # non-zero if successful.
686 my $tests = 0;
687 if (-f $dircache && -f $itemcache) {
688 warn "scanning for item cache\n" if $verbose;
689 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
690 }
691
692 # if we didn't succeed in loading the cache then we must (re)build
693 # %pages and %items.
694 if (!$tests) {
695 warn "scanning directories in pod-path\n" if $verbose;
696 scan_podpath($podroot, $recurse, 0);
697 }
698 $saved_cache_key = cache_key(@cache_key_args);
699}
700
701sub cache_key {
702 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
703 return join('!', $dircache, $itemcache, $recurse,
29f227c9 704 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 705}
706
54310121 707#
3e3baf6d 708# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 709# are valid caches of %pages and %items. if they are valid then it loads
710# them and returns a non-zero value.
711#
3e3baf6d 712sub load_cache {
54310121 713 my($dircache, $itemcache, $podpath, $podroot) = @_;
714 my($tests);
715 local $_;
716
717 $tests = 0;
718
719 open(CACHE, "<$itemcache") ||
720 die "$0: error opening $itemcache for reading: $!\n";
721 $/ = "\n";
722
723 # is it the same podpath?
724 $_ = <CACHE>;
725 chomp($_);
3e3baf6d 726 $tests++ if (join(":", @$podpath) eq $_);
54310121 727
728 # is it the same podroot?
729 $_ = <CACHE>;
730 chomp($_);
731 $tests++ if ($podroot eq $_);
732
733 # load the cache if its good
734 if ($tests != 2) {
735 close(CACHE);
54310121 736 return 0;
737 }
738
739 warn "loading item cache\n" if $verbose;
740 while (<CACHE>) {
741 /(.*?) (.*)$/;
742 $items{$1} = $2;
743 }
744 close(CACHE);
745
746 warn "scanning for directory cache\n" if $verbose;
747 open(CACHE, "<$dircache") ||
748 die "$0: error opening $dircache for reading: $!\n";
749 $/ = "\n";
750 $tests = 0;
751
752 # is it the same podpath?
753 $_ = <CACHE>;
754 chomp($_);
3e3baf6d 755 $tests++ if (join(":", @$podpath) eq $_);
54310121 756
757 # is it the same podroot?
758 $_ = <CACHE>;
759 chomp($_);
760 $tests++ if ($podroot eq $_);
761
762 # load the cache if its good
763 if ($tests != 2) {
764 close(CACHE);
54310121 765 return 0;
766 }
767
768 warn "loading directory cache\n" if $verbose;
769 while (<CACHE>) {
770 /(.*?) (.*)$/;
771 $pages{$1} = $2;
772 }
773
774 close(CACHE);
775
776 return 1;
777}
778
779#
780# scan_podpath - scans the directories specified in @podpath for directories,
781# .pod files, and .pm files. it also scans the pod files specified in
782# @libpods for =item directives.
783#
784sub scan_podpath {
3e3baf6d 785 my($podroot, $recurse, $append) = @_;
54310121 786 my($pwd, $dir);
787 my($libpod, $dirname, $pod, @files, @poddata);
788
3e3baf6d 789 unless($append) {
790 %items = ();
791 %pages = ();
792 }
793
54310121 794 # scan each directory listed in @podpath
795 $pwd = getcwd();
796 chdir($podroot)
797 || die "$0: error changing to directory $podroot: $!\n";
798 foreach $dir (@podpath) {
799 scan_dir($dir, $recurse);
800 }
801
802 # scan the pods listed in @libpods for =item directives
803 foreach $libpod (@libpods) {
804 # if the page isn't defined then we won't know where to find it
805 # on the system.
806 next unless defined $pages{$libpod} && $pages{$libpod};
807
808 # if there is a directory then use the .pod and .pm files within it.
29f227c9 809 # NOTE: Only finds the first so-named directory in the tree.
810# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
811 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 812 # find all the .pod and .pm files within the directory
813 $dirname = $1;
814 opendir(DIR, $dirname) ||
815 die "$0: error opening directory $dirname: $!\n";
fe6f1558 816 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121 817 closedir(DIR);
818
819 # scan each .pod and .pm file for =item directives
820 foreach $pod (@files) {
821 open(POD, "<$dirname/$pod") ||
822 die "$0: error opening $dirname/$pod for input: $!\n";
823 @poddata = <POD>;
824 close(POD);
2a28b791 825 clean_data( \@poddata );
54310121 826
2a28b791 827 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 828 }
829
830 # use the names of files as =item directives too.
2a28b791 831### Don't think this should be done this way - confuses issues.(WL)
832### foreach $pod (@files) {
833### $pod =~ /^(.*)(\.pod|\.pm)$/;
834### $items{$1} = "$dirname/$1.html" if $1;
835### }
54310121 836 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
837 $pages{$libpod} =~ /([^:]*\.pm):/) {
838 # scan the .pod or .pm file for =item directives
839 $pod = $1;
840 open(POD, "<$pod") ||
841 die "$0: error opening $pod for input: $!\n";
842 @poddata = <POD>;
843 close(POD);
2a28b791 844 clean_data( \@poddata );
54310121 845
2a28b791 846 scan_items( \%items, "$pod", @poddata);
54310121 847 } else {
848 warn "$0: shouldn't be here (line ".__LINE__."\n";
849 }
850 }
851 @poddata = (); # clean-up a bit
852
853 chdir($pwd)
854 || die "$0: error changing to directory $pwd: $!\n";
855
856 # cache the item list for later use
857 warn "caching items for later use\n" if $verbose;
858 open(CACHE, ">$itemcache") ||
859 die "$0: error open $itemcache for writing: $!\n";
860
861 print CACHE join(":", @podpath) . "\n$podroot\n";
862 foreach my $key (keys %items) {
863 print CACHE "$key $items{$key}\n";
864 }
865
866 close(CACHE);
867
868 # cache the directory list for later use
869 warn "caching directories for later use\n" if $verbose;
870 open(CACHE, ">$dircache") ||
871 die "$0: error open $dircache for writing: $!\n";
872
873 print CACHE join(":", @podpath) . "\n$podroot\n";
874 foreach my $key (keys %pages) {
875 print CACHE "$key $pages{$key}\n";
876 }
877
878 close(CACHE);
879}
880
881#
882# scan_dir - scans the directory specified in $dir for subdirectories, .pod
883# files, and .pm files. notes those that it finds. this information will
884# be used later in order to figure out where the pages specified in L<>
885# links are on the filesystem.
886#
887sub scan_dir {
888 my($dir, $recurse) = @_;
889 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
890 local $_;
891
892 @subdirs = ();
893 @pods = ();
894
895 opendir(DIR, $dir) ||
896 die "$0: error opening directory $dir: $!\n";
897 while (defined($_ = readdir(DIR))) {
898 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
899 $pages{$_} = "" unless defined $pages{$_};
900 $pages{$_} .= "$dir/$_:";
901 push(@subdirs, $_);
fe6f1558 902 } elsif (/\.pod\z/) { # .pod
903 s/\.pod\z//;
54310121 904 $pages{$_} = "" unless defined $pages{$_};
905 $pages{$_} .= "$dir/$_.pod:";
906 push(@pods, "$dir/$_.pod");
d3f9d0fb 907 } elsif (/\.html\z/) { # .html
908 s/\.html\z//;
909 $pages{$_} = "" unless defined $pages{$_};
910 $pages{$_} .= "$dir/$_.pod:";
fe6f1558 911 } elsif (/\.pm\z/) { # .pm
912 s/\.pm\z//;
54310121 913 $pages{$_} = "" unless defined $pages{$_};
914 $pages{$_} .= "$dir/$_.pm:";
915 push(@pods, "$dir/$_.pm");
916 }
917 }
918 closedir(DIR);
919
920 # recurse on the subdirectories if necessary
921 if ($recurse) {
922 foreach my $subdir (@subdirs) {
923 scan_dir("$dir/$subdir", $recurse);
924 }
925 }
926}
927
928#
929# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
930# build an index.
931#
932sub scan_headings {
933 my($sections, @data) = @_;
2a28b791 934 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 935
be173d55 936 # here we need local $ignore = 0;
937 # unfortunately, we can't have it, because $ignore is lexical
938 $ignore = 0;
939
54310121 940 $listdepth = 0;
941 $index = "";
942
943 # scan for =head directives, note their name, and build an index
944 # pointing to each of them.
945 foreach my $line (@data) {
bb9460ed 946 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 947 ($tag, $which_head, $otitle) = ($1,$2,$3);
948
949 my $title = depod( $otitle );
950 my $name = htmlify( $title );
951 $$sections{$name} = 1;
952 $title = process_text( \$otitle );
54310121 953
102c538a 954 while ($which_head != $listdepth) {
955 if ($which_head > $listdepth) {
956 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
957 $listdepth++;
958 } elsif ($which_head < $listdepth) {
959 $listdepth--;
960 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
961 }
54310121 962 }
54310121 963
964 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 965 "<A HREF=\"#" . $name . "\">" .
966 $title . "</A></LI>";
54310121 967 }
968 }
969
970 # finish off the lists
971 while ($listdepth--) {
972 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
973 }
974
975 # get rid of bogus lists
976 $index =~ s,\t*<UL>\s*</UL>\n,,g;
977
bb9460ed 978 $ignore = 1; # restore old value;
be173d55 979
54310121 980 return $index;
981}
982
983#
984# scan_items - scans the pod specified by $pod for =item directives. we
985# will use this information later on in resolving C<> links.
986#
987sub scan_items {
2a28b791 988 my( $itemref, $pod, @poddata ) = @_;
54310121 989 my($i, $item);
990 local $_;
991
fe6f1558 992 $pod =~ s/\.pod\z//;
54310121 993 $pod .= ".html" if $pod;
994
995 foreach $i (0..$#poddata) {
2a28b791 996 my $txt = depod( $poddata[$i] );
997
998 # figure out what kind of item it is.
999 # Build string for referencing this item.
1000 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1001 next unless $1;
1002 $item = $1;
1003 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1004 $item = $1;
1005 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1006 $item = $1;
1007 } else {
1008 next;
54310121 1009 }
2a28b791 1010 my $fid = fragment_id( $item );
1011 $$itemref{$fid} = "$pod" if $fid;
54310121 1012 }
1013}
1014
1015#
1016# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1017#
1018sub process_head {
2a28b791 1019 my($tag, $heading, $hasindex) = @_;
54310121 1020
1021 # figure out the level of the =head
1022 $tag =~ /head([1-6])/;
1023 my $level = $1;
1024
2a28b791 1025 if( $listlevel ){
1026 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1027 while( $listlevel ){
1028 process_back();
1029 }
1030 }
1031
1032 print HTML "<P>\n";
1033 if( $level == 1 && ! $top ){
0e4548d5 1034 print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
1035 if $hasindex and $backlink;
2a28b791 1036 print HTML "<HR>\n"
1037 }
1038
1039 my $name = htmlify( depod( $heading ) );
1040 my $convert = process_text( \$heading );
1041 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1042}
1043
2a28b791 1044
54310121 1045#
2a28b791 1046# emit_item_tag - print an =item's text
1047# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1048#
2a28b791 1049my $EmittedItem;
1050
1051sub emit_item_tag($$$){
1052 my( $otext, $text, $compact ) = @_;
1053 my $item = fragment_id( $text );
54310121 1054
2a28b791 1055 $EmittedItem = $item;
1056 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1057
2a28b791 1058 print HTML '<STRONG>';
1059 if ($items_named{$item}++) {
1060 print HTML process_text( \$otext );
1061 } else {
1062 my $name = 'item_' . $item;
1063 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1064 }
1065 print HTML "</STRONG><BR>\n";
1066 undef( $EmittedItem );
1067}
1068
1069sub emit_li {
1070 my( $tag ) = @_;
1071 if( $items_seen[$listlevel]++ == 0 ){
1072 push( @listend, "</$tag>" );
1073 print HTML "<$tag>\n";
1074 }
1075 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1076}
1077
1078#
1079# process_item - convert a pod item tag and convert it to HTML format.
1080#
1081sub process_item {
1082 my( $otext ) = @_;
54310121 1083
1084 # lots of documents start a list without doing an =over. this is
1085 # bad! but, the proper thing to do seems to be to just assume
1086 # they did do an =over. so warn them once and then continue.
2a28b791 1087 if( $listlevel == 0 ){
1088 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1089 process_over();
1090 }
54310121 1091
2a28b791 1092 # formatting: insert a paragraph if preceding item has >1 paragraph
1093 if( $after_lpar ){
1094 print HTML "<P></P>\n";
1095 $after_lpar = 0;
1096 }
54310121 1097
1098 # remove formatting instructions from the text
2a28b791 1099 my $text = depod( $otext );
1100
1101 # all the list variants:
1102 if( $text =~ /\A\*/ ){ # bullet
1103 emit_li( 'UL' );
1104 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1105 my $tag = $1;
1106 $otext =~ s/\A\*\s+//;
1107 emit_item_tag( $otext, $tag, 1 );
54310121 1108 }
1109
2a28b791 1110 } elsif( $text =~ /\A\d+/ ){ # numbered list
1111 emit_li( 'OL' );
1112 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1113 my $tag = $1;
1114 $otext =~ s/\A\d+\.?\s*//;
1115 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1116 }
54310121 1117
2a28b791 1118 } else { # definition list
1119 emit_li( 'DL' );
1120 if ($text =~ /\A(.+)\Z/s ){ # should have text
1121 emit_item_tag( $otext, $text, 1 );
7b8d334a 1122 }
54310121 1123 print HTML '<DD>';
1124 }
54310121 1125 print HTML "\n";
1126}
1127
1128#
2a28b791 1129# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1130#
1131sub process_over {
1132 # start a new list
1133 $listlevel++;
2a28b791 1134 push( @items_seen, 0 );
1135 $after_lpar = 0;
54310121 1136}
1137
1138#
1139# process_back - process a pod back tag and convert it to HTML format.
1140#
1141sub process_back {
2a28b791 1142 if( $listlevel == 0 ){
1143 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1144 return;
1145 }
54310121 1146
1147 # close off the list. note, I check to see if $listend[$listlevel] is
1148 # defined because an =item directive may have never appeared and thus
1149 # $listend[$listlevel] may have never been initialized.
1150 $listlevel--;
2a28b791 1151 if( defined $listend[$listlevel] ){
1152 print HTML '<P></P>' if $after_lpar;
1153 print HTML $listend[$listlevel];
1154 print HTML "\n";
1155 pop( @listend );
1156 }
1157 $after_lpar = 0;
54310121 1158
2a28b791 1159 # clean up item count
1160 pop( @items_seen );
54310121 1161}
1162
1163#
2a28b791 1164# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1165#
1166sub process_cut {
1167 $ignore = 1;
1168}
1169
1170#
d1be9408 1171# process_pod - process a pod tag, thus stop ignoring pod directives
2a28b791 1172# until we see a corresponding cut.
54310121 1173#
1174sub process_pod {
1175 # no need to set $ignore to 0 cause the main loop did it
1176}
1177
1178#
2a28b791 1179# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1180# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1181#
1182sub process_for {
1183 my($whom, $text) = @_;
1184 if ( $whom =~ /^(pod2)?html$/i) {
1185 print HTML $text;
c4d9b39d 1186 } elsif ($whom =~ /^illustration$/i) {
1187 1 while chomp $text;
1188 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1189 $text .= $ext, last if -r "$text$ext";
1190 }
1191 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1192 }
54310121 1193}
1194
1195#
1196# process_begin - process a =begin pod tag. this pushes
1197# whom we're beginning on the begin stack. if there's a
1198# begin stack, we only print if it us.
1199#
1200sub process_begin {
1201 my($whom, $text) = @_;
1202 $whom = lc($whom);
1203 push (@begin_stack, $whom);
1204 if ( $whom =~ /^(pod2)?html$/) {
1205 print HTML $text if $text;
1206 }
1207}
1208
1209#
1210# process_end - process a =end pod tag. pop the
1211# begin stack. die if we're mismatched.
1212#
1213sub process_end {
1214 my($whom, $text) = @_;
1215 $whom = lc($whom);
1216 if ($begin_stack[-1] ne $whom ) {
1217 die "Unmatched begin/end at chunk $paragraph\n"
1218 }
2a28b791 1219 pop( @begin_stack );
54310121 1220}
1221
1222#
2a28b791 1223# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1224#
2a28b791 1225sub process_pre {
1226 my( $text ) = @_;
1227 my( $rest );
54310121 1228 return if $ignore;
1229
54310121 1230 $rest = $$text;
1231
2a28b791 1232 # insert spaces in place of tabs
1233 $rest =~ s#.*#
be173d55 1234 my $line = $&;
1235 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1236 $line;
1237 #eg;
54310121 1238
2a28b791 1239 # convert some special chars to HTML escapes
1240 $rest =~ s/&/&amp;/g;
1241 $rest =~ s/</&lt;/g;
1242 $rest =~ s/>/&gt;/g;
1243 $rest =~ s/"/&quot;/g;
1244
1245 # try and create links for all occurrences of perl.* within
1246 # the preformatted text.
1247 $rest =~ s{
1248 (\s*)(perl\w+)
1249 }{
1250 if ( defined $pages{$2} ){ # is a link
1251 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1252 } elsif (defined $pages{dosify($2)}) { # is a link
1253 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1254 } else {
1255 "$1$2";
1256 }
1257 }xeg;
1258 $rest =~ s{
1259 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1260 }{
1261 my $url ;
1262 if ( $htmlfileurl ne '' ){
1263 # Here, we take advantage of the knowledge
1264 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1265 # Since $htmlroot eq '', we need to prepend $htmldir
1266 # on the fron of the link to get the absolute path
1267 # of the link's target. We check for a leading '/'
1268 # to avoid corrupting links that are #, file:, etc.
1269 my $old_url = $3 ;
1270 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1271 $url = relativize_url( "$old_url.html", $htmlfileurl );
1272 } else {
1273 $url = "$3.html" ;
1274 }
1275 "$1$url" ;
1276 }xeg;
1277
1278 # Look for embedded URLs and make them into links. We don't
1279 # relativize them since they are best left as the author intended.
1280
1281 my $urls = '(' . join ('|', qw{
54310121 1282 http
1283 telnet
1284 mailto
1285 news
1286 gopher
1287 file
1288 wais
1289 ftp
1290 } )
1291 . ')';
1292
2a28b791 1293 my $ltrs = '\w';
1294 my $gunk = '/#~:.?+=&%@!\-';
1295 my $punc = '.:?\-';
1296 my $any = "${ltrs}${gunk}${punc}";
54310121 1297
2a28b791 1298 $rest =~ s{
54310121 1299 \b # start at word boundary
1300 ( # begin $1 {
29f227c9 1301 $urls : # need resource and a colon
1302 (?!:) # Ignore File::, among others.
54310121 1303 [$any] +? # followed by on or more
1304 # of any valid character, but
1305 # be conservative and take only
1306 # what you need to....
1307 ) # end $1 }
1308 (?= # look-ahead non-consumptive assertion
1309 [$punc]* # either 0 or more puntuation
1310 [^$any] # followed by a non-url char
1311 | # or else
1312 $ # then end of the string
1313 )
1314 }{<A HREF="$1">$1</A>}igox;
1315
2a28b791 1316 # text should be as it is (verbatim)
1317 $$text = $rest;
1318}
54310121 1319
54310121 1320
2a28b791 1321#
1322# pure text processing
1323#
1324# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1325# we don't want this to happen within IS
1326#
1327sub pure_text($){
1328 my $text = shift();
1329 process_puretext( $text, \$ptQuote, 1 );
54310121 1330}
1331
2a28b791 1332sub inIS_text($){
1333 my $text = shift();
1334 process_puretext( $text, \$ptQuote, 0 );
1335}
54310121 1336
1337#
1338# process_puretext - process pure text (without pod-escapes) converting
1339# double-quotes and handling implicit C<> links.
1340#
1341sub process_puretext {
2a28b791 1342 my($text, $quote, $notinIS) = @_;
54310121 1343
2a28b791 1344 ## Guessing at func() or [$@%&]*var references in plain text is destined
1345 ## to produce some strange looking ref's. uncomment to disable:
1346 ## $notinIS = 0;
1347
1348 my(@words, $lead, $trail);
54310121 1349
2a28b791 1350 # convert double-quotes to single-quotes
1351 if( $$quote && $text =~ s/"/''/s ){
1352 $$quote = 0;
1353 }
1354 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1355 $$quote = 1 if $text =~ s/"/``/s;
54310121 1356
1357 # keep track of leading and trailing white-space
2a28b791 1358 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1359 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1360
2a28b791 1361 # split at space/non-space boundaries
1362 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1363
1364 # process each word individually
1365 foreach my $word (@words) {
2a28b791 1366 # skip space runs
1367 next if $word =~ /^\s*$/;
54310121 1368 # see if we can infer a link
02369fa5 1369 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1370 # has parenthesis so should have been a C<> ref
2a28b791 1371 ## try for a pagename (perlXXX(1))?
02369fa5 1372 my( $func, $args ) = ( $1, $2 );
1373 if( $args =~ /^\d+$/ ){
2a28b791 1374 my $url = page_sect( $word, '' );
1375 if( defined $url ){
1376 $word = "<A HREF=\"$url\">the $word manpage</A>";
1377 next;
1378 }
1379 }
02369fa5 1380 ## try function name for a link, append tt'ed argument list
1381 $word = emit_C( $func, '', "($args)");
2a28b791 1382
1383#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1384## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1385## # perl variables, should be a C<> ref
1386## $word = emit_C( $word );
1387
54310121 1388 } elsif ($word =~ m,^\w+://\w,) {
1389 # looks like a URL
5a039dd3 1390 # Don't relativize it: leave it as the author intended
54310121 1391 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1392 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1393 # looks like an e-mail address
7b8d334a 1394 my ($w1, $w2, $w3) = ("", $word, "");
1395 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1396 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1397 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1398 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1399 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1400 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1401 } else {
7b8d334a 1402 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1403 }
1404 }
1405
2a28b791 1406 # put everything back together
1407 return $lead . join( '', @words ) . $trail;
1408}
1409
54310121 1410
2a28b791 1411#
1412# process_text - handles plaintext that appears in the input pod file.
1413# there may be pod commands embedded within the text so those must be
1414# converted to html commands.
1415#
7ba65c74 1416
c68ea5d1 1417sub process_text1($$;$$);
1418sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1419sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1420
2a28b791 1421sub process_text {
1422 return if $ignore;
1423 my( $tref ) = @_;
1424 my $res = process_text1( 0, $tref );
1425 $$tref = $res;
1426}
1427
c68ea5d1 1428sub process_text1($$;$$){
1429 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791 1430 my $res = '';
1431
60a48b2d 1432 unless (defined $func) {
1433 $func = '';
1434 $lev++;
1435 }
1436
2a28b791 1437 if( $func eq 'B' ){
1438 # B<text> - boldface
1439 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1440
1441 } elsif( $func eq 'C' ){
1442 # C<code> - can be a ref or <CODE></CODE>
1443 # need to extract text
c68ea5d1 1444 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791 1445
1446 ## clean-up of the link target
1447 my $text = depod( $par );
1448
1449 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1450 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1451
1452 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1453
1454 } elsif( $func eq 'E' ){
1455 # E<x> - convert to character
be3174d2 1456 $$rstr =~ s/^([^>]*)>//;
1457 my $escape = $1;
1458 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1459 $res = "&$escape;";
2a28b791 1460
1461 } elsif( $func eq 'F' ){
1462 # F<filename> - italizice
1463 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1464
1465 } elsif( $func eq 'I' ){
1466 # I<text> - italizice
1467 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1468
1469 } elsif( $func eq 'L' ){
1470 # L<link> - link
1471 ## L<text|cross-ref> => produce text, use cross-ref for linking
1472 ## L<cross-ref> => make text from cross-ref
1473 ## need to extract text
c68ea5d1 1474 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791 1475
1476 # some L<>'s that shouldn't be:
1477 # a) full-blown URL's are emitted as-is
1478 if( $par =~ m{^\w+://}s ){
1479 return make_URL_href( $par );
1480 }
1481 # b) C<...> is stripped and treated as C<>
1482 if( $par =~ /^C<(.*)>$/ ){
1483 my $text = depod( $1 );
1484 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1485 }
1486
1487 # analyze the contents
1488 $par =~ s/\n/ /g; # undo word-wrapped tags
1489 my $opar = $par;
1490 my $linktext;
1491 if( $par =~ s{^([^|]+)\|}{} ){
1492 $linktext = $1;
1493 }
1494
1495 # make sure sections start with a /
1496 $par =~ s{^"}{/"};
1497
1498 my( $page, $section, $ident );
1499
1500 # check for link patterns
1501 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1502 # we've got a name/ident (no quotes)
1503 ( $page, $ident ) = ( $1, $2 );
1504 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1505
1506 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1507 # even though this should be a "section", we go for ident first
1508 ( $page, $ident ) = ( $1, $2 );
1509 ### print STDERR "--> L<$par> to page $page, section $section\n";
1510
1511 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1512 ( $page, $section ) = ( '', $par );
1513 ### print STDERR "--> L<$par> to void page, section $section\n";
1514
1515 } else {
1516 ( $page, $section ) = ( $par, '' );
1517 ### print STDERR "--> L<$par> to page $par, void section\n";
1518 }
1519
1520 # now, either $section or $ident is defined. the convoluted logic
1521 # below tries to resolve L<> according to what the user specified.
1522 # failing this, we try to find the next best thing...
1523 my( $url, $ltext, $fid );
1524
1525 RESOLVE: {
1526 if( defined $ident ){
1527 ## try to resolve $ident as an item
1528 ( $url, $fid ) = coderef( $page, $ident );
1529 if( $url ){
1530 if( ! defined( $linktext ) ){
1531 $linktext = $ident;
1532 $linktext .= " in " if $ident && $page;
1533 $linktext .= "the $page manpage" if $page;
1534 }
1535 ### print STDERR "got coderef url=$url\n";
1536 last RESOLVE;
1537 }
1538 ## no luck: go for a section (auto-quoting!)
1539 $section = $ident;
1540 }
1541 ## now go for a section
1542 my $htmlsection = htmlify( $section );
1543 $url = page_sect( $page, $htmlsection );
1544 if( $url ){
1545 if( ! defined( $linktext ) ){
1546 $linktext = $section;
1547 $linktext .= " in " if $section && $page;
1548 $linktext .= "the $page manpage" if $page;
1549 }
1550 ### print STDERR "got page/section url=$url\n";
1551 last RESOLVE;
1552 }
1553 ## no luck: go for an ident
1554 if( $section ){
1555 $ident = $section;
1556 } else {
1557 $ident = $page;
1558 $page = undef();
1559 }
1560 ( $url, $fid ) = coderef( $page, $ident );
1561 if( $url ){
1562 if( ! defined( $linktext ) ){
1563 $linktext = $ident;
1564 $linktext .= " in " if $ident && $page;
1565 $linktext .= "the $page manpage" if $page;
1566 }
1567 ### print STDERR "got section=>coderef url=$url\n";
1568 last RESOLVE;
1569 }
1570
1571 # warning; show some text.
1572 $linktext = $opar unless defined $linktext;
1573 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1574 }
1575
d1be9408 1576 # now we have a URL or just plain code
2a28b791 1577 $$rstr = $linktext . '>' . $$rstr;
1578 if( defined( $url ) ){
1579 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1580 } else {
1581 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1582 }
1583
1584 } elsif( $func eq 'S' ){
1585 # S<text> - non-breaking spaces
1586 $res = process_text1( $lev, $rstr );
1587 $res =~ s/ /&nbsp;/g;
1588
1589 } elsif( $func eq 'X' ){
1590 # X<> - ignore
1591 $$rstr =~ s/^[^>]*>//;
1592
1593 } elsif( $func eq 'Z' ){
1594 # Z<> - empty
1595 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1596 unless $$rstr =~ s/^>//;
1597
1598 } else {
c68ea5d1 1599 my $term = pattern $closing;
1600 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1601 # all others: either recurse into new function or
c68ea5d1 1602 # terminate at closing angle bracket(s)
2a28b791 1603 my $pt = $1;
c68ea5d1 1604 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1605 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1 1606 return $res if !$3 && $lev > 1;
1607 if( $3 ){
1608 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1609 }
2a28b791 1610 }
1611 if( $lev == 1 ){
1612 $res .= pure_text( $$rstr );
54310121 1613 } else {
2a28b791 1614 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1615 }
1616 }
2a28b791 1617 return $res;
1618}
54310121 1619
2a28b791 1620#
1621# go_ahead: extract text of an IS (can be nested)
1622#
c68ea5d1 1623sub go_ahead($$$){
1624 my( $rstr, $func, $closing ) = @_;
2a28b791 1625 my $res = '';
c68ea5d1 1626 my @closing = ($closing);
1627 while( $$rstr =~
1628 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1629 $res .= $1;
c68ea5d1 1630 unless( $3 ){
1631 shift @closing;
1632 return $res unless @closing;
2a28b791 1633 } else {
c68ea5d1 1634 unshift @closing, closing $4;
2a28b791 1635 }
1636 $res .= $2;
1637 }
1638 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1639 return $res;
54310121 1640}
1641
1642#
2a28b791 1643# emit_C - output result of C<text>
1644# $text is the depod-ed text
54310121 1645#
02369fa5 1646sub emit_C($;$$){
1647 my( $text, $nocode, $args ) = @_;
60a48b2d 1648 $args = '' unless defined $args;
2a28b791 1649 my $res;
1650 my( $url, $fid ) = coderef( undef(), $text );
1651
1652 # need HTML-safe text
02369fa5 1653 my $linktext = html_escape( "$text$args" );
2a28b791 1654
1655 if( defined( $url ) &&
1656 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1657 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1658 } elsif( 0 && $nocode ){
1659 $res = $linktext;
1660 } else {
1661 $res = "<CODE>$linktext</CODE>";
1662 }
1663 return $res;
54310121 1664}
1665
1666#
2a28b791 1667# html_escape: make text safe for HTML
1668#
1669sub html_escape {
1670 my $rest = $_[0];
1671 $rest =~ s/&/&amp;/g;
1672 $rest =~ s/</&lt;/g;
1673 $rest =~ s/>/&gt;/g;
1674 $rest =~ s/"/&quot;/g;
1675 return $rest;
1676}
1677
1678
1679#
39e571d4 1680# dosify - convert filenames to 8.3
1681#
1682sub dosify {
1683 my($str) = @_;
fe4c6be1 1684 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1685 if ($Is83) {
1686 $str = lc $str;
1687 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1688 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1689 }
1690 return $str;
1691}
1692
1693#
d1be9408 1694# page_sect - make a URL from the text of a L<>
54310121 1695#
2a28b791 1696sub page_sect($$) {
1697 my( $page, $section ) = @_;
1698 my( $linktext, $page83, $link); # work strings
1699
1700 # check if we know that this is a section in this page
1701 if (!defined $pages{$page} && defined $sections{$page}) {
1702 $section = $page;
1703 $page = "";
1704 ### print STDERR "reset page='', section=$section\n";
54310121 1705 }
1706
39e571d4 1707 $page83=dosify($page);
1708 $page=$page83 if (defined $pages{$page83});
54310121 1709 if ($page eq "") {
2a28b791 1710 $link = "#" . htmlify( $section );
350ccacd 1711 } elsif ( $page =~ /::/ ) {
350ccacd 1712 $page =~ s,::,/,g;
29f227c9 1713 # Search page cache for an entry keyed under the html page name,
1714 # then look to see what directory that page might be in. NOTE:
1715 # this will only find one page. A better solution might be to produce
1716 # an intermediate page that is an index to all such pages.
1717 my $page_name = $page ;
fe6f1558 1718 $page_name =~ s,^.*/,,s ;
29f227c9 1719 if ( defined( $pages{ $page_name } ) &&
1720 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1721 ) {
1722 $page = $1 ;
1723 }
1724 else {
1725 # NOTE: This branch assumes that all A::B pages are located in
1726 # $htmlroot/A/B.html . This is often incorrect, since they are
1727 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1728 # analyze the contents of %pages and figure out where any
1729 # cousins of A::B are, then assume that. So, if A::B isn't found,
1730 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1731 # lib/A/B.pm. This is also limited, but it's an improvement.
1732 # Maybe a hints file so that the links point to the correct places
2a28b791 1733 # nonetheless?
1734
29f227c9 1735 }
350ccacd 1736 $link = "$htmlroot/$page.html";
2a28b791 1737 $link .= "#" . htmlify( $section ) if ($section);
54310121 1738 } elsif (!defined $pages{$page}) {
54310121 1739 $link = "";
54310121 1740 } else {
2a28b791 1741 $section = htmlify( $section ) if $section ne "";
1742 ### print STDERR "...section=$section\n";
54310121 1743
1744 # if there is a directory by the name of the page, then assume that an
1745 # appropriate section will exist in the subdirectory
29f227c9 1746# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1747 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1748 $link = "$htmlroot/$1/$section.html";
2a28b791 1749 ### print STDERR "...link=$link\n";
54310121 1750
1751 # since there is no directory by the name of the page, the section will
1752 # have to exist within a .html of the same name. thus, make sure there
1753 # is a .pod or .pm that might become that .html
1754 } else {
2a28b791 1755 $section = "#$section" if $section;
1756 ### print STDERR "...section=$section\n";
1757
54310121 1758 # check if there is a .pod with the page name
1759 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1760 $link = "$htmlroot/$1.html$section";
1761 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1762 $link = "$htmlroot/$1.html$section";
1763 } else {
54310121 1764 $link = "";
54310121 1765 }
1766 }
1767 }
1768
54310121 1769 if ($link) {
29f227c9 1770 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1771 # implies $htmlroot eq ''. This means that the link in question
1772 # needs a prefix of $htmldir if it begins with '/'. The test for
1773 # the initial '/' is done to avoid '#'-only links, and to allow
1774 # for other kinds of links, like file:, ftp:, etc.
1775 my $url ;
1776 if ( $htmlfileurl ne '' ) {
fe6f1558 1777 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791 1778 $url = relativize_url( $link, $htmlfileurl );
1779# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1780 }
1781 else {
1782 $url = $link ;
1783 }
2a28b791 1784 return $url;
29f227c9 1785
54310121 1786 } else {
2a28b791 1787 return undef();
54310121 1788 }
54310121 1789}
1790
1791#
29f227c9 1792# relativize_url - convert an absolute URL to one relative to a base URL.
1793# Assumes both end in a filename.
1794#
1795sub relativize_url {
1796 my ($dest,$source) = @_ ;
1797
1798 my ($dest_volume,$dest_directory,$dest_file) =
1799 File::Spec::Unix->splitpath( $dest ) ;
1800 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1801
1802 my ($source_volume,$source_directory,$source_file) =
1803 File::Spec::Unix->splitpath( $source ) ;
1804 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1805
1806 my $rel_path = '' ;
1807 if ( $dest ne '' ) {
1808 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1809 }
1810
1811 if ( $rel_path ne '' &&
1812 substr( $rel_path, -1 ) ne '/' &&
1813 substr( $dest_file, 0, 1 ) ne '#'
1814 ) {
1815 $rel_path .= "/$dest_file" ;
1816 }
1817 else {
1818 $rel_path .= "$dest_file" ;
1819 }
1820
1821 return $rel_path ;
1822}
1823
54310121 1824
1825#
2a28b791 1826# coderef - make URL from the text of a C<>
54310121 1827#
2a28b791 1828sub coderef($$){
1829 my( $page, $item ) = @_;
1830 my( $url );
1831
1832 my $fid = fragment_id( $item );
2a28b791 1833 if( defined( $page ) ){
1834 # we have been given a $page...
1835 $page =~ s{::}{/}g;
1836
1837 # Do we take it? Item could be a section!
228a48a5 1838 my $base = $items{$fid} || "";
2a28b791 1839 $base =~ s{[^/]*/}{};
1840 if( $base ne "$page.html" ){
1841 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1842 $page = undef();
1843 }
54310121 1844
2a28b791 1845 } else {
1846 # no page - local items precede cached items
67398a75 1847 if( defined( $fid ) ){
1848 if( exists $local_items{$fid} ){
1849 $page = $local_items{$fid};
1850 } else {
1851 $page = $items{$fid};
1852 }
2a28b791 1853 }
1854 }
54310121 1855
1856 # if there was a pod file that we found earlier with an appropriate
1857 # =item directive, then create a link to that page.
2a28b791 1858 if( defined $page ){
1859 if( $page ){
228a48a5 1860 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1861 $page = $1 . '.html';
29f227c9 1862 }
2a28b791 1863 my $link = "$htmlroot/$page#item_$fid";
54310121 1864
2a28b791 1865 # Here, we take advantage of the knowledge that $htmlfileurl
1866 # ne '' implies $htmlroot eq ''.
1867 if ( $htmlfileurl ne '' ) {
1868 $link = "$htmldir$link" ;
1869 $url = relativize_url( $link, $htmlfileurl ) ;
1870 } else {
1871 $url = $link ;
1872 }
1873 } else {
1874 $url = "#item_" . $fid;
1875 }
54310121 1876
2a28b791 1877 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1878 }
1879 return( $url, $fid );
54310121 1880}
1881
54310121 1882
1883
1884#
29f227c9 1885# Adapted from Nick Ing-Simmons' PodToHtml package.
1886sub relative_url {
1887 my $source_file = shift ;
1888 my $destination_file = shift;
1889
1890 my $source = URI::file->new_abs($source_file);
1891 my $uo = URI::file->new($destination_file,$source)->abs;
1892 return $uo->rel->as_string;
1893}
1894
1895
1896#
54310121 1897# finish_list - finish off any pending HTML lists. this should be called
1898# after the entire pod file has been read and converted.
1899#
1900sub finish_list {
7b8d334a 1901 while ($listlevel > 0) {
54310121 1902 print HTML "</DL>\n";
1903 $listlevel--;
1904 }
1905}
1906
1907#
1908# htmlify - converts a pod section specification to a suitable section
2a28b791 1909# specification for HTML. Note that we keep spaces and special characters
1910# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1911#
1912sub htmlify {
2a28b791 1913 my( $heading) = @_;
1914 $heading =~ s/(\s+)/ /g;
1915 $heading =~ s/\s+\Z//;
1916 $heading =~ s/\A\s+//;
1917 # The hyphen is a disgrace to the English language.
1918 $heading =~ s/[-"?]//g;
1919 $heading = lc( $heading );
1920 return $heading;
1921}
54310121 1922
2a28b791 1923#
1924# depod - convert text by eliminating all interior sequences
1925# Note: can be called with copy or modify semantics
1926#
1927my %E2c;
67398a75 1928$E2c{lt} = '<';
1929$E2c{gt} = '>';
1930$E2c{sol} = '/';
2a28b791 1931$E2c{verbar} = '|';
67398a75 1932$E2c{amp} = '&'; # in Tk's pods
2a28b791 1933
c68ea5d1 1934sub depod1($;$$);
7ba65c74 1935
2a28b791 1936sub depod($){
1937 my $string;
1938 if( ref( $_[0] ) ){
1939 $string = ${$_[0]};
1940 ${$_[0]} = depod1( \$string );
1941 } else {
1942 $string = $_[0];
1943 depod1( \$string );
1944 }
1945}
54310121 1946
c68ea5d1 1947sub depod1($;$$){
1948 my( $rstr, $func, $closing ) = @_;
2a28b791 1949 my $res = '';
228a48a5 1950 return $res unless defined $$rstr;
2a28b791 1951 if( ! defined( $func ) ){
1952 # skip to next begin of an interior sequence
c68ea5d1 1953 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 1954 # recurse into its text
c68ea5d1 1955 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791 1956 }
1957 $res .= $$rstr;
1958 } elsif( $func eq 'E' ){
1959 # E<x> - convert to character
be3174d2 1960 $$rstr =~ s/^([^>]*)>//;
228a48a5 1961 $res .= $E2c{$1} || "";
2a28b791 1962 } elsif( $func eq 'X' ){
1963 # X<> - ignore
1964 $$rstr =~ s/^[^>]*>//;
1965 } elsif( $func eq 'Z' ){
1966 # Z<> - empty
1967 $$rstr =~ s/^>//;
1968 } else {
1969 # all others: either recurse into new function or
1970 # terminate at closing angle bracket
c68ea5d1 1971 my $term = pattern $closing;
1972 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 1973 $res .= $1;
c68ea5d1 1974 last unless $3;
1975 $res .= depod1( $rstr, $3, closing $4 );
2a28b791 1976 }
1977 ## If we're here and $2 ne '>': undelimited interior sequence.
1978 ## Ignored, as this is called without proper indication of where we are.
1979 ## Rely on process_text to produce diagnostics.
1980 }
1981 return $res;
1982}
54310121 1983
2a28b791 1984#
1985# fragment_id - construct a fragment identifier from:
1986# a) =item text
1987# b) contents of C<...>
1988#
1989my @hc;
1990sub fragment_id {
1991 my $text = shift();
1992 $text =~ s/\s+\Z//s;
1993 if( $text ){
1994 # a method or function?
1995 return $1 if $text =~ /(\w+)\s*\(/;
1996 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1997
1998 # a variable name?
1999 return $1 if $text =~ /^([$@%*]\S+)/;
2000
2001 # some pattern matching operator?
2002 return $1 if $text =~ m|^(\w+/).*/\w*$|;
2003
2004 # fancy stuff... like "do { }"
2005 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2006
2007 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2008 # and some funnies with ... Module ...
2009 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
2010 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2011
2012 # text? normalize!
2013 $text =~ s/\s+/_/sg;
2014 $text =~ s{(\W)}{
2015 defined( $hc[ord($1)] ) ? $hc[ord($1)]
2016 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2017 $text = substr( $text, 0, 50 );
2018 } else {
2019 return undef();
2020 }
54310121 2021}
2022
2a28b791 2023#
2024# make_URL_href - generate HTML href from URL
2025# Special treatment for CGI queries.
2026#
2027sub make_URL_href($){
2028 my( $url ) = @_;
2029 if( $url !~
228a48a5 2030 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791 2031 $url = "<A HREF=\"$url\">$url</A>";
2032 }
2033 return $url;
54310121 2034}
2035
20361;