Commit | Line | Data |
54310121 |
1 | package Pod::Html; |
2 | |
3 | use Pod::Functions; |
4 | use Getopt::Long; # package for handling command-line parameters |
29f227c9 |
5 | use File::Spec::Unix; |
54310121 |
6 | require Exporter; |
7b8d334a |
7 | use vars qw($VERSION); |
29f227c9 |
8 | $VERSION = 1.02; |
54310121 |
9 | @ISA = Exporter; |
10 | @EXPORT = qw(pod2html htmlify); |
11 | use Cwd; |
12 | |
13 | use Carp; |
14 | |
3ec07288 |
15 | use locale; # make \w work right in non-ASCII lands |
16 | |
54310121 |
17 | use strict; |
18 | |
7b8d334a |
19 | use Config; |
20 | |
54310121 |
21 | =head1 NAME |
22 | |
7b8d334a |
23 | Pod::Html - module to convert pod files to HTML |
54310121 |
24 | |
25 | =head1 SYNOPSIS |
26 | |
7b8d334a |
27 | use Pod::Html; |
54310121 |
28 | pod2html([options]); |
29 | |
30 | =head1 DESCRIPTION |
31 | |
32 | Converts files from pod format (see L<perlpod>) to HTML format. It |
33 | can automatically generate indexes and cross-references, and it keeps |
34 | a cache of things it knows how to cross-reference. |
35 | |
36 | =head1 ARGUMENTS |
37 | |
38 | Pod::Html takes the following arguments: |
39 | |
40 | =over 4 |
41 | |
42 | =item help |
43 | |
44 | --help |
45 | |
46 | Displays the usage message. |
47 | |
5a039dd3 |
48 | =item htmldir |
49 | |
50 | --htmldir=name |
51 | |
52 | Sets the directory in which the resulting HTML file is placed. This |
29f227c9 |
53 | is used to generate relative links to other files. Not passing this |
54 | causes all links to be absolute, since this is the value that tells |
55 | Pod::Html the root of the documentation tree. |
5a039dd3 |
56 | |
54310121 |
57 | =item htmlroot |
58 | |
59 | --htmlroot=name |
60 | |
61 | Sets the base URL for the HTML files. When cross-references are made, |
62 | the HTML root is prepended to the URL. |
63 | |
64 | =item infile |
65 | |
66 | --infile=name |
67 | |
68 | Specify the pod file to convert. Input is taken from STDIN if no |
69 | infile is specified. |
70 | |
71 | =item outfile |
72 | |
73 | --outfile=name |
74 | |
75 | Specify the HTML file to create. Output goes to STDOUT if no outfile |
76 | is specified. |
77 | |
78 | =item podroot |
79 | |
80 | --podroot=name |
81 | |
82 | Specify the base directory for finding library pods. |
83 | |
84 | =item podpath |
85 | |
86 | --podpath=name:...:name |
87 | |
88 | Specify which subdirectories of the podroot contain pod files whose |
89 | HTML converted forms can be linked-to in cross-references. |
90 | |
91 | =item libpods |
92 | |
93 | --libpods=name:...:name |
94 | |
95 | List of page names (eg, "perlfunc") which contain linkable C<=item>s. |
96 | |
97 | =item netscape |
98 | |
99 | --netscape |
100 | |
101 | Use Netscape HTML directives when applicable. |
102 | |
103 | =item nonetscape |
104 | |
105 | --nonetscape |
106 | |
107 | Do not use Netscape HTML directives (default). |
108 | |
109 | =item index |
110 | |
111 | --index |
112 | |
113 | Generate an index at the top of the HTML file (default behaviour). |
114 | |
115 | =item noindex |
116 | |
117 | --noindex |
118 | |
119 | Do not generate an index at the top of the HTML file. |
120 | |
121 | |
122 | =item recurse |
123 | |
124 | --recurse |
125 | |
126 | Recurse into subdirectories specified in podpath (default behaviour). |
127 | |
128 | =item norecurse |
129 | |
130 | --norecurse |
131 | |
132 | Do not recurse into subdirectories specified in podpath. |
133 | |
134 | =item title |
135 | |
136 | --title=title |
137 | |
138 | Specify the title of the resulting HTML file. |
139 | |
140 | =item verbose |
141 | |
142 | --verbose |
143 | |
144 | Display progress messages. |
145 | |
146 | =back |
147 | |
148 | =head1 EXAMPLE |
149 | |
150 | pod2html("pod2html", |
151 | "--podpath=lib:ext:pod:vms", |
152 | "--podroot=/usr/src/perl", |
153 | "--htmlroot=/perl/nmanual", |
154 | "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop", |
155 | "--recurse", |
156 | "--infile=foo.pod", |
157 | "--outfile=/perl/nmanual/foo.html"); |
158 | |
159 | =head1 AUTHOR |
160 | |
161 | Tom Christiansen, E<lt>tchrist@perl.comE<gt>. |
162 | |
163 | =head1 BUGS |
164 | |
165 | Has trouble with C<> etc in = commands. |
166 | |
167 | =head1 SEE ALSO |
168 | |
169 | L<perlpod> |
170 | |
171 | =head1 COPYRIGHT |
172 | |
173 | This program is distributed under the Artistic License. |
174 | |
175 | =cut |
176 | |
177 | my $dircache = "pod2html-dircache"; |
178 | my $itemcache = "pod2html-itemcache"; |
179 | |
180 | my @begin_stack = (); # begin/end stack |
181 | |
29f227c9 |
182 | my @libpods = (); # files to search for links from C<> directives |
183 | my $htmlroot = "/"; # http-server base directory from which all |
54310121 |
184 | # relative paths in $podpath stem. |
5a039dd3 |
185 | my $htmldir = ""; # The directory to which the html pages |
186 | # will (eventually) be written. |
54310121 |
187 | my $htmlfile = ""; # write to stdout by default |
29f227c9 |
188 | my $htmlfileurl = "" ; # The url that other files would use to |
5a039dd3 |
189 | # refer to this file. This is only used |
190 | # to make relative urls that point to |
191 | # other files. |
54310121 |
192 | my $podfile = ""; # read from stdin by default |
193 | my @podpath = (); # list of directories containing library pods. |
194 | my $podroot = "."; # filesystem base directory from which all |
195 | # relative paths in $podpath stem. |
196 | my $recurse = 1; # recurse on subdirectories in $podpath. |
197 | my $verbose = 0; # not verbose by default |
198 | my $doindex = 1; # non-zero if we should generate an index |
199 | my $listlevel = 0; # current list depth |
200 | my @listitem = (); # stack of HTML commands to use when a =item is |
201 | # encountered. the top of the stack is the |
202 | # current list. |
203 | my @listdata = (); # similar to @listitem, but for the text after |
204 | # an =item |
205 | my @listend = (); # similar to @listitem, but the text to use to |
206 | # end the list. |
207 | my $ignore = 1; # whether or not to format text. we don't |
208 | # format text until we hit our first pod |
209 | # directive. |
210 | |
211 | my %items_named = (); # for the multiples of the same item in perlfunc |
212 | my @items_seen = (); |
213 | my $netscape = 0; # whether or not to use netscape directives. |
214 | my $title; # title to give the pod(s) |
215 | my $top = 1; # true if we are at the top of the doc. used |
216 | # to prevent the first <HR> directive. |
217 | my $paragraph; # which paragraph we're processing (used |
218 | # for error messages) |
219 | my %pages = (); # associative array used to find the location |
220 | # of pages referenced by L<> links. |
221 | my %sections = (); # sections within this page |
222 | my %items = (); # associative array used to find the location |
223 | # of =item directives referenced by C<> links |
39e571d4 |
224 | my $Is83; # is dos with short filenames (8.3) |
225 | |
54310121 |
226 | sub init_globals { |
227 | $dircache = "pod2html-dircache"; |
228 | $itemcache = "pod2html-itemcache"; |
229 | |
230 | @begin_stack = (); # begin/end stack |
231 | |
232 | @libpods = (); # files to search for links from C<> directives |
233 | $htmlroot = "/"; # http-server base directory from which all |
234 | # relative paths in $podpath stem. |
235 | $htmlfile = ""; # write to stdout by default |
236 | $podfile = ""; # read from stdin by default |
237 | @podpath = (); # list of directories containing library pods. |
238 | $podroot = "."; # filesystem base directory from which all |
239 | # relative paths in $podpath stem. |
240 | $recurse = 1; # recurse on subdirectories in $podpath. |
241 | $verbose = 0; # not verbose by default |
242 | $doindex = 1; # non-zero if we should generate an index |
243 | $listlevel = 0; # current list depth |
244 | @listitem = (); # stack of HTML commands to use when a =item is |
245 | # encountered. the top of the stack is the |
246 | # current list. |
247 | @listdata = (); # similar to @listitem, but for the text after |
248 | # an =item |
249 | @listend = (); # similar to @listitem, but the text to use to |
250 | # end the list. |
251 | $ignore = 1; # whether or not to format text. we don't |
252 | # format text until we hit our first pod |
253 | # directive. |
254 | |
255 | @items_seen = (); |
256 | %items_named = (); |
257 | $netscape = 0; # whether or not to use netscape directives. |
258 | $title = ''; # title to give the pod(s) |
259 | $top = 1; # true if we are at the top of the doc. used |
260 | # to prevent the first <HR> directive. |
261 | $paragraph = ''; # which paragraph we're processing (used |
262 | # for error messages) |
54310121 |
263 | %sections = (); # sections within this page |
3e3baf6d |
264 | |
265 | # These are not reinitialised here but are kept as a cache. |
266 | # See get_cache and related cache management code. |
267 | #%pages = (); # associative array used to find the location |
268 | # of pages referenced by L<> links. |
269 | #%items = (); # associative array used to find the location |
54310121 |
270 | # of =item directives referenced by C<> links |
39e571d4 |
271 | $Is83=$^O eq 'dos'; |
54310121 |
272 | } |
273 | |
274 | sub pod2html { |
275 | local(@ARGV) = @_; |
276 | local($/); |
277 | local $_; |
278 | |
279 | init_globals(); |
280 | |
39e571d4 |
281 | $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN()); |
282 | |
54310121 |
283 | # cache of %pages and %items from last time we ran pod2html |
54310121 |
284 | |
285 | #undef $opt_help if defined $opt_help; |
286 | |
287 | # parse the command-line parameters |
288 | parse_command_line(); |
289 | |
290 | # set some variables to their default values if necessary |
291 | local *POD; |
292 | unless (@ARGV && $ARGV[0]) { |
293 | $podfile = "-" unless $podfile; # stdin |
294 | open(POD, "<$podfile") |
295 | || die "$0: cannot open $podfile file for input: $!\n"; |
296 | } else { |
297 | $podfile = $ARGV[0]; # XXX: might be more filenames |
298 | *POD = *ARGV; |
299 | } |
300 | $htmlfile = "-" unless $htmlfile; # stdout |
301 | $htmlroot = "" if $htmlroot eq "/"; # so we don't get a // |
29f227c9 |
302 | $htmldir =~ s#/$## ; # so we don't get a // |
303 | if ( $htmlroot eq '' |
304 | && defined( $htmldir ) |
305 | && $htmldir ne '' |
306 | && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir |
307 | ) |
5a039dd3 |
308 | { |
29f227c9 |
309 | # Set the 'base' url for this file, so that we can use it |
310 | # as the location from which to calculate relative links |
311 | # to other files. If this is '', then absolute links will |
312 | # be used throughout. |
313 | $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1); |
5a039dd3 |
314 | } |
54310121 |
315 | |
316 | # read the pod a paragraph at a time |
317 | warn "Scanning for sections in input file(s)\n" if $verbose; |
318 | $/ = ""; |
319 | my @poddata = <POD>; |
320 | close(POD); |
321 | |
322 | # scan the pod for =head[1-6] directives and build an index |
323 | my $index = scan_headings(\%sections, @poddata); |
324 | |
3e3baf6d |
325 | unless($index) { |
326 | warn "No pod in $podfile\n" if $verbose; |
327 | return; |
328 | } |
329 | |
54310121 |
330 | # open the output file |
331 | open(HTML, ">$htmlfile") |
332 | || die "$0: cannot open $htmlfile file for output: $!\n"; |
333 | |
d011ffae |
334 | # put a title in the HTML file if one wasn't specified |
335 | if ($title eq '') { |
336 | TITLE_SEARCH: { |
337 | for (my $i = 0; $i < @poddata; $i++) { |
338 | if ($poddata[$i] =~ /^=head1\s*NAME\b/m) { |
339 | for my $para ( @poddata[$i, $i+1] ) { |
340 | last TITLE_SEARCH |
341 | if ($title) = $para =~ /(\S+\s+-+.*\S)/s; |
342 | } |
343 | } |
54310121 |
344 | |
d011ffae |
345 | } |
346 | } |
347 | } |
3e3baf6d |
348 | if (!$title and $podfile =~ /\.pod$/) { |
349 | # probably a split pod so take first =head[12] as title |
350 | for (my $i = 0; $i < @poddata; $i++) { |
351 | last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; |
352 | } |
353 | warn "adopted '$title' as title for $podfile\n" |
354 | if $verbose and $title; |
355 | } |
7b8d334a |
356 | if ($title) { |
357 | $title =~ s/\s*\(.*\)//; |
358 | } else { |
3e3baf6d |
359 | warn "$0: no title for $podfile"; |
54310121 |
360 | $podfile =~ /^(.*)(\.[^.\/]+)?$/; |
361 | $title = ($podfile eq "-" ? 'No Title' : $1); |
3e3baf6d |
362 | warn "using $title" if $verbose; |
54310121 |
363 | } |
364 | print HTML <<END_OF_HEAD; |
7b8d334a |
365 | <HTML> |
366 | <HEAD> |
367 | <TITLE>$title</TITLE> |
368 | <LINK REV="made" HREF="mailto:$Config{perladmin}"> |
369 | </HEAD> |
54310121 |
370 | |
7b8d334a |
371 | <BODY> |
54310121 |
372 | |
373 | END_OF_HEAD |
374 | |
3e3baf6d |
375 | # load/reload/validate/cache %pages and %items |
376 | get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse); |
54310121 |
377 | |
378 | # scan the pod for =item directives |
379 | scan_items("", \%items, @poddata); |
380 | |
381 | # put an index at the top of the file. note, if $doindex is 0 we |
382 | # still generate an index, but surround it with an html comment. |
383 | # that way some other program can extract it if desired. |
384 | $index =~ s/--+/-/g; |
385 | print HTML "<!-- INDEX BEGIN -->\n"; |
386 | print HTML "<!--\n" unless $doindex; |
387 | print HTML $index; |
388 | print HTML "-->\n" unless $doindex; |
389 | print HTML "<!-- INDEX END -->\n\n"; |
390 | print HTML "<HR>\n" if $doindex; |
391 | |
392 | # now convert this file |
393 | warn "Converting input file\n" if $verbose; |
394 | foreach my $i (0..$#poddata) { |
395 | $_ = $poddata[$i]; |
396 | $paragraph = $i+1; |
397 | if (/^(=.*)/s) { # is it a pod directive? |
398 | $ignore = 0; |
399 | $_ = $1; |
400 | if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin |
401 | process_begin($1, $2); |
402 | } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end |
403 | process_end($1, $2); |
404 | } elsif (/^=cut/) { # =cut |
405 | process_cut(); |
406 | } elsif (/^=pod/) { # =pod |
407 | process_pod(); |
408 | } else { |
409 | next if @begin_stack && $begin_stack[-1] ne 'html'; |
410 | |
7b8d334a |
411 | if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading |
54310121 |
412 | process_head($1, $2); |
7b8d334a |
413 | } elsif (/^=item\s*(.*\S)/sm) { # =item text |
54310121 |
414 | process_item($1); |
415 | } elsif (/^=over\s*(.*)/) { # =over N |
416 | process_over(); |
417 | } elsif (/^=back/) { # =back |
418 | process_back(); |
419 | } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for |
420 | process_for($1,$2); |
421 | } else { |
422 | /^=(\S*)\s*/; |
423 | warn "$0: $podfile: unknown pod directive '$1' in " |
424 | . "paragraph $paragraph. ignoring.\n"; |
425 | } |
426 | } |
427 | $top = 0; |
428 | } |
429 | else { |
430 | next if $ignore; |
431 | next if @begin_stack && $begin_stack[-1] ne 'html'; |
432 | my $text = $_; |
433 | process_text(\$text, 1); |
7b8d334a |
434 | print HTML "<P>\n$text"; |
54310121 |
435 | } |
436 | } |
437 | |
438 | # finish off any pending directives |
439 | finish_list(); |
440 | print HTML <<END_OF_TAIL; |
7b8d334a |
441 | </BODY> |
54310121 |
442 | |
7b8d334a |
443 | </HTML> |
54310121 |
444 | END_OF_TAIL |
445 | |
446 | # close the html file |
447 | close(HTML); |
448 | |
449 | warn "Finished\n" if $verbose; |
450 | } |
451 | |
452 | ############################################################################## |
453 | |
454 | my $usage; # see below |
455 | sub usage { |
456 | my $podfile = shift; |
457 | warn "$0: $podfile: @_\n" if @_; |
458 | die $usage; |
459 | } |
460 | |
461 | $usage =<<END_OF_USAGE; |
462 | Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> |
463 | --podpath=<name>:...:<name> --podroot=<name> |
464 | --libpods=<name>:...:<name> --recurse --verbose --index |
465 | --netscape --norecurse --noindex |
466 | |
467 | --flush - flushes the item and directory caches. |
468 | --help - prints this message. |
469 | --htmlroot - http-server base directory from which all relative paths |
470 | in podpath stem (default is /). |
471 | --index - generate an index at the top of the resulting html |
472 | (default). |
473 | --infile - filename for the pod to convert (input taken from stdin |
474 | by default). |
475 | --libpods - colon-separated list of pages to search for =item pod |
476 | directives in as targets of C<> and implicit links (empty |
477 | by default). note, these are not filenames, but rather |
478 | page names like those that appear in L<> links. |
479 | --netscape - will use netscape html directives when applicable. |
480 | --nonetscape - will not use netscape directives (default). |
481 | --outfile - filename for the resulting html file (output sent to |
482 | stdout by default). |
483 | --podpath - colon-separated list of directories containing library |
484 | pods. empty by default. |
485 | --podroot - filesystem base directory from which all relative paths |
486 | in podpath stem (default is .). |
487 | --noindex - don't generate an index at the top of the resulting html. |
488 | --norecurse - don't recurse on those subdirectories listed in podpath. |
489 | --recurse - recurse on those subdirectories listed in podpath |
490 | (default behavior). |
491 | --title - title that will appear in resulting html file. |
492 | --verbose - self-explanatory |
493 | |
494 | END_OF_USAGE |
495 | |
496 | sub parse_command_line { |
29f227c9 |
497 | my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose); |
54310121 |
498 | my $result = GetOptions( |
29f227c9 |
499 | 'flush' => \$opt_flush, |
500 | 'help' => \$opt_help, |
501 | 'htmldir=s' => \$opt_htmldir, |
54310121 |
502 | 'htmlroot=s' => \$opt_htmlroot, |
29f227c9 |
503 | 'index!' => \$opt_index, |
54310121 |
504 | 'infile=s' => \$opt_infile, |
505 | 'libpods=s' => \$opt_libpods, |
506 | 'netscape!' => \$opt_netscape, |
507 | 'outfile=s' => \$opt_outfile, |
508 | 'podpath=s' => \$opt_podpath, |
509 | 'podroot=s' => \$opt_podroot, |
510 | 'norecurse' => \$opt_norecurse, |
511 | 'recurse!' => \$opt_recurse, |
512 | 'title=s' => \$opt_title, |
513 | 'verbose' => \$opt_verbose, |
514 | ); |
515 | usage("-", "invalid parameters") if not $result; |
516 | |
517 | usage("-") if defined $opt_help; # see if the user asked for help |
518 | $opt_help = ""; # just to make -w shut-up. |
519 | |
520 | $podfile = $opt_infile if defined $opt_infile; |
521 | $htmlfile = $opt_outfile if defined $opt_outfile; |
5a039dd3 |
522 | $htmldir = $opt_htmldir if defined $opt_outfile; |
54310121 |
523 | |
524 | @podpath = split(":", $opt_podpath) if defined $opt_podpath; |
525 | @libpods = split(":", $opt_libpods) if defined $opt_libpods; |
526 | |
527 | warn "Flushing item and directory caches\n" |
528 | if $opt_verbose && defined $opt_flush; |
529 | unlink($dircache, $itemcache) if defined $opt_flush; |
530 | |
531 | $htmlroot = $opt_htmlroot if defined $opt_htmlroot; |
532 | $podroot = $opt_podroot if defined $opt_podroot; |
533 | |
534 | $doindex = $opt_index if defined $opt_index; |
535 | $recurse = $opt_recurse if defined $opt_recurse; |
536 | $title = $opt_title if defined $opt_title; |
537 | $verbose = defined $opt_verbose ? 1 : 0; |
538 | $netscape = $opt_netscape if defined $opt_netscape; |
539 | } |
540 | |
3e3baf6d |
541 | |
542 | my $saved_cache_key; |
543 | |
544 | sub get_cache { |
545 | my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; |
546 | my @cache_key_args = @_; |
547 | |
548 | # A first-level cache: |
549 | # Don't bother reading the cache files if they still apply |
550 | # and haven't changed since we last read them. |
551 | |
552 | my $this_cache_key = cache_key(@cache_key_args); |
553 | |
554 | return if $saved_cache_key and $this_cache_key eq $saved_cache_key; |
555 | |
556 | # load the cache of %pages and %items if possible. $tests will be |
557 | # non-zero if successful. |
558 | my $tests = 0; |
559 | if (-f $dircache && -f $itemcache) { |
560 | warn "scanning for item cache\n" if $verbose; |
561 | $tests = load_cache($dircache, $itemcache, $podpath, $podroot); |
562 | } |
563 | |
564 | # if we didn't succeed in loading the cache then we must (re)build |
565 | # %pages and %items. |
566 | if (!$tests) { |
567 | warn "scanning directories in pod-path\n" if $verbose; |
568 | scan_podpath($podroot, $recurse, 0); |
569 | } |
570 | $saved_cache_key = cache_key(@cache_key_args); |
571 | } |
572 | |
573 | sub cache_key { |
574 | my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; |
575 | return join('!', $dircache, $itemcache, $recurse, |
29f227c9 |
576 | @$podpath, $podroot, stat($dircache), stat($itemcache)); |
3e3baf6d |
577 | } |
578 | |
54310121 |
579 | # |
3e3baf6d |
580 | # load_cache - tries to find if the caches stored in $dircache and $itemcache |
54310121 |
581 | # are valid caches of %pages and %items. if they are valid then it loads |
582 | # them and returns a non-zero value. |
583 | # |
3e3baf6d |
584 | |
585 | sub load_cache { |
54310121 |
586 | my($dircache, $itemcache, $podpath, $podroot) = @_; |
587 | my($tests); |
588 | local $_; |
589 | |
590 | $tests = 0; |
591 | |
592 | open(CACHE, "<$itemcache") || |
593 | die "$0: error opening $itemcache for reading: $!\n"; |
594 | $/ = "\n"; |
595 | |
596 | # is it the same podpath? |
597 | $_ = <CACHE>; |
598 | chomp($_); |
3e3baf6d |
599 | $tests++ if (join(":", @$podpath) eq $_); |
54310121 |
600 | |
601 | # is it the same podroot? |
602 | $_ = <CACHE>; |
603 | chomp($_); |
604 | $tests++ if ($podroot eq $_); |
605 | |
606 | # load the cache if its good |
607 | if ($tests != 2) { |
608 | close(CACHE); |
54310121 |
609 | return 0; |
610 | } |
611 | |
612 | warn "loading item cache\n" if $verbose; |
613 | while (<CACHE>) { |
614 | /(.*?) (.*)$/; |
615 | $items{$1} = $2; |
616 | } |
617 | close(CACHE); |
618 | |
619 | warn "scanning for directory cache\n" if $verbose; |
620 | open(CACHE, "<$dircache") || |
621 | die "$0: error opening $dircache for reading: $!\n"; |
622 | $/ = "\n"; |
623 | $tests = 0; |
624 | |
625 | # is it the same podpath? |
626 | $_ = <CACHE>; |
627 | chomp($_); |
3e3baf6d |
628 | $tests++ if (join(":", @$podpath) eq $_); |
54310121 |
629 | |
630 | # is it the same podroot? |
631 | $_ = <CACHE>; |
632 | chomp($_); |
633 | $tests++ if ($podroot eq $_); |
634 | |
635 | # load the cache if its good |
636 | if ($tests != 2) { |
637 | close(CACHE); |
54310121 |
638 | return 0; |
639 | } |
640 | |
641 | warn "loading directory cache\n" if $verbose; |
642 | while (<CACHE>) { |
643 | /(.*?) (.*)$/; |
644 | $pages{$1} = $2; |
645 | } |
646 | |
647 | close(CACHE); |
648 | |
649 | return 1; |
650 | } |
651 | |
652 | # |
653 | # scan_podpath - scans the directories specified in @podpath for directories, |
654 | # .pod files, and .pm files. it also scans the pod files specified in |
655 | # @libpods for =item directives. |
656 | # |
657 | sub scan_podpath { |
3e3baf6d |
658 | my($podroot, $recurse, $append) = @_; |
54310121 |
659 | my($pwd, $dir); |
660 | my($libpod, $dirname, $pod, @files, @poddata); |
661 | |
3e3baf6d |
662 | unless($append) { |
663 | %items = (); |
664 | %pages = (); |
665 | } |
666 | |
54310121 |
667 | # scan each directory listed in @podpath |
668 | $pwd = getcwd(); |
669 | chdir($podroot) |
670 | || die "$0: error changing to directory $podroot: $!\n"; |
671 | foreach $dir (@podpath) { |
672 | scan_dir($dir, $recurse); |
673 | } |
674 | |
675 | # scan the pods listed in @libpods for =item directives |
676 | foreach $libpod (@libpods) { |
677 | # if the page isn't defined then we won't know where to find it |
678 | # on the system. |
679 | next unless defined $pages{$libpod} && $pages{$libpod}; |
680 | |
681 | # if there is a directory then use the .pod and .pm files within it. |
29f227c9 |
682 | # NOTE: Only finds the first so-named directory in the tree. |
683 | # if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { |
684 | if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { |
54310121 |
685 | # find all the .pod and .pm files within the directory |
686 | $dirname = $1; |
687 | opendir(DIR, $dirname) || |
688 | die "$0: error opening directory $dirname: $!\n"; |
689 | @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR)); |
690 | closedir(DIR); |
691 | |
692 | # scan each .pod and .pm file for =item directives |
693 | foreach $pod (@files) { |
694 | open(POD, "<$dirname/$pod") || |
695 | die "$0: error opening $dirname/$pod for input: $!\n"; |
696 | @poddata = <POD>; |
697 | close(POD); |
698 | |
699 | scan_items("$dirname/$pod", @poddata); |
700 | } |
701 | |
702 | # use the names of files as =item directives too. |
703 | foreach $pod (@files) { |
704 | $pod =~ /^(.*)(\.pod|\.pm)$/; |
705 | $items{$1} = "$dirname/$1.html" if $1; |
706 | } |
707 | } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ || |
708 | $pages{$libpod} =~ /([^:]*\.pm):/) { |
709 | # scan the .pod or .pm file for =item directives |
710 | $pod = $1; |
711 | open(POD, "<$pod") || |
712 | die "$0: error opening $pod for input: $!\n"; |
713 | @poddata = <POD>; |
714 | close(POD); |
715 | |
716 | scan_items("$pod", @poddata); |
717 | } else { |
718 | warn "$0: shouldn't be here (line ".__LINE__."\n"; |
719 | } |
720 | } |
721 | @poddata = (); # clean-up a bit |
722 | |
723 | chdir($pwd) |
724 | || die "$0: error changing to directory $pwd: $!\n"; |
725 | |
726 | # cache the item list for later use |
727 | warn "caching items for later use\n" if $verbose; |
728 | open(CACHE, ">$itemcache") || |
729 | die "$0: error open $itemcache for writing: $!\n"; |
730 | |
731 | print CACHE join(":", @podpath) . "\n$podroot\n"; |
732 | foreach my $key (keys %items) { |
733 | print CACHE "$key $items{$key}\n"; |
734 | } |
735 | |
736 | close(CACHE); |
737 | |
738 | # cache the directory list for later use |
739 | warn "caching directories for later use\n" if $verbose; |
740 | open(CACHE, ">$dircache") || |
741 | die "$0: error open $dircache for writing: $!\n"; |
742 | |
743 | print CACHE join(":", @podpath) . "\n$podroot\n"; |
744 | foreach my $key (keys %pages) { |
745 | print CACHE "$key $pages{$key}\n"; |
746 | } |
747 | |
748 | close(CACHE); |
749 | } |
750 | |
751 | # |
752 | # scan_dir - scans the directory specified in $dir for subdirectories, .pod |
753 | # files, and .pm files. notes those that it finds. this information will |
754 | # be used later in order to figure out where the pages specified in L<> |
755 | # links are on the filesystem. |
756 | # |
757 | sub scan_dir { |
758 | my($dir, $recurse) = @_; |
759 | my($t, @subdirs, @pods, $pod, $dirname, @dirs); |
760 | local $_; |
761 | |
762 | @subdirs = (); |
763 | @pods = (); |
764 | |
765 | opendir(DIR, $dir) || |
766 | die "$0: error opening directory $dir: $!\n"; |
767 | while (defined($_ = readdir(DIR))) { |
768 | if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory |
769 | $pages{$_} = "" unless defined $pages{$_}; |
770 | $pages{$_} .= "$dir/$_:"; |
771 | push(@subdirs, $_); |
772 | } elsif (/\.pod$/) { # .pod |
773 | s/\.pod$//; |
774 | $pages{$_} = "" unless defined $pages{$_}; |
775 | $pages{$_} .= "$dir/$_.pod:"; |
776 | push(@pods, "$dir/$_.pod"); |
777 | } elsif (/\.pm$/) { # .pm |
778 | s/\.pm$//; |
779 | $pages{$_} = "" unless defined $pages{$_}; |
780 | $pages{$_} .= "$dir/$_.pm:"; |
781 | push(@pods, "$dir/$_.pm"); |
782 | } |
783 | } |
784 | closedir(DIR); |
785 | |
786 | # recurse on the subdirectories if necessary |
787 | if ($recurse) { |
788 | foreach my $subdir (@subdirs) { |
789 | scan_dir("$dir/$subdir", $recurse); |
790 | } |
791 | } |
792 | } |
793 | |
794 | # |
795 | # scan_headings - scan a pod file for head[1-6] tags, note the tags, and |
796 | # build an index. |
797 | # |
798 | sub scan_headings { |
799 | my($sections, @data) = @_; |
800 | my($tag, $which_head, $title, $listdepth, $index); |
801 | |
be173d55 |
802 | # here we need local $ignore = 0; |
803 | # unfortunately, we can't have it, because $ignore is lexical |
804 | $ignore = 0; |
805 | |
54310121 |
806 | $listdepth = 0; |
807 | $index = ""; |
808 | |
809 | # scan for =head directives, note their name, and build an index |
810 | # pointing to each of them. |
811 | foreach my $line (@data) { |
bb9460ed |
812 | if ($line =~ /^=(head)([1-6])\s+(.*)/) { |
54310121 |
813 | ($tag,$which_head, $title) = ($1,$2,$3); |
814 | chomp($title); |
815 | $$sections{htmlify(0,$title)} = 1; |
816 | |
102c538a |
817 | while ($which_head != $listdepth) { |
818 | if ($which_head > $listdepth) { |
819 | $index .= "\n" . ("\t" x $listdepth) . "<UL>\n"; |
820 | $listdepth++; |
821 | } elsif ($which_head < $listdepth) { |
822 | $listdepth--; |
823 | $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; |
824 | } |
54310121 |
825 | } |
54310121 |
826 | |
827 | $index .= "\n" . ("\t" x $listdepth) . "<LI>" . |
be173d55 |
828 | "<A HREF=\"#" . htmlify(0,$title) . "\">" . |
7b8d334a |
829 | html_escape(process_text(\$title, 0)) . "</A>"; |
54310121 |
830 | } |
831 | } |
832 | |
833 | # finish off the lists |
834 | while ($listdepth--) { |
835 | $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; |
836 | } |
837 | |
838 | # get rid of bogus lists |
839 | $index =~ s,\t*<UL>\s*</UL>\n,,g; |
840 | |
bb9460ed |
841 | $ignore = 1; # restore old value; |
be173d55 |
842 | |
54310121 |
843 | return $index; |
844 | } |
845 | |
846 | # |
847 | # scan_items - scans the pod specified by $pod for =item directives. we |
848 | # will use this information later on in resolving C<> links. |
849 | # |
850 | sub scan_items { |
851 | my($pod, @poddata) = @_; |
852 | my($i, $item); |
853 | local $_; |
854 | |
855 | $pod =~ s/\.pod$//; |
856 | $pod .= ".html" if $pod; |
857 | |
858 | foreach $i (0..$#poddata) { |
859 | $_ = $poddata[$i]; |
860 | |
861 | # remove any formatting instructions |
862 | s,[A-Z]<([^<>]*)>,$1,g; |
863 | |
864 | # figure out what kind of item it is and get the first word of |
865 | # it's name. |
866 | if (/^=item\s+(\w*)\s*.*$/s) { |
867 | if ($1 eq "*") { # bullet list |
868 | /\A=item\s+\*\s*(.*?)\s*\Z/s; |
869 | $item = $1; |
7b8d334a |
870 | } elsif ($1 =~ /^\d+/) { # numbered list |
871 | /\A=item\s+\d+\.?(.*?)\s*\Z/s; |
54310121 |
872 | $item = $1; |
873 | } else { |
874 | # /\A=item\s+(.*?)\s*\Z/s; |
875 | /\A=item\s+(\w*)/s; |
876 | $item = $1; |
877 | } |
878 | |
879 | $items{$item} = "$pod" if $item; |
880 | } |
881 | } |
882 | } |
883 | |
884 | # |
885 | # process_head - convert a pod head[1-6] tag and convert it to HTML format. |
886 | # |
887 | sub process_head { |
888 | my($tag, $heading) = @_; |
889 | my $firstword; |
890 | |
891 | # figure out the level of the =head |
892 | $tag =~ /head([1-6])/; |
893 | my $level = $1; |
894 | |
895 | # can't have a heading full of spaces and speechmarks and so on |
896 | $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/; |
897 | |
898 | print HTML "<P>\n" unless $listlevel; |
899 | print HTML "<HR>\n" unless $listlevel || $top; |
900 | print HTML "<H$level>"; # unless $listlevel; |
901 | #print HTML "<H$level>" unless $listlevel; |
be173d55 |
902 | my $convert = $heading; process_text(\$convert, 0); |
7b8d334a |
903 | $convert = html_escape($convert); |
54310121 |
904 | print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>"; |
905 | print HTML "</H$level>"; # unless $listlevel; |
906 | print HTML "\n"; |
907 | } |
908 | |
909 | # |
910 | # process_item - convert a pod item tag and convert it to HTML format. |
911 | # |
912 | sub process_item { |
913 | my $text = $_[0]; |
914 | my($i, $quote, $name); |
915 | |
916 | my $need_preamble = 0; |
917 | my $this_entry; |
918 | |
919 | |
920 | # lots of documents start a list without doing an =over. this is |
921 | # bad! but, the proper thing to do seems to be to just assume |
922 | # they did do an =over. so warn them once and then continue. |
923 | warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n" |
924 | unless $listlevel; |
925 | process_over() unless $listlevel; |
926 | |
927 | return unless $listlevel; |
928 | |
929 | # remove formatting instructions from the text |
930 | 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g; |
931 | pre_escape(\$text); |
932 | |
933 | $need_preamble = $items_seen[$listlevel]++ == 0; |
934 | |
935 | # check if this is the first =item after an =over |
936 | $i = $listlevel - 1; |
937 | my $need_new = $listlevel >= @listitem; |
938 | |
939 | if ($text =~ /\A\*/) { # bullet |
940 | |
941 | if ($need_preamble) { |
942 | push(@listend, "</UL>"); |
943 | print HTML "<UL>\n"; |
944 | } |
945 | |
7b8d334a |
946 | print HTML '<LI>'; |
947 | if ($text =~ /\A\*\s*(.+)\Z/s) { |
948 | print HTML '<STRONG>'; |
949 | if ($items_named{$1}++) { |
950 | print HTML html_escape($1); |
951 | } else { |
952 | my $name = 'item_' . htmlify(1,$1); |
953 | print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; |
954 | } |
955 | print HTML '</STRONG>'; |
956 | } |
54310121 |
957 | |
7b8d334a |
958 | } elsif ($text =~ /\A[\d#]+/) { # numbered list |
54310121 |
959 | |
960 | if ($need_preamble) { |
961 | push(@listend, "</OL>"); |
962 | print HTML "<OL>\n"; |
963 | } |
964 | |
7b8d334a |
965 | print HTML '<LI>'; |
966 | if ($text =~ /\A\d+\.?\s*(.+)\Z/s) { |
967 | print HTML '<STRONG>'; |
968 | if ($items_named{$1}++) { |
969 | print HTML html_escape($1); |
970 | } else { |
971 | my $name = 'item_' . htmlify(0,$1); |
972 | print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; |
973 | } |
974 | print HTML '</STRONG>'; |
975 | } |
54310121 |
976 | |
977 | } else { # all others |
978 | |
979 | if ($need_preamble) { |
980 | push(@listend, '</DL>'); |
981 | print HTML "<DL>\n"; |
982 | } |
983 | |
7b8d334a |
984 | print HTML '<DT>'; |
985 | if ($text =~ /(\S+)/) { |
986 | print HTML '<STRONG>'; |
987 | if ($items_named{$1}++) { |
988 | print HTML html_escape($text); |
989 | } else { |
990 | my $name = 'item_' . htmlify(1,$text); |
991 | print HTML qq(<A NAME="$name">), html_escape($text), '</A>'; |
992 | } |
993 | print HTML '</STRONG>'; |
994 | } |
54310121 |
995 | print HTML '<DD>'; |
996 | } |
997 | |
998 | print HTML "\n"; |
999 | } |
1000 | |
1001 | # |
1002 | # process_over - process a pod over tag and start a corresponding HTML |
1003 | # list. |
1004 | # |
1005 | sub process_over { |
1006 | # start a new list |
1007 | $listlevel++; |
1008 | } |
1009 | |
1010 | # |
1011 | # process_back - process a pod back tag and convert it to HTML format. |
1012 | # |
1013 | sub process_back { |
2ceaccd7 |
1014 | warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n" |
54310121 |
1015 | unless $listlevel; |
1016 | return unless $listlevel; |
1017 | |
1018 | # close off the list. note, I check to see if $listend[$listlevel] is |
1019 | # defined because an =item directive may have never appeared and thus |
1020 | # $listend[$listlevel] may have never been initialized. |
1021 | $listlevel--; |
1022 | print HTML $listend[$listlevel] if defined $listend[$listlevel]; |
1023 | print HTML "\n"; |
1024 | |
1025 | # don't need the corresponding perl code anymore |
1026 | pop(@listitem); |
1027 | pop(@listdata); |
1028 | pop(@listend); |
1029 | |
1030 | pop(@items_seen); |
1031 | } |
1032 | |
1033 | # |
1034 | # process_cut - process a pod cut tag, thus stop ignoring pod directives. |
1035 | # |
1036 | sub process_cut { |
1037 | $ignore = 1; |
1038 | } |
1039 | |
1040 | # |
1041 | # process_pod - process a pod pod tag, thus ignore pod directives until we see a |
1042 | # corresponding cut. |
1043 | # |
1044 | sub process_pod { |
1045 | # no need to set $ignore to 0 cause the main loop did it |
1046 | } |
1047 | |
1048 | # |
1049 | # process_for - process a =for pod tag. if it's for html, split |
c4d9b39d |
1050 | # it out verbatim, if illustration, center it, otherwise ignore it. |
54310121 |
1051 | # |
1052 | sub process_for { |
1053 | my($whom, $text) = @_; |
1054 | if ( $whom =~ /^(pod2)?html$/i) { |
1055 | print HTML $text; |
c4d9b39d |
1056 | } elsif ($whom =~ /^illustration$/i) { |
1057 | 1 while chomp $text; |
1058 | for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { |
1059 | $text .= $ext, last if -r "$text$ext"; |
1060 | } |
1061 | print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>}; |
1062 | } |
54310121 |
1063 | } |
1064 | |
1065 | # |
1066 | # process_begin - process a =begin pod tag. this pushes |
1067 | # whom we're beginning on the begin stack. if there's a |
1068 | # begin stack, we only print if it us. |
1069 | # |
1070 | sub process_begin { |
1071 | my($whom, $text) = @_; |
1072 | $whom = lc($whom); |
1073 | push (@begin_stack, $whom); |
1074 | if ( $whom =~ /^(pod2)?html$/) { |
1075 | print HTML $text if $text; |
1076 | } |
1077 | } |
1078 | |
1079 | # |
1080 | # process_end - process a =end pod tag. pop the |
1081 | # begin stack. die if we're mismatched. |
1082 | # |
1083 | sub process_end { |
1084 | my($whom, $text) = @_; |
1085 | $whom = lc($whom); |
1086 | if ($begin_stack[-1] ne $whom ) { |
1087 | die "Unmatched begin/end at chunk $paragraph\n" |
1088 | } |
1089 | pop @begin_stack; |
1090 | } |
1091 | |
1092 | # |
1093 | # process_text - handles plaintext that appears in the input pod file. |
1094 | # there may be pod commands embedded within the text so those must be |
1095 | # converted to html commands. |
1096 | # |
1097 | sub process_text { |
1098 | my($text, $escapeQuotes) = @_; |
1099 | my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf); |
1100 | my($podcommand, $params, $tag, $quote); |
1101 | |
1102 | return if $ignore; |
1103 | |
1104 | $quote = 0; # status of double-quote conversion |
1105 | $result = ""; |
1106 | $rest = $$text; |
1107 | |
1108 | if ($rest =~ /^\s+/) { # preformatted text, no pod directives |
be173d55 |
1109 | $rest =~ s/\n+\Z//; |
1110 | $rest =~ s#.*# |
1111 | my $line = $&; |
1112 | 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; |
1113 | $line; |
1114 | #eg; |
54310121 |
1115 | |
1116 | $rest =~ s/&/&/g; |
1117 | $rest =~ s/</</g; |
1118 | $rest =~ s/>/>/g; |
1119 | $rest =~ s/"/"/g; |
1120 | |
1121 | # try and create links for all occurrences of perl.* within |
1122 | # the preformatted text. |
1123 | $rest =~ s{ |
1124 | (\s*)(perl\w+) |
1125 | }{ |
1126 | if (defined $pages{$2}) { # is a link |
1127 | qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>); |
39e571d4 |
1128 | } elsif (defined $pages{dosify($2)}) { # is a link |
1129 | qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>); |
54310121 |
1130 | } else { |
1131 | "$1$2"; |
1132 | } |
1133 | }xeg; |
5a039dd3 |
1134 | # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g; |
1135 | $rest =~ s{ |
29f227c9 |
1136 | (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? |
1137 | }{ |
1138 | my $url ; |
1139 | if ( $htmlfileurl ne '' ) { |
1140 | # Here, we take advantage of the knowledge |
1141 | # that $htmlfileurl ne '' implies $htmlroot eq ''. |
1142 | # Since $htmlroot eq '', we need to prepend $htmldir |
1143 | # on the fron of the link to get the absolute path |
1144 | # of the link's target. We check for a leading '/' |
1145 | # to avoid corrupting links that are #, file:, etc. |
1146 | my $old_url = $3 ; |
1147 | $old_url = "$htmldir$old_url" |
1148 | if ( $old_url =~ m{^\/} ) ; |
1149 | $url = relativize_url( "$old_url.html", $htmlfileurl ); |
1150 | # print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ; |
1151 | } |
1152 | else { |
1153 | $url = "$3.html" ; |
1154 | } |
5a039dd3 |
1155 | "$1$url" ; |
1156 | }xeg; |
54310121 |
1157 | |
5a039dd3 |
1158 | # Look for embedded URLs and make them in to links. We don't |
1159 | # relativize them since they are best left as the author intended. |
54310121 |
1160 | my $urls = '(' . join ('|', qw{ |
1161 | http |
1162 | telnet |
1163 | mailto |
1164 | news |
1165 | gopher |
1166 | file |
1167 | wais |
1168 | ftp |
1169 | } ) |
1170 | . ')'; |
1171 | |
1172 | my $ltrs = '\w'; |
1173 | my $gunk = '/#~:.?+=&%@!\-'; |
1174 | my $punc = '.:?\-'; |
1175 | my $any = "${ltrs}${gunk}${punc}"; |
1176 | |
1177 | $rest =~ s{ |
1178 | \b # start at word boundary |
1179 | ( # begin $1 { |
29f227c9 |
1180 | $urls : # need resource and a colon |
1181 | (?!:) # Ignore File::, among others. |
54310121 |
1182 | [$any] +? # followed by on or more |
1183 | # of any valid character, but |
1184 | # be conservative and take only |
1185 | # what you need to.... |
1186 | ) # end $1 } |
1187 | (?= # look-ahead non-consumptive assertion |
1188 | [$punc]* # either 0 or more puntuation |
1189 | [^$any] # followed by a non-url char |
1190 | | # or else |
1191 | $ # then end of the string |
1192 | ) |
1193 | }{<A HREF="$1">$1</A>}igox; |
1194 | |
1195 | $result = "<PRE>" # text should be as it is (verbatim) |
1196 | . "$rest\n" |
1197 | . "</PRE>\n"; |
1198 | } else { # formatted text |
1199 | # parse through the string, stopping each time we find a |
1200 | # pod-escape. once the string has been throughly processed |
1201 | # we can output it. |
ec978fad |
1202 | while (length $rest) { |
54310121 |
1203 | # check to see if there are any possible pod directives in |
1204 | # the remaining part of the text. |
1205 | if ($rest =~ m/[BCEIFLSZ]</) { |
1206 | warn "\$rest\t= $rest\n" unless |
1207 | $rest =~ /\A |
1208 | ([^<]*?) |
1209 | ([BCEIFLSZ]?) |
1210 | < |
1211 | (.*)\Z/xs; |
1212 | |
1213 | $s1 = $1; # pure text |
1214 | $s2 = $2; # the type of pod-escape that follows |
1215 | $s3 = '<'; # '<' |
1216 | $s4 = $3; # the rest of the string |
1217 | } else { |
1218 | $s1 = $rest; |
1219 | $s2 = ""; |
1220 | $s3 = ""; |
1221 | $s4 = ""; |
1222 | } |
1223 | |
1224 | if ($s3 eq '<' && $s2) { # a pod-escape |
1225 | $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1); |
1226 | $podcommand = "$s2<"; |
1227 | $rest = $s4; |
1228 | |
1229 | # find the matching '>' |
1230 | $match = 1; |
1231 | $bf = 0; |
1232 | while ($match && !$bf) { |
1233 | $bf = 1; |
1234 | if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) { |
1235 | $bf = 0; |
1236 | $match++; |
1237 | $podcommand .= $1; |
1238 | $rest = $2; |
1239 | } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) { |
1240 | $bf = 0; |
1241 | $match--; |
1242 | $podcommand .= $1; |
1243 | $rest = $2; |
1244 | } |
1245 | } |
1246 | |
1247 | if ($match != 0) { |
1248 | warn <<WARN; |
1249 | $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph. |
1250 | WARN |
1251 | $result .= substr $podcommand, 0, 2; |
1252 | $rest = substr($podcommand, 2) . $rest; |
1253 | next; |
1254 | } |
1255 | |
1256 | # pull out the parameters to the pod-escape |
1257 | $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s; |
1258 | $tag = $1; |
1259 | $params = $2; |
1260 | |
1261 | # process the text within the pod-escape so that any escapes |
1262 | # which must occur do. |
1263 | process_text(\$params, 0) unless $tag eq 'L'; |
1264 | |
1265 | $s1 = $params; |
1266 | if (!$tag || $tag eq " ") { # <> : no tag |
1267 | $s1 = "<$params>"; |
1268 | } elsif ($tag eq "L") { # L<> : link |
1269 | $s1 = process_L($params); |
1270 | } elsif ($tag eq "I" || # I<> : italicize text |
1271 | $tag eq "B" || # B<> : bold text |
1272 | $tag eq "F") { # F<> : file specification |
1273 | $s1 = process_BFI($tag, $params); |
1274 | } elsif ($tag eq "C") { # C<> : literal code |
1275 | $s1 = process_C($params, 1); |
1276 | } elsif ($tag eq "E") { # E<> : escape |
1277 | $s1 = process_E($params); |
1278 | } elsif ($tag eq "Z") { # Z<> : zero-width character |
1279 | $s1 = process_Z($params); |
1280 | } elsif ($tag eq "S") { # S<> : non-breaking space |
1281 | $s1 = process_S($params); |
1282 | } elsif ($tag eq "X") { # S<> : non-breaking space |
1283 | $s1 = process_X($params); |
1284 | } else { |
1285 | warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n"; |
1286 | } |
1287 | |
1288 | $result .= "$s1"; |
1289 | } else { |
1290 | # for pure text we must deal with implicit links and |
1291 | # double-quotes among other things. |
1292 | $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3"); |
1293 | $rest = $s4; |
1294 | } |
1295 | } |
1296 | } |
1297 | $$text = $result; |
1298 | } |
1299 | |
1300 | sub html_escape { |
1301 | my $rest = $_[0]; |
a3c03ba2 |
1302 | $rest =~ s/&(?!\w+;|#)/&/g; # XXX not bulletproof |
54310121 |
1303 | $rest =~ s/</</g; |
1304 | $rest =~ s/>/>/g; |
1305 | $rest =~ s/"/"/g; |
1306 | return $rest; |
1307 | } |
1308 | |
1309 | # |
1310 | # process_puretext - process pure text (without pod-escapes) converting |
1311 | # double-quotes and handling implicit C<> links. |
1312 | # |
1313 | sub process_puretext { |
1314 | my($text, $quote) = @_; |
1315 | my(@words, $result, $rest, $lead, $trail); |
1316 | |
1317 | # convert double-quotes to single-quotes |
1318 | $text =~ s/\A([^"]*)"/$1''/s if $$quote; |
1319 | while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {} |
1320 | |
1321 | $$quote = ($text =~ m/"/ ? 1 : 0); |
1322 | $text =~ s/\A([^"]*)"/$1``/s if $$quote; |
1323 | |
1324 | # keep track of leading and trailing white-space |
1325 | $lead = ($text =~ /\A(\s*)/s ? $1 : ""); |
1326 | $trail = ($text =~ /(\s*)\Z/s ? $1 : ""); |
1327 | |
1328 | # collapse all white space into a single space |
1329 | $text =~ s/\s+/ /g; |
1330 | @words = split(" ", $text); |
1331 | |
1332 | # process each word individually |
1333 | foreach my $word (@words) { |
1334 | # see if we can infer a link |
1335 | if ($word =~ /^\w+\(/) { |
1336 | # has parenthesis so should have been a C<> ref |
1337 | $word = process_C($word); |
1338 | # $word =~ /^[^()]*]\(/; |
1339 | # if (defined $items{$1} && $items{$1}) { |
1340 | # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_" |
1341 | # . htmlify(0,$word) |
1342 | # . "\">$word</A></CODE>"; |
1343 | # } elsif (defined $items{$word} && $items{$word}) { |
1344 | # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_" |
1345 | # . htmlify(0,$word) |
1346 | # . "\">$word</A></CODE>"; |
1347 | # } else { |
1348 | # $word = "\n<CODE><A HREF=\"#item_" |
1349 | # . htmlify(0,$word) |
1350 | # . "\">$word</A></CODE>"; |
1351 | # } |
1352 | } elsif ($word =~ /^[\$\@%&*]+\w+$/) { |
1353 | # perl variables, should be a C<> ref |
1354 | $word = process_C($word, 1); |
1355 | } elsif ($word =~ m,^\w+://\w,) { |
1356 | # looks like a URL |
5a039dd3 |
1357 | # Don't relativize it: leave it as the author intended |
54310121 |
1358 | $word = qq(<A HREF="$word">$word</A>); |
af47ee55 |
1359 | } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { |
54310121 |
1360 | # looks like an e-mail address |
7b8d334a |
1361 | my ($w1, $w2, $w3) = ("", $word, ""); |
1362 | ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/; |
1363 | ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/; |
1364 | $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3); |
54310121 |
1365 | } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase? |
7b8d334a |
1366 | $word = html_escape($word) if $word =~ /["&<>]/; |
54310121 |
1367 | $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape; |
1368 | } else { |
7b8d334a |
1369 | $word = html_escape($word) if $word =~ /["&<>]/; |
54310121 |
1370 | } |
1371 | } |
1372 | |
1373 | # build a new string based upon our conversion |
1374 | $result = ""; |
1375 | $rest = join(" ", @words); |
1376 | while (length($rest) > 75) { |
1377 | if ( $rest =~ m/^(.{0,75})\s(.*?)$/o || |
1378 | $rest =~ m/^(\S*)\s(.*?)$/o) { |
1379 | |
1380 | $result .= "$1\n"; |
1381 | $rest = $2; |
1382 | } else { |
1383 | $result .= "$rest\n"; |
1384 | $rest = ""; |
1385 | } |
1386 | } |
1387 | $result .= $rest if $rest; |
1388 | |
1389 | # restore the leading and trailing white-space |
1390 | $result = "$lead$result$trail"; |
1391 | |
1392 | return $result; |
1393 | } |
1394 | |
1395 | # |
1396 | # pre_escape - convert & in text to $amp; |
1397 | # |
1398 | sub pre_escape { |
1399 | my($str) = @_; |
db085819 |
1400 | $$str =~ s/&(?!\w+;|#)/&/g; # XXX not bulletproof |
54310121 |
1401 | } |
1402 | |
1403 | # |
39e571d4 |
1404 | # dosify - convert filenames to 8.3 |
1405 | # |
1406 | sub dosify { |
1407 | my($str) = @_; |
1408 | if ($Is83) { |
1409 | $str = lc $str; |
1410 | $str =~ s/(\.\w+)/substr ($1,0,4)/ge; |
1411 | $str =~ s/(\w+)/substr ($1,0,8)/ge; |
1412 | } |
1413 | return $str; |
1414 | } |
1415 | |
1416 | # |
54310121 |
1417 | # process_L - convert a pod L<> directive to a corresponding HTML link. |
1418 | # most of the links made are inferred rather than known about directly |
1419 | # (i.e it's not known whether the =head\d section exists in the target file, |
1420 | # or whether a .pod file exists in the case of split files). however, the |
1421 | # guessing usually works. |
1422 | # |
1423 | # Unlike the other directives, this should be called with an unprocessed |
1424 | # string, else tags in the link won't be matched. |
1425 | # |
1426 | sub process_L { |
1427 | my($str) = @_; |
39e571d4 |
1428 | my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings |
54310121 |
1429 | |
1430 | $str =~ s/\n/ /g; # undo word-wrapped tags |
1431 | $s1 = $str; |
1432 | for ($s1) { |
b74bceb9 |
1433 | # LREF: a la HREF L<show this text|man/section> |
1434 | $linktext = $1 if s:^([^|]+)\|::; |
1435 | |
54310121 |
1436 | # make sure sections start with a / |
1437 | s,^",/",g; |
1438 | s,^,/,g if (!m,/, && / /); |
1439 | |
1440 | # check if there's a section specified |
1441 | if (m,^(.*?)/"?(.*?)"?$,) { # yes |
1442 | ($page, $section) = ($1, $2); |
1443 | } else { # no |
1444 | ($page, $section) = ($str, ""); |
1445 | } |
1446 | |
1447 | # check if we know that this is a section in this page |
1448 | if (!defined $pages{$page} && defined $sections{$page}) { |
1449 | $section = $page; |
1450 | $page = ""; |
1451 | } |
29f227c9 |
1452 | |
1453 | # remove trailing punctuation, like () |
1454 | $section =~ s/\W*$// ; |
54310121 |
1455 | } |
1456 | |
39e571d4 |
1457 | $page83=dosify($page); |
1458 | $page=$page83 if (defined $pages{$page83}); |
54310121 |
1459 | if ($page eq "") { |
1460 | $link = "#" . htmlify(0,$section); |
b74bceb9 |
1461 | $linktext = $section unless defined($linktext); |
350ccacd |
1462 | } elsif ( $page =~ /::/ ) { |
1463 | $linktext = ($section ? "$section" : "$page"); |
1464 | $page =~ s,::,/,g; |
29f227c9 |
1465 | # Search page cache for an entry keyed under the html page name, |
1466 | # then look to see what directory that page might be in. NOTE: |
1467 | # this will only find one page. A better solution might be to produce |
1468 | # an intermediate page that is an index to all such pages. |
1469 | my $page_name = $page ; |
1470 | $page_name =~ s,^.*/,, ; |
1471 | if ( defined( $pages{ $page_name } ) && |
1472 | $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/ |
1473 | ) { |
1474 | $page = $1 ; |
1475 | } |
1476 | else { |
1477 | # NOTE: This branch assumes that all A::B pages are located in |
1478 | # $htmlroot/A/B.html . This is often incorrect, since they are |
1479 | # often in $htmlroot/lib/A/B.html or such like. Perhaps we could |
1480 | # analyze the contents of %pages and figure out where any |
1481 | # cousins of A::B are, then assume that. So, if A::B isn't found, |
1482 | # but A::C is found in lib/A/C.pm, then A::B is assumed to be in |
1483 | # lib/A/B.pm. This is also limited, but it's an improvement. |
1484 | # Maybe a hints file so that the links point to the correct places |
1485 | # non-theless? |
1486 | # Also, maybe put a warn "$0: cannot resolve..." here. |
1487 | } |
350ccacd |
1488 | $link = "$htmlroot/$page.html"; |
1489 | $link .= "#" . htmlify(0,$section) if ($section); |
54310121 |
1490 | } elsif (!defined $pages{$page}) { |
1491 | warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n"; |
1492 | $link = ""; |
b74bceb9 |
1493 | $linktext = $page unless defined($linktext); |
54310121 |
1494 | } else { |
b74bceb9 |
1495 | $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext); |
54310121 |
1496 | $section = htmlify(0,$section) if $section ne ""; |
1497 | |
1498 | # if there is a directory by the name of the page, then assume that an |
1499 | # appropriate section will exist in the subdirectory |
29f227c9 |
1500 | # if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { |
1501 | if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { |
54310121 |
1502 | $link = "$htmlroot/$1/$section.html"; |
1503 | |
1504 | # since there is no directory by the name of the page, the section will |
1505 | # have to exist within a .html of the same name. thus, make sure there |
1506 | # is a .pod or .pm that might become that .html |
1507 | } else { |
1508 | $section = "#$section"; |
1509 | # check if there is a .pod with the page name |
1510 | if ($pages{$page} =~ /([^:]*)\.pod:/) { |
1511 | $link = "$htmlroot/$1.html$section"; |
1512 | } elsif ($pages{$page} =~ /([^:]*)\.pm:/) { |
1513 | $link = "$htmlroot/$1.html$section"; |
1514 | } else { |
1515 | warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ". |
1516 | "no .pod or .pm found\n"; |
1517 | $link = ""; |
b74bceb9 |
1518 | $linktext = $section unless defined($linktext); |
54310121 |
1519 | } |
1520 | } |
1521 | } |
1522 | |
1523 | process_text(\$linktext, 0); |
1524 | if ($link) { |
29f227c9 |
1525 | # Here, we take advantage of the knowledge that $htmlfileurl ne '' |
1526 | # implies $htmlroot eq ''. This means that the link in question |
1527 | # needs a prefix of $htmldir if it begins with '/'. The test for |
1528 | # the initial '/' is done to avoid '#'-only links, and to allow |
1529 | # for other kinds of links, like file:, ftp:, etc. |
1530 | my $url ; |
1531 | if ( $htmlfileurl ne '' ) { |
1532 | $link = "$htmldir$link" |
1533 | if ( $link =~ m{^/} ) ; |
1534 | |
1535 | $url = relativize_url( $link, $htmlfileurl ) ; |
1536 | # print( " b: [$link,$htmlfileurl,$url]\n" ) ; |
1537 | } |
1538 | else { |
1539 | $url = $link ; |
1540 | } |
1541 | |
5a039dd3 |
1542 | $s1 = "<A HREF=\"$url\">$linktext</A>"; |
54310121 |
1543 | } else { |
1544 | $s1 = "<EM>$linktext</EM>"; |
1545 | } |
1546 | return $s1; |
1547 | } |
1548 | |
1549 | # |
29f227c9 |
1550 | # relativize_url - convert an absolute URL to one relative to a base URL. |
1551 | # Assumes both end in a filename. |
1552 | # |
1553 | sub relativize_url { |
1554 | my ($dest,$source) = @_ ; |
1555 | |
1556 | my ($dest_volume,$dest_directory,$dest_file) = |
1557 | File::Spec::Unix->splitpath( $dest ) ; |
1558 | $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ; |
1559 | |
1560 | my ($source_volume,$source_directory,$source_file) = |
1561 | File::Spec::Unix->splitpath( $source ) ; |
1562 | $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ; |
1563 | |
1564 | my $rel_path = '' ; |
1565 | if ( $dest ne '' ) { |
1566 | $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ; |
1567 | } |
1568 | |
1569 | if ( $rel_path ne '' && |
1570 | substr( $rel_path, -1 ) ne '/' && |
1571 | substr( $dest_file, 0, 1 ) ne '#' |
1572 | ) { |
1573 | $rel_path .= "/$dest_file" ; |
1574 | } |
1575 | else { |
1576 | $rel_path .= "$dest_file" ; |
1577 | } |
1578 | |
1579 | return $rel_path ; |
1580 | } |
1581 | |
1582 | # |
54310121 |
1583 | # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and |
1584 | # convert them to corresponding HTML directives. |
1585 | # |
1586 | sub process_BFI { |
1587 | my($tag, $str) = @_; |
1588 | my($s1); # work string |
1589 | my(%repltext) = ( 'B' => 'STRONG', |
1590 | 'F' => 'EM', |
1591 | 'I' => 'EM'); |
1592 | |
1593 | # extract the modified text and convert to HTML |
1594 | $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>"; |
1595 | return $s1; |
1596 | } |
1597 | |
1598 | # |
1599 | # process_C - process the C<> pod-escape. |
1600 | # |
1601 | sub process_C { |
1602 | my($str, $doref) = @_; |
1603 | my($s1, $s2); |
1604 | |
1605 | $s1 = $str; |
1606 | $s1 =~ s/\([^()]*\)//g; # delete parentheses |
be173d55 |
1607 | $s2 = $s1; |
54310121 |
1608 | $s1 =~ s/\W//g; # delete bogus characters |
7b8d334a |
1609 | $str = html_escape($str); |
54310121 |
1610 | |
1611 | # if there was a pod file that we found earlier with an appropriate |
1612 | # =item directive, then create a link to that page. |
1613 | if ($doref && defined $items{$s1}) { |
5a039dd3 |
1614 | if ( $items{$s1} ) { |
1615 | my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ; |
29f227c9 |
1616 | # Here, we take advantage of the knowledge that $htmlfileurl ne '' |
1617 | # implies $htmlroot eq ''. |
1618 | my $url ; |
1619 | if ( $htmlfileurl ne '' ) { |
1620 | $link = "$htmldir$link" ; |
1621 | $url = relativize_url( $link, $htmlfileurl ) ; |
1622 | } |
1623 | else { |
1624 | $url = $link ; |
1625 | } |
5a039dd3 |
1626 | $s1 = "<A HREF=\"$url\">$str</A>" ; |
1627 | } |
1628 | else { |
1629 | $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ; |
1630 | } |
54310121 |
1631 | $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,; |
1632 | confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/; |
1633 | } else { |
1634 | $s1 = "<CODE>$str</CODE>"; |
1635 | # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose |
1636 | } |
1637 | |
1638 | |
1639 | return $s1; |
1640 | } |
1641 | |
1642 | # |
1643 | # process_E - process the E<> pod directive which seems to escape a character. |
1644 | # |
1645 | sub process_E { |
1646 | my($str) = @_; |
1647 | |
1648 | for ($str) { |
1649 | s,([^/].*),\&$1\;,g; |
1650 | } |
1651 | |
1652 | return $str; |
1653 | } |
1654 | |
1655 | # |
1656 | # process_Z - process the Z<> pod directive which really just amounts to |
1657 | # ignoring it. this allows someone to start a paragraph with an = |
1658 | # |
1659 | sub process_Z { |
1660 | my($str) = @_; |
1661 | |
1662 | # there is no equivalent in HTML for this so just ignore it. |
1663 | $str = ""; |
1664 | return $str; |
1665 | } |
1666 | |
1667 | # |
1668 | # process_S - process the S<> pod directive which means to convert all |
1669 | # spaces in the string to non-breaking spaces (in HTML-eze). |
1670 | # |
1671 | sub process_S { |
1672 | my($str) = @_; |
1673 | |
1674 | # convert all spaces in the text to non-breaking spaces in HTML. |
1675 | $str =~ s/ / /g; |
1676 | return $str; |
1677 | } |
1678 | |
1679 | # |
1680 | # process_X - this is supposed to make an index entry. we'll just |
1681 | # ignore it. |
1682 | # |
1683 | sub process_X { |
1684 | return ''; |
1685 | } |
1686 | |
1687 | |
1688 | # |
29f227c9 |
1689 | # Adapted from Nick Ing-Simmons' PodToHtml package. |
1690 | sub relative_url { |
1691 | my $source_file = shift ; |
1692 | my $destination_file = shift; |
1693 | |
1694 | my $source = URI::file->new_abs($source_file); |
1695 | my $uo = URI::file->new($destination_file,$source)->abs; |
1696 | return $uo->rel->as_string; |
1697 | } |
1698 | |
1699 | |
1700 | # |
54310121 |
1701 | # finish_list - finish off any pending HTML lists. this should be called |
1702 | # after the entire pod file has been read and converted. |
1703 | # |
1704 | sub finish_list { |
7b8d334a |
1705 | while ($listlevel > 0) { |
54310121 |
1706 | print HTML "</DL>\n"; |
1707 | $listlevel--; |
1708 | } |
1709 | } |
1710 | |
1711 | # |
1712 | # htmlify - converts a pod section specification to a suitable section |
1713 | # specification for HTML. if first arg is 1, only takes 1st word. |
1714 | # |
1715 | sub htmlify { |
1716 | my($compact, $heading) = @_; |
1717 | |
1718 | if ($compact) { |
1719 | $heading =~ /^(\w+)/; |
1720 | $heading = $1; |
1721 | } |
1722 | |
1723 | # $heading = lc($heading); |
1724 | $heading =~ s/[^\w\s]/_/g; |
1725 | $heading =~ s/(\s+)/ /g; |
1726 | $heading =~ s/^\s*(.*?)\s*$/$1/s; |
1727 | $heading =~ s/ /_/g; |
1728 | $heading =~ s/\A(.{32}).*\Z/$1/s; |
1729 | $heading =~ s/\s+\Z//; |
1730 | $heading =~ s/_{2,}/_/g; |
1731 | |
1732 | return $heading; |
1733 | } |
1734 | |
1735 | BEGIN { |
1736 | } |
1737 | |
1738 | 1; |