Commit | Line | Data |
54310121 |
1 | #!/usr/bin/perl -w |
2 | |
3 | use Config; # for config options in the makefile |
4 | use Getopt::Long; # for command-line parsing |
5 | use Cwd; |
6 | use Pod::Html; |
7 | |
8 | umask 022; |
9 | |
10 | =head1 NAME |
11 | |
12 | installhtml - converts a collection of POD pages to HTML format. |
13 | |
14 | =head1 SYNOPSIS |
15 | |
16 | installhtml [--help] [--podpath=<name>:...:<name>] [--podroot=<name>] |
17 | [--htmldir=<name>] [--htmlroot=<name>] [--norecurse] [--recurse] |
18 | [--splithead=<name>,...,<name>] [--splititem=<name>,...,<name>] |
19 | [--libpods=<name>,...,<name>] [--verbose] |
20 | |
21 | =head1 DESCRIPTION |
22 | |
23 | I<installhtml> converts a collection of POD pages to a corresponding |
24 | collection of HTML pages. This is primarily used to convert the pod |
25 | pages found in the perl distribution. |
26 | |
27 | =head1 OPTIONS |
28 | |
29 | =over 4 |
30 | |
31 | =item B<--help> help |
32 | |
33 | Displays the usage. |
34 | |
35 | =item B<--podpath> POD search path |
36 | |
e05c04ea |
37 | The list of directories to search for .pod and .pm files to be converted. |
54310121 |
38 | Default is `podroot/.'. |
39 | |
40 | =item B<--podroot> POD search path base directory |
41 | |
42 | The base directory to search for all .pod and .pm files to be converted. |
43 | Default is current directory. |
44 | |
45 | =item B<--htmldir> HTML destination directory |
46 | |
47 | The base directory which all HTML files will be written to. This should |
48 | be a path relative to the filesystem, not the resulting URL. |
49 | |
50 | =item B<--htmlroot> URL base directory |
51 | |
52 | The base directory which all resulting HTML files will be visible at in |
53 | a URL. The default is `/'. |
54 | |
55 | =item B<--recurse> recurse on subdirectories |
56 | |
57 | Whether or not to convert all .pm and .pod files found in subdirectories |
58 | too. Default is to not recurse. |
59 | |
60 | =item B<--splithead> POD files to split on =head directive |
61 | |
e05c04ea |
62 | Comma-separated list of pod files to split by the =head directive. These |
54310121 |
63 | files should have names specified relative to podroot. |
64 | |
65 | =item B<--splititem> POD files to split on =item directive |
66 | |
e05c04ea |
67 | Comma-separated list of all pod files to split by the =item directive. |
54310121 |
68 | I<installhtml> does not do the actual split, rather it invokes I<splitpod> |
69 | to do the dirty work. As with --splithead, these files should have names |
70 | specified relative to podroot. |
71 | |
72 | =item B<--libpods> library PODs for LE<lt>E<gt> links |
73 | |
e05c04ea |
74 | Comma-separated list of "library" pod files. This is the same list that |
54310121 |
75 | will be passed to pod2html when any pod is converted. |
76 | |
77 | =item B<--verbose> verbose output |
78 | |
79 | Self-explanatory. |
80 | |
81 | =back |
82 | |
83 | =head1 EXAMPLE |
84 | |
85 | The following command-line is an example of the one we use to convert |
86 | perl documentation: |
87 | |
88 | ./installhtml --podpath=lib:ext:pod:vms \ |
89 | --podroot=/usr/src/perl \ |
90 | --htmldir=/perl/nmanual \ |
91 | --htmlroot=/perl/nmanual \ |
92 | --splithead=pod/perlipc.pod \ |
93 | --splititem=pod/perlfunc \ |
94 | --libpods=perlfunc,perlguts,perlvar,perlrun,perlop \ |
95 | --recurse \ |
96 | --verbose |
97 | |
98 | =head1 AUTHOR |
99 | |
100 | Chris Hall E<lt>hallc@cs.colorado.eduE<gt> |
101 | |
102 | =head1 TODO |
103 | |
104 | =cut |
105 | |
# Text printed by usage().  The synopsis line lists every option accepted
# by the GetOptions call in this script, including --splitpod.
$usage =<<END_OF_USAGE;
Usage: $0 --help --podpath=<name>:...:<name> --podroot=<name>
         --htmldir=<name> --htmlroot=<name> --norecurse --recurse
         --splithead=<name>,...,<name> --splititem=<name>,...,<name>
         --libpods=<name>,...,<name> --splitpod=<name> --verbose

    --help      - this message
    --podpath   - colon-separated list of directories containing .pod and
                  .pm files to be converted (. by default).
    --podroot   - filesystem base directory from which all relative paths in
                  podpath stem (default is .).
    --htmldir   - directory to store resulting html files in relative
                  to the filesystem (\$podroot/html by default).
    --htmlroot  - http-server base directory from which all relative paths
                  in podpath stem (default is /).
    --libpods   - comma-separated list of files to search for =item pod
                  directives in as targets of C<> and implicit links (empty
                  by default).
    --norecurse - don't recurse on those subdirectories listed in podpath.
                  (default behavior).
    --recurse   - recurse on those subdirectories listed in podpath
    --splithead - comma-separated list of .pod or .pm files to split.  will
                  split each file into several smaller files at every occurrence
                  of a pod =head[1-6] directive.
    --splititem - comma-separated list of .pod or .pm files to split using
                  splitpod.
    --splitpod  - where the program splitpod can be found (\$podroot/pod by
                  default).
    --verbose   - self-explanatory.

END_OF_USAGE
137 | |
# ---------------------------------------------------------------------------
# Main program.  Establish defaults, parse the command line, split the
# requested pods, convert everything found under @podpath, and finally
# build a master index page for every pod that was split.
# ---------------------------------------------------------------------------
@libpods   = ();
@podpath   = ( "." );   # list of directories containing .pod and .pm
                        # files to be converted.
$podroot   = ".";       # assume the pods we want are here
$htmldir   = "";        # nothing for now...
$htmlroot  = "/";       # default value
$recurse   = 0;         # default behavior
@splithead = ();        # don't split any files by default
@splititem = ();        # don't split any files by default
$splitpod  = "";        # nothing for now.

$verbose   = 0;         # whether or not to print debugging info

$pod2html  = "pod/pod2html";


# parse the command-line
$result = GetOptions( qw(
    help
    podpath=s
    podroot=s
    htmldir=s
    htmlroot=s
    libpods=s
    recurse!
    splithead=s
    splititem=s
    splitpod=s
    verbose
));
usage("invalid parameters") unless $result;
parse_command_line();


# set these variables to appropriate values if the user didn't specify
# values for them.  $htmldir is a filesystem path, so (as the usage text
# states) its default hangs off $podroot, not off the URL base $htmlroot.
$htmldir  = "$podroot/html" unless $htmldir;
$splitpod = "$podroot/pod"  unless $splitpod;


# make sure that the destination directory exists
(mkdir($htmldir, 0755) ||
    die "$0: cannot make directory $htmldir: $!\n") if ! -d $htmldir;


# the following array will eventually contain files that are to be
# ignored in the conversion process.  these are files that have been
# processed by splititem or splithead and should not be converted as a
# result.
@ignore = ();


# split pods.  it's important to do this before converting ANY pods because
# it may affect some of the links
@splitdirs = ();    # files in these directories won't get an index
split_on_head($podroot, $htmldir, \@splitdirs, \@ignore, @splithead);
split_on_item($podroot,           \@splitdirs, \@ignore, @splititem);


# convert the pod pages found in @podpath
foreach $dir (@podpath) {
    installdir($dir, $recurse, $podroot, \@splitdirs, \@ignore);
}


# now go through and create master indices for each pod we split
foreach $dir (@splititem) {
    print "creating index $htmldir/$dir.html\n" if $verbose;
    create_index("$htmldir/$dir.html", "$htmldir/$dir");
}

foreach $dir (@splithead) {
    $dir .= ".pod" unless $dir =~ /(\.pod|\.pm)$/;
    # let pod2html create the file
    runpod2html($dir, 1);

    # now go through and truncate after the index
    $dir =~ /^(.*?)(\.pod|\.pm)?$/sm;
    $file = "$htmldir/$1";
    print "creating index $file.html\n" if $verbose;

    # read in everything until what would have been the first =head
    # directive, patching the index as we go.
    open(my $in, "<", "$file.html") ||
        die "$0: error opening $file.html for input: $!\n";
    @data = ();
    {
        # paragraph mode for this read only; a global assignment would
        # leak into the next runpod2html() call of this loop.
        local $/ = "";
        while (<$in>) {
            last if /NAME=/;
            # [^"]* keeps each rewrite inside its own attribute even when
            # several anchors share a line.
            s,HREF="#([^"]*)">,HREF="$file/$1.html">,g;
            push @data, $_;
        }
    }
    close($in);

    # now rewrite the file
    open(my $out, ">", "$file.html") ||
        die "$0: error opening $file.html for output: $!\n";
    print $out "@data\n";
    close($out) ||
        die "$0: error closing $file.html: $!\n";
}
240 | |
241 | ############################################################################## |
242 | |
243 | |
# usage - report an optional complaint on STDERR, then abort the program
# with the usage text held in $usage.  Never returns.
sub usage {
    my @complaint = @_;

    if (@complaint) {
        warn "$0: @complaint\n";
    }

    die $usage;
}
248 | |
249 | |
# parse_command_line - copy the $opt_* values into the script's
# configuration globals.  An option whose $opt_* variable is undefined
# leaves the corresponding default untouched.  Calls usage() when
# $opt_help is defined.
sub parse_command_line {
    if (defined $opt_help) {
        usage();
    }
    $opt_help = "";     # make -w shut up

    # list of directories (colon-separated on the command line)
    if (defined $opt_podpath) {
        @podpath = split(":", $opt_podpath);
    }

    # lists of files (comma-separated on the command line)
    if (defined $opt_splithead) {
        @splithead = split(",", $opt_splithead);
    }
    if (defined $opt_splititem) {
        @splititem = split(",", $opt_splititem);
    }
    if (defined $opt_libpods) {
        @libpods = split(",", $opt_libpods);
    }

    # plain string settings
    if (defined $opt_htmldir)  { $htmldir  = $opt_htmldir;  }
    if (defined $opt_htmlroot) { $htmlroot = $opt_htmlroot; }
    if (defined $opt_podroot)  { $podroot  = $opt_podroot;  }
    if (defined $opt_splitpod) { $splitpod = $opt_splitpod; }

    # flags
    if (defined $opt_recurse)  { $recurse  = $opt_recurse;  }
    if (defined $opt_verbose)  { $verbose  = $opt_verbose;  }
}
270 | |
271 | |
#
# create_index - writes a master index page for a directory of html files.
#
#   $html - path of the index file to create (overwritten if it exists)
#   $dir  - directory whose *.html files are to be indexed
#
# The files are visited in sorted order.  Each one is read in paragraph
# mode; the first paragraph containing "NAME=" supplies the entry name and
# its description.  Files without such a paragraph, or whose paragraph does
# not have the expected "name - description" shape, are skipped, so no
# dangling <A HREF> is ever written.  Dies if the directory or any file
# cannot be opened, or if the index cannot be written.
#
sub create_index {
    my($html, $dir) = @_;
    my(@files, @filedata, $file, $name);

    # get the list of .html files in this directory
    opendir(my $dh, $dir) ||
        die "$0: error opening directory $dir for reading: $!\n";
    @files = sort(grep(/\.html$/, readdir($dh)));
    closedir($dh);

    open(my $out, ">", $html) ||
        die "$0: error opening $html for output: $!\n";

    # paragraph mode, restored automatically when this sub returns
    local $/ = "";

    print $out "<DL COMPACT>\n";
    foreach $file (@files) {
        open(my $in, "<", "$dir/$file") ||
            die "$0: error opening $dir/$file for input: $!\n";
        @filedata = <$in>;
        close($in);

        # pull out the NAME section; only emit an entry when it really
        # matched, otherwise $1/$2 would be left over from an earlier file
        ($name) = grep(/NAME=/, @filedata);
        next unless defined $name
            && $name =~ m,/H1>\s(\S+)\s[\s-]*(.*?)\s*$,sm;

        print $out qq(<A HREF="$dir/$file">);
        print $out "<DT>$1</A><DD>$2\n";
    }
    print $out "</DL>\n";

    close($out) ||
        die "$0: error closing $html: $!\n";
}
318 | |
319 | |
#
# split_on_head - splits each pod named in @splithead at its =head[1-6]
# directives by handing it to splitpod().
#
#   $podroot   - base directory the names in @splithead are relative to
#   $htmldir   - passed through to splitpod()
#   $splitdirs - array ref, passed through to splitpod()
#   $ignore    - array ref; the full path of every pod split here is
#                appended so that the pod itself is not converted later
#   @splithead - pod names, with or without a trailing ".pod"
#
sub split_on_head {
    my($podroot, $htmldir, $splitdirs, $ignore, @splithead) = @_;
    my($pod, $dirname, $filename);

    # split the files specified in @splithead on =head[1-6] pod directives
    print "splitting files by head.\n" if $verbose && @splithead;
    foreach $pod (@splithead) {
        # a name without any directory part gets an empty directory
        $pod =~ s,^([^/]*)$,/$1,;

        # figure out the directory name and filename.  the directory part
        # is everything up to the LAST slash so that nested names such as
        # lib/Foo/Bar are split next to the pod they come from.
        $pod =~ m,^(.*)/([^/]*?)(?:\.pod)?$,;
        $dirname  = $1;
        $filename = "$2.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, "$podroot/$dirname/$filename");

        # split the pod
        splitpod("$podroot/$dirname/$filename", "$podroot/$dirname", $htmldir,
                 $splitdirs);
    }
}
341 | |
342 | |
#
# split_on_item - splits each pod named in @splititem on its =item
# directives by running the external splitpod program inside a directory
# named after the pod.
#
#   $podroot   - base directory the names in @splititem are relative to
#   $splitdirs - array ref; every directory split into is appended
#   $ignore    - array ref; the full path of every pod split here is
#                appended so that the pod itself is not converted later
#   @splititem - pod names, with or without a trailing ".pod"
#
# The splitpod program is looked for in the directory held in the global
# $splitpod (the --splitpod option); when that is unset the historical
# "../splitpod" relative to the split directory is used.  A failing
# splitpod run only produces a warning.  Dies if a split directory cannot
# be created or entered, or if the starting directory cannot be restored.
#
sub split_on_item {
    my($podroot, $splitdirs, $ignore, @splititem) = @_;
    my($pwd, $pod, $dirname, $filename, $splitter);

    print "splitting files by item.\n" if $verbose && @splititem;
    $pwd = getcwd();

    # resolve the program path now: once we chdir, a relative $splitpod
    # would no longer point at the right place.
    if (defined $splitpod && length $splitpod) {
        require File::Spec;
        $splitter = File::Spec->rel2abs("$splitpod/splitpod", $pwd);
    } else {
        $splitter = "../splitpod";
    }

    foreach $pod (@splititem) {
        # a name without any directory part gets an empty directory
        $pod =~ s,^([^/]*)$,/$1,;

        # figure out the directory to split into.  $filename is only the
        # last path component because splitpod is run from inside the
        # split directory, one level below the pod itself.
        $pod =~ m,^(.*)/([^/]*?)(?:\.pod)?$,;
        $dirname  = "$1/$2";
        $filename = "$2.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, "$podroot/$dirname.pod");

        # split the pod
        push(@$splitdirs, "$podroot/$dirname");
        if (! -d "$podroot/$dirname") {
            mkdir("$podroot/$dirname", 0755) ||
                die "$0: error creating directory $podroot/$dirname: $!\n";
        }
        chdir("$podroot/$dirname") ||
            die "$0: error changing to directory $podroot/$dirname: $!\n";
        system($splitter, "../$filename") &&
            warn "$0: error running '$splitter ../$filename'"
                 ." from $podroot/$dirname";

        # go back before the next item: $podroot may be a relative path
        chdir($pwd) ||
            die "$0: error changing back to directory $pwd: $!\n";
    }
}
373 | |
374 | |
#
# splitpod - splits a .pod file into several smaller .pod files
#  where a new file is started each time a =head[1-6] pod directive
#  is encountered in the input file.
#
#   $pod       - path of the pod file to split
#   $poddir    - directory in which the split directory is created; the
#                split directory is named after $pod minus its ".pod"
#   $htmldir   - accepted for the caller's convenience, not used here
#   $splitdirs - array ref; the split directory is appended to it
#
# Text in front of the first =head directive is dropped.  Every generated
# file ends with a short list of Back/Forward/Up links, and L<> links that
# name one of the file's own sections are redirected into the split
# directory.  Dies on any file or directory error.
#
sub splitpod {
    my($pod, $poddir, $htmldir, $splitdirs) = @_;
    my(@poddata, @filedata, %heads);
    my($file, $dir, $i, $j, $prevsec, $section, $nextsec);

    print "splitting $pod\n" if $verbose;

    # read the file in paragraphs (paragraph mode is confined to this block)
    {
        local $/ = "";
        open(my $in, "<", $pod) ||
            die "$0: error opening $pod for input: $!\n";
        @filedata = <$in>;
        close($in) ||
            die "$0: error closing $pod: $!\n";
    }

    # regroup the paragraphs into one string per =head[1-6] section
    @poddata = ();
    $j = -1;
    foreach $i (0 .. $#filedata) {
        $j++ if $filedata[$i] =~ /^\s*=head[1-6]/;
        next if $j < 0;             # nothing before the first =head is kept
        $poddata[$j] = "" unless defined $poddata[$j];
        $poddata[$j] .= "\n$filedata[$i]";
    }

    # create list of =head[1-6] sections so that we can rewrite
    # L<> links as necessary.
    %heads = ();
    foreach $i (0 .. $#poddata) {
        $heads{htmlize($1)} = 1 if $poddata[$i] =~ /=head[1-6]\s+(.*)/;
    }

    # create a directory of a similar name and store all the
    # files in there
    $pod =~ s,.*/(.*),$1,;      # get the last part of the name
    $dir = $pod;
    $dir =~ s/\.pod$//;         # only the extension, not any ".pod" inside
    push(@$splitdirs, "$poddir/$dir");
    mkdir("$poddir/$dir", 0755) ||
        die "$0: could not create directory $poddir/$dir: $!\n"
        unless -d "$poddir/$dir";

    # a pod without any =head directive has nothing to split
    return unless @poddata;

    $section = "";
    $nextsec = ($poddata[0] =~ /^\s*=head[1-6]\s+(.*)/) ? $1 : "";

    # for each section of the file create a separate pod file
    for ($i = 0; $i <= $#poddata; $i++) {
        # determine the "prev" and "next" links
        $prevsec = $section;
        $section = $nextsec;
        if ($i < $#poddata
            && $poddata[$i+1] =~ /^\s*=head[1-6]\s+(.*)/) {
            $nextsec = $1;
        } else {
            $nextsec = "";
        }

        # determine an appropriate filename (this must correspond with
        # what pod2html will try and guess)
        $file = "$dir/" . htmlize($section) . ".pod";

        # create the new .pod file
        print "\tcreating $poddir/$file\n" if $verbose;
        open(my $out, ">", "$poddir/$file") ||
            die "$0: error opening $poddir/$file for output: $!\n";
        $poddata[$i] =~ s{L<([^<>]*)>}{
            my $target = $1;
            defined $heads{htmlize($target)} ? "L<$dir/$target>" : "L<$target>"
        }ge;
        print $out $poddata[$i]."\n\n";
        print $out "=over 4\n\n";
        print $out "=item *\n\nBack to L<$dir/\"$prevsec\">\n\n" if $prevsec;
        print $out "=item *\n\nForward to L<$dir/\"$nextsec\">\n\n" if $nextsec;
        print $out "=item *\n\nUp to L<$dir>\n\n";
        print $out "=back\n\n";
        close($out) ||
            die "$0: error closing $poddir/$file: $!\n";
    }
}
460 | |
461 | |
#
# installdir - takes care of converting the .pod and .pm files in the
#  given directory to .html files and then installing those.
#
#   $dir       - directory to convert, relative to $podroot
#   $recurse   - true to descend into subdirectories as well
#   $podroot   - base directory $dir is relative to
#   $splitdirs - array ref of directories holding split pods; files in
#                such a directory are converted without an index
#   $ignore    - array ref of files that must not be converted.  A .pm
#                that has a .pod of the same name beside it is added to
#                this list, so the .pod wins.
#
# Subdirectories whose name starts with a dot are never descended into.
# Dies if the directory cannot be opened.
#
sub installdir {
    my($dir, $recurse, $podroot, $splitdirs, $ignore) = @_;
    my(@entries, @dirlist, @podlist, @pmlist, $doindex);
    my($subdir, $pod, $pm);

    # should files in this directory get an index?
    $doindex = (grep($_ eq "$podroot/$dir", @$splitdirs) ? 0 : 1);

    # read the directory once; the three lists below are filtered from it
    opendir(my $dh, "$podroot/$dir")
        || die "$0: error opening directory $podroot/$dir: $!\n";
    @entries = readdir($dh);
    closedir($dh);

    # find the directories to recurse on
    @dirlist = ();
    if ($recurse) {
        @dirlist = map  { "$dir/$_" }
                   grep { -d "$podroot/$dir/$_" && !/^\.{1,2}/ } @entries;
    }

    # find all the .pod files within the directory (extension removed)
    @podlist = map  { my $base = $_; $base =~ s/\.pod$//; "$dir/$base" }
               grep { ! -d "$podroot/$dir/$_" && /\.pod$/ } @entries;

    # find all the .pm files within the directory (extension removed)
    @pmlist  = map  { my $base = $_; $base =~ s/\.pm$//; "$dir/$base" }
               grep { ! -d "$podroot/$dir/$_" && /\.pm$/ } @entries;

    # recurse on all subdirectories we kept track of
    foreach $subdir (@dirlist) {
        installdir($subdir, $recurse, $podroot, $splitdirs, $ignore);
    }

    # install all the pods we found
    foreach $pod (@podlist) {
        # check if we should ignore it.
        next if grep($_ eq "$podroot/$pod.pod", @$ignore);

        # check if a .pm file exists too.  @pmlist holds names without
        # the extension, so compare against the bare name.
        if (grep($_ eq $pod, @pmlist)) {
            print "$0: Warning both `$podroot/$pod.pod' and "
                . "`$podroot/$pod.pm' exist, using pod\n";
            push(@$ignore, "$pod.pm");
        }
        runpod2html("$pod.pod", $doindex);
    }

    # install all the .pm files we found
    foreach $pm (@pmlist) {
        # check if we should ignore it.
        next if grep($_ eq "$pm.pm", @$ignore);

        runpod2html("$pm.pm", $doindex);
    }
}
523 | |
524 | |
#
# runpod2html - invokes pod2html to convert a .pod or .pm file to a .html
#  file.
#
#   $pod     - file to convert, relative to the global $podroot
#   $doindex - true to have pod2html generate an index for the page
#
# The output goes to the same relative path under the global $htmldir,
# with the extension replaced by ".html"; missing directories below
# $htmldir are created first.  The remaining pod2html options are taken
# from the globals $htmlroot, @podpath, $podroot, $recurse and @libpods.
# Dies if a directory cannot be created.
#
sub runpod2html {
    my($pod, $doindex) = @_;
    my($html, $i, $dir, @dirs, @args);

    $html = $pod;
    $html =~ s/\.(pod|pm)$/.html/g;

    # make sure the destination directories exist
    @dirs = split("/", $html);
    $dir  = "$htmldir/";
    for ($i = 0; $i < $#dirs; $i++) {
        if (! -d "$dir$dirs[$i]") {
            mkdir("$dir$dirs[$i]", 0755) ||
                die "$0: error creating directory $dir$dirs[$i]: $!\n";
        }
        $dir .= "$dirs[$i]/";
    }

    # invoke pod2html
    print "$podroot/$pod => $htmldir/$html\n" if $verbose;
    @args = (
        "--htmlroot=$htmlroot",
        "--podpath=" . join(":", @podpath),
        "--podroot=$podroot",
        "--netscape",
        ($doindex ? "--index" : "--noindex"),
        "--" . ($recurse ? "" : "no") . "recurse",
    );
    # only mention --libpods when there is something to pass; an empty
    # string would reach pod2html as a stray non-option argument.
    push(@args, "--libpods=" . join(":", @libpods)) if @libpods;
    push(@args, "--infile=$podroot/$pod", "--outfile=$htmldir/$html");

    # pod2html runs inside this process, so $? says nothing about it
    # (it may still hold the status of an earlier system() call) and is
    # deliberately not consulted here.
    Pod::Html::pod2html(@args);
}
561 | |
# htmlize - shorthand for htmlify() with 0 as its first argument; all
# arguments are passed through unchanged and htmlify's result is returned.
sub htmlize {
    return htmlify(0, @_);
}