pod/pod2latex.PL

   1 #!/usr/local/bin/perl
   2
   3 use Config;
   4 use File::Basename qw(&basename &dirname);
   5 use Cwd;
   6
   7 # List explicitly here the variables you want Configure to
   8 # generate.  Metaconfig only looks for shell variables, so you
   9 # have to mention them as if they were shell variables, not
  10 # %Config entries.  Thus you write
  11 #  $startperl
  12 # to ensure Configure will look for $Config{startperl}.
  13
  14 # This forces PL files to create target in same directory as PL file.
  15 # This is so that make depend always knows where to find PL derivatives.
  16 $origdir = cwd;
  17 chdir dirname($0);
  18 $file = basename($0, '.PL');
  19 $file .= '.com' if $^O eq 'VMS';
  20
  21 open OUT,">$file" or die "Can't create $file: $!";
  22
  23 print "Extracting $file (with variable substitutions)\n";
  24
  25 # In this section, perl variables will be expanded during extraction.
  26 # You can use $Config{...} to use Configure variables.
  27
  28 print OUT <<"!GROK!THIS!";
  29 $Config{startperl}
  30     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
  31         if \$running_under_some_shell;
  32 !GROK!THIS!
  33
  34 # In the following, perl variables are not expanded during extraction.
  35
  36 print OUT <<'!NO!SUBS!';
  37 #
  38 # pod2latex, version 1.1
  39 # by Taro Kawagish (kawagish@imslab.co.jp),  Jan 11, 1995.
  40 #
  41 # pod2latex filters Perl pod documents to LaTeX documents.
  42 #
  43 # What pod2latex does:
  44 # 1. Pod file 'perl_doc_entry.pod' is filtered to 'perl_doc_entry.tex'.
  45 # 2. Indented paragraphs are translated into
  46 #    '\begin{verbatim} ... \end{verbatim}'.
  47 # 3. '=head1 heading' command is translated into '\subsection*{heading}'
  48 # 4. '=head2 heading' command is translated into '\subsubsection*{heading}'
  49 # 5. '=over N' command is translated into
  50 #        '\begin{itemize}'      if following =item starts with *,
  51 #        '\begin{enumerate}'    if following =item starts with 1.,
  52 #        '\begin{description}'  if else.
  53 #      (indentation level N is ignored.)
  54 # 6. '=item * heading' command is translated into '\item heading',
  55 #    '=item 1. heading' command is translated into '\item heading',
  56 #    '=item heading' command(other) is translated into '\item[heading]'.
  57 # 7. '=back' command is translated into
  58 #        '\end{itemize}'        if started with '\begin{itemize}',
  59 #        '\end{enumerate}'      if started with '\begin{enumerate}',
  60 #        '\end{description}'    if started with '\begin{description}'.
  61 # 8. other paragraphs are translated into strings with TeX special characters
  62 #    escaped.
  63 # 9. In heading text, and other paragraphs, the following translation of pod
  64 #    quotes are done, and then TeX special characters are escaped after that.
  65 #      I<text> to {\em text\/},
  66 #      B<text> to {\bf text},
  67 #      S<text> to text1,
  68 #        where text1 is a string with blank characters replaced with ~,
  69 #      C<text> to {\tt text2},
  70 #        where text2 is a string with TeX special characters escaped to
  71 #        obtain a literal printout,
  72 #      E<text> (HTML escape) to TeX escaped string,
  73 #      L<text> to referencing string as is done by pod2man,
  74 #      F<file> to {\em file\/},
  75 #      Z<> to a null string,
  76 # 10. those headings are indexed:
  77 #       '=head1 heading'   =>  \subsection*{heading}\index{heading}
  78 #       '=head2 heading'   =>  \subsubsection*{heading}\index{heading}
  79 #                 only when heading does not match frequent patterns such as
  80 #                 DESCRIPTION, DIAGNOSTICS,...
  81 #       '=item heading'   =>  \item{heading}\index{heading}
  82 #
  83 # Usage:
  84 #     pod2latex perl_doc_entry.pod
  85 # this will write to a file 'perl_doc_entry.tex'.
  86 #
  87 # To LaTeX:
  88 # The following commands need to be defined in the preamble of the LaTeX
  89 # document:
  90 # \def\C++{{\rm C\kern-.05em\raise.3ex\hbox{\footnotesize ++}}}
  91 # \def\underscore{\leavevmode\kern.04em\vbox{\hrule width 0.4em height 0.3pt}}
  92 # and \parindent should be set zero:
  93 # \setlength{\parindent}{0pt}
  94 #
  95 # Note:
  96 # This script was written by modifying pod2man.
  97 #
  98 # Bug:
  99 # If HTML escapes E<text> other than E<amp>,E<lt>,E<gt>,E<quot> are used
 100 # in C<>, translation will produce wrong character strings.
 101 # Translation of HTML escapes of various European accents might be wrong.
 102
 103
 104 # TeX special characters.
 105 ##$tt_ables = "!@*()-=+|;:'\"`,./?<>";
 106 $backslash_escapables = "#\$%&{}_";
 107 $backslash_escapables2 = "#\$%&{}";     # except _
 108 ##$nonverbables = "^\\~";
 109 ##$bracketesc = "[]";
 110 ##@tex_verb_fences = unpack("aaaaaaaaa","|#@!*+?:;");
 111
 112 @head1_freq_patterns            # =head1 patterns which need not be index'ed
 113     = ("AUTHOR","Author","BUGS","DATE","DESCRIPTION","DIAGNOSTICS",
 114        "ENVIRONMENT","EXAMPLES","FILES","INTRODUCTION","NAME","NOTE",
 115        "SEE ALSO","SYNOPSIS","WARNING");
 116
 117 $indent = 0;
 118
 119 # parse the pods, produce LaTeX.
 120
 121 use Pod::Plainer;
 122 open(POD,"-|") or Pod::Plainer -> new() -> parse_from_file($ARGV[0]), exit;
 123
 124 ($pod=$ARGV[0]) =~ s/\.pod$//;
 125 open(LATEX,">$pod.tex");
 126 &do_hdr();
 127
 128 $cutting = 1;
 129 $begun = "";
 130 $/ = "";                        # record separator is blank lines
 131 while (<POD>) {
 132     if ($cutting) {
 133         next unless /^=/;
 134         $cutting = 0;
 135     }
 136     if ($begun) {
 137        if (/^=end\s+$begun/) {
 138            $begun = "";
 139        }
 140        elsif ($begun =~ /^(tex|latex)$/) {
 141            print LATEX $_;
 142        }
 143        next;
 144     }
 145     chop;
 146     length || (print LATEX  "\n") && next;
 147
 148     # translate indented lines as a verabatim paragraph
 149     if (/^\s/) {
 150         @lines = split(/\n/);
 151         print LATEX  "\\begin{verbatim}\n";
 152         for (@lines) {
 153             1 while s
 154                 {^( [^\t]* ) \t ( \t* ) }
 155                 { $1 . ' ' x (8 - (length($1)%8) + 8*(length($2))) }ex;
 156             print LATEX  $_,"\n";
 157         }
 158         print LATEX  "\\end{verbatim}\n";
 159         next;
 160     }
 161
 162     if (/^=for\s+(\S+)\s*/s) {
 163         if ($1 eq "tex" or $1 eq "latex") {
 164             print LATEX $',"\n";
 165         } else {
 166             # ignore unknown for
 167         }
 168         next;
 169     }
 170     elsif (/^=begin\s+(\S+)\s*/s) {
 171         $begun = $1;
 172         if ($1 eq "tex" or $1 eq "latex") {
 173             print LATEX $'."\n";
 174         }
 175         next;
 176     }
 177
 178     # preserve '=item' line with pod quotes as they are.
         # ($bareitem is used later to build the plain-text index entry.)
 179     if (/^=item/) {
 180         ($bareitem = $_) =~ s/^=item\s*//;
 181     }
 182
 183     # Raw high-bit characters would clash with the noremap scheme, which
 184     # marks hidden text by setting the high bit; init_noremap rewrites them
         # as numeric E<> escapes.  Affects $_.
 184     &init_noremap();
 185
 186     # expand strings "func()" as pod quotes.
 187     if (!/^=item/) {
 188         # first hide pod escapes.
 189         # escaped strings are mapped into the ones with the MSB's on.
 190         s/([A-Z]<[^<>]*>)/noremap($1)/ge;
 191
 192         # func() is a reference to a perl function
 193         s{\b([:\w]+\(\))}{I<$1>}g;
 194         # func(n) is a reference to a man page
 195         s{(\w+)(\([^\s,\051]+\))}{I<$1>$2}g;
 196         # convert simple variable references
 197 #       s/([\$\@%][\w:]+)/C<$1>/g;
 198 #       s/\$[\w:]+\[[0-9]+\]/C<$&>/g;
 199
             # The two checks below only emit warnings; they do not modify $_.
             # First: call-like text with arguments that is not already inside
             # an L<>, C<> or I<> quote.
 200         if (m{ ([\-\w]+\([^\051]*?[\@\$,][^\051]*?\))
 201                }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
 202         {
 203             warn "``$1'' should be a [LCI]<$1> ref";
 204         }
             # Second: a single-letter option such as -x that is not preceded
             # by a word character or another hyphen.
 205         while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
 206             warn "``$1'' should be [CB]<$1> ref";
 207         }
 208
 209         # put back pod quotes so we get the inside of <> processed;
 210         $_ = &clear_noremap($_);
 211     }
 212
 213
 214     # process TeX special characters
         # Every replacement below is passed through noremap() so that text
         # that has already been produced is not matched again by a later
         # substitution; clear_noremap() at the end makes it all visible again.
 215
 216     # First hide HTML quotes E<> since they can be included in C<>.
 217     s/(E<[^<>]+>)/noremap($1)/ge;
 218
 219     # Then hide C<> type literal quotes.
 220     # String inside of C<> will later be expanded into {\tt ..} strings
 221     # with TeX special characters escaped as needed.
 222     s/(C<[^<>]*>)/&noremap($1)/ge;
 223
 224     # Next escape TeX special characters including other pod quotes B< >,...
 225     #
 226     # NOTE: s/re/&func($str)/e evaluates $str just once in perl5.
 227     # (in perl4 evaluation takes place twice before getting passed to func().)
 228
 229     # - hyphen => ---
 230     s/(\S+)(\s+)-+(\s+)(\S+)/"$1".&noremap(" --- ")."$4"/ge;
 231     # '-', '--', "-"  =>  '{\tt -}', '{\tt --}', "{\tt -}"
 232 ##    s/("|')(\s*)(-+)(\s*)\1/&noremap("$1$2\{\\tt $3\}$4$1")/ge;
 233 ## changed Wed Jan 25 15:26:39 JST 1995
 234     # '-', '--', "-"  =>  '$-$', '$--$', "$-$"
 235     s/(\s+)(['"])(-+)([^'"\-]*)\2(\s+|[,.])/"$1$2".&noremap("\$$3\$")."$4$2$5"/ge;
 236     s/(\s+)(['"])([^'"\-]*)(-+)(\s*)\2(\s+|[,.])/"$1$2$3".&noremap("\$$4\$")."$5$2$6"/ge;
 237     # (--|-)  =>  ($--$|$-$)
 238     s/(\s+)\((-+)([=@%\$\+\\\|\w]*)(-*)([=@%\$\+\\\|\w]*)\)(\s+|[,.])/"$1\(".&noremap("\$$2\$")."$3".&noremap("\$$4\$")."$5\)$6"/ge;
 239     # numeral -  =>  $-$
 240     s/(\(|[0-9]+|\s+)-(\s*\(?\s*[0-9]+)/&noremap("$1\$-\$$2")/ge;
 241     # -- in quotes  =>  two separate -
 242     s/B<([^<>]*)--([^<>]*)>/&noremap("B<$1\{\\tt --\}$2>")/ge;
 243
 244     # backslash escapable characters except _.
 245     s/([$backslash_escapables2])/&noremap("\\$1")/ge;
 246     s/_/&noremap("\\underscore{}")/ge;          # a little thicker than \_.
 247     # quote TeX special characters |, ^, ~, \.
         # The backslash comes last: the backslashes introduced above are
         # hidden by noremap and therefore not matched here.
 248     s/\|/&noremap("\$|\$")/ge;
 249     s/\^/&noremap("\$\\hat{\\hspace{0.4em}}\$")/ge;
 250     s/\~/&noremap("\$\\tilde{\\hspace{0.4em}}\$")/ge;
 251     s/\\/&noremap("\$\\backslash{}\$")/ge;
 252     # quote [ and ] to be used in \item[]
 253     s/([\[\]])/&noremap("{\\tt $1}")/ge;
 254     # characters need to be treated differently in TeX
 255     # keep * if an item heading
 256     s/^(=item[ \t]+)[*]((.|\n)*)/"$1" . &noremap("*") . "$2"/ge;
 257     s/[*]/&noremap("\$\\ast\$")/ge;     # other *
 258
 259     # hide other pod quotes.
 260     s/([ABD-Z]<[^<>]*>)/&noremap($1)/ge;
 261
 262     # escape < and > as math strings,
 263     # now that we are done with hiding pod <> quotes.
 264     s/</&noremap("\$<\$")/ge;
 265     s/>/&noremap("\$>\$")/ge;
 266
 267     # put it back so we get the <> processed again;
 268     $_ = &clear_noremap($_);
 269
 270
 271     # Expand pod quotes recursively:
 272     # (1) type face directives [BIFS]<[^<>]*> to appropriate TeX commands,
 273     # (2) L<[^<>]*> to reference strings,
 274     # (3) C<[^<>]*> to TeX literal quotes,
 275     # (4) HTML quotes E<> inside of C<> quotes.
 276
 277     # Hide E<> again since they can be included in C<>.
 278     s/(E<[^<>]+>)/noremap($1)/ge;
 279
         # At most 10 passes.  Most patterns below only match quotes that
         # contain no further < or >, so each pass expands the innermost
         # quotes and the loop ends once no "X<" remains visible.
 280     $maxnest = 10;
 281     while ($maxnest-- && /[A-Z]</) {
 282
 283         # bold and italic quotes
 284         s/B<([^<>]*)>/"{\\bf $1}"/eg;
 285         s#I<([^<>]*)>#"{\\em $1\\/}"#eg;
 286
 287         # files and filelike refs in italics
 288         s#F<([^<>]*)>#"{\\em $1\\/}"#eg;
 289
 290         # no break quote -- usually we want C<> for this
 291         s/S<([^<>]*)>/&nobreak($1)/eg;
 292
 293         # LREF: a manpage(3f)
 294         s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the {\\em $1\\/}$2 manpage:g;
 295
 296         # LREF: an =item on another manpage
 297         s{
 298             L<([^/]+)/([:\w]+(\(\))?)>
 299         } {the C<$2> entry in the I<$1> manpage}gx;
 300
 301         # LREF: an =item on this manpage
             # A run of such links, joined by commas and/or "and", is handed
             # to internal_lrefs() as one string.
 302         s{
 303            ((?:L</([:\w]+(\(\))?)>
 304             (,?\s+(and\s+)?)?)+)
 305         } { &internal_lrefs($1) }gex;
 306
 307         # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
 308         # the "func" can disambiguate
 309         s{
 310             L<(?:([a-zA-Z]\S+?) /)?"?(.*?)"?>
 311         }{
 312             do {
 313                 $1      # if no $1, assume it means on this page.
 314                     ?  "the section on I<$2> in the I<$1> manpage"
 315                     :  "the section on I<$2>"
 316             }
 317         }gex;
 318
             # X<> index entries
 319         s/X<([^<>]*)>/\\index{$1}/g;
 320
 321         s/Z<>/\\&/g;            # the "don't format me" thing
 322
 323         # comes last because not subject to reprocessing
             # (the {\tt ...} result is hidden again with noremap)
 324         s{
 325             C<([^<>]*)>
 326         }{
 327             do {
 328                 ($str = $1) =~ tr/\200-\377/\000-\177/; #normalize hidden stuff
 329                 # expand HTML escapes if any;
 330                 # WARNING: if HTML escapes other than E<amp>,E<lt>,E<gt>,
 331                 # E<quot> are in C<>, they will not be printed correctly.
 332                 $str = &expand_HTML_escapes($str);
 333                 $strverb = &alltt($str);    # Tex verbatim escape of a string.
 334                 &noremap("$strverb");
 335             }
 336         }gex;
 337
 338 #       if ( /C<([^<>]*)/ ) {
 339 #           $str = $1;
 340 #           if ($str !~ /\|/) {         # if includes |
 341 #               s/C<([^<>]*)>/&noremap("\\verb|$str|")/eg;
 342 #           } else {
 343 #               print STDERR "found \| in C<.*> at paragraph $.\n";
 344 #               # find a character not contained in $str to use it as a
 345 #               # separator of the \verb
 346 #               ($chars = $str) =~ s/(\W)/\\$1/g;
 347 #               ## ($chars = $str) =~ s/([\$<>,\|"'\-^{}()*+?\\])/\\$1/g;
 348 #               @fence = grep(!/[ $chars]/,@tex_verb_fences);
 349 #               s/C<([^<>]*)>/&noremap("\\verb$fence[0]$str$fence[0]")/eg;
 350 #           }
 351 #       }
 352     }
 353
 354
 355     # process each pod command
 356     if (s/^=//) {                               # if a command
 357         s/\n/ /g;
 358         ($cmd, $rest) = split(' ', $_, 2);
 359         $rest =~ s/^\s*//;
 360         $rest =~ s/\s*$//;
 361
 362         if (defined $rest) {
 363             &escapes;
 364         }
 365
 366         $rest = &clear_noremap($rest);
 367         $rest = &expand_HTML_escapes($rest);
 368
 369         if ($cmd eq 'cut') {
 370             $cutting = 1;
 371             $lastcmd = 'cut';
 372         }
 373         elsif ($cmd eq 'head1') {       # heading type 1
 374             $rest =~ s/^\s*//; $rest =~ s/\s*$//;
 375             print LATEX  "\n\\subsection*{$rest}";
 376             # put index entry
 377             ($index = $rest) =~ s/^(An?\s+|The\s+)//i;  # remove 'A' and 'The'
 378             # index only those heads not matching the frequent patterns.
 379             foreach $pat (@head1_freq_patterns) {
 380                 if ($index =~ /^$pat/) {
 381                     goto freqpatt;
 382                 }
 383             }
 384             print LATEX  "%\n\\index{$index}\n" if ($index);
 385           freqpatt:
 386             $lastcmd = 'head1';
 387         }
 388         elsif ($cmd eq 'head2') {       # heading type 2
 389             $rest =~ s/^\s*//; $rest =~ s/\s*$//;
 390             print LATEX  "\n\\subsubsection*{$rest}";
 391             # put index entry
 392             ($index = $rest) =~ s/^(An?\s+|The\s+)//i;  # remove 'A' and 'The'
 393             $index =~ s/^Example\s*[1-9][0-9]*\s*:\s*//; # remove 'Example :'
 394             print LATEX  "%\n\\index{$index}\n"  if ($index);
 395             $lastcmd = 'head2';
 396         }
 397         elsif ($cmd eq 'over') {        # 1 level within a listing environment
 398             push(@indent,$indent);
 399             $indent = $rest + 0;
 400             $lastcmd = 'over';
 401         }
 402         elsif ($cmd eq 'back') {        # 1 level out of a listing environment
 403             $indent = pop(@indent);
 404             warn "Unmatched =back\n" unless defined $indent;
 405             $listingcmd = pop(@listingcmd);
 406             print LATEX  "\n\\end{$listingcmd}\n"  if ($listingcmd);
 407             $lastcmd = 'back';
 408         }
 409         elsif ($cmd eq 'item') {        # an item paragraph starts
 410             if ($lastcmd eq 'over') {   # if we have just entered listing env
 411                 # see what type of list environment we are in.
 412                 if ($rest =~ /^[0-9]\.?/) {     # if numeral heading
 413                     $listingcmd = 'enumerate';
 414                 } elsif ($rest =~ /^\*\s*/) {   # if * heading
 415                     $listingcmd = 'itemize';
 416                 } elsif ($rest =~ /^[^*]/) {    # if other headings
 417                     $listingcmd = 'description';
 418                 } else {
 419                     warn "unknown list type for item $rest";
 420                 }
 421                 print LATEX  "\n\\begin{$listingcmd}\n";
 422                 push(@listingcmd,$listingcmd);
 423             } elsif ( !@listingcmd ) {
 424                 warn "Illegal '=item' command without preceding 'over':";
 425                 warn "=item $bareitem";
 426             }
 427
 428             if ($listingcmd eq 'enumerate') {
 429                 $rest =~ s/^[0-9]+\.?\s*//;     # remove numeral heading
 430                 print LATEX  "\n\\item";
 431                 print LATEX  "{\\bf $rest}" if $rest;
 432             } elsif ($listingcmd eq 'itemize') {
 433                 $rest =~ s/^\*\s*//;            # remove * heading
 434                 print LATEX  "\n\\item";
 435                 print LATEX  "{\\bf $rest}" if $rest;
 436             } else {                            # description item
 437                 print LATEX  "\n\\item[$rest]";
 438             }
 439             $lastcmd = 'item';
 440             $rightafter_item = 'yes';
 441
 442             # check if the item heading is short or long.
 443             ($itemhead = $rest) =~ s/{\\bf (\S*)}/$1/g;
 444             if (length($itemhead) < 4) {
 445                 $itemshort = "yes";
 446             } else {
 447                 $itemshort = "no";
 448             }
 449             # write index entry
 450             if ($pod =~ "perldiag") {                   # skip 'perldiag.pod'
 451                 goto noindex;
 452             }
 453             # strip out the item of pod quotes and get a plain text entry
 454             $bareitem =~ s/\n/ /g;                      # remove newlines
 455             $bareitem =~ s/\s*$//;                      # remove trailing space
 456             $bareitem =~ s/[A-Z]<([^<>]*)>/$1/g;        # remove <> quotes
 457             ($index = $bareitem) =~ s/^\*\s+//;         # remove leading '*'
 458             $index =~ s/^(An?\s+|The\s+)//i;            # remove 'A' and 'The'
 459             $index =~ s/^\s*[1-9][0-9]*\s*[.]\s*$//; # remove numeral only
 460             $index =~ s/^\s*\w\s*$//;                   # remove 1 char only's
 461                 # quote ", @ and ! with " to be used in makeindex.
 462             $index =~ s/"/""/g;                         # quote "
 463             $index =~ s/@/"@/g;                         # quote @
 464             $index =~ s/!/"!/g;                         # quote !
 465             ($rest2=$rest) =~ s/^\*\s+//;       # remove *
 466             $rest2 =~ s/"/""/g;                         # quote "
 467             $rest2 =~ s/@/"@/g;                         # quote @
 468             $rest2 =~ s/!/"!/g;                         # quote !
 469             if ($pod =~ "(perlfunc|perlvar)") { # when doc is perlfunc,perlvar
 470                 # take only the 1st word of item heading
 471                 $index =~ s/^([^{}\s]*)({.*})?([^{}\s]*)\s+.*/\1\2\3/;
 472                 $rest2 =~ s/^([^{}\s]*)({.*})?([^{}\s]*)\s+.*/\1\2\3/;
 473             }
 474             if ($index =~ /[A-Za-z\$@%]/) {
 475                     #  write  \index{plain_text_entry@TeX_string_entry}
 476                 print LATEX  "%\n\\index{$index\@$rest2}%\n";
 477             }
 478           noindex:
 479             ;
 480         }
 481         elsif ($cmd eq 'pod') {
 482             ;   # recognise the pod directive, as no op (hs)
 483         }
 484         elsif ($cmd eq 'pod') {
 485             ;    # recognise the pod directive, as no op (hs)
 486         }
 487         else {
 488             warn "Unrecognized directive: $cmd\n";
 489         }
 490     }
 491     else {                                      # if not command
 492         &escapes;
 493         $_ = &clear_noremap($_);
 494         $_ = &expand_HTML_escapes($_);
 495
 496         # if the present paragraphs follows an =item declaration,
 497         # put a line break.
 498         if ($lastcmd eq 'item' &&
 499             $rightafter_item eq 'yes' && $itemshort eq "no") {
 500             print LATEX  "\\hfil\\\\";
 501             $rightafter_item = 'no';
 502         }
 503         print LATEX  "\n",$_;
 504     }
 505 }
 506
 507 print LATEX  "\n";
 508 close(POD);
 509 close(LATEX);
 510
 511
 512 #########################################################################
 513
 514 sub do_hdr {
 515     print LATEX "% LaTeX document produced by pod2latex from \"$pod.pod\".\n";
 516     print LATEX "% The followings need be defined in the preamble of this document:\n";
 517     print LATEX "%\\def\\C++{{\\rm C\\kern-.05em\\raise.3ex\\hbox{\\footnotesize ++}}}\n";
 518     print LATEX "%\\def\\underscore{\\leavevmode\\kern.04em\\vbox{\\hrule width 0.4em height 0.3pt}}\n";
 519     print LATEX "%\\setlength{\\parindent}{0pt}\n";
 520     print LATEX "\n";
 521     $podq = &escape_tex_specials("\U$pod\E");
 522     print LATEX "\\section{$podq}%\n";
 523     print LATEX "\\index{$podq}";
 524     print LATEX "\n";
 525 }
 526
 527 sub nobreak {
 528     my $string = shift;
 529     $string =~ s/ +/~/g;                # TeX no line break
 530     $string;
 531 }
 532
 533 sub noremap {
 534     local($thing_to_hide) = shift;
 535     $thing_to_hide =~ tr/\000-\177/\200-\377/;
 536     return $thing_to_hide;
 537 }
 538
 539 sub init_noremap {
 540         # escape high bit characters in input stream
 541         s/([\200-\377])/"E<".ord($1).">"/ge;
 542 }
 543
 544 sub clear_noremap {
 545     local($tmp) = shift;
 546     $tmp =~ tr/\200-\377/\000-\177/;
 547     return $tmp;
 548 }
 549
 550 sub expand_HTML_escapes {
 551     local($s) = $_[0];
 552     $s =~ s { E<((\d+)|([A-Za-z]+))> }
 553     {
 554         do {
 555                 defined($2)
 556                 ? do { chr($2) }
 557                 :
 558             exists $HTML_Escapes{$3}
 559             ? do { $HTML_Escapes{$3} }
 560             : do {
 561                 warn "Unknown escape: $& in $_";
 562                 "E<$1>";
 563             }
 564         }
 565     }egx;
 566     return $s;
 567 }
 568
 569 sub escapes {
 570     # make C++ into \C++, which is to be defined as
 571     # \def\C++{{\rm C\kern-.05em\raise.3ex\hbox{\footnotesize ++}}}
 572     s/\bC\+\+/\\C++{}/g;
 573 }
 574
 575 # Translate a string into a TeX \tt string to obtain a verbatim print out.
 576 # TeX special characters are escaped by \.
 577 # This can be used inside of LaTeX command arguments.
 578 # We don't use LaTeX \verb since it doesn't work inside of command arguments.
 579 sub alltt {
 580     local($str) = shift;
 581         # other chars than #,\,$,%,&,{,},_,\,^,~ ([ and ] included).
 582     $str =~ s/([^${backslash_escapables}\\\^\~]+)/&noremap("$&")/eg;
 583         # chars #,\,$,%,&,{,}  =>  \# , ...
 584     $str =~ s/([$backslash_escapables2])/&noremap("\\$&")/eg;
 585         # chars _,\,^,~  =>  \char`\_ , ...
 586     $str =~ s/_/&noremap("\\char`\\_")/eg;
 587     $str =~ s/\\/&noremap("\\char`\\\\")/ge;
 588     $str =~ s/\^/\\char`\\^/g;
 589     $str =~ s/\~/\\char`\\~/g;
 590
 591     $str =~ tr/\200-\377/\000-\177/;            # put back
 592     $str = "{\\tt ".$str."}";                   # make it a \tt string
 593     return $str;
 594 }
 595
 596 sub escape_tex_specials {
 597     local($str) = shift;
 598         # other chars than #,\,$,%,&,{,},  _,\,^,~ ([ and ] included).
 599     # backslash escapable characters #,\,$,%,&,{,} except _.
 600     $str =~ s/([$backslash_escapables2])/&noremap("\\$1")/ge;
 601     $str =~ s/_/&noremap("\\underscore{}")/ge;  # \_ is too thin.
 602     # quote TeX special characters |, ^, ~, \.
 603     $str =~ s/\|/&noremap("\$|\$")/ge;
 604     $str =~ s/\^/&noremap("\$\\hat{\\hspace{0.4em}}\$")/ge;
 605     $str =~ s/\~/&noremap("\$\\tilde{\\hspace{0.4em}}\$")/ge;
 606     $str =~ s/\\/&noremap("\$\\backslash{}\$")/ge;
 607     # characters need to be treated differently in TeX
 608     # *
 609     $str =~ s/[*]/&noremap("\$\\ast\$")/ge;
 610     # escape < and > as math string,
 611     $str =~ s/</&noremap("\$<\$")/ge;
 612     $str =~ s/>/&noremap("\$>\$")/ge;
 613     $str =~ tr/\200-\377/\000-\177/;            # put back
 614     return $str;
 615 }
 616
 617 sub internal_lrefs {
 618     local($_) = shift;
 619
 620     s{L</([^>]+)>}{$1}g;
 621     my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
 622     my $retstr = "the ";
 623     my $i;
 624     for ($i = 0; $i <= $#items; $i++) {
 625         $retstr .= "C<$items[$i]>";
 626         $retstr .= ", " if @items > 2 && $i != $#items;
 627         $retstr .= " and " if $i+2 == @items;
 628     }
 629     $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
 630             .  " elsewhere in this document";
 631
 632     return $retstr;
 633 }
 634
 635 # map of HTML escapes to TeX escapes.
 636 BEGIN {
 637 %HTML_Escapes = (
 638     'amp'       =>      '&',    #   ampersand
 639     'lt'        =>      '<',    #   left chevron, less-than
 640     'gt'        =>      '>',    #   right chevron, greater-than
 641     'quot'      =>      '"',    #   double quote
 642
 643     "Aacute"    =>      "\\'{A}",       #   capital A, acute accent
 644     "aacute"    =>      "\\'{a}",       #   small a, acute accent
 645     "Acirc"     =>      "\\^{A}",       #   capital A, circumflex accent
 646     "acirc"     =>      "\\^{a}",       #   small a, circumflex accent
 647     "AElig"     =>      '\\AE',         #   capital AE diphthong (ligature)
 648     "aelig"     =>      '\\ae',         #   small ae diphthong (ligature)
 649     "Agrave"    =>      "\\`{A}",       #   capital A, grave accent
 650     "agrave"    =>      "\\`{a}",       #   small a, grave accent
 651     "Aring"     =>      '\\u{A}',       #   capital A, ring
 652     "aring"     =>      '\\u{a}',       #   small a, ring
 653     "Atilde"    =>      '\\~{A}',       #   capital A, tilde
 654     "atilde"    =>      '\\~{a}',       #   small a, tilde
 655     "Auml"      =>      '\\"{A}',       #   capital A, dieresis or umlaut mark
 656     "auml"      =>      '\\"{a}',       #   small a, dieresis or umlaut mark
 657     "Ccedil"    =>      '\\c{C}',       #   capital C, cedilla
 658     "ccedil"    =>      '\\c{c}',       #   small c, cedilla
 659     "Eacute"    =>      "\\'{E}",       #   capital E, acute accent
 660     "eacute"    =>      "\\'{e}",       #   small e, acute accent
 661     "Ecirc"     =>      "\\^{E}",       #   capital E, circumflex accent
 662     "ecirc"     =>      "\\^{e}",       #   small e, circumflex accent
 663     "Egrave"    =>      "\\`{E}",       #   capital E, grave accent
 664     "egrave"    =>      "\\`{e}",       #   small e, grave accent
 665     "ETH"       =>      '\\OE',         #   capital Eth, Icelandic
 666     "eth"       =>      '\\oe',         #   small eth, Icelandic
 667     "Euml"      =>      '\\"{E}',       #   capital E, dieresis or umlaut mark
 668     "euml"      =>      '\\"{e}',       #   small e, dieresis or umlaut mark
 669     "Iacute"    =>      "\\'{I}",       #   capital I, acute accent
 670     "iacute"    =>      "\\'{i}",       #   small i, acute accent
 671     "Icirc"     =>      "\\^{I}",       #   capital I, circumflex accent
 672     "icirc"     =>      "\\^{i}",       #   small i, circumflex accent
 673     "Igrave"    =>      "\\`{I}",       #   capital I, grave accent
 674     "igrave"    =>      "\\`{i}",       #   small i, grave accent
 675     "Iuml"      =>      '\\"{I}',       #   capital I, dieresis or umlaut mark
 676     "iuml"      =>      '\\"{i}',       #   small i, dieresis or umlaut mark
 677     "Ntilde"    =>      '\\~{N}',       #   capital N, tilde
 678     "ntilde"    =>      '\\~{n}',       #   small n, tilde
 679     "Oacute"    =>      "\\'{O}",       #   capital O, acute accent
 680     "oacute"    =>      "\\'{o}",       #   small o, acute accent
 681     "Ocirc"     =>      "\\^{O}",       #   capital O, circumflex accent
 682     "ocirc"     =>      "\\^{o}",       #   small o, circumflex accent
 683     "Ograve"    =>      "\\`{O}",       #   capital O, grave accent
 684     "ograve"    =>      "\\`{o}",       #   small o, grave accent
 685     "Oslash"    =>      "\\O",          #   capital O, slash
 686     "oslash"    =>      "\\o",          #   small o, slash
 687     "Otilde"    =>      "\\~{O}",       #   capital O, tilde
 688     "otilde"    =>      "\\~{o}",       #   small o, tilde
 689     "Ouml"      =>      '\\"{O}',       #   capital O, dieresis or umlaut mark
 690     "ouml"      =>      '\\"{o}',       #   small o, dieresis or umlaut mark
 691     "szlig"     =>      '\\ss{}',       #   small sharp s, German (sz ligature)
 692     "THORN"     =>      '\\L',          #   capital THORN, Icelandic
 693     "thorn"     =>      '\\l',,         #   small thorn, Icelandic
 694     "Uacute"    =>      "\\'{U}",       #   capital U, acute accent
 695     "uacute"    =>      "\\'{u}",       #   small u, acute accent
 696     "Ucirc"     =>      "\\^{U}",       #   capital U, circumflex accent
 697     "ucirc"     =>      "\\^{u}",       #   small u, circumflex accent
 698     "Ugrave"    =>      "\\`{U}",       #   capital U, grave accent
 699     "ugrave"    =>      "\\`{u}",       #   small u, grave accent
 700     "Uuml"      =>      '\\"{U}',       #   capital U, dieresis or umlaut mark
 701     "uuml"      =>      '\\"{u}',       #   small u, dieresis or umlaut mark
 702     "Yacute"    =>      "\\'{Y}",       #   capital Y, acute accent
 703     "yacute"    =>      "\\'{y}",       #   small y, acute accent
 704     "yuml"      =>      '\\"{y}',       #   small y, dieresis or umlaut mark
 705 );
 706 }
 707 !NO!SUBS!
 708
 709 close OUT or die "Can't close $file: $!";
 710 chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
 711 exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
 712 chdir $origdir;