package Pod::LaTeX;
-# Copyright (C) 2000 by Tim Jenness <t.jenness@jach.hawaii.edu>
-# All Rights Reserved.
-
=head1 NAME
Pod::LaTeX - Convert Pod data to formatted Latex
use vars qw/ $VERSION %HTML_Escapes @LatexSections /;
-$VERSION = '0.53';
+$VERSION = '0.55';
# Definitions of =headN -> latex mapping
@LatexSections = (qw/
subparagraph
/);
-# Standard escape sequences converted to Latex
-# Up to "yuml" these are taken from the original pod2latex
-# command written by Taro Kawagish (kawagish@imslab.co.jp)
+# Standard escape sequences converted to Latex.
+# The Unicode name of each character is given in the comments.
+# Complete LaTeX set added by Peter Acklam.
%HTML_Escapes = (
- 'amp' => '&', # ampersand
- 'lt' => '$<$', # ' left chevron, less-than
- 'gt' => '$>$', # ' right chevron, greater-than
- 'quot' => '"', # double quote
- 'sol' => '/',
- 'verbar' => '$|$',
-
- "Aacute" => "\\'{A}", # capital A, acute accent
- "aacute" => "\\'{a}", # small a, acute accent
- "Acirc" => "\\^{A}", # capital A, circumflex accent
- "acirc" => "\\^{a}", # small a, circumflex accent
- "AElig" => '\\AE', # capital AE diphthong (ligature)
- "aelig" => '\\ae', # small ae diphthong (ligature)
- "Agrave" => "\\`{A}", # capital A, grave accent
- "agrave" => "\\`{a}", # small a, grave accent
- "Aring" => '\\u{A}', # capital A, ring
- "aring" => '\\u{a}', # small a, ring
- "Atilde" => '\\~{A}', # capital A, tilde
- "atilde" => '\\~{a}', # small a, tilde
- "Auml" => '\\"{A}', # capital A, dieresis or umlaut mark
- "auml" => '\\"{a}', # small a, dieresis or umlaut mark
- "Ccedil" => '\\c{C}', # capital C, cedilla
- "ccedil" => '\\c{c}', # small c, cedilla
- "Eacute" => "\\'{E}", # capital E, acute accent
- "eacute" => "\\'{e}", # small e, acute accent
- "Ecirc" => "\\^{E}", # capital E, circumflex accent
- "ecirc" => "\\^{e}", # small e, circumflex accent
- "Egrave" => "\\`{E}", # capital E, grave accent
- "egrave" => "\\`{e}", # small e, grave accent
- "ETH" => '\\OE', # capital Eth, Icelandic
- "eth" => '\\oe', # small eth, Icelandic
- "Euml" => '\\"{E}', # capital E, dieresis or umlaut mark
- "euml" => '\\"{e}', # small e, dieresis or umlaut mark
- "Iacute" => "\\'{I}", # capital I, acute accent
- "iacute" => "\\'{i}", # small i, acute accent
- "Icirc" => "\\^{I}", # capital I, circumflex accent
- "icirc" => "\\^{i}", # small i, circumflex accent
- "Igrave" => "\\`{I}", # capital I, grave accent
- "igrave" => "\\`{i}", # small i, grave accent
- "Iuml" => '\\"{I}', # capital I, dieresis or umlaut mark
- "iuml" => '\\"{i}', # small i, dieresis or umlaut mark
- "Ntilde" => '\\~{N}', # capital N, tilde
- "ntilde" => '\\~{n}', # small n, tilde
- "Oacute" => "\\'{O}", # capital O, acute accent
- "oacute" => "\\'{o}", # small o, acute accent
- "Ocirc" => "\\^{O}", # capital O, circumflex accent
- "ocirc" => "\\^{o}", # small o, circumflex accent
- "Ograve" => "\\`{O}", # capital O, grave accent
- "ograve" => "\\`{o}", # small o, grave accent
- "Oslash" => "\\O", # capital O, slash
- "oslash" => "\\o", # small o, slash
- "Otilde" => "\\~{O}", # capital O, tilde
- "otilde" => "\\~{o}", # small o, tilde
- "Ouml" => '\\"{O}', # capital O, dieresis or umlaut mark
- "ouml" => '\\"{o}', # small o, dieresis or umlaut mark
- "szlig" => '\\ss{}', # small sharp s, German (sz ligature)
- "THORN" => '\\L', # capital THORN, Icelandic
- "thorn" => '\\l',, # small thorn, Icelandic
- "Uacute" => "\\'{U}", # capital U, acute accent
- "uacute" => "\\'{u}", # small u, acute accent
- "Ucirc" => "\\^{U}", # capital U, circumflex accent
- "ucirc" => "\\^{u}", # small u, circumflex accent
- "Ugrave" => "\\`{U}", # capital U, grave accent
- "ugrave" => "\\`{u}", # small u, grave accent
- "Uuml" => '\\"{U}', # capital U, dieresis or umlaut mark
- "uuml" => '\\"{u}', # small u, dieresis or umlaut mark
- "Yacute" => "\\'{Y}", # capital Y, acute accent
- "yacute" => "\\'{y}", # small y, acute accent
- "yuml" => '\\"{y}', # small y, dieresis or umlaut mark
-
- # Added by TimJ
-
- "iexcl" => '!`', # inverted exclamation mark
-# "cent" => ' ', # cent sign
- "pound" => '\pounds', # (UK) pound sign
-# "curren" => ' ', # currency sign
-# "yen" => ' ', # yen sign
-# "brvbar" => ' ', # broken vertical bar
- "sect" => '\S', # section sign
- "uml" => '\"{}', # diaresis
- "copy" => '\copyright', # Copyright symbol
-# "ordf" => ' ', # feminine ordinal indicator
- "laquo" => '$\ll$', # ' # left pointing double angle quotation mark
- "not" => '$\neg$', # ' # not sign
- "shy" => '-', # soft hyphen
-# "reg" => ' ', # registered trademark
- "macr" => '$^-$', # ' # macron, overline
- "deg" => '$^\circ$', # ' # degree sign
- "plusmn" => '$\pm$', # ' # plus-minus sign
- "sup2" => '$^2$', # ' # superscript 2
- "sup3" => '$^3$', # ' # superscript 3
- "acute" => "\\'{}", # acute accent
- "micro" => '$\mu$', # micro sign
- "para" => '\P', # pilcrow sign = paragraph sign
- "middot" => '$\cdot$', # middle dot = Georgian comma
- "cedil" => '\c{}', # cedilla
- "sup1" => '$^1$', # ' # superscript 1
-# "ordm" => ' ', # masculine ordinal indicator
- "raquo" => '$\gg$', # ' # right pointing double angle quotation mark
- "frac14" => '$\frac{1}{4}$', # ' # vulgar fraction one quarter
- "frac12" => '$\frac{1}{2}$', # ' # vulgar fraction one half
- "frac34" => '$\frac{3}{4}$', # ' # vulgar fraction three quarters
- "iquest" => "?'", # inverted question mark
- "times" => '$\times$', # ' # multiplication sign
- "divide" => '$\div$', # division sign
-
- # Greek letters using HTML codes
- "alpha" => '$\alpha$', # '
- "beta" => '$\beta$', # '
- "gamma" => '$\gamma$', # '
- "delta" => '$\delta$', # '
- "epsilon"=> '$\epsilon$', # '
- "zeta" => '$\zeta$', # '
- "eta" => '$\eta$', # '
- "theta" => '$\theta$', # '
- "iota" => '$\iota$', # '
- "kappa" => '$\kappa$', # '
- "lambda" => '$\lambda$', # '
- "mu" => '$\mu$', # '
- "nu" => '$\nu$', # '
- "xi" => '$\xi$', # '
- "omicron"=> '$o$', # '
- "pi" => '$\pi$', # '
- "rho" => '$\rho$', # '
- "sigma" => '$\sigma$', # '
- "tau" => '$\tau$', # '
- "upsilon"=> '$\upsilon$', # '
- "phi" => '$\phi$', # '
- "chi" => '$\chi$', # '
- "psi" => '$\psi$', # '
- "omega" => '$\omega$', # '
-
- "Alpha" => '$A$', # '
- "Beta" => '$B$', # '
- "Gamma" => '$\Gamma$', # '
- "Delta" => '$\Delta$', # '
- "Epsilon"=> '$E$', # '
- "Zeta" => '$Z$', # '
- "Eta" => '$H$', # '
- "Theta" => '$\Theta$', # '
- "Iota" => '$I$', # '
- "Kappa" => '$K$', # '
- "Lambda" => '$\Lambda$', # '
- "Mu" => '$M$', # '
- "Nu" => '$N$', # '
- "Xi" => '$\Xi$', # '
- "Omicron"=> '$O$', # '
- "Pi" => '$\Pi$', # '
- "Rho" => '$R$', # '
- "Sigma" => '$\Sigma$', # '
- "Tau" => '$T$', # '
- "Upsilon"=> '$\Upsilon$', # '
- "Phi" => '$\Phi$', # '
- "Chi" => '$X$', # '
- "Psi" => '$\Psi$', # '
- "Omega" => '$\Omega$', # '
-
-
+ 'sol' => '\textfractionsolidus{}', # xxx - or should it be just '/'
+ 'verbar' => '|',
+
+ # The stuff below is based on the information available at
+ # http://www.w3.org/TR/html401/sgml/entities.html
+
+ # All characters in the range 0xA0-0xFF of the ISO 8859-1 character set.
+ # Several of these characters require the `textcomp' LaTeX package.
+ 'nbsp' => q|~|, # 0xA0 - no-break space = non-breaking space
+ 'iexcl' => q|\textexclamdown{}|, # 0xA1 - inverted exclamation mark
+ 'cent' => q|\textcent{}|, # 0xA2 - cent sign
+ 'pound' => q|\textsterling{}|, # 0xA3 - pound sign
+ 'curren' => q|\textcurrency{}|, # 0xA4 - currency sign
+ 'yen' => q|\textyen{}|, # 0xA5 - yen sign = yuan sign
+ 'brvbar' => q|\textbrokenbar{}|, # 0xA6 - broken bar = broken vertical bar
+ 'sect' => q|\textsection{}|, # 0xA7 - section sign
+ 'uml' => q|\textasciidieresis{}|, # 0xA8 - diaeresis = spacing diaeresis
+ 'copy' => q|\textcopyright{}|, # 0xA9 - copyright sign
+ 'ordf' => q|\textordfeminine{}|, # 0xAA - feminine ordinal indicator
+ 'laquo' => q|\guillemotleft{}|, # 0xAB - left-pointing double angle quotation mark = left pointing guillemet
+ 'not' => q|\textlnot{}|, # 0xAC - not sign
+ 'shy' => q|\-|, # 0xAD - soft hyphen = discretionary hyphen
+ 'reg' => q|\textregistered{}|, # 0xAE - registered sign = registered trade mark sign
+ 'macr' => q|\textasciimacron{}|, # 0xAF - macron = spacing macron = overline = APL overbar
+ 'deg' => q|\textdegree{}|, # 0xB0 - degree sign
+ 'plusmn' => q|\textpm{}|, # 0xB1 - plus-minus sign = plus-or-minus sign
+ 'sup2' => q|\texttwosuperior{}|, # 0xB2 - superscript two = superscript digit two = squared
+ 'sup3' => q|\textthreesuperior{}|, # 0xB3 - superscript three = superscript digit three = cubed
+ 'acute' => q|\textasciiacute{}|, # 0xB4 - acute accent = spacing acute
+ 'micro' => q|\textmu{}|, # 0xB5 - micro sign
+ 'para' => q|\textparagraph{}|, # 0xB6 - pilcrow sign = paragraph sign
+ 'middot' => q|\textperiodcentered{}|, # 0xB7 - middle dot = Georgian comma = Greek middle dot
+ 'cedil' => q|\c{}|, # 0xB8 - cedilla = spacing cedilla
+ 'sup1' => q|\textonesuperior{}|, # 0xB9 - superscript one = superscript digit one
+ 'ordm' => q|\textordmasculine{}|, # 0xBA - masculine ordinal indicator
+ 'raquo' => q|\guillemotright{}|, # 0xBB - right-pointing double angle quotation mark = right pointing guillemet
+ 'frac14' => q|\textonequarter{}|, # 0xBC - vulgar fraction one quarter = fraction one quarter
+ 'frac12' => q|\textonehalf{}|, # 0xBD - vulgar fraction one half = fraction one half
+ 'frac34' => q|\textthreequarters{}|, # 0xBE - vulgar fraction three quarters = fraction three quarters
+ 'iquest' => q|\textquestiondown{}|, # 0xBF - inverted question mark = turned question mark
+ 'Agrave' => q|\`A|, # 0xC0 - latin capital letter A with grave = latin capital letter A grave
+ 'Aacute' => q|\'A|, # 0xC1 - latin capital letter A with acute
+ 'Acirc' => q|\^A|, # 0xC2 - latin capital letter A with circumflex
+ 'Atilde' => q|\~A|, # 0xC3 - latin capital letter A with tilde
+ 'Auml' => q|\"A|, # 0xC4 - latin capital letter A with diaeresis
+ 'Aring' => q|\AA{}|, # 0xC5 - latin capital letter A with ring above = latin capital letter A ring
+ 'AElig' => q|\AE{}|, # 0xC6 - latin capital letter AE = latin capital ligature AE
+ 'Ccedil' => q|\c{C}|, # 0xC7 - latin capital letter C with cedilla
+ 'Egrave' => q|\`E|, # 0xC8 - latin capital letter E with grave
+ 'Eacute' => q|\'E|, # 0xC9 - latin capital letter E with acute
+ 'Ecirc' => q|\^E|, # 0xCA - latin capital letter E with circumflex
+ 'Euml' => q|\"E|, # 0xCB - latin capital letter E with diaeresis
+ 'Igrave' => q|\`I|, # 0xCC - latin capital letter I with grave
+ 'Iacute' => q|\'I|, # 0xCD - latin capital letter I with acute
+ 'Icirc' => q|\^I|, # 0xCE - latin capital letter I with circumflex
+ 'Iuml' => q|\"I|, # 0xCF - latin capital letter I with diaeresis
+ 'ETH' => q|\DH{}|, # 0xD0 - latin capital letter ETH
+ 'Ntilde' => q|\~N|, # 0xD1 - latin capital letter N with tilde
+ 'Ograve' => q|\`O|, # 0xD2 - latin capital letter O with grave
+ 'Oacute' => q|\'O|, # 0xD3 - latin capital letter O with acute
+ 'Ocirc' => q|\^O|, # 0xD4 - latin capital letter O with circumflex
+ 'Otilde' => q|\~O|, # 0xD5 - latin capital letter O with tilde
+ 'Ouml' => q|\"O|, # 0xD6 - latin capital letter O with diaeresis
+ 'times' => q|\texttimes{}|, # 0xD7 - multiplication sign
+ 'Oslash' => q|\O{}|, # 0xD8 - latin capital letter O with stroke = latin capital letter O slash
+ 'Ugrave' => q|\`U|, # 0xD9 - latin capital letter U with grave
+ 'Uacute' => q|\'U|, # 0xDA - latin capital letter U with acute
+ 'Ucirc' => q|\^U|, # 0xDB - latin capital letter U with circumflex
+ 'Uuml' => q|\"U|, # 0xDC - latin capital letter U with diaeresis
+ 'Yacute' => q|\'Y|, # 0xDD - latin capital letter Y with acute
+ 'THORN' => q|\TH{}|, # 0xDE - latin capital letter THORN
+ 'szlig' => q|\ss{}|, # 0xDF - latin small letter sharp s = ess-zed
+ 'agrave' => q|\`a|, # 0xE0 - latin small letter a with grave = latin small letter a grave
+ 'aacute' => q|\'a|, # 0xE1 - latin small letter a with acute
+ 'acirc' => q|\^a|, # 0xE2 - latin small letter a with circumflex
+ 'atilde' => q|\~a|, # 0xE3 - latin small letter a with tilde
+ 'auml' => q|\"a|, # 0xE4 - latin small letter a with diaeresis
+ 'aring' => q|\aa{}|, # 0xE5 - latin small letter a with ring above = latin small letter a ring
+ 'aelig' => q|\ae{}|, # 0xE6 - latin small letter ae = latin small ligature ae
+ 'ccedil' => q|\c{c}|, # 0xE7 - latin small letter c with cedilla
+ 'egrave' => q|\`e|, # 0xE8 - latin small letter e with grave
+ 'eacute' => q|\'e|, # 0xE9 - latin small letter e with acute
+ 'ecirc' => q|\^e|, # 0xEA - latin small letter e with circumflex
+ 'euml' => q|\"e|, # 0xEB - latin small letter e with diaeresis
+ 'igrave' => q|\`i|, # 0xEC - latin small letter i with grave
+ 'iacute' => q|\'i|, # 0xED - latin small letter i with acute
+ 'icirc' => q|\^i|, # 0xEE - latin small letter i with circumflex
+ 'iuml' => q|\"i|, # 0xEF - latin small letter i with diaeresis
+ 'eth' => q|\dh{}|, # 0xF0 - latin small letter eth
+ 'ntilde' => q|\~n|, # 0xF1 - latin small letter n with tilde
+ 'ograve' => q|\`o|, # 0xF2 - latin small letter o with grave
+ 'oacute' => q|\'o|, # 0xF3 - latin small letter o with acute
+ 'ocirc' => q|\^o|, # 0xF4 - latin small letter o with circumflex
+ 'otilde' => q|\~o|, # 0xF5 - latin small letter o with tilde
+ 'ouml' => q|\"o|, # 0xF6 - latin small letter o with diaeresis
+ 'divide' => q|\textdiv{}|, # 0xF7 - division sign
+ 'oslash' => q|\o{}|, # 0xF8 - latin small letter o with stroke, = latin small letter o slash
+ 'ugrave' => q|\`u|, # 0xF9 - latin small letter u with grave
+ 'uacute' => q|\'u|, # 0xFA - latin small letter u with acute
+ 'ucirc' => q|\^u|, # 0xFB - latin small letter u with circumflex
+ 'uuml' => q|\"u|, # 0xFC - latin small letter u with diaeresis
+ 'yacute' => q|\'y|, # 0xFD - latin small letter y with acute
+ 'thorn' => q|\th{}|, # 0xFE - latin small letter thorn
+ 'yuml' => q|\"y|, # 0xFF - latin small letter y with diaeresis
+
+ # Latin Extended-B
+ 'fnof' => q|\textflorin{}|, # latin small f with hook = function = florin
+
+ # Greek
+ 'Alpha' => q|$\mathrm{A}$|, # greek capital letter alpha
+ 'Beta' => q|$\mathrm{B}$|, # greek capital letter beta
+ 'Gamma' => q|$\Gamma$|, # greek capital letter gamma
+ 'Delta' => q|$\Delta$|, # greek capital letter delta
+ 'Epsilon' => q|$\mathrm{E}$|, # greek capital letter epsilon
+ 'Zeta' => q|$\mathrm{Z}$|, # greek capital letter zeta
+ 'Eta' => q|$\mathrm{H}$|, # greek capital letter eta
+ 'Theta' => q|$\Theta$|, # greek capital letter theta
+ 'Iota' => q|$\mathrm{I}$|, # greek capital letter iota
+ 'Kappa' => q|$\mathrm{K}$|, # greek capital letter kappa
+ 'Lambda' => q|$\Lambda$|, # greek capital letter lambda
+ 'Mu' => q|$\mathrm{M}$|, # greek capital letter mu
+ 'Nu' => q|$\mathrm{N}$|, # greek capital letter nu
+ 'Xi' => q|$\Xi$|, # greek capital letter xi
+ 'Omicron' => q|$\mathrm{O}$|, # greek capital letter omicron
+ 'Pi' => q|$\Pi$|, # greek capital letter pi
+ 'Rho' => q|$\mathrm{R}$|, # greek capital letter rho
+ 'Sigma' => q|$\Sigma$|, # greek capital letter sigma
+ 'Tau' => q|$\mathrm{T}$|, # greek capital letter tau
+ 'Upsilon' => q|$\Upsilon$|, # greek capital letter upsilon
+ 'Phi' => q|$\Phi$|, # greek capital letter phi
+ 'Chi' => q|$\mathrm{X}$|, # greek capital letter chi
+ 'Psi' => q|$\Psi$|, # greek capital letter psi
+ 'Omega' => q|$\Omega$|, # greek capital letter omega
+
+ 'alpha' => q|$\alpha$|, # greek small letter alpha
+ 'beta' => q|$\beta$|, # greek small letter beta
+ 'gamma' => q|$\gamma$|, # greek small letter gamma
+ 'delta' => q|$\delta$|, # greek small letter delta
+ 'epsilon' => q|$\epsilon$|, # greek small letter epsilon
+ 'zeta' => q|$\zeta$|, # greek small letter zeta
+ 'eta' => q|$\eta$|, # greek small letter eta
+ 'theta' => q|$\theta$|, # greek small letter theta
+ 'iota' => q|$\iota$|, # greek small letter iota
+ 'kappa' => q|$\kappa$|, # greek small letter kappa
+ 'lambda' => q|$\lambda$|, # greek small letter lambda
+ 'mu' => q|$\mu$|, # greek small letter mu
+ 'nu' => q|$\nu$|, # greek small letter nu
+ 'xi' => q|$\xi$|, # greek small letter xi
+ 'omicron' => q|$o$|, # greek small letter omicron
+ 'pi' => q|$\pi$|, # greek small letter pi
+ 'rho' => q|$\rho$|, # greek small letter rho
+# 'sigmaf' => q||, # greek small letter final sigma
+ 'sigma' => q|$\sigma$|, # greek small letter sigma
+ 'tau' => q|$\tau$|, # greek small letter tau
+ 'upsilon' => q|$\upsilon$|, # greek small letter upsilon
+ 'phi' => q|$\phi$|, # greek small letter phi
+ 'chi' => q|$\chi$|, # greek small letter chi
+ 'psi' => q|$\psi$|, # greek small letter psi
+ 'omega' => q|$\omega$|, # greek small letter omega
+# 'thetasym' => q||, # greek small letter theta symbol
+# 'upsih' => q||, # greek upsilon with hook symbol
+# 'piv' => q||, # greek pi symbol
+
+ # General Punctuation
+ 'bull' => q|\textbullet{}|, # bullet = black small circle
+ # bullet is NOT the same as bullet operator
+ 'hellip' => q|\textellipsis{}|, # horizontal ellipsis = three dot leader
+ 'prime' => q|\textquotesingle{}|, # prime = minutes = feet
+ 'Prime' => q|\textquotedbl{}|, # double prime = seconds = inches
+ 'oline' => q|\textasciimacron{}|, # overline = spacing overscore
+ 'frasl' => q|\textfractionsolidus{}|, # fraction slash
+
+ # Letterlike Symbols
+ 'weierp' => q|$\wp$|, # script capital P = power set = Weierstrass p
+ 'image' => q|$\Im$|, # blackletter capital I = imaginary part (LaTeX \Im)
+ 'real' => q|$\Re$|, # blackletter capital R = real part symbol (LaTeX \Re)
+ 'trade' => q|\texttrademark{}|, # trade mark sign
+# 'alefsym' => q||, # alef symbol = first transfinite cardinal
+ # alef symbol is NOT the same as hebrew letter alef, although the same
+ # glyph could be used to depict both characters
+
+ # Arrows
+ 'larr' => q|\textleftarrow{}|, # leftwards arrow
+ 'uarr' => q|\textuparrow{}|, # upwards arrow
+ 'rarr' => q|\textrightarrow{}|, # rightwards arrow
+ 'darr' => q|\textdownarrow{}|, # downwards arrow
+ 'harr' => q|$\leftrightarrow$|, # left right arrow
+# 'crarr' => q||, # downwards arrow with corner leftwards = carriage return
+ 'lArr' => q|$\Leftarrow$|, # leftwards double arrow
+ # ISO 10646 does not say that lArr is the same as the 'is implied by'
+ # arrow but also does not have any other character for that function. So
+ # lArr can be used for 'is implied by' as ISOtech suggests
+ 'uArr' => q|$\Uparrow$|, # upwards double arrow
+ 'rArr' => q|$\Rightarrow$|, # rightwards double arrow
+ # ISO 10646 does not say this is the 'implies' character but does not
+ # have another character with this function so ? rArr can be used for
+ # 'implies' as ISOtech suggests
+ 'dArr' => q|$\Downarrow$|, # downwards double arrow
+ 'hArr' => q|$\Leftrightarrow$|, # left right double arrow
+
+ # Mathematical Operators.
+ # Some of these require the `amssymb' package.
+ 'forall' => q|$\forall$|, # for all
+ 'part' => q|$\partial$|, # partial differential
+ 'exist' => q|$\exists$|, # there exists
+ 'empty' => q|$\emptyset$|, # empty set = null set = diameter
+ 'nabla' => q|$\nabla$|, # nabla = backward difference
+ 'isin' => q|$\in$|, # element of
+ 'notin' => q|$\notin$|, # not an element of
+ 'ni' => q|$\ni$|, # contains as member
+ 'prod' => q|$\prod$|, # n-ary product = product sign
+ # prod is NOT the same character as 'greek capital letter pi' though the
+ # same glyph might be used for both
+ 'sum' => q|$\sum$|, # n-ary sumation
+ # sum is NOT the same character as 'greek capital letter sigma' though
+ # the same glyph might be used for both
+ 'minus' => q|$-$|, # minus sign
+ 'lowast' => q|$\ast$|, # asterisk operator
+ 'radic' => q|$\surd$|, # square root = radical sign
+ 'prop' => q|$\propto$|, # proportional to
+ 'infin' => q|$\infty$|, # infinity
+ 'ang' => q|$\angle$|, # angle
+ 'and' => q|$\wedge$|, # logical and = wedge
+ 'or' => q|$\vee$|, # logical or = vee
+ 'cap' => q|$\cap$|, # intersection = cap
+ 'cup' => q|$\cup$|, # union = cup
+ 'int' => q|$\int$|, # integral
+ 'there4' => q|$\therefore$|, # therefore
+ 'sim' => q|$\sim$|, # tilde operator = varies with = similar to
+ # tilde operator is NOT the same character as the tilde
+ 'cong' => q|$\cong$|, # approximately equal to
+ 'asymp' => q|$\asymp$|, # almost equal to = asymptotic to
+ 'ne' => q|$\neq$|, # not equal to
+ 'equiv' => q|$\equiv$|, # identical to
+ 'le' => q|$\leq$|, # less-than or equal to
+ 'ge' => q|$\geq$|, # greater-than or equal to
+ 'sub' => q|$\subset$|, # subset of
+ 'sup' => q|$\supset$|, # superset of
+ # note that nsup, 'not a superset of' is not covered by the Symbol font
+ # encoding and is not included.
+ 'nsub' => q|$\not\subset$|, # not a subset of
+ 'sube' => q|$\subseteq$|, # subset of or equal to
+ 'supe' => q|$\supseteq$|, # superset of or equal to
+ 'oplus' => q|$\oplus$|, # circled plus = direct sum
+ 'otimes' => q|$\otimes$|, # circled times = vector product
+ 'perp' => q|$\perp$|, # up tack = orthogonal to = perpendicular
+ 'sdot' => q|$\cdot$|, # dot operator
+ # dot operator is NOT the same character as middle dot
+
+ # Miscellaneous Technical
+ 'lceil' => q|$\lceil$|, # left ceiling = apl upstile
+ 'rceil' => q|$\rceil$|, # right ceiling
+ 'lfloor' => q|$\lfloor$|, # left floor = apl downstile
+ 'rfloor' => q|$\rfloor$|, # right floor
+ 'lang' => q|$\langle$|, # left-pointing angle bracket = bra
+ # lang is NOT the same character as 'less than' or 'single left-pointing
+ # angle quotation mark'
+ 'rang' => q|$\rangle$|, # right-pointing angle bracket = ket
+ # rang is NOT the same character as 'greater than' or 'single
+ # right-pointing angle quotation mark'
+
+ # Geometric Shapes
+ 'loz' => q|$\lozenge$|, # lozenge
+
+ # Miscellaneous Symbols
+ 'spades' => q|$\spadesuit$|, # black spade suit
+ 'clubs' => q|$\clubsuit$|, # black club suit = shamrock
+ 'hearts' => q|$\heartsuit$|, # black heart suit = valentine
+ 'diams' => q|$\diamondsuit$|, # black diamond suit
+
+ # C0 Controls and Basic Latin
+ 'quot' => q|"|, # quotation mark = APL quote ["]
+ 'amp' => q|\&|, # ampersand
+ 'lt' => q|<|, # less-than sign
+ 'gt' => q|>|, # greater-than sign
+ 'OElig' => q|\OE{}|, # latin capital ligature OE
+ 'oelig' => q|\oe{}|, # latin small ligature oe
+ 'Scaron' => q|\v{S}|, # latin capital letter S with caron
+ 'scaron' => q|\v{s}|, # latin small letter s with caron
+ 'Yuml' => q|\"Y|, # latin capital letter Y with diaeresis
+ 'circ' => q|\textasciicircum{}|, # modifier letter circumflex accent
+ 'tilde' => q|\textasciitilde{}|, # small tilde
+ 'ensp' => q|\phantom{n}|, # en space
+ 'emsp' => q|\hspace{1em}|, # em space
+ 'thinsp' => q|\,|, # thin space
+ 'zwnj' => q|{}|, # zero width non-joiner
+# 'zwj' => q||, # zero width joiner
+# 'lrm' => q||, # left-to-right mark
+# 'rlm' => q||, # right-to-left mark
+ 'ndash' => q|--|, # en dash
+ 'mdash' => q|---|, # em dash
+ 'lsquo' => q|\textquoteleft{}|, # left single quotation mark
+ 'rsquo' => q|\textquoteright{}|, # right single quotation mark
+ 'sbquo' => q|\quotesinglbase{}|, # single low-9 quotation mark
+ 'ldquo' => q|\textquotedblleft{}|, # left double quotation mark
+ 'rdquo' => q|\textquotedblright{}|, # right double quotation mark
+ 'bdquo' => q|\quotedblbase{}|, # double low-9 quotation mark
+ 'dagger' => q|\textdagger{}|, # dagger
+ 'Dagger' => q|\textdaggerdbl{}|, # double dagger
+ 'permil' => q|\textperthousand{}|, # per mille sign
+ 'lsaquo' => q|\guilsinglleft{}|, # single left-pointing angle quotation mark
+ 'rsaquo' => q|\guilsinglright{}|, # single right-pointing angle quotation mark
+ 'euro' => q|\texteuro{}|, # euro sign
);
-
=head1 OBJECT METHODS
The following methods are provided in this module. Methods inherited
# Internals
$self->{_Lists} = []; # For nested lists
$self->{_suppress_all_para} = 0; # For =begin blocks
- $self->{_suppress_next_para} = 0; # For =for blocks
$self->{_dont_modify_any_para}=0; # For =begin blocks
- $self->{_dont_modify_next_para}=0; # For =for blocks
$self->{_CURRENT_HEAD1} = ''; # Name of current HEAD1 section
# Options - only initialise if not already set
This is similar to:
\documentclass{article}
+ \usepackage[T1]{fontenc}
+ \usepackage{textcomp}
\begin{document}
but will be more complicated if table of contents and indexing are required.
If used in conjunction with C<AddPostamble> a full latex document will
be written that could be immediately processed by C<latex>.
+For some pod escapes it may be necessary to include the amssymb
+package. This is not yet added to the preamble automatically.
+
=cut
sub AddPreamble {
\end{document}
-but can be more complicated if an index is required.
+but can be more complicated if an index is required.
Can be used to set or retrieve the current value.
$add = $parser->AddPostamble();
# Table of contents
my $tableofcontents = '\tableofcontents';
-
+
$tableofcontents = '%% ' . $tableofcontents
unless $self->TableOfContents;
# Roll our own
$preamble = << "__TEX_HEADER__";
\\documentclass{article}
+\\usepackage[T1]{fontenc}
+\\usepackage{textcomp}
$comment
# return if we dont care
return if $command eq 'pod';
+ # Store a copy of the raw text in case we are in a =for
+ # block and need to preserve the existing latex
+ my $rawpara = $paragraph;
+
+ # Do the latex escapes
$paragraph = $self->_replace_special_chars($paragraph);
# Interpolate pod sequences in paragraph
$paragraph = $self->interpolate($paragraph, $line_num);
-
$paragraph =~ s/\s+$//;
+ # Replace characters that can only be done after
+ # interpolation of interior sequences
+ $paragraph = $self->_replace_special_chars_late($paragraph);
+
# Now run the command
if ($command eq 'over') {
} elsif ($command eq 'for') {
- # pass through if latex
- if ($paragraph =~ /^latex/i) {
+ # =for latex
+ # some latex
+
+ # With =for we will get the text for the full paragraph
+ # as well as the format name.
+ # We do not get an additional paragraph later on. The next
+ # paragraph is not governed by the =for
+
+ # The first line contains the format and the rest is the
+ # raw code.
+ my ($format, $chunk) = split(/\n/, $rawpara, 2);
+
+ # If we have got some latex code print it out immediately
+ # unmodified. Else do nothing.
+ if ($format =~ /^latex/i) {
# Make sure that next paragraph is not modfied before printing
- $self->{_dont_modify_next_para} = 1;
+ $self->_output( $chunk );
- } else {
- # Suppress the next paragraph unless it is latex
- $self->{_suppress_next_para} = 1
}
} elsif ($command eq 'end') {
my $self = shift;
my ($paragraph, $line_num, $parobj) = @_;
- # Expand paragraph unless in =for or =begin block
- if ($self->{_dont_modify_any_para} || $self->{_dont_modify_next_para}) {
+ # Expand paragraph unless in =begin block
+ if ($self->{_dont_modify_any_para}) {
# Just print as is
$self->_output($paragraph);
- # Reset flag if in =for
- $self->{_dont_modify_next_para} = 0;
-
} else {
return if $paragraph =~ /^\s+$/;
# Clean trailing space
$paragraph =~ s/\s+$//;
- # Clean tabs
- $paragraph =~ s/\t/ /g;
+ # Clean tabs. Routine taken from Tabs.pm
+ # by David Muir Sharnoff muir@idiom.com,
+ # slightly modified by hsmyers@sdragons.com 10/22/01
+ my @l = split("\n",$paragraph);
+ foreach (@l) {
+ 1 while s/(^|\n)([^\t\n]*)(\t+)/
+ $1. $2 . (" " x
+ (8 * length($3)
+ - (length($2) % 8)))
+ /sex;
+ }
+ $paragraph = join("\n",@l);
+ # End of change.
+
+
$self->_output('\begin{verbatim}' . "\n$paragraph\n". '\end{verbatim}'."\n");
}
my ($paragraph, $line_num, $parobj) = @_;
# print Dumper($self);
-
- # Expand paragraph unless in =for or =begin block
- if ($self->{_dont_modify_any_para} || $self->{_dont_modify_next_para}) {
+
+ # Expand paragraph unless in =begin block
+ if ($self->{_dont_modify_any_para}) {
# Just print as is
$self->_output($paragraph);
- # Reset flag if in =for
- $self->{_dont_modify_next_para} = 0;
-
return;
- }
+ }
+
-
# Escape latex special characters
$paragraph = $self->_replace_special_chars($paragraph);
my $expansion = $self->interpolate($paragraph, $line_num);
$expansion =~ s/\s+$//;
+ # Escape special characters that can not be done earlier
+ $expansion = $self->_replace_special_chars_late($expansion);
# If we are replacing 'head1 NAME' with a section
# we need to look in the paragraph and rewrite things
} elsif ($seq_command eq 'Z') {
# Zero width space
- return '$\!$'; # '
+ return '{}';
} elsif ($seq_command eq 'C') {
return "\\texttt{$seq_argument}";
} elsif ($seq_command eq 'S') {
# non breakable spaces
- my $nbsp = '$\:$'; #'
+ my $nbsp = '~';
$seq_argument =~ s/\s/$nbsp/g;
return $seq_argument;
} elsif ($seq_command eq 'L') {
-
my $link = new Pod::Hyperlink($seq_argument);
# undef on failure
# Use default markup for external references
# (although Starlink would use \xlabel)
my $markup = $link->markup;
-
my ($file, $line) = $pod_seq->file_line();
return $self->interpolate($link->markup, $line);
} elsif ($seq_command eq 'P') {
# Special markup for Pod::Hyperlink
- # Replace :: with /
+ # Replace :: with / - but not sure if I want to do this
+ # any more.
my $link = $seq_argument;
- $link =~ s/::/\//g;
+ $link =~ s|::|/|g;
my $ref = "\\emph{$seq_argument}";
return $ref;
} elsif ($seq_command eq 'Q') {
# Special markup for Pod::Hyperlink
- return "\\textsf{$seq_argument}\n";
+ return "\\textsf{$seq_argument}";
} elsif ($seq_command eq 'X') {
# Index entries
# If paragraphs printing is turned off via =begin/=end or whatver
# simply return immediately
- return if ($self->{_suppress_all_para} || $self->{_suppress_next_para});
+ return if $self->{_suppress_all_para};
# Check to see whether we are starting a new lists
if (scalar($self->lists->[-1]->item) == 0) {
if ($type eq 'description') {
# Handle long items - long items do not wrap
- if (length($paragraph) < 40) {
- # A real description list item
- $self->_output("\\item[$paragraph] \\mbox{}");
+ # If the string is longer than 40 characters we split
+ # it into a real item header and some bold text.
+ my $maxlen = 40;
+ my ($hunk1, $hunk2) = $self->_split_delimited( $paragraph, $maxlen );
+
+ # Print the first hunk
+ $self->_output("\n\\item[$hunk1] ");
+
+ # and the second hunk if it is defined
+ if ($hunk2) {
+ $self->_output("\\textbf{$hunk2}");
} else {
- # The item is now simply bold text
- $self->_output(qq{\\item \\textbf{$paragraph}});
+ # Not there so make sure we have a new line
+ $self->_output("\\mbox{}");
}
} else {
- # If the item was '* Something' we still need to write
- # out the something
+ # If the item was '* Something' or '\d+ something' we still need to write
+ # out the something. Also allow 1) and 1.
my $extra_info = $paragraph;
- $extra_info =~ s/^\*\s*//;
- $self->_output("\\item $extra_info");
+ $extra_info =~ s/^(\*|\d+[\.\)]?)\s*//;
+ $self->_output("\n\\item $extra_info");
}
# Store the item name in the object. Required so that
my $star = ($level >= $self->LevelNoNum ? '*' : '');
# Section
- $self->_output("\\" .$LatexSections[$level] .$star ."{$paragraph\\label{".$label ."}\\index{".$index."}}");
+ $self->_output("\\" .$LatexSections[$level] .$star ."{$paragraph\\label{".$label ."}\\index{".$index."}}\n");
}
$parser->_output($text);
-Does not write anything if a =begin or =for is active that should be
+Does not write anything if a =begin is active that should be
ignored.
=cut
my $self = shift;
my $text = shift;
- print { $self->output_handle } $text
- unless $self->{_suppress_all_para} ||
- $self->{_suppress_next_para};
+ print { $self->output_handle } $text
+ unless $self->{_suppress_all_para};
- # Reset pargraph stuff for =for
- $self->{_suppress_next_para} = 0
- if $self->{_suppress_next_para};
}
$escaped = $parser->_replace_special_chars($paragraph);
-Need to call this routine before interior_sequences are munged but
-not if verbatim.
+Need to call this routine before interior_sequences are munged but not
+if verbatim. It must be called before interpolation of interior
+sequences so that curly brackets and special latex characters inserted
+during interpolation are not themselves escaped. This means that < and
+> can not be modified here since the text still contains interior
+sequences.
Special characters and the C<latex> equivalents are:
\ $\backslash$
^ \^{}
~ \~{}
- | $|$
+ # \#
=cut
# Replace tilde (~) with \texttt{\~{}}
$paragraph =~ s/~/\\texttt\{\\~\{\}\}/g;
+ # Now add the dollars around each \backslash
+ $paragraph =~ s/(\\backslash)/\$$1\$/g;
+ return $paragraph;
+}
+
+=item B<_replace_special_chars_late>
+
+Replace special characters that can not be replaced before interior
+sequence interpolation. See C<_replace_special_chars> for a routine
+to replace special characters prior to interpolation of interior
+sequences.
+
+Does the following transformation:
+
+ < $<$
+ > $>$
+ | $|$
+
+
+=cut
+
+sub _replace_special_chars_late {
+ my $self = shift;
+ my $paragraph = shift;
+
+ # < and > become $<$ and $>$; safe only now that interior sequences have been interpolated
+ $paragraph =~ s/(<|>)/\$$1\$/g;
+
# Replace | with $|$
$paragraph =~ s'\|'$|$'g;
- # Now add the dollars around each \backslash
- $paragraph =~ s/(\\backslash)/\$$1\$/g;
return $paragraph;
}
return $paragraph
}
+=item B<_split_delimited>
+
+Split the supplied string into two parts at approximately the
+specified word boundary. Special care is made to make sure that it
+does not split in the middle of some curly brackets.
+
+e.g. "this text is \textbf{very bold}" would not be split into
+"this text is \textbf{very" and " bold".
+
+ ($hunk1, $hunk2) = $self->_split_delimited( $para, $length);
+
+The length is the target length of hunk1. Whole words are added to hunk1
+while it is still shorter than this, so hunk1 may end up slightly longer.
+
+=cut
+
+# initially Supplied by hsmyers@sdragons.com
+# 10/25/01, utility to split \hbox
+# busting lines. Reformatted by TimJ to match module style.
+sub _split_delimited {
+  # Split a string into two hunks at a word boundary near $limit
+  # characters, never breaking inside a {...} group.
+  #
+  # Arguments: the string to split and the target length of hunk 1.
+  # Returns:   ($hunk1, $hunk2). $hunk2 is '' when no split is needed.
+  #
+  # Whole tokens are appended to hunk 1 while it is still shorter than
+  # $limit, so hunk 1 can overshoot the limit by up to one token.
+  my $self = shift;
+  my $input = shift;
+  my $limit = shift;
+
+  # Return immediately if already small
+  return ($input, '') if length($input) < $limit;
+
+  my @output;
+  my $s = '';
+  my $t = '';
+  my $depth = 0;
+  my $escaped = 0;
+  my $token = '';
+
+  # Newlines are just word separators here
+  $input =~ s/\n/ /g;
+
+  # Tokenise on spaces that lie outside any brace group. Each token
+  # keeps its trailing space so that the hunks can be rebuilt by
+  # simple concatenation.
+  foreach my $char ( split( //, $input ) ) {
+    $token .= $char;
+    if ($escaped) {
+      # The character after a backslash (e.g. \{ or \}) is literal
+      # and must not change the group depth
+      $escaped = 0;
+    } elsif ($char eq '\\') {
+      $escaped = 1;
+    } elsif ($char eq '{') {
+      $depth++;
+    } elsif ($char eq '}') {
+      # A stray closing brace must not drive the depth negative,
+      # otherwise no later space would be treated as a boundary
+      $depth-- if $depth > 0;
+    } elsif ($char eq ' ' and $depth == 0) {
+      push @output, $token if $token ne ' ';
+      $token = '';
+    }
+  }
+
+  # Flush whatever is left. This also rescues the tail of the string
+  # when the braces are unbalanced and the depth never returns to
+  # zero; previously that text was silently discarded.
+  push @output, $token if $token =~ /\S/;
+
+  # Fill the first hunk until it reaches the limit; everything after
+  # that goes into the second hunk.
+  foreach my $chunk (@output) {
+    if (length($s) < $limit) {
+      $s .= $chunk;
+    } else {
+      $t .= $chunk;
+    }
+  }
+
+  # Tidy up
+  $s =~ s/\s+$//;
+  $t =~ s/\s+$//;
+  return ($s,$t);
+}
+
=back
=end __PRIVATE__
Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt>
+Bug fixes and improvements have been received from: Simon Cozens
+E<lt>simon@cozens.netE<gt>, Mark A. Hershberger
+E<lt>mah@everybody.orgE<gt>, Marcel Grunauer
+E<lt>marcel@codewerk.comE<gt>, Hugh S Myers
+E<lt>hsmyers@sdragons.comE<gt>, Peter J Acklam
+E<lt>jacklam@math.uio.noE<gt>, Sudhi Herle E<lt>sudhi@herle.netE<gt>
+and Ariel Scolnicov E<lt>ariels@compugen.co.ilE<gt>.
+
+
=head1 COPYRIGHT
-Copyright (C) 2000 Tim Jenness. All Rights Reserved.
+Copyright (C) 2000-2003 Tim Jenness. All Rights Reserved.
-This program is free software; you can redistribute it and/or modify it
-under the same terms as Perl itself.
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
=begin __PRIVATE__
=head1 REVISION
-$Id: LaTeX.pm,v 1.6 2000/08/21 09:05:03 timj Exp $
+$Id: LaTeX.pm,v 1.17 2003/04/05 21:25:49 timj Exp $
=end __PRIVATE__
=cut
+
+1;