[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm

package Pod::Text;

# Version 1.01

=head1 NAME

Pod::Text - convert POD data to formatted ASCII text

=head1 SYNOPSIS

	use Pod::Text;

	pod2text("perlfunc.pod");

Also:

	pod2text < input.pod

=head1 DESCRIPTION

Pod::Text is a module that can convert documentation in the POD format (such
as can be found throughout the Perl distribution) into formatted ASCII.
Termcap is optionally supported for boldface/underline, and can enabled via
C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
will be used to simulate bold and underlined text.

A separate F<pod2text> program is included that is primarily a wrapper for
Pod::Text.

The single function C<pod2text()> can take one or two arguments. The first
should be the name of a file to read the pod from, or "<&STDIN" to read from
STDIN. A second argument, if provided, should be a filehandle glob where
output should be sent.

=head1 AUTHOR

Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>

=head1 TODO

Cleanup work. The input and output locations need to be more flexible,
termcap shouldn't be a global variable, and the terminal speed needs to
be properly calculated.

=cut

use Term::Cap;
require Exporter;
@ISA = Exporter;
@EXPORT = qw(pod2text);

$termcap=0;

#$use_format=1;

$UNDL = "\x1b[4m";
$INV = "\x1b[7m";
$BOLD = "\x1b[1m";
$NORM = "\x1b[0m";

sub pod2text {
local($file,*OUTPUT) = @_;
*OUTPUT = *STDOUT if @_<2;

if($termcap and !$setuptermcap) {
	$setuptermcap=1;

    my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
    $UNDL = $term->{'_us'};
    $INV = $term->{'_mr'};
    $BOLD = $term->{'_md'};
    $NORM = $term->{'_me'};
}

$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
       || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
       ||  $ENV{COLUMNS}
       || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
       || 72;

$/ = "";

$FANCY = 0;

$cutting = 1;
$DEF_INDENT = 4;
$indent = $DEF_INDENT;
$needspace = 0;

open(IN, shift) || die "Couldn't open file!\n";

POD_DIRECTIVE: while (<IN>) {
    if ($cutting) {
	next unless /^=/;
	$cutting = 0;
    }
    1 while s{^(.*?)(\t+)(.*)$}{
	$1
	. (' ' x (length($2) * 8 - length($1) % 8))
	. $3
    }me;
    # Translate verbatim paragraph
    if (/^\s/) {
	$needspace = 1;
	output($_);
	next;
    }

sub prepare_for_output {

    s/\s*$/\n/;
    &init_noremap;

    # need to hide E<> first; they're processed in clear_noremap
    s/(E<[^<>]+>)/noremap($1)/ge;
    $maxnest = 10;
    while ($maxnest-- && /[A-Z]</) {
	unless ($FANCY) {
	    s/C<(.*?)>/`$1'/g;
	} else {
	    s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge;
	}
        # s/[IF]<(.*?)>/italic($1)/ge;
        s/I<(.*?)>/*$1*/g;
        # s/[CB]<(.*?)>/bold($1)/ge;
	s/X<.*?>//g;
	# LREF: a manpage(3f)
	s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
	# LREF: an =item on another manpage
	s{
	    L<
		([^/]+)
		/
		(
		    [:\w]+
		    (\(\))?
		)
	    >
	} {the "$2" entry in the $1 manpage}gx;

	# LREF: an =item on this manpage
	s{
	   ((?:
	    L<
		/
		(
		    [:\w]+
		    (\(\))?
		)
	    >
	    (,?\s+(and\s+)?)?
	  )+)
	} { internal_lrefs($1) }gex;

	# LREF: a =head2 (head1?), maybe on a manpage, maybe right here
	# the "func" can disambiguate
	s{
	    L<
		(?:
		    ([a-zA-Z]\S+?) / 
		)?
		"?(.*?)"?
	    >
	}{
	    do {
		$1 	# if no $1, assume it means on this page.
		    ?  "the section on \"$2\" in the $1 manpage"
		    :  "the section on \"$2\""
	    }
	}gex;

        s/[A-Z]<(.*?)>/$1/g;
    }
    clear_noremap(1);
}

    &prepare_for_output;

    if (s/^=//) {
	# $needspace = 0;		# Assume this.
	# s/\n/ /g;
	($Cmd, $_) = split(' ', $_, 2);
	# clear_noremap(1);
	if ($Cmd eq 'cut') {
	    $cutting = 1;
	}
	elsif ($Cmd eq 'head1') {
	    makespace();
	    print OUTPUT;
	    # print OUTPUT uc($_);
	}
	elsif ($Cmd eq 'head2') {
	    makespace();
	    # s/(\w+)/\u\L$1/g;
	    #print ' ' x $DEF_INDENT, $_;
	    # print "\xA7";
	    s/(\w)/\xA7 $1/ if $FANCY;
	    print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
	}
	elsif ($Cmd eq 'over') {
	    push(@indent,$indent);
	    $indent += ($_ + 0) || $DEF_INDENT;
	}
	elsif ($Cmd eq 'back') {
	    $indent = pop(@indent);
	    warn "Unmatched =back\n" unless defined $indent;
	    $needspace = 1;
	}
	elsif ($Cmd eq 'item') {
	    makespace();
	    # s/\A(\s*)\*/$1\xb7/ if $FANCY;
	    # s/^(\s*\*\s+)/$1 /;
	    {
		if (length() + 3 < $indent) {
		    my $paratag = $_;
		    $_ = <>;
		    if (/^=/) {  # tricked!
			local($indent) = $indent[$#index - 1] || $DEF_INDENT;
			output($paratag);
			redo POD_DIRECTIVE;
		    }
		    &prepare_for_output;
		    IP_output($paratag, $_);
		} else {
		    local($indent) = $indent[$#index - 1] || $DEF_INDENT;
		    output($_);
		}
	    }
	}
	else {
	    warn "Unrecognized directive: $Cmd\n";
	}
    }
    else {
	# clear_noremap(1);
	makespace();
	output($_, 1);
    }
}

close(IN);

}

#########################################################################

sub makespace {
    if ($needspace) {
	print OUTPUT "\n";
	$needspace = 0;
    }
}

sub bold {
    my $line = shift;
    return $line if $use_format;
    if($termcap) {
    	$line = "$BOLD$line$NORM";
    } else {
	    $line =~ s/(.)/$1\b$1/g;
	}
#    $line = "$BOLD$line$NORM" if $ansify;
    return $line;
}

sub italic {
    my $line = shift;
    return $line if $use_format;
    if($termcap) {
    	$line = "$UNDL$line$NORM";
    } else {
	    $line =~ s/(.)/$1\b_/g;
    }
#    $line = "$UNDL$line$NORM" if $ansify;
    return $line;
}

# Fill a paragraph including underlined and overstricken chars.
# It's not perfect for words longer than the margin, and it's probably
# slow, but it works.
sub fill {
    local $_ = shift;
    my $par = "";
    my $indent_space = " " x $indent;
    my $marg = $SCREEN-$indent;
    my $line = $indent_space;
    my $line_length;
    foreach (split) {
	my $word_length = length;
	$word_length -= 2 while /\010/g;  # Subtract backspaces

	if ($line_length + $word_length > $marg) {
	    $par .= $line . "\n";
	    $line= $indent_space . $_;
	    $line_length = $word_length;
	}
	else {
	    if ($line_length) {
		$line_length++;
		$line .= " ";
	    }
	    $line_length += $word_length;
	    $line .= $_;
	}
    }
    $par .= "$line\n" if $line;
    $par .= "\n";
    return $par;
}

sub IP_output {
    local($tag, $_) = @_;
    local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
    $tag_cols = $SCREEN - $tag_indent;
    $cols = $SCREEN - $indent;
    $tag =~ s/\s*$//;
    s/\s+/ /g;
    s/^ //;
    $str = "format OUTPUT = \n"
	. (" " x ($tag_indent))
	. '@' . ('<' x ($indent - $tag_indent - 1))
	. "^" .  ("<" x ($cols - 1)) . "\n"
	. '$tag, $_'
	. "\n~~"
	. (" " x ($indent-2))
	. "^" .  ("<" x ($cols - 5)) . "\n"
	. '$_' . "\n\n.\n1";
    #warn $str; warn "tag is $tag, _ is $_";
    eval $str || die;
    write OUTPUT;
}

sub output {
    local($_, $reformat) = @_;
    if ($reformat) {
	$cols = $SCREEN - $indent;
	s/\s+/ /g;
	s/^ //;
	$str = "format OUTPUT = \n~~"
	    . (" " x ($indent-2))
	    . "^" .  ("<" x ($cols - 5)) . "\n"
	    . '$_' . "\n\n.\n1";
	eval $str || die;
	write OUTPUT;
    } else {
	s/^/' ' x $indent/gem;
	s/^\s+\n$/\n/gm;
	print OUTPUT;
    }
}

sub noremap {
    local($thing_to_hide) = shift;
    $thing_to_hide =~ tr/\000-\177/\200-\377/;
    return $thing_to_hide;
}

sub init_noremap {
    die "unmatched init" if $mapready++;
    if ( /[\200-\377]/ ) {
	warn "hit bit char in input stream";
    }
}

sub clear_noremap {
    my $ready_to_print = $_[0];
    die "unmatched clear" unless $mapready--;
    tr/\200-\377/\000-\177/;
    # now for the E<>s, which have been hidden until now
    # otherwise the interative \w<> processing would have
    # been hosed by the E<gt>
    s {
	    E<	
	    ( [A-Za-z]+ )	
	    >	
    } {
	 do {
	     defined $HTML_Escapes{$1}
		? do { $HTML_Escapes{$1} }
		: do {
		    warn "Unknown escape: $& in $_";
		    "E<$1>";
		}
	 }
    }egx if $ready_to_print;
}

sub internal_lrefs {
    local($_) = shift;
    s{L</([^>]+)>}{$1}g;
    my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
    my $retstr = "the ";
    my $i;
    for ($i = 0; $i <= $#items; $i++) {
	$retstr .= "C<$items[$i]>";
	$retstr .= ", " if @items > 2 && $i != $#items;
	$retstr .= " and " if $i+2 == @items;
    }

    $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
	    .  " elsewhere in this document ";

    return $retstr;

}

BEGIN {

%HTML_Escapes = (
    'amp'	=>	'&',	#   ampersand
    'lt'	=>	'<',	#   left chevron, less-than
    'gt'	=>	'>',	#   right chevron, greater-than
    'quot'	=>	'"',	#   double quote

    "Aacute"	=>	"\xC1",	#   capital A, acute accent
    "aacute"	=>	"\xE1",	#   small a, acute accent
    "Acirc"	=>	"\xC2",	#   capital A, circumflex accent
    "acirc"	=>	"\xE2",	#   small a, circumflex accent
    "AElig"	=>	"\xC6",	#   capital AE diphthong (ligature)
    "aelig"	=>	"\xE6",	#   small ae diphthong (ligature)
    "Agrave"	=>	"\xC0",	#   capital A, grave accent
    "agrave"	=>	"\xE0",	#   small a, grave accent
    "Aring"	=>	"\xC5",	#   capital A, ring
    "aring"	=>	"\xE5",	#   small a, ring
    "Atilde"	=>	"\xC3",	#   capital A, tilde
    "atilde"	=>	"\xE3",	#   small a, tilde
    "Auml"	=>	"\xC4",	#   capital A, dieresis or umlaut mark
    "auml"	=>	"\xE4",	#   small a, dieresis or umlaut mark
    "Ccedil"	=>	"\xC7",	#   capital C, cedilla
    "ccedil"	=>	"\xE7",	#   small c, cedilla
    "Eacute"	=>	"\xC9",	#   capital E, acute accent
    "eacute"	=>	"\xE9",	#   small e, acute accent
    "Ecirc"	=>	"\xCA",	#   capital E, circumflex accent
    "ecirc"	=>	"\xEA",	#   small e, circumflex accent
    "Egrave"	=>	"\xC8",	#   capital E, grave accent
    "egrave"	=>	"\xE8",	#   small e, grave accent
    "ETH"	=>	"\xD0",	#   capital Eth, Icelandic
    "eth"	=>	"\xF0",	#   small eth, Icelandic
    "Euml"	=>	"\xCB",	#   capital E, dieresis or umlaut mark
    "euml"	=>	"\xEB",	#   small e, dieresis or umlaut mark
    "Iacute"	=>	"\xCD",	#   capital I, acute accent
    "iacute"	=>	"\xED",	#   small i, acute accent
    "Icirc"	=>	"\xCE",	#   capital I, circumflex accent
    "icirc"	=>	"\xEE",	#   small i, circumflex accent
    "Igrave"	=>	"\xCD",	#   capital I, grave accent
    "igrave"	=>	"\xED",	#   small i, grave accent
    "Iuml"	=>	"\xCF",	#   capital I, dieresis or umlaut mark
    "iuml"	=>	"\xEF",	#   small i, dieresis or umlaut mark
    "Ntilde"	=>	"\xD1",		#   capital N, tilde
    "ntilde"	=>	"\xF1",		#   small n, tilde
    "Oacute"	=>	"\xD3",	#   capital O, acute accent
    "oacute"	=>	"\xF3",	#   small o, acute accent
    "Ocirc"	=>	"\xD4",	#   capital O, circumflex accent
    "ocirc"	=>	"\xF4",	#   small o, circumflex accent
    "Ograve"	=>	"\xD2",	#   capital O, grave accent
    "ograve"	=>	"\xF2",	#   small o, grave accent
    "Oslash"	=>	"\xD8",	#   capital O, slash
    "oslash"	=>	"\xF8",	#   small o, slash
    "Otilde"	=>	"\xD5",	#   capital O, tilde
    "otilde"	=>	"\xF5",	#   small o, tilde
    "Ouml"	=>	"\xD6",	#   capital O, dieresis or umlaut mark
    "ouml"	=>	"\xF6",	#   small o, dieresis or umlaut mark
    "szlig"	=>	"\xDF",		#   small sharp s, German (sz ligature)
    "THORN"	=>	"\xDE",	#   capital THORN, Icelandic
    "thorn"	=>	"\xFE",	#   small thorn, Icelandic
    "Uacute"	=>	"\xDA",	#   capital U, acute accent
    "uacute"	=>	"\xFA",	#   small u, acute accent
    "Ucirc"	=>	"\xDB",	#   capital U, circumflex accent
    "ucirc"	=>	"\xFB",	#   small u, circumflex accent
    "Ugrave"	=>	"\xD9",	#   capital U, grave accent
    "ugrave"	=>	"\xF9",	#   small u, grave accent
    "Uuml"	=>	"\xDC",	#   capital U, dieresis or umlaut mark
    "uuml"	=>	"\xFC",	#   small u, dieresis or umlaut mark
    "Yacute"	=>	"\xDD",	#   capital Y, acute accent
    "yacute"	=>	"\xFD",	#   small y, acute accent
    "yuml"	=>	"\xFF",	#   small y, dieresis or umlaut mark

    "lchevron"	=>	"\xAB",	#   left chevron (double less than)
    "rchevron"	=>	"\xBB",	#   right chevron (double greater than)
);
}

1;
Commit	Line	Data
69e00e79	1	package Pod::Text;
	2
	3	# Version 1.01
	4
	5	=head1 NAME
	6
	7	Pod::Text - convert POD data to formatted ASCII text
	8
	9	=head1 SYNOPSIS
	10
	11	use Pod::Text;
	12
	13	pod2text("perlfunc.pod");
	14
	15	Also:
	16
	17	pod2text < input.pod
	18
	19	=head1 DESCRIPTION
	20
	21	Pod::Text is a module that can convert documentation in the POD format (such
	22	as can be found throughout the Perl distribution) into formatted ASCII.
	23	Termcap is optionally supported for boldface/underline, and can enabled via
	24	C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
	25	will be used to simulate bold and underlined text.
	26
	27	A separate F<pod2text> program is included that is primarily a wrapper for
	28	Pod::Text.
	29
	30	The single function C<pod2text()> can take one or two arguments. The first
	31	should be the name of a file to read the pod from, or "<&STDIN" to read from
	32	STDIN. A second argument, if provided, should be a filehandle glob where
	33	output should be sent.
	34
	35	=head1 AUTHOR
	36
	37	Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
	38
	39	=head1 TODO
	40
	41	Cleanup work. The input and output locations need to be more flexible,
	42	termcap shouldn't be a global variable, and the terminal speed needs to
	43	be properly calculated.
	44
	45	=cut
	46
	47	use Term::Cap;
	48	require Exporter;
	49	@ISA = Exporter;
	50	@EXPORT = qw(pod2text);
	51
	52	$termcap=0;
	53
	54	#$use_format=1;
	55
	56	$UNDL = "\x1b[4m";
	57	$INV = "\x1b[7m";
	58	$BOLD = "\x1b[1m";
	59	$NORM = "\x1b[0m";
	60
	61	sub pod2text {
	62	local($file,*OUTPUT) = @_;
	63	OUTPUT = STDOUT if @_<2;
	64
65	if($termcap and !$setuptermcap) {
66	$setuptermcap=1;
67
68	my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
69	$UNDL = $term->{'_us'};
70	$INV = $term->{'_mr'};
71	$BOLD = $term->{'_md'};
72	$NORM = $term->{'_me'};
73	}
74
75	$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
76	\|\| ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
77	\|\| $ENV{COLUMNS}
78	\|\| (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
79	\|\| 72;
80
81	$/ = "";
82
83	$FANCY = 0;
84
85	$cutting = 1;
86	$DEF_INDENT = 4;
87	$indent = $DEF_INDENT;
88	$needspace = 0;
89
90	open(IN, shift) \|\| die "Couldn't open file!\n";
91
92	POD_DIRECTIVE: while (<IN>) {
93	if ($cutting) {
94	next unless /^=/;
95	$cutting = 0;
96	}
97	1 while s{^(.?)(\t+)(.)$}{
98	$1
99	. (' ' x (length($2) * 8 - length($1) % 8))
100	. $3
101	}me;
102	# Translate verbatim paragraph
103	if (/^\s/) {
104	$needspace = 1;
105	output($_);
106	next;
107	}
108
109	sub prepare_for_output {
110
111	s/\s*$/\n/;
112	&init_noremap;
113
114	# need to hide E<> first; they're processed in clear_noremap
115	s/(E<[^<>]+>)/noremap($1)/ge;
116	$maxnest = 10;
117	while ($maxnest-- && /[A-Z]</) {
118	unless ($FANCY) {
119	s/C<(.*?)>/`$1'/g;
120	} else {
121	s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge;
122	}
123	# s/[IF]<(.*?)>/italic($1)/ge;
124	s/I<(.?)>/$1*/g;
125	# s/[CB]<(.*?)>/bold($1)/ge;
126	s/X<.*?>//g;
127	# LREF: a manpage(3f)
128	s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
129	# LREF: an =item on another manpage
130	s{
131	L<
132	([^/]+)
133	/
134	(
135	[:\w]+
136	(\(\))?
137	)
138	>
139	} {the "$2" entry in the $1 manpage}gx;
140
141	# LREF: an =item on this manpage
142	s{
143	((?:
144	L<
145	/
146	(
147	[:\w]+
148	(\(\))?
149	)
150	>
151	(,?\s+(and\s+)?)?
152	)+)
153	} { internal_lrefs($1) }gex;
154
155	# LREF: a =head2 (head1?), maybe on a manpage, maybe right here
156	# the "func" can disambiguate
157	s{
158	L<
159	(?:
160	([a-zA-Z]\S+?) /
161	)?
162	"?(.*?)"?
163	>
164	}{
165	do {
166	$1 # if no $1, assume it means on this page.
167	? "the section on \"$2\" in the $1 manpage"
168	: "the section on \"$2\""
169	}
170	}gex;
171
172	s/[A-Z]<(.*?)>/$1/g;
173	}
174	clear_noremap(1);
175	}
176
177	&prepare_for_output;
178
179	if (s/^=//) {
180	# $needspace = 0; # Assume this.
181	# s/\n/ /g;
182	($Cmd, $_) = split(' ', $_, 2);
183	# clear_noremap(1);
184	if ($Cmd eq 'cut') {
185	$cutting = 1;
186	}
187	elsif ($Cmd eq 'head1') {
188	makespace();
189	print OUTPUT;
190	# print OUTPUT uc($_);
191	}
192	elsif ($Cmd eq 'head2') {
193	makespace();
194	# s/(\w+)/\u\L$1/g;
195	#print ' ' x $DEF_INDENT, $_;
196	# print "\xA7";
197	s/(\w)/\xA7 $1/ if $FANCY;
198	print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
199	}
200	elsif ($Cmd eq 'over') {
201	push(@indent,$indent);
202	$indent += ($_ + 0) \|\| $DEF_INDENT;
203	}
204	elsif ($Cmd eq 'back') {
205	$indent = pop(@indent);
206	warn "Unmatched =back\n" unless defined $indent;
207	$needspace = 1;
208	}
209	elsif ($Cmd eq 'item') {
210	makespace();
211	# s/\A(\s)\/$1\xb7/ if $FANCY;
212	# s/^(\s\\s+)/$1 /;
213	{
214	if (length() + 3 < $indent) {
215	my $paratag = $_;
216	$_ = <>;
217	if (/^=/) { # tricked!
218	local($indent) = $indent[$#index - 1] \|\| $DEF_INDENT;
219	output($paratag);
220	redo POD_DIRECTIVE;
221	}
222	&prepare_for_output;
223	IP_output($paratag, $_);
224	} else {
225	local($indent) = $indent[$#index - 1] \|\| $DEF_INDENT;
226	output($_);
227	}
228	}
229	}
230	else {
231	warn "Unrecognized directive: $Cmd\n";
232	}
233	}
234	else {
235	# clear_noremap(1);
236	makespace();
237	output($_, 1);
238	}
239	}
240
241	close(IN);
242
243	}
244
245	#########################################################################
246
247	sub makespace {
248	if ($needspace) {
249	print OUTPUT "\n";
250	$needspace = 0;
251	}
252	}
253
254	sub bold {
255	my $line = shift;
256	return $line if $use_format;
257	if($termcap) {
258	$line = "$BOLD$line$NORM";
259	} else {
260	$line =~ s/(.)/$1\b$1/g;
261	}
262	# $line = "$BOLD$line$NORM" if $ansify;
263	return $line;
264	}
265
266	sub italic {
267	my $line = shift;
268	return $line if $use_format;
269	if($termcap) {
270	$line = "$UNDL$line$NORM";
271	} else {
272	$line =~ s/(.)/$1\b_/g;
273	}
274	# $line = "$UNDL$line$NORM" if $ansify;
275	return $line;
276	}
277
278	# Fill a paragraph including underlined and overstricken chars.
279	# It's not perfect for words longer than the margin, and it's probably
280	# slow, but it works.
281	sub fill {
282	local $_ = shift;
283	my $par = "";
284	my $indent_space = " " x $indent;
285	my $marg = $SCREEN-$indent;
286	my $line = $indent_space;
287	my $line_length;
288	foreach (split) {
289	my $word_length = length;
290	$word_length -= 2 while /\010/g; # Subtract backspaces
291
292	if ($line_length + $word_length > $marg) {
293	$par .= $line . "\n";
294	$line= $indent_space . $_;
295	$line_length = $word_length;
296	}
297	else {
298	if ($line_length) {
299	$line_length++;
300	$line .= " ";
301	}
302	$line_length += $word_length;
303	$line .= $_;
304	}
305	}
306	$par .= "$line\n" if $line;
307	$par .= "\n";
308	return $par;
309	}
310
311	sub IP_output {
312	local($tag, $_) = @_;
313	local($tag_indent) = $indent[$#index - 1] \|\| $DEF_INDENT;
314	$tag_cols = $SCREEN - $tag_indent;
315	$cols = $SCREEN - $indent;
316	$tag =~ s/\s*$//;
317	s/\s+/ /g;
318	s/^ //;
319	$str = "format OUTPUT = \n"
320	. (" " x ($tag_indent))
321	. '@' . ('<' x ($indent - $tag_indent - 1))
322	. "^" . ("<" x ($cols - 1)) . "\n"
323	. '$tag, $_'
324	. "\n~~"
325	. (" " x ($indent-2))
326	. "^" . ("<" x ($cols - 5)) . "\n"
327	. '$_' . "\n\n.\n1";
328	#warn $str; warn "tag is $tag, _ is $_";
329	eval $str \|\| die;
330	write OUTPUT;
331	}
332
333	sub output {
334	local($_, $reformat) = @_;
335	if ($reformat) {
336	$cols = $SCREEN - $indent;
337	s/\s+/ /g;
338	s/^ //;
339	$str = "format OUTPUT = \n~~"
340	. (" " x ($indent-2))
341	. "^" . ("<" x ($cols - 5)) . "\n"
342	. '$_' . "\n\n.\n1";
343	eval $str \|\| die;
344	write OUTPUT;
345	} else {
346	s/^/' ' x $indent/gem;
347	s/^\s+\n$/\n/gm;
348	print OUTPUT;
349	}
350	}
351
352	sub noremap {
353	local($thing_to_hide) = shift;
354	$thing_to_hide =~ tr/\000-\177/\200-\377/;
355	return $thing_to_hide;
356	}
357
358	sub init_noremap {
359	die "unmatched init" if $mapready++;
360	if ( /[\200-\377]/ ) {
361	warn "hit bit char in input stream";
362	}
363	}
364
365	sub clear_noremap {
366	my $ready_to_print = $_[0];
367	die "unmatched clear" unless $mapready--;
368	tr/\200-\377/\000-\177/;
369	# now for the E<>s, which have been hidden until now
370	# otherwise the interative \w<> processing would have
371	# been hosed by the E<gt>
372	s {
373	E<
374	( [A-Za-z]+ )
375	>
376	} {
377	do {
378	defined $HTML_Escapes{$1}
379	? do { $HTML_Escapes{$1} }
380	: do {
381	warn "Unknown escape: $& in $_";
382	"E<$1>";
383	}
384	}
385	}egx if $ready_to_print;
386	}
387
388	sub internal_lrefs {
389	local($_) = shift;
390	s{L</([^>]+)>}{$1}g;
391	my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
392	my $retstr = "the ";
393	my $i;
394	for ($i = 0; $i <= $#items; $i++) {
395	$retstr .= "C<$items[$i]>";
396	$retstr .= ", " if @items > 2 && $i != $#items;
397	$retstr .= " and " if $i+2 == @items;
398	}
399
400	$retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
401	. " elsewhere in this document ";
402
403	return $retstr;
404
405	}
406
407	BEGIN {
408
409	%HTML_Escapes = (
410	'amp' => '&', # ampersand
411	'lt' => '<', # left chevron, less-than
412	'gt' => '>', # right chevron, greater-than
413	'quot' => '"', # double quote
414
415	"Aacute" => "\xC1", # capital A, acute accent
416	"aacute" => "\xE1", # small a, acute accent
417	"Acirc" => "\xC2", # capital A, circumflex accent
418	"acirc" => "\xE2", # small a, circumflex accent
419	"AElig" => "\xC6", # capital AE diphthong (ligature)
420	"aelig" => "\xE6", # small ae diphthong (ligature)
421	"Agrave" => "\xC0", # capital A, grave accent
422	"agrave" => "\xE0", # small a, grave accent
423	"Aring" => "\xC5", # capital A, ring
424	"aring" => "\xE5", # small a, ring
425	"Atilde" => "\xC3", # capital A, tilde
426	"atilde" => "\xE3", # small a, tilde
427	"Auml" => "\xC4", # capital A, dieresis or umlaut mark
428	"auml" => "\xE4", # small a, dieresis or umlaut mark
429	"Ccedil" => "\xC7", # capital C, cedilla
430	"ccedil" => "\xE7", # small c, cedilla
431	"Eacute" => "\xC9", # capital E, acute accent
432	"eacute" => "\xE9", # small e, acute accent
433	"Ecirc" => "\xCA", # capital E, circumflex accent
434	"ecirc" => "\xEA", # small e, circumflex accent
435	"Egrave" => "\xC8", # capital E, grave accent
436	"egrave" => "\xE8", # small e, grave accent
437	"ETH" => "\xD0", # capital Eth, Icelandic
438	"eth" => "\xF0", # small eth, Icelandic
439	"Euml" => "\xCB", # capital E, dieresis or umlaut mark
440	"euml" => "\xEB", # small e, dieresis or umlaut mark
441	"Iacute" => "\xCD", # capital I, acute accent
442	"iacute" => "\xED", # small i, acute accent
443	"Icirc" => "\xCE", # capital I, circumflex accent
444	"icirc" => "\xEE", # small i, circumflex accent
445	"Igrave" => "\xCD", # capital I, grave accent
446	"igrave" => "\xED", # small i, grave accent
447	"Iuml" => "\xCF", # capital I, dieresis or umlaut mark
448	"iuml" => "\xEF", # small i, dieresis or umlaut mark
449	"Ntilde" => "\xD1", # capital N, tilde
450	"ntilde" => "\xF1", # small n, tilde
451	"Oacute" => "\xD3", # capital O, acute accent
452	"oacute" => "\xF3", # small o, acute accent
453	"Ocirc" => "\xD4", # capital O, circumflex accent
454	"ocirc" => "\xF4", # small o, circumflex accent
455	"Ograve" => "\xD2", # capital O, grave accent
456	"ograve" => "\xF2", # small o, grave accent
457	"Oslash" => "\xD8", # capital O, slash
458	"oslash" => "\xF8", # small o, slash
459	"Otilde" => "\xD5", # capital O, tilde
460	"otilde" => "\xF5", # small o, tilde
461	"Ouml" => "\xD6", # capital O, dieresis or umlaut mark
462	"ouml" => "\xF6", # small o, dieresis or umlaut mark
463	"szlig" => "\xDF", # small sharp s, German (sz ligature)
464	"THORN" => "\xDE", # capital THORN, Icelandic
465	"thorn" => "\xFE", # small thorn, Icelandic
466	"Uacute" => "\xDA", # capital U, acute accent
467	"uacute" => "\xFA", # small u, acute accent
468	"Ucirc" => "\xDB", # capital U, circumflex accent
469	"ucirc" => "\xFB", # small u, circumflex accent
470	"Ugrave" => "\xD9", # capital U, grave accent
471	"ugrave" => "\xF9", # small u, grave accent
472	"Uuml" => "\xDC", # capital U, dieresis or umlaut mark
473	"uuml" => "\xFC", # small u, dieresis or umlaut mark
474	"Yacute" => "\xDD", # capital Y, acute accent
475	"yacute" => "\xFD", # small y, acute accent
476	"yuml" => "\xFF", # small y, dieresis or umlaut mark
477
478	"lchevron" => "\xAB", # left chevron (double less than)
479	"rchevron" => "\xBB", # right chevron (double greater than)
480	);
481	}
482
483	1;