3 package Pod::Simple::RTF;
6 #sub Pod::Simple::DEBUG () {4};
7 #sub Pod::Simple::PullParser::DEBUG () {4};
10 use vars qw($VERSION @ISA %Escape $WRAP %Tagmap);
12 use Pod::Simple::PullParser ();
13 BEGIN {@ISA = ('Pod::Simple::PullParser')}
16 BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
18 $WRAP = 1 unless defined $WRAP;
20 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 m/^([-A-Za-z]+)=(\w[^\=]*)$/s or die "what's <$_>?";
25 ( $1, "{\\$2\n", "/$1", "}" );
32 # 'foo=bar' means ('foo' => '{\bar'."\n", '/foo' => '}')
36 'C=cs19\f1\lang1024\noproof',
37 'F=cs17\i\lang1024\noproof',
41 'VerbatimBI=cs28\b\i',
43 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
45 underline=ul smallcaps=scaps shadow=shad
46 superscript=super subscript=sub strikethrough=strike
47 outline=outl emboss=embo engrave=impr
48 dotted-underline=uld dash-underline=uldash
49 dot-dash-underline=uldashd dot-dot-dash-underline=uldashdd
50 double-underline=uldb thick-underline=ulth
51 word-underline=ulw wave-underline=ulwave
53 # But no double-strikethrough, because MSWord can't agree with the
54 # RTF spec on whether it's supposed to be \strikedl or \striked1 (!!!)
58 'L=pod' => '{\cs22\i'."\n",
59 'L=url' => '{\cs23\i'."\n",
60 'L=man' => '{\cs24\i'."\n",
66 'Verbatim' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
67 '/Verbatim' => "\n\\par}\n",
68 'VerbatimFormatted' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
69 '/VerbatimFormatted' => "\n\\par}\n",
70 'Para' => "\n{\\pard\\li#rtfindent#\\sa180\n",
71 '/Para' => "\n\\par}\n",
72 'head1' => "\n{\\pard\\li#rtfindent#\\s31\\keepn\\sb90\\sa180\\f2\\fs#head1_halfpoint_size#\\ul{\n",
73 '/head1' => "\n}\\par}\n",
74 'head2' => "\n{\\pard\\li#rtfindent#\\s32\\keepn\\sb90\\sa180\\f2\\fs#head2_halfpoint_size#\\ul{\n",
75 '/head2' => "\n}\\par}\n",
76 'head3' => "\n{\\pard\\li#rtfindent#\\s33\\keepn\\sb90\\sa180\\f2\\fs#head3_halfpoint_size#\\ul{\n",
77 '/head3' => "\n}\\par}\n",
78 'head4' => "\n{\\pard\\li#rtfindent#\\s34\\keepn\\sb90\\sa180\\f2\\fs#head4_halfpoint_size#\\ul{\n",
79 '/head4' => "\n}\\par}\n",
80 # wordpad borks on \tc\tcl1, or I'd put that in =head1 and =head2
82 'item-bullet' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
83 '/item-bullet' => "\n\\par}\n",
84 'item-number' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
85 '/item-number' => "\n\\par}\n",
86 'item-text' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
87 '/item-text' => "\n\\par}\n",
89 # we don't need any styles for over-* and /over-*
93 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
95 my $new = shift->SUPER::new(@_);
98 $new->accept_targets( 'rtf', 'RTF' );
100 $new->{'Tagmap'} = {%Tagmap};
102 $new->accept_codes(@_to_accept);
103 $new->accept_codes('VerbatimFormatted');
104 DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
106 ( $ENV{'RTFDEFLANG'} || '') =~ m/^(\d{1,10})$/s ? $1
107 : ($ENV{'RTFDEFLANG'} || '') =~ m/^0?x([a-fA-F0-9]{1,10})$/s ? hex($1)
109 : ($ENV{'RTFDEFLANG'} || '') =~ m/^([a-fA-F0-9]{4})$/s ? hex($1)
110 # yes, tolerate even more hex!
114 $new->head1_halfpoint_size(32);
115 $new->head2_halfpoint_size(28);
116 $new->head3_halfpoint_size(25);
117 $new->head4_halfpoint_size(22);
118 $new->codeblock_halfpoint_size(18);
119 $new->header_halfpoint_size(17);
120 $new->normal_halfpoint_size(25);
125 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127 __PACKAGE__->_accessorize(
129 'head1_halfpoint_size',
130 'head2_halfpoint_size',
131 'head3_halfpoint_size',
132 'head4_halfpoint_size',
133 'codeblock_halfpoint_size',
134 'header_halfpoint_size',
135 'normal_halfpoint_size',
136 'no_proofing_exemptions',
140 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143 return $self->do_middle if $self->bare_output;
145 $self->do_beginning && $self->do_middle && $self->do_end;
149 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
151 sub do_middle { # the main work
153 my $fh = $self->{'output_fh'};
155 my($token, $type, $tagname, $scratch);
158 $self->{'rtfindent'} = 0 unless defined $self->{'rtfindent'};
160 while($token = $self->get_token) {
162 if( ($type = $token->type) eq 'text' ) {
163 if( $self->{'rtfverbatim'} ) {
164 DEBUG > 1 and print " $type " , $token->text, " in verbatim!\n";
165 rtf_esc_codely($scratch = $token->text);
170 DEBUG > 1 and print " $type " , $token->text, "\n";
172 $scratch = $token->text;
173 $scratch =~ tr/\t\cb\cc/ /d;
175 $self->{'no_proofing_exemptions'} or $scratch =~
179 (?<=[\cm\cj\t "\[\<\(])
180 ) # start on whitespace, sequence-start, or quote
181 ( # something looking like a Perl token:
183 [\$\@\:\<\*\\_]\S+ # either starting with a sigil, etc.
186 # or starting alpha, but containing anything strange:
188 [a-zA-Z'\x80-\xFF]+[\$\@\:_<>\(\\\*]\S+
197 [^\cm\cj\n]{65} # Snare 65 characters from a line
198 [^\cm\cj\n\x20]{0,50} # and finish any current word
200 (\x20{1,10})(?![\cm\cj\n]) # capture some spaces not at line-end
201 /$1$2\n/gx # and put a NL before those spaces
203 # This may wrap at well past the 65th column, but not past the 120th.
207 } elsif( $type eq 'start' ) {
208 DEBUG > 1 and print " +$type ",$token->tagname,
209 " (", map("<$_> ", %{$token->attr_hash}), ")\n";
211 if( ($tagname = $token->tagname) eq 'Verbatim'
212 or $tagname eq 'VerbatimFormatted'
214 ++$self->{'rtfverbatim'};
215 my $next = $self->get_token;
216 next unless defined $next;
218 if($next->type eq 'text') {
219 my $t = $next->text_r;
220 while( $$t =~ m/$/mg ) {
221 last if ++$line_count > 15; # no point in counting further
223 DEBUG > 3 and print " verbatim line count: $line_count\n";
225 $self->unget_token($next);
226 $self->{'rtfkeep'} = ($line_count > 15) ? '' : '\keepn' ;
228 } elsif( $tagname =~ m/^item-/s ) {
230 my $text_count_here = 0;
231 $self->{'rtfitemkeepn'} = '';
232 # Some heuristics to stop item-*'s functioning as subheadings
233 # from getting split from the things they're subheadings for.
235 # It's not terribly pretty, but it really does make things pretty.
238 push @to_unget, $self->get_token;
239 pop(@to_unget), last unless defined $to_unget[-1];
240 # Erroneously used to be "unshift" instead of pop! Adds instead
241 # of removes, and operates on the beginning instead of the end!
243 if($to_unget[-1]->type eq 'text') {
244 if( ($text_count_here += length ${$to_unget[-1]->text_r}) > 150 ){
245 DEBUG > 1 and print " item-* is too long to be keepn'd.\n";
248 } elsif (@to_unget > 1 and
249 $to_unget[-2]->type eq 'end' and
250 $to_unget[-2]->tagname =~ m/^item-/s
252 # Bail out here, after setting rtfitemkeepn yea or nay.
253 $self->{'rtfitemkeepn'} = '\keepn' if
254 $to_unget[-1]->type eq 'start' and
255 $to_unget[-1]->tagname eq 'Para';
257 DEBUG > 1 and printf " item-* before %s(%s) %s keepn'd.\n",
259 $to_unget[-1]->can('tagname') ? $to_unget[-1]->tagname : '',
260 $self->{'rtfitemkeepn'} ? "gets" : "doesn't get";
262 } elsif (@to_unget > 40) {
263 DEBUG > 1 and print " item-* now has too many tokens (",
265 (DEBUG > 4) ? (q<: >, map($_->dump, @to_unget)) : (),
266 ") to be keepn'd.\n";
269 # else keep while'ing along
271 # Now put it aaaaall back...
272 $self->unget_token(@to_unget);
274 } elsif( $tagname =~ m/^over-/s ) {
277 int($token->attr('indent') * 4 * $self->normal_halfpoint_size);
278 DEBUG and print "Indenting over $indent_stack[-1] twips.\n";
279 $self->{'rtfindent'} += $indent_stack[-1];
281 } elsif ($tagname eq 'L') {
282 $tagname .= '=' . ($token->attr('type') || 'pod');
284 } elsif ($tagname eq 'Data') {
285 my $next = $self->get_token;
286 next unless defined $next;
287 unless( $next->type eq 'text' ) {
288 $self->unget_token($next);
291 DEBUG and print " raw text ", $next->text, "\n";
292 printf $fh "\n" . $next->text . "\n";
296 defined($scratch = $self->{'Tagmap'}{$tagname}) or next;
297 $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
300 if ($tagname eq 'item-number') {
301 print $fh $token->attr('number'), ". \n";
302 } elsif ($tagname eq 'item-bullet') {
303 print $fh "\\'95 \n";
304 #for funky testing: print $fh '', rtf_esc("\x{4E4B}\x{9053}");
307 } elsif( $type eq 'end' ) {
308 DEBUG > 1 and print " -$type ",$token->tagname,"\n";
309 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
310 DEBUG and print "Indenting back $indent_stack[-1] twips.\n";
311 $self->{'rtfindent'} -= pop @indent_stack;
313 } elsif( $tagname eq 'Verbatim' or $tagname eq 'VerbatimFormatted') {
314 --$self->{'rtfverbatim'};
316 defined($scratch = $self->{'Tagmap'}{"/$tagname"}) or next;
317 $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
324 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
327 my $fh = $self->{'output_fh'};
328 return print $fh join '',
341 my $fh = $self->{'output_fh'};
342 return print $fh '}'; # that should do it
345 ###########################################################################
348 return sprintf <<'END',
351 {\*\cs10 \additive Default Paragraph Font;}
352 {\*\cs16 \additive \i \sbasedon10 pod-I;}
353 {\*\cs17 \additive \i\lang1024\noproof \sbasedon10 pod-F;}
354 {\*\cs18 \additive \b \sbasedon10 pod-B;}
355 {\*\cs19 \additive \f1\lang1024\noproof\sbasedon10 pod-C;}
356 {\s20\ql \li0\ri0\sa180\widctlpar\f1\fs%s\lang1024\noproof\sbasedon0 \snext0 pod-codeblock;}
357 {\*\cs21 \additive \lang1024\noproof \sbasedon10 pod-computerese;}
358 {\*\cs22 \additive \i\lang1024\noproof\sbasedon10 pod-L-pod;}
359 {\*\cs23 \additive \i\lang1024\noproof\sbasedon10 pod-L-url;}
360 {\*\cs24 \additive \i\lang1024\noproof\sbasedon10 pod-L-man;}
362 {\*\cs25 \additive \f1\lang1024\noproof\sbasedon0 pod-codelbock-plain;}
363 {\*\cs26 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-ital;}
364 {\*\cs27 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold;}
365 {\*\cs28 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold-ital;}
367 {\s31\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head1;}
368 {\s32\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head2;}
369 {\s33\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head3;}
370 {\s34\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head4;}
375 $_[0]->codeblock_halfpoint_size(),
376 $_[0]->head1_halfpoint_size(),
377 $_[0]->head2_halfpoint_size(),
378 $_[0]->head3_halfpoint_size(),
379 $_[0]->head4_halfpoint_size(),
383 ###########################################################################
384 # Override these as necessary for further customization
387 return <<'END'; # text font, code font, heading font
389 {\f0\froman Times New Roman;}
390 {\f1\fmodern Courier New;}
406 {\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
414 my $class = ref($self) || $self;
416 my $tag = __PACKAGE__ . ' ' . $VERSION;
418 unless($class eq __PACKAGE__) {
420 $tag = " v" . $self->VERSION . $tag if defined $self->VERSION;
421 $tag = $class . $tag;
424 return sprintf <<'END',
428 under Perl v%s at %s GMT}
429 {\author [see doc]}{\company [see doc]}{\operator [see doc]}
434 # None of the following things should need escaping, I dare say!
436 $ISA[0], $ISA[0]->VERSION(),
443 my $title = $self->get_short_title();
444 DEBUG and print "Short Title: <$title>\n";
445 $title .= ' ' if length $title;
450 # make sure it ends in a comma and a space, unless it's 0-length
452 my $is_obviously_module_name;
453 $is_obviously_module_name = 1
454 if $title =~ m/^\S+$/s and $title =~ m/::/s;
455 # catches the most common case, at least
457 DEBUG and print "Title0: <$title>\n";
458 $title = rtf_esc($title);
459 DEBUG and print "Title1: <$title>\n";
460 $title = '\lang1024\noproof ' . $title
461 if $is_obviously_module_name;
463 return sprintf <<'END',
464 \deflang%s\plain\lang%s\widowctrl
465 {\header\pard\qr\plain\f2\fs%s
471 ($self->doc_lang) x 2,
472 $self->header_halfpoint_size,
474 $self->normal_halfpoint_size,
478 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
479 #-------------------------------------------------------------------------
484 if(!defined wantarray) { # void context: alter in-place!
486 s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
487 s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
490 } elsif(wantarray) { # return an array
491 return map {; ($x = $_) =~
492 s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
493 $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
496 } else { # return a single scalar
497 ($x = ((@_ == 1) ? $_[0] : join '', @_)
498 ) =~ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
499 # Escape \, {, }, -, control chars, and 7f-ff.
500 $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
506 # Doesn't change "-" to hard-hyphen, nor apply computerese style-smarts.
507 # We don't want to change the "-" to hard-hyphen, because we want to
508 # be able to paste this into a file and run it without there being
509 # dire screaming about the mysterious hard-hyphen character (which
510 # looks just like a normal dash character).
513 if(!defined wantarray) { # void context: alter in-place!
515 s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
516 s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
519 } elsif(wantarray) { # return an array
520 return map {; ($x = $_) =~
521 s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
522 $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
525 } else { # return a single scalar
526 ($x = ((@_ == 1) ? $_[0] : join '', @_)
527 ) =~ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
528 # Escape \, {, }, -, control chars, and 7f-ff.
529 $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
535 map( (chr($_),chr($_)), # things not apparently needing escaping
537 map( (chr($_),sprintf("\\'%02x", $_)), # apparently escapeworthy things
538 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46),
540 # We get to escape out 'F' so that we can send RTF files thru the mail
541 # without the slightest worry that paragraphs beginning with "From"
544 # And some refinements:
549 "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay)
550 "\f" => "\n\\page\n", # Formfeed
551 "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen
552 "\xA0" => "\\~", # Latin-1 non-breaking space
553 "\xAD" => "\\-", # Latin-1 soft (optional) hyphen
558 "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1
567 Pod::Simple::RTF -- format Pod as RTF
571 perl -MPod::Simple::RTF -e \
572 "exit Pod::Simple::RTF->filter(shift)->any_errata_seen" \
573 thingy.pod > thingy.rtf
577 This class is a formatter that takes Pod and renders it as RTF, good for
578 viewing/printing in MSWord, WordPad/write.exe, TextEdit, etc.
580 This is a subclass of L<Pod::Simple> and inherits all its methods.
582 =head1 FORMAT CONTROL ATTRIBUTES
584 You can set these attributes on the parser object before you
585 call C<parse_file> (or a similar method) on it:
589 =item $parser->head1_halfpoint_size( I<halfpoint_integer> );
591 =item $parser->head2_halfpoint_size( I<halfpoint_integer> );
593 =item $parser->head3_halfpoint_size( I<halfpoint_integer> );
595 =item $parser->head4_halfpoint_size( I<halfpoint_integer> );
597 These methods set the size (in half-points, like 52 for 26-point)
598 that these heading levels will appear as.
600 =item $parser->codeblock_halfpoint_size( I<halfpoint_integer> );
602 This method sets the size (in half-points, like 21 for 10.5-point)
603 that codeblocks ("verbatim sections") will appear as.
605 =item $parser->header_halfpoint_size( I<halfpoint_integer> );
607 This method sets the size (in half-points, like 15 for 7.5-point)
608 that the header on each page will appear in. The header
609 is usually just "I<modulename> p. I<pagenumber>".
611 =item $parser->normal_halfpoint_size( I<halfpoint_integer> );
613 This method sets the size (in half-points, like 26 for 13-point)
614 that normal paragraphic text will appear in.
616 =item $parser->no_proofing_exemptions( I<true_or_false> );
618 Set this value to true if you don't want the formatter to try
619 putting a hidden code on all Perl symbols (as best as it can
620 notice them) that labels them as being not in English, and
621 so not worth spellchecking.
623 =item $parser->doc_lang( I<microsoft_decimal_language_code> )
625 This sets the language code to tag this document as being in. By
626 default, it is currently the value of the environment variable
627 C<RTFDEFLANG>, or if that's not set, then the value
628 1033 (for US English).
630 Setting this appropriately is useful if you want to use the RTF
631 to spellcheck, and/or if you want it to hyphenate right.
633 Here are some notable values:
637 3081 Australia English
645 1044 Norwegian (Bokmal)
646 2068 Norwegian (Nynorsk)
650 If you are particularly interested in customizing this module's output
651 even more, see the source and/or write to me.
655 L<Pod::Simple>, L<RTF::Writer>, L<RTF::Cookbook>, L<RTF::Document>,
658 =head1 COPYRIGHT AND DISCLAIMERS
660 Copyright (c) 2002 Sean M. Burke. All rights reserved.
662 This library is free software; you can redistribute it and/or modify it
663 under the same terms as Perl itself.
665 This program is distributed in the hope that it will be useful, but
666 without any warranty; without even the implied warranty of
667 merchantability or fitness for a particular purpose.
671 Sean M. Burke C<sburke@cpan.org>