PodParser-1.093 update (from Brad Appleton's site)
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
664bb207 4# Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
66aff6dd 13$VERSION = 1.097; ## Current version of this package
360aca43 14require 5.004; ## requires this Perl version or later
15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
29
360aca43 30=head1 OPTIONS/ARGUMENTS
31
32C<$filepath> is the input POD to read and C<$outputpath> is
33where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 34indicating a file-path, or else a reference to an open filehandle.
360aca43 35If unspecified, the input-file defaults to C<\*STDIN>, and
36the output-file defaults to C<\*STDERR>.
37
e2c3adef 38=head2 podchecker()
39
40This function can take a hash of options:
e3237417 41
42=over 4
43
44=item B<-warnings> =E<gt> I<val>
45
46Turn warnings on/off. See L<"Warnings">.
47
48=back
360aca43 49
50=head1 DESCRIPTION
51
52B<podchecker> will perform syntax checking of Perl5 POD format documentation.
53
e2c3adef 54I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43 55
56It is hoped that curious/ambitious users will help flesh out and add the
e2c3adef 57additional features they wish to see in B<Pod::Checker> and B<podchecker>
58and verify that the checks are consistent with L<perlpod>.
360aca43 59
e2c3adef 60The following checks are performed:
e3237417 61
62=over 4
63
64=item *
65
e2c3adef 66Unknown '=xxxx' commands, unknown 'X<...>' interior-sequences,
67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
e3237417 73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
e2c3adef 80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 82
83=item *
84
e2c3adef 85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417 86
87=item *
88
e2c3adef 89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
e3237417 91
92=item *
93
e2c3adef 94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
e3237417 97
98=back
99
e2c3adef 100=head2 Additional Features
101
102While checking, this module collects document properties, e.g. the nodes
103for hyperlinks (C<=headX>, C<=item>). POD translators can use this feature
104to syntax-check and get the nodes in a first pass before actually starting
105to convert. This is expensive in terms of execution time, but allows for
106very robust conversions.
107
108=head1 DIAGNOSTICS
e3237417 109
e2c3adef 110=head2 Errors
e3237417 111
112=over 4
113
66aff6dd 114=item * empty =headn
115
116A heading (C<=head1> or C<=head2>) without any text? That ain't no
117heading!
118
e2c3adef 119=item * =over on line I<N> without closing =back
e3237417 120
e2c3adef 121The C<=over> command does not have a corresponding C<=back> before the
122next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 123
e2c3adef 124=item * =item without previous =over
e3237417 125
e2c3adef 126=item * =back without previous =over
e3237417 127
e2c3adef 128An C<=item> or C<=back> command has been found outside a
129C<=over>/C<=back> block.
e3237417 130
e2c3adef 131=item * No argument for =begin
e3237417 132
e2c3adef 133A C<=begin> command was found that is not followed by the formatter
134specification.
e3237417 135
e2c3adef 136=item * =end without =begin
e3237417 137
e2c3adef 138A standalone C<=end> command was found.
139
140=item * Nested =begin's
141
66aff6dd 142There were at least two consecutive C<=begin> commands without
e2c3adef 143the corresponding C<=end>. Only one C<=begin> may be active at
144a time.
145
146=item * =for without formatter specification
e3237417 147
e2c3adef 148There is no specification of the formatter after the C<=for> command.
149
150=item * unresolved internal link I<NAME>
151
152The given link to I<NAME> does not have a matching node in the current
153POD. This also happens when a single word node name is not enclosed in
154C<"">.
155
156=item * Unknown command "I<CMD>"
157
158An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
159C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>,
160C<=cut>
161
162=item * Unknown interior-sequence "I<SEQ>"
163
164An invalid markup command has been encountered. Valid are:
165C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
166C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
167C<ZE<lt>E<gt>>
168
169=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
170
171Two nested identical markup commands have been found. Generally this
172does not make sense.
173
174=item * garbled entity I<STRING>
175
66aff6dd 176The I<STRING> found cannot be interpreted as a character entity.
177
178=item * Entity number out of range
179
180An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 181
182=item * malformed link LE<lt>E<gt>
183
184The link found cannot be parsed because it does not conform to the
185syntax described in L<perlpod>.
e3237417 186
66aff6dd 187=item * nonempty ZE<lt>E<gt>
188
189The C<ZE<lt>E<gt>> sequence is supposed to be empty.
190
191=item * Spurious text after =pod / =cut
192
193The commands C<=pod> and C<=cut> do not take any arguments.
194
195=item * Spurious character(s) after =back
196
197The C<=back> command does not take any arguments.
198
e3237417 199=back
200
e2c3adef 201=head2 Warnings
e3237417 202
e2c3adef 203These may not necessarily cause trouble, but indicate mediocre style.
204
205=over 4
206
66aff6dd 207=item * multiple occurence of link target I<name>
208
209The POD file has some C<=item> and/or C<=head> commands that have
210the same text. Potential hyperlinks to such a text cannot be unique then.
211
212=item * line containing nothing but whitespace in paragraph
213
214There is some whitespace on a seemingly empty line. POD is very sensitive
215to such things, so this is flagged. B<vi> users switch on the B<list>
216option to avoid this problem.
217
218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
e2c3adef 223=item * No numeric argument for =over
224
225The C<=over> command is supposed to have a numeric argument (the
226indentation).
227
66aff6dd 228=item * previous =item has no contents
e2c3adef 229
66aff6dd 230There is a list C<=item> right above the flagged line that has no
231text contents. You probably want to delete empty items.
232
233=item * preceding non-item paragraph(s)
234
235A list introduced by C<=over> starts with a text or verbatim paragraph,
236but continues with C<=item>s. Move the non-item paragraph out of the
237C<=over>/C<=back> block.
238
239=item * =item type mismatch (I<one> vs. I<two>)
240
241A list started with e.g. a bulleted C<=item> and continued with a
242numbered one. This is obviously inconsistent. For most translators the
243type of the I<first> C<=item> determines the type of the list.
e2c3adef 244
245=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
246
247Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
248can potentially cause errors as they could be misinterpreted as
249markup commands.
250
66aff6dd 251=item * Unknown entity
e2c3adef 252
253A character entity was found that does not belong to the standard
66aff6dd 254ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 255
256=item * No items in =over
257
66aff6dd 258The list opened with C<=over> does not contain any items.
e2c3adef 259
260=item * No argument for =item
261
262C<=item> without any parameters is deprecated. It should either be followed
263by C<*> to indicate an unordered list, by a number (optionally followed
264by a dot) to indicate an ordered (numbered) list or simple text for a
265definition list.
266
66aff6dd 267=item * empty section in previous paragraph
268
269The previous section (introduced by a C<=head> command) does not contain
270any text. This usually indicates that something is missing. Note: A
271C<=head1> followed immediately by C<=head2> does not trigger this warning.
272
e2c3adef 273=item * Verbatim paragraph in NAME section
274
275The NAME section (C<=head1 NAME>) should consist of a single paragraph
276with the script/module name, followed by a dash `-' and a very short
277description of what the thing is good for.
278
279=item * Hyperlinks
280
281There are some warnings wrt. hyperlinks:
282Leading/trailing whitespace, newlines in hyperlinks,
283brackets C<()>.
284
285=back
e3237417 286
287=head1 RETURN VALUE
288
289B<podchecker> returns the number of POD syntax errors found or -1 if
290there were no POD commands at all found in the file.
291
360aca43 292=head1 EXAMPLES
293
294I<[T.B.D.]>
295
296=head1 AUTHOR
297
e3237417 298Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
299Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
360aca43 300
301Based on code for B<Pod::Text::pod2text()> written by
302Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
303
304=cut
305
306#############################################################################
307
308use strict;
309#use diagnostics;
310use Carp;
311use Exporter;
312use Pod::Parser;
313
314use vars qw(@ISA @EXPORT);
315@ISA = qw(Pod::Parser);
316@EXPORT = qw(&podchecker);
317
318use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
319
320my %VALID_COMMANDS = (
321 'pod' => 1,
322 'cut' => 1,
323 'head1' => 1,
324 'head2' => 1,
325 'over' => 1,
326 'back' => 1,
327 'item' => 1,
328 'for' => 1,
329 'begin' => 1,
330 'end' => 1,
331);
332
# Interior-sequence letters accepted by _check_ptree(); any other letter
# is reported there as "Unknown interior-sequence".
my %VALID_SEQUENCES = map { ($_ => 1) } qw(I B S C L F X Z E);
344
# stolen from HTML::Entities
#
# Maps an entity name, as written inside E<...>, to its replacement
# character.  Every character outside ASCII is spelled as a \xNN escape
# (its ISO-8859-1 code point) rather than as a literal, so each value is
# exactly one character no matter which encoding this source file is
# stored in.
my %ENTITIES = (
    # Some normal chars that have special meaning in SGML context
    amp    => '&',      # ampersand
    'gt'   => '>',      # greater than
    'lt'   => '<',      # less than
    quot   => '"',      # double quote

    # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
    AElig  => "\xC6",   # capital AE diphthong (ligature)
    Aacute => "\xC1",   # capital A, acute accent
    Acirc  => "\xC2",   # capital A, circumflex accent
    Agrave => "\xC0",   # capital A, grave accent
    Aring  => "\xC5",   # capital A, ring
    Atilde => "\xC3",   # capital A, tilde
    Auml   => "\xC4",   # capital A, dieresis or umlaut mark
    Ccedil => "\xC7",   # capital C, cedilla
    ETH    => "\xD0",   # capital Eth, Icelandic
    Eacute => "\xC9",   # capital E, acute accent
    Ecirc  => "\xCA",   # capital E, circumflex accent
    Egrave => "\xC8",   # capital E, grave accent
    Euml   => "\xCB",   # capital E, dieresis or umlaut mark
    Iacute => "\xCD",   # capital I, acute accent
    Icirc  => "\xCE",   # capital I, circumflex accent
    Igrave => "\xCC",   # capital I, grave accent
    Iuml   => "\xCF",   # capital I, dieresis or umlaut mark
    Ntilde => "\xD1",   # capital N, tilde
    Oacute => "\xD3",   # capital O, acute accent
    Ocirc  => "\xD4",   # capital O, circumflex accent
    Ograve => "\xD2",   # capital O, grave accent
    Oslash => "\xD8",   # capital O, slash
    Otilde => "\xD5",   # capital O, tilde
    Ouml   => "\xD6",   # capital O, dieresis or umlaut mark
    THORN  => "\xDE",   # capital THORN, Icelandic
    Uacute => "\xDA",   # capital U, acute accent
    Ucirc  => "\xDB",   # capital U, circumflex accent
    Ugrave => "\xD9",   # capital U, grave accent
    Uuml   => "\xDC",   # capital U, dieresis or umlaut mark
    Yacute => "\xDD",   # capital Y, acute accent
    aacute => "\xE1",   # small a, acute accent
    acirc  => "\xE2",   # small a, circumflex accent
    aelig  => "\xE6",   # small ae diphthong (ligature)
    agrave => "\xE0",   # small a, grave accent
    aring  => "\xE5",   # small a, ring
    atilde => "\xE3",   # small a, tilde
    auml   => "\xE4",   # small a, dieresis or umlaut mark
    ccedil => "\xE7",   # small c, cedilla
    eacute => "\xE9",   # small e, acute accent
    ecirc  => "\xEA",   # small e, circumflex accent
    egrave => "\xE8",   # small e, grave accent
    eth    => "\xF0",   # small eth, Icelandic
    euml   => "\xEB",   # small e, dieresis or umlaut mark
    iacute => "\xED",   # small i, acute accent
    icirc  => "\xEE",   # small i, circumflex accent
    igrave => "\xEC",   # small i, grave accent
    iuml   => "\xEF",   # small i, dieresis or umlaut mark
    ntilde => "\xF1",   # small n, tilde
    oacute => "\xF3",   # small o, acute accent
    ocirc  => "\xF4",   # small o, circumflex accent
    ograve => "\xF2",   # small o, grave accent
    oslash => "\xF8",   # small o, slash
    otilde => "\xF5",   # small o, tilde
    ouml   => "\xF6",   # small o, dieresis or umlaut mark
    szlig  => "\xDF",   # small sharp s, German (sz ligature)
    thorn  => "\xFE",   # small thorn, Icelandic
    uacute => "\xFA",   # small u, acute accent
    ucirc  => "\xFB",   # small u, circumflex accent
    ugrave => "\xF9",   # small u, grave accent
    uuml   => "\xFC",   # small u, dieresis or umlaut mark
    yacute => "\xFD",   # small y, acute accent
    yuml   => "\xFF",   # small y, dieresis or umlaut mark

    # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
    copy   => "\xA9",   # copyright sign
    reg    => "\xAE",   # registered sign
    nbsp   => "\240",   # non breaking space

    # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
    iexcl  => "\xA1",
    cent   => "\xA2",
    pound  => "\xA3",
    curren => "\xA4",
    yen    => "\xA5",
    brvbar => "\xA6",
    sect   => "\xA7",
    uml    => "\xA8",
    ordf   => "\xAA",
    laquo  => "\xAB",
    'not'  => "\xAC",   # not is a keyword in perl
    shy    => "\xAD",
    macr   => "\xAF",
    deg    => "\xB0",
    plusmn => "\xB1",
    sup1   => "\xB9",
    sup2   => "\xB2",
    sup3   => "\xB3",
    acute  => "\xB4",
    micro  => "\xB5",
    para   => "\xB6",
    middot => "\xB7",
    cedil  => "\xB8",
    ordm   => "\xBA",
    raquo  => "\xBB",
    frac14 => "\xBC",
    frac12 => "\xBD",
    frac34 => "\xBE",
    iquest => "\xBF",
    'times' => "\xD7",  # times is a keyword in perl
    divide => "\xF7",

    # some POD special entities
    verbar => '|',
    sol    => '/'
);
459
360aca43 460##---------------------------------------------------------------------------
461
462##---------------------------------
463## Function definitions begin here
464##---------------------------------
465
# podchecker( $infile, $outfile, %options )
#
# Functional front end: builds a Pod::Checker from %options, runs it over
# $infile, writes its diagnostics to $outfile and returns the checker's
# error count.  A false $infile / $outfile falls back to STDIN / STDERR.
sub podchecker( $ ; $ % ) {
    my ($infile, $outfile, %options) = @_;
    local $_;

    ## Fall back to the standard streams for anything not supplied
    $infile  = \*STDIN  unless $infile;
    $outfile = \*STDERR unless $outfile;

    ## Build the checker object and set the -process_cut_cmd parse option
    my $checker = Pod::Checker->new(%options);
    $checker->parseopts(-process_cut_cmd => 1);

    ## Run the syntax check over the input document
    $checker->parse_from_file($infile, $outfile);

    ## Report how many errors were counted
    return $checker->num_errors();
}
484
485##---------------------------------------------------------------------------
486
487##-------------------------------
488## Method definitions begin here
489##-------------------------------
490
66aff6dd 491## sub new {
492## my $this = shift;
493## my $class = ref($this) || $this;
494## my %params = @_;
495## my $self = {%params};
496## bless $self, $class;
497## $self->initialize();
498## return $self;
499## }
360aca43 500
# Set up the per-object checker state.  A -warnings value already present
# on the object is kept; otherwise warnings default to on.
sub initialize {
    my $self = shift;

    ## Error counter first, then register 'poderror' as the error
    ## handling subroutine, which increments it and prints the message.
    $self->{_NUM_ERRORS} = 0;
    $self->errorsub('poderror');

    $self->{_commands}   = 0;    # total number of POD commands encountered
    $self->{_list_stack} = [];   # stack for nested lists
    $self->{_have_begin} = '';   # stores =begin
    $self->{_links}      = [];   # stack for internal hyperlinks
    $self->{_nodes}      = [];   # stack for =head/=item nodes

    # print warnings?
    defined($self->{-warnings}) or $self->{-warnings} = 1;

    $self->{_current_head1} = '';   # the current =head1 block
}
516
# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
#
# Prints one diagnostic to the parser's output handle and counts it as an
# error where appropriate.
#
# Options read from the leading hash reference:
#   -msg       message text; the remaining @args are appended to it
#   -line      line number, printed as " at line N"
#   -file      file name, printed as " in file F"
#   -severity  printed as a "*** SEVERITY: " prefix; when absent, an
#              upper-case "WORD: " prefix in the message is used instead
#
# The error count goes up when no options are in effect at all or when the
# severity is 'ERROR'.  The message is printed unless it is a 'WARNING'
# and the object's -warnings flag is off.
sub poderror {
    my $self = shift;
    my %opts = (ref $_[0]) ? %{shift()} : ();

    ## Retrieve options
    chomp( my $msg = ($opts{-msg} || "")."@_" );
    my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
    my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
    unless (exists $opts{-severity}) {
        ## See if can find severity in message prefix
        $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
    }
    my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";

    ## A missing severity must not reach the string comparison below:
    ## options given without one (and without a recognisable message
    ## prefix) leave $opts{-severity} undefined.
    my $is_warning = (defined $opts{-severity}
                      && $opts{-severity} eq 'WARNING') ? 1 : 0;

    ## Increment error count and print message
    ++($self->{_NUM_ERRORS})
        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
    my $out_fh = $self->output_handle();
    print $out_fh ($severity, $msg, $line, $file, "\n")
        if($self->{-warnings} || !%opts || !$is_warning);
}
539
# set/retrieve the number of errors found
#
# With an argument, stores it as the new count and returns it; without
# one, returns the current count.
sub num_errors {
    my $self = shift;
    return $self->{_NUM_ERRORS} unless @_;
    return $self->{_NUM_ERRORS} = $_[0];
}
544
# set and/or retrieve canonical name of POD
#
# A true argument replaces the stored name; a missing or false argument
# leaves it untouched.  Either way the stored name is returned.
sub name {
    my ($self, $new_name) = @_;
    $self->{-name} = $new_name if $new_name;
    return $self->{-name};
}
550
# set/return nodes of the current POD
#
# Given a defined $text, normalizes its whitespace, records it as a node
# and returns the normalized text.  Called without text, returns the list
# of all nodes recorded so far, in the order they were added.
sub node {
    my ($self, $text) = @_;
    return @{$self->{_nodes}} unless defined $text;

    for ($text) {
        s/\s+$//s;    # strip trailing whitespace
        s/\s+/ /gs;   # collapse whitespace
    }
    # add node, order important!
    push @{$self->{_nodes}}, $text;
    # keep also a uniqueness counter
    $self->{_unique_nodes}->{$text}++;
    return $text;
}
565
# set/return hyperlinks of the current POD
#
# A true argument is appended to the link list and returned; otherwise
# the list of all links recorded so far is returned.
sub hyperlink {
    my ($self, $link) = @_;
    return @{$self->{_links}} unless $link;
    push @{$self->{_links}}, $link;
    return $link;
}
575
e3237417 576## overrides for Pod::Parser
577
# Final checks, run when the document ends:
#   * report every =over still open (stops at the first auto-opened list),
#   * report internal links that match no =head/=item node,
#   * warn about node texts that occur more than once,
#   * print a one-line summary.
# Sets the error count to -1 if the file contained no POD command at all.
sub end_pod {
    my $self = shift;
    my $infile = $self->input_file();
    my $out_fh = $self->output_handle();

    # Lists still open at end of file.  These are reported with severity
    # ERROR and therefore do add to the error count.
    if(@{$self->{_list_stack}}) {
        my $list;
        while(($list = $self->_close_list('EOF',$infile)) &&
              $list->indent() ne 'auto') {
            $self->poderror({ -line => 'EOF', -file => $infile,
                -severity => 'ERROR', -msg => "=over on line " .
                $list->start() . " without closing =back" });
        }
    }

    # check validity of document internal hyperlinks
    # first build the node names from the paragraph text
    my %nodes;
    foreach my $node ($self->node()) {
        $nodes{$node} = 1;
        if($node =~ /^(\S+)\s+/) {
            # we have more than one word. Use the first as a node, too.
            # This is used heavily in perlfunc.pod
            $nodes{$1} ||= 2; # derived node
        }
    }

    # each recorded link has the form "LINE:target"
    foreach my $entry ($self->hyperlink()) {
        my $target = $entry;
        my $line = '';
        $target =~ s/^(\d+):// && ($line = $1);
        if($target && !$nodes{$target}) {
            $self->poderror({ -line => $line, -file => $infile,
                -severity => 'ERROR',
                -msg => "unresolved internal link '$target'"});
        }
    }

    # node texts recorded more than once cannot be unique link targets
    foreach my $target (grep($self->{_unique_nodes}->{$_} > 1,
                             keys %{$self->{_unique_nodes}})) {
        $self->poderror({ -line => '-', -file => $infile,
            -severity => 'WARNING',
            -msg => "multiple occurence of link target '$target'"});
    }

    ## Print the number of errors found.  The file name is printed as
    ## data, never as part of a printf format, so a '%' in the name
    ## cannot be taken for a conversion.
    my $num_errors = $self->num_errors();
    if ($num_errors > 0) {
        print $out_fh ("$infile has $num_errors pod syntax ",
                       (($num_errors == 1) ? "error" : "errors"), ".\n");
    }
    elsif($self->{_commands} == 0) {
        print $out_fh "$infile does not contain any pod commands.\n";
        $self->num_errors(-1);
    }
    else {
        print $out_fh "$infile pod syntax OK.\n";
    }
}
637
# check a POD command directive
#
# Arguments after $self:
#   $cmd        directive name, e.g. 'head1' or 'over'
#   $paragraph  text that follows the directive
#   $line_num   not used here; the position comes from $pod_para
#   $pod_para   paragraph object; only its file_line() is used
#
# Unknown directives are reported and otherwise ignored.  For known ones
# the directive-specific syntax is checked, list / =begin / heading state
# on $self is updated, and the interior sequences of $paragraph are
# checked (except for =for, whose text is not expanded).
sub command {
    my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
    my ($file, $line) = $pod_para->file_line;
    ## Check the command syntax
    my $arg; # this will hold the command argument; it stays undefined
             # when the branch below did not interpolate $paragraph
    if (! $VALID_COMMANDS{$cmd}) {
        $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
                          -msg => "Unknown command '$cmd'" });
    }
    else {
        # found a valid command; the very first one should be a heading
        if(!$self->{_commands}++ && $cmd !~ /^head/) {
            $self->poderror({ -line => $line, -file => $file,
                 -severity => 'WARNING',
                 -msg => "file does not start with =head" });
        }
        ## check syntax of particular command
        if($cmd eq 'over') {
            # check for argument
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
            my $indent = 4; # default
            if($arg && $arg =~ /^\s*(\d+)\s*$/) {
                $indent = $1;
            } else {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'WARNING',
                     -msg => "No numeric argument for =over"});
            }
            # start a new list
            $self->_open_list($indent,$line,$file);
        }
        elsif($cmd eq 'item') {
            # are we in a list?
            unless(@{$self->{_list_stack}}) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'ERROR',
                     -msg => "=item without previous =over" });
                # auto-open in case we encounter many more
                $self->_open_list('auto',$line,$file);
            }
            my $list = $self->{_list_stack}->[0];
            # check whether the previous item had some contents
            if(defined $self->{_list_item_contents} &&
               $self->{_list_item_contents} == 0) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'WARNING',
                     -msg => "previous =item has no contents" });
            }
            if($list->{_has_par}) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'WARNING',
                     -msg => "preceding non-item paragraph(s)" });
                delete $list->{_has_par};
            }
            # check for argument
            $arg = $self->interpolate_and_check($paragraph, $line, $file);
            if($arg && $arg =~ /(\S+)/) {
                $arg =~ s/[\s\n]+$//;
                my $type;
                if($arg =~ /^[*]\s*(\S*.*)/) {
                    # Text after the bullet counts as item contents.  Its
                    # length is tested, not its truth, so that an item
                    # whose text is "0" is not taken for an empty one.
                    my $rest = $1;
                    $type = 'bullet';
                    $self->{_list_item_contents} = length($rest) ? 1 : 0;
                    $arg = $rest;
                }
                elsif($arg =~ /^\d+\.?\s*(\S*)/) {
                    # same rule for the word following the item number
                    my $rest = $1;
                    $type = 'number';
                    $self->{_list_item_contents} = length($rest) ? 1 : 0;
                    $arg = $rest;
                }
                else {
                    $type = 'definition';
                    $self->{_list_item_contents} = 1;
                }
                my $first = $list->type();
                if($first && $first ne $type) {
                    $self->poderror({ -line => $line, -file => $file,
                       -severity => 'WARNING',
                       -msg => "=item type mismatch ('$first' vs. '$type')"});
                }
                else { # first item
                    $list->type($type);
                }
            }
            else {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'WARNING',
                     -msg => "No argument for =item" });
                $arg = ' '; # empty
                $self->{_list_item_contents} = 0;
            }
            # add this item
            $list->item($arg);
            # remember this node
            $self->node($arg);
        }
        elsif($cmd eq 'back') {
            # check if we have an open list
            unless(@{$self->{_list_stack}}) {
                $self->poderror({ -line => $line, -file => $file,
                         -severity => 'ERROR',
                         -msg => "=back without previous =over" });
            }
            else {
                # check for spurious characters
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
                if($arg && $arg =~ /\S/) {
                    $self->poderror({ -line => $line, -file => $file,
                         -severity => 'ERROR',
                         -msg => "Spurious character(s) after =back" });
                }
                # close list
                my $list = $self->_close_list($line,$file);
                # check for empty lists
                if(!$list->item() && $self->{-warnings}) {
                    $self->poderror({ -line => $line, -file => $file,
                         -severity => 'WARNING',
                         -msg => "No items in =over (at line " .
                         $list->start() . ") / =back list"});
                }
            }
        }
        elsif($cmd =~ /^head(\d+)/) {
            my $level = $1;
            # A heading right after a heading of the same or a deeper
            # level means the previous section had nothing in it.
            if(defined $self->{_commands_in_head} &&
               $self->{_commands_in_head} == 0 &&
               defined $self->{_last_head} &&
               $self->{_last_head} >= $level) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'WARNING',
                     -msg => "empty section in previous paragraph"});
            }
            # -1 because the increment at the end of this sub brings the
            # counter to 0 for the heading itself
            $self->{_commands_in_head} = -1;
            $self->{_last_head} = $level;
            # check if there is an open list
            if(@{$self->{_list_stack}}) {
                my $list;
                while(($list = $self->_close_list($line,$file)) &&
                      $list->indent() ne 'auto') {
                    $self->poderror({ -line => $line, -file => $file,
                         -severity => 'ERROR',
                         -msg => "=over on line ". $list->start() .
                         " without closing =back (at $cmd)" });
                }
            }
            # remember this node
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
            $arg =~ s/[\s\n]+$//s;
            $self->node($arg);
            unless(length($arg)) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'ERROR',
                     -msg => "empty =$cmd"});
            }
            if($cmd eq 'head1') {
                $self->{_current_head1} = $arg;
            } else {
                $self->{_current_head1} = '';
            }
        }
        elsif($cmd eq 'begin') {
            if($self->{_have_begin}) {
                # already have a begin
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'ERROR',
                     -msg => "Nested =begin's (first at line " .
                     $self->{_have_begin} . ")"});
            }
            else {
                # check for argument
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
                # Take the formatter name from this match only; when the
                # match fails $1 would still hold whatever an earlier,
                # unrelated match left behind.
                my $formatter = '';
                if($arg && $arg =~ /(\S+)/) {
                    $formatter = $1;
                }
                else {
                    $self->poderror({ -line => $line, -file => $file,
                         -severity => 'ERROR',
                         -msg => "No argument for =begin"});
                }
                # remember the =begin
                $self->{_have_begin} = "$line:$formatter";
            }
        }
        elsif($cmd eq 'end') {
            if($self->{_have_begin}) {
                # close the existing =begin
                $self->{_have_begin} = '';
                # check for spurious characters
                $arg = $self->interpolate_and_check($paragraph, $line,$file);
                # the closing argument is optional
                #if($arg && $arg =~ /\S/) {
                #    $self->poderror({ -line => $line, -file => $file,
                #         -severity => 'WARNING',
                #         -msg => "Spurious character(s) after =end" });
                #}
            }
            else {
                # don't have a matching =begin
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'ERROR',
                     -msg => "=end without =begin" });
            }
        }
        elsif($cmd eq 'for') {
            unless($paragraph =~ /\s*(\S+)\s*/) {
                $self->poderror({ -line => $line, -file => $file,
                     -severity => 'ERROR',
                     -msg => "=for without formatter specification" });
            }
            $arg = ''; # do not expand paragraph below
        }
        elsif($cmd =~ /^(pod|cut)$/) {
            # check for argument
            $arg = $self->interpolate_and_check($paragraph, $line,$file);
            if($arg && $arg =~ /(\S+)/) {
                $self->poderror({ -line => $line, -file => $file,
                      -severity => 'ERROR',
                      -msg => "Spurious text after =$cmd"});
            }
        }
        $self->{_commands_in_head}++;
        ## Check the interior sequences in the command-text, unless a
        ## branch above has already done so
        $self->interpolate_and_check($paragraph, $line,$file)
            unless(defined $arg);
    }
}
860
# Create a Pod::List for an =over found at $line of $file, put it on top
# of the list stack and return it.  $indent is the indentation recorded
# for the list ('auto' is passed for lists opened implicitly by =item).
# The "previous item has contents" tracker is reset for the new list.
sub _open_list
{
    my $self = shift;
    my ($indent, $line, $file) = @_;
    my @spec = (
        -indent => $indent,
        -start  => $line,
        -file   => $file,
    );
    my $new_list = Pod::List->new(@spec);
    unshift @{$self->{_list_stack}}, $new_list;
    $self->{_list_item_contents} = undef;
    return $new_list;
}
872
# Take the innermost list off the list stack and return it (undef if the
# stack is empty).  If the last =item seen had no contents, that is
# reported as a warning at $line / $file first.  The item-contents
# tracker is cleared in any case.
sub _close_list
{
    my ($self, $line, $file) = @_;
    my $closed  = shift @{$self->{_list_stack}};
    my $pending = $self->{_list_item_contents};
    if(defined($pending) && $pending == 0) {
        $self->poderror({
            -line     => $line,
            -file     => $file,
            -severity => 'WARNING',
            -msg      => "previous =item has no contents",
        });
    }
    $self->{_list_item_contents} = undef;
    return $closed;
}
886
# process a block of some text
#
# Parses $paragraph into a parse tree, checks every interior sequence in
# it and returns the resulting expanded text.  $line and $file are used
# for the positions in any diagnostics.
sub interpolate_and_check {
    my ($self, $paragraph, $line, $file) = @_;
    my @ptree = $self->parse_text($paragraph, $line);
    # top level: no enclosing sequences yet, hence the empty nest list
    return $self->_check_ptree(@ptree, $line, $file, '');
}
895
# Walk the parse tree $ptree, check every interior sequence found in it
# and return the expanded text.
#
#   $line, $file  position used in diagnostics for plain text chunks;
#                 replaced by each sequence's own position once one is met
#   $nestlist     string of the sequence letters enclosing this tree,
#                 used to detect a sequence nested inside itself
#
# X<> and Z<> contribute nothing to the returned text; E<> contributes the
# character it names; the other sequences contribute their expanded
# contents (for L<>, the expanded link text).
sub _check_ptree {
    my ($self,$ptree,$line,$file,$nestlist) = @_;
    local($_);
    my $expanded = '';
    # process each node in the parse tree
    foreach my $chunk (@$ptree) {
        # regular text chunk
        unless(ref $chunk) {
            my $plain = $chunk;
            # count the unescaped angle brackets
            my $brackets = ($plain =~ tr/<>/<>/);
            if($brackets && $self->{-warnings}) {
                $self->poderror({ -line => $line, -file => $file,
                    -severity => 'WARNING',
                    -msg => "$brackets unescaped <> in paragraph" });
            }
            $expanded .= $plain;
            next;
        }

        # have an interior sequence
        my $cmd = $chunk->cmd_name();
        my $contents = $chunk->parse_tree();
        ($file,$line) = $chunk->file_line();
        my $nested = "$nestlist$cmd";   # nest list for anything inside

        # check for valid tag
        unless($VALID_SEQUENCES{$cmd}) {
            $self->poderror({ -line => $line, -file => $file,
                 -severity => 'ERROR',
                 -msg => qq(Unknown interior-sequence '$cmd')});
            # expand it anyway
            $expanded .= $self->_check_ptree($contents, $line, $file, $nested);
            next;
        }

        # the same sequence is already open further out
        if($nestlist =~ /$cmd/) {
            $self->poderror({ -line => $line, -file => $file,
                 -severity => 'ERROR',
                 -msg => "nested commands $cmd<...$cmd<...>...>"});
            # _TODO_ should we add the contents anyway?
            # expand it anyway, see below
        }

        if($cmd eq 'E') {
            # preserve entities; the contents must be one plain word
            if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
                $self->poderror({ -line => $line, -file => $file,
                    -severity => 'ERROR',
                    -msg => "garbled entity " . $chunk->raw_text()});
                next;
            }
            my $ent = $$contents[0];
            # numeric forms: 0x.. hexadecimal, 0.. octal, otherwise decimal
            my $val = ($ent =~ /^0x[0-9a-f]+$/i) ? hex($ent)
                    : ($ent =~ /^0\d+$/)         ? oct($ent)
                    : ($ent =~ /^\d+$/)          ? $ent
                    :                              undef;
            if(defined $val) {
                if($val>0 && $val<256) {
                    $expanded .= chr($val);
                }
                else {
                    $self->poderror({ -line => $line, -file => $file,
                        -severity => 'ERROR',
                        -msg => "Entity number out of range " . $chunk->raw_text()});
                }
            }
            elsif($ENTITIES{$ent}) {
                # known ISO entity
                $expanded .= $ENTITIES{$ent};
            }
            else {
                $self->poderror({ -line => $line, -file => $file,
                    -severity => 'WARNING',
                    -msg => "Unknown entity " . $chunk->raw_text()});
                $expanded .= "E<$ent>";
            }
        }
        elsif($cmd eq 'L') {
            # try to parse the hyperlink
            my $link = Pod::Hyperlink->new($contents->raw_text());
            unless(defined $link) {
                $self->poderror({ -line => $line, -file => $file,
                    -severity => 'ERROR',
                    -msg => "malformed link " . $chunk->raw_text() ." : $@"});
                next;
            }
            $link->line($line); # remember line
            if($self->{-warnings}) {
                foreach my $w ($link->warning()) {
                    $self->poderror({ -line => $line, -file => $file,
                        -severity => 'WARNING',
                        -msg => $w });
                }
            }
            # check the link text
            my $link_tree = $self->parse_text($link->text(), $line);
            $expanded .= $self->_check_ptree($link_tree, $line, $file, $nested);
            my $node = '';
            # remember internal link
            # _TODO_ what if there is a link to the page itself by the name,
            # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
            if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
                my $node_tree = $self->parse_text($link->node(), $line);
                $node = $self->_check_ptree($node_tree, $line, $file, $nested);
                $self->hyperlink("$line:$node") if($node);
            }
        }
        elsif($cmd =~ /[BCFIS]/) {
            # add the guts
            $expanded .= $self->_check_ptree($contents, $line, $file, $nested);
        }
        elsif($cmd eq 'Z') {
            if(length($contents->raw_text())) {
                $self->poderror({ -line => $line, -file => $file,
                    -severity => 'ERROR',
                    -msg => "Nonempty Z<>"});
            }
        }
        else { # X<>
            # check, but add nothing to the expanded text
            $self->_check_ptree($contents, $line, $file, $nested);
        }
    }
    return $expanded;
}
1027
e2c3adef 1028# _TODO_ overloadable methods for BC..Z<...> expansion?
e3237417 1029
# process a block of verbatim text
#
# Verbatim text has no syntax of its own to check.  The paragraph is
# passed to _preproc_par, and a warning is issued when it appears while
# the current =head1 section is NAME.
sub verbatim {
    my ($self, $paragraph, $line_num, $pod_para) = @_;

    $self->_preproc_par($paragraph);

    if($self->{_current_head1} eq 'NAME') {
        my ($file, $line) = $pod_para->file_line;
        my %diag = (
            -line     => $line,
            -file     => $file,
            -severity => 'WARNING',
            -msg      => 'Verbatim paragraph in NAME section',
        );
        $self->poderror(\%diag);
    }
}
1044
# process a block of regular text
#
# The paragraph is passed to _preproc_par and then, unless a =begin block
# is open, has its interior sequences checked.  Inside the NAME section
# the leading word before the first ',' or '-' becomes the document's
# canonical name, provided no name has been set yet.
sub textblock {
    my ($self, $paragraph, $line_num, $pod_para) = @_;
    my ($file, $line) = $pod_para->file_line;

    $self->_preproc_par($paragraph);

    # skip this paragraph if in a =begin block
    unless($self->{_have_begin}) {
        my $expanded = $self->interpolate_and_check($paragraph, $line, $file);
        if($self->{_current_head1} eq 'NAME'
           && !defined($self->{-name})
           && $expanded =~ /^\s*(\S+?)\s*[,-]/) {
            # this is the canonical name
            $self->{-name} = $1;
        }
    }
}
1063
# Common bookkeeping for text and verbatim paragraphs.
#
# The paragraph is taken through $_[0] and trimmed IN PLACE: trailing
# whitespace is removed from the caller's variable.  If any text remains,
# the paragraph counts as contents of the current section and of the
# current list item, and a paragraph met inside a list before its first
# =item is flagged on that list.
#
# "Any text remains" is decided by length, not by truth, so a paragraph
# consisting of just "0" still counts as contents.
sub _preproc_par
{
    my $self = shift;
    $_[0] =~ s/[\s\n]+$//;
    if(defined($_[0]) && length($_[0])) {
        $self->{_commands_in_head}++;
        $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
        if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
            $self->{_list_stack}->[0]->{_has_par} = 1;
        }
    }
}
1076
e3237417 10771;
66aff6dd 1078