Darwin is not Windows.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Checker.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
92e3d63a 13$VERSION = 1.2; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef 16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43 18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43 31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43 36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef 39=head2 podchecker()
40
41This function can take a hash of options:
e3237417 42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a 47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417 49
50=back
360aca43 51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
e2c3adef 56I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43 57
58It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef 59additional features they wish to see in B<Pod::Checker> and B<podchecker>
60and verify that the checks are consistent with L<perlpod>.
360aca43 61
48f30392 62The following checks are currently preformed:
e3237417 63
64=over 4
65
66=item *
67
48f30392 68Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef 69and unterminated interior sequences.
70
71=item *
72
73Check for proper balancing of C<=begin> and C<=end>. The contents of such
74a block are generally ignored, i.e. no syntax checks are performed.
e3237417 75
76=item *
77
78Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
79
80=item *
81
e2c3adef 82Check for same nested interior-sequences (e.g.
83C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417 84
85=item *
86
e2c3adef 87Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417 88
89=item *
90
e2c3adef 91Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
92for details.
e3237417 93
94=item *
95
e2c3adef 96Check for unresolved document-internal links. This check may also reveal
97misspelled links that seem to be internal links but should be links
98to something else.
e3237417 99
100=back
101
e2c3adef 102=head1 DIAGNOSTICS
e3237417 103
e2c3adef 104=head2 Errors
e3237417 105
106=over 4
107
66aff6dd 108=item * empty =headn
109
110A heading (C<=head1> or C<=head2>) without any text? That ain't no
111heading!
112
e2c3adef 113=item * =over on line I<N> without closing =back
e3237417 114
e2c3adef 115The C<=over> command does not have a corresponding C<=back> before the
116next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 117
e2c3adef 118=item * =item without previous =over
e3237417 119
e2c3adef 120=item * =back without previous =over
e3237417 121
e2c3adef 122An C<=item> or C<=back> command has been found outside a
123C<=over>/C<=back> block.
e3237417 124
e2c3adef 125=item * No argument for =begin
e3237417 126
e2c3adef 127A C<=begin> command was found that is not followed by the formatter
128specification.
e3237417 129
e2c3adef 130=item * =end without =begin
e3237417 131
e2c3adef 132A standalone C<=end> command was found.
133
134=item * Nested =begin's
135
66aff6dd 136There were at least two consecutive C<=begin> commands without
e2c3adef 137the corresponding C<=end>. Only one C<=begin> may be active at
138a time.
139
140=item * =for without formatter specification
e3237417 141
e2c3adef 142There is no specification of the formatter after the C<=for> command.
143
144=item * unresolved internal link I<NAME>
145
146The given link to I<NAME> does not have a matching node in the current
147POD. This also happend when a single word node name is not enclosed in
148C<"">.
149
150=item * Unknown command "I<CMD>"
151
152An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d 153C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
154C<=for>, C<=pod>, C<=cut>
e2c3adef 155
156=item * Unknown interior-sequence "I<SEQ>"
157
158An invalid markup command has been encountered. Valid are:
159C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
160C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
161C<ZE<lt>E<gt>>
162
163=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
164
165Two nested identical markup commands have been found. Generally this
166does not make sense.
167
168=item * garbled entity I<STRING>
169
66aff6dd 170The I<STRING> found cannot be interpreted as a character entity.
171
172=item * Entity number out of range
173
174An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef 175
176=item * malformed link LE<lt>E<gt>
177
178The link found cannot be parsed because it does not conform to the
179syntax described in L<perlpod>.
e3237417 180
66aff6dd 181=item * nonempty ZE<lt>E<gt>
182
183The C<ZE<lt>E<gt>> sequence is supposed to be empty.
184
48f30392 185=item * empty XE<lt>E<gt>
186
187The index entry specified contains nothing but whitespace.
188
66aff6dd 189=item * Spurious text after =pod / =cut
190
191The commands C<=pod> and C<=cut> do not take any arguments.
192
193=item * Spurious character(s) after =back
194
195The C<=back> command does not take any arguments.
196
e3237417 197=back
198
e2c3adef 199=head2 Warnings
e3237417 200
e2c3adef 201These may not necessarily cause trouble, but indicate mediocre style.
202
203=over 4
204
051b35bf 205=item * multiple occurence of link target I<name>
66aff6dd 206
207The POD file has some C<=item> and/or C<=head> commands that have
208the same text. Potential hyperlinks to such a text cannot be unique then.
209
210=item * line containing nothing but whitespace in paragraph
211
212There is some whitespace on a seemingly empty line. POD is very sensitive
213to such things, so this is flagged. B<vi> users switch on the B<list>
214option to avoid this problem.
215
92e3d63a 216=begin _disabled_
217
66aff6dd 218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
92e3d63a 223=end _disabled_
224
e2c3adef 225=item * No numeric argument for =over
226
227The C<=over> command is supposed to have a numeric argument (the
228indentation).
229
66aff6dd 230=item * previous =item has no contents
e2c3adef 231
66aff6dd 232There is a list C<=item> right above the flagged line that has no
233text contents. You probably want to delete empty items.
234
235=item * preceding non-item paragraph(s)
236
237A list introduced by C<=over> starts with a text or verbatim paragraph,
238but continues with C<=item>s. Move the non-item paragraph out of the
239C<=over>/C<=back> block.
240
241=item * =item type mismatch (I<one> vs. I<two>)
242
243A list started with e.g. a bulletted C<=item> and continued with a
244numbered one. This is obviously inconsistent. For most translators the
245type of the I<first> C<=item> determines the type of the list.
e2c3adef 246
247=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
248
249Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
250can potentially cause errors as they could be misinterpreted as
92e3d63a 251markup commands. This is only printed when the -warnings level is
252greater than 1.
e2c3adef 253
66aff6dd 254=item * Unknown entity
e2c3adef 255
256A character entity was found that does not belong to the standard
66aff6dd 257ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef 258
259=item * No items in =over
260
66aff6dd 261The list opened with C<=over> does not contain any items.
e2c3adef 262
263=item * No argument for =item
264
265C<=item> without any parameters is deprecated. It should either be followed
266by C<*> to indicate an unordered list, by a number (optionally followed
267by a dot) to indicate an ordered (numbered) list or simple text for a
268definition list.
269
66aff6dd 270=item * empty section in previous paragraph
271
272The previous section (introduced by a C<=head> command) does not contain
273any text. This usually indicates that something is missing. Note: A
274C<=head1> followed immediately by C<=head2> does not trigger this warning.
275
e2c3adef 276=item * Verbatim paragraph in NAME section
277
278The NAME section (C<=head1 NAME>) should consist of a single paragraph
279with the script/module name, followed by a dash `-' and a very short
280description of what the thing is good for.
281
92e3d63a 282=back
283
284=head2 Hyperlinks
285
286There are some warnings wrt. malformed hyperlinks.
287
288=over 4
289
290=item * collapsing newlines to blanks
291
292A hyperlink LE<lt>...E<gt> spans more than one line. This may indicate
293and error.
e2c3adef 294
92e3d63a 295=item * ignoring leading/trailing whitespace in link
296
297There is whitespace at the beginning or the end of the contents of
298LE<lt>...E<gt>.
299
300=item * (section) in '$page' deprecated
301
302There is a section detected in the page name of LE<lt>...E<gt>, e.g.
303C<LE<gt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
304Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
305to expand this to appropriate code. For links to (builtin) functions,
306please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
307
308=item * alternative text/node '%s' contains non-escaped | or /
309
310The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
311Although the hyperlink parser does its best to determine which "/" is
312text and which is a delimiter in case of doubt, one ought to escape
313these literal characters like this:
314
315 / E<sol>
316 | E<verbar>
e2c3adef 317
318=back
e3237417 319
320=head1 RETURN VALUE
321
322B<podchecker> returns the number of POD syntax errors found or -1 if
323there were no POD commands at all found in the file.
324
360aca43 325=head1 EXAMPLES
326
327I<[T.B.D.]>
328
48f30392 329=head1 INTERFACE
360aca43 330
48f30392 331While checking, this module collects document properties, e.g. the nodes
332for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
333POD translators can use this feature to syntax-check and get the nodes in
334a first pass before actually starting to convert. This is expensive in terms
335of execution time, but allows for very robust conversions.
360aca43 336
337=cut
338
339#############################################################################
340
341use strict;
342#use diagnostics;
343use Carp;
344use Exporter;
345use Pod::Parser;
346
347use vars qw(@ISA @EXPORT);
348@ISA = qw(Pod::Parser);
349@EXPORT = qw(&podchecker);
350
351use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
352
353my %VALID_COMMANDS = (
354 'pod' => 1,
355 'cut' => 1,
356 'head1' => 1,
357 'head2' => 1,
c6b85e5d 358 'head3' => 1,
359 'head4' => 1,
360aca43 360 'over' => 1,
361 'back' => 1,
362 'item' => 1,
363 'for' => 1,
364 'begin' => 1,
365 'end' => 1,
366);
367
368my %VALID_SEQUENCES = (
369 'I' => 1,
370 'B' => 1,
371 'S' => 1,
372 'C' => 1,
373 'L' => 1,
374 'F' => 1,
375 'X' => 1,
376 'Z' => 1,
377 'E' => 1,
378);
379
e2c3adef 380# stolen from HTML::Entities
381my %ENTITIES = (
382 # Some normal chars that have special meaning in SGML context
383 amp => '&', # ampersand
384'gt' => '>', # greater than
385'lt' => '<', # less than
386 quot => '"', # double quote
387
388 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
389 AElig => 'Æ', # capital AE diphthong (ligature)
390 Aacute => 'Á', # capital A, acute accent
391 Acirc => 'Â', # capital A, circumflex accent
392 Agrave => 'À', # capital A, grave accent
393 Aring => 'Å', # capital A, ring
394 Atilde => 'Ã', # capital A, tilde
395 Auml => 'Ä', # capital A, dieresis or umlaut mark
396 Ccedil => 'Ç', # capital C, cedilla
397 ETH => 'Ð', # capital Eth, Icelandic
398 Eacute => 'É', # capital E, acute accent
399 Ecirc => 'Ê', # capital E, circumflex accent
400 Egrave => 'È', # capital E, grave accent
401 Euml => 'Ë', # capital E, dieresis or umlaut mark
402 Iacute => 'Í', # capital I, acute accent
403 Icirc => 'Î', # capital I, circumflex accent
404 Igrave => 'Ì', # capital I, grave accent
405 Iuml => 'Ï', # capital I, dieresis or umlaut mark
406 Ntilde => 'Ñ', # capital N, tilde
407 Oacute => 'Ó', # capital O, acute accent
408 Ocirc => 'Ô', # capital O, circumflex accent
409 Ograve => 'Ò', # capital O, grave accent
410 Oslash => 'Ø', # capital O, slash
411 Otilde => 'Õ', # capital O, tilde
412 Ouml => 'Ö', # capital O, dieresis or umlaut mark
413 THORN => 'Þ', # capital THORN, Icelandic
414 Uacute => 'Ú', # capital U, acute accent
415 Ucirc => 'Û', # capital U, circumflex accent
416 Ugrave => 'Ù', # capital U, grave accent
417 Uuml => 'Ü', # capital U, dieresis or umlaut mark
418 Yacute => 'Ý', # capital Y, acute accent
419 aacute => 'á', # small a, acute accent
420 acirc => 'â', # small a, circumflex accent
421 aelig => 'æ', # small ae diphthong (ligature)
422 agrave => 'à', # small a, grave accent
423 aring => 'å', # small a, ring
424 atilde => 'ã', # small a, tilde
425 auml => 'ä', # small a, dieresis or umlaut mark
426 ccedil => 'ç', # small c, cedilla
427 eacute => 'é', # small e, acute accent
428 ecirc => 'ê', # small e, circumflex accent
429 egrave => 'è', # small e, grave accent
430 eth => 'ð', # small eth, Icelandic
431 euml => 'ë', # small e, dieresis or umlaut mark
432 iacute => 'í', # small i, acute accent
433 icirc => 'î', # small i, circumflex accent
434 igrave => 'ì', # small i, grave accent
435 iuml => 'ï', # small i, dieresis or umlaut mark
436 ntilde => 'ñ', # small n, tilde
437 oacute => 'ó', # small o, acute accent
438 ocirc => 'ô', # small o, circumflex accent
439 ograve => 'ò', # small o, grave accent
440 oslash => 'ø', # small o, slash
441 otilde => 'õ', # small o, tilde
442 ouml => 'ö', # small o, dieresis or umlaut mark
443 szlig => 'ß', # small sharp s, German (sz ligature)
444 thorn => 'þ', # small thorn, Icelandic
445 uacute => 'ú', # small u, acute accent
446 ucirc => 'û', # small u, circumflex accent
447 ugrave => 'ù', # small u, grave accent
448 uuml => 'ü', # small u, dieresis or umlaut mark
449 yacute => 'ý', # small y, acute accent
450 yuml => 'ÿ', # small y, dieresis or umlaut mark
451
452 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
453 copy => '©', # copyright sign
454 reg => '®', # registered sign
455 nbsp => "\240", # non breaking space
456
457 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
458 iexcl => '¡',
459 cent => '¢',
460 pound => '£',
461 curren => '¤',
462 yen => '¥',
463 brvbar => '¦',
464 sect => '§',
465 uml => '¨',
466 ordf => 'ª',
467 laquo => '«',
468'not' => '¬', # not is a keyword in perl
469 shy => '­',
470 macr => '¯',
471 deg => '°',
472 plusmn => '±',
473 sup1 => '¹',
474 sup2 => '²',
475 sup3 => '³',
476 acute => '´',
477 micro => 'µ',
478 para => '¶',
479 middot => '·',
480 cedil => '¸',
481 ordm => 'º',
482 raquo => '»',
483 frac14 => '¼',
484 frac12 => '½',
485 frac34 => '¾',
486 iquest => '¿',
487'times' => '×', # times is a keyword in perl
488 divide => '÷',
66aff6dd 489
490# some POD special entities
491 verbar => '|',
492 sol => '/'
e2c3adef 493);
494
360aca43 495##---------------------------------------------------------------------------
496
497##---------------------------------
498## Function definitions begin here
499##---------------------------------
500
e3237417 501sub podchecker( $ ; $ % ) {
502 my ($infile, $outfile, %options) = @_;
360aca43 503 local $_;
504
505 ## Set defaults
506 $infile ||= \*STDIN;
507 $outfile ||= \*STDERR;
508
509 ## Now create a pod checker
e3237417 510 my $checker = new Pod::Checker(%options);
360aca43 511
512 ## Now check the pod document for errors
513 $checker->parse_from_file($infile, $outfile);
48f30392 514
360aca43 515 ## Return the number of errors found
516 return $checker->num_errors();
517}
518
519##---------------------------------------------------------------------------
520
521##-------------------------------
522## Method definitions begin here
523##-------------------------------
524
92e3d63a 525##################################
526
527=over 4
528
529=item C<Pod::Checker-E<gt>new( %options )>
530
531Return a reference to a new Pod::Checker object that inherits from
532Pod::Parser and is used for calling the required methods later. The
533following options are recognized:
534
535C<-warnings =E<gt> num>
536 Print warnings if C<num> is true. The higher the value of C<num>,
537the more warnings are printed. Currently there are only levels 1 and 2.
538
539C<-quiet =E<gt> num>
540 If C<num> is true, do not print any errors/warnings. This is useful
541when Pod::Checker is used to munge POD code into plain text from within
542POD formatters.
543
544=cut
545
66aff6dd 546## sub new {
547## my $this = shift;
548## my $class = ref($this) || $this;
549## my %params = @_;
550## my $self = {%params};
551## bless $self, $class;
552## $self->initialize();
553## return $self;
554## }
360aca43 555
556sub initialize {
557 my $self = shift;
664bb207 558 ## Initialize number of errors, and setup an error function to
559 ## increment this number and then print to the designated output.
560 $self->{_NUM_ERRORS} = 0;
92e3d63a 561 $self->{-quiet} ||= 0;
562 # set the error handling subroutine
563 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417 564 $self->{_commands} = 0; # total number of POD commands encountered
565 $self->{_list_stack} = []; # stack for nested lists
566 $self->{_have_begin} = ''; # stores =begin
567 $self->{_links} = []; # stack for internal hyperlinks
568 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 569 $self->{_index} = []; # text in X<>
e2c3adef 570 # print warnings?
e3237417 571 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 572 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 573 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207 574}
575
48f30392 576##################################
577
48f30392 578=item C<$checker-E<gt>poderror( @args )>
579
580=item C<$checker-E<gt>poderror( {%opts}, @args )>
581
582Internal method for printing errors and warnings. If no options are
583given, simply prints "@_". The following options are recognized and used
584to form the output:
585
586 -msg
587
588A message to print prior to C<@args>.
589
590 -line
591
592The line number the error occurred in.
593
594 -file
595
596The file (name) the error occurred in.
597
598 -severity
599
600The error level, should be 'WARNING' or 'ERROR'.
601
602=cut
603
e2c3adef 604# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207 605sub poderror {
606 my $self = shift;
607 my %opts = (ref $_[0]) ? %{shift()} : ();
608
609 ## Retrieve options
610 chomp( my $msg = ($opts{-msg} || "")."@_" );
611 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
612 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd 613 unless (exists $opts{-severity}) {
614 ## See if can find severity in message prefix
615 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
616 }
664bb207 617 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
618
e3237417 619 ## Increment error count and print message "
620 ++($self->{_NUM_ERRORS})
621 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
92e3d63a 622 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef 623 print $out_fh ($severity, $msg, $line, $file, "\n")
624 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43 625}
626
48f30392 627##################################
628
629=item C<$checker-E<gt>num_errors()>
630
631Set (if argument specified) and retrieve the number of errors found.
632
633=cut
634
360aca43 635sub num_errors {
636 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
637}
638
48f30392 639##################################
640
641=item C<$checker-E<gt>name()>
642
643Set (if argument specified) and retrieve the canonical name of POD as
644found in the C<=head1 NAME> section.
645
646=cut
647
e2c3adef 648sub name {
649 return (@_ > 1 && $_[1]) ?
650 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
651}
652
48f30392 653##################################
654
655=item C<$checker-E<gt>node()>
656
657Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
658and C<=item>) of the current POD. The nodes are returned in the order of
659their occurence. They consist of plain text, each piece of whitespace is
660collapsed to a single blank.
661
662=cut
663
e2c3adef 664sub node {
665 my ($self,$text) = @_;
666 if(defined $text) {
66aff6dd 667 $text =~ s/\s+$//s; # strip trailing whitespace
668 $text =~ s/\s+/ /gs; # collapse whitespace
669 # add node, order important!
e2c3adef 670 push(@{$self->{_nodes}}, $text);
66aff6dd 671 # keep also a uniqueness counter
48f30392 672 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef 673 return $text;
674 }
675 @{$self->{_nodes}};
676}
677
48f30392 678##################################
679
680=item C<$checker-E<gt>idx()>
681
682Add (if argument specified) and retrieve the index entries (as defined by
683C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
684of whitespace is collapsed to a single blank.
685
686=cut
687
688# set/return index entries of current POD
689sub idx {
690 my ($self,$text) = @_;
691 if(defined $text) {
692 $text =~ s/\s+$//s; # strip trailing whitespace
693 $text =~ s/\s+/ /gs; # collapse whitespace
694 # add node, order important!
695 push(@{$self->{_index}}, $text);
696 # keep also a uniqueness counter
697 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
698 return $text;
699 }
700 @{$self->{_index}};
701}
702
703##################################
704
705=item C<$checker-E<gt>hyperlink()>
706
707Add (if argument specified) and retrieve the hyperlinks (as defined by
708C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line
709number and C<Pod::Hyperlink> object.
710
711=back
712
713=cut
714
e2c3adef 715# set/return hyperlinks of the current POD
716sub hyperlink {
717 my $self = shift;
718 if($_[0]) {
719 push(@{$self->{_links}}, $_[0]);
720 return $_[0];
721 }
722 @{$self->{_links}};
723}
724
e3237417 725## overrides for Pod::Parser
726
360aca43 727sub end_pod {
66aff6dd 728 ## Do some final checks and
729 ## print the number of errors found
730 my $self = shift;
731 my $infile = $self->input_file();
732 my $out_fh = $self->output_handle();
733
734 if(@{$self->{_list_stack}}) {
735 # _TODO_ display, but don't count them for now
736 my $list;
737 while(($list = $self->_close_list('EOF',$infile)) &&
738 $list->indent() ne 'auto') {
739 $self->poderror({ -line => 'EOF', -file => $infile,
740 -severity => 'ERROR', -msg => "=over on line " .
741 $list->start() . " without closing =back" }); #"
742 }
743 }
744
745 # check validity of document internal hyperlinks
746 # first build the node names from the paragraph text
747 my %nodes;
748 foreach($self->node()) {
749 $nodes{$_} = 1;
92e3d63a 750 if(/^(\S+)\s+\S/) {
66aff6dd 751 # we have more than one word. Use the first as a node, too.
752 # This is used heavily in perlfunc.pod
753 $nodes{$1} ||= 2; # derived node
754 }
755 }
92e3d63a 756 foreach($self->idx()) {
757 $nodes{$_} = 3; # index node
758 }
66aff6dd 759 foreach($self->hyperlink()) {
48f30392 760 my ($line,$link) = @$_;
761 # _TODO_ what if there is a link to the page itself by the name,
762 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
763 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
764 my $node = $self->_check_ptree($self->parse_text($link->node(),
765 $line), $line, $infile, 'L');
766 if($node && !$nodes{$node}) {
767 $self->poderror({ -line => $line || '', -file => $infile,
768 -severity => 'ERROR',
769 -msg => "unresolved internal link '$node'"});
770 }
66aff6dd 771 }
772 }
48f30392 773
774 # check the internal nodes for uniqueness. This pertains to
775 # =headX, =item and X<...>
66aff6dd 776 foreach(grep($self->{_unique_nodes}->{$_} > 1,
777 keys %{$self->{_unique_nodes}})) {
778 $self->poderror({ -line => '-', -file => $infile,
779 -severity => 'WARNING',
780 -msg => "multiple occurence of link target '$_'"});
781 }
782
783 ## Print the number of errors found
784 my $num_errors = $self->num_errors();
785 if ($num_errors > 0) {
786 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 787 ($num_errors == 1) ? "error" : "errors");
66aff6dd 788 }
789 elsif($self->{_commands} == 0) {
790 print $out_fh "$infile does not contain any pod commands.\n";
791 $self->num_errors(-1);
792 }
793 else {
794 print $out_fh "$infile pod syntax OK.\n";
795 }
360aca43 796}
797
e2c3adef 798# check a POD command directive
360aca43 799sub command {
664bb207 800 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 801 my ($file, $line) = $pod_para->file_line;
360aca43 802 ## Check the command syntax
e3237417 803 my $arg; # this will hold the command argument
664bb207 804 if (! $VALID_COMMANDS{$cmd}) {
805 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 806 -msg => "Unknown command '$cmd'" });
360aca43 807 }
92e3d63a 808 else { # found a valid command
809 $self->{_commands}++; # delete this line if below is enabled again
810
811 ##### following check disabled due to strong request
812 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
813 # $self->poderror({ -line => $line, -file => $file,
814 # -severity => 'WARNING',
815 # -msg => "file does not start with =head" });
816 #}
817
818 # check syntax of particular command
e3237417 819 if($cmd eq 'over') {
e2c3adef 820 # check for argument
821 $arg = $self->interpolate_and_check($paragraph, $line,$file);
822 my $indent = 4; # default
823 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
824 $indent = $1;
825 } else {
826 $self->poderror({ -line => $line, -file => $file,
827 -severity => 'WARNING',
828 -msg => "No numeric argument for =over"});
829 }
e3237417 830 # start a new list
66aff6dd 831 $self->_open_list($indent,$line,$file);
e3237417 832 }
833 elsif($cmd eq 'item') {
e2c3adef 834 # are we in a list?
e3237417 835 unless(@{$self->{_list_stack}}) {
836 $self->poderror({ -line => $line, -file => $file,
837 -severity => 'ERROR',
838 -msg => "=item without previous =over" });
e2c3adef 839 # auto-open in case we encounter many more
66aff6dd 840 $self->_open_list('auto',$line,$file);
841 }
842 my $list = $self->{_list_stack}->[0];
843 # check whether the previous item had some contents
844 if(defined $self->{_list_item_contents} &&
845 $self->{_list_item_contents} == 0) {
846 $self->poderror({ -line => $line, -file => $file,
847 -severity => 'WARNING',
848 -msg => "previous =item has no contents" });
849 }
850 if($list->{_has_par}) {
851 $self->poderror({ -line => $line, -file => $file,
852 -severity => 'WARNING',
853 -msg => "preceding non-item paragraph(s)" });
854 delete $list->{_has_par};
e3237417 855 }
e2c3adef 856 # check for argument
857 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd 858 if($arg && $arg =~ /(\S+)/) {
859 $arg =~ s/[\s\n]+$//;
860 my $type;
861 if($arg =~ /^[*]\s*(\S*.*)/) {
862 $type = 'bullet';
863 $self->{_list_item_contents} = $1 ? 1 : 0;
864 $arg = $1;
865 }
866 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
867 $type = 'number';
868 $self->{_list_item_contents} = $1 ? 1 : 0;
869 $arg = $1;
870 }
871 else {
872 $type = 'definition';
873 $self->{_list_item_contents} = 1;
874 }
875 my $first = $list->type();
876 if($first && $first ne $type) {
877 $self->poderror({ -line => $line, -file => $file,
878 -severity => 'WARNING',
879 -msg => "=item type mismatch ('$first' vs. '$type')"});
880 }
881 else { # first item
882 $list->type($type);
883 }
884 }
885 else {
e2c3adef 886 $self->poderror({ -line => $line, -file => $file,
887 -severity => 'WARNING',
888 -msg => "No argument for =item" });
889 $arg = ' '; # empty
66aff6dd 890 $self->{_list_item_contents} = 0;
e3237417 891 }
e2c3adef 892 # add this item
66aff6dd 893 $list->item($arg);
e2c3adef 894 # remember this node
895 $self->node($arg);
e3237417 896 }
897 elsif($cmd eq 'back') {
898 # check if we have an open list
899 unless(@{$self->{_list_stack}}) {
900 $self->poderror({ -line => $line, -file => $file,
901 -severity => 'ERROR',
902 -msg => "=back without previous =over" });
903 }
904 else {
905 # check for spurious characters
e2c3adef 906 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 907 if($arg && $arg =~ /\S/) {
908 $self->poderror({ -line => $line, -file => $file,
66aff6dd 909 -severity => 'ERROR',
e3237417 910 -msg => "Spurious character(s) after =back" });
911 }
912 # close list
66aff6dd 913 my $list = $self->_close_list($line,$file);
e3237417 914 # check for empty lists
915 if(!$list->item() && $self->{-warnings}) {
916 $self->poderror({ -line => $line, -file => $file,
917 -severity => 'WARNING',
918 -msg => "No items in =over (at line " .
919 $list->start() . ") / =back list"}); #"
920 }
921 }
922 }
66aff6dd 923 elsif($cmd =~ /^head(\d+)/) {
48f30392 924 # check whether the previous =head section had some contents
66aff6dd 925 if(defined $self->{_commands_in_head} &&
926 $self->{_commands_in_head} == 0 &&
927 defined $self->{_last_head} &&
928 $self->{_last_head} >= $1) {
929 $self->poderror({ -line => $line, -file => $file,
930 -severity => 'WARNING',
931 -msg => "empty section in previous paragraph"});
932 }
933 $self->{_commands_in_head} = -1;
934 $self->{_last_head} = $1;
e3237417 935 # check if there is an open list
936 if(@{$self->{_list_stack}}) {
937 my $list;
66aff6dd 938 while(($list = $self->_close_list($line,$file)) &&
939 $list->indent() ne 'auto') {
e3237417 940 $self->poderror({ -line => $line, -file => $file,
941 -severity => 'ERROR',
e2c3adef 942 -msg => "=over on line ". $list->start() .
943 " without closing =back (at $cmd)" });
e3237417 944 }
945 }
946 # remember this node
e2c3adef 947 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd 948 $arg =~ s/[\s\n]+$//s;
949 $self->node($arg);
950 unless(length($arg)) {
951 $self->poderror({ -line => $line, -file => $file,
952 -severity => 'ERROR',
953 -msg => "empty =$cmd"});
954 }
e2c3adef 955 if($cmd eq 'head1') {
e2c3adef 956 $self->{_current_head1} = $arg;
957 } else {
958 $self->{_current_head1} = '';
959 }
e3237417 960 }
961 elsif($cmd eq 'begin') {
962 if($self->{_have_begin}) {
963 # already have a begin
964 $self->poderror({ -line => $line, -file => $file,
965 -severity => 'ERROR',
966 -msg => "Nested =begin's (first at line " .
967 $self->{_have_begin} . ")"});
968 }
969 else {
970 # check for argument
e2c3adef 971 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417 972 unless($arg && $arg =~ /(\S+)/) {
973 $self->poderror({ -line => $line, -file => $file,
e2c3adef 974 -severity => 'ERROR',
e3237417 975 -msg => "No argument for =begin"});
976 }
977 # remember the =begin
978 $self->{_have_begin} = "$line:$1";
979 }
980 }
981 elsif($cmd eq 'end') {
982 if($self->{_have_begin}) {
983 # close the existing =begin
984 $self->{_have_begin} = '';
985 # check for spurious characters
e2c3adef 986 $arg = $self->interpolate_and_check($paragraph, $line,$file);
987 # the closing argument is optional
988 #if($arg && $arg =~ /\S/) {
989 # $self->poderror({ -line => $line, -file => $file,
990 # -severity => 'WARNING',
991 # -msg => "Spurious character(s) after =end" });
992 #}
e3237417 993 }
994 else {
995 # don't have a matching =begin
996 $self->poderror({ -line => $line, -file => $file,
e2c3adef 997 -severity => 'ERROR',
e3237417 998 -msg => "=end without =begin" });
999 }
1000 }
e2c3adef 1001 elsif($cmd eq 'for') {
1002 unless($paragraph =~ /\s*(\S+)\s*/) {
1003 $self->poderror({ -line => $line, -file => $file,
1004 -severity => 'ERROR',
1005 -msg => "=for without formatter specification" });
1006 }
1007 $arg = ''; # do not expand paragraph below
1008 }
66aff6dd 1009 elsif($cmd =~ /^(pod|cut)$/) {
1010 # check for argument
1011 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1012 if($arg && $arg =~ /(\S+)/) {
1013 $self->poderror({ -line => $line, -file => $file,
1014 -severity => 'ERROR',
1015 -msg => "Spurious text after =$cmd"});
1016 }
1017 }
1018 $self->{_commands_in_head}++;
e3237417 1019 ## Check the interior sequences in the command-text
e2c3adef 1020 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1021 unless(defined $arg);
e2c3adef 1022 }
360aca43 1023}
1024
66aff6dd 1025sub _open_list
1026{
1027 my ($self,$indent,$line,$file) = @_;
1028 my $list = Pod::List->new(
1029 -indent => $indent,
1030 -start => $line,
1031 -file => $file);
1032 unshift(@{$self->{_list_stack}}, $list);
1033 undef $self->{_list_item_contents};
1034 $list;
1035}
1036
1037sub _close_list
1038{
1039 my ($self,$line,$file) = @_;
1040 my $list = shift(@{$self->{_list_stack}});
1041 if(defined $self->{_list_item_contents} &&
1042 $self->{_list_item_contents} == 0) {
1043 $self->poderror({ -line => $line, -file => $file,
1044 -severity => 'WARNING',
1045 -msg => "previous =item has no contents" });
1046 }
1047 undef $self->{_list_item_contents};
1048 $list;
1049}
1050
e2c3adef 1051# process a block of some text
1052sub interpolate_and_check {
e3237417 1053 my ($self, $paragraph, $line, $file) = @_;
1054 ## Check the interior sequences in the command-text
1055 # and return the text
1056 $self->_check_ptree(
1057 $self->parse_text($paragraph,$line), $line, $file, '');
1058}
1059
1060sub _check_ptree {
1061 my ($self,$ptree,$line,$file,$nestlist) = @_;
1062 local($_);
1063 my $text = '';
1064 # process each node in the parse tree
1065 foreach(@$ptree) {
1066 # regular text chunk
1067 unless(ref) {
1068 my $count;
1069 # count the unescaped angle brackets
92e3d63a 1070 # complain only when warning level is greater than 1
e3237417 1071 my $i = $_;
e2c3adef 1072 if($count = $i =~ tr/<>/<>/) {
e3237417 1073 $self->poderror({ -line => $line, -file => $file,
1074 -severity => 'WARNING',
e2c3adef 1075 -msg => "$count unescaped <> in paragraph" })
92e3d63a 1076 if($self->{-warnings} && $self->{-warnings}>1);
e3237417 1077 }
1078 $text .= $i;
1079 next;
1080 }
1081 # have an interior sequence
1082 my $cmd = $_->cmd_name();
1083 my $contents = $_->parse_tree();
1084 ($file,$line) = $_->file_line();
1085 # check for valid tag
1086 if (! $VALID_SEQUENCES{$cmd}) {
1087 $self->poderror({ -line => $line, -file => $file,
1088 -severity => 'ERROR',
66aff6dd 1089 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417 1090 # expand it anyway
1091 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1092 next;
1093 }
1094 if($nestlist =~ /$cmd/) {
1095 $self->poderror({ -line => $line, -file => $file,
1096 -severity => 'ERROR',
1097 -msg => "nested commands $cmd<...$cmd<...>...>"});
1098 # _TODO_ should we add the contents anyway?
1099 # expand it anyway, see below
1100 }
1101 if($cmd eq 'E') {
1102 # preserve entities
1103 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1104 $self->poderror({ -line => $line, -file => $file,
1105 -severity => 'ERROR',
1106 -msg => "garbled entity " . $_->raw_text()});
1107 next;
1108 }
e2c3adef 1109 my $ent = $$contents[0];
66aff6dd 1110 my $val;
1111 if($ent =~ /^0x[0-9a-f]+$/i) {
1112 # hexadec entity
1113 $val = hex($ent);
1114 }
1115 elsif($ent =~ /^0\d+$/) {
1116 # octal
1117 $val = oct($ent);
1118 }
1119 elsif($ent =~ /^\d+$/) {
e2c3adef 1120 # numeric entity
66aff6dd 1121 $val = $ent;
1122 }
1123 if(defined $val) {
1124 if($val>0 && $val<256) {
1125 $text .= chr($val);
1126 }
1127 else {
1128 $self->poderror({ -line => $line, -file => $file,
1129 -severity => 'ERROR',
1130 -msg => "Entity number out of range " . $_->raw_text()});
1131 }
e2c3adef 1132 }
1133 elsif($ENTITIES{$ent}) {
1134 # known ISO entity
1135 $text .= $ENTITIES{$ent};
1136 }
1137 else {
1138 $self->poderror({ -line => $line, -file => $file,
1139 -severity => 'WARNING',
66aff6dd 1140 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef 1141 $text .= "E<$ent>";
1142 }
e3237417 1143 }
1144 elsif($cmd eq 'L') {
1145 # try to parse the hyperlink
1146 my $link = Pod::Hyperlink->new($contents->raw_text());
1147 unless(defined $link) {
1148 $self->poderror({ -line => $line, -file => $file,
1149 -severity => 'ERROR',
e2c3adef 1150 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417 1151 next;
1152 }
1153 $link->line($line); # remember line
1154 if($self->{-warnings}) {
1155 foreach my $w ($link->warning()) {
1156 $self->poderror({ -line => $line, -file => $file,
1157 -severity => 'WARNING',
1158 -msg => $w });
1159 }
1160 }
1161 # check the link text
1162 $text .= $self->_check_ptree($self->parse_text($link->text(),
1163 $line), $line, $file, "$nestlist$cmd");
48f30392 1164 # remember link
1165 $self->hyperlink([$line,$link]);
e3237417 1166 }
1167 elsif($cmd =~ /[BCFIS]/) {
1168 # add the guts
1169 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1170 }
66aff6dd 1171 elsif($cmd eq 'Z') {
1172 if(length($contents->raw_text())) {
1173 $self->poderror({ -line => $line, -file => $file,
1174 -severity => 'ERROR',
1175 -msg => "Nonempty Z<>"});
1176 }
1177 }
48f30392 1178 elsif($cmd eq 'X') {
1179 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1180 if($idx =~ /^\s*$/s) {
1181 $self->poderror({ -line => $line, -file => $file,
1182 -severity => 'ERROR',
1183 -msg => "Empty X<>"});
1184 }
1185 else {
1186 # remember this node
1187 $self->idx($idx);
1188 }
1189 }
1190 else {
1191 # not reached
1192 die "internal error";
e3237417 1193 }
1194 }
1195 $text;
1196}
1197
e2c3adef 1198# process a block of verbatim text
360aca43 1199sub verbatim {
66aff6dd 1200 ## Nothing particular to check
e2c3adef 1201 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd 1202
1203 $self->_preproc_par($paragraph);
1204
e2c3adef 1205 if($self->{_current_head1} eq 'NAME') {
1206 my ($file, $line) = $pod_para->file_line;
1207 $self->poderror({ -line => $line, -file => $file,
1208 -severity => 'WARNING',
1209 -msg => 'Verbatim paragraph in NAME section' });
1210 }
360aca43 1211}
1212
e2c3adef 1213# process a block of regular text
360aca43 1214sub textblock {
1215 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1216 my ($file, $line) = $pod_para->file_line;
e3237417 1217
66aff6dd 1218 $self->_preproc_par($paragraph);
1219
e2c3adef 1220 # skip this paragraph if in a =begin block
1221 unless($self->{_have_begin}) {
1222 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1223 if($self->{_current_head1} eq 'NAME') {
1224 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1225 # this is the canonical name
1226 $self->{-name} = $1 unless(defined $self->{-name});
1227 }
e3237417 1228 }
1229 }
e3237417 1230}
1231
66aff6dd 1232sub _preproc_par
1233{
1234 my $self = shift;
1235 $_[0] =~ s/[\s\n]+$//;
1236 if($_[0]) {
1237 $self->{_commands_in_head}++;
1238 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1239 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1240 $self->{_list_stack}->[0]->{_has_par} = 1;
1241 }
1242 }
1243}
1244
e3237417 12451;
66aff6dd 1246
48f30392 1247__END__
1248
1249=head1 AUTHOR
1250
1251Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1252Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1253
1254Based on code for B<Pod::Text::pod2text()> written by
1255Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1256
1257=cut
1258