Commit | Line | Data |
360aca43 |
1 | ############################################################################# |
2 | # Pod/Checker.pm -- check pod documents for syntax errors |
3 | # |
664bb207 |
4 | # Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved. |
360aca43 |
5 | # This file is part of "PodParser". PodParser is free software; |
6 | # you can redistribute it and/or modify it under the same terms |
7 | # as Perl itself. |
8 | ############################################################################# |
9 | |
10 | package Pod::Checker; |
11 | |
12 | use vars qw($VERSION); |
e3237417 |
13 | $VERSION = 1.090; ## Current version of this package |
360aca43 |
14 | require 5.004; ## requires this Perl version or later |
15 | |
16 | =head1 NAME |
17 | |
18 | Pod::Checker, podchecker() - check pod documents for syntax errors |
19 | |
20 | =head1 SYNOPSIS |
21 | |
22 | use Pod::Checker; |
23 | |
e3237417 |
24 | $syntax_okay = podchecker($filepath, $outputpath, %options); |
360aca43 |
25 | |
26 | =head1 OPTIONS/ARGUMENTS |
27 | |
28 | C<$filepath> is the input POD to read and C<$outputpath> is |
29 | where to write POD syntax error messages. Either argument may be a scalar |
30 | indcating a file-path, or else a reference to an open filehandle. |
31 | If unspecified, the input-file it defaults to C<\*STDIN>, and |
32 | the output-file defaults to C<\*STDERR>. |
33 | |
e3237417 |
34 | =head2 Options |
35 | |
36 | =over 4 |
37 | |
38 | =item B<-warnings> =E<gt> I<val> |
39 | |
40 | Turn warnings on/off. See L<"Warnings">. |
41 | |
42 | =back |
360aca43 |
43 | |
44 | =head1 DESCRIPTION |
45 | |
46 | B<podchecker> will perform syntax checking of Perl5 POD format documentation. |
47 | |
48 | I<NOTE THAT THIS MODULE IS CURRENTLY IN THE INITIAL DEVELOPMENT STAGE!> |
49 | As of this writing, all it does is check for unknown '=xxxx' commands, |
50 | unknown 'X<...>' interior-sequences, and unterminated interior sequences. |
51 | |
52 | It is hoped that curious/ambitious user will help flesh out and add the |
53 | additional features they wish to see in B<Pod::Checker> and B<podchecker>. |
54 | |
e3237417 |
55 | The following additional checks are preformed: |
56 | |
57 | =over 4 |
58 | |
59 | =item * |
60 | |
61 | Check for proper balancing of C<=begin> and C<=end>. |
62 | |
63 | =item * |
64 | |
65 | Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. |
66 | |
67 | =item * |
68 | |
69 | Check for same nested interior-sequences (e.g. C<LE<lt>...LE<lt>...E<gt>...E<gt>>). |
70 | |
71 | =item * |
72 | |
73 | Check for malformed entities. |
74 | |
75 | =item * |
76 | |
77 | Check for correct syntax of hyperlinks C<LE<lt>E<gt>>. See L<perlpod> for |
78 | details. |
79 | |
80 | =item * |
81 | |
82 | Check for unresolved document-internal links. |
83 | |
84 | =back |
85 | |
86 | =head2 Warnings |
87 | |
88 | The following warnings are printed. These may not necessarily cause trouble, |
89 | but indicate mediocre style. |
90 | |
91 | =over 4 |
92 | |
93 | =item * |
94 | |
95 | Spurious characters after C<=back> and C<=end>. |
96 | |
97 | =item * |
98 | |
99 | Unescaped C<E<lt>> and C<E<gt>> in the text. |
100 | |
101 | =item * |
102 | |
103 | Missing arguments for C<=begin> and C<=over>. |
104 | |
105 | =item * |
106 | |
107 | Empty C<=over> / C<=back> list. |
108 | |
109 | =item * |
110 | |
111 | Hyperlinks: leading/trailing whitespace, brackets C<()> in the page name. |
112 | |
113 | =back |
114 | |
115 | =head1 DIAGNOSTICS |
116 | |
117 | I<[T.B.D.]> |
118 | |
119 | =head1 RETURN VALUE |
120 | |
121 | B<podchecker> returns the number of POD syntax errors found or -1 if |
122 | there were no POD commands at all found in the file. |
123 | |
360aca43 |
124 | =head1 EXAMPLES |
125 | |
126 | I<[T.B.D.]> |
127 | |
128 | =head1 AUTHOR |
129 | |
e3237417 |
130 | Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), |
131 | Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt> |
360aca43 |
132 | |
133 | Based on code for B<Pod::Text::pod2text()> written by |
134 | Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> |
135 | |
136 | =cut |
137 | |
138 | ############################################################################# |
139 | |
140 | use strict; |
141 | #use diagnostics; |
142 | use Carp; |
143 | use Exporter; |
144 | use Pod::Parser; |
145 | |
146 | use vars qw(@ISA @EXPORT); |
147 | @ISA = qw(Pod::Parser); |
148 | @EXPORT = qw(&podchecker); |
149 | |
150 | use vars qw(%VALID_COMMANDS %VALID_SEQUENCES); |
151 | |
152 | my %VALID_COMMANDS = ( |
153 | 'pod' => 1, |
154 | 'cut' => 1, |
155 | 'head1' => 1, |
156 | 'head2' => 1, |
157 | 'over' => 1, |
158 | 'back' => 1, |
159 | 'item' => 1, |
160 | 'for' => 1, |
161 | 'begin' => 1, |
162 | 'end' => 1, |
163 | ); |
164 | |
165 | my %VALID_SEQUENCES = ( |
166 | 'I' => 1, |
167 | 'B' => 1, |
168 | 'S' => 1, |
169 | 'C' => 1, |
170 | 'L' => 1, |
171 | 'F' => 1, |
172 | 'X' => 1, |
173 | 'Z' => 1, |
174 | 'E' => 1, |
175 | ); |
176 | |
177 | ##--------------------------------------------------------------------------- |
178 | |
179 | ##--------------------------------- |
180 | ## Function definitions begin here |
181 | ##--------------------------------- |
182 | |
e3237417 |
183 | sub podchecker( $ ; $ % ) { |
184 | my ($infile, $outfile, %options) = @_; |
360aca43 |
185 | local $_; |
186 | |
187 | ## Set defaults |
188 | $infile ||= \*STDIN; |
189 | $outfile ||= \*STDERR; |
190 | |
191 | ## Now create a pod checker |
e3237417 |
192 | my $checker = new Pod::Checker(%options); |
360aca43 |
193 | |
194 | ## Now check the pod document for errors |
195 | $checker->parse_from_file($infile, $outfile); |
196 | |
197 | ## Return the number of errors found |
198 | return $checker->num_errors(); |
199 | } |
200 | |
201 | ##--------------------------------------------------------------------------- |
202 | |
203 | ##------------------------------- |
204 | ## Method definitions begin here |
205 | ##------------------------------- |
206 | |
207 | sub new { |
208 | my $this = shift; |
209 | my $class = ref($this) || $this; |
210 | my %params = @_; |
211 | my $self = {%params}; |
212 | bless $self, $class; |
213 | $self->initialize(); |
214 | return $self; |
215 | } |
216 | |
217 | sub initialize { |
218 | my $self = shift; |
664bb207 |
219 | ## Initialize number of errors, and setup an error function to |
220 | ## increment this number and then print to the designated output. |
221 | $self->{_NUM_ERRORS} = 0; |
222 | $self->errorsub('poderror'); |
e3237417 |
223 | $self->{_commands} = 0; # total number of POD commands encountered |
224 | $self->{_list_stack} = []; # stack for nested lists |
225 | $self->{_have_begin} = ''; # stores =begin |
226 | $self->{_links} = []; # stack for internal hyperlinks |
227 | $self->{_nodes} = []; # stack for =head/=item nodes |
228 | $self->{-warnings} = 1 unless(defined $self->{-warnings}); |
664bb207 |
229 | } |
230 | |
231 | ## Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) |
232 | sub poderror { |
233 | my $self = shift; |
234 | my %opts = (ref $_[0]) ? %{shift()} : (); |
235 | |
236 | ## Retrieve options |
237 | chomp( my $msg = ($opts{-msg} || "")."@_" ); |
238 | my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; |
239 | my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; |
240 | my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; |
241 | |
e3237417 |
242 | ## Increment error count and print message " |
243 | ++($self->{_NUM_ERRORS}) |
244 | if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); |
664bb207 |
245 | my $out_fh = $self->output_handle(); |
246 | print $out_fh ($severity, $msg, $line, $file, "\n"); |
360aca43 |
247 | } |
248 | |
249 | sub num_errors { |
250 | return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; |
251 | } |
252 | |
e3237417 |
253 | ## overrides for Pod::Parser |
254 | |
360aca43 |
255 | sub end_pod { |
e3237417 |
256 | ## Do some final checks and |
257 | ## print the number of errors found |
360aca43 |
258 | my $self = shift; |
259 | my $infile = $self->input_file(); |
260 | my $out_fh = $self->output_handle(); |
261 | |
e3237417 |
262 | if(@{$self->{_list_stack}}) { |
263 | # _TODO_ display, but don't count them for now |
264 | my $list; |
265 | while($list = shift(@{$self->{_list_stack}})) { |
266 | $self->poderror({ -line => 'EOF', -file => $infile, |
267 | -severity => 'ERROR', -msg => "=over on line " . |
268 | $list->start() . " without closing =back" }); #" |
269 | } |
270 | } |
271 | |
272 | # check validity of document internal hyperlinks |
273 | # first build the node names from the paragraph text |
274 | my %nodes; |
275 | foreach($self->node()) { |
276 | #print "Have node: +$_+\n"; |
277 | $nodes{$_} = 1; |
278 | if(/^(\S+)\s+/) { |
279 | # we have more than one word. Use the first as a node, too. |
280 | # This is used heavily in perlfunc.pod |
281 | $nodes{$1} ||= 2; # derived node |
282 | } |
283 | } |
284 | foreach($self->hyperlink()) { |
285 | #print "Seek node: +$_+\n"; |
286 | my $line = ''; |
287 | s/^(\d+):// && ($line = $1); |
288 | if($_ && !$nodes{$_}) { |
289 | $self->poderror({ -line => $line, -file => $infile, |
290 | -severity => 'ERROR', |
291 | -msg => "unresolved internal link `$_'"}); |
292 | } |
293 | } |
294 | |
295 | ## Print the number of errors found |
360aca43 |
296 | my $num_errors = $self->num_errors(); |
297 | if ($num_errors > 0) { |
298 | printf $out_fh ("$infile has $num_errors pod syntax %s.\n", |
299 | ($num_errors == 1) ? "error" : "errors"); |
300 | } |
e3237417 |
301 | elsif($self->{_commands} == 0) { |
302 | print $out_fh "$infile does not contain any pod commands.\n"; |
303 | $self->num_errors(-1); |
304 | } |
360aca43 |
305 | else { |
306 | print $out_fh "$infile pod syntax OK.\n"; |
307 | } |
308 | } |
309 | |
310 | sub command { |
664bb207 |
311 | my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; |
360aca43 |
312 | my ($file, $line) = $pod_para->file_line; |
360aca43 |
313 | ## Check the command syntax |
e3237417 |
314 | my $arg; # this will hold the command argument |
664bb207 |
315 | if (! $VALID_COMMANDS{$cmd}) { |
316 | $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', |
317 | -msg => "Unknown command \"$cmd\"" }); |
360aca43 |
318 | } |
319 | else { |
e3237417 |
320 | $self->{_commands}++; # found a valid command |
321 | ## check syntax of particular command |
322 | if($cmd eq 'over') { |
323 | # start a new list |
324 | unshift(@{$self->{_list_stack}}, |
325 | Pod::List->new( |
326 | -indent => $paragraph, |
327 | -start => $line, |
328 | -file => $file)); |
329 | } |
330 | elsif($cmd eq 'item') { |
331 | unless(@{$self->{_list_stack}}) { |
332 | $self->poderror({ -line => $line, -file => $file, |
333 | -severity => 'ERROR', |
334 | -msg => "=item without previous =over" }); |
335 | } |
336 | else { |
337 | # check for argument |
338 | $arg = $self->_interpolate_and_check($paragraph, $line, $file); |
339 | unless($arg && $arg =~ /(\S+)/) { |
340 | $self->poderror({ -line => $line, -file => $file, |
341 | -severity => 'WARNING', |
342 | -msg => "No argument for =item" }); |
343 | } |
344 | # add this item |
345 | $self->{_list_stack}[0]->item($arg || ''); |
346 | # remember this node |
347 | $self->node($arg) if($arg); |
348 | } |
349 | } |
350 | elsif($cmd eq 'back') { |
351 | # check if we have an open list |
352 | unless(@{$self->{_list_stack}}) { |
353 | $self->poderror({ -line => $line, -file => $file, |
354 | -severity => 'ERROR', |
355 | -msg => "=back without previous =over" }); |
356 | } |
357 | else { |
358 | # check for spurious characters |
359 | $arg = $self->_interpolate_and_check($paragraph, $line,$file); |
360 | if($arg && $arg =~ /\S/) { |
361 | $self->poderror({ -line => $line, -file => $file, |
362 | -severity => 'WARNING', |
363 | -msg => "Spurious character(s) after =back" }); |
364 | } |
365 | # close list |
366 | my $list = shift @{$self->{_list_stack}}; |
367 | # check for empty lists |
368 | if(!$list->item() && $self->{-warnings}) { |
369 | $self->poderror({ -line => $line, -file => $file, |
370 | -severity => 'WARNING', |
371 | -msg => "No items in =over (at line " . |
372 | $list->start() . ") / =back list"}); #" |
373 | } |
374 | } |
375 | } |
376 | elsif($cmd =~ /^head/) { |
377 | # check if there is an open list |
378 | if(@{$self->{_list_stack}}) { |
379 | my $list; |
380 | while($list = shift(@{$self->{_list_stack}})) { |
381 | $self->poderror({ -line => $line, -file => $file, |
382 | -severity => 'ERROR', |
383 | -msg => "unclosed =over (line ". $list->start() . |
384 | ") at $cmd" }); |
385 | } |
386 | } |
387 | # remember this node |
388 | $arg = $self->_interpolate_and_check($paragraph, $line,$file); |
389 | $self->node($arg) if($arg); |
390 | } |
391 | elsif($cmd eq 'begin') { |
392 | if($self->{_have_begin}) { |
393 | # already have a begin |
394 | $self->poderror({ -line => $line, -file => $file, |
395 | -severity => 'ERROR', |
396 | -msg => "Nested =begin's (first at line " . |
397 | $self->{_have_begin} . ")"}); |
398 | } |
399 | else { |
400 | # check for argument |
401 | $arg = $self->_interpolate_and_check($paragraph, $line,$file); |
402 | unless($arg && $arg =~ /(\S+)/) { |
403 | $self->poderror({ -line => $line, -file => $file, |
404 | -severity => 'WARNING', |
405 | -msg => "No argument for =begin"}); |
406 | } |
407 | # remember the =begin |
408 | $self->{_have_begin} = "$line:$1"; |
409 | } |
410 | } |
411 | elsif($cmd eq 'end') { |
412 | if($self->{_have_begin}) { |
413 | # close the existing =begin |
414 | $self->{_have_begin} = ''; |
415 | # check for spurious characters |
416 | $arg = $self->_interpolate_and_check($paragraph, $line,$file); |
417 | if($arg && $arg =~ /\S/) { |
418 | $self->poderror({ -line => $line, -file => $file, |
419 | -severity => 'WARNING', |
420 | -msg => "Spurious character(s) after =end" }); |
421 | } |
422 | } |
423 | else { |
424 | # don't have a matching =begin |
425 | $self->poderror({ -line => $line, -file => $file, |
426 | -severity => 'WARNING', |
427 | -msg => "=end without =begin" }); |
428 | } |
429 | } |
360aca43 |
430 | } |
e3237417 |
431 | ## Check the interior sequences in the command-text |
432 | $self->_interpolate_and_check($paragraph, $line,$file) |
433 | unless(defined $arg); |
360aca43 |
434 | } |
435 | |
e3237417 |
436 | sub _interpolate_and_check { |
437 | my ($self, $paragraph, $line, $file) = @_; |
438 | ## Check the interior sequences in the command-text |
439 | # and return the text |
440 | $self->_check_ptree( |
441 | $self->parse_text($paragraph,$line), $line, $file, ''); |
442 | } |
443 | |
444 | sub _check_ptree { |
445 | my ($self,$ptree,$line,$file,$nestlist) = @_; |
446 | local($_); |
447 | my $text = ''; |
448 | # process each node in the parse tree |
449 | foreach(@$ptree) { |
450 | # regular text chunk |
451 | unless(ref) { |
452 | my $count; |
453 | # count the unescaped angle brackets |
454 | my $i = $_; |
455 | if($count = $i =~ s/[<>]/$self->expand_unescaped_bracket($&)/ge) { |
456 | $self->poderror({ -line => $line, -file => $file, |
457 | -severity => 'WARNING', |
458 | -msg => "$count unescaped <>" }); |
459 | } |
460 | $text .= $i; |
461 | next; |
462 | } |
463 | # have an interior sequence |
464 | my $cmd = $_->cmd_name(); |
465 | my $contents = $_->parse_tree(); |
466 | ($file,$line) = $_->file_line(); |
467 | # check for valid tag |
468 | if (! $VALID_SEQUENCES{$cmd}) { |
469 | $self->poderror({ -line => $line, -file => $file, |
470 | -severity => 'ERROR', |
471 | -msg => qq(Unknown interior-sequence "$cmd")}); |
472 | # expand it anyway |
473 | $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); |
474 | next; |
475 | } |
476 | if($nestlist =~ /$cmd/) { |
477 | $self->poderror({ -line => $line, -file => $file, |
478 | -severity => 'ERROR', |
479 | -msg => "nested commands $cmd<...$cmd<...>...>"}); |
480 | # _TODO_ should we add the contents anyway? |
481 | # expand it anyway, see below |
482 | } |
483 | if($cmd eq 'E') { |
484 | # preserve entities |
485 | if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { |
486 | $self->poderror({ -line => $line, -file => $file, |
487 | -severity => 'ERROR', |
488 | -msg => "garbled entity " . $_->raw_text()}); |
489 | next; |
490 | } |
491 | $text .= $self->expand_entity($$contents[0]); |
492 | } |
493 | elsif($cmd eq 'L') { |
494 | # try to parse the hyperlink |
495 | my $link = Pod::Hyperlink->new($contents->raw_text()); |
496 | unless(defined $link) { |
497 | $self->poderror({ -line => $line, -file => $file, |
498 | -severity => 'ERROR', |
499 | -msg => "malformed link L<>: $@"}); |
500 | next; |
501 | } |
502 | $link->line($line); # remember line |
503 | if($self->{-warnings}) { |
504 | foreach my $w ($link->warning()) { |
505 | $self->poderror({ -line => $line, -file => $file, |
506 | -severity => 'WARNING', |
507 | -msg => $w }); |
508 | } |
509 | } |
510 | # check the link text |
511 | $text .= $self->_check_ptree($self->parse_text($link->text(), |
512 | $line), $line, $file, "$nestlist$cmd"); |
513 | my $node = ''; |
514 | $node = $self->_check_ptree($self->parse_text($link->node(), |
515 | $line), $line, $file, "$nestlist$cmd") |
516 | if($link->node()); |
517 | # store internal link |
518 | # _TODO_ what if there is a link to the page itself by the name, |
519 | # e.g. Tk::Pod : L<Tk::Pod/"DESCRIPTION"> |
520 | $self->hyperlink("$line:$node") if($node && !$link->page()); |
521 | } |
522 | elsif($cmd =~ /[BCFIS]/) { |
523 | # add the guts |
524 | $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); |
525 | } |
526 | else { |
527 | # check, but add nothing to $text (X<>, Z<>) |
528 | $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); |
529 | } |
530 | } |
531 | $text; |
532 | } |
533 | |
534 | # default method - just return it |
535 | sub expand_unescaped_bracket { |
536 | my ($self,$bracket) = @_; |
537 | $bracket; |
538 | } |
539 | |
540 | # keep the entities |
541 | sub expand_entity { |
542 | my ($self,$entity) = @_; |
543 | "E<$entity>"; |
544 | } |
545 | |
546 | # _TODO_ overloadable methods for BC..Z<...> expansion |
547 | |
360aca43 |
548 | sub verbatim { |
549 | ## Nothing to check |
550 | ## my ($self, $paragraph, $line_num, $pod_para) = @_; |
551 | } |
552 | |
553 | sub textblock { |
554 | my ($self, $paragraph, $line_num, $pod_para) = @_; |
e3237417 |
555 | my ($file, $line) = $pod_para->file_line; |
556 | $self->_interpolate_and_check($paragraph, $line,$file); |
360aca43 |
557 | } |
558 | |
e3237417 |
559 | # set/return nodes of the current POD |
560 | sub node { |
561 | my ($self,$text) = @_; |
562 | if(defined $text) { |
563 | $text =~ s/[\s\n]+$//; # strip trailing whitespace |
564 | # add node |
565 | push(@{$self->{_nodes}}, $text); |
566 | return $text; |
567 | } |
568 | @{$self->{_nodes}}; |
569 | } |
570 | |
571 | # set/return hyperlinks of the current POD |
572 | sub hyperlink { |
573 | my $self = shift; |
574 | if($_[0]) { |
575 | push(@{$self->{_links}}, $_[0]); |
576 | return $_[0]; |
577 | } |
578 | @{$self->{_links}}; |
579 | } |
580 | |
581 | #----------------------------------------------------------------------------- |
582 | # Pod::List |
583 | # |
584 | # class to hold POD list info (=over, =item, =back) |
585 | #----------------------------------------------------------------------------- |
586 | |
587 | package Pod::List; |
588 | |
589 | use Carp; |
590 | |
591 | sub new { |
592 | my $this = shift; |
593 | my $class = ref($this) || $this; |
594 | my %params = @_; |
595 | my $self = {%params}; |
596 | bless $self, $class; |
597 | $self->initialize(); |
598 | return $self; |
599 | } |
600 | |
601 | sub initialize { |
602 | my $self = shift; |
603 | $self->{-file} ||= 'unknown'; |
604 | $self->{-start} ||= 'unknown'; |
605 | $self->{-indent} ||= 4; # perlpod: "should be the default" |
606 | $self->{_items} = []; |
607 | } |
608 | |
609 | # The POD file name the list appears in |
610 | sub file { |
611 | return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; |
612 | } |
613 | |
614 | # The line in the file the node appears |
615 | sub start { |
616 | return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; |
617 | } |
618 | |
619 | # indent level |
620 | sub indent { |
621 | return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; |
622 | } |
623 | |
624 | # The individual =items of this list |
625 | sub item { |
626 | my ($self,$item) = @_; |
627 | if(defined $item) { |
628 | push(@{$self->{_items}}, $item); |
629 | return $item; |
360aca43 |
630 | } |
631 | else { |
e3237417 |
632 | return @{$self->{_items}}; |
360aca43 |
633 | } |
634 | } |
635 | |
e3237417 |
636 | #----------------------------------------------------------------------------- |
637 | # Pod::Hyperlink |
638 | # |
639 | # class to hold hyperlinks (L<>) |
640 | #----------------------------------------------------------------------------- |
641 | |
642 | package Pod::Hyperlink; |
643 | |
644 | =head1 NAME |
645 | |
646 | Pod::Hyperlink - class for manipulation of POD hyperlinks |
647 | |
648 | =head1 SYNOPSIS |
649 | |
650 | my $link = Pod::Hyperlink->new('alternative text|page/"section in page"'); |
651 | |
652 | =head1 DESCRIPTION |
653 | |
654 | The B<Pod::Hyperlink> class is mainly designed to parse the contents of the |
655 | C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the |
656 | different parts of a POD hyperlink. |
657 | |
658 | =head1 METHODS |
659 | |
660 | =over 4 |
661 | |
662 | =item new() |
663 | |
664 | The B<new()> method can either be passed a set of key/value pairs or a single |
665 | scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object |
666 | of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a |
667 | failure, the error message is stored in C<$@>. |
668 | |
669 | =item parse() |
670 | |
671 | This method can be used to (re)parse a (new) hyperlink. The result is stored |
672 | in the current object. |
673 | |
674 | =item markup($on,$off,$pageon,$pageoff) |
675 | |
676 | The result of this method is a string the represents the textual value of the |
677 | link, but with included arbitrary markers that highlight the active portion |
678 | of the link. This will mainly be used by POD translators and saves the |
679 | effort of determining which words have to be highlighted. Examples: Depending |
680 | on the type of link, the following text will be returned, the C<*> represent |
681 | the places where the section/item specific on/off markers will be placed |
682 | (link to a specific node) and C<+> for the pageon/pageoff markers (link to the |
683 | top of the page). |
684 | |
685 | the +perl+ manpage |
686 | the *$|* entry in the +perlvar+ manpage |
687 | the section on *OPTIONS* in the +perldoc+ manpage |
688 | the section on *DESCRIPTION* elsewhere in this document |
689 | |
690 | This method is read-only. |
691 | |
692 | =item text() |
693 | |
694 | This method returns the textual representation of the hyperlink as above, |
695 | but without markers (read only). |
696 | |
697 | =item warning() |
698 | |
699 | After parsing, this method returns any warnings ecountered during the |
700 | parsing process. |
701 | |
702 | =item page() |
703 | |
704 | This method sets or returns the POD page this link points to. |
705 | |
706 | =item node() |
707 | |
708 | As above, but the destination node text of the link. |
709 | |
710 | =item type() |
711 | |
712 | The node type, either C<section> or C<item>. |
713 | |
714 | =item alttext() |
715 | |
716 | Sets or returns an alternative text specified in the link. |
717 | |
718 | =item line(), file() |
719 | |
720 | Just simple slots for storing information about the line and the file |
721 | the link was incountered in. Has to be filled in manually. |
722 | |
723 | =back |
724 | |
725 | =head1 AUTHOR |
726 | |
727 | Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing |
728 | a lot of things from L<pod2man> and L<pod2roff>. |
729 | |
730 | =cut |
731 | |
732 | use Carp; |
733 | |
734 | sub new { |
735 | my $this = shift; |
736 | my $class = ref($this) || $this; |
737 | my $self = +{}; |
738 | bless $self, $class; |
739 | $self->initialize(); |
740 | if(defined $_[0]) { |
741 | if(ref($_[0])) { |
742 | # called with a list of parameters |
743 | %$self = %{$_[0]}; |
744 | } |
745 | else { |
746 | # called with L<> contents |
747 | return undef unless($self->parse($_[0])); |
748 | } |
749 | } |
750 | return $self; |
751 | } |
752 | |
753 | sub initialize { |
754 | my $self = shift; |
755 | $self->{-line} ||= 'undef'; |
756 | $self->{-file} ||= 'undef'; |
757 | $self->{-page} ||= ''; |
758 | $self->{-node} ||= ''; |
759 | $self->{-alttext} ||= ''; |
760 | $self->{-type} ||= 'undef'; |
761 | $self->{_warnings} = []; |
762 | $self->_construct_text(); |
763 | } |
764 | |
765 | sub parse { |
766 | my $self = shift; |
767 | local($_) = $_[0]; |
768 | # syntax check the link and extract destination |
769 | my ($alttext,$page,$section,$item) = ('','','',''); |
770 | |
771 | # strip leading/trailing whitespace |
772 | if(s/^[\s\n]+//) { |
773 | $self->warning("ignoring leading whitespace in link"); |
774 | } |
775 | if(s/[\s\n]+$//) { |
776 | $self->warning("ignoring trailing whitespace in link"); |
777 | } |
778 | |
779 | # collapse newlines with whitespace |
780 | s/\s*\n\s*/ /g; |
781 | |
782 | # extract alternative text |
783 | if(s!^([^|/"\n]*)[|]!!) { |
784 | $alttext = $1; |
785 | } |
786 | # extract page |
787 | if(s!^([^|/"\s]*)(?=/|$)!!) { |
788 | $page = $1; |
789 | } |
790 | # extract section |
791 | if(s!^/?"([^"\n]+)"$!!) { # e.g. L</"blah blah"> |
792 | $section = $1; |
793 | } |
794 | # extact item |
795 | if(s!^/(.*)$!!) { |
796 | $item = $1; |
797 | } |
798 | # last chance here |
799 | if(s!^([^|"\s\n/][^"\n/]*)$!!) { # e.g. L<lah di dah> |
800 | $section = $1; |
801 | } |
802 | # now there should be nothing left |
803 | if(length) { |
804 | _invalid_link("garbled entry (spurious characters `$_')"); |
805 | return undef; |
806 | } |
807 | elsif(!(length($page) || length($section) || length($item))) { |
808 | _invalid_link("empty link"); |
809 | return undef; |
810 | } |
811 | elsif($alttext =~ /[<>]/) { |
812 | _invalid_link("alternative text contains < or >"); |
813 | return undef; |
814 | } |
815 | else { # no errors so far |
816 | if($page =~ /[(]\d\w*[)]$/) { |
817 | $self->warning("brackets in `$page'"); |
818 | $page = $`; # strip that extension |
819 | } |
820 | if($page =~ /^(\s*)(\S+)(\s*)/ && (length($1) || length($3))) { |
821 | $self->warning("whitespace in `$page'"); |
822 | $page = $2; # strip that extension |
823 | } |
824 | } |
825 | $self->page($page); |
826 | $self->node($section || $item); # _TODO_ do not distinguish for now |
827 | $self->alttext($alttext); |
828 | $self->type($item ? 'item' : 'section'); |
829 | 1; |
830 | } |
831 | |
832 | sub _construct_text { |
833 | my $self = shift; |
834 | my $alttext = $self->alttext(); |
835 | my $type = $self->type(); |
836 | my $section = $self->node(); |
837 | my $page = $self->page(); |
838 | $self->{_text} = |
839 | $alttext ? $alttext : ( |
840 | !$section ? '' : |
841 | $type eq 'item' ? 'the ' . $section . ' entry' : |
842 | 'the section on ' . $section ) . |
843 | ($page ? ($section ? ' in ':''). 'the ' . $page . ' manpage' : |
844 | 'elsewhere in this document'); |
845 | # for being marked up later |
846 | $self->{_markup} = |
847 | $alttext ? '<SECTON>' . $alttext . '<SECTOFF>' : ( |
848 | !$section ? '' : |
849 | $type eq 'item' ? 'the <SECTON>' . $section . '<SECTOFF> entry' : |
850 | 'the section on <SECTON>' . $section . '<SECTOFF>' ) . |
851 | ($page ? ($section ? ' in ':'') . 'the <PAGEON>' . |
852 | $page . '<PAGEOFF> manpage' : |
853 | ' elsewhere in this document'); |
854 | } |
855 | |
856 | # include markup |
857 | sub markup { |
858 | my ($self,$on,$off,$pageon,$pageoff) = @_; |
859 | $on ||= ''; |
860 | $off ||= ''; |
861 | $pageon ||= ''; |
862 | $pageoff ||= ''; |
863 | $_[0]->_construct_text; |
864 | my $str = $self->{_markup}; |
865 | $str =~ s/<SECTON>/$on/; |
866 | $str =~ s/<SECTOFF>/$off/; |
867 | $str =~ s/<PAGEON>/$pageon/; |
868 | $str =~ s/<PAGEOFF>/$pageoff/; |
869 | return $str; |
870 | } |
871 | |
872 | # The complete link's text |
873 | sub text { |
874 | $_[0]->_construct_text(); |
875 | $_[0]->{_text}; |
876 | } |
877 | |
878 | # The POD page the link appears on |
879 | sub warning { |
880 | my $self = shift; |
881 | if(@_) { |
882 | push(@{$self->{_warnings}}, @_); |
883 | return @_; |
884 | } |
885 | return @{$self->{_warnings}}; |
886 | } |
887 | |
888 | # The POD file name the link appears in |
889 | sub file { |
890 | return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; |
891 | } |
892 | |
893 | # The line in the file the link appears |
894 | sub line { |
895 | return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line}; |
896 | } |
897 | |
898 | # The POD page the link appears on |
899 | sub page { |
900 | return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; |
901 | } |
902 | |
903 | # The link destination |
904 | sub node { |
905 | return (@_ > 1) ? ($_[0]->{-node} = $_[1]) : $_[0]->{-node}; |
906 | } |
907 | |
908 | # Potential alternative text |
909 | sub alttext { |
910 | return (@_ > 1) ? ($_[0]->{-alttext} = $_[1]) : $_[0]->{-alttext}; |
911 | } |
912 | |
913 | # The type |
914 | sub type { |
915 | return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; |
916 | } |
917 | |
918 | sub _invalid_link { |
919 | my ($msg) = @_; |
920 | # this sets @_ |
921 | #eval { die "$msg\n" }; |
922 | #chomp $@; |
923 | $@ = $msg; # this seems to work, too! |
924 | undef; |
925 | } |
926 | |
927 | 1; |