Re: Copious warnings from Sys::Syslog
[p5sagit/p5-mst-13.2.git] / lib / Pod / Select.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Select.pm -- function to select portions of POD docs
3#
66aff6dd 4# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
360aca43 5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Select;
11
12use vars qw($VERSION);
92e3d63a 13$VERSION = 1.13; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
16#############################################################################
17
18=head1 NAME
19
20Pod::Select, podselect() - extract selected sections of POD from input
21
22=head1 SYNOPSIS
23
24 use Pod::Select;
25
26 ## Select all the POD sections for each file in @filelist
27 ## and print the result on standard output.
28 podselect(@filelist);
29
30 ## Same as above, but write to tmp.out
31 podselect({-output => "tmp.out"}, @filelist):
32
33 ## Select from the given filelist, only those POD sections that are
34 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
35 podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist):
36
37 ## Select the "DESCRIPTION" section of the PODs from STDIN and write
38 ## the result to STDERR.
39 podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN);
40
41or
42
43 use Pod::Select;
44
45 ## Create a parser object for selecting POD sections from the input
46 $parser = new Pod::Select();
47
48 ## Select all the POD sections for each file in @filelist
49 ## and print the result to tmp.out.
50 $parser->parse_from_file("<&STDIN", "tmp.out");
51
52 ## Select from the given filelist, only those POD sections that are
53 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
54 $parser->select("NAME|SYNOPSIS", "OPTIONS");
55 for (@filelist) { $parser->parse_from_file($_); }
56
57 ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from
58 ## STDIN and write the result to STDERR.
59 $parser->select("DESCRIPTION");
60 $parser->add_selection("SEE ALSO");
61 $parser->parse_from_filehandle(\*STDIN, \*STDERR);
62
63=head1 REQUIRES
64
828c4421 65perl5.005, Pod::Parser, Exporter, Carp
360aca43 66
67=head1 EXPORTS
68
69podselect()
70
71=head1 DESCRIPTION
72
73B<podselect()> is a function which will extract specified sections of
74pod documentation from an input stream. This ability is provided by the
75B<Pod::Select> module which is a subclass of B<Pod::Parser>.
76B<Pod::Select> provides a method named B<select()> to specify the set of
77POD sections to select for processing/printing. B<podselect()> merely
78creates a B<Pod::Select> object and then invokes the B<podselect()>
79followed by B<parse_from_file()>.
80
81=head1 SECTION SPECIFICATIONS
82
83B<podselect()> and B<Pod::Select::select()> may be given one or more
84"section specifications" to restrict the text processed to only the
85desired set of sections and their corresponding subsections. A section
86specification is a string containing one or more Perl-style regular
87expressions separated by forward slashes ("/"). If you need to use a
88forward slash literally within a section title you can escape it with a
89backslash ("\/").
90
91The formal syntax of a section specification is:
92
93=over 4
94
92e3d63a 95=item *
360aca43 96
97I<head1-title-regex>/I<head2-title-regex>/...
98
99=back
100
101Any omitted or empty regular expressions will default to ".*".
102Please note that each regular expression given is implicitly
103anchored by adding "^" and "$" to the beginning and end. Also, if a
104given regular expression starts with a "!" character, then the
105expression is I<negated> (so C<!foo> would match anything I<except>
106C<foo>).
107
108Some example section specifications follow.
109
110=over 4
111
551e1d92 112=item *
113
360aca43 114Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections:
115
116C<NAME|SYNOPSIS>
117
551e1d92 118=item *
119
360aca43 120Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION>
121section:
122
123C<DESCRIPTION/Question|Answer>
124
551e1d92 125=item *
126
360aca43 127Match the C<Comments> subsection of I<all> sections:
128
129C</Comments>
130
551e1d92 131=item *
132
360aca43 133Match all subsections of C<DESCRIPTION> I<except> for C<Comments>:
134
135C<DESCRIPTION/!Comments>
136
551e1d92 137=item *
138
360aca43 139Match the C<DESCRIPTION> section but do I<not> match any of its subsections:
140
141C<DESCRIPTION/!.+>
142
551e1d92 143=item *
144
360aca43 145Match all top level sections but none of their subsections:
146
147C</!.+>
148
149=back
150
151=begin _NOT_IMPLEMENTED_
152
153=head1 RANGE SPECIFICATIONS
154
155B<podselect()> and B<Pod::Select::select()> may be given one or more
156"range specifications" to restrict the text processed to only the
157desired ranges of paragraphs in the desired set of sections. A range
158specification is a string containing a single Perl-style regular
159expression (a regex), or else two Perl-style regular expressions
160(regexs) separated by a ".." (Perl's "range" operator is "..").
161The regexs in a range specification are delimited by forward slashes
162("/"). If you need to use a forward slash literally within a regex you
163can escape it with a backslash ("\/").
164
165The formal syntax of a range specification is:
166
167=over 4
168
92e3d63a 169=item *
360aca43 170
171/I<start-range-regex>/[../I<end-range-regex>/]
172
173=back
174
175Where each the item inside square brackets (the ".." followed by the
176end-range-regex) is optional. Each "range-regex" is of the form:
177
178 =cmd-expr text-expr
179
180Where I<cmd-expr> is intended to match the name of one or more POD
181commands, and I<text-expr> is intended to match the paragraph text for
182the command. If a range-regex is supposed to match a POD command, then
183the first character of the regex (the one after the initial '/')
d1be9408 184absolutely I<must> be a single '=' character; it may not be anything
360aca43 185else (not even a regex meta-character) if it is supposed to match
186against the name of a POD command.
187
188If no I<=cmd-expr> is given then the text-expr will be matched against
189plain textblocks unless it is preceded by a space, in which case it is
190matched against verbatim text-blocks. If no I<text-expr> is given then
191only the command-portion of the paragraph is matched against.
192
193Note that these two expressions are each implicitly anchored. This
194means that when matching against the command-name, there will be an
195implicit '^' and '$' around the given I<=cmd-expr>; and when matching
196against the paragraph text there will be an implicit '\A' and '\Z'
197around the given I<text-expr>.
198
199Unlike with section-specs, the '!' character does I<not> have any special
200meaning (negation or otherwise) at the beginning of a range-spec!
201
202Some example range specifications follow.
203
204=over 4
205
206=item
207Match all C<=for html> paragraphs:
208
209C</=for html/>
210
211=item
212Match all paragraphs between C<=begin html> and C<=end html>
213(note that this will I<not> work correctly if such sections
214are nested):
215
216C</=begin html/../=end html/>
217
218=item
219Match all paragraphs between the given C<=item> name until the end of the
220current section:
221
222C</=item mine/../=head\d/>
223
224=item
225Match all paragraphs between the given C<=item> until the next item, or
226until the end of the itemized list (note that this will I<not> work as
227desired if the item contains an itemized list nested within it):
228
229C</=item mine/../=(item|back)/>
230
231=back
232
233=end _NOT_IMPLEMENTED_
234
235=cut
236
237#############################################################################
238
239use strict;
240#use diagnostics;
241use Carp;
242use Pod::Parser 1.04;
243use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL);
244
245@ISA = qw(Pod::Parser);
246@EXPORT = qw(&podselect);
247
248## Maximum number of heading levels supported for '=headN' directives
249*MAX_HEADING_LEVEL = \3;
250
251#############################################################################
252
253=head1 OBJECT METHODS
254
255The following methods are provided in this module. Each one takes a
256reference to the object itself as an implicit first parameter.
257
258=cut
259
260##---------------------------------------------------------------------------
261
262## =begin _PRIVATE_
263##
264## =head1 B<_init_headings()>
265##
266## Initialize the current set of active section headings.
267##
268## =cut
269##
270## =end _PRIVATE_
271
272use vars qw(%myData @section_headings);
273
274sub _init_headings {
275 my $self = shift;
276 local *myData = $self;
277
278 ## Initialize current section heading titles if necessary
279 unless (defined $myData{_SECTION_HEADINGS}) {
280 local *section_headings = $myData{_SECTION_HEADINGS} = [];
281 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
282 $section_headings[$i] = '';
283 }
284 }
285}
286
287##---------------------------------------------------------------------------
288
289=head1 B<curr_headings()>
290
291 ($head1, $head2, $head3, ...) = $parser->curr_headings();
292 $head1 = $parser->curr_headings(1);
293
294This method returns a list of the currently active section headings and
295subheadings in the document being parsed. The list of headings returned
296corresponds to the most recently parsed paragraph of the input.
297
298If an argument is given, it must correspond to the desired section
299heading number, in which case only the specified section heading is
300returned. If there is no current section heading at the specified
301level, then C<undef> is returned.
302
303=cut
304
305sub curr_headings {
306 my $self = shift;
307 $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS});
308 my @headings = @{ $self->{_SECTION_HEADINGS} };
309 return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings;
310}
311
312##---------------------------------------------------------------------------
313
314=head1 B<select()>
315
316 $parser->select($section_spec1,$section_spec2,...);
317
318This method is used to select the particular sections and subsections of
319POD documentation that are to be printed and/or processed. The existing
320set of selected sections is I<replaced> with the given set of sections.
321See B<add_selection()> for adding to the current set of selected
322sections.
323
324Each of the C<$section_spec> arguments should be a section specification
325as described in L<"SECTION SPECIFICATIONS">. The section specifications
326are parsed by this method and the resulting regular expressions are
327stored in the invoking object.
328
329If no C<$section_spec> arguments are given, then the existing set of
330selected sections is cleared out (which means C<all> sections will be
331processed).
332
333This method should I<not> normally be overridden by subclasses.
334
335=cut
336
337use vars qw(@selected_sections);
338
339sub select {
340 my $self = shift;
341 my @sections = @_;
342 local *myData = $self;
343 local $_;
344
345### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?)
346
347 ##---------------------------------------------------------------------
348 ## The following is a blatant hack for backward compatibility, and for
349 ## implementing add_selection(). If the *first* *argument* is the
350 ## string "+", then the remaining section specifications are *added*
351 ## to the current set of selections; otherwise the given section
352 ## specifications will *replace* the current set of selections.
353 ##
354 ## This should probably be fixed someday, but for the present time,
355 ## it seems incredibly unlikely that "+" would ever correspond to
356 ## a legitimate section heading
357 ##---------------------------------------------------------------------
358 my $add = ($sections[0] eq "+") ? shift(@sections) : "";
359
360 ## Reset the set of sections to use
361 unless (@sections > 0) {
362 delete $myData{_SELECTED_SECTIONS} unless ($add);
363 return;
364 }
365 $myData{_SELECTED_SECTIONS} = []
366 unless ($add && exists $myData{_SELECTED_SECTIONS});
367 local *selected_sections = $myData{_SELECTED_SECTIONS};
368
369 ## Compile each spec
370 my $spec;
371 for $spec (@sections) {
372 if ( defined($_ = &_compile_section_spec($spec)) ) {
373 ## Store them in our sections array
374 push(@selected_sections, $_);
375 }
376 else {
377 carp "Ignoring section spec \"$spec\"!\n";
378 }
379 }
380}
381
382##---------------------------------------------------------------------------
383
384=head1 B<add_selection()>
385
386 $parser->add_selection($section_spec1,$section_spec2,...);
387
388This method is used to add to the currently selected sections and
389subsections of POD documentation that are to be printed and/or
390processed. See <select()> for replacing the currently selected sections.
391
392Each of the C<$section_spec> arguments should be a section specification
393as described in L<"SECTION SPECIFICATIONS">. The section specifications
394are parsed by this method and the resulting regular expressions are
395stored in the invoking object.
396
397This method should I<not> normally be overridden by subclasses.
398
399=cut
400
401sub add_selection {
402 my $self = shift;
403 $self->select("+", @_);
404}
405
406##---------------------------------------------------------------------------
407
408=head1 B<clear_selections()>
409
410 $parser->clear_selections();
411
412This method takes no arguments, it has the exact same effect as invoking
413<select()> with no arguments.
414
415=cut
416
417sub clear_selections {
418 my $self = shift;
419 $self->select();
420}
421
422##---------------------------------------------------------------------------
423
424=head1 B<match_section()>
425
426 $boolean = $parser->match_section($heading1,$heading2,...);
427
428Returns a value of true if the given section and subsection heading
429titles match any of the currently selected section specifications in
430effect from prior calls to B<select()> and B<add_selection()> (or if
431there are no explictly selected/deselected sections).
432
433The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of
434the corresponding sections, subsections, etc. to try and match. If
435C<$headingN> is omitted then it defaults to the current corresponding
436section heading title in the input.
437
438This method should I<not> normally be overridden by subclasses.
439
440=cut
441
442sub match_section {
443 my $self = shift;
444 my (@headings) = @_;
445 local *myData = $self;
446
447 ## Return true if no restrictions were explicitly specified
448 my $selections = (exists $myData{_SELECTED_SECTIONS})
449 ? $myData{_SELECTED_SECTIONS} : undef;
450 return 1 unless ((defined $selections) && (@{$selections} > 0));
451
452 ## Default any unspecified sections to the current one
453 my @current_headings = $self->curr_headings();
454 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
455 (defined $headings[$i]) or $headings[$i] = $current_headings[$i];
456 }
457
458 ## Look for a match against the specified section expressions
459 my ($section_spec, $regex, $negated, $match);
460 for $section_spec ( @{$selections} ) {
461 ##------------------------------------------------------
462 ## Each portion of this spec must match in order for
463 ## the spec to be matched. So we will start with a
464 ## match-value of 'true' and logically 'and' it with
465 ## the results of matching a given element of the spec.
466 ##------------------------------------------------------
467 $match = 1;
468 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
469 $regex = $section_spec->[$i];
470 $negated = ($regex =~ s/^\!//);
471 $match &= ($negated ? ($headings[$i] !~ /${regex}/)
472 : ($headings[$i] =~ /${regex}/));
473 last unless ($match);
474 }
475 return 1 if ($match);
476 }
477 return 0; ## no match
478}
479
480##---------------------------------------------------------------------------
481
482=head1 B<is_selected()>
483
484 $boolean = $parser->is_selected($paragraph);
485
486This method is used to determine if the block of text given in
487C<$paragraph> falls within the currently selected set of POD sections
488and subsections to be printed or processed. This method is also
489responsible for keeping track of the current input section and
490subsections. It is assumed that C<$paragraph> is the most recently read
491(but not yet processed) input paragraph.
492
493The value returned will be true if the C<$paragraph> and the rest of the
494text in the same section as C<$paragraph> should be selected (included)
495for processing; otherwise a false value is returned.
496
497=cut
498
499sub is_selected {
500 my ($self, $paragraph) = @_;
501 local $_;
502 local *myData = $self;
503
504 $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS});
505
506 ## Keep track of current sections levels and headings
507 $_ = $paragraph;
508 if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*)\s*$/) {
509 ## This is a section heading command
510 my ($level, $heading) = ($2, $3);
511 $level = 1 + (length($1) / 3) if ((! length $level) || (length $1));
512 ## Reset the current section heading at this level
513 $myData{_SECTION_HEADINGS}->[$level - 1] = $heading;
514 ## Reset subsection headings of this one to empty
515 for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) {
516 $myData{_SECTION_HEADINGS}->[$i] = '';
517 }
518 }
519
520 return $self->match_section();
521}
522
523#############################################################################
524
525=head1 EXPORTED FUNCTIONS
526
527The following functions are exported by this module. Please note that
528these are functions (not methods) and therefore C<do not> take an
529implicit first argument.
530
531=cut
532
533##---------------------------------------------------------------------------
534
535=head1 B<podselect()>
536
537 podselect(\%options,@filelist);
538
539B<podselect> will print the raw (untranslated) POD paragraphs of all
540POD sections in the given input files specified by C<@filelist>
541according to the given options.
542
543If any argument to B<podselect> is a reference to a hash
544(associative array) then the values with the following keys are
545processed as follows:
546
547=over 4
548
549=item B<-output>
550
551A string corresponding to the desired output file (or ">&STDOUT"
552or ">&STDERR"). The default is to use standard output.
553
554=item B<-sections>
555
556A reference to an array of sections specifications (as described in
557L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD
558sections and subsections to be selected from input. If no section
559specifications are given, then all sections of the PODs are used.
560
561=begin _NOT_IMPLEMENTED_
562
563=item B<-ranges>
564
565A reference to an array of range specifications (as described in
566L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD
567paragraphs to be selected from the desired input sections. If no range
568specifications are given, then all paragraphs of the desired sections
569are used.
570
571=end _NOT_IMPLEMENTED_
572
573=back
574
575All other arguments should correspond to the names of input files
576containing POD sections. A file name of "-" or "<&STDIN" will
577be interpeted to mean standard input (which is the default if no
578filenames are given).
579
580=cut
581
582sub podselect {
583 my(@argv) = @_;
584 my %defaults = ();
585 my $pod_parser = new Pod::Select(%defaults);
586 my $num_inputs = 0;
587 my $output = ">&STDOUT";
588 my %opts = ();
589 local $_;
590 for (@argv) {
591 if (ref($_)) {
592 next unless (ref($_) eq 'HASH');
593 %opts = (%defaults, %{$_});
594
595 ##-------------------------------------------------------------
596 ## Need this for backward compatibility since we formerly used
597 ## options that were all uppercase words rather than ones that
598 ## looked like Unix command-line options.
599 ## to be uppercase keywords)
600 ##-------------------------------------------------------------
601 %opts = map {
602 my ($key, $val) = (lc $_, $opts{$_});
603 $key =~ s/^(?=\w)/-/;
604 $key =~ /^-se[cl]/ and $key = '-sections';
605 #! $key eq '-range' and $key .= 's';
606 ($key => $val);
607 } (keys %opts);
608
609 ## Process the options
610 (exists $opts{'-output'}) and $output = $opts{'-output'};
611
612 ## Select the desired sections
613 $pod_parser->select(@{ $opts{'-sections'} })
614 if ( (defined $opts{'-sections'})
615 && ((ref $opts{'-sections'}) eq 'ARRAY') );
616
617 #! ## Select the desired paragraph ranges
618 #! $pod_parser->select(@{ $opts{'-ranges'} })
619 #! if ( (defined $opts{'-ranges'})
620 #! && ((ref $opts{'-ranges'}) eq 'ARRAY') );
621 }
622 else {
623 $pod_parser->parse_from_file($_, $output);
624 ++$num_inputs;
625 }
626 }
627 $pod_parser->parse_from_file("-") unless ($num_inputs > 0);
628}
629
630#############################################################################
631
632=head1 PRIVATE METHODS AND DATA
633
634B<Pod::Select> makes uses a number of internal methods and data fields
635which clients should not need to see or use. For the sake of avoiding
636name collisions with client data and methods, these methods and fields
637are briefly discussed here. Determined hackers may obtain further
638information about them by reading the B<Pod::Select> source code.
639
640Private data fields are stored in the hash-object whose reference is
641returned by the B<new()> constructor for this class. The names of all
642private methods and data-fields used by B<Pod::Select> begin with a
643prefix of "_" and match the regular expression C</^_\w+$/>.
644
645=cut
646
647##---------------------------------------------------------------------------
648
649=begin _PRIVATE_
650
651=head1 B<_compile_section_spec()>
652
653 $listref = $parser->_compile_section_spec($section_spec);
654
655This function (note it is a function and I<not> a method) takes a
656section specification (as described in L<"SECTION SPECIFICATIONS">)
657given in C<$section_sepc>, and compiles it into a list of regular
658expressions. If C<$section_spec> has no syntax errors, then a reference
659to the list (array) of corresponding regular expressions is returned;
660otherwise C<undef> is returned and an error message is printed (using
661B<carp>) for each invalid regex.
662
663=end _PRIVATE_
664
665=cut
666
667sub _compile_section_spec {
668 my ($section_spec) = @_;
669 my (@regexs, $negated);
670
671 ## Compile the spec into a list of regexs
672 local $_ = $section_spec;
673 s|\\\\|\001|g; ## handle escaped backward slashes
674 s|\\/|\002|g; ## handle escaped forward slashes
675
676 ## Parse the regexs for the heading titles
677 @regexs = split('/', $_, $MAX_HEADING_LEVEL);
678
679 ## Set default regex for ommitted levels
680 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
681 $regexs[$i] = '.*' unless ((defined $regexs[$i])
682 && (length $regexs[$i]));
683 }
684 ## Modify the regexs as needed and validate their syntax
685 my $bad_regexs = 0;
686 for (@regexs) {
687 $_ .= '.+' if ($_ eq '!');
688 s|\001|\\\\|g; ## restore escaped backward slashes
689 s|\002|\\/|g; ## restore escaped forward slashes
690 $negated = s/^\!//; ## check for negation
691 eval "/$_/"; ## check regex syntax
692 if ($@) {
693 ++$bad_regexs;
694 carp "Bad regular expression /$_/ in \"$section_spec\": $@\n";
695 }
696 else {
697 ## Add the forward and rear anchors (and put the negator back)
698 $_ = '^' . $_ unless (/^\^/);
699 $_ = $_ . '$' unless (/\$$/);
700 $_ = '!' . $_ if ($negated);
701 }
702 }
703 return (! $bad_regexs) ? [ @regexs ] : undef;
704}
705
706##---------------------------------------------------------------------------
707
708=begin _PRIVATE_
709
710=head2 $self->{_SECTION_HEADINGS}
711
712A reference to an array of the current section heading titles for each
713heading level (note that the first heading level title is at index 0).
714
715=end _PRIVATE_
716
717=cut
718
719##---------------------------------------------------------------------------
720
721=begin _PRIVATE_
722
723=head2 $self->{_SELECTED_SECTIONS}
724
725A reference to an array of references to arrays. Each subarray is a list
726of anchored regular expressions (preceded by a "!" if the expression is to
727be negated). The index of the expression in the subarray should correspond
728to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}>
729that it is to be matched against.
730
731=end _PRIVATE_
732
733=cut
734
735#############################################################################
736
737=head1 SEE ALSO
738
739L<Pod::Parser>
740
741=head1 AUTHOR
742
743Brad Appleton E<lt>bradapp@enteract.comE<gt>
744
745Based on code for B<pod2text> written by
746Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
747
748=cut
749
7501;
751