major pod update from Tom Christiansen
[p5sagit/p5-mst-13.2.git] / lib / Pod / Select.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/Select.pm -- function to select portions of POD docs
3#
4# Based on Tom Christiansen's pod2text() function
5# (with extensive modifications).
6#
7# Copyright (C) 1996-1999 Tom Christiansen. All rights reserved.
8# This file is part of "PodParser". PodParser is free software;
9# you can redistribute it and/or modify it under the same terms
10# as Perl itself.
11#############################################################################
12
13package Pod::Select;
14
15use vars qw($VERSION);
e9fdc7d2 16$VERSION = 1.081; ## Current version of this package
360aca43 17require 5.004; ## requires this Perl version or later
18
19#############################################################################
20
21=head1 NAME
22
23Pod::Select, podselect() - extract selected sections of POD from input
24
25=head1 SYNOPSIS
26
27 use Pod::Select;
28
29 ## Select all the POD sections for each file in @filelist
30 ## and print the result on standard output.
31 podselect(@filelist);
32
33 ## Same as above, but write to tmp.out
34 podselect({-output => "tmp.out"}, @filelist):
35
36 ## Select from the given filelist, only those POD sections that are
37 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
38 podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist):
39
40 ## Select the "DESCRIPTION" section of the PODs from STDIN and write
41 ## the result to STDERR.
42 podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN);
43
44or
45
46 use Pod::Select;
47
48 ## Create a parser object for selecting POD sections from the input
49 $parser = new Pod::Select();
50
51 ## Select all the POD sections for each file in @filelist
52 ## and print the result to tmp.out.
53 $parser->parse_from_file("<&STDIN", "tmp.out");
54
55 ## Select from the given filelist, only those POD sections that are
56 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
57 $parser->select("NAME|SYNOPSIS", "OPTIONS");
58 for (@filelist) { $parser->parse_from_file($_); }
59
60 ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from
61 ## STDIN and write the result to STDERR.
62 $parser->select("DESCRIPTION");
63 $parser->add_selection("SEE ALSO");
64 $parser->parse_from_filehandle(\*STDIN, \*STDERR);
65
66=head1 REQUIRES
67
68perl5.004, Pod::Parser, Exporter, FileHandle, Carp
69
70=head1 EXPORTS
71
72podselect()
73
74=head1 DESCRIPTION
75
76B<podselect()> is a function which will extract specified sections of
77pod documentation from an input stream. This ability is provided by the
78B<Pod::Select> module which is a subclass of B<Pod::Parser>.
79B<Pod::Select> provides a method named B<select()> to specify the set of
80POD sections to select for processing/printing. B<podselect()> merely
81creates a B<Pod::Select> object and then invokes the B<podselect()>
82followed by B<parse_from_file()>.
83
84=head1 SECTION SPECIFICATIONS
85
86B<podselect()> and B<Pod::Select::select()> may be given one or more
87"section specifications" to restrict the text processed to only the
88desired set of sections and their corresponding subsections. A section
89specification is a string containing one or more Perl-style regular
90expressions separated by forward slashes ("/"). If you need to use a
91forward slash literally within a section title you can escape it with a
92backslash ("\/").
93
94The formal syntax of a section specification is:
95
96=over 4
97
98=item
99
100I<head1-title-regex>/I<head2-title-regex>/...
101
102=back
103
104Any omitted or empty regular expressions will default to ".*".
105Please note that each regular expression given is implicitly
106anchored by adding "^" and "$" to the beginning and end. Also, if a
107given regular expression starts with a "!" character, then the
108expression is I<negated> (so C<!foo> would match anything I<except>
109C<foo>).
110
111Some example section specifications follow.
112
113=over 4
114
115=item
116Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections:
117
118C<NAME|SYNOPSIS>
119
120=item
121Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION>
122section:
123
124C<DESCRIPTION/Question|Answer>
125
126=item
127Match the C<Comments> subsection of I<all> sections:
128
129C</Comments>
130
131=item
132Match all subsections of C<DESCRIPTION> I<except> for C<Comments>:
133
134C<DESCRIPTION/!Comments>
135
136=item
137Match the C<DESCRIPTION> section but do I<not> match any of its subsections:
138
139C<DESCRIPTION/!.+>
140
141=item
142Match all top level sections but none of their subsections:
143
144C</!.+>
145
146=back
147
148=begin _NOT_IMPLEMENTED_
149
150=head1 RANGE SPECIFICATIONS
151
152B<podselect()> and B<Pod::Select::select()> may be given one or more
153"range specifications" to restrict the text processed to only the
154desired ranges of paragraphs in the desired set of sections. A range
155specification is a string containing a single Perl-style regular
156expression (a regex), or else two Perl-style regular expressions
157(regexs) separated by a ".." (Perl's "range" operator is "..").
158The regexs in a range specification are delimited by forward slashes
159("/"). If you need to use a forward slash literally within a regex you
160can escape it with a backslash ("\/").
161
162The formal syntax of a range specification is:
163
164=over 4
165
166=item
167
168/I<start-range-regex>/[../I<end-range-regex>/]
169
170=back
171
172Where each the item inside square brackets (the ".." followed by the
173end-range-regex) is optional. Each "range-regex" is of the form:
174
175 =cmd-expr text-expr
176
177Where I<cmd-expr> is intended to match the name of one or more POD
178commands, and I<text-expr> is intended to match the paragraph text for
179the command. If a range-regex is supposed to match a POD command, then
180the first character of the regex (the one after the initial '/')
181absolutely I<must> be an single '=' character; it may not be anything
182else (not even a regex meta-character) if it is supposed to match
183against the name of a POD command.
184
185If no I<=cmd-expr> is given then the text-expr will be matched against
186plain textblocks unless it is preceded by a space, in which case it is
187matched against verbatim text-blocks. If no I<text-expr> is given then
188only the command-portion of the paragraph is matched against.
189
190Note that these two expressions are each implicitly anchored. This
191means that when matching against the command-name, there will be an
192implicit '^' and '$' around the given I<=cmd-expr>; and when matching
193against the paragraph text there will be an implicit '\A' and '\Z'
194around the given I<text-expr>.
195
196Unlike with section-specs, the '!' character does I<not> have any special
197meaning (negation or otherwise) at the beginning of a range-spec!
198
199Some example range specifications follow.
200
201=over 4
202
203=item
204Match all C<=for html> paragraphs:
205
206C</=for html/>
207
208=item
209Match all paragraphs between C<=begin html> and C<=end html>
210(note that this will I<not> work correctly if such sections
211are nested):
212
213C</=begin html/../=end html/>
214
215=item
216Match all paragraphs between the given C<=item> name until the end of the
217current section:
218
219C</=item mine/../=head\d/>
220
221=item
222Match all paragraphs between the given C<=item> until the next item, or
223until the end of the itemized list (note that this will I<not> work as
224desired if the item contains an itemized list nested within it):
225
226C</=item mine/../=(item|back)/>
227
228=back
229
230=end _NOT_IMPLEMENTED_
231
232=cut
233
234#############################################################################
235
236use strict;
237#use diagnostics;
238use Carp;
239use Pod::Parser 1.04;
240use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL);
241
242@ISA = qw(Pod::Parser);
243@EXPORT = qw(&podselect);
244
245## Maximum number of heading levels supported for '=headN' directives
246*MAX_HEADING_LEVEL = \3;
247
248#############################################################################
249
250=head1 OBJECT METHODS
251
252The following methods are provided in this module. Each one takes a
253reference to the object itself as an implicit first parameter.
254
255=cut
256
257##---------------------------------------------------------------------------
258
259## =begin _PRIVATE_
260##
261## =head1 B<_init_headings()>
262##
263## Initialize the current set of active section headings.
264##
265## =cut
266##
267## =end _PRIVATE_
268
269use vars qw(%myData @section_headings);
270
271sub _init_headings {
272 my $self = shift;
273 local *myData = $self;
274
275 ## Initialize current section heading titles if necessary
276 unless (defined $myData{_SECTION_HEADINGS}) {
277 local *section_headings = $myData{_SECTION_HEADINGS} = [];
278 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
279 $section_headings[$i] = '';
280 }
281 }
282}
283
284##---------------------------------------------------------------------------
285
286=head1 B<curr_headings()>
287
288 ($head1, $head2, $head3, ...) = $parser->curr_headings();
289 $head1 = $parser->curr_headings(1);
290
291This method returns a list of the currently active section headings and
292subheadings in the document being parsed. The list of headings returned
293corresponds to the most recently parsed paragraph of the input.
294
295If an argument is given, it must correspond to the desired section
296heading number, in which case only the specified section heading is
297returned. If there is no current section heading at the specified
298level, then C<undef> is returned.
299
300=cut
301
302sub curr_headings {
303 my $self = shift;
304 $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS});
305 my @headings = @{ $self->{_SECTION_HEADINGS} };
306 return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings;
307}
308
309##---------------------------------------------------------------------------
310
311=head1 B<select()>
312
313 $parser->select($section_spec1,$section_spec2,...);
314
315This method is used to select the particular sections and subsections of
316POD documentation that are to be printed and/or processed. The existing
317set of selected sections is I<replaced> with the given set of sections.
318See B<add_selection()> for adding to the current set of selected
319sections.
320
321Each of the C<$section_spec> arguments should be a section specification
322as described in L<"SECTION SPECIFICATIONS">. The section specifications
323are parsed by this method and the resulting regular expressions are
324stored in the invoking object.
325
326If no C<$section_spec> arguments are given, then the existing set of
327selected sections is cleared out (which means C<all> sections will be
328processed).
329
330This method should I<not> normally be overridden by subclasses.
331
332=cut
333
334use vars qw(@selected_sections);
335
336sub select {
337 my $self = shift;
338 my @sections = @_;
339 local *myData = $self;
340 local $_;
341
342### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?)
343
344 ##---------------------------------------------------------------------
345 ## The following is a blatant hack for backward compatibility, and for
346 ## implementing add_selection(). If the *first* *argument* is the
347 ## string "+", then the remaining section specifications are *added*
348 ## to the current set of selections; otherwise the given section
349 ## specifications will *replace* the current set of selections.
350 ##
351 ## This should probably be fixed someday, but for the present time,
352 ## it seems incredibly unlikely that "+" would ever correspond to
353 ## a legitimate section heading
354 ##---------------------------------------------------------------------
355 my $add = ($sections[0] eq "+") ? shift(@sections) : "";
356
357 ## Reset the set of sections to use
358 unless (@sections > 0) {
359 delete $myData{_SELECTED_SECTIONS} unless ($add);
360 return;
361 }
362 $myData{_SELECTED_SECTIONS} = []
363 unless ($add && exists $myData{_SELECTED_SECTIONS});
364 local *selected_sections = $myData{_SELECTED_SECTIONS};
365
366 ## Compile each spec
367 my $spec;
368 for $spec (@sections) {
369 if ( defined($_ = &_compile_section_spec($spec)) ) {
370 ## Store them in our sections array
371 push(@selected_sections, $_);
372 }
373 else {
374 carp "Ignoring section spec \"$spec\"!\n";
375 }
376 }
377}
378
379##---------------------------------------------------------------------------
380
381=head1 B<add_selection()>
382
383 $parser->add_selection($section_spec1,$section_spec2,...);
384
385This method is used to add to the currently selected sections and
386subsections of POD documentation that are to be printed and/or
387processed. See <select()> for replacing the currently selected sections.
388
389Each of the C<$section_spec> arguments should be a section specification
390as described in L<"SECTION SPECIFICATIONS">. The section specifications
391are parsed by this method and the resulting regular expressions are
392stored in the invoking object.
393
394This method should I<not> normally be overridden by subclasses.
395
396=cut
397
398sub add_selection {
399 my $self = shift;
400 $self->select("+", @_);
401}
402
403##---------------------------------------------------------------------------
404
405=head1 B<clear_selections()>
406
407 $parser->clear_selections();
408
409This method takes no arguments, it has the exact same effect as invoking
410<select()> with no arguments.
411
412=cut
413
414sub clear_selections {
415 my $self = shift;
416 $self->select();
417}
418
419##---------------------------------------------------------------------------
420
421=head1 B<match_section()>
422
423 $boolean = $parser->match_section($heading1,$heading2,...);
424
425Returns a value of true if the given section and subsection heading
426titles match any of the currently selected section specifications in
427effect from prior calls to B<select()> and B<add_selection()> (or if
428there are no explictly selected/deselected sections).
429
430The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of
431the corresponding sections, subsections, etc. to try and match. If
432C<$headingN> is omitted then it defaults to the current corresponding
433section heading title in the input.
434
435This method should I<not> normally be overridden by subclasses.
436
437=cut
438
439sub match_section {
440 my $self = shift;
441 my (@headings) = @_;
442 local *myData = $self;
443
444 ## Return true if no restrictions were explicitly specified
445 my $selections = (exists $myData{_SELECTED_SECTIONS})
446 ? $myData{_SELECTED_SECTIONS} : undef;
447 return 1 unless ((defined $selections) && (@{$selections} > 0));
448
449 ## Default any unspecified sections to the current one
450 my @current_headings = $self->curr_headings();
451 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
452 (defined $headings[$i]) or $headings[$i] = $current_headings[$i];
453 }
454
455 ## Look for a match against the specified section expressions
456 my ($section_spec, $regex, $negated, $match);
457 for $section_spec ( @{$selections} ) {
458 ##------------------------------------------------------
459 ## Each portion of this spec must match in order for
460 ## the spec to be matched. So we will start with a
461 ## match-value of 'true' and logically 'and' it with
462 ## the results of matching a given element of the spec.
463 ##------------------------------------------------------
464 $match = 1;
465 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
466 $regex = $section_spec->[$i];
467 $negated = ($regex =~ s/^\!//);
468 $match &= ($negated ? ($headings[$i] !~ /${regex}/)
469 : ($headings[$i] =~ /${regex}/));
470 last unless ($match);
471 }
472 return 1 if ($match);
473 }
474 return 0; ## no match
475}
476
477##---------------------------------------------------------------------------
478
479=head1 B<is_selected()>
480
481 $boolean = $parser->is_selected($paragraph);
482
483This method is used to determine if the block of text given in
484C<$paragraph> falls within the currently selected set of POD sections
485and subsections to be printed or processed. This method is also
486responsible for keeping track of the current input section and
487subsections. It is assumed that C<$paragraph> is the most recently read
488(but not yet processed) input paragraph.
489
490The value returned will be true if the C<$paragraph> and the rest of the
491text in the same section as C<$paragraph> should be selected (included)
492for processing; otherwise a false value is returned.
493
494=cut
495
496sub is_selected {
497 my ($self, $paragraph) = @_;
498 local $_;
499 local *myData = $self;
500
501 $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS});
502
503 ## Keep track of current sections levels and headings
504 $_ = $paragraph;
505 if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*)\s*$/) {
506 ## This is a section heading command
507 my ($level, $heading) = ($2, $3);
508 $level = 1 + (length($1) / 3) if ((! length $level) || (length $1));
509 ## Reset the current section heading at this level
510 $myData{_SECTION_HEADINGS}->[$level - 1] = $heading;
511 ## Reset subsection headings of this one to empty
512 for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) {
513 $myData{_SECTION_HEADINGS}->[$i] = '';
514 }
515 }
516
517 return $self->match_section();
518}
519
520#############################################################################
521
522=head1 EXPORTED FUNCTIONS
523
524The following functions are exported by this module. Please note that
525these are functions (not methods) and therefore C<do not> take an
526implicit first argument.
527
528=cut
529
530##---------------------------------------------------------------------------
531
532=head1 B<podselect()>
533
534 podselect(\%options,@filelist);
535
536B<podselect> will print the raw (untranslated) POD paragraphs of all
537POD sections in the given input files specified by C<@filelist>
538according to the given options.
539
540If any argument to B<podselect> is a reference to a hash
541(associative array) then the values with the following keys are
542processed as follows:
543
544=over 4
545
546=item B<-output>
547
548A string corresponding to the desired output file (or ">&STDOUT"
549or ">&STDERR"). The default is to use standard output.
550
551=item B<-sections>
552
553A reference to an array of sections specifications (as described in
554L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD
555sections and subsections to be selected from input. If no section
556specifications are given, then all sections of the PODs are used.
557
558=begin _NOT_IMPLEMENTED_
559
560=item B<-ranges>
561
562A reference to an array of range specifications (as described in
563L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD
564paragraphs to be selected from the desired input sections. If no range
565specifications are given, then all paragraphs of the desired sections
566are used.
567
568=end _NOT_IMPLEMENTED_
569
570=back
571
572All other arguments should correspond to the names of input files
573containing POD sections. A file name of "-" or "<&STDIN" will
574be interpeted to mean standard input (which is the default if no
575filenames are given).
576
577=cut
578
579sub podselect {
580 my(@argv) = @_;
581 my %defaults = ();
582 my $pod_parser = new Pod::Select(%defaults);
583 my $num_inputs = 0;
584 my $output = ">&STDOUT";
585 my %opts = ();
586 local $_;
587 for (@argv) {
588 if (ref($_)) {
589 next unless (ref($_) eq 'HASH');
590 %opts = (%defaults, %{$_});
591
592 ##-------------------------------------------------------------
593 ## Need this for backward compatibility since we formerly used
594 ## options that were all uppercase words rather than ones that
595 ## looked like Unix command-line options.
596 ## to be uppercase keywords)
597 ##-------------------------------------------------------------
598 %opts = map {
599 my ($key, $val) = (lc $_, $opts{$_});
600 $key =~ s/^(?=\w)/-/;
601 $key =~ /^-se[cl]/ and $key = '-sections';
602 #! $key eq '-range' and $key .= 's';
603 ($key => $val);
604 } (keys %opts);
605
606 ## Process the options
607 (exists $opts{'-output'}) and $output = $opts{'-output'};
608
609 ## Select the desired sections
610 $pod_parser->select(@{ $opts{'-sections'} })
611 if ( (defined $opts{'-sections'})
612 && ((ref $opts{'-sections'}) eq 'ARRAY') );
613
614 #! ## Select the desired paragraph ranges
615 #! $pod_parser->select(@{ $opts{'-ranges'} })
616 #! if ( (defined $opts{'-ranges'})
617 #! && ((ref $opts{'-ranges'}) eq 'ARRAY') );
618 }
619 else {
620 $pod_parser->parse_from_file($_, $output);
621 ++$num_inputs;
622 }
623 }
624 $pod_parser->parse_from_file("-") unless ($num_inputs > 0);
625}
626
627#############################################################################
628
629=head1 PRIVATE METHODS AND DATA
630
631B<Pod::Select> makes uses a number of internal methods and data fields
632which clients should not need to see or use. For the sake of avoiding
633name collisions with client data and methods, these methods and fields
634are briefly discussed here. Determined hackers may obtain further
635information about them by reading the B<Pod::Select> source code.
636
637Private data fields are stored in the hash-object whose reference is
638returned by the B<new()> constructor for this class. The names of all
639private methods and data-fields used by B<Pod::Select> begin with a
640prefix of "_" and match the regular expression C</^_\w+$/>.
641
642=cut
643
644##---------------------------------------------------------------------------
645
646=begin _PRIVATE_
647
648=head1 B<_compile_section_spec()>
649
650 $listref = $parser->_compile_section_spec($section_spec);
651
652This function (note it is a function and I<not> a method) takes a
653section specification (as described in L<"SECTION SPECIFICATIONS">)
654given in C<$section_sepc>, and compiles it into a list of regular
655expressions. If C<$section_spec> has no syntax errors, then a reference
656to the list (array) of corresponding regular expressions is returned;
657otherwise C<undef> is returned and an error message is printed (using
658B<carp>) for each invalid regex.
659
660=end _PRIVATE_
661
662=cut
663
664sub _compile_section_spec {
665 my ($section_spec) = @_;
666 my (@regexs, $negated);
667
668 ## Compile the spec into a list of regexs
669 local $_ = $section_spec;
670 s|\\\\|\001|g; ## handle escaped backward slashes
671 s|\\/|\002|g; ## handle escaped forward slashes
672
673 ## Parse the regexs for the heading titles
674 @regexs = split('/', $_, $MAX_HEADING_LEVEL);
675
676 ## Set default regex for ommitted levels
677 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
678 $regexs[$i] = '.*' unless ((defined $regexs[$i])
679 && (length $regexs[$i]));
680 }
681 ## Modify the regexs as needed and validate their syntax
682 my $bad_regexs = 0;
683 for (@regexs) {
684 $_ .= '.+' if ($_ eq '!');
685 s|\001|\\\\|g; ## restore escaped backward slashes
686 s|\002|\\/|g; ## restore escaped forward slashes
687 $negated = s/^\!//; ## check for negation
688 eval "/$_/"; ## check regex syntax
689 if ($@) {
690 ++$bad_regexs;
691 carp "Bad regular expression /$_/ in \"$section_spec\": $@\n";
692 }
693 else {
694 ## Add the forward and rear anchors (and put the negator back)
695 $_ = '^' . $_ unless (/^\^/);
696 $_ = $_ . '$' unless (/\$$/);
697 $_ = '!' . $_ if ($negated);
698 }
699 }
700 return (! $bad_regexs) ? [ @regexs ] : undef;
701}
702
703##---------------------------------------------------------------------------
704
705=begin _PRIVATE_
706
707=head2 $self->{_SECTION_HEADINGS}
708
709A reference to an array of the current section heading titles for each
710heading level (note that the first heading level title is at index 0).
711
712=end _PRIVATE_
713
714=cut
715
716##---------------------------------------------------------------------------
717
718=begin _PRIVATE_
719
720=head2 $self->{_SELECTED_SECTIONS}
721
722A reference to an array of references to arrays. Each subarray is a list
723of anchored regular expressions (preceded by a "!" if the expression is to
724be negated). The index of the expression in the subarray should correspond
725to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}>
726that it is to be matched against.
727
728=end _PRIVATE_
729
730=cut
731
732#############################################################################
733
734=head1 SEE ALSO
735
736L<Pod::Parser>
737
738=head1 AUTHOR
739
740Brad Appleton E<lt>bradapp@enteract.comE<gt>
741
742Based on code for B<pod2text> written by
743Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
744
745=cut
746
7471;
748