Commit | Line | Data |
351625bd |
1 | |
2 | require 5.005; |
3 | package Pod::Simple::Search; |
4 | use strict; |
5 | |
6 | use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY); |
9ea6f39e |
7 | $VERSION = 3.04; ## Current version of this package |
351625bd |
8 | |
9 | BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level |
10 | use Carp (); |
11 | |
12 | $SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i; |
13 | # flag to occasionally sleep for $SLEEPY - 1 seconds. |
14 | |
15 | $MAX_VERSION_WITHIN ||= 60; |
16 | |
17 | ############################################################################# |
18 | |
19 | #use diagnostics; |
20 | use File::Spec (); |
21 | use File::Basename qw( basename ); |
22 | use Config (); |
23 | use Cwd qw( cwd ); |
24 | |
25 | #========================================================================== |
26 | __PACKAGE__->_accessorize( # Make my dumb accessor methods |
27 | 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob', |
28 | 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name', |
29 | ); |
30 | #========================================================================== |
31 | |
32 | sub new { |
33 | my $class = shift; |
34 | my $self = bless {}, ref($class) || $class; |
35 | $self->init; |
36 | return $self; |
37 | } |
38 | |
39 | sub init { |
40 | my $self = shift; |
41 | $self->inc(1); |
42 | $self->verbose(DEBUG); |
43 | return $self; |
44 | } |
45 | |
46 | #-------------------------------------------------------------------------- |
47 | |
48 | sub survey { |
49 | my($self, @search_dirs) = @_; |
50 | $self = $self->new unless ref $self; # tolerate being a class method |
51 | |
52 | $self->_expand_inc( \@search_dirs ); |
53 | |
54 | |
55 | $self->{'_scan_count'} = 0; |
56 | $self->{'_dirs_visited'} = {}; |
57 | $self->path2name( {} ); |
58 | $self->name2path( {} ); |
59 | $self->limit_re( $self->_limit_glob_to_limit_re ) if $self->{'limit_glob'}; |
60 | my $cwd = cwd(); |
61 | my $verbose = $self->verbose; |
62 | local $_; # don't clobber the caller's $_ ! |
63 | |
64 | foreach my $try (@search_dirs) { |
65 | unless( File::Spec->file_name_is_absolute($try) ) { |
66 | # make path absolute |
67 | $try = File::Spec->catfile( $cwd ,$try); |
68 | } |
69 | # simplify path |
046e7abe |
70 | $try = File::Spec->canonpath($try); |
351625bd |
71 | |
72 | my $start_in; |
73 | my $modname_prefix; |
74 | if($self->{'dir_prefix'}) { |
75 | $start_in = File::Spec->catdir( |
76 | $try, |
77 | grep length($_), split '[\\/:]+', $self->{'dir_prefix'} |
78 | ); |
79 | $modname_prefix = [grep length($_), split m{[:/\\]}, $self->{'dir_prefix'}]; |
80 | $verbose and print "Appending \"$self->{'dir_prefix'}\" to $try, ", |
81 | "giving $start_in (= @$modname_prefix)\n"; |
82 | } else { |
83 | $start_in = $try; |
84 | } |
85 | |
86 | if( $self->{'_dirs_visited'}{$start_in} ) { |
87 | $verbose and print "Directory '$start_in' already seen, skipping.\n"; |
88 | next; |
89 | } else { |
90 | $self->{'_dirs_visited'}{$start_in} = 1; |
91 | } |
92 | |
93 | unless(-e $start_in) { |
94 | $verbose and print "Skipping non-existent $start_in\n"; |
95 | next; |
96 | } |
97 | |
98 | my $closure = $self->_make_search_callback; |
99 | |
100 | if(-d $start_in) { |
101 | # Normal case: |
102 | $verbose and print "Beginning excursion under $start_in\n"; |
103 | $self->_recurse_dir( $start_in, $closure, $modname_prefix ); |
104 | $verbose and print "Back from excursion under $start_in\n\n"; |
105 | |
106 | } elsif(-f _) { |
107 | # A excursion consisting of just one file! |
108 | $_ = basename($start_in); |
109 | $verbose and print "Pondering $start_in ($_)\n"; |
110 | $closure->($start_in, $_, 0, []); |
111 | |
112 | } else { |
113 | $verbose and print "Skipping mysterious $start_in\n"; |
114 | } |
115 | } |
116 | $self->progress and $self->progress->done( |
117 | "Noted $$self{'_scan_count'} Pod files total"); |
118 | |
119 | return unless defined wantarray; # void |
120 | return $self->name2path unless wantarray; # scalar |
121 | return $self->name2path, $self->path2name; # list |
122 | } |
123 | |
124 | |
125 | #========================================================================== |
126 | sub _make_search_callback { |
127 | my $self = $_[0]; |
128 | |
129 | # Put the options in variables, for easy access |
130 | my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress,$path2name,$name2path) = |
131 | map scalar($self->$_()), |
132 | qw(laborious verbose shadows limit_re callback progress path2name name2path); |
133 | |
134 | my($file, $shortname, $isdir, $modname_bits); |
135 | return sub { |
136 | ($file, $shortname, $isdir, $modname_bits) = @_; |
137 | |
138 | if($isdir) { # this never gets called on the startdir itself, just subdirs |
139 | |
140 | if( $self->{'_dirs_visited'}{$file} ) { |
141 | $verbose and print "Directory '$file' already seen, skipping.\n"; |
142 | return 'PRUNE'; |
351625bd |
143 | } |
144 | |
4f90f8a5 |
145 | print "Looking in dir $file\n" if $verbose; |
351625bd |
146 | |
4f90f8a5 |
147 | unless ($laborious) { # $laborious overrides pruning |
148 | if( m/^(\d+\.[\d_]{3,})\z/s |
149 | and do { my $x = $1; $x =~ tr/_//d; $x != $] } |
150 | ) { |
151 | $verbose and print "Perl $] version mismatch on $_, skipping.\n"; |
152 | return 'PRUNE'; |
153 | } |
351625bd |
154 | |
4f90f8a5 |
155 | if( m/^([A-Za-z][a-zA-Z0-9_]*)\z/s ) { |
156 | $verbose and print "$_ is a well-named module subdir. Looking....\n"; |
157 | } else { |
158 | $verbose and print "$_ is a fishy directory name. Skipping.\n"; |
159 | return 'PRUNE'; |
160 | } |
161 | } # end unless $laborious |
351625bd |
162 | |
4f90f8a5 |
163 | $self->{'_dirs_visited'}{$file} = 1; |
351625bd |
164 | return; # (not pruning); |
165 | } |
166 | |
167 | |
168 | # Make sure it's a file even worth even considering |
169 | if($laborious) { |
170 | unless( |
171 | m/\.(pod|pm|plx?)\z/i || -x _ and -T _ |
172 | # Note that the cheapest operation (the RE) is run first. |
173 | ) { |
174 | $verbose > 1 and print " Brushing off uninteresting $file\n"; |
175 | return; |
176 | } |
177 | } else { |
178 | unless( m/^[-_a-zA-Z0-9]+\.(?:pod|pm|plx?)\z/is ) { |
179 | $verbose > 1 and print " Brushing off oddly-named $file\n"; |
180 | return; |
181 | } |
182 | } |
183 | |
184 | $verbose and print "Considering item $file\n"; |
185 | my $name = $self->_path2modname( $file, $shortname, $modname_bits ); |
186 | $verbose > 0.01 and print " Nominating $file as $name\n"; |
187 | |
188 | if($limit_re and $name !~ m/$limit_re/i) { |
189 | $verbose and print "Shunning $name as not matching $limit_re\n"; |
190 | return; |
191 | } |
192 | |
193 | if( !$shadows and $name2path->{$name} ) { |
194 | $verbose and print "Not worth considering $file ", |
195 | "-- already saw $name as ", |
196 | join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n"; |
197 | return; |
198 | } |
199 | |
200 | # Put off until as late as possible the expense of |
201 | # actually reading the file: |
202 | if( m/\.pod\z/is ) { |
203 | # just assume it has pod, okay? |
204 | } else { |
205 | $progress and $progress->reach($self->{'_scan_count'}, "Scanning $file"); |
206 | return unless $self->contains_pod( $file ); |
207 | } |
208 | ++ $self->{'_scan_count'}; |
209 | |
210 | # Or finally take note of it: |
211 | if( $name2path->{$name} ) { |
212 | $verbose and print |
213 | "Duplicate POD found (shadowing?): $name ($file)\n", |
214 | " Already seen in ", |
215 | join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n"; |
216 | } else { |
217 | $name2path->{$name} = $file; # Noting just the first occurrence |
218 | } |
219 | $verbose and print " Noting $name = $file\n"; |
220 | if( $callback ) { |
221 | local $_ = $_; # insulate from changes, just in case |
222 | $callback->($file, $name); |
223 | } |
224 | $path2name->{$file} = $name; |
225 | return; |
226 | } |
227 | } |
228 | |
229 | #========================================================================== |
230 | |
231 | sub _path2modname { |
232 | my($self, $file, $shortname, $modname_bits) = @_; |
233 | |
234 | # this code simplifies the POD name for Perl modules: |
235 | # * remove "site_perl" |
236 | # * remove e.g. "i586-linux" (from 'archname') |
237 | # * remove e.g. 5.00503 |
238 | # * remove pod/ if followed by perl*.pod (e.g. in pod/perlfunc.pod) |
046e7abe |
239 | # * dig into the file for case-preserved name if not already mixed case |
351625bd |
240 | |
241 | my @m = @$modname_bits; |
242 | my $x; |
046e7abe |
243 | my $verbose = $self->verbose; |
351625bd |
244 | |
245 | # Shaving off leading naughty-bits |
246 | while(@m |
247 | and defined($x = lc( $m[0] )) |
248 | and( $x eq 'site_perl' |
249 | or($x eq 'pod' and @m == 1 and $shortname =~ m{^perl.*\.pod$}s ) |
250 | or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum |
251 | or $x eq lc( $Config::Config{'archname'} ) |
252 | )) { shift @m } |
253 | |
254 | my $name = join '::', @m, $shortname; |
255 | $self->_simplify_base($name); |
046e7abe |
256 | |
9ea6f39e |
257 | # On VMS, case-preserved document names can't be constructed from |
258 | # filenames, so try to extract them from the "=head1 NAME" tag in the |
259 | # file instead. |
260 | if ($^O eq 'VMS' && ($name eq lc($name) || $name eq uc($name))) { |
046e7abe |
261 | open PODFILE, "<$file" or die "_path2modname: Can't open $file: $!"; |
262 | my $in_pod = 0; |
263 | my $in_name = 0; |
9ea6f39e |
264 | my $line; |
265 | while ($line = <PODFILE>) { |
266 | chomp $line; |
267 | $in_pod = 1 if ($line =~ m/^=\w/); |
268 | $in_pod = 0 if ($line =~ m/^=cut/); |
046e7abe |
269 | next unless $in_pod; # skip non-pod text |
9ea6f39e |
270 | next if ($line =~ m/^\s*\z/); # and blank lines |
271 | next if ($in_pod && ($line =~ m/^X</)); # and commands |
046e7abe |
272 | if ($in_name) { |
9ea6f39e |
273 | if ($line =~ m/(\w+::)?(\w+)/) { |
046e7abe |
274 | # substitute case-preserved version of name |
275 | my $podname = $2; |
9ea6f39e |
276 | my $prefix = $1 || ''; |
046e7abe |
277 | $verbose and print "Attempting case restore of '$name' from '$prefix$podname'\n"; |
278 | unless ($name =~ s/$prefix$podname/$prefix$podname/i) { |
279 | $verbose and print "Attempting case restore of '$name' from '$podname'\n"; |
280 | $name =~ s/$podname/$podname/i; |
281 | } |
282 | last; |
283 | } |
284 | } |
9ea6f39e |
285 | $in_name = 1 if ($line =~ m/^=head1 NAME/); |
046e7abe |
286 | } |
287 | close PODFILE; |
288 | } |
289 | |
351625bd |
290 | return $name; |
291 | } |
292 | |
293 | #========================================================================== |
294 | |
295 | sub _recurse_dir { |
296 | my($self, $startdir, $callback, $modname_bits) = @_; |
297 | |
298 | my $maxdepth = $self->{'fs_recursion_maxdepth'} || 10; |
299 | my $verbose = $self->verbose; |
300 | |
301 | my $here_string = File::Spec->curdir; |
302 | my $up_string = File::Spec->updir; |
303 | $modname_bits ||= []; |
304 | |
305 | my $recursor; |
306 | $recursor = sub { |
307 | my($dir_long, $dir_bare) = @_; |
308 | if( @$modname_bits >= 10 ) { |
309 | $verbose and print "Too deep! [@$modname_bits]\n"; |
310 | return; |
311 | } |
312 | |
313 | unless(-d $dir_long) { |
314 | $verbose > 2 and print "But it's not a dir! $dir_long\n"; |
315 | return; |
316 | } |
317 | unless( opendir(INDIR, $dir_long) ) { |
318 | $verbose > 2 and print "Can't opendir $dir_long : $!\n"; |
319 | closedir(INDIR); |
320 | return |
321 | } |
322 | my @items = sort readdir(INDIR); |
323 | closedir(INDIR); |
324 | |
325 | push @$modname_bits, $dir_bare unless $dir_bare eq ''; |
326 | |
327 | my $i_full; |
328 | foreach my $i (@items) { |
329 | next if $i eq $here_string or $i eq $up_string or $i eq ''; |
330 | $i_full = File::Spec->catfile( $dir_long, $i ); |
331 | |
332 | if(!-r $i_full) { |
333 | $verbose and print "Skipping unreadable $i_full\n"; |
334 | |
335 | } elsif(-f $i_full) { |
336 | $_ = $i; |
337 | $callback->( $i_full, $i, 0, $modname_bits ); |
338 | |
339 | } elsif(-d _) { |
046e7abe |
340 | $i =~ s/\.DIR\z//i if $^O eq 'VMS'; |
351625bd |
341 | $_ = $i; |
342 | my $rv = $callback->( $i_full, $i, 1, $modname_bits ) || ''; |
343 | |
344 | if($rv eq 'PRUNE') { |
345 | $verbose > 1 and print "OK, pruning"; |
346 | } else { |
347 | # Otherwise, recurse into it |
348 | $recursor->( File::Spec->catdir($dir_long, $i) , $i); |
349 | } |
350 | } else { |
351 | $verbose > 1 and print "Skipping oddity $i_full\n"; |
352 | } |
353 | } |
354 | pop @$modname_bits; |
355 | return; |
356 | };; |
357 | |
358 | local $_; |
359 | $recursor->($startdir, ''); |
360 | |
361 | undef $recursor; # allow it to be GC'd |
362 | |
363 | return; |
364 | } |
365 | |
366 | |
367 | #========================================================================== |
368 | |
369 | sub run { |
370 | # A function, useful in one-liners |
371 | |
372 | my $self = __PACKAGE__->new; |
373 | $self->limit_glob($ARGV[0]) if @ARGV; |
374 | $self->callback( sub { |
375 | my($file, $name) = @_; |
376 | my $version = ''; |
377 | |
378 | # Yes, I know we won't catch the version in like a File/Thing.pm |
379 | # if we see File/Thing.pod first. That's just the way the |
380 | # cookie crumbles. -- SMB |
381 | |
382 | if($file =~ m/\.pod$/i) { |
383 | # Don't bother looking for $VERSION in .pod files |
384 | DEBUG and print "Not looking for \$VERSION in .pod $file\n"; |
385 | } elsif( !open(INPOD, $file) ) { |
386 | DEBUG and print "Couldn't open $file: $!\n"; |
387 | close(INPOD); |
388 | } else { |
389 | # Sane case: file is readable |
390 | my $lines = 0; |
391 | while(<INPOD>) { |
392 | last if $lines++ > $MAX_VERSION_WITHIN; # some degree of sanity |
393 | if( s/^\s*\$VERSION\s*=\s*//s and m/\d/ ) { |
394 | DEBUG and print "Found version line (#$lines): $_"; |
395 | s/\s*\#.*//s; |
396 | s/\;\s*$//s; |
397 | s/\s+$//s; |
398 | s/\t+/ /s; # nix tabs |
399 | # Optimize the most common cases: |
400 | $_ = "v$1" |
401 | if m{^v?["']?([0-9_]+(\.[0-9_]+)*)["']?$}s |
402 | # like in $VERSION = "3.14159"; |
403 | or m{\$Revision:\s*([0-9_]+(?:\.[0-9_]+)*)\s*\$}s |
404 | # like in sprintf("%d.%02d", q$Revision: 4.13 $ =~ /(\d+)\.(\d+)/); |
405 | ; |
406 | |
407 | # Like in sprintf("%d.%s", map {s/_//g; $_} q$Name: release-0_55-public $ =~ /-(\d+)_([\d_]+)/) |
408 | $_ = sprintf("v%d.%s", |
409 | map {s/_//g; $_} |
410 | $1 =~ m/-(\d+)_([\d_]+)/) # snare just the numeric part |
411 | if m{\$Name:\s*([^\$]+)\$}s |
412 | ; |
413 | $version = $_; |
414 | DEBUG and print "Noting $version as version\n"; |
415 | last; |
416 | } |
417 | } |
418 | close(INPOD); |
419 | } |
420 | print "$name\t$version\t$file\n"; |
421 | return; |
422 | # End of callback! |
423 | }); |
424 | |
425 | $self->survey; |
426 | } |
427 | |
428 | #========================================================================== |
429 | |
430 | sub simplify_name { |
431 | my($self, $str) = @_; |
432 | |
433 | # Remove all path components |
434 | # XXX Why not just use basename()? -- SMB |
435 | |
436 | if ($^O eq 'MacOS') { $str =~ s{^.*:+}{}s } |
437 | else { $str =~ s{^.*/+}{}s } |
438 | |
439 | $self->_simplify_base($str); |
440 | return $str; |
441 | } |
442 | |
443 | #========================================================================== |
444 | |
445 | sub _simplify_base { # Internal method only |
446 | |
447 | # strip Perl's own extensions |
448 | $_[1] =~ s/\.(pod|pm|plx?)\z//i; |
449 | |
450 | # strip meaningless extensions on Win32 and OS/2 |
451 | $_[1] =~ s/\.(bat|exe|cmd)\z//i if $^O =~ /mswin|os2/i; |
452 | |
453 | # strip meaningless extensions on VMS |
454 | $_[1] =~ s/\.(com)\z//i if $^O eq 'VMS'; |
455 | |
456 | return; |
457 | } |
458 | |
459 | #========================================================================== |
460 | |
461 | sub _expand_inc { |
462 | my($self, $search_dirs) = @_; |
463 | |
464 | return unless $self->{'inc'}; |
465 | |
466 | if ($^O eq 'MacOS') { |
467 | push @$search_dirs, |
468 | grep $_ ne File::Spec->curdir, $self->_mac_whammy(@INC); |
469 | # Any other OSs need custom handling here? |
470 | } else { |
471 | push @$search_dirs, grep $_ ne File::Spec->curdir, @INC; |
472 | } |
473 | |
474 | $self->{'laborious'} = 0; # Since inc said to use INC |
475 | return; |
476 | } |
477 | |
478 | #========================================================================== |
479 | |
480 | sub _mac_whammy { # Tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS |
481 | my @them; |
482 | (undef,@them) = @_; |
483 | for $_ (@them) { |
484 | if ( $_ eq '.' ) { |
485 | $_ = ':'; |
486 | } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) { |
487 | $_ = ':'. $_; |
488 | } else { |
489 | $_ =~ s|^\./|:|; |
490 | } |
491 | } |
492 | return @them; |
493 | } |
494 | |
495 | #========================================================================== |
496 | |
497 | sub _limit_glob_to_limit_re { |
498 | my $self = $_[0]; |
499 | my $limit_glob = $self->{'limit_glob'} || return; |
500 | |
501 | my $limit_re = '^' . quotemeta($limit_glob) . '$'; |
502 | $limit_re =~ s/\\\?/./g; # glob "?" => "." |
503 | $limit_re =~ s/\\\*/.*?/g; # glob "*" => ".*?" |
504 | $limit_re =~ s/\.\*\?\$$//s; # final glob "*" => ".*?$" => "" |
505 | |
506 | $self->{'verbose'} and print "Turning limit_glob $limit_glob into re $limit_re\n"; |
507 | |
508 | # A common optimization: |
509 | if(!exists($self->{'dir_prefix'}) |
510 | and $limit_glob =~ m/^(?:\w+\:\:)+/s # like "File::*" or "File::Thing*" |
511 | # Optimize for sane and common cases (but not things like "*::File") |
512 | ) { |
513 | $self->{'dir_prefix'} = join "::", $limit_glob =~ m/^(?:\w+::)+/sg; |
514 | $self->{'verbose'} and print " and setting dir_prefix to $self->{'dir_prefix'}\n"; |
515 | } |
516 | |
517 | return $limit_re; |
518 | } |
519 | |
520 | #========================================================================== |
521 | |
522 | # contribution mostly from Tim Jenness <t.jenness@jach.hawaii.edu> |
523 | |
524 | sub find { |
525 | my($self, $pod, @search_dirs) = @_; |
526 | $self = $self->new unless ref $self; # tolerate being a class method |
527 | |
528 | # Check usage |
529 | Carp::carp 'Usage: \$self->find($podname, ...)' |
530 | unless defined $pod and length $pod; |
531 | |
532 | my $verbose = $self->verbose; |
533 | |
534 | # Split on :: and then join the name together using File::Spec |
535 | my @parts = split /::/, $pod; |
536 | $verbose and print "Chomping {$pod} => {@parts}\n"; |
537 | |
538 | #@search_dirs = File::Spec->curdir unless @search_dirs; |
539 | |
540 | if( $self->inc ) { |
541 | if( $^O eq 'MacOS' ) { |
542 | push @search_dirs, $self->_mac_whammy(@INC); |
543 | } else { |
544 | push @search_dirs, @INC; |
545 | } |
546 | |
547 | # Add location of pod documentation for perl man pages (eg perlfunc) |
548 | # This is a pod directory in the private install tree |
549 | #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'}, |
550 | # 'pod'); |
551 | #push (@search_dirs, $perlpoddir) |
552 | # if -d $perlpoddir; |
553 | |
554 | # Add location of binaries such as pod2text: |
555 | push @search_dirs, $Config::Config{'scriptdir'}; |
556 | # and if that's undef or q{} or nonexistent, we just ignore it later |
557 | } |
558 | |
559 | my %seen_dir; |
560 | Dir: |
561 | foreach my $dir ( @search_dirs ) { |
562 | next unless defined $dir and length $dir; |
563 | next if $seen_dir{$dir}; |
564 | $seen_dir{$dir} = 1; |
565 | unless(-d $dir) { |
566 | print "Directory $dir does not exist\n" if $verbose; |
567 | next Dir; |
568 | } |
569 | |
570 | print "Looking in directory $dir\n" if $verbose; |
571 | my $fullname = File::Spec->catfile( $dir, @parts ); |
572 | print "Filename is now $fullname\n" if $verbose; |
573 | |
574 | foreach my $ext ('', '.pod', '.pm', '.pl') { # possible extensions |
575 | my $fullext = $fullname . $ext; |
576 | if( -f $fullext and $self->contains_pod( $fullext ) ){ |
577 | print "FOUND: $fullext\n" if $verbose; |
578 | return $fullext; |
579 | } |
580 | } |
581 | my $subdir = File::Spec->catdir($dir,'pod'); |
582 | if(-d $subdir) { # slip in the ./pod dir too |
583 | $verbose and print "Noticing $subdir and stopping there...\n"; |
584 | $dir = $subdir; |
585 | redo Dir; |
586 | } |
587 | } |
588 | |
589 | return undef; |
590 | } |
591 | |
592 | #========================================================================== |
593 | |
594 | sub contains_pod { |
595 | my($self, $file) = @_; |
596 | my $verbose = $self->{'verbose'}; |
597 | |
598 | # check for one line of POD |
599 | $verbose > 1 and print " Scanning $file for pod...\n"; |
600 | unless( open(MAYBEPOD,"<$file") ) { |
601 | print "Error: $file is unreadable: $!\n"; |
602 | return undef; |
603 | } |
604 | |
605 | sleep($SLEEPY - 1) if $SLEEPY; |
606 | # avoid totally hogging the processor on OSs with poor process control |
607 | |
608 | local $_; |
609 | while( <MAYBEPOD> ) { |
610 | if(m/^=(head\d|pod|over|item)\b/s) { |
611 | close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting"; |
612 | chomp; |
613 | $verbose > 1 and print " Found some pod ($_) in $file\n"; |
614 | return 1; |
615 | } |
616 | } |
617 | close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting"; |
618 | $verbose > 1 and print " No POD in $file, skipping.\n"; |
619 | return 0; |
620 | } |
621 | |
622 | #========================================================================== |
623 | |
624 | sub _accessorize { # A simple-minded method-maker |
625 | shift; |
626 | no strict 'refs'; |
627 | foreach my $attrname (@_) { |
628 | *{caller() . '::' . $attrname} = sub { |
629 | use strict; |
630 | $Carp::CarpLevel = 1, Carp::croak( |
631 | "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)" |
632 | ) unless (@_ == 1 or @_ == 2) and ref $_[0]; |
633 | |
634 | # Read access: |
635 | return $_[0]->{$attrname} if @_ == 1; |
636 | |
637 | # Write access: |
638 | $_[0]->{$attrname} = $_[1]; |
639 | return $_[0]; # RETURNS MYSELF! |
640 | }; |
641 | } |
642 | # Ya know, they say accessories make the ensemble! |
643 | return; |
644 | } |
645 | |
646 | #========================================================================== |
647 | sub _state_as_string { |
648 | my $self = $_[0]; |
649 | return '' unless ref $self; |
650 | my @out = "{\n # State of $self ...\n"; |
651 | foreach my $k (sort keys %$self) { |
652 | push @out, " ", _esc($k), " => ", _esc($self->{$k}), ",\n"; |
653 | } |
654 | push @out, "}\n"; |
655 | my $x = join '', @out; |
656 | $x =~ s/^/#/mg; |
657 | return $x; |
658 | } |
659 | |
660 | sub _esc { |
661 | my $in = $_[0]; |
662 | return 'undef' unless defined $in; |
663 | $in =~ |
664 | s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> |
665 | <'\\x'.(unpack("H2",$1))>eg; |
666 | return qq{"$in"}; |
667 | } |
668 | |
669 | #========================================================================== |
670 | |
671 | run() unless caller; # run if "perl whatever/Search.pm" |
672 | |
673 | 1; |
674 | |
675 | #========================================================================== |
676 | |
677 | __END__ |
678 | |
679 | |
680 | =head1 NAME |
681 | |
682 | Pod::Simple::Search - find POD documents in directory trees |
683 | |
684 | =head1 SYNOPSIS |
685 | |
686 | use Pod::Simple::Search; |
687 | my $name2path = Pod::Simple::Search->new->limit_glob('LWP::*')->survey; |
688 | print "Looky see what I found: ", |
689 | join(' ', sort keys %$name2path), "\n"; |
690 | |
691 | print "LWPUA docs = ", |
692 | Pod::Simple::Search->new->find('LWP::UserAgent') || "?", |
693 | "\n"; |
694 | |
695 | =head1 DESCRIPTION |
696 | |
697 | B<Pod::Simple::Search> is a class that you use for running searches |
698 | for Pod files. An object of this class has several attributes |
699 | (mostly options for controlling search options), and some methods |
700 | for searching based on those attributes. |
701 | |
702 | The way to use this class is to make a new object of this class, |
703 | set any options, and then call one of the search options |
704 | (probably C<survey> or C<find>). The sections below discuss the |
705 | syntaxes for doing all that. |
706 | |
707 | |
708 | =head1 CONSTRUCTOR |
709 | |
710 | This class provides the one constructor, called C<new>. |
711 | It takes no parameters: |
712 | |
713 | use Pod::Simple::Search; |
714 | my $search = Pod::Simple::Search->new; |
715 | |
716 | =head1 ACCESSORS |
717 | |
718 | This class defines several methods for setting (and, occasionally, |
719 | reading) the contents of an object. With two exceptions (discussed at |
720 | the end of this section), these attributes are just for controlling the |
721 | way searches are carried out. |
722 | |
723 | Note that each of these return C<$self> when you call them as |
724 | C<< $self->I<whatever(value)> >>. That's so that you can chain |
725 | together set-attribute calls like this: |
726 | |
727 | my $name2path = |
728 | Pod::Simple::Search->new |
729 | -> inc(0) -> verbose(1) -> callback(\&blab) |
730 | ->survey(@there); |
731 | |
732 | ...which works exactly as if you'd done this: |
733 | |
734 | my $search = Pod::Simple::Search->new; |
735 | $search->inc(0); |
736 | $search->verbose(1); |
737 | $search->callback(\&blab); |
738 | my $name2path = $search->survey(@there); |
739 | |
740 | =over |
741 | |
742 | =item $search->inc( I<true-or-false> ); |
743 | |
744 | This attribute, if set to a true value, means that searches should |
745 | implicitly add perl's I<@INC> paths. This |
746 | automatically considers paths specified in the C<PERL5LIB> environment |
747 | as this is prepended to I<@INC> by the Perl interpreter itself. |
748 | This attribute's default value is B<TRUE>. If you want to search |
749 | only specific directories, set $self->inc(0) before calling |
750 | $inc->survey or $inc->find. |
751 | |
752 | |
753 | =item $search->verbose( I<nonnegative-number> ); |
754 | |
755 | This attribute, if set to a nonzero positive value, will make searches output |
756 | (via C<warn>) notes about what they're doing as they do it. |
757 | This option may be useful for debugging a pod-related module. |
758 | This attribute's default value is zero, meaning that no C<warn> messages |
759 | are produced. (Setting verbose to 1 turns on some messages, and setting |
760 | it to 2 turns on even more messages, i.e., makes the following search(es) |
761 | even more verbose than 1 would make them.) |
762 | |
763 | |
764 | =item $search->limit_glob( I<some-glob-string> ); |
765 | |
766 | This option means that you want to limit the results just to items whose |
767 | podnames match the given glob/wildcard expression. For example, you |
768 | might limit your search to just "LWP::*", to search only for modules |
769 | starting with "LWP::*" (but not including the module "LWP" itself); or |
770 | you might limit your search to "LW*" to see only modules whose (full) |
771 | names begin with "LW"; or you might search for "*Find*" to search for |
772 | all modules with "Find" somewhere in their full name. (You can also use |
773 | "?" in a glob expression; so "DB?" will match "DBI" and "DBD".) |
774 | |
775 | |
776 | =item $search->callback( I<\&some_routine> ); |
777 | |
778 | This attribute means that every time this search sees a matching |
779 | Pod file, it should call this callback routine. The routine is called |
780 | with two parameters: the current file's filespec, and its pod name. |
781 | (For example: C<("/etc/perljunk/File/Crunk.pm", "File::Crunk")> would |
782 | be in C<@_>.) |
783 | |
784 | The callback routine's return value is not used for anything. |
785 | |
786 | This attribute's default value is false, meaning that no callback |
787 | is called. |
788 | |
789 | =item $search->laborious( I<true-or-false> ); |
790 | |
791 | Unless you set this attribute to a true value, Pod::Search will |
792 | apply Perl-specific heuristics to find the correct module PODs quickly. |
4f90f8a5 |
793 | This attribute's default value is false. You won't normally need |
794 | to set this to true. |
351625bd |
795 | |
796 | Specifically: Turning on this option will disable the heuristics for |
797 | seeing only files with Perl-like extensions, omitting subdirectories |
798 | that are numeric but do I<not> match the current Perl interpreter's |
799 | version ID, suppressing F<site_perl> as a module hierarchy name, etc. |
800 | |
801 | |
802 | =item $search->shadows( I<true-or-false> ); |
803 | |
804 | Unless you set this attribute to a true value, Pod::Simple::Search will |
805 | consider only the first file of a given modulename as it looks thru the |
806 | specified directories; that is, with this option off, if |
807 | Pod::Simple::Search has seen a C<somepathdir/Foo/Bar.pm> already in this |
808 | search, then it won't bother looking at a C<somelaterpathdir/Foo/Bar.pm> |
809 | later on in that search, because that file is merely a "shadow". But if |
810 | you turn on C<< $self->shadows(1) >>, then these "shadow" files are |
811 | inspected too, and are noted in the pathname2podname return hash. |
812 | |
813 | This attribute's default value is false; and normally you won't |
814 | need to turn it on. |
815 | |
816 | |
817 | =item $search->limit_re( I<some-regxp> ); |
818 | |
819 | Setting this attribute (to a value that's a regexp) means that you want |
820 | to limit the results just to items whose podnames match the given |
821 | regexp. Normally this option is not needed, and the more efficient |
822 | C<limit_glob> attribute is used instead. |
823 | |
824 | |
825 | =item $search->dir_prefix( I<some-string-value> ); |
826 | |
827 | Setting this attribute to a string value means that the searches should |
828 | begin in the specified subdirectory name (like "Pod" or "File::Find", |
829 | also expressable as "File/Find"). For example, the search option |
830 | C<< $search->limit_glob("File::Find::R*") >> |
831 | is the same as the combination of the search options |
832 | C<< $search->limit_re("^File::Find::R") -> dir_prefix("File::Find") >>. |
833 | |
834 | Normally you don't need to know about the C<dir_prefix> option, but I |
835 | include it in case it might prove useful for someone somewhere. |
836 | |
837 | (Implementationally, searching with limit_glob ends up setting limit_re |
838 | and usually dir_prefix.) |
839 | |
840 | |
841 | =item $search->progress( I<some-progress-object> ); |
842 | |
843 | If you set a value for this attribute, the value is expected |
844 | to be an object (probably of a class that you define) that has a |
845 | C<reach> method and a C<done> method. This is meant for reporting |
846 | progress during the search, if you don't want to use a simple |
847 | callback. |
848 | |
849 | Normally you don't need to know about the C<progress> option, but I |
850 | include it in case it might prove useful for someone somewhere. |
851 | |
852 | While a search is in progress, the progress object's C<reach> and |
853 | C<done> methods are called like this: |
854 | |
855 | # Every time a file is being scanned for pod: |
856 | $progress->reach($count, "Scanning $file"); ++$count; |
857 | |
858 | # And then at the end of the search: |
859 | $progress->done("Noted $count Pod files total"); |
860 | |
861 | Internally, we often set this to an object of class |
862 | Pod::Simple::Progress. That class is probably undocumented, |
863 | but you may wish to look at its source. |
864 | |
865 | |
866 | =item $name2path = $self->name2path; |
867 | |
868 | This attribute is not a search parameter, but is used to report the |
869 | result of C<survey> method, as discussed in the next section. |
870 | |
871 | =item $path2name = $self->path2name; |
872 | |
873 | This attribute is not a search parameter, but is used to report the |
874 | result of C<survey> method, as discussed in the next section. |
875 | |
876 | =back |
877 | |
878 | =head1 MAIN SEARCH METHODS |
879 | |
880 | Once you've actually set any options you want (if any), you can go |
881 | ahead and use the following methods to search for Pod files |
882 | in particular ways. |
883 | |
884 | |
885 | =head2 C<< $search->survey( @directories ) >> |
886 | |
887 | The method C<survey> searches for POD documents in a given set of |
888 | files and/or directories. This runs the search according to the various |
889 | options set by the accessors above. (For example, if the C<inc> attribute |
890 | is on, as it is by default, then the perl @INC directories are implicitly |
891 | added to the list of directories (if any) that you specify.) |
892 | |
893 | The return value of C<survey> is two hashes: |
894 | |
895 | =over |
896 | |
897 | =item C<name2path> |
898 | |
899 | A hash that maps from each pod-name to the filespec (like |
900 | "Stuff::Thing" => "/whatever/plib/Stuff/Thing.pm") |
901 | |
902 | =item C<path2name> |
903 | |
904 | A hash that maps from each Pod filespec to its pod-name (like |
905 | "/whatever/plib/Stuff/Thing.pm" => "Stuff::Thing") |
906 | |
907 | =back |
908 | |
909 | Besides saving these hashes as the hashref attributes |
910 | C<name2path> and C<path2name>, calling this function also returns |
911 | these hashrefs. In list context, the return value of |
912 | C<< $search->survey >> is the list C<(\%name2path, \%path2name)>. |
913 | In scalar context, the return value is C<\%name2path>. |
914 | Or you can just call this in void context. |
915 | |
916 | Regardless of calling context, calling C<survey> saves |
917 | its results in its C<name2path> and C<path2name> attributes. |
918 | |
919 | E.g., when searching in F<$HOME/perl5lib>, the file |
920 | F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>, |
921 | whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be |
922 | I<Myclass::Subclass>. The name information can be used for POD |
923 | translators. |
924 | |
925 | Only text files containing at least one valid POD command are found. |
926 | |
927 | In verbose mode, a warning is printed if shadows are found (i.e., more |
928 | than one POD file with the same POD name is found, e.g. F<CPAN.pm> in |
929 | different directories). This usually indicates duplicate occurrences of |
930 | modules in the I<@INC> search path, which is occasionally inadvertent |
931 | (but is often simply a case of a user's path dir having a more recent |
932 | version than the system's general path dirs in general.) |
933 | |
934 | The options to this argument is a list of either directories that are |
935 | searched recursively, or files. (Usually you wouldn't specify files, |
936 | but just dirs.) Or you can just specify an empty-list, as in |
937 | $name2path; with the |
938 | C<inc> option on, as it is by default, teh |
939 | |
940 | The POD names of files are the plain basenames with any Perl-like |
941 | extension (.pm, .pl, .pod) stripped, and path separators replaced by |
942 | C<::>'s. |
943 | |
944 | Calling Pod::Simple::Search->search(...) is short for |
945 | Pod::Simple::Search->new->search(...). That is, a throwaway object |
946 | with default attribute values is used. |
947 | |
948 | |
949 | =head2 C<< $search->simplify_name( $str ) >> |
950 | |
951 | The method B<simplify_name> is equivalent to B<basename>, but also |
952 | strips Perl-like extensions (.pm, .pl, .pod) and extensions like |
953 | F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively. |
954 | |
955 | |
956 | =head2 C<< $search->find( $pod ) >> |
957 | |
958 | =head2 C<< $search->find( $pod, @search_dirs ) >> |
959 | |
960 | Returns the location of a Pod file, given a Pod/module/script name |
961 | (like "Foo::Bar" or "perlvar" or "perldoc"), and an idea of |
962 | what files/directories to look in. |
963 | It searches according to the various options set by the accessors above. |
964 | (For example, if the C<inc> attribute is on, as it is by default, then |
965 | the perl @INC directories are implicitly added to the list of |
966 | directories (if any) that you specify.) |
967 | |
968 | This returns the full path of the first occurrence to the file. |
969 | Package names (eg 'A::B') are automatically converted to directory |
970 | names in the selected directory. Additionally, '.pm', '.pl' and '.pod' |
971 | are automatically appended to the search as required. |
972 | (So, for example, under Unix, "A::B" is converted to "somedir/A/B.pm", |
973 | "somedir/A/B.pod", or "somedir/A/B.pl", as appropriate.) |
974 | |
975 | If no such Pod file is found, this method returns undef. |
976 | |
977 | If any of the given search directories contains a F<pod/> subdirectory, |
978 | then it is searched. (That's how we manage to find F<perlfunc>, |
979 | for example, which is usually in F<pod/perlfunc> in most Perl dists.) |
980 | |
981 | The C<verbose> and C<inc> attributes influence the behavior of this |
982 | search; notably, C<inc>, if true, adds @INC I<and also |
983 | $Config::Config{'scriptdir'}> to the list of directories to search. |
984 | |
985 | It is common to simply say C<< $filename = Pod::Simple::Search-> new |
986 | ->find("perlvar") >> so that just the @INC (well, and scriptdir) |
987 | directories are searched. (This happens because the C<inc> |
988 | attribute is true by default.) |
989 | |
990 | Calling Pod::Simple::Search->find(...) is short for |
991 | Pod::Simple::Search->new->find(...). That is, a throwaway object |
992 | with default attribute values is used. |
993 | |
994 | |
995 | =head2 C<< $self->contains_pod( $file ) >> |
996 | |
997 | Returns true if the supplied filename (not POD module) contains some Pod |
998 | documentation. |
999 | |
1000 | |
1001 | =head1 AUTHOR |
1002 | |
1003 | Sean M. Burke E<lt>sburke@cpan.orgE<gt> |
1004 | borrowed code from |
1005 | Marek Rouchal's Pod::Find, which in turn |
1006 | heavily borrowed code from Nick Ing-Simmons' PodToHtml. |
1007 | |
1008 | Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided |
1009 | C<find> and C<contains_pod> to Pod::Find. |
1010 | |
1011 | =head1 SEE ALSO |
1012 | |
1013 | L<Pod::Simple>, L<Pod::Perldoc> |
1014 | |
1015 | =cut |
1016 | |