1 package CPANPLUS::Internals::Source;
7 use CPANPLUS::Module::Fake;
8 use CPANPLUS::Module::Author;
9 use CPANPLUS::Internals::Constants;
13 use Locale::Maketext::Simple Class => 'CPANPLUS', Style => 'gettext';
14 use Params::Check qw[check];
15 use IPC::Cmd qw[can_run];
16 use Module::Load::Conditional qw[can_load];
18 $Params::Check::VERBOSE = 1;
24 CPANPLUS::Internals::Source
28 ### lazy load author/module trees ###
35 CPANPLUS::Internals::Source controls the updating of source files and
36 the parsing of them into usable module/author trees to be used by
39 Functions exist to check if source files are still C<good to use> as
40 well as update them, and then parse them.
42 The flow looks like this:
44 $cb->_author_tree || $cb->_module_tree
49 $cb->__create_author_tree
50 $cb->__retrieve_source
51 $cb->__create_module_tree
52 $cb->__retrieve_source
53 $cb->__create_dslip_tree
54 $cb->__retrieve_source
64 my $recurse; # flag to prevent recursive calls to *_tree functions
66 ### lazy loading of module tree
70 unless ($self->{_modtree} or $recurse++ > 0) {
71 my $uptodate = $self->_check_trees( @_[1..$#_] );
72 $self->_build_trees(uptodate => $uptodate);
76 return $self->{_modtree};
79 ### lazy loading of author tree
83 unless ($self->{_authortree} or $recurse++ > 0) {
84 my $uptodate = $self->_check_trees( @_[1..$#_] );
85 $self->_build_trees(uptodate => $uptodate);
89 return $self->{_authortree};
96 =head2 $cb->_check_trees( [update_source => BOOL, path => PATH, verbose => BOOL] )
98 Retrieve source files and return a boolean indicating whether or not
99 the source files are up to date.
101 Takes several arguments:
107 A flag to force re-fetching of the source files, even
108 if they are still up to date.
112 The absolute path to the directory holding the source files.
116 A boolean flag indicating whether or not to be verbose.
120 Will get information from the config file by default.
124 ### retrieve source files, and returns a boolean indicating if it's up to date
126 my ($self, %hash) = @_;
127 my $conf = $self->configure_object;
134 path => { default => $conf->get_conf('base'),
137 verbose => { default => $conf->get_conf('verbose'),
140 update_source => { default => 0, store => \$update_source },
143 my $args = check( $tmpl, \%hash ) or return;
145 ### if the user never wants to update their source without explicitly
146 ### telling us, shortcircuit here
147 return 1 if $conf->get_conf('no_update') && !$update_source;
149 ### a check to see if our source files are still up to date ###
150 msg( loc("Checking if source files are up to date"), $verbose );
152 my $uptodate = 1; # default return value
154 for my $name (qw[auth dslip mod]) {
155 for my $file ( $conf->_get_source( $name ) ) {
156 $self->__check_uptodate(
157 file => File::Spec->catfile( $args->{path}, $file ),
159 update_source => $update_source,
170 =head2 $cb->__check_uptodate( file => $file, name => $name, [update_source => BOOL, verbose => BOOL] )
172 C<__check_uptodate> checks if a given source file is still up-to-date
173 and if not, or when C<update_source> is true, will re-fetch the source
176 Takes the following arguments:
182 The source file to check.
186 The internal shortcut name for the source file (used for config
191 Flag to force updating of sourcefiles regardless.
195 Boolean to indicate whether to be verbose or not.
199 Returns a boolean value indicating whether the current files are up
204 ### this method checks whether or not the source files we are using are still up to date
205 sub __check_uptodate {
208 my $conf = $self->configure_object;
212 file => { required => 1 },
213 name => { required => 1 },
214 update_source => { default => 0 },
215 verbose => { default => $conf->get_conf('verbose') },
218 my $args = check( $tmpl, \%hash ) or return;
221 unless ( -e $args->{'file'} && (
222 ( stat $args->{'file'} )[9]
223 + $conf->_get_source('update') )
228 if ( $flag or $args->{'update_source'} ) {
230 if ( $self->_update_source( name => $args->{'name'} ) ) {
231 return 0; # return 0 so 'uptodate' will be set to 0, meaning no use
232 # of previously stored hashrefs!
234 msg( loc("Unable to update source, attempting to get away with using old source file!"), $args->{verbose} );
245 =head2 $cb->_update_source( name => $name, [path => $path, verbose => BOOL] )
247 This method does the actual fetching of source files.
249 It takes the following arguments:
255 The internal shortcut name for the source file (used for config
260 The full path where to write the files.
264 Boolean to indicate whether to be verbose or not.
268 Returns a boolean to indicate success.
272 ### this sub fetches new source files ###
276 my $conf = $self->configure_object;
280 name => { required => 1 },
281 path => { default => $conf->get_conf('base') },
282 verbose => { default => $conf->get_conf('verbose') },
285 my $args = check( $tmpl, \%hash ) or return;
288 my $path = $args->{path};
291 { ### this could use a clean up - Kane
292 ### no worries about the / -> we get it from the _ftp configuration, so
293 ### it's not platform dependant. -kane
294 my ($dir, $file) = $conf->_get_mirror( $args->{'name'} ) =~ m|(.+/)(.+)$|sg;
296 msg( loc("Updating source file '%1'", $file), $args->{'verbose'} );
298 my $fake = CPANPLUS::Module::Fake->new(
299 module => $args->{'name'},
305 ### can't use $fake->fetch here, since ->parent won't work --
306 ### the sources haven't been saved yet
307 my $rv = $self->_fetch(
315 error( loc("Couldn't fetch '%1'", $file) );
319 ### `touch` the file, so windoze knows it's new -jmb
320 ### works on *nix too, good fix -Kane
321 ### make sure it is writable first, otherwise the `touch` will fail
322 unless (chmod ( 0644, File::Spec->catfile($path, $file) ) &&
323 utime ( $now, $now, File::Spec->catfile($path, $file) )) {
324 error( loc("Couldn't touch %1", $file) );
333 =head2 $cb->_build_trees( uptodate => BOOL, [use_stored => BOOL, path => $path, verbose => BOOL] )
335 This method rebuilds the author- and module-trees from source.
337 It takes the following arguments:
343 Indicates whether any on disk caches are still ok to use.
347 The absolute path to the directory holding the source files.
351 A boolean flag indicating whether or not to be verbose.
355 A boolean flag indicating whether or not it is ok to use previously
356 stored trees. Defaults to true.
360 Returns a boolean indicating success.
364 ### (re)build the trees ###
366 my ($self, %hash) = @_;
367 my $conf = $self->configure_object;
369 my($path,$uptodate,$use_stored);
371 path => { default => $conf->get_conf('base'), store => \$path },
372 verbose => { default => $conf->get_conf('verbose') },
373 uptodate => { required => 1, store => \$uptodate },
374 use_stored => { default => 1, store => \$use_stored },
377 my $args = check( $tmpl, \%hash ) or return undef;
379 ### retrieve the stored source files ###
380 my $stored = $self->__retrieve_source(
382 uptodate => $uptodate && $use_stored,
383 verbose => $args->{'verbose'},
386 ### build the trees ###
387 $self->{_authortree} = $stored->{_authortree} ||
388 $self->__create_author_tree(
389 uptodate => $uptodate,
391 verbose => $args->{verbose},
393 $self->{_modtree} = $stored->{_modtree} ||
394 $self->_create_mod_tree(
395 uptodate => $uptodate,
397 verbose => $args->{verbose},
400 ### return if we weren't able to build the trees ###
401 return unless $self->{_modtree} && $self->{_authortree};
403 ### write the stored files to disk, so we can keep using them
404 ### from now on, till they become invalid
405 ### write them if the original sources weren't uptodate, or
406 ### we didn't just load storable files
407 $self->_save_source() if !$uptodate or not keys %$stored;
409 ### still necessary? can only run one instance now ###
410 ### will probably stay that way --kane
411 # my $id = $self->_store_id( $self );
413 # unless ( $id == $self->_id ) {
414 # error( loc("IDs do not match: %1 != %2. Storage failed!", $id, $self->_id) );
422 =head2 $cb->__retrieve_source(name => $name, [path => $path, uptodate => BOOL, verbose => BOOL])
424 This method retrieves a I<storable>d tree identified by C<$name>.
426 It takes the following arguments:
432 The internal name for the source file to retrieve.
436 A flag indicating whether the file-cache is up-to-date or not.
440 The absolute path to the directory holding the source files.
444 A boolean flag indicating whether or not to be verbose.
448 Will get information from the config file by default.
450 Returns a tree on success, false on failure.
454 sub __retrieve_source {
457 my $conf = $self->configure_object;
460 path => { default => $conf->get_conf('base') },
461 verbose => { default => $conf->get_conf('verbose') },
462 uptodate => { default => 0 },
465 my $args = check( $tmpl, \%hash ) or return;
467 ### check if we can retrieve a frozen data structure with storable ###
468 my $storable = can_load( modules => {'Storable' => '0.0'} )
469 if $conf->get_conf('storable');
471 return unless $storable;
473 ### $stored is the name of the frozen data structure ###
474 my $stored = $self->__storable_file( $args->{path} );
476 if ($storable && -e $stored && -s _ && $args->{'uptodate'}) {
477 msg( loc("Retrieving %1", $stored), $args->{'verbose'} );
479 my $href = Storable::retrieve($stored);
488 =head2 $cb->_save_source([verbose => BOOL, path => $path])
490 This method saves all the parsed trees in I<storable>d format if
491 C<Storable> is available.
493 It takes the following arguments:
499 The absolute path to the directory holding the source files.
503 A boolean flag indicating whether or not to be verbose.
507 Will get information from the config file by default.
509 Returns true on success, false on failure.
516 my $conf = $self->configure_object;
520 path => { default => $conf->get_conf('base'), allow => DIR_EXISTS },
521 verbose => { default => $conf->get_conf('verbose') },
522 force => { default => 1 },
525 my $args = check( $tmpl, \%hash ) or return;
527 my $aref = [qw[_modtree _authortree]];
529 ### check if we can retrieve a frozen data structure with storable ###
531 $storable = can_load( modules => {'Storable' => '0.0'} )
532 if $conf->get_conf('storable');
533 return unless $storable;
536 foreach my $key ( @$aref ) {
537 next unless ref( $self->{$key} );
538 $to_write->{$key} = $self->{$key};
541 return unless keys %$to_write;
543 ### $stored is the name of the frozen data structure ###
544 my $stored = $self->__storable_file( $args->{path} );
546 if (-e $stored && not -w $stored) {
547 msg( loc("%1 not writable; skipped.", $stored), $args->{'verbose'} );
551 msg( loc("Writing compiled source information to disk. This might take a little while."),
552 $args->{'verbose'} );
555 unless( Storable::nstore( $to_write, $stored ) ) {
556 error( loc("could not store %1!", $stored) );
560 return $flag ? 0 : 1;
563 sub __storable_file {
565 my $conf = $self->configure_object;
566 my $path = shift or return;
568 ### check if we can retrieve a frozen data structure with storable ###
569 my $storable = $conf->get_conf('storable')
570 ? can_load( modules => {'Storable' => '0.0'} )
573 return unless $storable;
575 ### $stored is the name of the frozen data structure ###
576 ### changed to use File::Spec->catfile -jmb
577 my $stored = File::Spec->rel2abs(
580 $conf->_get_source('stored') #file
582 $Storable::VERSION #the version of storable
583 . '.stored' #append a suffix
592 =head2 $cb->__create_author_tree([path => $path, uptodate => BOOL, verbose => BOOL])
594 This method opens a source files and parses its contents into a
595 searchable author-tree or restores a file-cached version of a
596 previous parse, if the sources are uptodate and the file-cache exists.
598 It takes the following arguments:
604 A flag indicating whether the file-cache is uptodate or not.
608 The absolute path to the directory holding the source files.
612 A boolean flag indicating whether or not to be verbose.
616 Will get information from the config file by default.
618 Returns a tree on success, false on failure.
622 sub __create_author_tree() {
625 my $conf = $self->configure_object;
629 path => { default => $conf->get_conf('base') },
630 verbose => { default => $conf->get_conf('verbose') },
631 uptodate => { default => 0 },
634 my $args = check( $tmpl, \%hash ) or return;
636 my $file = File::Spec->catfile(
638 $conf->_get_source('auth')
641 msg(loc("Rebuilding author tree, this might take a while"),
644 ### extract the file ###
645 my $ae = Archive::Extract->new( archive => $file ) or return;
646 my $out = STRIP_GZ_SUFFIX->($file);
648 ### make sure to set the PREFER_BIN flag if desired ###
649 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
650 $ae->extract( to => $out ) or return;
653 my $cont = $self->_get_file_contents( file => $out ) or return;
655 ### don't need it anymore ###
658 for ( split /\n/, $cont ) {
659 my($id, $name, $email) = m/^alias \s+
661 "\s* ([^\"\<]+?) \s* <(.+)> \s*"
664 $tree->{$id} = CPANPLUS::Module::Author->new(
665 author => $name, #authors name
666 email => $email, #authors email address
667 cpanid => $id, #authors CPAN ID
668 _id => $self->_id, #id of this internals object
674 } #__create_author_tree
678 =head2 $cb->_create_mod_tree([path => $path, uptodate => BOOL, verbose => BOOL])
680 This method opens a source files and parses its contents into a
681 searchable module-tree or restores a file-cached version of a
682 previous parse, if the sources are uptodate and the file-cache exists.
684 It takes the following arguments:
690 A flag indicating whether the file-cache is up-to-date or not.
694 The absolute path to the directory holding the source files.
698 A boolean flag indicating whether or not to be verbose.
702 Will get information from the config file by default.
704 Returns a tree on success, false on failure.
708 ### this builds a hash reference with the structure of the cpan module tree ###
709 sub _create_mod_tree {
712 my $conf = $self->configure_object;
716 path => { default => $conf->get_conf('base') },
717 verbose => { default => $conf->get_conf('verbose') },
718 uptodate => { default => 0 },
721 my $args = check( $tmpl, \%hash ) or return undef;
722 my $file = File::Spec->catfile($args->{path}, $conf->_get_source('mod'));
724 msg(loc("Rebuilding module tree, this might take a while"),
728 my $dslip_tree = $self->__create_dslip_tree( %$args );
730 ### extract the file ###
731 my $ae = Archive::Extract->new( archive => $file ) or return;
732 my $out = STRIP_GZ_SUFFIX->($file);
734 ### make sure to set the PREFER_BIN flag if desired ###
735 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
736 $ae->extract( to => $out ) or return;
739 my $cont = $self->_get_file_contents( file => $out ) or return;
741 ### don't need it anymore ###
747 for ( split /\n/, $cont ) {
749 ### quick hack to read past the header of the file ###
750 ### this is still rather evil... fix some time - Kane
751 $flag = 1 if m|^\s*$|;
754 ### skip empty lines ###
758 my @data = split /\s+/;
760 ### filter out the author and filename as well ###
761 ### authors can apparently have digits in their names,
762 ### and dirs can have dots... blah!
763 my ($author, $package) = $data[2] =~
766 ([A-Z\d-]+) (?:/[\S]+)?/
770 ### remove file name from the path
771 $data[2] =~ s|/[^/]+$||;
774 unless( $self->author_tree($author) ) {
775 error( loc( "No such author '%1' -- can't make module object " .
776 "'%2' that is supposed to belong to this author",
777 $author, $data[0] ) );
781 ### adding the dslip info
782 ### probably can use some optimization
784 for my $item ( qw[ statd stats statl stati statp ] ) {
785 ### checking if there's an entry in the dslip info before
786 ### catting it on. appeasing warnings this way
787 $dslip .= $dslip_tree->{ $data[0] }->{$item}
788 ? $dslip_tree->{ $data[0] }->{$item}
792 ### Every module get's stored as a module object ###
793 $tree->{ $data[0] } = CPANPLUS::Module->new(
794 module => $data[0], # full module name
795 version => ($data[1] eq 'undef' # version number
798 path => File::Spec::Unix->catfile(
799 $conf->_get_mirror('base'),
801 ), # extended path on the cpan mirror,
803 comment => $data[3], # comment on the module
804 author => $self->author_tree($author),
805 package => $package, # package name, like
806 # 'foo-bar-baz-1.03.tar.gz'
807 description => $dslip_tree->{ $data[0] }->{'description'},
809 _id => $self->_id, #id of this internals object
820 =head2 $cb->__create_dslip_tree([path => $path, uptodate => BOOL, verbose => BOOL])
822 This method opens a source files and parses its contents into a
823 searchable dslip-tree or restores a file-cached version of a
824 previous parse, if the sources are uptodate and the file-cache exists.
826 It takes the following arguments:
832 A flag indicating whether the file-cache is uptodate or not.
836 The absolute path to the directory holding the source files.
840 A boolean flag indicating whether or not to be verbose.
844 Will get information from the config file by default.
846 Returns a tree on success, false on failure.
850 sub __create_dslip_tree {
853 my $conf = $self->configure_object;
856 path => { default => $conf->get_conf('base') },
857 verbose => { default => $conf->get_conf('verbose') },
858 uptodate => { default => 0 },
861 my $args = check( $tmpl, \%hash ) or return;
863 ### get the file name of the source ###
864 my $file = File::Spec->catfile($args->{path}, $conf->_get_source('dslip'));
866 ### extract the file ###
867 my $ae = Archive::Extract->new( archive => $file ) or return;
868 my $out = STRIP_GZ_SUFFIX->($file);
870 ### make sure to set the PREFER_BIN flag if desired ###
871 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
872 $ae->extract( to => $out ) or return;
875 my $in = $self->_get_file_contents( file => $out ) or return;
877 ### don't need it anymore ###
881 ### get rid of the comments and the code ###
882 ### need a smarter parser, some people have this in their dslip info:
890 # 'Implements Linear Threshold Units',
892 # "\x{c4}dd \x{fc}ml\x{e4}\x{fc}ts t\x{f6} \x{eb}v\x{eb}r\x{ff}th\x{ef}ng!",
896 ### also, older versions say:
898 ### and newer versions say:
899 ### $CPANPLUS::Modulelist::cols = [...]
900 ### split '$cols' and '$data' into 2 variables ###
901 ### use this regex to make sure dslips with ';' in them don't cause
903 my ($ds_one, $ds_two) = ($in =~ m|.+}\s+
904 (\$(?:CPAN::Modulelist::)?cols.*?)
905 (\$(?:CPAN::Modulelist::)?data.*)
908 ### eval them into existence ###
909 ### still not too fond of this solution - kane ###
911 { #local $@; can't use this, it's buggy -kane
913 $cols = eval $ds_one;
914 error( loc("Error in eval of dslip source files: %1", $@) ) if $@;
916 $data = eval $ds_two;
917 error( loc("Error in eval of dslip source files: %1", $@) ) if $@;
922 my $primary = "modid";
924 ### this comes from CPAN::Modulelist
925 ### which is in 03modlist.data.gz
929 $tree->{$hash{$primary}} = \%hash;
934 } #__create_dslip_tree
938 =head2 $cb->_dslip_defs ()
940 This function returns the definition structure (ARRAYREF) of the
945 ### these are the definitions used for dslip info
946 ### they shouldn't change over time.. so hardcoding them doesn't appear to
947 ### be a problem. if it is, we need to parse 03modlist.data better to filter
949 ### right now, this is just used to look up dslip info from a module
956 [ q|Development Stage|, {
957 i => loc('Idea, listed to gain consensus or as a placeholder'),
958 c => loc('under construction but pre-alpha (not yet released)'),
959 a => loc('Alpha testing'),
960 b => loc('Beta testing'),
961 R => loc('Released'),
962 M => loc('Mature (no rigorous definition)'),
963 S => loc('Standard, supplied with Perl 5'),
967 [ q|Support Level|, {
968 m => loc('Mailing-list'),
969 d => loc('Developer'),
970 u => loc('Usenet newsgroup comp.lang.perl.modules'),
971 n => loc('None known, try comp.lang.perl.modules'),
972 a => loc('Abandoned; volunteers welcome to take over maintainance'),
976 [ q|Language Used|, {
977 p => loc('Perl-only, no compiler needed, should be platform independent'),
978 c => loc('C and perl, a C compiler will be needed'),
979 h => loc('Hybrid, written in perl with optional C code, no compiler needed'),
980 '+' => loc('C++ and perl, a C++ compiler will be needed'),
981 o => loc('perl and another language other than C or C++'),
985 [ q|Interface Style|, {
986 f => loc('plain Functions, no references used'),
987 h => loc('hybrid, object and function interfaces available'),
988 n => loc('no interface at all (huh?)'),
989 r => loc('some use of unblessed References or ties'),
990 O => loc('Object oriented using blessed references and/or inheritance'),
994 [ q|Public License|, {
995 p => loc('Standard-Perl: user may choose between GPL and Artistic'),
996 g => loc('GPL: GNU General Public License'),
997 l => loc('LGPL: "GNU Lesser General Public License" (previously known as "GNU Library General Public License")'),
998 b => loc('BSD: The BSD License'),
999 a => loc('Artistic license alone'),
1000 o => loc('other (but distribution allowed without restrictions)'),
1008 # c-indentation-style: bsd
1010 # indent-tabs-mode: nil
1012 # vim: expandtab shiftwidth=4: