1 package CPANPLUS::Internals::Source;
7 use CPANPLUS::Module::Fake;
8 use CPANPLUS::Module::Author;
9 use CPANPLUS::Internals::Constants;
13 use Locale::Maketext::Simple Class => 'CPANPLUS', Style => 'gettext';
14 use Params::Check qw[check];
15 use IPC::Cmd qw[can_run];
16 use Module::Load::Conditional qw[can_load];
18 $Params::Check::VERBOSE = 1;
24 CPANPLUS::Internals::Source
28 ### lazy load author/module trees ###
35 CPANPLUS::Internals::Source controls the updating of source files and
36 the parsing of them into usable module/author trees to be used by
39 Functions exist to check if source files are still C<good to use> as
40 well as update them, and then parse them.
42 The flow looks like this:
44 $cb->_author_tree || $cb->_module_tree
49 $cb->__create_author_tree
50 $cb->__retrieve_source
51 $cb->__create_module_tree
52 $cb->__retrieve_source
53 $cb->__create_dslip_tree
54 $cb->__retrieve_source
64 my $recurse; # flag to prevent recursive calls to *_tree functions
66 ### lazy loading of module tree
70 unless ($self->{_modtree} or $recurse++ > 0) {
71 my $uptodate = $self->_check_trees( @_[1..$#_] );
72 $self->_build_trees(uptodate => $uptodate);
76 return $self->{_modtree};
79 ### lazy loading of author tree
83 unless ($self->{_authortree} or $recurse++ > 0) {
84 my $uptodate = $self->_check_trees( @_[1..$#_] );
85 $self->_build_trees(uptodate => $uptodate);
89 return $self->{_authortree};
96 =head2 $cb->_check_trees( [update_source => BOOL, path => PATH, verbose => BOOL] )
98 Retrieve source files and return a boolean indicating whether or not
99 the source files are up to date.
101 Takes several arguments:
107 A flag to force re-fetching of the source files, even
108 if they are still up to date.
112 The absolute path to the directory holding the source files.
116 A boolean flag indicating whether or not to be verbose.
120 Will get information from the config file by default.
124 ### retrieve source files, and returns a boolean indicating if it's up to date
126 my ($self, %hash) = @_;
127 my $conf = $self->configure_object;
134 path => { default => $conf->get_conf('base'),
137 verbose => { default => $conf->get_conf('verbose'),
140 update_source => { default => 0, store => \$update_source },
143 my $args = check( $tmpl, \%hash ) or return;
145 ### if the user never wants to update their source without explicitly
146 ### telling us, shortcircuit here
147 return 1 if $conf->get_conf('no_update') && !$update_source;
149 ### a check to see if our source files are still up to date ###
150 msg( loc("Checking if source files are up to date"), $verbose );
152 my $uptodate = 1; # default return value
154 for my $name (qw[auth dslip mod]) {
155 for my $file ( $conf->_get_source( $name ) ) {
156 $self->__check_uptodate(
157 file => File::Spec->catfile( $args->{path}, $file ),
159 update_source => $update_source,
170 =head2 $cb->__check_uptodate( file => $file, name => $name, [update_source => BOOL, verbose => BOOL] )
172 C<__check_uptodate> checks if a given source file is still up-to-date
173 and if not, or when C<update_source> is true, will re-fetch the source
176 Takes the following arguments:
182 The source file to check.
186 The internal shortcut name for the source file (used for config
191 Flag to force updating of sourcefiles regardless.
195 Boolean to indicate whether to be verbose or not.
199 Returns a boolean value indicating whether the current files are up
204 ### this method checks whether or not the source files we are using are still up to date
205 sub __check_uptodate {
208 my $conf = $self->configure_object;
212 file => { required => 1 },
213 name => { required => 1 },
214 update_source => { default => 0 },
215 verbose => { default => $conf->get_conf('verbose') },
218 my $args = check( $tmpl, \%hash ) or return;
221 unless ( -e $args->{'file'} && (
222 ( stat $args->{'file'} )[9]
223 + $conf->_get_source('update') )
228 if ( $flag or $args->{'update_source'} ) {
230 if ( $self->_update_source( name => $args->{'name'} ) ) {
231 return 0; # return 0 so 'uptodate' will be set to 0, meaning no use
232 # of previously stored hashrefs!
234 msg( loc("Unable to update source, attempting to get away with using old source file!"), $args->{verbose} );
245 =head2 $cb->_update_source( name => $name, [path => $path, verbose => BOOL] )
247 This method does the actual fetching of source files.
249 It takes the following arguments:
255 The internal shortcut name for the source file (used for config
260 The full path where to write the files.
264 Boolean to indicate whether to be verbose or not.
268 Returns a boolean to indicate success.
272 ### this sub fetches new source files ###
276 my $conf = $self->configure_object;
280 name => { required => 1 },
281 path => { default => $conf->get_conf('base') },
282 verbose => { default => $conf->get_conf('verbose') },
285 my $args = check( $tmpl, \%hash ) or return;
288 my $path = $args->{path};
291 { ### this could use a clean up - Kane
292 ### no worries about the / -> we get it from the _ftp configuration, so
293 ### it's not platform dependant. -kane
294 my ($dir, $file) = $conf->_get_mirror( $args->{'name'} ) =~ m|(.+/)(.+)$|sg;
296 msg( loc("Updating source file '%1'", $file), $args->{'verbose'} );
298 my $fake = CPANPLUS::Module::Fake->new(
299 module => $args->{'name'},
305 ### can't use $fake->fetch here, since ->parent won't work --
306 ### the sources haven't been saved yet
307 my $rv = $self->_fetch(
315 error( loc("Couldn't fetch '%1'", $file) );
319 ### `touch` the file, so windoze knows it's new -jmb
320 ### works on *nix too, good fix -Kane
321 utime ( $now, $now, File::Spec->catfile($path, $file) ) or
322 error( loc("Couldn't touch %1", $file) );
330 =head2 $cb->_build_trees( uptodate => BOOL, [use_stored => BOOL, path => $path, verbose => BOOL] )
332 This method rebuilds the author- and module-trees from source.
334 It takes the following arguments:
340 Indicates whether any on disk caches are still ok to use.
344 The absolute path to the directory holding the source files.
348 A boolean flag indicating whether or not to be verbose.
352 A boolean flag indicating whether or not it is ok to use previously
353 stored trees. Defaults to true.
357 Returns a boolean indicating success.
361 ### (re)build the trees ###
363 my ($self, %hash) = @_;
364 my $conf = $self->configure_object;
366 my($path,$uptodate,$use_stored);
368 path => { default => $conf->get_conf('base'), store => \$path },
369 verbose => { default => $conf->get_conf('verbose') },
370 uptodate => { required => 1, store => \$uptodate },
371 use_stored => { default => 1, store => \$use_stored },
374 my $args = check( $tmpl, \%hash ) or return undef;
376 ### retrieve the stored source files ###
377 my $stored = $self->__retrieve_source(
379 uptodate => $uptodate && $use_stored,
380 verbose => $args->{'verbose'},
383 ### build the trees ###
384 $self->{_authortree} = $stored->{_authortree} ||
385 $self->__create_author_tree(
386 uptodate => $uptodate,
388 verbose => $args->{verbose},
390 $self->{_modtree} = $stored->{_modtree} ||
391 $self->_create_mod_tree(
392 uptodate => $uptodate,
394 verbose => $args->{verbose},
397 ### return if we weren't able to build the trees ###
398 return unless $self->{_modtree} && $self->{_authortree};
400 ### write the stored files to disk, so we can keep using them
401 ### from now on, till they become invalid
402 ### write them if the original sources weren't uptodate, or
403 ### we didn't just load storable files
404 $self->_save_source() if !$uptodate or not keys %$stored;
406 ### still necessary? can only run one instance now ###
407 ### will probably stay that way --kane
408 # my $id = $self->_store_id( $self );
410 # unless ( $id == $self->_id ) {
411 # error( loc("IDs do not match: %1 != %2. Storage failed!", $id, $self->_id) );
419 =head2 $cb->__retrieve_source(name => $name, [path => $path, uptodate => BOOL, verbose => BOOL])
421 This method retrieves a I<storable>d tree identified by C<$name>.
423 It takes the following arguments:
429 The internal name for the source file to retrieve.
433 A flag indicating whether the file-cache is up-to-date or not.
437 The absolute path to the directory holding the source files.
441 A boolean flag indicating whether or not to be verbose.
445 Will get information from the config file by default.
447 Returns a tree on success, false on failure.
451 sub __retrieve_source {
454 my $conf = $self->configure_object;
457 path => { default => $conf->get_conf('base') },
458 verbose => { default => $conf->get_conf('verbose') },
459 uptodate => { default => 0 },
462 my $args = check( $tmpl, \%hash ) or return;
464 ### check if we can retrieve a frozen data structure with storable ###
465 my $storable = can_load( modules => {'Storable' => '0.0'} )
466 if $conf->get_conf('storable');
468 return unless $storable;
470 ### $stored is the name of the frozen data structure ###
471 my $stored = $self->__storable_file( $args->{path} );
473 if ($storable && -e $stored && -s _ && $args->{'uptodate'}) {
474 msg( loc("Retrieving %1", $stored), $args->{'verbose'} );
476 my $href = Storable::retrieve($stored);
485 =head2 $cb->_save_source([verbose => BOOL, path => $path])
487 This method saves all the parsed trees in I<storable>d format if
488 C<Storable> is available.
490 It takes the following arguments:
496 The absolute path to the directory holding the source files.
500 A boolean flag indicating whether or not to be verbose.
504 Will get information from the config file by default.
506 Returns true on success, false on failure.
513 my $conf = $self->configure_object;
517 path => { default => $conf->get_conf('base'), allow => DIR_EXISTS },
518 verbose => { default => $conf->get_conf('verbose') },
519 force => { default => 1 },
522 my $args = check( $tmpl, \%hash ) or return;
524 my $aref = [qw[_modtree _authortree]];
526 ### check if we can retrieve a frozen data structure with storable ###
528 $storable = can_load( modules => {'Storable' => '0.0'} )
529 if $conf->get_conf('storable');
530 return unless $storable;
533 foreach my $key ( @$aref ) {
534 next unless ref( $self->{$key} );
535 $to_write->{$key} = $self->{$key};
538 return unless keys %$to_write;
540 ### $stored is the name of the frozen data structure ###
541 my $stored = $self->__storable_file( $args->{path} );
543 if (-e $stored && not -w $stored) {
544 msg( loc("%1 not writable; skipped.", $stored), $args->{'verbose'} );
548 msg( loc("Writing compiled source information to disk. This might take a little while."),
549 $args->{'verbose'} );
552 unless( Storable::nstore( $to_write, $stored ) ) {
553 error( loc("could not store %1!", $stored) );
557 return $flag ? 0 : 1;
560 sub __storable_file {
562 my $conf = $self->configure_object;
563 my $path = shift or return;
565 ### check if we can retrieve a frozen data structure with storable ###
566 my $storable = $conf->get_conf('storable')
567 ? can_load( modules => {'Storable' => '0.0'} )
570 return unless $storable;
572 ### $stored is the name of the frozen data structure ###
573 ### changed to use File::Spec->catfile -jmb
574 my $stored = File::Spec->rel2abs(
577 $conf->_get_source('stored') #file
579 $Storable::VERSION #the version of storable
580 . '.stored' #append a suffix
589 =head2 $cb->__create_author_tree([path => $path, uptodate => BOOL, verbose => BOOL])
591 This method opens a source files and parses its contents into a
592 searchable author-tree or restores a file-cached version of a
593 previous parse, if the sources are uptodate and the file-cache exists.
595 It takes the following arguments:
601 A flag indicating whether the file-cache is uptodate or not.
605 The absolute path to the directory holding the source files.
609 A boolean flag indicating whether or not to be verbose.
613 Will get information from the config file by default.
615 Returns a tree on success, false on failure.
619 sub __create_author_tree() {
622 my $conf = $self->configure_object;
626 path => { default => $conf->get_conf('base') },
627 verbose => { default => $conf->get_conf('verbose') },
628 uptodate => { default => 0 },
631 my $args = check( $tmpl, \%hash ) or return;
633 my $file = File::Spec->catfile(
635 $conf->_get_source('auth')
638 msg(loc("Rebuilding author tree, this might take a while"),
641 ### extract the file ###
642 my $ae = Archive::Extract->new( archive => $file ) or return;
643 my $out = STRIP_GZ_SUFFIX->($file);
645 ### make sure to set the PREFER_BIN flag if desired ###
646 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
647 $ae->extract( to => $out ) or return;
650 my $cont = $self->_get_file_contents( file => $out ) or return;
652 ### don't need it anymore ###
655 for ( split /\n/, $cont ) {
656 my($id, $name, $email) = m/^alias \s+
658 "\s* ([^\"\<]+?) \s* <(.+)> \s*"
661 $tree->{$id} = CPANPLUS::Module::Author->new(
662 author => $name, #authors name
663 email => $email, #authors email address
664 cpanid => $id, #authors CPAN ID
665 _id => $self->_id, #id of this internals object
671 } #__create_author_tree
675 =head2 $cb->_create_mod_tree([path => $path, uptodate => BOOL, verbose => BOOL])
677 This method opens a source files and parses its contents into a
678 searchable module-tree or restores a file-cached version of a
679 previous parse, if the sources are uptodate and the file-cache exists.
681 It takes the following arguments:
687 A flag indicating whether the file-cache is up-to-date or not.
691 The absolute path to the directory holding the source files.
695 A boolean flag indicating whether or not to be verbose.
699 Will get information from the config file by default.
701 Returns a tree on success, false on failure.
705 ### this builds a hash reference with the structure of the cpan module tree ###
706 sub _create_mod_tree {
709 my $conf = $self->configure_object;
713 path => { default => $conf->get_conf('base') },
714 verbose => { default => $conf->get_conf('verbose') },
715 uptodate => { default => 0 },
718 my $args = check( $tmpl, \%hash ) or return undef;
719 my $file = File::Spec->catfile($args->{path}, $conf->_get_source('mod'));
721 msg(loc("Rebuilding module tree, this might take a while"),
725 my $dslip_tree = $self->__create_dslip_tree( %$args );
727 ### extract the file ###
728 my $ae = Archive::Extract->new( archive => $file ) or return;
729 my $out = STRIP_GZ_SUFFIX->($file);
731 ### make sure to set the PREFER_BIN flag if desired ###
732 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
733 $ae->extract( to => $out ) or return;
736 my $cont = $self->_get_file_contents( file => $out ) or return;
738 ### don't need it anymore ###
744 for ( split /\n/, $cont ) {
746 ### quick hack to read past the header of the file ###
747 ### this is still rather evil... fix some time - Kane
748 $flag = 1 if m|^\s*$|;
751 ### skip empty lines ###
755 my @data = split /\s+/;
757 ### filter out the author and filename as well ###
758 ### authors can apparently have digits in their names,
759 ### and dirs can have dots... blah!
760 my ($author, $package) = $data[2] =~
763 ([A-Z\d-]+) (?:/[\S]+)?/
767 ### remove file name from the path
768 $data[2] =~ s|/[^/]+$||;
771 unless( $self->author_tree($author) ) {
772 error( loc( "No such author '%1' -- can't make module object " .
773 "'%2' that is supposed to belong to this author",
774 $author, $data[0] ) );
778 ### adding the dslip info
779 ### probably can use some optimization
781 for my $item ( qw[ statd stats statl stati statp ] ) {
782 ### checking if there's an entry in the dslip info before
783 ### catting it on. appeasing warnings this way
784 $dslip .= $dslip_tree->{ $data[0] }->{$item}
785 ? $dslip_tree->{ $data[0] }->{$item}
789 ### Every module get's stored as a module object ###
790 $tree->{ $data[0] } = CPANPLUS::Module->new(
791 module => $data[0], # full module name
792 version => ($data[1] eq 'undef' # version number
795 path => File::Spec::Unix->catfile(
796 $conf->_get_mirror('base'),
798 ), # extended path on the cpan mirror,
800 comment => $data[3], # comment on the module
801 author => $self->author_tree($author),
802 package => $package, # package name, like
803 # 'foo-bar-baz-1.03.tar.gz'
804 description => $dslip_tree->{ $data[0] }->{'description'},
806 _id => $self->_id, #id of this internals object
817 =head2 $cb->__create_dslip_tree([path => $path, uptodate => BOOL, verbose => BOOL])
819 This method opens a source files and parses its contents into a
820 searchable dslip-tree or restores a file-cached version of a
821 previous parse, if the sources are uptodate and the file-cache exists.
823 It takes the following arguments:
829 A flag indicating whether the file-cache is uptodate or not.
833 The absolute path to the directory holding the source files.
837 A boolean flag indicating whether or not to be verbose.
841 Will get information from the config file by default.
843 Returns a tree on success, false on failure.
847 sub __create_dslip_tree {
850 my $conf = $self->configure_object;
853 path => { default => $conf->get_conf('base') },
854 verbose => { default => $conf->get_conf('verbose') },
855 uptodate => { default => 0 },
858 my $args = check( $tmpl, \%hash ) or return;
860 ### get the file name of the source ###
861 my $file = File::Spec->catfile($args->{path}, $conf->_get_source('dslip'));
863 ### extract the file ###
864 my $ae = Archive::Extract->new( archive => $file ) or return;
865 my $out = STRIP_GZ_SUFFIX->($file);
867 ### make sure to set the PREFER_BIN flag if desired ###
868 { local $Archive::Extract::PREFER_BIN = $conf->get_conf('prefer_bin');
869 $ae->extract( to => $out ) or return;
872 my $in = $self->_get_file_contents( file => $out ) or return;
874 ### don't need it anymore ###
878 ### get rid of the comments and the code ###
879 ### need a smarter parser, some people have this in their dslip info:
887 # 'Implements Linear Threshold Units',
889 # "\x{c4}dd \x{fc}ml\x{e4}\x{fc}ts t\x{f6} \x{eb}v\x{eb}r\x{ff}th\x{ef}ng!",
893 ### also, older versions say:
895 ### and newer versions say:
896 ### $CPANPLUS::Modulelist::cols = [...]
897 ### split '$cols' and '$data' into 2 variables ###
898 ### use this regex to make sure dslips with ';' in them don't cause
900 my ($ds_one, $ds_two) = ($in =~ m|.+}\s+
901 (\$(?:CPAN::Modulelist::)?cols.*?)
902 (\$(?:CPAN::Modulelist::)?data.*)
905 ### eval them into existence ###
906 ### still not too fond of this solution - kane ###
908 { #local $@; can't use this, it's buggy -kane
910 $cols = eval $ds_one;
911 error( loc("Error in eval of dslip source files: %1", $@) ) if $@;
913 $data = eval $ds_two;
914 error( loc("Error in eval of dslip source files: %1", $@) ) if $@;
919 my $primary = "modid";
921 ### this comes from CPAN::Modulelist
922 ### which is in 03modlist.data.gz
926 $tree->{$hash{$primary}} = \%hash;
931 } #__create_dslip_tree
935 =head2 $cb->_dslip_defs ()
937 This function returns the definition structure (ARRAYREF) of the
942 ### these are the definitions used for dslip info
943 ### they shouldn't change over time.. so hardcoding them doesn't appear to
944 ### be a problem. if it is, we need to parse 03modlist.data better to filter
946 ### right now, this is just used to look up dslip info from a module
953 [ q|Development Stage|, {
954 i => loc('Idea, listed to gain consensus or as a placeholder'),
955 c => loc('under construction but pre-alpha (not yet released)'),
956 a => loc('Alpha testing'),
957 b => loc('Beta testing'),
958 R => loc('Released'),
959 M => loc('Mature (no rigorous definition)'),
960 S => loc('Standard, supplied with Perl 5'),
964 [ q|Support Level|, {
965 m => loc('Mailing-list'),
966 d => loc('Developer'),
967 u => loc('Usenet newsgroup comp.lang.perl.modules'),
968 n => loc('None known, try comp.lang.perl.modules'),
969 a => loc('Abandoned; volunteers welcome to take over maintainance'),
973 [ q|Language Used|, {
974 p => loc('Perl-only, no compiler needed, should be platform independent'),
975 c => loc('C and perl, a C compiler will be needed'),
976 h => loc('Hybrid, written in perl with optional C code, no compiler needed'),
977 '+' => loc('C++ and perl, a C++ compiler will be needed'),
978 o => loc('perl and another language other than C or C++'),
982 [ q|Interface Style|, {
983 f => loc('plain Functions, no references used'),
984 h => loc('hybrid, object and function interfaces available'),
985 n => loc('no interface at all (huh?)'),
986 r => loc('some use of unblessed References or ties'),
987 O => loc('Object oriented using blessed references and/or inheritance'),
991 [ q|Public License|, {
992 p => loc('Standard-Perl: user may choose between GPL and Artistic'),
993 g => loc('GPL: GNU General Public License'),
994 l => loc('LGPL: "GNU Lesser General Public License" (previously known as "GNU Library General Public License")'),
995 b => loc('BSD: The BSD License'),
996 a => loc('Artistic license alone'),
997 o => loc('other (but distribution allowed without restrictions)'),
1005 # c-indentation-style: bsd
1007 # indent-tabs-mode: nil
1009 # vim: expandtab shiftwidth=4: