More robust yacc/bison failure output handling.
[p5sagit/p5-mst-13.2.git] / lib / AutoSplit.pm
CommitLineData
a0d0e21e 1package AutoSplit;
2
4e6ea2c3 3use Exporter ();
4use Config qw(%Config);
5use Carp qw(carp);
6use File::Basename ();
68dc0745 7use File::Path qw(mkpath);
4e6ea2c3 8use strict;
9use vars qw(
10 $VERSION @ISA @EXPORT @EXPORT_OK
11 $Verbose $Keep $Maxlen $CheckForAutoloader $CheckModTime
12 );
a0d0e21e 13
4e6ea2c3 14$VERSION = "1.0302";
a0d0e21e 15@ISA = qw(Exporter);
16@EXPORT = qw(&autosplit &autosplit_lib_modules);
3edbfbe5 17@EXPORT_OK = qw($Verbose $Keep $Maxlen $CheckForAutoloader $CheckModTime);
a0d0e21e 18
f06db76b 19=head1 NAME
20
21AutoSplit - split a package for autoloading
22
cb1a09d0 23=head1 SYNOPSIS
24
4e6ea2c3 25 autosplit($file, $dir, $keep, $check, $modtime);
84dc3c4d 26
4e6ea2c3 27 autosplit_lib_modules(@modules);
cb1a09d0 28
f06db76b 29=head1 DESCRIPTION
30
31This function will split up your program into files that the AutoLoader
21c92a1d 32module can handle. It is used by both the standard perl libraries and by
33the MakeMaker utility, to automatically configure libraries for autoloading.
34
35The C<autosplit> interface splits the specified file into a hierarchy
36rooted at the directory C<$dir>. It creates directories as needed to reflect
37class hierarchy, and creates the file F<autosplit.ix>. This file acts as
38both forward declaration of all package routines, and as timestamp for the
39last update of the hierarchy.
40
4e6ea2c3 41The remaining three arguments to C<autosplit> govern other options to
42the autosplitter.
43
44=over 2
45
46=item $keep
47
48If the third argument, I<$keep>, is false, then any
49pre-existing C<*.al> files in the autoload directory are removed if
50they are no longer part of the module (obsoleted functions).
51$keep defaults to 0.
52
53=item $check
54
55The
56fourth argument, I<$check>, instructs C<autosplit> to check the module
57currently being split to ensure that it does include a C<use>
58specification for the AutoLoader module, and skips the module if
59AutoLoader is not detected.
60$check defaults to 1.
61
62=item $modtime
63
64Lastly, the I<$modtime> argument specifies
65that C<autosplit> is to check the modification time of the module
66against that of the C<autosplit.ix> file, and only split the module if
67it is newer.
68$modtime defaults to 1.
69
70=back
21c92a1d 71
72Typical use of AutoSplit in the perl MakeMaker utility is via the command-line
73with:
74
75 perl -e 'use AutoSplit; autosplit($ARGV[0], $ARGV[1], 0, 1, 1)'
76
Defined as a Make macro, it is invoked with file and directory arguments;
C<autosplit> will split the specified file into the specified directory and
delete obsolete C<.al> files, after first checking that the module does use
the AutoLoader and that it has not already been split in its current form
(the modtime test).
82
83The C<autosplit_lib_modules> form is used in the building of perl. It takes
84as input a list of files (modules) that are assumed to reside in a directory
85B<lib> relative to the current directory. Each file is sent to the
86autosplitter one at a time, to be split into the directory B<lib/auto>.
87
88In both usages of the autosplitter, only subroutines defined following the
4e6ea2c3 89perl I<__END__> token are split out into separate files. Some
21c92a1d 90routines may be placed prior to this marker to force their immediate loading
91and parsing.
92
4e6ea2c3 93=head2 Multiple packages
94
95As of version 1.01 of the AutoSplit module it is possible to have
96multiple packages within a single file. Both of the following cases
97are supported:
98
99 package NAME;
100 __END__
101 sub AAA { ... }
102 package NAME::option1;
103 sub BBB { ... }
104 package NAME::option2;
105 sub BBB { ... }
21c92a1d 106
4e6ea2c3 107 package NAME;
108 __END__
109 sub AAA { ... }
110 sub NAME::option1::BBB { ... }
111 sub NAME::option2::BBB { ... }
21c92a1d 112
113=head1 DIAGNOSTICS
114
4e6ea2c3 115C<AutoSplit> will inform the user if it is necessary to create the
116top-level directory specified in the invocation. It is preferred that
117the script or installation process that invokes C<AutoSplit> have
118created the full directory path ahead of time. This warning may
119indicate that the module is being split into an incorrect path.
21c92a1d 120
C<AutoSplit> will warn the user of all subroutines whose names cause
potential file naming conflicts on machines with drastically limited
(8 characters or fewer) file name length. Since the subroutine name is
used as the file name, these warnings can aid in portability to such
systems.
21c92a1d 126
4e6ea2c3 127Warnings are issued and the file skipped if C<AutoSplit> cannot locate
128either the I<__END__> marker or a "package Name;"-style specification.
21c92a1d 129
4e6ea2c3 130C<AutoSplit> will also emit general diagnostics for inability to
131create directories or files.
f06db76b 132
133=cut
134
# Package-level knobs; each is also the fallback for the matching
# optional argument of autosplit().
$Verbose = 1;               # 0=none, 1=minimal, 2=list .al files
$Maxlen  = 8;               # portability warning threshold for file names:
                            # 8 for dos, 11 (14-".al") for SYSVR3
$Keep = 0;
$CheckForAutoloader = 1;
$CheckModTime = 1;

my $IndexFile = "autosplit.ix"; # file also serves as timestamp
my $Is_VMS = ($^O eq 'VMS');

# Longest file name we may try to create: 255 when Configure reports
# flexible file names, otherwise 14.  When Dos::UseLFN is defined it
# overrides that choice (255 with long file names, 11 without).
my $maxflen = ($Config{'d_flexfnam'} ne 'define') ? 14 : 255;
$maxflen = (Dos::UseLFN() ? 255 : 11) if defined(&Dos::UseLFN);
a0d0e21e 149
3edbfbe5 150
# autosplit($file, $autodir, $keep, $ckal, $ckmt)
#
#   $file    - the perl source file to be split (after __END__)
#   $autodir - the ".../auto" dir below which to write split subs
#   $keep, $ckal, $ckmt - optional flags; any that is undefined falls
#              back to $Keep, $CheckForAutoloader and $CheckModTime
#              respectively.
#
# Hands the completed argument list to autosplit_file() and returns
# whatever that call returns.
sub autosplit {
    my($file, $autodir, $keep, $ckal, $ckmt) = @_;

    return autosplit_file(
        $file,
        $autodir,
        (defined($keep) ? $keep : $Keep),
        (defined($ckal) ? $ckal : $CheckForAutoloader),
        (defined($ckmt) ? $ckmt : $CheckModTime),
    );
}
161
162
a0d0e21e 163# This function is used during perl building/installation
21c92a1d 164# ./miniperl -e 'use AutoSplit; autosplit_lib_modules(@ARGV)' ...
a0d0e21e 165
# autosplit_lib_modules(@modules)
#
# Splits each named module, looked up below "lib/", into "lib/auto"
# using the package-level $Keep, $CheckForAutoloader and $CheckModTime
# settings.  Names may be given as ABC::XYZ, as lib/ABC/XYZ.pm, with
# backslashes, or (on VMS) as a VMS-style filespec.  Processing stops at
# the first undefined entry.  Always returns 0.
#
# The loop uses a lexical copy of each name, so the caller's $_ is left
# untouched.
sub autosplit_lib_modules {
    my(@modules) = @_; # list of Module names

    for my $entry (@modules) {
        last unless defined $entry;

        my $module = $entry;
        $module =~ s#::#/#g;   # in case specified as ABC::XYZ
        $module =~ s|\\|/|g;   # bug in ksh OS/2
        $module =~ s#^lib/##;  # in case specified as lib/*.pm
        if ($Is_VMS && $module =~ /[:>\]]/) { # may need to convert VMS-style filespecs
            my ($dir,$name) = ($module =~ /(.*])(.*)/);
            $dir =~ s/.*lib[\.\]]//;
            $dir =~ s#[\.\]]#/#g;
            $module = $dir . $name;
        }
        autosplit_file("lib/$module", "lib/auto",
                       $Keep, $CheckForAutoloader, $CheckModTime);
    }
    0;
}
184
185
186# private functions
187
# autosplit_file($filename, $autodir, $keep, $check_for_autoloader,
#                $check_mod_time)
#
# Does the actual work for autosplit() and autosplit_lib_modules().
#
#   $filename             - module to split; ".pm" is appended if missing
#   $autodir              - root of the output tree (default "lib/auto")
#   $keep                 - if false, *.al files in the output directories
#                           that were not written by this run are deleted
#   $check_for_autoloader - if true, skip files in which no use/require of
#                           AutoLoader (or AutoLoader in an ISA assignment)
#                           was seen before __END__
#   $check_mod_time       - if true, skip files whose index file is at
#                           least as new as the module
#
# Returns 0 when the file is skipped for lack of AutoLoader, a single
# undef when the index is up to date, and otherwise the list of .al files
# written.
#
# Dies if the module cannot be opened, has no __END__ line or package
# statement, if the package does not match the file name, or if an .al
# file cannot be created.
sub autosplit_file {
    my($filename, $autodir, $keep, $check_for_autoloader, $check_mod_time)
        = @_;
    my(@outfiles);
    local($_);
    local($/) = "\n";

    # where to write output files
    $autodir ||= "lib/auto";
    if ($Is_VMS) {
        ($autodir = VMS::Filespec::unixpath($autodir)) =~ s|/$||;
        $filename = VMS::Filespec::unixify($filename); # may have dirs
    }
    unless (-d $autodir){
        mkpath($autodir,0,0755);
        # We should never need to create the auto dir
        # here. installperl (or similar) should have done
        # it. Expecting it to exist is a valuable sanity check against
        # autosplitting into some random directory by mistake.
        print "Warning: AutoSplit had to create top-level " .
            "$autodir unexpectedly.\n";
    }

    # allow just a package name to be used
    $filename .= ".pm" unless ($filename =~ m/\.pm$/);

    open(IN, "<$filename") or die "AutoSplit: Can't open $filename: $!\n";
    my($pm_mod_time) = (stat($filename))[9];
    my($autoloader_seen) = 0;
    my($in_pod) = 0;
    my($def_package,$last_package,$this_package,$fnr);

    # Pass 1: scan the part before __END__ for the package name and for
    # evidence that the module uses AutoLoader.
    while (<IN>) {
        # Skip pod text.
        $fnr++;
        $in_pod = 1 if /^=/;
        $in_pod = 0 if /^=cut/;
        next if ($in_pod || /^=cut/);

        # record last package name seen
        $def_package = $1 if (m/^\s*package\s+([\w:]+)\s*;/);
        ++$autoloader_seen if m/^\s*(use|require)\s+AutoLoader\b/;
        ++$autoloader_seen if m/\bISA\s*=.*\bAutoLoader\b/;
        last if /^__END__/;
    }
    if ($check_for_autoloader && !$autoloader_seen){
        print "AutoSplit skipped $filename: no AutoLoader used\n"
            if ($Verbose>=2);
        close(IN);      # don't leave the module open when skipping it
        return 0;
    }
    # $_ still holds the __END__ line if the loop stopped there; it is
    # undefined if the whole file was read without finding one.
    $_ or die "Can't find __END__ in $filename\n";

    $def_package or die "Can't find 'package Name;' in $filename\n";

    my($modpname) = _modpname($def_package);

    # this _has_ to match so we have a reasonable timestamp file
    die "Package $def_package ($modpname.pm) does not ".
        "match filename $filename"
            unless ($filename =~ m/\Q$modpname.pm\E$/ or
                    ($^O eq 'dos') or ($^O eq 'MSWin32') or
                    $Is_VMS && $filename =~ m/$modpname.pm/i);

    # Fixed here, from the main package: $modpname is reassigned per sub
    # further down, but the index always lives in this directory.
    my($al_idx_file) = "$autodir/$modpname/$IndexFile";

    if ($check_mod_time){
        my($al_ts_time) = (stat("$al_idx_file"))[9] || 1;
        if ($al_ts_time >= $pm_mod_time){
            print "AutoSplit skipped ($al_idx_file newer than $filename)\n"
                if ($Verbose >= 2);
            close(IN);  # don't leave the module open when skipping it
            return undef; # one undef, not a list
        }
    }

    print "AutoSplitting $filename ($autodir/$modpname)\n"
        if $Verbose;

    unless (-d "$autodir/$modpname"){
        mkpath("$autodir/$modpname",0,0777);
    }

    # We must try to deal with some SVR3 systems with a limit of 14
    # characters for file names. Sadly we *cannot* simply truncate all
    # file names to 14 characters on these systems because we *must*
    # create filenames which exactly match the names used by AutoLoader.pm.
    # This is a problem because some systems silently truncate the file
    # names while others treat long file names as an error.

    my $Is83 = $maxflen==11; # plain, case INSENSITIVE dos filenames

    my(@subnames, $subname, %proto, %package);
    my @cache = ();     # lines seen between subs, held for the next .al file
    my $caching = 1;    # true while we are between subs
    $last_package = '';

    # Pass 2: everything after __END__.  Each "sub NAME" at the start of a
    # line opens a new .al file; a "}" at the start of a line ends it.
    while (<IN>) {
        $fnr++;
        $in_pod = 1 if /^=/;
        $in_pod = 0 if /^=cut/;
        next if ($in_pod || /^=cut/);
        # the following (tempting) old coding gives big troubles if a
        # cut is forgotten at EOF:
        # next if /^=\w/ .. /^=cut/;
        if (/^package\s+([\w:]+)\s*;/) {
            $this_package = $def_package = $1;
        }
        if (/^sub\s+([\w:]+)(\s*\(.*?\))?/) {
            # Finish the previous .al file.  $subname is only defined once
            # a file has been opened, so nothing is written to an unopened
            # OUT when a package line precedes the very first sub.
            print OUT "# end of $last_package\::$subname\n1;\n"
                if $last_package && defined $subname;
            $subname = $1;
            my $proto = $2 || '';
            if ($subname =~ s/(.*):://){
                $this_package = $1;
            } else {
                $this_package = $def_package;
            }
            my $fq_subname = "$this_package\::$subname";
            $package{$fq_subname} = $this_package;
            $proto{$fq_subname} = $proto;
            push(@subnames, $fq_subname);
            my($lname, $sname) = ($subname, substr($subname,0,$maxflen-3));
            $modpname = _modpname($this_package);
            mkpath("$autodir/$modpname",0,0777);
            my($lpath) = "$autodir/$modpname/$lname.al";
            my($spath) = "$autodir/$modpname/$sname.al";
            my $path;
            # Try the full name first (never on 8.3 systems); fall back to
            # the truncated name if that open fails.
            if (!$Is83 and open(OUT, ">$lpath")){
                $path=$lpath;
                print " writing $lpath\n" if ($Verbose>=2);
            } else {
                open(OUT, ">$spath") or die "Can't create $spath: $!\n";
                $path=$spath;
                print " writing $spath (with truncated name)\n"
                    if ($Verbose>=1);
            }
            push(@outfiles, $path);
            print OUT <<EOT;
# NOTE: Derived from $filename.
# Changes made here will be lost when autosplit again.
# See AutoSplit.pm.
package $this_package;

#line $fnr "$filename (autosplit into $path)"
EOT
            print OUT @cache;
            @cache = ();
            $caching = 0;
        }
        if($caching) {
            # drop leading blank lines, keep everything after the first
            # non-blank one
            push(@cache, $_) if @cache || /\S/;
        } else {
            print OUT $_;
        }
        if(/^\}/) {
            if($caching) {
                print OUT @cache;
                @cache = ();
            }
            print OUT "\n";
            $caching = 1;
        }
        $last_package = $this_package if defined $this_package;
    }
    # Close off the last .al file, if any sub was found at all.
    if (defined $subname) {
        print OUT @cache,"1;\n# end of $last_package\::$subname\n";
        close(OUT);
    }
    close(IN);

    if (!$keep){ # don't keep any obsolete *.al files in the directory
        my(%outfiles);
        # perl downcases all filenames on VMS (which upcases all filenames) so
        # we'd better downcase the sub name list too, or subs with upper case
        # letters in them will get their .al files deleted right after they're
        # created. (The mixed case sub name won't match the all-lowercase
        # filename, and so be cleaned up as a scrap file)
        if ($Is_VMS or $Is83) {
            for my $written (@outfiles) {
                $outfiles{lc($written)} = lc($written);
            }
        } else {
            @outfiles{@outfiles} = @outfiles;
        }
        my(%outdirs);
        for (@outfiles) {
            $outdirs{File::Basename::dirname($_)}||=1;
        }
        for my $dir (keys %outdirs) {
            unless (opendir(OUTDIR,$dir)) {
                carp "AutoSplit: unable to read directory $dir: $!";
                next;
            }
            foreach (sort readdir(OUTDIR)){
                next unless /\.al$/;
                my($file) = "$dir/$_";
                $file = lc $file if $Is83 or $Is_VMS;
                next if $outfiles{$file};
                print " deleting $file\n" if ($Verbose>=2);
                my($deleted,$thistime); # catch all versions on VMS
                do { $deleted += ($thistime = unlink $file) } while ($thistime);
                carp "Unable to delete $file: $!" unless $deleted;
            }
            closedir(OUTDIR);
        }
    }

    # Write the index: one forward declaration per split sub, grouped by
    # package.  Its modification time is what the $check_mod_time test
    # above compares against.  If it cannot be created we only warn, and
    # do not write to the unopened handle.
    if (open(TS,">$al_idx_file")) {
        print TS "# Index created by AutoSplit for $filename\n";
        print TS "# (file acts as timestamp)\n";
        $last_package = '';
        for my $fqs (@subnames) {
            my($short) = $fqs;
            $short =~ s/.*:://;
            print TS "package $package{$fqs};\n"
                unless $last_package eq $package{$fqs};
            print TS "sub $short $proto{$fqs};\n";
            $last_package = $package{$fqs};
        }
        print TS "1;\n";
        close(TS);
    } else {
        carp "AutoSplit: unable to create timestamp file ($al_idx_file): $!";
    }

    _check_unique($filename, $Maxlen, 1, @outfiles);

    @outfiles;
}
406
# _modpname($package)
#
# Turns a package name into a relative path by replacing every "::" with
# a backslash when $^O is 'MSWin32' and with "/" everywhere else.
sub _modpname ($) {
    my($package) = @_;
    my $sep = ($^O eq 'MSWin32') ? '\\' : '/';
    (my $modpname = $package) =~ s#::#$sep#g;
    $modpname;
}
a0d0e21e 417
# _check_unique($filename, $maxlen, $warn, @outfiles)
#
# Looks at every output file whose base name is longer than $maxlen
# characters and works out which of them would collide, within one
# directory, if their names were cut to $maxlen characters.  When $warn
# is true and such collisions exist, a report naming $filename is
# printed.
sub _check_unique {
    my($filename, $maxlen, $warn, @outfiles) = @_;
    my(%notuniq, %shorts);

    for my $path (@outfiles) {
        my $file = File::Basename::basename($path);
        next unless length($file) > $maxlen;

        my $dir   = File::Basename::dirname($path);
        my $trunc = substr($file,0,$maxlen);
        if ($shorts{$dir}{$trunc}) {
            # a second (or later) name with the same truncated form
            $notuniq{$dir}{$trunc} = 1;
            $shorts{$dir}{$trunc} = "$shorts{$dir}{$trunc}, $file";
        } else {
            $shorts{$dir}{$trunc} = $file;
        }
    }
    if (%notuniq && $warn){
        print "$filename: some names are not unique when " .
            "truncated to $maxlen characters:\n";
        foreach my $where (sort keys %notuniq){
            print " directory $where:\n";
            foreach my $short (sort keys %{$notuniq{$where}}) {
                print " $shorts{$where}{$short} truncate to $short\n";
            }
        }
    }
}
447
4481;
449__END__
450
451# test functions so AutoSplit.pm can be applied to itself:
4e6ea2c3 452sub test1 ($) { "test 1\n"; }
453sub test2 ($$) { "test 2\n"; }
454sub test3 ($$$) { "test 3\n"; }
455sub testtesttesttest4_1 { "test 4\n"; }
456sub testtesttesttest4_2 { "duplicate test 4\n"; }
457sub Just::Another::test5 { "another test 5\n"; }
458sub test6 { return join ":", __FILE__,__LINE__; }
459package Yet::Another::AutoSplit;
460sub testtesttesttest4_1 ($) { "another test 4\n"; }
461sub testtesttesttest4_2 ($$) { "another duplicate test 4\n"; }