From: Larry Wall Date: Sun, 9 Jun 1991 12:36:21 +0000 (+0000) Subject: perl 4.0 patch 10: (combined patch) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1462b684862954f3522657efc93a3264698e4a9f;p=p5sagit%2Fp5-mst-13.2.git perl 4.0 patch 10: (combined patch) Subject: pack(hh,1) dumped core Subject: read didn't work from character special files open for writing Subject: close-on-exec wrongly set on system file descriptors Subject: //g only worked first time through Subject: perl -v printed incorrect copyright notice Subject: certain pattern optimizations were botched Subject: documented some newer features in addenda Subject: $) and $| incorrectly handled in run-time patterns Subject: added tests for case-insensitive regular expressions Subject: m'$foo' now treats string as single quoted --- diff --git a/doarg.c b/doarg.c index 2a1d5eb..e339536 100644 --- a/doarg.c +++ b/doarg.c @@ -1,4 +1,4 @@ -/* $RCSfile: doarg.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 10:42:17 $ +/* $RCSfile: doarg.c,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:18:41 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,9 @@ * License or the Artistic License, as specified in the README file. * * $Log: doarg.c,v $ + * Revision 4.0.1.3 91/06/10 01:18:41 lwall + * patch10: pack(hh,1) dumped core + * * Revision 4.0.1.2 91/06/07 10:42:17 lwall * patch4: new copyright notice * patch4: // wouldn't use previous pattern if it started with a null character @@ -494,9 +497,10 @@ int *arglast; case 'b': { char *savepat = pat; - int saveitems = items; + int saveitems; fromstr = NEXTFROM; + saveitems = items; aptr = str_get(fromstr); if (pat[-1] == '*') len = fromstr->str_cur; @@ -551,9 +555,10 @@ int *arglast; case 'h': { char *savepat = pat; - int saveitems = items; + int saveitems; fromstr = NEXTFROM; + saveitems = items; aptr = str_get(fromstr); if (pat[-1] == '*') len = fromstr->str_cur; diff --git a/doio.c b/doio.c index e93c305..2f1ea17 100644 --- a/doio.c +++ b/doio.c @@ -1,4 +1,4 @@ -/* $RCSfile: doio.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 10:53:39 $ +/* $RCSfile: doio.c,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:21:19 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,10 @@ * License or the Artistic License, as specified in the README file. * * $Log: doio.c,v $ + * Revision 4.0.1.3 91/06/10 01:21:19 lwall + * patch10: read didn't work from character special files open for writing + * patch10: close-on-exec wrongly set on system file descriptors + * * Revision 4.0.1.2 91/06/07 10:53:39 lwall * patch4: new copyright notice * patch4: system fd's are now treated specially @@ -237,17 +241,13 @@ int len; (void)fclose(fp); goto say_false; } - if (S_ISSOCK(statbuf.st_mode) || (S_ISCHR(statbuf.st_mode) && writing)) + if (S_ISSOCK(statbuf.st_mode)) stio->type = 's'; /* in case a socket was passed in to us */ #ifdef S_IFMT else if (!(statbuf.st_mode & S_IFMT)) stio->type = 's'; /* some OS's return 0 on fstat()ed socket */ #endif } -#if defined(HAS_FCNTL) && defined(F_SETFD) - fd = fileno(fp); - fcntl(fd,F_SETFD,fd > maxsysfd); -#endif if (saveifp) { /* must use old fp? */ fd = fileno(saveifp); if (saveofp) { @@ -263,16 +263,22 @@ int len; } fp = saveifp; } +#if defined(HAS_FCNTL) && defined(F_SETFD) + fd = fileno(fp); + fcntl(fd,F_SETFD,fd > maxsysfd); +#endif stio->ifp = fp; if (writing) { - if (stio->type != 's') - stio->ofp = fp; - else + if (stio->type == 's' + || (stio->type == '>' && S_ISCHR(statbuf.st_mode)) ) { if (!(stio->ofp = fdopen(fileno(fp),"w"))) { fclose(fp); stio->ifp = Nullfp; goto say_false; } + } + else + stio->ofp = fp; } return TRUE; diff --git a/dolist.c b/dolist.c index c1f4ed5..7527874 100644 --- a/dolist.c +++ b/dolist.c @@ -1,4 +1,4 @@ -/* $RCSfile: dolist.c,v $$Revision: 4.0.1.1 $$Date: 91/06/07 10:58:28 $ +/* $RCSfile: dolist.c,v $$Revision: 4.0.1.2 $$Date: 91/06/10 01:22:15 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,9 @@ * License or the Artistic License, as specified in the README file. * * $Log: dolist.c,v $ + * Revision 4.0.1.2 91/06/10 01:22:15 lwall + * patch10: //g only worked first time through + * * Revision 4.0.1.1 91/06/07 10:58:28 lwall * patch4: new copyright notice * patch4: added global modifier for pattern matches @@ -202,6 +205,8 @@ int *arglast; goto gotcha; } else { + if (global) + spat->spat_regexp->startp[0] = Nullch; if (gimme == G_ARRAY) return sp; str_sset(str,&str_no); @@ -276,6 +281,8 @@ yup: nope: spat->spat_regexp->startp[0] = Nullch; ++spat->spat_short->str_u.str_useful; + if (global) + spat->spat_regexp->startp[0] = Nullch; if (gimme == G_ARRAY) return sp; str_sset(str,&str_no); diff --git a/patchlevel.h b/patchlevel.h index 618bca4..4e0e918 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -1 +1 @@ -#define PATCHLEVEL 9 +#define PATCHLEVEL 10 diff --git a/perl.c b/perl.c index e489159..664c898 100644 --- a/perl.c +++ b/perl.c @@ -1,4 +1,4 @@ -char rcsid[] = "$RCSfile: perl.c,v $$Revision: 4.0.1.3 $$Date: 91/06/07 11:40:18 $\nPatch level: ###\n"; +char rcsid[] = "$RCSfile: perl.c,v $$Revision: 4.0.1.4 $$Date: 91/06/10 01:23:07 $\nPatch level: ###\n"; /* * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,9 @@ char rcsid[] = "$RCSfile: perl.c,v $$Revision: 4.0.1.3 $$Date: 91/06/07 11:40:18 * License or the Artistic License, as specified in the README file. * * $Log: perl.c,v $ + * Revision 4.0.1.4 91/06/10 01:23:07 lwall + * patch10: perl -v printed incorrect copyright notice + * * Revision 4.0.1.3 91/06/07 11:40:18 lwall * patch4: changed old $^P to $^X * @@ -1199,8 +1202,8 @@ char *s; #endif #endif fputs("\n\ -Perl may be copied only under the terms of the GNU General Public License,\n\ -a copy of which can be found with the Perl 4.0 distribution kit.\n",stdout); +Perl may be copied only under the terms of either the Artistic License or the\n\ +GNU General Public License, which may be found in the Perl 4.0 source kit.\n",stdout); #ifdef MSDOS usage(origargv[0]); #endif diff --git a/perl.h b/perl.h index 43737aa..4ab86d9 100644 --- a/perl.h +++ b/perl.h @@ -1,4 +1,4 @@ -/* $RCSfile: perl.h,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:28:33 $ +/* $RCSfile: perl.h,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:25:10 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,9 @@ * License or the Artistic License, as specified in the README file. * * $Log: perl.h,v $ + * Revision 4.0.1.3 91/06/10 01:25:10 lwall + * patch10: certain pattern optimizations were botched + * * Revision 4.0.1.2 91/06/07 11:28:33 lwall * patch4: new copyright notice * patch4: made some allowances for "semi-standard" C @@ -749,6 +752,7 @@ FILE *popen(); STR *interp(); void free_arg(); STIO *stio_new(); +void hoistmust(); EXT struct stat statbuf; EXT struct stat statcache; diff --git a/perl.man b/perl.man index 50a5f9b..f059208 100644 --- a/perl.man +++ b/perl.man @@ -1,7 +1,10 @@ .rn '' }` -''' $RCSfile: perl.man,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:41:23 $ +''' $RCSfile: perl.man,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:26:02 $ ''' ''' $Log: perl.man,v $ +''' Revision 4.0.1.3 91/06/10 01:26:02 lwall +''' patch10: documented some newer features in addenda +''' ''' Revision 4.0.1.2 91/06/07 11:41:23 lwall ''' patch4: added global modifier for pattern matches ''' patch4: default top-of-form format is now FILEHANDLE_TOP @@ -5802,6 +5805,11 @@ In double-quote context, more escapes are supported: \ee, \ea, \ex1b, \ec[, The .B $/ variable may now be set to a multi-character delimiter. +.PP +There is now a g modifier on ordinary pattern matching that causes it +to iterate through a string finding multiple matches. +.PP +All of the $^X variables are new except for $^T. .SH BUGS .PP .I Perl diff --git a/str.c b/str.c index 5ff6a41..cf5e1f9 100644 --- a/str.c +++ b/str.c @@ -1,4 +1,4 @@ -/* $RCSfile: str.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:58:13 $ +/* $RCSfile: str.c,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:27:54 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,9 @@ * License or the Artistic License, as specified in the README file. * * $Log: str.c,v $ + * Revision 4.0.1.3 91/06/10 01:27:54 lwall + * patch10: $) and $| incorrectly handled in run-time patterns + * * Revision 4.0.1.2 91/06/07 11:58:13 lwall * patch4: new copyright notice * patch4: taint check on undefined string could cause core dump @@ -939,8 +942,14 @@ STR *src; ++s; t = s; } - else if ((*s == '@' || (*s == '$' && !index(nointrp,s[1]))) && - s+1 < send) { + else if (*s == '$' && s+1 < send && *nointrp && index(nointrp,s[1])) { + str_ncat(str, t, s - t); + str_ncat(str, "$b", 2); + str_ncat(str, s, 2); + s += 2; + t = s; + } + else if ((*s == '@' || *s == '$') && s+1 < send) { str_ncat(str,t,s-t); t = s; if (*s == '$' && s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) @@ -1171,6 +1180,9 @@ int sp; if (s-t > 0) str_ncat(str,t,s-t); switch(*++s) { + default: + fatal("panic: unknown interp cookie\n"); + break; case 'a': str_scat(str,*++elem); break; diff --git a/t/op/pat.t b/t/op/pat.t index 5223ef0..8c3adc9 100644 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -1,8 +1,8 @@ #!./perl -# $RCSfile: pat.t,v $$Revision: 4.0.1.1 $$Date: 91/06/07 12:01:26 $ +# $RCSfile: pat.t,v $$Revision: 4.0.1.2 $$Date: 91/06/10 01:29:34 $ -print "1..48\n"; +print "1..51\n"; $x = "abc\ndef\n"; @@ -174,3 +174,11 @@ for $iter (1..5) { $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; print $x eq '505550555' ? "ok 48\n" : "not ok 48 $x\n"; + +$xyz = 'xyz'; +print "abc" =~ /^abc$|$xyz/ ? "ok 49\n" : "not ok 49\n"; + +# perl 4.009 says "unmatched ()" +eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; +print $@ eq "" ? "ok 50\n" : "not ok 50\n"; +print $result eq "abc:bc" ? "ok 51\n" : "not ok 51\n"; diff --git a/t/op/re_tests b/t/op/re_tests index 01d9940..ee03d6f 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -135,3 +135,140 @@ a(bc)d abcd y $1-\$1-\\$1 bc-$1-\bc a[-]?c ac y $& ac (abc)\1 abcabc y $1 abc ([a-c]*)\1 abcabc y $1 abc +'abc'i ABC y $& ABC +'abc'i XBC n - - +'abc'i AXC n - - +'abc'i ABX n - - +'abc'i XABCY y $& ABC +'abc'i ABABC y $& ABC +'ab*c'i ABC y $& ABC +'ab*bc'i ABC y $& ABC +'ab*bc'i ABBC y $& ABBC +'ab*bc'i ABBBBC y $& ABBBBC +'ab{0,}bc'i ABBBBC y $& ABBBBC +'ab+bc'i ABBC y $& ABBC +'ab+bc'i ABC n - - +'ab+bc'i ABQ n - - +'ab{1,}bc'i ABQ n - - +'ab+bc'i ABBBBC y $& ABBBBC +'ab{1,}bc'i ABBBBC y $& ABBBBC +'ab{1,3}bc'i ABBBBC y $& ABBBBC +'ab{3,4}bc'i ABBBBC y $& ABBBBC +'ab{4,5}bc'i ABBBBC n - - +'ab?bc'i ABBC y $& ABBC +'ab?bc'i ABC y $& ABC +'ab{0,1}bc'i ABC y $& ABC +'ab?bc'i ABBBBC n - - +'ab?c'i ABC y $& ABC +'ab{0,1}c'i ABC y $& ABC +'^abc$'i ABC y $& ABC +'^abc$'i ABCC n - - +'^abc'i ABCC y $& ABC +'^abc$'i AABC n - - +'abc$'i AABC y $& ABC +'^'i ABC y $& +'$'i ABC y $& +'a.c'i ABC y $& ABC +'a.c'i AXC y $& AXC +'a.*c'i AXYZC y $& AXYZC +'a.*c'i AXYZD n - - +'a[bc]d'i ABC n - - +'a[bc]d'i ABD y $& ABD +'a[b-d]e'i ABD n - - +'a[b-d]e'i ACE y $& ACE +'a[b-d]'i AAC y $& AC +'a[-b]'i A- y $& A- +'a[b-]'i A- y $& A- +'a[b-a]'i - c - - +'a[]b'i - c - - +'a['i - c - - +'a]'i A] y $& A] +'a[]]b'i A]B y $& A]B +'a[^bc]d'i AED y $& AED +'a[^bc]d'i ABD n - - +'a[^-b]c'i ADC y $& ADC +'a[^-b]c'i A-C n - - +'a[^]b]c'i A]C n - - +'a[^]b]c'i ADC y $& ADC +'ab|cd'i ABC y $& AB +'ab|cd'i ABCD y $& AB +'()ef'i DEF y $&-$1 EF- +'()*'i - c - - +'*a'i - c - - +'^*'i - c - - +'$*'i - c - - +'(*)b'i - c - - +'$b'i B n - - +'a\'i - c - - +'a\(b'i A(B y $&-$1 A(B- +'a\(*b'i AB y $& AB +'a\(*b'i A((B y $& A((B +'a\\b'i A\B y $& A\B +'abc)'i - c - - +'(abc'i - c - - +'((a))'i ABC y $&-$1-$2 A-A-A +'(a)b(c)'i ABC y $&-$1-$2 ABC-A-C +'a+b+c'i AABBABC y $& ABC +'a{1,}b{1,}c'i AABBABC y $& ABC +'a**'i - c - - +'a*?'i - c - - +'(a*)*'i - c - - +'(a*)+'i - c - - +'(a|)*'i - c - - +'(a*|b)*'i - c - - +'(a+|b)*'i AB y $&-$1 AB-B +'(a+|b){0,}'i AB y $&-$1 AB-B +'(a+|b)+'i AB y $&-$1 AB-B +'(a+|b){1,}'i AB y $&-$1 AB-B +'(a+|b)?'i AB y $&-$1 A-A +'(a+|b){0,1}'i AB y $&-$1 A-A +'(^)*'i - c - - +'(ab|)*'i - c - - +')('i - c - - +'[^ab]*'i CDE y $& CDE +'abc'i n - - +'a*'i y $& +'([abc])*d'i ABBBCD y $&-$1 ABBBCD-C +'([abc])*bcd'i ABCD y $&-$1 ABCD-A +'a|b|c|d|e'i E y $& E +'(a|b|c|d|e)f'i EF y $&-$1 EF-E +'((a*|b))*'i - c - - +'abcd*efg'i ABCDEFG y $& ABCDEFG +'ab*'i XABYABBBZ y $& AB +'ab*'i XAYABBBZ y $& A +'(ab|cd)e'i ABCDE y $&-$1 CDE-CD +'[abhgefdc]ij'i HIJ y $& HIJ +'^(ab|cd)e'i ABCDE n x$1y XY +'(abc|)ef'i ABCDEF y $&-$1 EF- +'(a|b)c*d'i ABCD y $&-$1 BCD-B +'(ab|ab*)bc'i ABC y $&-$1 ABC-A +'a([bc]*)c*'i ABC y $&-$1 ABC-BC +'a([bc]*)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]+)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]*)(c+d)'i ABCD y $&-$1-$2 ABCD-B-CD +'a[bcd]*dcdcde'i ADCDCDE y $& ADCDCDE +'a[bcd]+dcdcde'i ADCDCDE n - - +'(ab|a)b*c'i ABC y $&-$1 ABC-AB +'((a)(b)c)(d)'i ABCD y $1-$2-$3-$4 ABC-A-B-D +'[a-zA-Z_][a-zA-Z0-9_]*'i ALPHA y $& ALPHA +'^a(bc+|b[eh])g|.h$'i ABH y $&-$1 BH- +'(bc+d$|ef*g.|h?i(j|k))'i EFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'(bc+d$|ef*g.|h?i(j|k))'i IJ y $&-$1-$2 IJ-IJ-J +'(bc+d$|ef*g.|h?i(j|k))'i EFFG n - - +'(bc+d$|ef*g.|h?i(j|k))'i BCDD n - - +'(bc+d$|ef*g.|h?i(j|k))'i REFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'((((((((((a))))))))))'i A y $10 A +'((((((((((a))))))))))\10'i AA y $& AA +'((((((((((a))))))))))\41'i AA n - - +'((((((((((a))))))))))\41'i A! y $& A! +'(((((((((a)))))))))'i A y $& A +'multiple words of text'i UH-UH n - - +'multiple words'i MULTIPLE WORDS, YEAH y $& MULTIPLE WORDS +'(.*)c(.*)'i ABCDE y $&-$1-$2 ABCDE-AB-DE +'\((.*), (.*)\)'i (A, B) y ($2, $1) (B, A) +'[k]'i AB n - - +'abcd'i ABCD y $&-\$&-\\$& ABCD-$&-\ABCD +'a(bc)d'i ABCD y $1-\$1-\\$1 BC-$1-\BC +'a[-]?c'i AC y $& AC +'(abc)\1'i ABCABC y $1 ABC +'([a-c]*)\1'i ABCABC y $1 ABC diff --git a/t/op/regexp.t b/t/op/regexp.t index 92f084a..e488a82 100644 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -1,6 +1,6 @@ #!./perl -# $Header: regexp.t,v 4.0 91/03/20 01:54:22 lwall Locked $ +# $RCSfile: regexp.t,v $$Revision: 4.0.1.1 $$Date: 91/06/10 01:30:29 $ open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || die "Can't open re_tests"; @@ -11,10 +11,12 @@ close(TESTS); print "1..$numtests\n"; open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || die "Can't open re_tests"; +$| = 1; while () { ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_); $input = join(':',$pat,$subject,$result,$repl,$expect); - eval "\$match = (\$subject =~ \$pat); \$got = \"$repl\";"; + $pat = "'$pat'" unless $pat =~ /^'/; + eval "\$match = (\$subject =~ m$pat); \$got = \"$repl\";"; if ($result eq 'c') { if ($@ ne '') {print "ok $.\n";} else {print "not ok $.\n";} } diff --git a/toke.c b/toke.c index 4411284..d46a960 100644 --- a/toke.c +++ b/toke.c @@ -1,4 +1,4 @@ -/* $RCSfile: toke.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 12:05:56 $ +/* $RCSfile: toke.c,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:32:26 $ * * Copyright (c) 1991, Larry Wall * @@ -6,6 +6,10 @@ * License or the Artistic License, as specified in the README file. * * $Log: toke.c,v $ + * Revision 4.0.1.3 91/06/10 01:32:26 lwall + * patch10: m'$foo' now treats string as single quoted + * patch10: certain pattern optimizations were botched + * * Revision 4.0.1.2 91/06/07 12:05:56 lwall * patch4: new copyright notice * patch4: debugger lost track of lines in eval @@ -1514,6 +1518,7 @@ register char *s; int len; SPAT savespat; STR *str = Str_new(93,0); + char delim; Newz(801,spat,1,SPAT); spat->spat_next = curstash->tbl_spatroot; /* link into spat list */ @@ -1538,7 +1543,7 @@ register char *s; yylval.arg = Nullarg; return s; } - s++; + delim = *s++; while (*s == 'i' || *s == 'o' || *s == 'g') { if (*s == 'i') { s++; @@ -1556,7 +1561,11 @@ register char *s; } len = str->str_cur; e = str->str_ptr + len; - for (d = str->str_ptr; d < e; d++) { + if (delim == '\'') + d = e; + else + d = str->str_ptr; + for (; d < e; d++) { if (*d == '\\') d++; else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') || @@ -1738,15 +1747,18 @@ get_repl: return s; } +void hoistmust(spat) register SPAT *spat; { if (!spat->spat_short && spat->spat_regexp->regstart && (!spat->spat_regexp->regmust || spat->spat_regexp->reganch & ROPT_ANCH) ) { - spat->spat_short = spat->spat_regexp->regstart; if (!(spat->spat_regexp->reganch & ROPT_ANCH)) spat->spat_flags |= SPAT_SCANFIRST; + else if (spat->spat_flags & SPAT_FOLD) + return; + spat->spat_short = str_smake(spat->spat_regexp->regstart); } else if (spat->spat_regexp->regmust) {/* is there a better short-circuit? */ if (spat->spat_short &&