From: Larry Wall Date: Wed, 8 Aug 1990 17:07:07 +0000 (+0000) Subject: perl 3.0 patch #25 patch #19, continued X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=00bf170e31343ccc4fac7a63f6a3acf5e76c3616;p=p5sagit%2Fp5-mst-13.2.git perl 3.0 patch #25 patch #19, continued See patch #19. --- diff --git a/eg/relink b/eg/relink index 2d8e5f6..d31f96e 100644 --- a/eg/relink +++ b/eg/relink @@ -1,14 +1,18 @@ #!/usr/bin/perl +'di'; +'ig00'; +# +# $Header: relink,v 3.0.1.2 90/08/09 03:17:44 lwall Locked $ +# +# $Log: relink,v $ +# Revision 3.0.1.2 90/08/09 03:17:44 lwall +# patch19: added man page for relink and rename +# ($op = shift) || die "Usage: relink perlexpr [filenames]\n"; if (!@ARGV) { - if (-t) { - @ARGV = <*>; - } - else { - @ARGV = ; - chop(@ARGV); - } + @ARGV = ; + chop(@ARGV); } for (@ARGV) { next unless -l; # symbolic link? @@ -22,3 +26,60 @@ for (@ARGV) { symlink($_, $name); } } +############################################################################## + + # These next few lines are legal in both Perl and nroff. + +.00; # finish .ig + +'di \" finish diversion--previous line must be blank +.nr nl 0-1 \" fake up transition to first page again +.nr % 0 \" start at page 1 +';<<'.ex'; #__END__ ############# From here on it's a standard manual page ############ +.TH RELINK 1 "July 30, 1990" +.AT 3 +.SH LINK +relink \- relinks multiple symbolic links +.SH SYNOPSIS +.B relink perlexpr [symlinknames] +.SH DESCRIPTION +.I Relink +relinks the symbolic links given according to the rule specified as the +first argument. +The argument is a Perl expression which is expected to modify the $_ +string in Perl for at least some of the names specified. +For each symbolic link named on the command line, the Perl expression +will be executed on the contents of the symbolic link with that name. +If a given symbolic link's contents is not modified by the expression, +it will not be changed. +If a name given on the command line is not a symbolic link, it will be ignored. +If no names are given on the command line, names will be read +via standard input. +.PP +For example, to relink all symbolic links in the current directory +pointing to somewhere in X11R3 so that they point to X11R4, you might say +.nf + + relink 's/X11R3/X11R4/' * + +.fi +To change all occurences of links in the system from /usr/spool to /var/spool, +you'd say +.nf + + find / -type l -print | relink 's#/usr/spool#/var/spool#' + +.fi +.SH ENVIRONMENT +No environment variables are used. +.SH FILES +.SH AUTHOR +Larry Wall +.SH "SEE ALSO" +ln(1) +.br +perl(1) +.SH DIAGNOSTICS +If you give an invalid Perl expression you'll get a syntax error. +.SH BUGS +.ex diff --git a/eg/rename b/eg/rename index 1bb19d7..f63a0eb 100644 --- a/eg/rename +++ b/eg/rename @@ -1,14 +1,18 @@ #!/usr/bin/perl +'di'; +'ig00'; +# +# $Header: rename,v 3.0.1.2 90/08/09 03:17:57 lwall Locked $ +# +# $Log: rename,v $ +# Revision 3.0.1.2 90/08/09 03:17:57 lwall +# patch19: added man page for relink and rename +# ($op = shift) || die "Usage: rename perlexpr [filenames]\n"; if (!@ARGV) { - if (-t) { - @ARGV = <*>; - } - else { - @ARGV = ; - chop(@ARGV); - } + @ARGV = ; + chop(@ARGV); } for (@ARGV) { $was = $_; @@ -16,3 +20,58 @@ for (@ARGV) { die $@ if $@; rename($was,$_) unless $was eq $_; } +############################################################################## + + # These next few lines are legal in both Perl and nroff. + +.00; # finish .ig + +'di \" finish diversion--previous line must be blank +.nr nl 0-1 \" fake up transition to first page again +.nr % 0 \" start at page 1 +';<<'.ex'; #__END__ ############# From here on it's a standard manual page ############ +.TH RENAME 1 "July 30, 1990" +.AT 3 +.SH NAME +rename \- renames multiple files +.SH SYNOPSIS +.B rename perlexpr [files] +.SH DESCRIPTION +.I Rename +renames the filenames supplied according to the rule specified as the +first argument. +The argument is a Perl expression which is expected to modify the $_ +string in Perl for at least some of the filenames specified. +If a given filename is not modified by the expression, it will not be +renamed. +If no filenames are given on the command line, filenames will be read +via standard input. +.PP +For example, to rename all files matching *.bak to strip the extension, +you might say +.nf + + rename 's/\e.bak$//' *.bak + +.fi +To translate uppercase names to lower, you'd use +.nf + + rename 'y/A-Z/a-z/' * + +.fi +.SH ENVIRONMENT +No environment variables are used. +.SH FILES +.SH AUTHOR +Larry Wall +.SH "SEE ALSO" +mv(1) +.br +perl(1) +.SH DIAGNOSTICS +If you give an invalid Perl expression you'll get a syntax error. +.SH BUGS +.I Rename +does not check for the existence of target filenames, so use with care. +.ex diff --git a/h2pl/eg/sizeof.ph b/h2pl/eg/sizeof.ph new file mode 100644 index 0000000..285bff1 --- /dev/null +++ b/h2pl/eg/sizeof.ph @@ -0,0 +1,14 @@ +$sizeof{'char'} = 1; +$sizeof{'int'} = 4; +$sizeof{'long'} = 4; +$sizeof{'struct arpreq'} = 36; +$sizeof{'struct ifconf'} = 8; +$sizeof{'struct ifreq'} = 32; +$sizeof{'struct ltchars'} = 6; +$sizeof{'struct pcntl'} = 116; +$sizeof{'struct rtentry'} = 52; +$sizeof{'struct sgttyb'} = 6; +$sizeof{'struct tchars'} = 6; +$sizeof{'struct ttychars'} = 14; +$sizeof{'struct winsize'} = 8; +$sizeof{'struct termios'} = 132; diff --git a/lib/pwd.pl b/lib/pwd.pl new file mode 100644 index 0000000..c141e98 --- /dev/null +++ b/lib/pwd.pl @@ -0,0 +1,48 @@ +;# pwd.pl - keeps track of current working directory in PWD environment var +;# +;# $Header: pwd.pl,v 3.0.1.1 90/08/09 04:01:24 lwall Locked $ +;# +;# $Log: pwd.pl,v $ +;# Revision 3.0.1.1 90/08/09 04:01:24 lwall +;# patch19: Initial revision +;# +;# +;# Usage: +;# require "pwd.pl"; +;# &initpwd; +;# ... +;# &chdir($newdir); + +package pwd; + +sub main'initpwd { + if ($ENV{'PWD'}) { + local($dd,$di) = stat('.'); + local($pd,$pi) = stat($ENV{'PWD'}); + return if $di == $pi && $dd == $pd; + } + chop($ENV{'PWD'} = `pwd`); +} + +sub main'chdir { + local($newdir) = shift; + if (chdir $newdir) { + if ($newdir =~ m#^/#) { + $ENV{'PWD'} = $newdir; + } + else { + local(@curdir) = split(m#/#,$ENV{'PWD'}); + @curdir = '' unless @curdir; + foreach $component (split(m#/#, $newdir)) { + next if $component eq '.'; + pop(@curdir),next if $component eq '..'; + push(@curdir,$component); + } + $ENV{'PWD'} = join('/',@curdir) || '/'; + } + } + else { + 0; + } +} + diff --git a/msdos/popen.c b/msdos/popen.c index 60b2179..4cc58d1 100644 --- a/msdos/popen.c +++ b/msdos/popen.c @@ -1,4 +1,4 @@ -/* $Header: popen.c,v 3.0.1.1 90/03/27 16:11:57 lwall Locked $ +/* $Header: popen.c,v 3.0.1.2 90/08/09 04:04:42 lwall Locked $ * * (C) Copyright 1988, 1990 Diomidis Spinellis. * @@ -6,6 +6,9 @@ * as specified in the README file that comes with the perl 3.0 kit. * * $Log: popen.c,v $ + * Revision 3.0.1.2 90/08/09 04:04:42 lwall + * patch19: various MSDOS and OS/2 patches folded in + * * Revision 3.0.1.1 90/03/27 16:11:57 lwall * patch16: MSDOS support * @@ -85,7 +88,7 @@ mypopen(const char *command, const char *t) switch (*t) { case 'r': - sprintf(buff, "%s >%s", command, name); + sprintf(buff, "%s>%s", command, name); if (system(buff) || (f = fopen(name, "r")) == NULL) { free(name); return NULL; diff --git a/os2/popen.c b/os2/popen.c new file mode 100644 index 0000000..7c71ccc --- /dev/null +++ b/os2/popen.c @@ -0,0 +1,210 @@ +/* + * Pipe support for OS/2. + * + * WARNING: I am guilty of chumminess with the runtime library because + * I had no choice. Details to follow. + * + */ + +#include "EXTERN.h" +#include "perl.h" +#define INCL_DOSPROCESS +#define INCL_DOSQUEUES +#define INCL_DOSMISC +#define INCL_DOSMEMMGR +#include + +extern char **environ; + +/* This mysterious array _osfile is used internally by the runtime + * library to remember assorted things about open file handles. + * The problem is that we are creating file handles via DosMakePipe, + * rather than via the runtime library. This means that we have + * to fake the runtime library into thinking that the handles we've + * created are honest file handles. So just before doing the fdopen, + * we poke in a magic value that fools the library functions into + * thinking that the handle is already open in text mode. + * + * This might not work for your compiler, so beware. + */ +extern char _osfile[]; + +/* The maximum number of simultaneously open pipes. We create an + * array of this size to record information about each open pipe. + */ +#define MAXPIPES 5 + +/* Information to remember about each open pipe. + * The (FILE *) that popen returns is stored because that's the only + * way we can keep track of the pipes. + */ +typedef struct pipeinfo { + FILE *pfId; /* Which FILE we're talking about */ + HFILE hfMe; /* handle I should close at pclose */ + PID pidChild; /* Child's PID */ + CHAR fReading; /* A read or write pipe? */ +} PIPEINFO, *PPIPEINFO; /* pi and ppi */ + +static PIPEINFO PipeInfo[MAXPIPES]; + +FILE *mypopen(const char *command, const char *t) +{ + typedef char *PSZZ; + PSZZ pszzPipeArgs = 0; + PSZZ pszzEnviron = 0; + PSZ *ppsz; + PSZ psz; + FILE *f; + HFILE hfMe, hfYou; + HFILE hf, hfSave; + RESULTCODES rc; + USHORT us; + PPIPEINFO ppi; + UINT i; + + /* Validate pipe type */ + if (*t != 'w' && *t != 'r') fatal("Unknown pipe type"); + + /* Room for another pipe? */ + for (ppi = &PipeInfo[0]; ppi < &PipeInfo[MAXPIPES]; ppi++) + if (ppi->pfId == 0) goto foundone; + return NULL; + +foundone: + + /* Make the pipe */ + if (DosMakePipe(&hfMe, &hfYou, 0)) return NULL; + + /* Build the environment. First compute its length, then copy + * the environment strings into it. + */ + i = 0; + for (ppsz = environ; *ppsz; ppsz++) i += 1 + strlen(*ppsz); + New(1204, pszzEnviron, 1+i, CHAR); + + psz = pszzEnviron; + for (ppsz = environ; *ppsz; ppsz++) { + strcpy(psz, *ppsz); + psz += 1 + strlen(*ppsz); + } + *psz = 0; + + /* Build the command string to execute. + * 6 = length(0 "/c " 0 0) + */ + if (DosScanEnv("COMSPEC", &psz)) psz = "C:\\OS2\\cmd.exe"; +#if 0 + New(1203, pszzPipeArgs, strlen(psz) + strlen(command) + 6, CHAR); +#else +#define pszzPipeArgs buf +#endif + sprintf(pszzPipeArgs, "%s%c/c %s%c", psz, 0, command, 0); + + /* Now some stuff that depends on what kind of pipe we're doing. + * We pull a sneaky trick; namely, that stdin = 0 = false, + * and stdout = 1 = true. The end result is that if the + * pipe is a read pipe, then hf = 1; if it's a write pipe, then + * hf = 0 and Me and You are reversed. + */ + if (!(hf = (*t == 'r'))) { + /* The meaning of Me and You is reversed for write pipes. */ + hfSave = hfYou; hfYou = hfMe; hfMe = hfSave; + } + + ppi->fReading = hf; + + /* Trick number 1: Fooling the runtime library into thinking + * that the file handle is legit. + * + * Trick number 2: Don't let my handle go over to the child! + * Since the child never closes it (why should it?), I'd better + * make sure he never sees it in the first place. Otherwise, + * we are in deadlock city. + */ + _osfile[hfMe] = 0x81; /* Danger, Will Robinson! */ + if (!(ppi->pfId = fdopen(hfMe, t))) goto no_fdopen; + DosSetFHandState(hfMe, OPEN_FLAGS_NOINHERIT); + + /* Save the original handle because we're going to diddle it */ + hfSave = 0xFFFF; + if (DosDupHandle(hf, &hfSave)) goto no_dup_init; + + /* Force the child's handle onto the stdio handle */ + if (DosDupHandle(hfYou, &hf)) goto no_force_dup; + DosClose(hfYou); + + /* Now run the guy servicing the pipe */ + us = DosExecPgm(NULL, 0, EXEC_ASYNCRESULT, pszzPipeArgs, pszzEnviron, + &rc, pszzPipeArgs); + + /* Restore stdio handle, even if exec failed. */ + DosDupHandle(hfSave, &hf); close(hfSave); + + /* See if the exec succeeded. */ + if (us) goto no_exec_pgm; + + /* Remember the child's PID */ + ppi->pidChild = rc.codeTerminate; + + Safefree(pszzEnviron); + + /* Phew. */ + return ppi->pfId; + + /* Here is where we clean up after an error. */ +no_exec_pgm: ; +no_force_dup: close(hfSave); +no_dup_init: fclose(f); +no_fdopen: + DosClose(hfMe); DosClose(hfYou); + ppi->pfId = 0; + Safefree(pszzEnviron); + return NULL; +} + + +/* mypclose: Closes the pipe associated with the file handle. + * After waiting for the child process to terminate, its return + * code is returned. If the stream was not associated with a pipe, + * we return -1. + */ +int +mypclose(FILE *f) +{ + PPIPEINFO ppi; + RESULTCODES rc; + USHORT us; + + /* Find the pipe this (FILE *) refers to */ + for (ppi = &PipeInfo[0]; ppi < &PipeInfo[MAXPIPES]; ppi++) + if (ppi->pfId == f) goto foundit; + return -1; +foundit: + if (ppi->fReading && !DosRead(fileno(f), &rc, 1, &us) && us > 0) { + DosKillProcess(DKP_PROCESSTREE, ppi->pidChild); + } + fclose(f); + DosCwait(DCWA_PROCESS, DCWW_WAIT, &rc, &ppi->pidChild, ppi->pidChild); + ppi->pfId = 0; + return rc.codeResult; +} + +/* pipe: The only tricky thing is letting the runtime library know about + * our two new file descriptors. + */ +int pipe(int filedes[2]) +{ + HFILE hfRead, hfWrite; + USHORT usResult; + + usResult = DosMakePipe(&hfRead, &hfWrite, 0); + if (usResult) { + /* Error 4 == ERROR_TOO_MANY_OPEN_FILES */ + errno = (usResult == 4) ? ENFILE : ENOMEM; + return -1; + } + _osfile[hfRead] = _osfile[hfWrite] = 0x81;/* Danger, Will Robinson! */ + filedes[0] = hfRead; + filedes[1] = hfWrite; + return 0; +} diff --git a/patchlevel.h b/patchlevel.h index f198d8a..10c8c21 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -1 +1 @@ -#define PATCHLEVEL 24 +#define PATCHLEVEL 25 diff --git a/regcomp.c b/regcomp.c index 9a7be67..68da52e 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7,9 +7,16 @@ * blame Henry for some of the lack of readability. */ -/* $Header: regcomp.c,v 3.0.1.3 90/03/12 16:59:22 lwall Locked $ +/* $Header: regcomp.c,v 3.0.1.4 90/08/09 05:05:33 lwall Locked $ * * $Log: regcomp.c,v $ + * Revision 3.0.1.4 90/08/09 05:05:33 lwall + * patch19: sped up /x+y/ patterns greatly by not retrying on every x + * patch19: inhibited backoff on patterns anchored to the end like /\s+$/ + * patch19: sped up {m,n} on simple items + * patch19: optimized /.*whatever/ to /^.*whatever/ + * patch19: fixed character classes to allow backslashing hyphen + * * Revision 3.0.1.3 90/03/12 16:59:22 lwall * patch13: pattern matches can now use \0 to mean \000 * @@ -121,11 +128,10 @@ STATIC void regoptail(); * of the structure of the compiled regexp. [I'll say.] */ regexp * -regcomp(exp,xend,fold,rare) +regcomp(exp,xend,fold) char *exp; char *xend; int fold; -int rare; { register regexp *r; register char *scan; @@ -137,6 +143,7 @@ int rare; int curback; extern char *safemalloc(); extern char *savestr(); + int sawplus = 0; if (exp == NULL) fatal("NULL regexp argument"); @@ -190,8 +197,14 @@ int rare; first = scan; while ((OP(first) > OPEN && OP(first) < CLOSE) || (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) || - (OP(first) == PLUS) ) + (OP(first) == PLUS) || + (OP(first) == CURLY && ARG1(first) > 0) ) { + if (OP(first) == CURLY) + first += 4; + else if (OP(first) == PLUS) + sawplus = 2; first = NEXTOPER(first); + } /* Starting-point info. */ if (OP(first) == EXACTLY) { @@ -204,8 +217,10 @@ int rare; r->regstclass = first; else if (OP(first) == BOUND || OP(first) == NBOUND) r->regstclass = first; - else if (OP(first) == BOL) - r->reganch++; + else if (OP(first) == BOL || + (OP(first) == STAR && OP(NEXTOPER(first)) == ANY) ) + r->reganch = 1; /* kinda turn .* into ^.* */ + r->reganch |= sawplus; #ifdef DEBUGGING if (debug & 512) @@ -449,21 +464,44 @@ int *flagp; next++; } if (*next == '}') { /* got one */ - regsawbracket++; /* remember we clobbered exp */ if (!max) max = next; regparse++; iter = atoi(regparse); + if (flags&SIMPLE) { /* we can do it right after all */ + int tmp; + + reginsert(CURLY, ret); + if (*max == ',') + max++; + tmp = atoi(max); + if (tmp && tmp < iter) + fatal("Can't do {n,m} with n > m"); + if (regcode != ®dummy) { +#ifdef REGALIGN + *(unsigned short *)(ret+3) = iter; + *(unsigned short *)(ret+5) = tmp; +#else + ret[3] = iter >> 8; ret[4] = iter & 0377; + ret[5] = tmp >> 8; ret[6] = tmp & 0377; +#endif + } + regparse = next; + goto nest_check; + } + regsawbracket++; /* remember we clobbered exp */ if (iter > 0) { ch = *max; sprintf(regparse,"%.*d", max-regparse, iter - 1); *max = ch; - if (*max == ',' && atoi(max+1) > 0) { + if (*max == ',' && max[1] != '}') { + if (atoi(max+1) <= 0) + fatal("Can't do {n,m} with n > m"); ch = *next; sprintf(max+1,"%.*d", next-(max+1), atoi(max+1) - 1); *next = ch; } - if (iter != 1 || (*max == ',' || atoi(max+1))) { + if (iter != 1 || *max == ',') { regparse = origparse; /* back up input pointer */ regnpar = orignpar; /* don't make more parens */ } @@ -793,20 +831,20 @@ regclass() register char *ret; register int def; + ret = regnode(ANYOF); if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT); regparse++; def = 0; } else { - ret = regnode(ANYOF); def = 255; } bits = regcode; for (class = 0; class < 32; class++) regc(def); if (*regparse == ']' || *regparse == '-') - regset(bits,def,lastclass = *regparse++); + goto skipcond; /* allow 1st char to be ] or - */ while (regparse < regxend && *regparse != ']') { + skipcond: class = UCHARAT(regparse++); if (class == '\\') { class = UCHARAT(regparse++); @@ -863,19 +901,21 @@ regclass() break; } } - if (!range && class == '-' && regparse < regxend && - *regparse != ']') { - range = 1; - continue; - } if (range) { if (lastclass > class) FAIL("invalid [] range in regexp"); + range = 0; } - else - lastclass = class - 1; - range = 0; - for (lastclass++; lastclass <= class; lastclass++) { + else { + lastclass = class; + if (*regparse == '-' && regparse+1 < regxend && + regparse[1] != ']') { + regparse++; + range = 1; + continue; /* do it next time */ + } + } + for ( ; lastclass <= class; lastclass++) { regset(bits,def,lastclass); if (regfold && isupper(lastclass)) regset(bits,def,tolower(lastclass)); @@ -949,21 +989,22 @@ char *opnd; register char *src; register char *dst; register char *place; + register offset = (op == CURLY ? 4 : 0); if (regcode == ®dummy) { #ifdef REGALIGN - regsize += 4; + regsize += 4 + offset; #else - regsize += 3; + regsize += 3 + offset; #endif return; } src = regcode; #ifdef REGALIGN - regcode += 4; + regcode += 4 + offset; #else - regcode += 3; + regcode += 3 + offset; #endif dst = regcode; while (src > opnd) @@ -973,6 +1014,8 @@ char *opnd; *place++ = op; *place++ = '\0'; *place++ = '\0'; + while (offset-- > 0) + *place++ = '\0'; } /* @@ -1081,7 +1124,7 @@ regexp *r; else fprintf(stderr,"(%d)", (s-r->program)+(next-s)); s += 3; - if (op == ANYOF || op == ANYBUT) { + if (op == ANYOF) { s += 32; } if (op == EXACTLY) { @@ -1101,8 +1144,10 @@ regexp *r; fprintf(stderr,"start `%s' ", r->regstart->str_ptr); if (r->regstclass) fprintf(stderr,"stclass `%s' ", regprop(r->regstclass)); - if (r->reganch) + if (r->reganch & 1) fprintf(stderr,"anchored "); + if (r->reganch & 2) + fprintf(stderr,"plus "); if (r->regmust != NULL) fprintf(stderr,"must have \"%s\" back %d ", r->regmust->str_ptr, r->regback); @@ -1133,9 +1178,6 @@ char *op; case ANYOF: p = "ANYOF"; break; - case ANYBUT: - p = "ANYBUT"; - break; case BRANCH: p = "BRANCH"; break; @@ -1175,6 +1217,11 @@ char *op; case NDIGIT: p = "NDIGIT"; break; + case CURLY: + (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", + ARG1(op),ARG2(op)); + p = NULL; + break; case REF: case REF+1: case REF+2: diff --git a/regcomp.h b/regcomp.h index c4c6520..a2e2fbb 100644 --- a/regcomp.h +++ b/regcomp.h @@ -1,6 +1,9 @@ -/* $Header: regcomp.h,v 3.0 89/10/18 15:22:39 lwall Locked $ +/* $Header: regcomp.h,v 3.0.1.1 90/08/09 05:06:49 lwall Locked $ * * $Log: regcomp.h,v $ + * Revision 3.0.1.1 90/08/09 05:06:49 lwall + * patch19: sped up {m,n} on simple items + * * Revision 3.0 89/10/18 15:22:39 lwall * 3.0 baseline * @@ -57,8 +60,8 @@ #define BOL 1 /* no Match "" at beginning of line. */ #define EOL 2 /* no Match "" at end of line. */ #define ANY 3 /* no Match any one character. */ -#define ANYOF 4 /* str Match any character in this string. */ -#define ANYBUT 5 /* str Match any character not in this string. */ +#define ANYOF 4 /* str Match character in (or not in) this class. */ +#define CURLY 5 /* str Match this simple thing {n,m} times. */ #define BRANCH 6 /* node Match this alternative, or the next... */ #define BACK 7 /* no Match "", "next" ptr points backward. */ #define EXACTLY 8 /* str Match this string (preceded by length). */ @@ -105,7 +108,7 @@ #ifndef DOINIT extern char varies[]; #else -char varies[] = {BRANCH,BACK,STAR,PLUS, +char varies[] = {BRANCH,BACK,STAR,PLUS,CURLY, REF+1,REF+2,REF+3,REF+4,REF+5,REF+6,REF+7,REF+8,REF+9,0}; #endif @@ -113,7 +116,7 @@ char varies[] = {BRANCH,BACK,STAR,PLUS, #ifndef DOINIT extern char simple[]; #else -char simple[] = {ANY,ANYOF,ANYBUT,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0}; +char simple[] = {ANY,ANYOF,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0}; #endif EXT char regdummy; @@ -145,8 +148,12 @@ EXT char regdummy; #ifndef lint #ifdef REGALIGN #define NEXT(p) (*(short*)(p+1)) +#define ARG1(p) (*(unsigned short*)(p+3)) +#define ARG2(p) (*(unsigned short*)(p+5)) #else #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) +#define ARG1(p) (((*((p)+3)&0377)<<8) + (*((p)+4)&0377)) +#define ARG2(p) (((*((p)+5)&0377)<<8) + (*((p)+6)&0377)) #endif #else /* lint */ #define NEXT(p) 0 diff --git a/regexec.c b/regexec.c index 2c6213b..61439ea 100644 --- a/regexec.c +++ b/regexec.c @@ -7,9 +7,16 @@ * blame Henry for some of the lack of readability. */ -/* $Header: regexec.c,v 3.0.1.3 90/02/28 18:14:39 lwall Locked $ +/* $Header: regexec.c,v 3.0.1.4 90/08/09 05:12:03 lwall Locked $ * * $Log: regexec.c,v $ + * Revision 3.0.1.4 90/08/09 05:12:03 lwall + * patch19: sped up /x+y/ patterns greatly by not retrying on every x + * patch19: inhibited backoff on patterns anchored to the end like /\s+$/ + * patch19: sped up {m,n} on simple items + * patch19: $' broke on embedded nulls + * patch19: $ will now only match at end of string if $* == 0 + * * Revision 3.0.1.3 90/02/28 18:14:39 lwall * patch9: /[\200-\377]/ didn't work on machines with signed chars * patch9: \d, \w, and \s could misfire on characters with high bit set @@ -198,7 +205,7 @@ int safebase; /* no need to remember string in subbase */ /* Simplest case: anchored match need be tried only once. */ /* [unless multiline is set] */ - if (prog->reganch) { + if (prog->reganch & 1) { if (regtry(prog, string)) goto got_it; else if (multiline) { @@ -208,9 +215,9 @@ int safebase; /* no need to remember string in subbase */ /* for multiline we only have to try after newlines */ if (s > string) s--; - for (; s < strend; s++) { - if (*s == '\n') { - if (++s < strend && regtry(prog, s)) + while (s < strend) { + if (*s++ == '\n') { + if (s < strend && regtry(prog, s)) goto got_it; } } @@ -220,8 +227,22 @@ int safebase; /* no need to remember string in subbase */ /* Messy cases: unanchored match. */ if (prog->regstart) { - /* We know what string it must start with. */ - if (prog->regstart->str_pok == 3) { + if (prog->reganch & 2) { /* we have /x+whatever/ */ + /* it must be a one character string */ + i = prog->regstart->str_ptr[0]; + while (s < strend) { + if (*s == i) { + if (regtry(prog, s)) + goto got_it; + s++; + while (s < strend && *s == i) + s++; + } + s++; + } + } + else if (prog->regstart->str_pok == 3) { + /* We know what string it must start with. */ #ifndef lint while ((s = fbminstr((unsigned char*)s, (unsigned char*)strend, prog->regstart)) != NULL) @@ -246,18 +267,26 @@ int safebase; /* no need to remember string in subbase */ goto phooey; } if (c = prog->regstclass) { + int doevery = (prog->reganch & 2) == 0; + if (minlen) dontbother = minlen - 1; strend -= dontbother; /* don't bother with what can't match */ + tmp = 1; /* We know what class it must start with. */ switch (OP(c)) { - case ANYOF: case ANYBUT: + case ANYOF: c = OPERAND(c); while (s < strend) { i = UCHARAT(s); - if (!(c[i >> 3] & (1 << (i&7)))) - if (regtry(prog, s)) + if (!(c[i >> 3] & (1 << (i&7)))) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; @@ -305,50 +334,80 @@ int safebase; /* no need to remember string in subbase */ case ALNUM: while (s < strend) { i = *s; - if (isALNUM(i)) - if (regtry(prog, s)) + if (isALNUM(i)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; case NALNUM: while (s < strend) { i = *s; - if (!isALNUM(i)) - if (regtry(prog, s)) + if (!isALNUM(i)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; case SPACE: while (s < strend) { - if (isSPACE(*s)) - if (regtry(prog, s)) + if (isSPACE(*s)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; case NSPACE: while (s < strend) { - if (!isSPACE(*s)) - if (regtry(prog, s)) + if (!isSPACE(*s)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; case DIGIT: while (s < strend) { - if (isDIGIT(*s)) - if (regtry(prog, s)) + if (isDIGIT(*s)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; case NDIGIT: while (s < strend) { - if (!isDIGIT(*s)) - if (regtry(prog, s)) + if (!isDIGIT(*s)) { + if (tmp && regtry(prog, s)) goto got_it; + else + tmp = doevery; + } + else + tmp = 1; s++; } break; @@ -379,6 +438,7 @@ int safebase; /* no need to remember string in subbase */ if (prog->subbase) Safefree(prog->subbase); prog->subbase = s; + prog->subend = s+i; } else s = prog->subbase; @@ -486,14 +546,16 @@ char *prog; ((nextchar || locinput < regeol) && locinput[-1] == '\n') ) { - regtill--; + regtill = regbol; break; } return(0); case EOL: if ((nextchar || locinput < regeol) && nextchar != '\n') return(0); - regtill--; + if (!multiline && regeol - locinput > 1) + return 0; + regtill = regbol; break; case ANY: if ((nextchar == '\0' && locinput >= regeol) || @@ -507,7 +569,7 @@ char *prog; /* Inline the first character, for speed. */ if (*s != nextchar) return(0); - if (locinput + ln > regeol) + if (regeol - locinput < ln) return 0; if (ln > 1 && bcmp(s, locinput, ln) != 0) return(0); @@ -515,7 +577,6 @@ char *prog; nextchar = *locinput; break; case ANYOF: - case ANYBUT: s = OPERAND(scan); if (nextchar < 0) nextchar = UCHARAT(locinput); @@ -685,19 +746,33 @@ char *prog; } } break; + case CURLY: + ln = ARG1(scan); /* min to match */ + n = ARG2(scan); /* max to match */ + scan = NEXTOPER(scan) + 4; + goto repeat; case STAR: + ln = 0; + n = 0; + scan = NEXTOPER(scan); + goto repeat; case PLUS: /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ + ln = 1; + n = 0; + scan = NEXTOPER(scan); + repeat: if (OP(next) == EXACTLY) nextchar = *(OPERAND(next)+1); else nextchar = -1000; - ln = (OP(scan) == STAR) ? 0 : 1; reginput = locinput; - n = regrepeat(NEXTOPER(scan)); + n = regrepeat(scan, n); + if (!multiline && OP(next) == EOL) + ln = n; /* why back off? */ while (n >= ln) { /* If it could work, try it. */ if (nextchar == -1000 || *reginput == nextchar) @@ -739,8 +814,9 @@ char *prog; * rather than incrementing count on every character.] */ static int -regrepeat(p) +regrepeat(p, max) char *p; +int max; { register char *scan; register char *opnd; @@ -748,6 +824,8 @@ char *p; register char *loceol = regeol; scan = reginput; + if (max && max < loceol - scan) + loceol = scan + max; opnd = OPERAND(p); switch (OP(p)) { case ANY: @@ -760,7 +838,6 @@ char *p; scan++; break; case ANYOF: - case ANYBUT: c = UCHARAT(scan); while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) { scan++; diff --git a/regexp.h b/regexp.h index 576f0b1..39620da 100644 --- a/regexp.h +++ b/regexp.h @@ -5,9 +5,12 @@ * not the System V one. */ -/* $Header: regexp.h,v 3.0 89/10/18 15:22:46 lwall Locked $ +/* $Header: regexp.h,v 3.0.1.1 90/08/09 05:12:55 lwall Locked $ * * $Log: regexp.h,v $ + * Revision 3.0.1.1 90/08/09 05:12:55 lwall + * patch19: $' broke on embedded nulls + * * Revision 3.0 89/10/18 15:22:46 lwall * 3.0 baseline * @@ -24,6 +27,7 @@ typedef struct regexp { int regback; /* Can regmust locate first try? */ char *precomp; /* pre-compilation regular expression */ char *subbase; /* saved string so \digit works forever */ + char *subend; /* end of subbase */ char reganch; /* Internal use only. */ char do_folding; /* do case-insensitive match? */ char lastparen; /* last paren matched */ diff --git a/stab.c b/stab.c index 30b797b..15ae9b3 100644 --- a/stab.c +++ b/stab.c @@ -1,4 +1,4 @@ -/* $Header: stab.c,v 3.0.1.6 90/03/27 16:22:11 lwall Locked $ +/* $Header: stab.c,v 3.0.1.7 90/08/09 05:17:48 lwall Locked $ * * Copyright (c) 1989, Larry Wall * @@ -6,6 +6,15 @@ * as specified in the README file that comes with the perl 3.0 kit. * * $Log: stab.c,v $ + * Revision 3.0.1.7 90/08/09 05:17:48 lwall + * patch19: fixed double include of + * patch19: $' broke on embedded nulls + * patch19: $< and $> better supported on machines without setreuid + * patch19: Added support for linked-in C subroutines + * patch19: %ENV wasn't forced to be global like it should + * patch19: $| didn't work before the filehandle was opened + * patch19: $! now returns "" in string context if errno == 0 + * * Revision 3.0.1.6 90/03/27 16:22:11 lwall * patch16: support for machines that can't cast negative floats to unsigned ints * @@ -38,7 +47,9 @@ #include "EXTERN.h" #include "perl.h" +#ifndef NSIG #include +#endif static char *sig_name[] = { SIG_NAME,0 @@ -105,7 +116,7 @@ STR *str; if (curspat) { if (curspat->spat_regexp && (s = curspat->spat_regexp->endp[0]) ) { - str_set(stab_val(stab),s); + str_nset(stab_val(stab),s, curspat->spat_regexp->subend - s); } else str_nset(stab_val(stab),"",0); @@ -151,6 +162,8 @@ STR *str; str_numset(stab_val(stab),(double)arybase); break; case '|': + if (!stab_io(curoutstab)) + stab_io(curoutstab) = stio_new(); str_numset(stab_val(stab), (double)((stab_io(curoutstab)->flags & IOF_FLUSH) != 0) ); break; @@ -165,7 +178,7 @@ STR *str; break; case '!': str_numset(stab_val(stab), (double)errno); - str_set(stab_val(stab), strerror(errno)); + str_set(stab_val(stab), errno ? strerror(errno) : ""); stab_val(stab)->str_nok = 1; /* what a wonderful hack! */ break; case '<': @@ -199,6 +212,14 @@ STR *str; #endif str_set(stab_val(stab),buf); break; + default: + { + struct ufuncs *uf = (struct ufuncs *)str->str_ptr; + + if (uf && uf->uf_val) + uf->uf_val(uf->uf_index, stab_val(stab)); + } + break; } return stab_val(stab); } @@ -256,10 +277,17 @@ STR *str; stab->str_pok = 1; strcpy(stab_magic(stab),"StB"); stab_val(stab) = Str_new(70,0); - stab_line(stab) = line; + stab_line(stab) = curcmd->c_line; } - else + else { stab = stabent(s,TRUE); + if (!stab_xarray(stab)) + aadd(stab); + if (!stab_xhash(stab)) + hadd(stab); + if (!stab_io(stab)) + stab_io(stab) = stio_new(); + } str_sset(str,stab); } break; @@ -305,6 +333,8 @@ STR *str; stab_io(curoutstab)->page = (long)str_gnum(str); break; case '|': + if (!stab_io(curoutstab)) + stab_io(curoutstab) = stio_new(); stab_io(curoutstab)->flags &= ~IOF_FLUSH; if (str_gnum(str) != 0.0) { stab_io(curoutstab)->flags |= IOF_FLUSH; @@ -366,7 +396,10 @@ STR *str; if (setreuid((UIDTYPE)uid, (UIDTYPE)-1) < 0) uid = (int)getuid(); #else - fatal("setruid() not implemented"); + if (uid == euid) /* special case $< = $> */ + setuid(uid); + else + fatal("setruid() not implemented"); #endif #endif break; @@ -386,7 +419,10 @@ STR *str; if (setreuid((UIDTYPE)-1, (UIDTYPE)euid) < 0) euid = (int)geteuid(); #else - fatal("seteuid() not implemented"); + if (euid == uid) /* special case $> = $< */ + setuid(euid); + else + fatal("seteuid() not implemented"); #endif #endif break; @@ -429,6 +465,14 @@ STR *str; case ':': chopset = str_get(str); break; + default: + { + struct ufuncs *uf = (struct ufuncs *)str->str_magic->str_ptr; + + if (uf && uf->uf_set) + uf->uf_set(uf->uf_index, str); + } + break; } break; } @@ -465,6 +509,9 @@ int sig; ARRAY *oldstack = stack; SUBR *sub; +#ifdef OS2 /* or anybody else who requires SIG_ACK */ + signal(sig, SIG_ACK); +#endif stab = stabent( str_get(hfetch(stab_hash(sigstab),sig_name[sig],strlen(sig_name[sig]), TRUE)), TRUE); @@ -555,7 +602,7 @@ int add; if (*name == 'I' && strEQ(name, "INC")) global = TRUE; } - else if (*name >= 'A') { + else if (*name > 'A') { if (*name == 'E' && strEQ(name, "ENV")) global = TRUE; } @@ -615,7 +662,7 @@ int add; stab->str_pok = 1; strcpy(stab_magic(stab),"StB"); stab_val(stab) = Str_new(72,0); - stab_line(stab) = line; + stab_line(stab) = curcmd->c_line; str_magic(stab,stab,'*',name,len); return stab; } @@ -644,7 +691,7 @@ register int max; stab = (STAB*)entry->hent_val; if (stab->str_pok & SP_MULTI) continue; - line = stab_line(stab); + curcmd->c_line = stab_line(stab); warn("Possible typo: \"%s\"", stab_name(stab)); } } diff --git a/x2p/s2p.SH b/x2p/s2p.SH index 08230b0..66d7b72 100644 --- a/x2p/s2p.SH +++ b/x2p/s2p.SH @@ -28,9 +28,12 @@ $spitshell >s2p <>s2p <<'!NO!SUBS!' -# $Header: s2p.SH,v 3.0.1.3 90/03/01 10:31:21 lwall Locked $ +# $Header: s2p.SH,v 3.0.1.4 90/08/09 05:50:43 lwall Locked $ # # $Log: s2p.SH,v $ +# Revision 3.0.1.4 90/08/09 05:50:43 lwall +# patch19: s2p didn't translate \n right +# # Revision 3.0.1.3 90/03/01 10:31:21 lwall # patch9: s2p didn't handle \< and \> # @@ -424,6 +427,9 @@ ${space}next line;"; $len = length($_); $_ = substr($_,0,--$len); } + elsif (substr($_,$i,1) =~ /^[n]$/) { + ; + } elsif (!$repl && substr($_,$i,1) =~ /^[(){}\w]$/) { $i--; $len--; @@ -612,7 +618,7 @@ sub fetchpat { if ($delim eq '\\') { s/(.)//; $ch = $1; - $delim = '' if $ch =~ /^[(){}\w]$/; + $delim = '' if $ch =~ /^[(){}A-Za-mo-z]$/; $ch = 'b' if $ch =~ /^[<>]$/; $delim .= $ch; }