perl 3.0 patch #25 patch #19, continued
Larry Wall [Wed, 8 Aug 1990 17:07:07 +0000 (17:07 +0000)]
See patch #19.

13 files changed:
eg/relink
eg/rename
h2pl/eg/sizeof.ph [new file with mode: 0644]
lib/pwd.pl [new file with mode: 0644]
msdos/popen.c
os2/popen.c [new file with mode: 0644]
patchlevel.h
regcomp.c
regcomp.h
regexec.c
regexp.h
stab.c
x2p/s2p.SH

index 2d8e5f6..d31f96e 100644 (file)
--- a/eg/relink
+++ b/eg/relink
@@ -1,14 +1,18 @@
 #!/usr/bin/perl
+'di';
+'ig00';
+#
+# $Header: relink,v 3.0.1.2 90/08/09 03:17:44 lwall Locked $
+#
+# $Log:        relink,v $
+# Revision 3.0.1.2  90/08/09  03:17:44  lwall
+# patch19: added man page for relink and rename
+# 
 
 ($op = shift) || die "Usage: relink perlexpr [filenames]\n";
 if (!@ARGV) {
-    if (-t) {
-       @ARGV = <*>;
-    }
-    else {
-       @ARGV = <STDIN>;
-       chop(@ARGV);
-    }
+    @ARGV = <STDIN>;
+    chop(@ARGV);
 }
 for (@ARGV) {
     next unless -l;            # symbolic link?
@@ -22,3 +26,60 @@ for (@ARGV) {
        symlink($_, $name);
     }
 }
+##############################################################################
+
+       # These next few lines are legal in both Perl and nroff.
+
+.00;                   # finish .ig
+'di                    \" finish diversion--previous line must be blank
+.nr nl 0-1             \" fake up transition to first page again
+.nr % 0                        \" start at page 1
+';<<'.ex'; #__END__ ############# From here on it's a standard manual page ############
+.TH RELINK 1 "July 30, 1990"
+.AT 3
+.SH LINK
+relink \- relinks multiple symbolic links
+.SH SYNOPSIS
+.B relink perlexpr [symlinknames]
+.SH DESCRIPTION
+.I Relink
+relinks the symbolic links given according to the rule specified as the
+first argument.
+The argument is a Perl expression which is expected to modify the $_
+string in Perl for at least some of the names specified.
+For each symbolic link named on the command line, the Perl expression
+will be executed on the contents of the symbolic link with that name.
+If a given symbolic link's contents is not modified by the expression,
+it will not be changed.
+If a name given on the command line is not a symbolic link, it will be ignored.
+If no names are given on the command line, names will be read
+via standard input.
+.PP
+For example, to relink all symbolic links in the current directory
+pointing to somewhere in X11R3 so that they point to X11R4, you might say
+.nf
+
+       relink 's/X11R3/X11R4/' *
+
+.fi
+To change all occurences of links in the system from /usr/spool to /var/spool,
+you'd say
+.nf
+
+       find / -type l -print | relink 's#/usr/spool#/var/spool#'
+
+.fi
+.SH ENVIRONMENT
+No environment variables are used.
+.SH FILES
+.SH AUTHOR
+Larry Wall
+.SH "SEE ALSO"
+ln(1)
+.br
+perl(1)
+.SH DIAGNOSTICS
+If you give an invalid Perl expression you'll get a syntax error.
+.SH BUGS
+.ex
index 1bb19d7..f63a0eb 100644 (file)
--- a/eg/rename
+++ b/eg/rename
@@ -1,14 +1,18 @@
 #!/usr/bin/perl
+'di';
+'ig00';
+#
+# $Header: rename,v 3.0.1.2 90/08/09 03:17:57 lwall Locked $
+#
+# $Log:        rename,v $
+# Revision 3.0.1.2  90/08/09  03:17:57  lwall
+# patch19: added man page for relink and rename
+# 
 
 ($op = shift) || die "Usage: rename perlexpr [filenames]\n";
 if (!@ARGV) {
-    if (-t) {
-       @ARGV = <*>;
-    }
-    else {
-       @ARGV = <STDIN>;
-       chop(@ARGV);
-    }
+    @ARGV = <STDIN>;
+    chop(@ARGV);
 }
 for (@ARGV) {
     $was = $_;
@@ -16,3 +20,58 @@ for (@ARGV) {
     die $@ if $@;
     rename($was,$_) unless $was eq $_;
 }
+##############################################################################
+
+       # These next few lines are legal in both Perl and nroff.
+
+.00;                   # finish .ig
+'di                    \" finish diversion--previous line must be blank
+.nr nl 0-1             \" fake up transition to first page again
+.nr % 0                        \" start at page 1
+';<<'.ex'; #__END__ ############# From here on it's a standard manual page ############
+.TH RENAME 1 "July 30, 1990"
+.AT 3
+.SH NAME
+rename \- renames multiple files
+.SH SYNOPSIS
+.B rename perlexpr [files]
+.SH DESCRIPTION
+.I Rename
+renames the filenames supplied according to the rule specified as the
+first argument.
+The argument is a Perl expression which is expected to modify the $_
+string in Perl for at least some of the filenames specified.
+If a given filename is not modified by the expression, it will not be
+renamed.
+If no filenames are given on the command line, filenames will be read
+via standard input.
+.PP
+For example, to rename all files matching *.bak to strip the extension,
+you might say
+.nf
+
+       rename 's/\e.bak$//' *.bak
+
+.fi
+To translate uppercase names to lower, you'd use
+.nf
+
+       rename 'y/A-Z/a-z/' *
+
+.fi
+.SH ENVIRONMENT
+No environment variables are used.
+.SH FILES
+.SH AUTHOR
+Larry Wall
+.SH "SEE ALSO"
+mv(1)
+.br
+perl(1)
+.SH DIAGNOSTICS
+If you give an invalid Perl expression you'll get a syntax error.
+.SH BUGS
+.I Rename
+does not check for the existence of target filenames, so use with care.
+.ex
diff --git a/h2pl/eg/sizeof.ph b/h2pl/eg/sizeof.ph
new file mode 100644 (file)
index 0000000..285bff1
--- /dev/null
@@ -0,0 +1,14 @@
+$sizeof{'char'} = 1;
+$sizeof{'int'} = 4;
+$sizeof{'long'} = 4;
+$sizeof{'struct arpreq'} = 36;
+$sizeof{'struct ifconf'} = 8;
+$sizeof{'struct ifreq'} = 32;
+$sizeof{'struct ltchars'} = 6;
+$sizeof{'struct pcntl'} = 116;
+$sizeof{'struct rtentry'} = 52;
+$sizeof{'struct sgttyb'} = 6;
+$sizeof{'struct tchars'} = 6;
+$sizeof{'struct ttychars'} = 14;
+$sizeof{'struct winsize'} = 8;
+$sizeof{'struct termios'} = 132;
diff --git a/lib/pwd.pl b/lib/pwd.pl
new file mode 100644 (file)
index 0000000..c141e98
--- /dev/null
@@ -0,0 +1,48 @@
+;# pwd.pl - keeps track of current working directory in PWD environment var
+;#
+;# $Header: pwd.pl,v 3.0.1.1 90/08/09 04:01:24 lwall Locked $
+;#
+;# $Log:       pwd.pl,v $
+;# Revision 3.0.1.1  90/08/09  04:01:24  lwall
+;# patch19: Initial revision
+;# 
+;#
+;# Usage:
+;#     require "pwd.pl";
+;#     &initpwd;
+;#     ...
+;#     &chdir($newdir);
+
+package pwd;
+
+sub main'initpwd {
+    if ($ENV{'PWD'}) {
+       local($dd,$di) = stat('.');
+       local($pd,$pi) = stat($ENV{'PWD'});
+       return if $di == $pi && $dd == $pd;
+    }
+    chop($ENV{'PWD'} = `pwd`);
+}
+
+sub main'chdir {
+    local($newdir) = shift;
+    if (chdir $newdir) {
+       if ($newdir =~ m#^/#) {
+           $ENV{'PWD'} = $newdir;
+       }
+       else {
+           local(@curdir) = split(m#/#,$ENV{'PWD'});
+           @curdir = '' unless @curdir;
+           foreach $component (split(m#/#, $newdir)) {
+               next if $component eq '.';
+               pop(@curdir),next if $component eq '..';
+               push(@curdir,$component);
+           }
+           $ENV{'PWD'} = join('/',@curdir) || '/';
+       }
+    }
+    else {
+       0;
+    }
+}
+
index 60b2179..4cc58d1 100644 (file)
@@ -1,4 +1,4 @@
-/* $Header: popen.c,v 3.0.1.1 90/03/27 16:11:57 lwall Locked $
+/* $Header: popen.c,v 3.0.1.2 90/08/09 04:04:42 lwall Locked $
  *
  *    (C) Copyright 1988, 1990 Diomidis Spinellis.
  *
@@ -6,6 +6,9 @@
  *    as specified in the README file that comes with the perl 3.0 kit.
  *
  * $Log:       popen.c,v $
+ * Revision 3.0.1.2  90/08/09  04:04:42  lwall
+ * patch19: various MSDOS and OS/2 patches folded in
+ * 
  * Revision 3.0.1.1  90/03/27  16:11:57  lwall
  * patch16: MSDOS support
  * 
@@ -85,7 +88,7 @@ mypopen(const char *command, const char *t)
 
        switch (*t) {
        case 'r':
-               sprintf(buff, "%s >%s", command, name);
+               sprintf(buff, "%s>%s", command, name);
                if (system(buff) || (f = fopen(name, "r")) == NULL) {
                        free(name);
                        return NULL;
diff --git a/os2/popen.c b/os2/popen.c
new file mode 100644 (file)
index 0000000..7c71ccc
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Pipe support for OS/2.
+ *
+ * WARNING:  I am guilty of chumminess with the runtime library because
+ *           I had no choice.  Details to follow.
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#define INCL_DOSPROCESS
+#define INCL_DOSQUEUES
+#define INCL_DOSMISC
+#define INCL_DOSMEMMGR
+#include <os2.h>
+
+extern char **environ;
+
+/* This mysterious array _osfile is used internally by the runtime
+ * library to remember assorted things about open file handles.
+ * The problem is that we are creating file handles via DosMakePipe,
+ * rather than via the runtime library.  This means that we have
+ * to fake the runtime library into thinking that the handles we've
+ * created are honest file handles.  So just before doing the fdopen,
+ * we poke in a magic value that fools the library functions into
+ * thinking that the handle is already open in text mode.
+ *
+ * This might not work for your compiler, so beware.
+ */
+extern char _osfile[];
+
+/* The maximum number of simultaneously open pipes.  We create an
+ * array of this size to record information about each open pipe.
+ */
+#define MAXPIPES 5
+
+/* Information to remember about each open pipe.
+ * The (FILE *) that popen returns is stored because that's the only
+ * way we can keep track of the pipes.
+ */
+typedef struct pipeinfo {
+       FILE *pfId;             /* Which FILE we're talking about */
+       HFILE hfMe;             /* handle I should close at pclose */
+       PID pidChild;           /* Child's PID */
+       CHAR fReading;          /* A read or write pipe? */
+} PIPEINFO, *PPIPEINFO;                /* pi and ppi */
+
+static PIPEINFO PipeInfo[MAXPIPES];
+
+FILE *mypopen(const char *command, const char *t)
+{
+       typedef char *PSZZ;
+       PSZZ pszzPipeArgs = 0;
+       PSZZ pszzEnviron = 0;
+       PSZ *ppsz;
+       PSZ psz;
+       FILE *f;
+       HFILE hfMe, hfYou;
+       HFILE hf, hfSave;
+       RESULTCODES rc;
+       USHORT us;
+       PPIPEINFO ppi;
+       UINT i;
+
+       /* Validate pipe type */
+       if (*t != 'w' && *t != 'r') fatal("Unknown pipe type");
+
+       /* Room for another pipe? */
+       for (ppi = &PipeInfo[0]; ppi < &PipeInfo[MAXPIPES]; ppi++)
+               if (ppi->pfId == 0) goto foundone;
+       return NULL;
+
+foundone:
+
+       /* Make the pipe */
+       if (DosMakePipe(&hfMe, &hfYou, 0)) return NULL;
+
+       /* Build the environment.  First compute its length, then copy
+        * the environment strings into it.
+        */
+       i = 0;
+       for (ppsz = environ; *ppsz; ppsz++) i += 1 + strlen(*ppsz);
+       New(1204, pszzEnviron, 1+i, CHAR);
+
+       psz = pszzEnviron;
+       for (ppsz = environ; *ppsz; ppsz++) {
+               strcpy(psz, *ppsz);
+               psz += 1 + strlen(*ppsz);
+       }
+       *psz = 0;
+
+       /* Build the command string to execute.
+        * 6 = length(0 "/c " 0 0)
+        */
+       if (DosScanEnv("COMSPEC", &psz)) psz = "C:\\OS2\\cmd.exe";
+#if 0
+       New(1203, pszzPipeArgs, strlen(psz) + strlen(command) + 6, CHAR);
+#else
+#define pszzPipeArgs buf
+#endif
+       sprintf(pszzPipeArgs, "%s%c/c %s%c", psz, 0, command, 0);
+
+       /* Now some stuff that depends on what kind of pipe we're doing.
+        * We pull a sneaky trick; namely, that stdin = 0 = false,
+        * and stdout = 1 = true.  The end result is that if the
+        * pipe is a read pipe, then hf = 1; if it's a write pipe, then
+        * hf = 0 and Me and You are reversed.
+        */
+       if (!(hf = (*t == 'r'))) {
+               /* The meaning of Me and You is reversed for write pipes. */
+               hfSave = hfYou; hfYou = hfMe; hfMe = hfSave;
+       }
+
+       ppi->fReading = hf;
+
+       /* Trick number 1:  Fooling the runtime library into thinking
+        * that the file handle is legit.
+        *
+        * Trick number 2:  Don't let my handle go over to the child!
+        * Since the child never closes it (why should it?), I'd better
+        * make sure he never sees it in the first place.  Otherwise,
+        * we are in deadlock city.
+        */
+       _osfile[hfMe] = 0x81;           /* Danger, Will Robinson! */
+       if (!(ppi->pfId = fdopen(hfMe, t))) goto no_fdopen;
+       DosSetFHandState(hfMe, OPEN_FLAGS_NOINHERIT);
+
+       /* Save the original handle because we're going to diddle it */
+       hfSave = 0xFFFF;
+       if (DosDupHandle(hf, &hfSave)) goto no_dup_init;
+
+       /* Force the child's handle onto the stdio handle */
+       if (DosDupHandle(hfYou, &hf)) goto no_force_dup;
+       DosClose(hfYou);
+
+       /* Now run the guy servicing the pipe */
+       us = DosExecPgm(NULL, 0, EXEC_ASYNCRESULT, pszzPipeArgs, pszzEnviron,
+                       &rc, pszzPipeArgs);
+
+       /* Restore stdio handle, even if exec failed. */
+       DosDupHandle(hfSave, &hf); close(hfSave);
+
+       /* See if the exec succeeded. */
+       if (us) goto no_exec_pgm;
+
+       /* Remember the child's PID */
+       ppi->pidChild = rc.codeTerminate;
+
+       Safefree(pszzEnviron);
+
+       /* Phew. */
+       return ppi->pfId;
+
+       /* Here is where we clean up after an error. */
+no_exec_pgm: ;
+no_force_dup: close(hfSave);
+no_dup_init: fclose(f);
+no_fdopen:
+       DosClose(hfMe); DosClose(hfYou);
+       ppi->pfId = 0;
+       Safefree(pszzEnviron);
+       return NULL;
+}
+
+
+/* mypclose:  Closes the pipe associated with the file handle.
+ * After waiting for the child process to terminate, its return
+ * code is returned.  If the stream was not associated with a pipe,
+ * we return -1.
+ */
+int
+mypclose(FILE *f)
+{
+       PPIPEINFO ppi;
+       RESULTCODES rc;
+       USHORT us;
+
+       /* Find the pipe this (FILE *) refers to */
+       for (ppi = &PipeInfo[0]; ppi < &PipeInfo[MAXPIPES]; ppi++)
+               if (ppi->pfId == f) goto foundit;
+       return -1;
+foundit:
+       if (ppi->fReading && !DosRead(fileno(f), &rc, 1, &us) && us > 0) {
+               DosKillProcess(DKP_PROCESSTREE, ppi->pidChild);
+       }
+       fclose(f);
+       DosCwait(DCWA_PROCESS, DCWW_WAIT, &rc, &ppi->pidChild, ppi->pidChild);
+       ppi->pfId = 0;
+       return rc.codeResult;
+}
+
+/* pipe:  The only tricky thing is letting the runtime library know about
+ * our two new file descriptors.
+ */
+int pipe(int filedes[2])
+{
+       HFILE hfRead, hfWrite;
+       USHORT usResult;
+
+       usResult = DosMakePipe(&hfRead, &hfWrite, 0);
+       if (usResult) {
+               /* Error 4 == ERROR_TOO_MANY_OPEN_FILES */
+               errno = (usResult == 4) ? ENFILE : ENOMEM;
+               return -1;
+       }
+       _osfile[hfRead] = _osfile[hfWrite] = 0x81;/* Danger, Will Robinson! */
+       filedes[0] = hfRead;
+       filedes[1] = hfWrite;
+       return 0;
+}
index f198d8a..10c8c21 100644 (file)
@@ -1 +1 @@
-#define PATCHLEVEL 24
+#define PATCHLEVEL 25
index 9a7be67..68da52e 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -7,9 +7,16 @@
  * blame Henry for some of the lack of readability.
  */
 
-/* $Header: regcomp.c,v 3.0.1.3 90/03/12 16:59:22 lwall Locked $
+/* $Header: regcomp.c,v 3.0.1.4 90/08/09 05:05:33 lwall Locked $
  *
  * $Log:       regcomp.c,v $
+ * Revision 3.0.1.4  90/08/09  05:05:33  lwall
+ * patch19: sped up /x+y/ patterns greatly by not retrying on every x
+ * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
+ * patch19: sped up {m,n} on simple items
+ * patch19: optimized /.*whatever/ to /^.*whatever/
+ * patch19: fixed character classes to allow backslashing hyphen
+ * 
  * Revision 3.0.1.3  90/03/12  16:59:22  lwall
  * patch13: pattern matches can now use \0 to mean \000
  * 
@@ -121,11 +128,10 @@ STATIC void regoptail();
  * of the structure of the compiled regexp.  [I'll say.]
  */
 regexp *
-regcomp(exp,xend,fold,rare)
+regcomp(exp,xend,fold)
 char *exp;
 char *xend;
 int fold;
-int rare;
 {
        register regexp *r;
        register char *scan;
@@ -137,6 +143,7 @@ int rare;
        int curback;
        extern char *safemalloc();
        extern char *savestr();
+       int sawplus = 0;
 
        if (exp == NULL)
                fatal("NULL regexp argument");
@@ -190,8 +197,14 @@ int rare;
                first = scan;
                while ((OP(first) > OPEN && OP(first) < CLOSE) ||
                    (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
-                   (OP(first) == PLUS) )
+                   (OP(first) == PLUS) ||
+                   (OP(first) == CURLY && ARG1(first) > 0) ) {
+                       if (OP(first) == CURLY)
+                           first += 4;
+                       else if (OP(first) == PLUS)
+                           sawplus = 2;
                        first = NEXTOPER(first);
+               }
 
                /* Starting-point info. */
                if (OP(first) == EXACTLY) {
@@ -204,8 +217,10 @@ int rare;
                        r->regstclass = first;
                else if (OP(first) == BOUND || OP(first) == NBOUND)
                        r->regstclass = first;
-               else if (OP(first) == BOL)
-                       r->reganch++;
+               else if (OP(first) == BOL ||
+                   (OP(first) == STAR && OP(NEXTOPER(first)) == ANY) )
+                       r->reganch = 1;         /* kinda turn .* into ^.* */
+               r->reganch |= sawplus;
 
 #ifdef DEBUGGING
                if (debug & 512)
@@ -449,21 +464,44 @@ int *flagp;
                next++;
            }
            if (*next == '}') {         /* got one */
-               regsawbracket++;        /* remember we clobbered exp */
                if (!max)
                    max = next;
                regparse++;
                iter = atoi(regparse);
+               if (flags&SIMPLE) {     /* we can do it right after all */
+                   int tmp;
+
+                   reginsert(CURLY, ret);
+                   if (*max == ',')
+                       max++;
+                   tmp = atoi(max);
+                   if (tmp && tmp < iter)
+                       fatal("Can't do {n,m} with n > m");
+                   if (regcode != &regdummy) {
+#ifdef REGALIGN
+                       *(unsigned short *)(ret+3) = iter;
+                       *(unsigned short *)(ret+5) = tmp;
+#else
+                       ret[3] = iter >> 8; ret[4] = iter & 0377;
+                       ret[5] = tmp  >> 8; ret[6] = tmp  & 0377;
+#endif
+                   }
+                   regparse = next;
+                   goto nest_check;
+               }
+               regsawbracket++;        /* remember we clobbered exp */
                if (iter > 0) {
                    ch = *max;
                    sprintf(regparse,"%.*d", max-regparse, iter - 1);
                    *max = ch;
-                   if (*max == ',' && atoi(max+1) > 0) {
+                   if (*max == ',' && max[1] != '}') {
+                       if (atoi(max+1) <= 0)
+                           fatal("Can't do {n,m} with n > m");
                        ch = *next;
                        sprintf(max+1,"%.*d", next-(max+1), atoi(max+1) - 1);
                        *next = ch;
                    }
-                   if (iter != 1 || (*max == ',' || atoi(max+1))) {
+                   if (iter != 1 || *max == ',') {
                        regparse = origparse;   /* back up input pointer */
                        regnpar = orignpar;     /* don't make more parens */
                    }
@@ -793,20 +831,20 @@ regclass()
        register char *ret;
        register int def;
 
+       ret = regnode(ANYOF);
        if (*regparse == '^') { /* Complement of range. */
-               ret = regnode(ANYBUT);
                regparse++;
                def = 0;
        } else {
-               ret = regnode(ANYOF);
                def = 255;
        }
        bits = regcode;
        for (class = 0; class < 32; class++)
            regc(def);
        if (*regparse == ']' || *regparse == '-')
-               regset(bits,def,lastclass = *regparse++);
+               goto skipcond;          /* allow 1st char to be ] or - */
        while (regparse < regxend && *regparse != ']') {
+             skipcond:
                class = UCHARAT(regparse++);
                if (class == '\\') {
                        class = UCHARAT(regparse++);
@@ -863,19 +901,21 @@ regclass()
                                break;
                        }
                }
-               if (!range && class == '-' && regparse < regxend &&
-                   *regparse != ']') {
-                       range = 1;
-                       continue;
-               }
                if (range) {
                        if (lastclass > class)
                                FAIL("invalid [] range in regexp");
+                       range = 0;
                }
-               else
-                       lastclass = class - 1;
-               range = 0;
-               for (lastclass++; lastclass <= class; lastclass++) {
+               else {
+                       lastclass = class;
+                       if (*regparse == '-' && regparse+1 < regxend &&
+                           regparse[1] != ']') {
+                               regparse++;
+                               range = 1;
+                               continue;       /* do it next time */
+                       }
+               }
+               for ( ; lastclass <= class; lastclass++) {
                        regset(bits,def,lastclass);
                        if (regfold && isupper(lastclass))
                                regset(bits,def,tolower(lastclass));
@@ -949,21 +989,22 @@ char *opnd;
        register char *src;
        register char *dst;
        register char *place;
+       register offset = (op == CURLY ? 4 : 0);
 
        if (regcode == &regdummy) {
 #ifdef REGALIGN
-               regsize += 4;
+               regsize += 4 + offset;
 #else
-               regsize += 3;
+               regsize += 3 + offset;
 #endif
                return;
        }
 
        src = regcode;
 #ifdef REGALIGN
-       regcode += 4;
+       regcode += 4 + offset;
 #else
-       regcode += 3;
+       regcode += 3 + offset;
 #endif
        dst = regcode;
        while (src > opnd)
@@ -973,6 +1014,8 @@ char *opnd;
        *place++ = op;
        *place++ = '\0';
        *place++ = '\0';
+       while (offset-- > 0)
+           *place++ = '\0';
 }
 
 /*
@@ -1081,7 +1124,7 @@ regexp *r;
                else 
                        fprintf(stderr,"(%d)", (s-r->program)+(next-s));
                s += 3;
-               if (op == ANYOF || op == ANYBUT) {
+               if (op == ANYOF) {
                        s += 32;
                }
                if (op == EXACTLY) {
@@ -1101,8 +1144,10 @@ regexp *r;
                fprintf(stderr,"start `%s' ", r->regstart->str_ptr);
        if (r->regstclass)
                fprintf(stderr,"stclass `%s' ", regprop(r->regstclass));
-       if (r->reganch)
+       if (r->reganch & 1)
                fprintf(stderr,"anchored ");
+       if (r->reganch & 2)
+               fprintf(stderr,"plus ");
        if (r->regmust != NULL)
                fprintf(stderr,"must have \"%s\" back %d ", r->regmust->str_ptr,
                  r->regback);
@@ -1133,9 +1178,6 @@ char *op;
        case ANYOF:
                p = "ANYOF";
                break;
-       case ANYBUT:
-               p = "ANYBUT";
-               break;
        case BRANCH:
                p = "BRANCH";
                break;
@@ -1175,6 +1217,11 @@ char *op;
        case NDIGIT:
                p = "NDIGIT";
                break;
+       case CURLY:
+               (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}",
+                   ARG1(op),ARG2(op));
+               p = NULL;
+               break;
        case REF:
        case REF+1:
        case REF+2:
index c4c6520..a2e2fbb 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -1,6 +1,9 @@
-/* $Header: regcomp.h,v 3.0 89/10/18 15:22:39 lwall Locked $
+/* $Header: regcomp.h,v 3.0.1.1 90/08/09 05:06:49 lwall Locked $
  *
  * $Log:       regcomp.h,v $
+ * Revision 3.0.1.1  90/08/09  05:06:49  lwall
+ * patch19: sped up {m,n} on simple items
+ * 
  * Revision 3.0  89/10/18  15:22:39  lwall
  * 3.0 baseline
  * 
@@ -57,8 +60,8 @@
 #define        BOL     1       /* no   Match "" at beginning of line. */
 #define        EOL     2       /* no   Match "" at end of line. */
 #define        ANY     3       /* no   Match any one character. */
-#define        ANYOF   4       /* str  Match any character in this string. */
-#define        ANYBUT  5       /* str  Match any character not in this string. */
+#define        ANYOF   4       /* str  Match character in (or not in) this class. */
+#define        CURLY   5       /* str  Match this simple thing {n,m} times. */
 #define        BRANCH  6       /* node Match this alternative, or the next... */
 #define        BACK    7       /* no   Match "", "next" ptr points backward. */
 #define        EXACTLY 8       /* str  Match this string (preceded by length). */
 #ifndef DOINIT
 extern char varies[];
 #else
-char varies[] = {BRANCH,BACK,STAR,PLUS,
+char varies[] = {BRANCH,BACK,STAR,PLUS,CURLY,
        REF+1,REF+2,REF+3,REF+4,REF+5,REF+6,REF+7,REF+8,REF+9,0};
 #endif
 
@@ -113,7 +116,7 @@ char varies[] = {BRANCH,BACK,STAR,PLUS,
 #ifndef DOINIT
 extern char simple[];
 #else
-char simple[] = {ANY,ANYOF,ANYBUT,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0};
+char simple[] = {ANY,ANYOF,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0};
 #endif
 
 EXT char regdummy;
@@ -145,8 +148,12 @@ EXT char regdummy;
 #ifndef lint
 #ifdef REGALIGN
 #define NEXT(p) (*(short*)(p+1))
+#define ARG1(p) (*(unsigned short*)(p+3))
+#define ARG2(p) (*(unsigned short*)(p+5))
 #else
 #define        NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
+#define        ARG1(p) (((*((p)+3)&0377)<<8) + (*((p)+4)&0377))
+#define        ARG2(p) (((*((p)+5)&0377)<<8) + (*((p)+6)&0377))
 #endif
 #else /* lint */
 #define NEXT(p) 0
index 2c6213b..61439ea 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -7,9 +7,16 @@
  * blame Henry for some of the lack of readability.
  */
 
-/* $Header: regexec.c,v 3.0.1.3 90/02/28 18:14:39 lwall Locked $
+/* $Header: regexec.c,v 3.0.1.4 90/08/09 05:12:03 lwall Locked $
  *
  * $Log:       regexec.c,v $
+ * Revision 3.0.1.4  90/08/09  05:12:03  lwall
+ * patch19: sped up /x+y/ patterns greatly by not retrying on every x
+ * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
+ * patch19: sped up {m,n} on simple items
+ * patch19: $' broke on embedded nulls
+ * patch19: $ will now only match at end of string if $* == 0
+ * 
  * Revision 3.0.1.3  90/02/28  18:14:39  lwall
  * patch9: /[\200-\377]/ didn't work on machines with signed chars
  * patch9: \d, \w, and \s could misfire on characters with high bit set
@@ -198,7 +205,7 @@ int safebase;       /* no need to remember string in subbase */
 
        /* Simplest case:  anchored match need be tried only once. */
        /*  [unless multiline is set] */
-       if (prog->reganch) {
+       if (prog->reganch & 1) {
                if (regtry(prog, string))
                        goto got_it;
                else if (multiline) {
@@ -208,9 +215,9 @@ int safebase;       /* no need to remember string in subbase */
                        /* for multiline we only have to try after newlines */
                        if (s > string)
                            s--;
-                       for (; s < strend; s++) {
-                           if (*s == '\n') {
-                               if (++s < strend && regtry(prog, s))
+                       while (s < strend) {
+                           if (*s++ == '\n') {
+                               if (s < strend && regtry(prog, s))
                                    goto got_it;
                            }
                        }
@@ -220,8 +227,22 @@ int safebase;      /* no need to remember string in subbase */
 
        /* Messy cases:  unanchored match. */
        if (prog->regstart) {
-               /* We know what string it must start with. */
-               if (prog->regstart->str_pok == 3) {
+               if (prog->reganch & 2) {        /* we have /x+whatever/ */
+                   /* it must be a one character string */
+                   i = prog->regstart->str_ptr[0];
+                   while (s < strend) {
+                           if (*s == i) {
+                                   if (regtry(prog, s))
+                                           goto got_it;
+                                   s++;
+                                   while (s < strend && *s == i)
+                                       s++;
+                           }
+                           s++;
+                   }
+               }
+               else if (prog->regstart->str_pok == 3) {
+                   /* We know what string it must start with. */
 #ifndef lint
                    while ((s = fbminstr((unsigned char*)s,
                      (unsigned char*)strend, prog->regstart)) != NULL)
@@ -246,18 +267,26 @@ int safebase;     /* no need to remember string in subbase */
                goto phooey;
        }
        if (c = prog->regstclass) {
+               int doevery = (prog->reganch & 2) == 0;
+
                if (minlen)
                    dontbother = minlen - 1;
                strend -= dontbother;   /* don't bother with what can't match */
+               tmp = 1;
                /* We know what class it must start with. */
                switch (OP(c)) {
-               case ANYOF: case ANYBUT:
+               case ANYOF:
                    c = OPERAND(c);
                    while (s < strend) {
                            i = UCHARAT(s);
-                           if (!(c[i >> 3] & (1 << (i&7))))
-                                   if (regtry(prog, s))
+                           if (!(c[i >> 3] & (1 << (i&7)))) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
@@ -305,50 +334,80 @@ int safebase;     /* no need to remember string in subbase */
                case ALNUM:
                    while (s < strend) {
                            i = *s;
-                           if (isALNUM(i))
-                                   if (regtry(prog, s))
+                           if (isALNUM(i)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
                case NALNUM:
                    while (s < strend) {
                            i = *s;
-                           if (!isALNUM(i))
-                                   if (regtry(prog, s))
+                           if (!isALNUM(i)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
                case SPACE:
                    while (s < strend) {
-                           if (isSPACE(*s))
-                                   if (regtry(prog, s))
+                           if (isSPACE(*s)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
                case NSPACE:
                    while (s < strend) {
-                           if (!isSPACE(*s))
-                                   if (regtry(prog, s))
+                           if (!isSPACE(*s)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
                case DIGIT:
                    while (s < strend) {
-                           if (isDIGIT(*s))
-                                   if (regtry(prog, s))
+                           if (isDIGIT(*s)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
                case NDIGIT:
                    while (s < strend) {
-                           if (!isDIGIT(*s))
-                                   if (regtry(prog, s))
+                           if (!isDIGIT(*s)) {
+                                   if (tmp && regtry(prog, s))
                                            goto got_it;
+                                   else
+                                           tmp = doevery;
+                           }
+                           else
+                                   tmp = 1;
                            s++;
                    }
                    break;
@@ -379,6 +438,7 @@ int safebase;       /* no need to remember string in subbase */
                    if (prog->subbase)
                            Safefree(prog->subbase);
                    prog->subbase = s;
+                   prog->subend = s+i;
                }
                else
                    s = prog->subbase;
@@ -486,14 +546,16 @@ char *prog;
                            ((nextchar || locinput < regeol) &&
                              locinput[-1] == '\n') )
                        {
-                               regtill--;
+                               regtill = regbol;
                                break;
                        }
                        return(0);
                case EOL:
                        if ((nextchar || locinput < regeol) && nextchar != '\n')
                                return(0);
-                       regtill--;
+                       if (!multiline && regeol - locinput > 1)
+                               return 0;
+                       regtill = regbol;
                        break;
                case ANY:
                        if ((nextchar == '\0' && locinput >= regeol) ||
@@ -507,7 +569,7 @@ char *prog;
                        /* Inline the first character, for speed. */
                        if (*s != nextchar)
                                return(0);
-                       if (locinput + ln > regeol)
+                       if (regeol - locinput < ln)
                                return 0;
                        if (ln > 1 && bcmp(s, locinput, ln) != 0)
                                return(0);
@@ -515,7 +577,6 @@ char *prog;
                        nextchar = *locinput;
                        break;
                case ANYOF:
-               case ANYBUT:
                        s = OPERAND(scan);
                        if (nextchar < 0)
                                nextchar = UCHARAT(locinput);
@@ -685,19 +746,33 @@ char *prog;
                                }
                        }
                        break;
+               case CURLY:
+                       ln = ARG1(scan);  /* min to match */
+                       n  = ARG2(scan);  /* max to match */
+                       scan = NEXTOPER(scan) + 4;
+                       goto repeat;
                case STAR:
+                       ln = 0;
+                       n = 0;
+                       scan = NEXTOPER(scan);
+                       goto repeat;
                case PLUS:
                        /*
                         * Lookahead to avoid useless match attempts
                         * when we know what character comes next.
                         */
+                       ln = 1;
+                       n = 0;
+                       scan = NEXTOPER(scan);
+                   repeat:
                        if (OP(next) == EXACTLY)
                                nextchar = *(OPERAND(next)+1);
                        else
                                nextchar = -1000;
-                       ln = (OP(scan) == STAR) ? 0 : 1;
                        reginput = locinput;
-                       n = regrepeat(NEXTOPER(scan));
+                       n = regrepeat(scan, n);
+                       if (!multiline && OP(next) == EOL)
+                           ln = n;                     /* why back off? */
                        while (n >= ln) {
                                /* If it could work, try it. */
                                if (nextchar == -1000 || *reginput == nextchar)
@@ -739,8 +814,9 @@ char *prog;
  * rather than incrementing count on every character.]
  */
 static int
-regrepeat(p)
+regrepeat(p, max)
 char *p;
+int max;
 {
        register char *scan;
        register char *opnd;
@@ -748,6 +824,8 @@ char *p;
        register char *loceol = regeol;
 
        scan = reginput;
+       if (max && max < loceol - scan)
+           loceol = scan + max;
        opnd = OPERAND(p);
        switch (OP(p)) {
        case ANY:
@@ -760,7 +838,6 @@ char *p;
                        scan++;
                break;
        case ANYOF:
-       case ANYBUT:
                c = UCHARAT(scan);
                while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
                        scan++;
index 576f0b1..39620da 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -5,9 +5,12 @@
  * not the System V one.
  */
 
-/* $Header: regexp.h,v 3.0 89/10/18 15:22:46 lwall Locked $
+/* $Header: regexp.h,v 3.0.1.1 90/08/09 05:12:55 lwall Locked $
  *
  * $Log:       regexp.h,v $
+ * Revision 3.0.1.1  90/08/09  05:12:55  lwall
+ * patch19: $' broke on embedded nulls
+ * 
  * Revision 3.0  89/10/18  15:22:46  lwall
  * 3.0 baseline
  * 
@@ -24,6 +27,7 @@ typedef struct regexp {
        int regback;            /* Can regmust locate first try? */
        char *precomp;          /* pre-compilation regular expression */
        char *subbase;          /* saved string so \digit works forever */
+       char *subend;           /* end of subbase */
        char reganch;           /* Internal use only. */
        char do_folding;        /* do case-insensitive match? */
        char lastparen;         /* last paren matched */
diff --git a/stab.c b/stab.c
index 30b797b..15ae9b3 100644 (file)
--- a/stab.c
+++ b/stab.c
@@ -1,4 +1,4 @@
-/* $Header: stab.c,v 3.0.1.6 90/03/27 16:22:11 lwall Locked $
+/* $Header: stab.c,v 3.0.1.7 90/08/09 05:17:48 lwall Locked $
  *
  *    Copyright (c) 1989, Larry Wall
  *
@@ -6,6 +6,15 @@
  *    as specified in the README file that comes with the perl 3.0 kit.
  *
  * $Log:       stab.c,v $
+ * Revision 3.0.1.7  90/08/09  05:17:48  lwall
+ * patch19: fixed double include of <signal.h>
+ * patch19: $' broke on embedded nulls
+ * patch19: $< and $> better supported on machines without setreuid
+ * patch19: Added support for linked-in C subroutines
+ * patch19: %ENV wasn't forced to be global like it should
+ * patch19: $| didn't work before the filehandle was opened
+ * patch19: $! now returns "" in string context if errno == 0
+ * 
  * Revision 3.0.1.6  90/03/27  16:22:11  lwall
  * patch16: support for machines that can't cast negative floats to unsigned ints
  * 
@@ -38,7 +47,9 @@
 #include "EXTERN.h"
 #include "perl.h"
 
+#ifndef NSIG
 #include <signal.h>
+#endif
 
 static char *sig_name[] = {
     SIG_NAME,0
@@ -105,7 +116,7 @@ STR *str;
        if (curspat) {
            if (curspat->spat_regexp &&
              (s = curspat->spat_regexp->endp[0]) ) {
-               str_set(stab_val(stab),s);
+               str_nset(stab_val(stab),s, curspat->spat_regexp->subend - s);
            }
            else
                str_nset(stab_val(stab),"",0);
@@ -151,6 +162,8 @@ STR *str;
        str_numset(stab_val(stab),(double)arybase);
        break;
     case '|':
+       if (!stab_io(curoutstab))
+           stab_io(curoutstab) = stio_new();
        str_numset(stab_val(stab),
           (double)((stab_io(curoutstab)->flags & IOF_FLUSH) != 0) );
        break;
@@ -165,7 +178,7 @@ STR *str;
        break;
     case '!':
        str_numset(stab_val(stab), (double)errno);
-       str_set(stab_val(stab), strerror(errno));
+       str_set(stab_val(stab), errno ? strerror(errno) : "");
        stab_val(stab)->str_nok = 1;    /* what a wonderful hack! */
        break;
     case '<':
@@ -199,6 +212,14 @@ STR *str;
 #endif
        str_set(stab_val(stab),buf);
        break;
+    default:
+       {
+           struct ufuncs *uf = (struct ufuncs *)str->str_ptr;
+
+           if (uf && uf->uf_val)
+               uf->uf_val(uf->uf_index, stab_val(stab));
+       }
+       break;
     }
     return stab_val(stab);
 }
@@ -256,10 +277,17 @@ STR *str;
                stab->str_pok = 1;
                strcpy(stab_magic(stab),"StB");
                stab_val(stab) = Str_new(70,0);
-               stab_line(stab) = line;
+               stab_line(stab) = curcmd->c_line;
            }
-           else
+           else {
                stab = stabent(s,TRUE);
+               if (!stab_xarray(stab))
+                   aadd(stab);
+               if (!stab_xhash(stab))
+                   hadd(stab);
+               if (!stab_io(stab))
+                   stab_io(stab) = stio_new();
+           }
            str_sset(str,stab);
        }
        break;
@@ -305,6 +333,8 @@ STR *str;
            stab_io(curoutstab)->page = (long)str_gnum(str);
            break;
        case '|':
+           if (!stab_io(curoutstab))
+               stab_io(curoutstab) = stio_new();
            stab_io(curoutstab)->flags &= ~IOF_FLUSH;
            if (str_gnum(str) != 0.0) {
                stab_io(curoutstab)->flags |= IOF_FLUSH;
@@ -366,7 +396,10 @@ STR *str;
            if (setreuid((UIDTYPE)uid, (UIDTYPE)-1) < 0)
                uid = (int)getuid();
 #else
-           fatal("setruid() not implemented");
+           if (uid == euid)            /* special case $< = $> */
+               setuid(uid);
+           else
+               fatal("setruid() not implemented");
 #endif
 #endif
            break;
@@ -386,7 +419,10 @@ STR *str;
            if (setreuid((UIDTYPE)-1, (UIDTYPE)euid) < 0)
                euid = (int)geteuid();
 #else
-           fatal("seteuid() not implemented");
+           if (euid == uid)            /* special case $> = $< */
+               setuid(euid);
+           else
+               fatal("seteuid() not implemented");
 #endif
 #endif
            break;
@@ -429,6 +465,14 @@ STR *str;
        case ':':
            chopset = str_get(str);
            break;
+       default:
+           {
+               struct ufuncs *uf = (struct ufuncs *)str->str_magic->str_ptr;
+
+               if (uf && uf->uf_set)
+                   uf->uf_set(uf->uf_index, str);
+           }
+           break;
        }
        break;
     }
@@ -465,6 +509,9 @@ int sig;
     ARRAY *oldstack = stack;
     SUBR *sub;
 
+#ifdef OS2             /* or anybody else who requires SIG_ACK */
+    signal(sig, SIG_ACK);
+#endif
     stab = stabent(
        str_get(hfetch(stab_hash(sigstab),sig_name[sig],strlen(sig_name[sig]),
          TRUE)), TRUE);
@@ -555,7 +602,7 @@ int add;
            if (*name == 'I' && strEQ(name, "INC"))
                global = TRUE;
        }
-       else if (*name >= 'A') {
+       else if (*name > 'A') {
            if (*name == 'E' && strEQ(name, "ENV"))
                global = TRUE;
        }
@@ -615,7 +662,7 @@ int add;
        stab->str_pok = 1;
        strcpy(stab_magic(stab),"StB");
        stab_val(stab) = Str_new(72,0);
-       stab_line(stab) = line;
+       stab_line(stab) = curcmd->c_line;
        str_magic(stab,stab,'*',name,len);
        return stab;
     }
@@ -644,7 +691,7 @@ register int max;
            stab = (STAB*)entry->hent_val;
            if (stab->str_pok & SP_MULTI)
                continue;
-           line = stab_line(stab);
+           curcmd->c_line = stab_line(stab);
            warn("Possible typo: \"%s\"", stab_name(stab));
        }
     }
index 08230b0..66d7b72 100644 (file)
@@ -28,9 +28,12 @@ $spitshell >s2p <<!GROK!THIS!
 : In the following dollars and backticks do not need the extra backslash.
 $spitshell >>s2p <<'!NO!SUBS!'
 
-# $Header: s2p.SH,v 3.0.1.3 90/03/01 10:31:21 lwall Locked $
+# $Header: s2p.SH,v 3.0.1.4 90/08/09 05:50:43 lwall Locked $
 #
 # $Log:        s2p.SH,v $
+# Revision 3.0.1.4  90/08/09  05:50:43  lwall
+# patch19: s2p didn't translate \n right
+# 
 # Revision 3.0.1.3  90/03/01  10:31:21  lwall
 # patch9: s2p didn't handle \< and \>
 # 
@@ -424,6 +427,9 @@ ${space}next line;";
                        $len = length($_);
                        $_ = substr($_,0,--$len);
                    }
+                   elsif (substr($_,$i,1) =~ /^[n]$/) {
+                       ;
+                   }
                    elsif (!$repl && substr($_,$i,1) =~ /^[(){}\w]$/) {
                        $i--;
                        $len--;
@@ -612,7 +618,7 @@ sub fetchpat {
        if ($delim eq '\\') {
            s/(.)//;
            $ch = $1;
-           $delim = '' if $ch =~ /^[(){}\w]$/;
+           $delim = '' if $ch =~ /^[(){}A-Za-mo-z]$/;
            $ch = 'b' if $ch =~ /^[<>]$/;
            $delim .= $ch;
        }