1 /* $RCSfile: toke.c,v $$Revision: 4.0.1.4 $$Date: 91/11/05 19:02:48 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.0.1.4 91/11/05 19:02:48 lwall
10 * patch11: \x and \c were subject to double interpretation in regexps
11 * patch11: prepared for ctype implementations that don't define isascii()
12 * patch11: nested list operators could miscount parens
13 * patch11: once-thru blocks didn't display right in the debugger
14 * patch11: sort eval "whatever" didn't work
15 * patch11: underscore is now allowed within literal octal and hex numbers
17 * Revision 4.0.1.3 91/06/10 01:32:26 lwall
18 * patch10: m'$foo' now treats string as single quoted
19 * patch10: certain pattern optimizations were botched
21 * Revision 4.0.1.2 91/06/07 12:05:56 lwall
22 * patch4: new copyright notice
23 * patch4: debugger lost track of lines in eval
24 * patch4: //o and s///o now optimize themselves fully at runtime
25 * patch4: added global modifier for pattern matches
27 * Revision 4.0.1.1 91/04/12 09:18:18 lwall
28 * patch1: perl -de "print" wouldn't stop at the first statement
30 * Revision 4.0 91/03/20 01:42:14 lwall
50 /* which backslash sequences to keep in m// or s// */
52 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtfeaxc0123456789[{]}";
54 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
61 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
63 #define META(c) ((c) | 128)
65 #define RETURN(retval) return (bufptr = s,(int)retval)
66 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
67 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
68 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
69 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
70 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
71 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
72 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
73 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
74 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
75 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
76 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
77 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
78 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
79 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
80 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
81 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
82 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
83 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
84 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
85 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
86 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
87 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
88 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
89 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
90 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
91 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
93 /* This bit of chicanery makes a unary function followed by
94 * a parenthesis into a function with one argument, highest precedence.
96 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
97 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
99 /* This does similarly for list operators, merely by pretending that the
100 * paren came before the listop rather than after.
102 #define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
103 (*s = (char) META('('), bufptr = oldbufptr, '(') : \
104 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
105 /* grandfather return to old style */
106 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
112 while (s < bufend && isSPACE(*s))
121 #define UNI(f) return uni(f,s)
122 #define LOP(f) return lop(f,s)
162 #endif /* CRIPPLED_CC */
166 register char *s = bufptr;
169 static bool in_format = FALSE;
170 static bool firstline = TRUE;
171 extern int yychar; /* last token */
173 oldoldbufptr = oldbufptr;
180 fprintf(stderr,"Tokener at %s",s);
182 fprintf(stderr,"Tokener at %s\n",s);
186 if ((*s & 127) == '(') {
191 warn("Unrecognized character \\%03o ignored", *s++ & 255);
197 if ((*s & 127) == '(') {
202 warn("Unrecognized character \\%03o ignored", *s++ & 255);
206 goto fake_eof; /* emulate EOF on ^D or ^Z */
211 goto retry; /* ignore stray nulls */
214 if (minus_n || minus_p || perldb) {
218 char *pdb = getenv("PERLDB");
220 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
221 str_cat(linestr, ";");
223 if (minus_n || minus_p) {
224 str_cat(linestr,"line: while (<>) {");
226 str_cat(linestr,"chop;");
228 str_cat(linestr,"@F=split(' ');");
230 oldoldbufptr = oldbufptr = s = str_get(linestr);
231 bufend = linestr->str_ptr + linestr->str_cur;
237 yylval.formval = load_format();
239 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
240 bufend = linestr->str_ptr + linestr->str_cur;
246 #endif /* CRYPTSCRIPT */
248 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
252 (void)mypclose(rsfp);
253 else if ((FILE*)rsfp == stdin)
259 if (minus_n || minus_p) {
260 str_set(linestr,minus_p ? ";}continue{print" : "");
261 str_cat(linestr,";}");
262 oldoldbufptr = oldbufptr = s = str_get(linestr);
263 bufend = linestr->str_ptr + linestr->str_cur;
264 minus_n = minus_p = 0;
267 oldoldbufptr = oldbufptr = s = str_get(linestr);
269 RETURN(';'); /* not infinite loop because rsfp is NULL now */
271 if (doextract && *linestr->str_ptr == '#')
274 oldoldbufptr = oldbufptr = bufptr = s;
276 STR *str = Str_new(85,0);
278 str_sset(str,linestr);
279 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
287 bufend = linestr->str_ptr + linestr->str_cur;
288 if (curcmd->c_line == 1) {
289 if (*s == '#' && s[1] == '!') {
290 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
298 while (s < bufend && !isSPACE(*s))
301 while (s < bufend && isSPACE(*s))
304 Newz(899,newargv,origargc+3,char*);
306 while (s < bufend && !isSPACE(*s))
309 Copy(origargv+1, newargv+2, origargc+1, char*);
315 fatal("Can't exec %s", cmd);
319 while (s < bufend && isSPACE(*s))
321 if (*s == ':') /* for csh's that have to exec sh scripts */
326 case ' ': case '\t': case '\f': case '\r': case 013:
330 if (preprocess && s == str_get(linestr) &&
331 s[1] == ' ' && (isDIGIT(s[2]) || strnEQ(s+2,"line ",5)) ) {
332 while (*s && !isDIGIT(*s))
334 curcmd->c_line = atoi(s)-1;
338 while (s < d && isSPACE(*s)) s++;
339 s[strlen(s)-1] = '\0'; /* wipe out newline */
342 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
345 curcmd->c_filestab = fstab(s);
347 curcmd->c_filestab = fstab(origfilename);
348 oldoldbufptr = oldbufptr = s = str_get(linestr);
352 if (in_eval && !rsfp) {
354 while (s < d && *s != '\n')
360 yylval.formval = load_format();
362 oldoldbufptr = oldbufptr = s = bufptr + 1;
373 if (s[1] && isALPHA(s[1]) && !isALPHA(s[2])) {
376 case 'r': FTST(O_FTEREAD);
377 case 'w': FTST(O_FTEWRITE);
378 case 'x': FTST(O_FTEEXEC);
379 case 'o': FTST(O_FTEOWNED);
380 case 'R': FTST(O_FTRREAD);
381 case 'W': FTST(O_FTRWRITE);
382 case 'X': FTST(O_FTREXEC);
383 case 'O': FTST(O_FTROWNED);
384 case 'e': FTST(O_FTIS);
385 case 'z': FTST(O_FTZERO);
386 case 's': FTST(O_FTSIZE);
387 case 'f': FTST(O_FTFILE);
388 case 'd': FTST(O_FTDIR);
389 case 'l': FTST(O_FTLINK);
390 case 'p': FTST(O_FTPIPE);
391 case 'S': FTST(O_FTSOCK);
392 case 'u': FTST(O_FTSUID);
393 case 'g': FTST(O_FTSGID);
394 case 'k': FTST(O_FTSVTX);
395 case 'b': FTST(O_FTBLK);
396 case 'c': FTST(O_FTCHR);
397 case 't': FTST(O_FTTTY);
398 case 'T': FTST(O_FTTEXT);
399 case 'B': FTST(O_FTBINARY);
400 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
401 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
402 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
430 s = scanident(s,bufend,tokenbuf);
431 yylval.stabval = stabent(tokenbuf,TRUE);
442 s = scanident(s,bufend,tokenbuf);
443 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
459 yylval.ival = curcmd->c_line;
460 if (isSPACE(*s) || *s == '#')
461 cmdline = NOLINE; /* invalidate current command line number */
464 if (curcmd->c_line < cmdline)
465 cmdline = curcmd->c_line;
483 while (s < d && isSPACE(*s))
485 if (isALPHA(*s) || *s == '_' || *s == '\'')
486 *(--s) = '\\'; /* force next ident to WORD */
545 while (isALNUM(*s) || *s == '\'') \
547 while (d[-1] == '\'') \
553 if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_')) {
555 s = scanident(s,bufend,tokenbuf);
556 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
560 s = scanident(s,bufend,tokenbuf);
561 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
569 yylval.stabval = stabent(tokenbuf,TRUE);
574 s = scanident(s,bufend,tokenbuf);
577 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
580 case '/': /* may either be division or pattern */
581 case '?': /* may either be conditional or pattern */
592 if (!expectterm || !isDIGIT(s[1])) {
601 case '0': case '1': case '2': case '3': case '4':
602 case '5': case '6': case '7': case '8': case '9':
603 case '\'': case '"': case '`':
607 case '\\': /* some magic to force next word to be a WORD */
608 s++; /* used by do and sub to force a separate namespace */
613 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
614 ARG *arg = op_new(1);
617 arg->arg_type = O_ITEM;
619 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
621 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
622 arg[1].arg_type = A_SINGLE;
623 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
626 else if (strEQ(d,"__END__")) {
632 if (stab = stabent("DATA",FALSE)) {
633 stab->str_pok |= SP_MULTI;
634 stab_io(stab) = stio_new();
635 stab_io(stab)->ifp = rsfp;
636 #if defined(HAS_FCNTL) && defined(F_SETFD)
638 fcntl(fd,F_SETFD,fd >= 3);
641 stab_io(stab)->type = '|';
642 else if ((FILE*)rsfp == stdin)
643 stab_io(stab)->type = '-';
645 stab_io(stab)->type = '<';
655 if (strEQ(d,"alarm"))
657 if (strEQ(d,"accept"))
659 if (strEQ(d,"atan2"))
666 if (strEQ(d,"binmode"))
673 if (strEQ(d,"continue"))
675 if (strEQ(d,"chdir")) {
676 (void)stabent("ENV",TRUE); /* may use HOME */
679 if (strEQ(d,"close"))
681 if (strEQ(d,"closedir"))
685 if (strEQ(d,"caller"))
687 if (strEQ(d,"crypt")) {
689 static int cryptseen = 0;
696 if (strEQ(d,"chmod"))
698 if (strEQ(d,"chown"))
700 if (strEQ(d,"connect"))
704 if (strEQ(d,"chroot"))
711 while (s < d && isSPACE(*s))
713 if (isALPHA(*s) || *s == '_')
714 *(--s) = '\\'; /* force next ident to WORD */
719 if (strEQ(d,"defined"))
721 if (strEQ(d,"delete"))
723 if (strEQ(d,"dbmopen"))
725 if (strEQ(d,"dbmclose"))
734 if (strEQ(d,"elsif")) {
735 yylval.ival = curcmd->c_line;
738 if (strEQ(d,"eq") || strEQ(d,"EQ"))
742 if (strEQ(d,"eval")) {
743 allstabs = TRUE; /* must initialize everything since */
744 UNI(O_EVAL); /* we don't know what will be used */
752 if (strEQ(d,"exec")) {
756 if (strEQ(d,"endhostent"))
758 if (strEQ(d,"endnetent"))
760 if (strEQ(d,"endservent"))
762 if (strEQ(d,"endprotoent"))
764 if (strEQ(d,"endpwent"))
766 if (strEQ(d,"endgrent"))
771 if (strEQ(d,"for") || strEQ(d,"foreach")) {
772 yylval.ival = curcmd->c_line;
775 if (strEQ(d,"format")) {
777 while (s < d && isSPACE(*s))
779 if (isALPHA(*s) || *s == '_')
780 *(--s) = '\\'; /* force next ident to WORD */
782 allstabs = TRUE; /* must initialize everything since */
783 OPERATOR(FORMAT); /* we don't know what will be used */
787 if (strEQ(d,"fcntl"))
789 if (strEQ(d,"fileno"))
791 if (strEQ(d,"flock"))
796 if (strEQ(d,"gt") || strEQ(d,"GT"))
798 if (strEQ(d,"ge") || strEQ(d,"GE"))
804 if (strEQ(d,"gmtime"))
808 if (strnEQ(d,"get",3)) {
815 if (strEQ(d,"priority"))
817 if (strEQ(d,"protobyname"))
819 if (strEQ(d,"protobynumber"))
821 if (strEQ(d,"protoent"))
823 if (strEQ(d,"pwent"))
825 if (strEQ(d,"pwnam"))
827 if (strEQ(d,"pwuid"))
829 if (strEQ(d,"peername"))
832 else if (*d == 'h') {
833 if (strEQ(d,"hostbyname"))
835 if (strEQ(d,"hostbyaddr"))
837 if (strEQ(d,"hostent"))
840 else if (*d == 'n') {
841 if (strEQ(d,"netbyname"))
843 if (strEQ(d,"netbyaddr"))
845 if (strEQ(d,"netent"))
848 else if (*d == 's') {
849 if (strEQ(d,"servbyname"))
851 if (strEQ(d,"servbyport"))
853 if (strEQ(d,"servent"))
855 if (strEQ(d,"sockname"))
857 if (strEQ(d,"sockopt"))
860 else if (*d == 'g') {
861 if (strEQ(d,"grent"))
863 if (strEQ(d,"grnam"))
865 if (strEQ(d,"grgid"))
868 else if (*d == 'l') {
869 if (strEQ(d,"login"))
883 yylval.ival = curcmd->c_line;
886 if (strEQ(d,"index"))
890 if (strEQ(d,"ioctl"))
909 if (strEQ(d,"local"))
911 if (strEQ(d,"length"))
913 if (strEQ(d,"lt") || strEQ(d,"LT"))
915 if (strEQ(d,"le") || strEQ(d,"LE"))
917 if (strEQ(d,"localtime"))
923 if (strEQ(d,"listen"))
925 if (strEQ(d,"lstat"))
941 RETURN(1); /* force error */
945 if (strEQ(d,"mkdir"))
949 if (strEQ(d,"msgctl"))
951 if (strEQ(d,"msgget"))
953 if (strEQ(d,"msgrcv"))
955 if (strEQ(d,"msgsnd"))
964 if (strEQ(d,"ne") || strEQ(d,"NE"))
975 if (strEQ(d,"opendir"))
980 if (strEQ(d,"print")) {
981 checkcomma(s,"filehandle");
984 if (strEQ(d,"printf")) {
985 checkcomma(s,"filehandle");
988 if (strEQ(d,"push")) {
989 yylval.ival = O_PUSH;
996 if (strEQ(d,"package"))
1007 if (strEQ(d,"qq")) {
1011 if (strEQ(d,"qx")) {
1018 if (strEQ(d,"return"))
1020 if (strEQ(d,"require")) {
1021 allstabs = TRUE; /* must initialize everything since */
1022 UNI(O_REQUIRE); /* we don't know what will be used */
1024 if (strEQ(d,"reset"))
1026 if (strEQ(d,"redo"))
1028 if (strEQ(d,"rename"))
1030 if (strEQ(d,"rand"))
1032 if (strEQ(d,"rmdir"))
1034 if (strEQ(d,"rindex"))
1036 if (strEQ(d,"read"))
1038 if (strEQ(d,"readdir"))
1040 if (strEQ(d,"rewinddir"))
1042 if (strEQ(d,"recv"))
1044 if (strEQ(d,"reverse"))
1046 if (strEQ(d,"readlink"))
1062 RETURN(1); /* force error */
1069 if (strEQ(d,"scalar"))
1075 if (strEQ(d,"select"))
1077 if (strEQ(d,"seek"))
1079 if (strEQ(d,"semctl"))
1081 if (strEQ(d,"semget"))
1083 if (strEQ(d,"semop"))
1085 if (strEQ(d,"send"))
1087 if (strEQ(d,"setpgrp"))
1089 if (strEQ(d,"setpriority"))
1090 FUN3(O_SETPRIORITY);
1091 if (strEQ(d,"sethostent"))
1093 if (strEQ(d,"setnetent"))
1095 if (strEQ(d,"setservent"))
1097 if (strEQ(d,"setprotoent"))
1099 if (strEQ(d,"setpwent"))
1101 if (strEQ(d,"setgrent"))
1103 if (strEQ(d,"seekdir"))
1105 if (strEQ(d,"setsockopt"))
1112 if (strEQ(d,"shift"))
1114 if (strEQ(d,"shmctl"))
1116 if (strEQ(d,"shmget"))
1118 if (strEQ(d,"shmread"))
1120 if (strEQ(d,"shmwrite"))
1122 if (strEQ(d,"shutdown"))
1133 if (strEQ(d,"sleep"))
1140 if (strEQ(d,"socket"))
1142 if (strEQ(d,"socketpair"))
1144 if (strEQ(d,"sort")) {
1145 checkcomma(s,"subroutine name");
1147 while (s < d && isSPACE(*s)) s++;
1148 if (*s == ';' || *s == ')') /* probably a close */
1149 fatal("sort is now a reserved word");
1150 if (isALPHA(*s) || *s == '_') {
1152 for (d = s; isALNUM(*d); d++) ;
1153 strncpy(tokenbuf,s,d-s);
1154 if (strNE(tokenbuf,"keys") &&
1155 strNE(tokenbuf,"values") &&
1156 strNE(tokenbuf,"split") &&
1157 strNE(tokenbuf,"grep") &&
1158 strNE(tokenbuf,"readdir") &&
1159 strNE(tokenbuf,"unpack") &&
1160 strNE(tokenbuf,"do") &&
1161 strNE(tokenbuf,"eval") &&
1162 (d >= bufend || isSPACE(*d)) )
1163 *(--s) = '\\'; /* force next ident to WORD */
1169 if (strEQ(d,"split"))
1171 if (strEQ(d,"sprintf"))
1173 if (strEQ(d,"splice")) {
1174 yylval.ival = O_SPLICE;
1179 if (strEQ(d,"sqrt"))
1183 if (strEQ(d,"srand"))
1189 if (strEQ(d,"stat"))
1191 if (strEQ(d,"study")) {
1197 if (strEQ(d,"substr"))
1199 if (strEQ(d,"sub")) {
1200 yylval.ival = savestack->ary_fill; /* restore stuff on reduce */
1206 subline = curcmd->c_line;
1208 while (s < d && isSPACE(*s))
1210 if (isALPHA(*s) || *s == '_' || *s == '\'') {
1212 str_sset(subname,curstname);
1213 str_ncat(subname,"'",1);
1214 for (d = s+1; isALNUM(*d) || *d == '\''; d++)
1219 str_ncat(subname,s,d-s);
1221 *(--s) = '\\'; /* force next ident to WORD */
1224 str_set(subname,"?");
1233 if (strEQ(d,"system")) {
1237 if (strEQ(d,"symlink"))
1239 if (strEQ(d,"syscall"))
1241 if (strEQ(d,"sysread"))
1243 if (strEQ(d,"syswrite"))
1252 if (strEQ(d,"tr")) {
1257 RETURN(1); /* force error */
1259 if (strEQ(d,"tell"))
1261 if (strEQ(d,"telldir"))
1263 if (strEQ(d,"time"))
1265 if (strEQ(d,"times"))
1267 if (strEQ(d,"truncate"))
1272 if (strEQ(d,"using"))
1274 if (strEQ(d,"until")) {
1275 yylval.ival = curcmd->c_line;
1278 if (strEQ(d,"unless")) {
1279 yylval.ival = curcmd->c_line;
1282 if (strEQ(d,"unlink"))
1284 if (strEQ(d,"undef"))
1286 if (strEQ(d,"unpack"))
1288 if (strEQ(d,"utime"))
1290 if (strEQ(d,"umask"))
1292 if (strEQ(d,"unshift")) {
1293 yylval.ival = O_UNSHIFT;
1299 if (strEQ(d,"values"))
1301 if (strEQ(d,"vec")) {
1308 if (strEQ(d,"while")) {
1309 yylval.ival = curcmd->c_line;
1312 if (strEQ(d,"warn"))
1314 if (strEQ(d,"wait"))
1316 if (strEQ(d,"waitpid"))
1318 if (strEQ(d,"wantarray")) {
1319 yylval.arg = op_new(1);
1320 yylval.arg->arg_type = O_ITEM;
1321 yylval.arg[1].arg_type = A_WANTARRAY;
1324 if (strEQ(d,"write"))
1329 if (!expectterm && strEQ(d,"x"))
1349 yylval.cval = savestr(d);
1351 if (oldoldbufptr && oldoldbufptr < bufptr) {
1352 while (isSPACE(*oldoldbufptr))
1354 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1356 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1359 return (CLINE, bufptr = s, (int)WORD);
1371 while (s < bufend && isSPACE(*s))
1373 if (isALPHA(*s) || *s == '_') {
1377 while (s < bufend && isSPACE(*s))
1382 "tell eof times getlogin wait length shift umask getppid \
1383 cos exp int log rand sin sqrt ord wantarray",
1388 fatal("No comma allowed after %s", what);
1394 scanident(s,send,dest)
1396 register char *send;
1410 while (isALNUM(*s) || *s == '\'')
1413 while (d > dest+1 && d[-1] == '\'')
1419 if (*d == '{' /* } */ ) {
1422 while (s < send && brackets) {
1423 if (!reparse && (d == dest || (*s && isALNUM(*s) ))) {
1433 if (reparse && reparse == s - 1)
1447 if (*d == '^' && (isUPPER(*s) || index("[\\]^_?", *s))) {
1458 scanconst(spat,string,len)
1463 register STR *tmpstr;
1467 char *origstring = string;
1468 static char *vert = "|";
1470 if (ninstr(string, string+len, vert, vert+1))
1474 tmpstr = Str_new(86,len);
1475 str_nset(tmpstr,string,len);
1476 t = str_get(tmpstr);
1478 tmpstr->str_u.str_useful = 100;
1479 for (d=t; d < e; ) {
1487 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1492 if (d[1] && index("wWbB0123456789sSdDlLuUExc",d[1])) {
1496 (void)bcopy(d+1,d,e-d);
1521 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1533 tmpstr->str_cur = d - t;
1535 spat->spat_flags |= SPAT_ALL;
1536 if (*origstring != '^')
1537 spat->spat_flags |= SPAT_SCANFIRST;
1538 spat->spat_short = tmpstr;
1539 spat->spat_slen = d - t;
1546 register SPAT *spat;
1551 STR *str = Str_new(93,0);
1554 Newz(801,spat,1,SPAT);
1555 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1556 curstash->tbl_spatroot = spat;
1565 spat->spat_flags |= SPAT_ONCE;
1568 fatal("panic: scanpat");
1570 s = str_append_till(str,s,bufend,s[-1],patleave);
1573 yyerror("Search pattern not terminated");
1574 yylval.arg = Nullarg;
1578 while (*s == 'i' || *s == 'o' || *s == 'g') {
1582 spat->spat_flags |= SPAT_FOLD;
1586 spat->spat_flags |= SPAT_KEEP;
1590 spat->spat_flags |= SPAT_GLOBAL;
1594 e = str->str_ptr + len;
1599 for (; d < e; d++) {
1602 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1606 spat->spat_runtime = arg = op_new(1);
1607 arg->arg_type = O_ITEM;
1608 arg[1].arg_type = A_DOUBLE;
1609 arg[1].arg_ptr.arg_str = str_smake(str);
1610 d = scanident(d,bufend,buf);
1611 (void)stabent(buf,TRUE); /* make sure it's created */
1612 for (; d < e; d++) {
1615 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1616 d = scanident(d,bufend,buf);
1617 (void)stabent(buf,TRUE);
1619 else if (*d == '@') {
1620 d = scanident(d,bufend,buf);
1621 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1622 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1623 (void)stabent(buf,TRUE);
1626 goto got_pat; /* skip compiling for now */
1629 if (spat->spat_flags & SPAT_FOLD)
1633 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1635 scanconst(spat,str->str_ptr,len);
1636 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1637 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1638 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1639 spat->spat_flags & SPAT_FOLD);
1640 /* Note that this regexp can still be used if someone says
1641 * something like /a/ && s//b/; so we can't delete it.
1645 if (spat->spat_flags & SPAT_FOLD)
1649 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1651 if (spat->spat_short)
1652 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1653 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1654 spat->spat_flags & SPAT_FOLD);
1659 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1667 register SPAT *spat;
1671 STR *str = Str_new(93,0);
1673 Newz(802,spat,1,SPAT);
1674 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1675 curstash->tbl_spatroot = spat;
1677 s = str_append_till(str,s+1,bufend,*s,patleave);
1680 yyerror("Substitution pattern not terminated");
1681 yylval.arg = Nullarg;
1685 e = str->str_ptr + len;
1686 for (d = str->str_ptr; d < e; d++) {
1689 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1693 spat->spat_runtime = arg = op_new(1);
1694 arg->arg_type = O_ITEM;
1695 arg[1].arg_type = A_DOUBLE;
1696 arg[1].arg_ptr.arg_str = str_smake(str);
1697 d = scanident(d,e,buf);
1698 (void)stabent(buf,TRUE); /* make sure it's created */
1700 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1701 d = scanident(d,e,buf);
1702 (void)stabent(buf,TRUE);
1704 else if (*d == '@' && d[-1] != '\\') {
1705 d = scanident(d,e,buf);
1706 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1707 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1708 (void)stabent(buf,TRUE);
1711 goto get_repl; /* skip compiling for now */
1714 scanconst(spat,str->str_ptr,len);
1719 yyerror("Substitution replacement not terminated");
1720 yylval.arg = Nullarg;
1723 spat->spat_repl = yylval.arg;
1724 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1725 spat->spat_flags |= SPAT_CONST;
1726 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1730 spat->spat_flags |= SPAT_CONST;
1731 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1732 e = tmpstr->str_ptr + tmpstr->str_cur;
1733 for (t = tmpstr->str_ptr; t < e; t++) {
1734 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1735 (t[1] == '{' /*}*/ && isDIGIT(t[2])) ))
1736 spat->spat_flags &= ~SPAT_CONST;
1739 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1742 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1743 spat->spat_repl[1].arg_type = A_SINGLE;
1744 spat->spat_repl = make_op(
1745 (spat->spat_repl[1].arg_type == A_SINGLE ? O_EVALONCE : O_EVAL),
1750 spat->spat_flags &= ~SPAT_CONST;
1754 spat->spat_flags |= SPAT_GLOBAL;
1759 spat->spat_flags |= SPAT_FOLD;
1760 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1761 str_free(spat->spat_short); /* anchored opt doesn't do */
1762 spat->spat_short = Nullstr; /* case insensitive match */
1763 spat->spat_slen = 0;
1768 spat->spat_flags |= SPAT_KEEP;
1771 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1772 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1773 if (!spat->spat_runtime) {
1774 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1775 spat->spat_flags & SPAT_FOLD);
1778 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1785 register SPAT *spat;
1787 if (!spat->spat_short && spat->spat_regexp->regstart &&
1788 (!spat->spat_regexp->regmust || spat->spat_regexp->reganch & ROPT_ANCH)
1790 if (!(spat->spat_regexp->reganch & ROPT_ANCH))
1791 spat->spat_flags |= SPAT_SCANFIRST;
1792 else if (spat->spat_flags & SPAT_FOLD)
1794 spat->spat_short = str_smake(spat->spat_regexp->regstart);
1796 else if (spat->spat_regexp->regmust) {/* is there a better short-circuit? */
1797 if (spat->spat_short &&
1798 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1800 if (spat->spat_flags & SPAT_SCANFIRST) {
1801 str_free(spat->spat_short);
1802 spat->spat_short = Nullstr;
1805 str_free(spat->spat_regexp->regmust);
1806 spat->spat_regexp->regmust = Nullstr;
1810 if (!spat->spat_short || /* promote the better string */
1811 ((spat->spat_flags & SPAT_SCANFIRST) &&
1812 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1813 str_free(spat->spat_short); /* ok if null */
1814 spat->spat_short = spat->spat_regexp->regmust;
1815 spat->spat_regexp->regmust = Nullstr;
1816 spat->spat_flags |= SPAT_SCANFIRST;
1822 expand_charset(s,len,retlen)
1828 register char *d = t;
1830 register char *send = s + len;
1832 while (s < send && d - t <= 256) {
1833 if (s[1] == '-' && s+2 < send) {
1834 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1843 return nsavestr(t,d-t);
1851 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1854 register short *tbl;
1862 New(803,tbl,256,short);
1863 arg[2].arg_type = A_NULL;
1864 arg[2].arg_ptr.arg_cval = (char*) tbl;
1867 yyerror("Translation pattern not terminated");
1868 yylval.arg = Nullarg;
1871 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1872 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1873 arg_free(yylval.arg);
1876 yyerror("Translation replacement not terminated");
1877 yylval.arg = Nullarg;
1880 complement = delete = squash = 0;
1881 while (*s == 'c' || *s == 'd' || *s == 's') {
1890 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1891 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1892 arg_free(yylval.arg);
1893 arg[2].arg_len = delete|squash;
1895 if (!rlen && !delete) {
1900 Zero(tbl, 256, short);
1901 for (i = 0; i < tlen; i++)
1902 tbl[t[i] & 0377] = -1;
1903 for (i = 0, j = 0; i < 256; i++) {
1917 for (i = 0; i < 256; i++)
1919 for (i = 0, j = 0; i < tlen; i++,j++) {
1922 if (tbl[t[i] & 0377] == -1)
1923 tbl[t[i] & 0377] = -2;
1928 if (tbl[t[i] & 0377] == -1)
1929 tbl[t[i] & 0377] = r[j] & 0377;
1945 register char *send;
1946 register bool makesingle = FALSE;
1947 register STAB *stab;
1948 bool alwaysdollar = FALSE;
1949 bool hereis = FALSE;
1952 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1957 arg->arg_type = O_ITEM;
1960 default: /* a substitution replacement */
1961 arg[1].arg_type = A_DOUBLE;
1962 makesingle = TRUE; /* maybe disable runtime scanning */
1972 arg[1].arg_type = A_SINGLE;
1977 else if (s[1] == '.')
1991 yyerror("Illegal octal digit");
1993 case '0': case '1': case '2': case '3': case '4':
1994 case '5': case '6': case '7':
1998 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1999 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2003 i += (*s++ & 7) + 9;
2008 str = Str_new(92,0);
2009 str_numset(str,(double)i);
2011 Safefree(str->str_ptr);
2012 str->str_ptr = Nullch;
2013 str->str_len = str->str_cur = 0;
2015 arg[1].arg_ptr.arg_str = str;
2018 case '1': case '2': case '3': case '4': case '5':
2019 case '6': case '7': case '8': case '9': case '.':
2021 arg[1].arg_type = A_SINGLE;
2023 while (isDIGIT(*s) || *s == '_') {
2029 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
2031 while (isDIGIT(*s) || *s == '_') {
2038 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
2040 if (*s == '+' || *s == '-')
2046 str = Str_new(92,0);
2047 str_numset(str,atof(tokenbuf));
2049 Safefree(str->str_ptr);
2050 str->str_ptr = Nullch;
2051 str->str_len = str->str_cur = 0;
2053 arg[1].arg_ptr.arg_str = str;
2061 if (*++s && index("`'\"",*s)) {
2063 s = cpytill(d,s,bufend,term,&len);
2075 } /* assuming tokenbuf won't clobber */
2080 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2081 herewas = str_make(s,bufend-s);
2083 s--, herewas = str_make(s,d-s);
2084 s += herewas->str_cur;
2092 s = cpytill(d,s,bufend,'>',&len);
2096 while (*d && (isALNUM(*d) || *d == '\''))
2098 if (d - tokenbuf != len) {
2100 arg[1].arg_type = A_GLOB;
2101 d = nsavestr(d,len);
2102 arg[1].arg_ptr.arg_stab = stab = genstab();
2103 stab_io(stab) = stio_new();
2104 stab_val(stab) = str_make(d,len);
2111 (void)strcpy(d,"ARGV");
2113 arg[1].arg_type = A_INDREAD;
2114 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2117 arg[1].arg_type = A_READ;
2118 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2119 if (!stab_io(arg[1].arg_ptr.arg_stab))
2120 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2121 if (strEQ(d,"ARGV")) {
2122 (void)aadd(arg[1].arg_ptr.arg_stab);
2123 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2144 arg[1].arg_type = A_SINGLE;
2151 arg[1].arg_type = A_DOUBLE;
2152 makesingle = TRUE; /* maybe disable runtime scanning */
2153 alwaysdollar = TRUE; /* treat $) and $| as variables */
2158 arg[1].arg_type = A_BACKTICK;
2160 alwaysdollar = TRUE; /* treat $) and $| as variables */
2167 multi_start = curcmd->c_line;
2169 multi_open = multi_close = '<';
2172 if (term && (tmps = index("([{< )]}> )]}>",term)))
2176 tmpstr = Str_new(87,80);
2181 while (s < bufend &&
2182 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2187 curcmd->c_line = multi_start;
2188 fatal("EOF in string");
2190 str_nset(tmpstr,d+1,s-d);
2192 str_ncat(herewas,s,bufend-s);
2193 str_replace(linestr,herewas);
2194 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2195 bufend = linestr->str_ptr + linestr->str_cur;
2199 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2202 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2203 while (s >= bufend) { /* multiple line string? */
2205 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2206 curcmd->c_line = multi_start;
2207 fatal("EOF in string");
2211 STR *str = Str_new(88,0);
2213 str_sset(str,linestr);
2214 astore(stab_xarray(curcmd->c_filestab),
2215 (int)curcmd->c_line,str);
2217 bufend = linestr->str_ptr + linestr->str_cur;
2219 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2222 str_scat(linestr,herewas);
2223 bufend = linestr->str_ptr + linestr->str_cur;
2227 str_scat(tmpstr,linestr);
2231 s = str_append_till(tmpstr,s,bufend,term,leave);
2233 multi_end = curcmd->c_line;
2235 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2236 tmpstr->str_len = tmpstr->str_cur + 1;
2237 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2239 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2240 arg[1].arg_ptr.arg_str = tmpstr;
2244 s = tmpstr->str_ptr;
2245 send = s + tmpstr->str_cur;
2246 while (s < send) { /* see if we can make SINGLE */
2247 if (*s == '\\' && s[1] && isDIGIT(s[1]) && !isDIGIT(s[2]) &&
2248 !alwaysdollar && s[1] != '0')
2249 *s = '$'; /* grandfather \digit in subst */
2250 if ((*s == '$' || *s == '@') && s+1 < send &&
2251 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2252 makesingle = FALSE; /* force interpretation */
2254 else if (*s == '\\' && s+1 < send) {
2255 if (index("lLuUE",s[1]))
2261 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2263 if ((*s == '$' && s+1 < send &&
2264 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2265 (*s == '@' && s+1 < send) ) {
2266 if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_'))
2268 len = scanident(s,send,tokenbuf) - s;
2269 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2270 || strEQ(tokenbuf,"ENV")
2271 || strEQ(tokenbuf,"SIG")
2272 || strEQ(tokenbuf,"INC") )
2273 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2278 else if (*s == '\\' && s+1 < send) {
2282 if (!makesingle && (!leave || (*s && index(leave,*s))))
2286 case '0': case '1': case '2': case '3':
2287 case '4': case '5': case '6': case '7':
2288 *d++ = scanoct(s, 3, &len);
2292 *d++ = scanhex(++s, 2, &len);
2331 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2332 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2334 tmpstr->str_cur = d - tmpstr->str_ptr;
2335 arg[1].arg_ptr.arg_str = tmpstr;
2351 register FCMD *fprev = &froot;
2352 register FCMD *fcmd;
2359 Zero(&froot, 1, FCMD);
2361 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2363 if (in_eval && !rsfp) {
2364 eol = index(s,'\n');
2369 eol = bufend = linestr->str_ptr + linestr->str_cur;
2371 STR *tmpstr = Str_new(89,0);
2373 str_nset(tmpstr, s, eol-s);
2374 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2378 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2381 return froot.f_next;
2388 flinebeg = Nullfcmd;
2392 Newz(804,fcmd,1,FCMD);
2393 fprev->f_next = fcmd;
2395 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2405 fcmd->f_pre = nsavestr(s, t-s);
2406 fcmd->f_presize = t-s;
2410 fcmd->f_flags |= FC_NOBLANK;
2412 fcmd->f_flags |= FC_REPEAT;
2416 flinebeg = fcmd; /* start values here */
2418 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2421 fcmd->f_type = F_LINES;
2425 fcmd->f_type = F_LEFT;
2430 fcmd->f_type = F_RIGHT;
2435 fcmd->f_type = F_CENTER;
2441 /* Catch the special case @... and handle it as a string
2443 if (*s == '.' && s[1] == '.') {
2444 goto default_format;
2446 fcmd->f_type = F_DECIMAL;
2450 /* Read a format in the form @####.####, where either group
2451 of ### may be empty, or the final .### may be missing. */
2459 fcmd->f_decimals = s-p;
2460 fcmd->f_flags |= FC_DP;
2462 fcmd->f_decimals = 0;
2468 fcmd->f_type = F_LEFT;
2471 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2472 fcmd->f_flags |= FC_MORE;
2481 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2484 if (in_eval && !rsfp) {
2485 eol = index(s,'\n');
2490 eol = bufend = linestr->str_ptr + linestr->str_cur;
2492 STR *tmpstr = Str_new(90,0);
2494 str_nset(tmpstr, s, eol-s);
2495 astore(stab_xarray(curcmd->c_filestab),
2496 (int)curcmd->c_line,tmpstr);
2498 if (strnEQ(s,".\n",2)) {
2500 yyerror("Missing values line");
2501 return froot.f_next;
2507 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2508 str->str_u.str_hash = curstash;
2509 str_nset(str,"(",1);
2510 flinebeg->f_line = curcmd->c_line;
2512 if (!flinebeg->f_next->f_type || index(s, ',')) {
2514 str_ncat(str, s, eol - s - 1);
2515 str_ncat(str,",$$);",5);
2520 while (s < eol && isSPACE(*s))
2525 case ' ': case '\t': case '\n': case ';':
2526 str_ncat(str, t, s - t);
2527 str_ncat(str, "," ,1);
2528 while (s < eol && (isSPACE(*s) || *s == ';'))
2533 str_ncat(str, t, s - t);
2535 s = scanident(s,eol,tokenbuf);
2536 str_ncat(str, t, s - t);
2538 if (s < eol && *s && index("$'\"",*s))
2539 str_ncat(str, ",", 1);
2541 case '"': case '\'':
2542 str_ncat(str, t, s - t);
2545 while (s < eol && (*s != *t || s[-1] == '\\'))
2549 str_ncat(str, t, s - t);
2551 if (s < eol && *s && index("$'\"",*s))
2552 str_ncat(str, ",", 1);
2555 yyerror("Please use commas to separate fields");
2558 str_ncat(str,"$$);",4);
2563 bufptr = str_get(linestr);
2564 yyerror("Format not terminated");
2565 return froot.f_next;
2572 cshlen = strlen(cshname);