1 /* $RCSfile: toke.c,v $$Revision: 4.0.1.3 $$Date: 91/06/10 01:32:26 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.0.1.3 91/06/10 01:32:26 lwall
10 * patch10: m'$foo' now treats string as single quoted
11 * patch10: certain pattern optimizations were botched
13 * Revision 4.0.1.2 91/06/07 12:05:56 lwall
14 * patch4: new copyright notice
15 * patch4: debugger lost track of lines in eval
16 * patch4: //o and s///o now optimize themselves fully at runtime
17 * patch4: added global modifier for pattern matches
19 * Revision 4.0.1.1 91/04/12 09:18:18 lwall
20 * patch1: perl -de "print" wouldn't stop at the first statement
22 * Revision 4.0 91/03/20 01:42:14 lwall
42 /* which backslash sequences to keep in m// or s// */
44 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";
46 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
/*
 * Tokener helper macros.
 *
 * CLINE and META are plain expressions.  Every other macro in this group
 * expands to a complete return statement: it stores the local scan
 * pointer s into bufptr and returns a token code cast to int.  Because they
 * return, each must be the last thing executed on its code path, and the
 * enclosing function must have a local named s.
 *
 * NOTE(review): the retval / f arguments are not parenthesized inside the
 * expansions, so only simple expressions (constants, identifiers) should be
 * passed.
 * NOTE(review): expectterm presumably records whether the next token should
 * be parsed as a term rather than an operator -- confirm against the lexer.
 */

/* Lower cmdline to curcmd->c_line when the latter is smaller; yields the new cmdline. */
53 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
/* OR 128 (the high bit) into a character value. */
55 #define META(c) ((c) | 128)

/* Return retval; expectterm is left unchanged. */
57 #define RETURN(retval) return (bufptr = s,(int)retval)
/* Return retval after setting expectterm to TRUE. */
58 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
/* Evaluate CLINE, set expectterm to FALSE, then return retval. */
59 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)

/*
 * The remaining macros take an operator code f, store it in yylval.ival,
 * set expectterm as noted, and return a fixed token code.
 */

/* Returns LOOPEX; expectterm = FALSE. */
60 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
/* Returns FILETEST; expectterm = TRUE. */
61 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
/* FUN0 .. FUN5 return FUNC0 .. FUNC5 (FUN2x returns FUNC2x); expectterm = FALSE. */
62 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
63 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
64 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
65 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
66 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
67 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
68 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
/* FL / FL2 return FLIST / FLIST2; expectterm = FALSE. */
69 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
70 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
/* HFUN returns HSHFUN with expectterm = TRUE; HFUN3 returns HSHFUN3 with expectterm = FALSE. */
71 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
72 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
/* Returns LVALFUN; expectterm = TRUE. */
73 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
/* AOP / MOP / EOP / ROP return ADDOP / MULOP / EQOP / RELOP; expectterm = TRUE. */
74 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
75 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
76 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
77 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
/* FOP, FOP2, FOP3, FOP4, FOP22, FOP25 return the matching FILOP* token; expectterm = FALSE. */
78 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
79 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
80 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
81 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
82 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
83 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
85 /* This bit of chicanery makes a unary function followed by
86 * a parenthesis into a function with one argument, highest precedence.
88 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
89 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
91 /* This does similarly for list operators, merely by pretending that the
92 * paren came before the listop rather than after.
94 #define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
95 (*s = META('('), bufptr = oldbufptr, '(') : \
96 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
97 /* grandfather return to old style */
98 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
104 while (s < bufend && isascii(*s) && isspace(*s))
113 #define UNI(f) return uni(f,s)
114 #define LOP(f) return lop(f,s)
154 #endif /* CRIPPLED_CC */
158 register char *s = bufptr;
161 static bool in_format = FALSE;
162 static bool firstline = TRUE;
163 extern int yychar; /* last token */
165 oldoldbufptr = oldbufptr;
172 fprintf(stderr,"Tokener at %s",s);
174 fprintf(stderr,"Tokener at %s\n",s);
178 if ((*s & 127) == '(')
181 warn("Unrecognized character \\%03o ignored", *s++ & 255);
187 if ((*s & 127) == '(')
190 warn("Unrecognized character \\%03o ignored", *s++ & 255);
194 goto fake_eof; /* emulate EOF on ^D or ^Z */
199 goto retry; /* ignore stray nulls */
202 if (minus_n || minus_p || perldb) {
206 char *pdb = getenv("PERLDB");
208 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
209 str_cat(linestr, ";");
211 if (minus_n || minus_p) {
212 str_cat(linestr,"line: while (<>) {");
214 str_cat(linestr,"chop;");
216 str_cat(linestr,"@F=split(' ');");
218 oldoldbufptr = oldbufptr = s = str_get(linestr);
219 bufend = linestr->str_ptr + linestr->str_cur;
225 yylval.formval = load_format();
227 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
228 bufend = linestr->str_ptr + linestr->str_cur;
234 #endif /* CRYPTSCRIPT */
236 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
240 (void)mypclose(rsfp);
241 else if (rsfp == stdin)
247 if (minus_n || minus_p) {
248 str_set(linestr,minus_p ? ";}continue{print" : "");
249 str_cat(linestr,";}");
250 oldoldbufptr = oldbufptr = s = str_get(linestr);
251 bufend = linestr->str_ptr + linestr->str_cur;
252 minus_n = minus_p = 0;
255 oldoldbufptr = oldbufptr = s = str_get(linestr);
257 RETURN(';'); /* not infinite loop because rsfp is NULL now */
259 if (doextract && *linestr->str_ptr == '#')
262 oldoldbufptr = oldbufptr = bufptr = s;
264 STR *str = Str_new(85,0);
266 str_sset(str,linestr);
267 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
275 bufend = linestr->str_ptr + linestr->str_cur;
276 if (curcmd->c_line == 1) {
277 if (*s == '#' && s[1] == '!') {
278 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
286 while (s < bufend && !isspace(*s))
289 while (s < bufend && isspace(*s))
292 Newz(899,newargv,origargc+3,char*);
294 while (s < bufend && !isspace(*s))
297 Copy(origargv+1, newargv+2, origargc+1, char*);
303 fatal("Can't exec %s", cmd);
307 while (s < bufend && isspace(*s))
309 if (*s == ':') /* for csh's that have to exec sh scripts */
314 case ' ': case '\t': case '\f': case '\r': case 013:
318 if (preprocess && s == str_get(linestr) &&
319 s[1] == ' ' && isdigit(s[2])) {
320 curcmd->c_line = atoi(s+2)-1;
321 for (s += 2; isdigit(*s); s++) ;
323 while (s < d && isspace(*s)) s++;
324 s[strlen(s)-1] = '\0'; /* wipe out newline */
327 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
330 curcmd->c_filestab = fstab(s);
332 curcmd->c_filestab = fstab(origfilename);
333 oldoldbufptr = oldbufptr = s = str_get(linestr);
337 if (in_eval && !rsfp) {
339 while (s < d && *s != '\n')
345 yylval.formval = load_format();
347 oldoldbufptr = oldbufptr = s = bufptr + 1;
358 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
361 case 'r': FTST(O_FTEREAD);
362 case 'w': FTST(O_FTEWRITE);
363 case 'x': FTST(O_FTEEXEC);
364 case 'o': FTST(O_FTEOWNED);
365 case 'R': FTST(O_FTRREAD);
366 case 'W': FTST(O_FTRWRITE);
367 case 'X': FTST(O_FTREXEC);
368 case 'O': FTST(O_FTROWNED);
369 case 'e': FTST(O_FTIS);
370 case 'z': FTST(O_FTZERO);
371 case 's': FTST(O_FTSIZE);
372 case 'f': FTST(O_FTFILE);
373 case 'd': FTST(O_FTDIR);
374 case 'l': FTST(O_FTLINK);
375 case 'p': FTST(O_FTPIPE);
376 case 'S': FTST(O_FTSOCK);
377 case 'u': FTST(O_FTSUID);
378 case 'g': FTST(O_FTSGID);
379 case 'k': FTST(O_FTSVTX);
380 case 'b': FTST(O_FTBLK);
381 case 'c': FTST(O_FTCHR);
382 case 't': FTST(O_FTTTY);
383 case 'T': FTST(O_FTTEXT);
384 case 'B': FTST(O_FTBINARY);
385 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
386 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
387 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
415 s = scanident(s,bufend,tokenbuf);
416 yylval.stabval = stabent(tokenbuf,TRUE);
427 s = scanident(s,bufend,tokenbuf);
428 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
444 if (isspace(*s) || *s == '#')
445 cmdline = NOLINE; /* invalidate current command line number */
448 if (curcmd->c_line < cmdline)
449 cmdline = curcmd->c_line;
467 while (s < d && isspace(*s))
469 if (isalpha(*s) || *s == '_' || *s == '\'')
470 *(--s) = '\\'; /* force next ident to WORD */
529 while (isascii(*s) && \
530 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
532 while (d[-1] == '\'') \
538 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
540 s = scanident(s,bufend,tokenbuf);
541 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
545 s = scanident(s,bufend,tokenbuf);
546 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
554 yylval.stabval = stabent(tokenbuf,TRUE);
559 s = scanident(s,bufend,tokenbuf);
562 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
565 case '/': /* may either be division or pattern */
566 case '?': /* may either be conditional or pattern */
577 if (!expectterm || !isdigit(s[1])) {
586 case '0': case '1': case '2': case '3': case '4':
587 case '5': case '6': case '7': case '8': case '9':
588 case '\'': case '"': case '`':
592 case '\\': /* some magic to force next word to be a WORD */
593 s++; /* used by do and sub to force a separate namespace */
598 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
599 ARG *arg = op_new(1);
602 arg->arg_type = O_ITEM;
604 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
606 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
607 arg[1].arg_type = A_SINGLE;
608 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
611 else if (strEQ(d,"__END__")) {
616 if (stab = stabent("DATA",FALSE)) {
617 stab->str_pok |= SP_MULTI;
618 stab_io(stab) = stio_new();
619 stab_io(stab)->ifp = rsfp;
620 #if defined(HAS_FCNTL) && defined(F_SETFD)
622 fcntl(fd,F_SETFD,fd >= 3);
625 stab_io(stab)->type = '|';
626 else if (rsfp == stdin)
627 stab_io(stab)->type = '-';
629 stab_io(stab)->type = '<';
639 if (strEQ(d,"alarm"))
641 if (strEQ(d,"accept"))
643 if (strEQ(d,"atan2"))
650 if (strEQ(d,"binmode"))
657 if (strEQ(d,"continue"))
659 if (strEQ(d,"chdir")) {
660 (void)stabent("ENV",TRUE); /* may use HOME */
663 if (strEQ(d,"close"))
665 if (strEQ(d,"closedir"))
669 if (strEQ(d,"caller"))
671 if (strEQ(d,"crypt")) {
677 if (strEQ(d,"chmod"))
679 if (strEQ(d,"chown"))
681 if (strEQ(d,"connect"))
685 if (strEQ(d,"chroot"))
692 while (s < d && isspace(*s))
694 if (isalpha(*s) || *s == '_')
695 *(--s) = '\\'; /* force next ident to WORD */
700 if (strEQ(d,"defined"))
702 if (strEQ(d,"delete"))
704 if (strEQ(d,"dbmopen"))
706 if (strEQ(d,"dbmclose"))
715 if (strEQ(d,"elsif")) {
716 yylval.ival = curcmd->c_line;
719 if (strEQ(d,"eq") || strEQ(d,"EQ"))
723 if (strEQ(d,"eval")) {
724 allstabs = TRUE; /* must initialize everything since */
725 UNI(O_EVAL); /* we don't know what will be used */
733 if (strEQ(d,"exec")) {
737 if (strEQ(d,"endhostent"))
739 if (strEQ(d,"endnetent"))
741 if (strEQ(d,"endservent"))
743 if (strEQ(d,"endprotoent"))
745 if (strEQ(d,"endpwent"))
747 if (strEQ(d,"endgrent"))
752 if (strEQ(d,"for") || strEQ(d,"foreach")) {
753 yylval.ival = curcmd->c_line;
756 if (strEQ(d,"format")) {
758 while (s < d && isspace(*s))
760 if (isalpha(*s) || *s == '_')
761 *(--s) = '\\'; /* force next ident to WORD */
763 allstabs = TRUE; /* must initialize everything since */
764 OPERATOR(FORMAT); /* we don't know what will be used */
768 if (strEQ(d,"fcntl"))
770 if (strEQ(d,"fileno"))
772 if (strEQ(d,"flock"))
777 if (strEQ(d,"gt") || strEQ(d,"GT"))
779 if (strEQ(d,"ge") || strEQ(d,"GE"))
785 if (strEQ(d,"gmtime"))
789 if (strnEQ(d,"get",3)) {
796 if (strEQ(d,"priority"))
798 if (strEQ(d,"protobyname"))
800 if (strEQ(d,"protobynumber"))
802 if (strEQ(d,"protoent"))
804 if (strEQ(d,"pwent"))
806 if (strEQ(d,"pwnam"))
808 if (strEQ(d,"pwuid"))
810 if (strEQ(d,"peername"))
813 else if (*d == 'h') {
814 if (strEQ(d,"hostbyname"))
816 if (strEQ(d,"hostbyaddr"))
818 if (strEQ(d,"hostent"))
821 else if (*d == 'n') {
822 if (strEQ(d,"netbyname"))
824 if (strEQ(d,"netbyaddr"))
826 if (strEQ(d,"netent"))
829 else if (*d == 's') {
830 if (strEQ(d,"servbyname"))
832 if (strEQ(d,"servbyport"))
834 if (strEQ(d,"servent"))
836 if (strEQ(d,"sockname"))
838 if (strEQ(d,"sockopt"))
841 else if (*d == 'g') {
842 if (strEQ(d,"grent"))
844 if (strEQ(d,"grnam"))
846 if (strEQ(d,"grgid"))
849 else if (*d == 'l') {
850 if (strEQ(d,"login"))
864 yylval.ival = curcmd->c_line;
867 if (strEQ(d,"index"))
871 if (strEQ(d,"ioctl"))
890 if (strEQ(d,"local"))
892 if (strEQ(d,"length"))
894 if (strEQ(d,"lt") || strEQ(d,"LT"))
896 if (strEQ(d,"le") || strEQ(d,"LE"))
898 if (strEQ(d,"localtime"))
904 if (strEQ(d,"listen"))
906 if (strEQ(d,"lstat"))
922 RETURN(1); /* force error */
926 if (strEQ(d,"mkdir"))
930 if (strEQ(d,"msgctl"))
932 if (strEQ(d,"msgget"))
934 if (strEQ(d,"msgrcv"))
936 if (strEQ(d,"msgsnd"))
945 if (strEQ(d,"ne") || strEQ(d,"NE"))
956 if (strEQ(d,"opendir"))
961 if (strEQ(d,"print")) {
962 checkcomma(s,"filehandle");
965 if (strEQ(d,"printf")) {
966 checkcomma(s,"filehandle");
969 if (strEQ(d,"push")) {
970 yylval.ival = O_PUSH;
977 if (strEQ(d,"package"))
999 if (strEQ(d,"return"))
1001 if (strEQ(d,"require")) {
1002 allstabs = TRUE; /* must initialize everything since */
1003 UNI(O_REQUIRE); /* we don't know what will be used */
1005 if (strEQ(d,"reset"))
1007 if (strEQ(d,"redo"))
1009 if (strEQ(d,"rename"))
1011 if (strEQ(d,"rand"))
1013 if (strEQ(d,"rmdir"))
1015 if (strEQ(d,"rindex"))
1017 if (strEQ(d,"read"))
1019 if (strEQ(d,"readdir"))
1021 if (strEQ(d,"rewinddir"))
1023 if (strEQ(d,"recv"))
1025 if (strEQ(d,"reverse"))
1027 if (strEQ(d,"readlink"))
1043 RETURN(1); /* force error */
1050 if (strEQ(d,"scalar"))
1056 if (strEQ(d,"select"))
1058 if (strEQ(d,"seek"))
1060 if (strEQ(d,"semctl"))
1062 if (strEQ(d,"semget"))
1064 if (strEQ(d,"semop"))
1066 if (strEQ(d,"send"))
1068 if (strEQ(d,"setpgrp"))
1070 if (strEQ(d,"setpriority"))
1071 FUN3(O_SETPRIORITY);
1072 if (strEQ(d,"sethostent"))
1074 if (strEQ(d,"setnetent"))
1076 if (strEQ(d,"setservent"))
1078 if (strEQ(d,"setprotoent"))
1080 if (strEQ(d,"setpwent"))
1082 if (strEQ(d,"setgrent"))
1084 if (strEQ(d,"seekdir"))
1086 if (strEQ(d,"setsockopt"))
1093 if (strEQ(d,"shift"))
1095 if (strEQ(d,"shmctl"))
1097 if (strEQ(d,"shmget"))
1099 if (strEQ(d,"shmread"))
1101 if (strEQ(d,"shmwrite"))
1103 if (strEQ(d,"shutdown"))
1114 if (strEQ(d,"sleep"))
1121 if (strEQ(d,"socket"))
1123 if (strEQ(d,"socketpair"))
1125 if (strEQ(d,"sort")) {
1126 checkcomma(s,"subroutine name");
1128 while (s < d && isascii(*s) && isspace(*s)) s++;
1129 if (*s == ';' || *s == ')') /* probably a close */
1130 fatal("sort is now a reserved word");
1131 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1132 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1133 strncpy(tokenbuf,s,d-s);
1134 if (strNE(tokenbuf,"keys") &&
1135 strNE(tokenbuf,"values") &&
1136 strNE(tokenbuf,"split") &&
1137 strNE(tokenbuf,"grep") &&
1138 strNE(tokenbuf,"readdir") &&
1139 strNE(tokenbuf,"unpack") &&
1140 strNE(tokenbuf,"do") &&
1141 (d >= bufend || isspace(*d)) )
1142 *(--s) = '\\'; /* force next ident to WORD */
1148 if (strEQ(d,"split"))
1150 if (strEQ(d,"sprintf"))
1152 if (strEQ(d,"splice")) {
1153 yylval.ival = O_SPLICE;
1158 if (strEQ(d,"sqrt"))
1162 if (strEQ(d,"srand"))
1168 if (strEQ(d,"stat"))
1170 if (strEQ(d,"study")) {
1176 if (strEQ(d,"substr"))
1178 if (strEQ(d,"sub")) {
1179 subline = curcmd->c_line;
1181 while (s < d && isspace(*s))
1183 if (isalpha(*s) || *s == '_' || *s == '\'') {
1185 str_sset(subname,curstname);
1186 str_ncat(subname,"'",1);
1188 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1192 str_ncat(subname,s,d-s);
1194 *(--s) = '\\'; /* force next ident to WORD */
1197 str_set(subname,"?");
1206 if (strEQ(d,"system")) {
1210 if (strEQ(d,"symlink"))
1212 if (strEQ(d,"syscall"))
1214 if (strEQ(d,"sysread"))
1216 if (strEQ(d,"syswrite"))
1225 if (strEQ(d,"tr")) {
1230 RETURN(1); /* force error */
1232 if (strEQ(d,"tell"))
1234 if (strEQ(d,"telldir"))
1236 if (strEQ(d,"time"))
1238 if (strEQ(d,"times"))
1240 if (strEQ(d,"truncate"))
1245 if (strEQ(d,"using"))
1247 if (strEQ(d,"until")) {
1248 yylval.ival = curcmd->c_line;
1251 if (strEQ(d,"unless")) {
1252 yylval.ival = curcmd->c_line;
1255 if (strEQ(d,"unlink"))
1257 if (strEQ(d,"undef"))
1259 if (strEQ(d,"unpack"))
1261 if (strEQ(d,"utime"))
1263 if (strEQ(d,"umask"))
1265 if (strEQ(d,"unshift")) {
1266 yylval.ival = O_UNSHIFT;
1272 if (strEQ(d,"values"))
1274 if (strEQ(d,"vec")) {
1281 if (strEQ(d,"while")) {
1282 yylval.ival = curcmd->c_line;
1285 if (strEQ(d,"warn"))
1287 if (strEQ(d,"wait"))
1289 if (strEQ(d,"waitpid"))
1291 if (strEQ(d,"wantarray")) {
1292 yylval.arg = op_new(1);
1293 yylval.arg->arg_type = O_ITEM;
1294 yylval.arg[1].arg_type = A_WANTARRAY;
1297 if (strEQ(d,"write"))
1302 if (!expectterm && strEQ(d,"x"))
1322 yylval.cval = savestr(d);
1324 if (oldoldbufptr && oldoldbufptr < bufptr) {
1325 while (isspace(*oldoldbufptr))
1327 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1329 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1332 return (CLINE, bufptr = s, (int)WORD);
1344 while (s < bufend && isascii(*s) && isspace(*s))
1346 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1348 while (isalpha(*s) || isdigit(*s) || *s == '_')
1350 while (s < bufend && isspace(*s))
1355 "tell eof times getlogin wait length shift umask getppid \
1356 cos exp int log rand sin sqrt ord wantarray",
1361 fatal("No comma allowed after %s", what);
1367 scanident(s,send,dest)
1369 register char *send;
1383 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1386 while (d > dest+1 && d[-1] == '\'')
1392 if (*d == '{' /* } */ ) {
1395 while (s < send && brackets) {
1396 if (!reparse && (d == dest || (*s && isascii(*s) &&
1397 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1407 if (reparse && reparse == s - 1)
1421 if (*d == '^' && (isupper(*s) || index("[\\]^_?",*s)))
1427 scanconst(spat,string,len)
1432 register STR *retstr;
1436 char *origstring = string;
1437 static char *vert = "|";
1439 if (ninstr(string, string+len, vert, vert+1))
1443 retstr = Str_new(86,len);
1444 str_nset(retstr,string,len);
1445 t = str_get(retstr);
1447 retstr->str_u.str_useful = 100;
1448 for (d=t; d < e; ) {
1456 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1460 if (d[1] && index("wWbB0123456789sSdDlLuUE",d[1])) {
1464 (void)bcopy(d+1,d,e-d);
1489 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1501 retstr->str_cur = d - t;
1503 spat->spat_flags |= SPAT_ALL;
1504 if (*origstring != '^')
1505 spat->spat_flags |= SPAT_SCANFIRST;
1506 spat->spat_short = retstr;
1507 spat->spat_slen = d - t;
1515 register SPAT *spat;
1520 STR *str = Str_new(93,0);
1523 Newz(801,spat,1,SPAT);
1524 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1525 curstash->tbl_spatroot = spat;
1534 spat->spat_flags |= SPAT_ONCE;
1537 fatal("panic: scanpat");
1539 s = str_append_till(str,s,bufend,s[-1],patleave);
1542 yyerror("Search pattern not terminated");
1543 yylval.arg = Nullarg;
1547 while (*s == 'i' || *s == 'o' || *s == 'g') {
1551 spat->spat_flags |= SPAT_FOLD;
1555 spat->spat_flags |= SPAT_KEEP;
1559 spat->spat_flags |= SPAT_GLOBAL;
1563 e = str->str_ptr + len;
1568 for (; d < e; d++) {
1571 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1575 spat->spat_runtime = arg = op_new(1);
1576 arg->arg_type = O_ITEM;
1577 arg[1].arg_type = A_DOUBLE;
1578 arg[1].arg_ptr.arg_str = str_smake(str);
1579 d = scanident(d,bufend,buf);
1580 (void)stabent(buf,TRUE); /* make sure it's created */
1581 for (; d < e; d++) {
1584 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1585 d = scanident(d,bufend,buf);
1586 (void)stabent(buf,TRUE);
1588 else if (*d == '@') {
1589 d = scanident(d,bufend,buf);
1590 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1591 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1592 (void)stabent(buf,TRUE);
1595 goto got_pat; /* skip compiling for now */
1598 if (spat->spat_flags & SPAT_FOLD)
1602 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1604 scanconst(spat,str->str_ptr,len);
1605 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1606 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1607 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1608 spat->spat_flags & SPAT_FOLD);
1609 /* Note that this regexp can still be used if someone says
1610 * something like /a/ && s//b/; so we can't delete it.
1614 if (spat->spat_flags & SPAT_FOLD)
1618 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1620 if (spat->spat_short)
1621 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1622 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1623 spat->spat_flags & SPAT_FOLD);
1628 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1636 register SPAT *spat;
1640 STR *str = Str_new(93,0);
1642 Newz(802,spat,1,SPAT);
1643 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1644 curstash->tbl_spatroot = spat;
1646 s = str_append_till(str,s+1,bufend,*s,patleave);
1649 yyerror("Substitution pattern not terminated");
1650 yylval.arg = Nullarg;
1654 e = str->str_ptr + len;
1655 for (d = str->str_ptr; d < e; d++) {
1658 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1662 spat->spat_runtime = arg = op_new(1);
1663 arg->arg_type = O_ITEM;
1664 arg[1].arg_type = A_DOUBLE;
1665 arg[1].arg_ptr.arg_str = str_smake(str);
1666 d = scanident(d,bufend,buf);
1667 (void)stabent(buf,TRUE); /* make sure it's created */
1669 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1670 d = scanident(d,bufend,buf);
1671 (void)stabent(buf,TRUE);
1673 else if (*d == '@' && d[-1] != '\\') {
1674 d = scanident(d,bufend,buf);
1675 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1676 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1677 (void)stabent(buf,TRUE);
1680 goto get_repl; /* skip compiling for now */
1683 scanconst(spat,str->str_ptr,len);
1688 yyerror("Substitution replacement not terminated");
1689 yylval.arg = Nullarg;
1692 spat->spat_repl = yylval.arg;
1693 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1694 spat->spat_flags |= SPAT_CONST;
1695 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1699 spat->spat_flags |= SPAT_CONST;
1700 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1701 e = tmpstr->str_ptr + tmpstr->str_cur;
1702 for (t = tmpstr->str_ptr; t < e; t++) {
1703 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1704 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1705 spat->spat_flags &= ~SPAT_CONST;
1708 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1711 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1712 spat->spat_repl[1].arg_type = A_SINGLE;
1713 spat->spat_repl = make_op(O_EVAL,2,
1717 spat->spat_flags &= ~SPAT_CONST;
1721 spat->spat_flags |= SPAT_GLOBAL;
1726 spat->spat_flags |= SPAT_FOLD;
1727 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1728 str_free(spat->spat_short); /* anchored opt doesn't do */
1729 spat->spat_short = Nullstr; /* case insensitive match */
1730 spat->spat_slen = 0;
1735 spat->spat_flags |= SPAT_KEEP;
1738 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1739 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1740 if (!spat->spat_runtime) {
1741 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1742 spat->spat_flags & SPAT_FOLD);
1745 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1752 register SPAT *spat;
1754 if (!spat->spat_short && spat->spat_regexp->regstart &&
1755 (!spat->spat_regexp->regmust || spat->spat_regexp->reganch & ROPT_ANCH)
1757 if (!(spat->spat_regexp->reganch & ROPT_ANCH))
1758 spat->spat_flags |= SPAT_SCANFIRST;
1759 else if (spat->spat_flags & SPAT_FOLD)
1761 spat->spat_short = str_smake(spat->spat_regexp->regstart);
1763 else if (spat->spat_regexp->regmust) {/* is there a better short-circuit? */
1764 if (spat->spat_short &&
1765 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1767 if (spat->spat_flags & SPAT_SCANFIRST) {
1768 str_free(spat->spat_short);
1769 spat->spat_short = Nullstr;
1772 str_free(spat->spat_regexp->regmust);
1773 spat->spat_regexp->regmust = Nullstr;
1777 if (!spat->spat_short || /* promote the better string */
1778 ((spat->spat_flags & SPAT_SCANFIRST) &&
1779 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1780 str_free(spat->spat_short); /* ok if null */
1781 spat->spat_short = spat->spat_regexp->regmust;
1782 spat->spat_regexp->regmust = Nullstr;
1783 spat->spat_flags |= SPAT_SCANFIRST;
1789 expand_charset(s,len,retlen)
1795 register char *d = t;
1797 register char *send = s + len;
1799 while (s < send && d - t <= 256) {
1800 if (s[1] == '-' && s+2 < send) {
1801 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1810 return nsavestr(t,d-t);
1818 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1821 register short *tbl;
1829 New(803,tbl,256,short);
1830 arg[2].arg_type = A_NULL;
1831 arg[2].arg_ptr.arg_cval = (char*) tbl;
1834 yyerror("Translation pattern not terminated");
1835 yylval.arg = Nullarg;
1838 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1839 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1840 arg_free(yylval.arg);
1843 yyerror("Translation replacement not terminated");
1844 yylval.arg = Nullarg;
1847 complement = delete = squash = 0;
1848 while (*s == 'c' || *s == 'd' || *s == 's') {
1857 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1858 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1859 arg_free(yylval.arg);
1860 arg[2].arg_len = delete|squash;
1862 if (!rlen && !delete) {
1867 Zero(tbl, 256, short);
1868 for (i = 0; i < tlen; i++)
1869 tbl[t[i] & 0377] = -1;
1870 for (i = 0, j = 0; i < 256; i++) {
1884 for (i = 0; i < 256; i++)
1886 for (i = 0, j = 0; i < tlen; i++,j++) {
1889 if (tbl[t[i] & 0377] == -1)
1890 tbl[t[i] & 0377] = -2;
1895 if (tbl[t[i] & 0377] == -1)
1896 tbl[t[i] & 0377] = r[j] & 0377;
1912 register char *send;
1913 register bool makesingle = FALSE;
1914 register STAB *stab;
1915 bool alwaysdollar = FALSE;
1916 bool hereis = FALSE;
1919 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1924 arg->arg_type = O_ITEM;
1927 default: /* a substitution replacement */
1928 arg[1].arg_type = A_DOUBLE;
1929 makesingle = TRUE; /* maybe disable runtime scanning */
1939 arg[1].arg_type = A_SINGLE;
1944 else if (s[1] == '.')
1955 yyerror("Illegal octal digit");
1957 case '0': case '1': case '2': case '3': case '4':
1958 case '5': case '6': case '7':
1962 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1963 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1967 i += (*s++ & 7) + 9;
1972 str = Str_new(92,0);
1973 str_numset(str,(double)i);
1975 Safefree(str->str_ptr);
1976 str->str_ptr = Nullch;
1977 str->str_len = str->str_cur = 0;
1979 arg[1].arg_ptr.arg_str = str;
1982 case '1': case '2': case '3': case '4': case '5':
1983 case '6': case '7': case '8': case '9': case '.':
1985 arg[1].arg_type = A_SINGLE;
1987 while (isdigit(*s) || *s == '_') {
1993 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1995 while (isdigit(*s) || *s == '_') {
2002 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
2004 if (*s == '+' || *s == '-')
2010 str = Str_new(92,0);
2011 str_numset(str,atof(tokenbuf));
2013 Safefree(str->str_ptr);
2014 str->str_ptr = Nullch;
2015 str->str_len = str->str_cur = 0;
2017 arg[1].arg_ptr.arg_str = str;
2025 if (*++s && index("`'\"",*s)) {
2027 s = cpytill(d,s,bufend,term,&len);
2037 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
2039 } /* assuming tokenbuf won't clobber */
2044 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2045 herewas = str_make(s,bufend-s);
2047 s--, herewas = str_make(s,d-s);
2048 s += herewas->str_cur;
2056 s = cpytill(d,s,bufend,'>',&len);
2061 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
2063 if (d - tokenbuf != len) {
2065 arg[1].arg_type = A_GLOB;
2066 d = nsavestr(d,len);
2067 arg[1].arg_ptr.arg_stab = stab = genstab();
2068 stab_io(stab) = stio_new();
2069 stab_val(stab) = str_make(d,len);
2076 (void)strcpy(d,"ARGV");
2078 arg[1].arg_type = A_INDREAD;
2079 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2082 arg[1].arg_type = A_READ;
2083 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2084 if (!stab_io(arg[1].arg_ptr.arg_stab))
2085 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2086 if (strEQ(d,"ARGV")) {
2087 (void)aadd(arg[1].arg_ptr.arg_stab);
2088 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2109 arg[1].arg_type = A_SINGLE;
2116 arg[1].arg_type = A_DOUBLE;
2117 makesingle = TRUE; /* maybe disable runtime scanning */
2118 alwaysdollar = TRUE; /* treat $) and $| as variables */
2123 arg[1].arg_type = A_BACKTICK;
2125 alwaysdollar = TRUE; /* treat $) and $| as variables */
2132 multi_start = curcmd->c_line;
2134 multi_open = multi_close = '<';
2137 if (term && (tmps = index("([{< )]}> )]}>",term)))
2141 tmpstr = Str_new(87,80);
2146 while (s < bufend &&
2147 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2152 curcmd->c_line = multi_start;
2153 fatal("EOF in string");
2155 str_nset(tmpstr,d+1,s-d);
2157 str_ncat(herewas,s,bufend-s);
2158 str_replace(linestr,herewas);
2159 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2160 bufend = linestr->str_ptr + linestr->str_cur;
2164 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2167 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2168 while (s >= bufend) { /* multiple line string? */
2170 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2171 curcmd->c_line = multi_start;
2172 fatal("EOF in string");
2176 STR *str = Str_new(88,0);
2178 str_sset(str,linestr);
2179 astore(stab_xarray(curcmd->c_filestab),
2180 (int)curcmd->c_line,str);
2182 bufend = linestr->str_ptr + linestr->str_cur;
2184 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2187 str_scat(linestr,herewas);
2188 bufend = linestr->str_ptr + linestr->str_cur;
2192 str_scat(tmpstr,linestr);
2196 s = str_append_till(tmpstr,s,bufend,term,leave);
2198 multi_end = curcmd->c_line;
2200 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2201 tmpstr->str_len = tmpstr->str_cur + 1;
2202 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2204 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2205 arg[1].arg_ptr.arg_str = tmpstr;
2209 s = tmpstr->str_ptr;
2210 send = s + tmpstr->str_cur;
2211 while (s < send) { /* see if we can make SINGLE */
2212 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2213 !alwaysdollar && s[1] != '0')
2214 *s = '$'; /* grandfather \digit in subst */
2215 if ((*s == '$' || *s == '@') && s+1 < send &&
2216 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2217 makesingle = FALSE; /* force interpretation */
2219 else if (*s == '\\' && s+1 < send) {
2220 if (index("lLuUE",s[1]))
2226 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2228 if ((*s == '$' && s+1 < send &&
2229 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2230 (*s == '@' && s+1 < send) ) {
2231 len = scanident(s,send,tokenbuf) - s;
2232 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2233 || strEQ(tokenbuf,"ENV")
2234 || strEQ(tokenbuf,"SIG")
2235 || strEQ(tokenbuf,"INC") )
2236 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2241 else if (*s == '\\' && s+1 < send) {
2245 if (!makesingle && (!leave || (*s && index(leave,*s))))
2249 case '0': case '1': case '2': case '3':
2250 case '4': case '5': case '6': case '7':
2251 *d++ = scanoct(s, 3, &len);
2255 *d++ = scanhex(++s, 2, &len);
2294 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2295 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2297 tmpstr->str_cur = d - tmpstr->str_ptr;
2298 arg[1].arg_ptr.arg_str = tmpstr;
2314 register FCMD *fprev = &froot;
2315 register FCMD *fcmd;
2322 Zero(&froot, 1, FCMD);
2324 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2326 if (in_eval && !rsfp) {
2327 eol = index(s,'\n');
2332 eol = bufend = linestr->str_ptr + linestr->str_cur;
2334 STR *tmpstr = Str_new(89,0);
2336 str_nset(tmpstr, s, eol-s);
2337 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2340 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2343 return froot.f_next;
2350 flinebeg = Nullfcmd;
2354 Newz(804,fcmd,1,FCMD);
2355 fprev->f_next = fcmd;
2357 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2367 fcmd->f_pre = nsavestr(s, t-s);
2368 fcmd->f_presize = t-s;
2372 fcmd->f_flags |= FC_NOBLANK;
2374 fcmd->f_flags |= FC_REPEAT;
2378 flinebeg = fcmd; /* start values here */
2380 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2383 fcmd->f_type = F_LINES;
2387 fcmd->f_type = F_LEFT;
2392 fcmd->f_type = F_RIGHT;
2397 fcmd->f_type = F_CENTER;
2403 /* Catch the special case @... and handle it as a string
2405 if (*s == '.' && s[1] == '.') {
2406 goto default_format;
2408 fcmd->f_type = F_DECIMAL;
2412 /* Read a format in the form @####.####, where either group
2413 of ### may be empty, or the final .### may be missing. */
2421 fcmd->f_decimals = s-p;
2422 fcmd->f_flags |= FC_DP;
2424 fcmd->f_decimals = 0;
2430 fcmd->f_type = F_LEFT;
2433 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2434 fcmd->f_flags |= FC_MORE;
2443 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2446 if (in_eval && !rsfp) {
2447 eol = index(s,'\n');
2452 eol = bufend = linestr->str_ptr + linestr->str_cur;
2454 STR *tmpstr = Str_new(90,0);
2456 str_nset(tmpstr, s, eol-s);
2457 astore(stab_xarray(curcmd->c_filestab),
2458 (int)curcmd->c_line,tmpstr);
2460 if (strnEQ(s,".\n",2)) {
2462 yyerror("Missing values line");
2463 return froot.f_next;
2469 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2470 str->str_u.str_hash = curstash;
2471 str_nset(str,"(",1);
2472 flinebeg->f_line = curcmd->c_line;
2474 if (!flinebeg->f_next->f_type || index(s, ',')) {
2476 str_ncat(str, s, eol - s - 1);
2477 str_ncat(str,",$$);",5);
2482 while (s < eol && isspace(*s))
2487 case ' ': case '\t': case '\n': case ';':
2488 str_ncat(str, t, s - t);
2489 str_ncat(str, "," ,1);
2490 while (s < eol && (isspace(*s) || *s == ';'))
2495 str_ncat(str, t, s - t);
2497 s = scanident(s,eol,tokenbuf);
2498 str_ncat(str, t, s - t);
2500 if (s < eol && *s && index("$'\"",*s))
2501 str_ncat(str, ",", 1);
2503 case '"': case '\'':
2504 str_ncat(str, t, s - t);
2507 while (s < eol && (*s != *t || s[-1] == '\\'))
2511 str_ncat(str, t, s - t);
2513 if (s < eol && *s && index("$'\"",*s))
2514 str_ncat(str, ",", 1);
2517 yyerror("Please use commas to separate fields");
2520 str_ncat(str,"$$);",4);
2525 bufptr = str_get(linestr);
2526 yyerror("Format not terminated");
2527 return froot.f_next;
2534 cshlen = strlen(cshname);