1 /* $RCSfile: toke.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 12:05:56 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.0.1.2 91/06/07 12:05:56 lwall
10 * patch4: new copyright notice
11 * patch4: debugger lost track of lines in eval
12 * patch4: //o and s///o now optimize themselves fully at runtime
13 * patch4: added global modifier for pattern matches
15 * Revision 4.0.1.1 91/04/12 09:18:18 lwall
16 * patch1: perl -de "print" wouldn't stop at the first statement
18 * Revision 4.0 91/03/20 01:42:14 lwall
38 /* which backslash sequences to keep in m// or s// */
40 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";
42 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
49 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
51 #define META(c) ((c) | 128)
53 #define RETURN(retval) return (bufptr = s,(int)retval)
54 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
55 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
56 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
57 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
58 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
59 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
60 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
61 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
62 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
63 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
64 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
65 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
66 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
67 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
68 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
69 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
70 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
71 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
72 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
73 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
74 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
75 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
76 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
77 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
78 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
79 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
81 /* This bit of chicanery makes a unary function followed by
82 * a parenthesis into a function with one argument, highest precedence.
84 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
85 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
87 /* This does similarly for list operators, merely by pretending that the
88 * paren came before the listop rather than after.
90 #define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
91 (*s = META('('), bufptr = oldbufptr, '(') : \
92 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
93 /* grandfather return to old style */
94 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
100 while (s < bufend && isascii(*s) && isspace(*s))
109 #define UNI(f) return uni(f,s)
110 #define LOP(f) return lop(f,s)
150 #endif /* CRIPPLED_CC */
154 register char *s = bufptr;
157 static bool in_format = FALSE;
158 static bool firstline = TRUE;
159 extern int yychar; /* last token */
161 oldoldbufptr = oldbufptr;
168 fprintf(stderr,"Tokener at %s",s);
170 fprintf(stderr,"Tokener at %s\n",s);
174 if ((*s & 127) == '(')
177 warn("Unrecognized character \\%03o ignored", *s++ & 255);
183 if ((*s & 127) == '(')
186 warn("Unrecognized character \\%03o ignored", *s++ & 255);
190 goto fake_eof; /* emulate EOF on ^D or ^Z */
195 goto retry; /* ignore stray nulls */
198 if (minus_n || minus_p || perldb) {
202 char *pdb = getenv("PERLDB");
204 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
205 str_cat(linestr, ";");
207 if (minus_n || minus_p) {
208 str_cat(linestr,"line: while (<>) {");
210 str_cat(linestr,"chop;");
212 str_cat(linestr,"@F=split(' ');");
214 oldoldbufptr = oldbufptr = s = str_get(linestr);
215 bufend = linestr->str_ptr + linestr->str_cur;
221 yylval.formval = load_format();
223 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
224 bufend = linestr->str_ptr + linestr->str_cur;
230 #endif /* CRYPTSCRIPT */
232 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
236 (void)mypclose(rsfp);
237 else if (rsfp == stdin)
243 if (minus_n || minus_p) {
244 str_set(linestr,minus_p ? ";}continue{print" : "");
245 str_cat(linestr,";}");
246 oldoldbufptr = oldbufptr = s = str_get(linestr);
247 bufend = linestr->str_ptr + linestr->str_cur;
248 minus_n = minus_p = 0;
251 oldoldbufptr = oldbufptr = s = str_get(linestr);
253 RETURN(';'); /* not infinite loop because rsfp is NULL now */
255 if (doextract && *linestr->str_ptr == '#')
258 oldoldbufptr = oldbufptr = bufptr = s;
260 STR *str = Str_new(85,0);
262 str_sset(str,linestr);
263 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
271 bufend = linestr->str_ptr + linestr->str_cur;
272 if (curcmd->c_line == 1) {
273 if (*s == '#' && s[1] == '!') {
274 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
282 while (s < bufend && !isspace(*s))
285 while (s < bufend && isspace(*s))
288 Newz(899,newargv,origargc+3,char*);
290 while (s < bufend && !isspace(*s))
293 Copy(origargv+1, newargv+2, origargc+1, char*);
299 fatal("Can't exec %s", cmd);
303 while (s < bufend && isspace(*s))
305 if (*s == ':') /* for csh's that have to exec sh scripts */
310 case ' ': case '\t': case '\f': case '\r': case 013:
314 if (preprocess && s == str_get(linestr) &&
315 s[1] == ' ' && isdigit(s[2])) {
316 curcmd->c_line = atoi(s+2)-1;
317 for (s += 2; isdigit(*s); s++) ;
319 while (s < d && isspace(*s)) s++;
320 s[strlen(s)-1] = '\0'; /* wipe out newline */
323 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
326 curcmd->c_filestab = fstab(s);
328 curcmd->c_filestab = fstab(origfilename);
329 oldoldbufptr = oldbufptr = s = str_get(linestr);
333 if (in_eval && !rsfp) {
335 while (s < d && *s != '\n')
341 yylval.formval = load_format();
343 oldoldbufptr = oldbufptr = s = bufptr + 1;
354 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
357 case 'r': FTST(O_FTEREAD);
358 case 'w': FTST(O_FTEWRITE);
359 case 'x': FTST(O_FTEEXEC);
360 case 'o': FTST(O_FTEOWNED);
361 case 'R': FTST(O_FTRREAD);
362 case 'W': FTST(O_FTRWRITE);
363 case 'X': FTST(O_FTREXEC);
364 case 'O': FTST(O_FTROWNED);
365 case 'e': FTST(O_FTIS);
366 case 'z': FTST(O_FTZERO);
367 case 's': FTST(O_FTSIZE);
368 case 'f': FTST(O_FTFILE);
369 case 'd': FTST(O_FTDIR);
370 case 'l': FTST(O_FTLINK);
371 case 'p': FTST(O_FTPIPE);
372 case 'S': FTST(O_FTSOCK);
373 case 'u': FTST(O_FTSUID);
374 case 'g': FTST(O_FTSGID);
375 case 'k': FTST(O_FTSVTX);
376 case 'b': FTST(O_FTBLK);
377 case 'c': FTST(O_FTCHR);
378 case 't': FTST(O_FTTTY);
379 case 'T': FTST(O_FTTEXT);
380 case 'B': FTST(O_FTBINARY);
381 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
382 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
383 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
411 s = scanident(s,bufend,tokenbuf);
412 yylval.stabval = stabent(tokenbuf,TRUE);
423 s = scanident(s,bufend,tokenbuf);
424 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
440 if (isspace(*s) || *s == '#')
441 cmdline = NOLINE; /* invalidate current command line number */
444 if (curcmd->c_line < cmdline)
445 cmdline = curcmd->c_line;
463 while (s < d && isspace(*s))
465 if (isalpha(*s) || *s == '_' || *s == '\'')
466 *(--s) = '\\'; /* force next ident to WORD */
525 while (isascii(*s) && \
526 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
528 while (d[-1] == '\'') \
534 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
536 s = scanident(s,bufend,tokenbuf);
537 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
541 s = scanident(s,bufend,tokenbuf);
542 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
550 yylval.stabval = stabent(tokenbuf,TRUE);
555 s = scanident(s,bufend,tokenbuf);
558 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
561 case '/': /* may either be division or pattern */
562 case '?': /* may either be conditional or pattern */
573 if (!expectterm || !isdigit(s[1])) {
582 case '0': case '1': case '2': case '3': case '4':
583 case '5': case '6': case '7': case '8': case '9':
584 case '\'': case '"': case '`':
588 case '\\': /* some magic to force next word to be a WORD */
589 s++; /* used by do and sub to force a separate namespace */
594 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
595 ARG *arg = op_new(1);
598 arg->arg_type = O_ITEM;
600 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
602 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
603 arg[1].arg_type = A_SINGLE;
604 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
607 else if (strEQ(d,"__END__")) {
612 if (stab = stabent("DATA",FALSE)) {
613 stab->str_pok |= SP_MULTI;
614 stab_io(stab) = stio_new();
615 stab_io(stab)->ifp = rsfp;
616 #if defined(HAS_FCNTL) && defined(F_SETFD)
618 fcntl(fd,F_SETFD,fd >= 3);
621 stab_io(stab)->type = '|';
622 else if (rsfp == stdin)
623 stab_io(stab)->type = '-';
625 stab_io(stab)->type = '<';
635 if (strEQ(d,"alarm"))
637 if (strEQ(d,"accept"))
639 if (strEQ(d,"atan2"))
646 if (strEQ(d,"binmode"))
653 if (strEQ(d,"continue"))
655 if (strEQ(d,"chdir")) {
656 (void)stabent("ENV",TRUE); /* may use HOME */
659 if (strEQ(d,"close"))
661 if (strEQ(d,"closedir"))
665 if (strEQ(d,"caller"))
667 if (strEQ(d,"crypt")) {
673 if (strEQ(d,"chmod"))
675 if (strEQ(d,"chown"))
677 if (strEQ(d,"connect"))
681 if (strEQ(d,"chroot"))
688 while (s < d && isspace(*s))
690 if (isalpha(*s) || *s == '_')
691 *(--s) = '\\'; /* force next ident to WORD */
696 if (strEQ(d,"defined"))
698 if (strEQ(d,"delete"))
700 if (strEQ(d,"dbmopen"))
702 if (strEQ(d,"dbmclose"))
711 if (strEQ(d,"elsif")) {
712 yylval.ival = curcmd->c_line;
715 if (strEQ(d,"eq") || strEQ(d,"EQ"))
719 if (strEQ(d,"eval")) {
720 allstabs = TRUE; /* must initialize everything since */
721 UNI(O_EVAL); /* we don't know what will be used */
729 if (strEQ(d,"exec")) {
733 if (strEQ(d,"endhostent"))
735 if (strEQ(d,"endnetent"))
737 if (strEQ(d,"endservent"))
739 if (strEQ(d,"endprotoent"))
741 if (strEQ(d,"endpwent"))
743 if (strEQ(d,"endgrent"))
748 if (strEQ(d,"for") || strEQ(d,"foreach")) {
749 yylval.ival = curcmd->c_line;
752 if (strEQ(d,"format")) {
754 while (s < d && isspace(*s))
756 if (isalpha(*s) || *s == '_')
757 *(--s) = '\\'; /* force next ident to WORD */
759 allstabs = TRUE; /* must initialize everything since */
760 OPERATOR(FORMAT); /* we don't know what will be used */
764 if (strEQ(d,"fcntl"))
766 if (strEQ(d,"fileno"))
768 if (strEQ(d,"flock"))
773 if (strEQ(d,"gt") || strEQ(d,"GT"))
775 if (strEQ(d,"ge") || strEQ(d,"GE"))
781 if (strEQ(d,"gmtime"))
785 if (strnEQ(d,"get",3)) {
792 if (strEQ(d,"priority"))
794 if (strEQ(d,"protobyname"))
796 if (strEQ(d,"protobynumber"))
798 if (strEQ(d,"protoent"))
800 if (strEQ(d,"pwent"))
802 if (strEQ(d,"pwnam"))
804 if (strEQ(d,"pwuid"))
806 if (strEQ(d,"peername"))
809 else if (*d == 'h') {
810 if (strEQ(d,"hostbyname"))
812 if (strEQ(d,"hostbyaddr"))
814 if (strEQ(d,"hostent"))
817 else if (*d == 'n') {
818 if (strEQ(d,"netbyname"))
820 if (strEQ(d,"netbyaddr"))
822 if (strEQ(d,"netent"))
825 else if (*d == 's') {
826 if (strEQ(d,"servbyname"))
828 if (strEQ(d,"servbyport"))
830 if (strEQ(d,"servent"))
832 if (strEQ(d,"sockname"))
834 if (strEQ(d,"sockopt"))
837 else if (*d == 'g') {
838 if (strEQ(d,"grent"))
840 if (strEQ(d,"grnam"))
842 if (strEQ(d,"grgid"))
845 else if (*d == 'l') {
846 if (strEQ(d,"login"))
860 yylval.ival = curcmd->c_line;
863 if (strEQ(d,"index"))
867 if (strEQ(d,"ioctl"))
886 if (strEQ(d,"local"))
888 if (strEQ(d,"length"))
890 if (strEQ(d,"lt") || strEQ(d,"LT"))
892 if (strEQ(d,"le") || strEQ(d,"LE"))
894 if (strEQ(d,"localtime"))
900 if (strEQ(d,"listen"))
902 if (strEQ(d,"lstat"))
918 RETURN(1); /* force error */
922 if (strEQ(d,"mkdir"))
926 if (strEQ(d,"msgctl"))
928 if (strEQ(d,"msgget"))
930 if (strEQ(d,"msgrcv"))
932 if (strEQ(d,"msgsnd"))
941 if (strEQ(d,"ne") || strEQ(d,"NE"))
952 if (strEQ(d,"opendir"))
957 if (strEQ(d,"print")) {
958 checkcomma(s,"filehandle");
961 if (strEQ(d,"printf")) {
962 checkcomma(s,"filehandle");
965 if (strEQ(d,"push")) {
966 yylval.ival = O_PUSH;
973 if (strEQ(d,"package"))
995 if (strEQ(d,"return"))
997 if (strEQ(d,"require")) {
998 allstabs = TRUE; /* must initialize everything since */
999 UNI(O_REQUIRE); /* we don't know what will be used */
1001 if (strEQ(d,"reset"))
1003 if (strEQ(d,"redo"))
1005 if (strEQ(d,"rename"))
1007 if (strEQ(d,"rand"))
1009 if (strEQ(d,"rmdir"))
1011 if (strEQ(d,"rindex"))
1013 if (strEQ(d,"read"))
1015 if (strEQ(d,"readdir"))
1017 if (strEQ(d,"rewinddir"))
1019 if (strEQ(d,"recv"))
1021 if (strEQ(d,"reverse"))
1023 if (strEQ(d,"readlink"))
1039 RETURN(1); /* force error */
1046 if (strEQ(d,"scalar"))
1052 if (strEQ(d,"select"))
1054 if (strEQ(d,"seek"))
1056 if (strEQ(d,"semctl"))
1058 if (strEQ(d,"semget"))
1060 if (strEQ(d,"semop"))
1062 if (strEQ(d,"send"))
1064 if (strEQ(d,"setpgrp"))
1066 if (strEQ(d,"setpriority"))
1067 FUN3(O_SETPRIORITY);
1068 if (strEQ(d,"sethostent"))
1070 if (strEQ(d,"setnetent"))
1072 if (strEQ(d,"setservent"))
1074 if (strEQ(d,"setprotoent"))
1076 if (strEQ(d,"setpwent"))
1078 if (strEQ(d,"setgrent"))
1080 if (strEQ(d,"seekdir"))
1082 if (strEQ(d,"setsockopt"))
1089 if (strEQ(d,"shift"))
1091 if (strEQ(d,"shmctl"))
1093 if (strEQ(d,"shmget"))
1095 if (strEQ(d,"shmread"))
1097 if (strEQ(d,"shmwrite"))
1099 if (strEQ(d,"shutdown"))
1110 if (strEQ(d,"sleep"))
1117 if (strEQ(d,"socket"))
1119 if (strEQ(d,"socketpair"))
1121 if (strEQ(d,"sort")) {
1122 checkcomma(s,"subroutine name");
1124 while (s < d && isascii(*s) && isspace(*s)) s++;
1125 if (*s == ';' || *s == ')') /* probably a close */
1126 fatal("sort is now a reserved word");
1127 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1128 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1129 strncpy(tokenbuf,s,d-s);
1130 if (strNE(tokenbuf,"keys") &&
1131 strNE(tokenbuf,"values") &&
1132 strNE(tokenbuf,"split") &&
1133 strNE(tokenbuf,"grep") &&
1134 strNE(tokenbuf,"readdir") &&
1135 strNE(tokenbuf,"unpack") &&
1136 strNE(tokenbuf,"do") &&
1137 (d >= bufend || isspace(*d)) )
1138 *(--s) = '\\'; /* force next ident to WORD */
1144 if (strEQ(d,"split"))
1146 if (strEQ(d,"sprintf"))
1148 if (strEQ(d,"splice")) {
1149 yylval.ival = O_SPLICE;
1154 if (strEQ(d,"sqrt"))
1158 if (strEQ(d,"srand"))
1164 if (strEQ(d,"stat"))
1166 if (strEQ(d,"study")) {
1172 if (strEQ(d,"substr"))
1174 if (strEQ(d,"sub")) {
1175 subline = curcmd->c_line;
1177 while (s < d && isspace(*s))
1179 if (isalpha(*s) || *s == '_' || *s == '\'') {
1181 str_sset(subname,curstname);
1182 str_ncat(subname,"'",1);
1184 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1188 str_ncat(subname,s,d-s);
1190 *(--s) = '\\'; /* force next ident to WORD */
1193 str_set(subname,"?");
1202 if (strEQ(d,"system")) {
1206 if (strEQ(d,"symlink"))
1208 if (strEQ(d,"syscall"))
1210 if (strEQ(d,"sysread"))
1212 if (strEQ(d,"syswrite"))
1221 if (strEQ(d,"tr")) {
1226 RETURN(1); /* force error */
1228 if (strEQ(d,"tell"))
1230 if (strEQ(d,"telldir"))
1232 if (strEQ(d,"time"))
1234 if (strEQ(d,"times"))
1236 if (strEQ(d,"truncate"))
1241 if (strEQ(d,"using"))
1243 if (strEQ(d,"until")) {
1244 yylval.ival = curcmd->c_line;
1247 if (strEQ(d,"unless")) {
1248 yylval.ival = curcmd->c_line;
1251 if (strEQ(d,"unlink"))
1253 if (strEQ(d,"undef"))
1255 if (strEQ(d,"unpack"))
1257 if (strEQ(d,"utime"))
1259 if (strEQ(d,"umask"))
1261 if (strEQ(d,"unshift")) {
1262 yylval.ival = O_UNSHIFT;
1268 if (strEQ(d,"values"))
1270 if (strEQ(d,"vec")) {
1277 if (strEQ(d,"while")) {
1278 yylval.ival = curcmd->c_line;
1281 if (strEQ(d,"warn"))
1283 if (strEQ(d,"wait"))
1285 if (strEQ(d,"waitpid"))
1287 if (strEQ(d,"wantarray")) {
1288 yylval.arg = op_new(1);
1289 yylval.arg->arg_type = O_ITEM;
1290 yylval.arg[1].arg_type = A_WANTARRAY;
1293 if (strEQ(d,"write"))
1298 if (!expectterm && strEQ(d,"x"))
1318 yylval.cval = savestr(d);
1320 if (oldoldbufptr && oldoldbufptr < bufptr) {
1321 while (isspace(*oldoldbufptr))
1323 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1325 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1328 return (CLINE, bufptr = s, (int)WORD);
1340 while (s < bufend && isascii(*s) && isspace(*s))
1342 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1344 while (isalpha(*s) || isdigit(*s) || *s == '_')
1346 while (s < bufend && isspace(*s))
1351 "tell eof times getlogin wait length shift umask getppid \
1352 cos exp int log rand sin sqrt ord wantarray",
1357 fatal("No comma allowed after %s", what);
1363 scanident(s,send,dest)
1365 register char *send;
1379 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1382 while (d > dest+1 && d[-1] == '\'')
1388 if (*d == '{' /* } */ ) {
1391 while (s < send && brackets) {
1392 if (!reparse && (d == dest || (*s && isascii(*s) &&
1393 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1403 if (reparse && reparse == s - 1)
1417 if (*d == '^' && (isupper(*s) || index("[\\]^_?",*s)))
1423 scanconst(spat,string,len)
1428 register STR *retstr;
1432 char *origstring = string;
1433 static char *vert = "|";
1435 if (ninstr(string, string+len, vert, vert+1))
1439 retstr = Str_new(86,len);
1440 str_nset(retstr,string,len);
1441 t = str_get(retstr);
1443 retstr->str_u.str_useful = 100;
1444 for (d=t; d < e; ) {
1452 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1456 if (d[1] && index("wWbB0123456789sSdDlLuUE",d[1])) {
1460 (void)bcopy(d+1,d,e-d);
1485 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1497 retstr->str_cur = d - t;
1499 spat->spat_flags |= SPAT_ALL;
1500 if (*origstring != '^')
1501 spat->spat_flags |= SPAT_SCANFIRST;
1502 spat->spat_short = retstr;
1503 spat->spat_slen = d - t;
1511 register SPAT *spat;
1516 STR *str = Str_new(93,0);
1518 Newz(801,spat,1,SPAT);
1519 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1520 curstash->tbl_spatroot = spat;
1529 spat->spat_flags |= SPAT_ONCE;
1532 fatal("panic: scanpat");
1534 s = str_append_till(str,s,bufend,s[-1],patleave);
1537 yyerror("Search pattern not terminated");
1538 yylval.arg = Nullarg;
1542 while (*s == 'i' || *s == 'o' || *s == 'g') {
1546 spat->spat_flags |= SPAT_FOLD;
1550 spat->spat_flags |= SPAT_KEEP;
1554 spat->spat_flags |= SPAT_GLOBAL;
1558 e = str->str_ptr + len;
1559 for (d = str->str_ptr; d < e; d++) {
1562 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1566 spat->spat_runtime = arg = op_new(1);
1567 arg->arg_type = O_ITEM;
1568 arg[1].arg_type = A_DOUBLE;
1569 arg[1].arg_ptr.arg_str = str_smake(str);
1570 d = scanident(d,bufend,buf);
1571 (void)stabent(buf,TRUE); /* make sure it's created */
1572 for (; d < e; d++) {
1575 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1576 d = scanident(d,bufend,buf);
1577 (void)stabent(buf,TRUE);
1579 else if (*d == '@') {
1580 d = scanident(d,bufend,buf);
1581 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1582 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1583 (void)stabent(buf,TRUE);
1586 goto got_pat; /* skip compiling for now */
1589 if (spat->spat_flags & SPAT_FOLD)
1593 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1595 scanconst(spat,str->str_ptr,len);
1596 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1597 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1598 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1599 spat->spat_flags & SPAT_FOLD);
1600 /* Note that this regexp can still be used if someone says
1601 * something like /a/ && s//b/; so we can't delete it.
1605 if (spat->spat_flags & SPAT_FOLD)
1609 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1611 if (spat->spat_short)
1612 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1613 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1614 spat->spat_flags & SPAT_FOLD);
1619 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1627 register SPAT *spat;
1631 STR *str = Str_new(93,0);
1633 Newz(802,spat,1,SPAT);
1634 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1635 curstash->tbl_spatroot = spat;
1637 s = str_append_till(str,s+1,bufend,*s,patleave);
1640 yyerror("Substitution pattern not terminated");
1641 yylval.arg = Nullarg;
1645 e = str->str_ptr + len;
1646 for (d = str->str_ptr; d < e; d++) {
1649 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1653 spat->spat_runtime = arg = op_new(1);
1654 arg->arg_type = O_ITEM;
1655 arg[1].arg_type = A_DOUBLE;
1656 arg[1].arg_ptr.arg_str = str_smake(str);
1657 d = scanident(d,bufend,buf);
1658 (void)stabent(buf,TRUE); /* make sure it's created */
1660 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1661 d = scanident(d,bufend,buf);
1662 (void)stabent(buf,TRUE);
1664 else if (*d == '@' && d[-1] != '\\') {
1665 d = scanident(d,bufend,buf);
1666 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1667 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1668 (void)stabent(buf,TRUE);
1671 goto get_repl; /* skip compiling for now */
1674 scanconst(spat,str->str_ptr,len);
1679 yyerror("Substitution replacement not terminated");
1680 yylval.arg = Nullarg;
1683 spat->spat_repl = yylval.arg;
1684 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1685 spat->spat_flags |= SPAT_CONST;
1686 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1690 spat->spat_flags |= SPAT_CONST;
1691 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1692 e = tmpstr->str_ptr + tmpstr->str_cur;
1693 for (t = tmpstr->str_ptr; t < e; t++) {
1694 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1695 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1696 spat->spat_flags &= ~SPAT_CONST;
1699 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1702 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1703 spat->spat_repl[1].arg_type = A_SINGLE;
1704 spat->spat_repl = make_op(O_EVAL,2,
1708 spat->spat_flags &= ~SPAT_CONST;
1712 spat->spat_flags |= SPAT_GLOBAL;
1717 spat->spat_flags |= SPAT_FOLD;
1718 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1719 str_free(spat->spat_short); /* anchored opt doesn't do */
1720 spat->spat_short = Nullstr; /* case insensitive match */
1721 spat->spat_slen = 0;
1726 spat->spat_flags |= SPAT_KEEP;
1729 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1730 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1731 if (!spat->spat_runtime) {
1732 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1733 spat->spat_flags & SPAT_FOLD);
1736 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1742 register SPAT *spat;
1744 if (!spat->spat_short && spat->spat_regexp->regstart &&
1745 (!spat->spat_regexp->regmust || spat->spat_regexp->reganch & ROPT_ANCH)
1747 spat->spat_short = spat->spat_regexp->regstart;
1748 if (!(spat->spat_regexp->reganch & ROPT_ANCH))
1749 spat->spat_flags |= SPAT_SCANFIRST;
1751 else if (spat->spat_regexp->regmust) {/* is there a better short-circuit? */
1752 if (spat->spat_short &&
1753 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1755 if (spat->spat_flags & SPAT_SCANFIRST) {
1756 str_free(spat->spat_short);
1757 spat->spat_short = Nullstr;
1760 str_free(spat->spat_regexp->regmust);
1761 spat->spat_regexp->regmust = Nullstr;
1765 if (!spat->spat_short || /* promote the better string */
1766 ((spat->spat_flags & SPAT_SCANFIRST) &&
1767 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1768 str_free(spat->spat_short); /* ok if null */
1769 spat->spat_short = spat->spat_regexp->regmust;
1770 spat->spat_regexp->regmust = Nullstr;
1771 spat->spat_flags |= SPAT_SCANFIRST;
1777 expand_charset(s,len,retlen)
1783 register char *d = t;
1785 register char *send = s + len;
1787 while (s < send && d - t <= 256) {
1788 if (s[1] == '-' && s+2 < send) {
1789 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1798 return nsavestr(t,d-t);
1806 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1809 register short *tbl;
1817 New(803,tbl,256,short);
1818 arg[2].arg_type = A_NULL;
1819 arg[2].arg_ptr.arg_cval = (char*) tbl;
1822 yyerror("Translation pattern not terminated");
1823 yylval.arg = Nullarg;
1826 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1827 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1828 arg_free(yylval.arg);
1831 yyerror("Translation replacement not terminated");
1832 yylval.arg = Nullarg;
1835 complement = delete = squash = 0;
1836 while (*s == 'c' || *s == 'd' || *s == 's') {
1845 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1846 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1847 arg_free(yylval.arg);
1848 arg[2].arg_len = delete|squash;
1850 if (!rlen && !delete) {
1855 Zero(tbl, 256, short);
1856 for (i = 0; i < tlen; i++)
1857 tbl[t[i] & 0377] = -1;
1858 for (i = 0, j = 0; i < 256; i++) {
1872 for (i = 0; i < 256; i++)
1874 for (i = 0, j = 0; i < tlen; i++,j++) {
1877 if (tbl[t[i] & 0377] == -1)
1878 tbl[t[i] & 0377] = -2;
1883 if (tbl[t[i] & 0377] == -1)
1884 tbl[t[i] & 0377] = r[j] & 0377;
1900 register char *send;
1901 register bool makesingle = FALSE;
1902 register STAB *stab;
1903 bool alwaysdollar = FALSE;
1904 bool hereis = FALSE;
1907 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1912 arg->arg_type = O_ITEM;
1915 default: /* a substitution replacement */
1916 arg[1].arg_type = A_DOUBLE;
1917 makesingle = TRUE; /* maybe disable runtime scanning */
1927 arg[1].arg_type = A_SINGLE;
1932 else if (s[1] == '.')
1943 yyerror("Illegal octal digit");
1945 case '0': case '1': case '2': case '3': case '4':
1946 case '5': case '6': case '7':
1950 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1951 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1955 i += (*s++ & 7) + 9;
1960 str = Str_new(92,0);
1961 str_numset(str,(double)i);
1963 Safefree(str->str_ptr);
1964 str->str_ptr = Nullch;
1965 str->str_len = str->str_cur = 0;
1967 arg[1].arg_ptr.arg_str = str;
1970 case '1': case '2': case '3': case '4': case '5':
1971 case '6': case '7': case '8': case '9': case '.':
1973 arg[1].arg_type = A_SINGLE;
1975 while (isdigit(*s) || *s == '_') {
1981 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1983 while (isdigit(*s) || *s == '_') {
1990 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1992 if (*s == '+' || *s == '-')
1998 str = Str_new(92,0);
1999 str_numset(str,atof(tokenbuf));
2001 Safefree(str->str_ptr);
2002 str->str_ptr = Nullch;
2003 str->str_len = str->str_cur = 0;
2005 arg[1].arg_ptr.arg_str = str;
2013 if (*++s && index("`'\"",*s)) {
2015 s = cpytill(d,s,bufend,term,&len);
2025 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
2027 } /* assuming tokenbuf won't clobber */
2032 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2033 herewas = str_make(s,bufend-s);
2035 s--, herewas = str_make(s,d-s);
2036 s += herewas->str_cur;
2044 s = cpytill(d,s,bufend,'>',&len);
2049 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
2051 if (d - tokenbuf != len) {
2053 arg[1].arg_type = A_GLOB;
2054 d = nsavestr(d,len);
2055 arg[1].arg_ptr.arg_stab = stab = genstab();
2056 stab_io(stab) = stio_new();
2057 stab_val(stab) = str_make(d,len);
2064 (void)strcpy(d,"ARGV");
2066 arg[1].arg_type = A_INDREAD;
2067 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2070 arg[1].arg_type = A_READ;
2071 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2072 if (!stab_io(arg[1].arg_ptr.arg_stab))
2073 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2074 if (strEQ(d,"ARGV")) {
2075 (void)aadd(arg[1].arg_ptr.arg_stab);
2076 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2097 arg[1].arg_type = A_SINGLE;
2104 arg[1].arg_type = A_DOUBLE;
2105 makesingle = TRUE; /* maybe disable runtime scanning */
2106 alwaysdollar = TRUE; /* treat $) and $| as variables */
2111 arg[1].arg_type = A_BACKTICK;
2113 alwaysdollar = TRUE; /* treat $) and $| as variables */
2120 multi_start = curcmd->c_line;
2122 multi_open = multi_close = '<';
2125 if (term && (tmps = index("([{< )]}> )]}>",term)))
2129 tmpstr = Str_new(87,80);
2134 while (s < bufend &&
2135 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2140 curcmd->c_line = multi_start;
2141 fatal("EOF in string");
2143 str_nset(tmpstr,d+1,s-d);
2145 str_ncat(herewas,s,bufend-s);
2146 str_replace(linestr,herewas);
2147 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2148 bufend = linestr->str_ptr + linestr->str_cur;
2152 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2155 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2156 while (s >= bufend) { /* multiple line string? */
2158 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2159 curcmd->c_line = multi_start;
2160 fatal("EOF in string");
2164 STR *str = Str_new(88,0);
2166 str_sset(str,linestr);
2167 astore(stab_xarray(curcmd->c_filestab),
2168 (int)curcmd->c_line,str);
2170 bufend = linestr->str_ptr + linestr->str_cur;
2172 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2175 str_scat(linestr,herewas);
2176 bufend = linestr->str_ptr + linestr->str_cur;
2180 str_scat(tmpstr,linestr);
2184 s = str_append_till(tmpstr,s,bufend,term,leave);
2186 multi_end = curcmd->c_line;
2188 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2189 tmpstr->str_len = tmpstr->str_cur + 1;
2190 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2192 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2193 arg[1].arg_ptr.arg_str = tmpstr;
2197 s = tmpstr->str_ptr;
2198 send = s + tmpstr->str_cur;
2199 while (s < send) { /* see if we can make SINGLE */
2200 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2201 !alwaysdollar && s[1] != '0')
2202 *s = '$'; /* grandfather \digit in subst */
2203 if ((*s == '$' || *s == '@') && s+1 < send &&
2204 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2205 makesingle = FALSE; /* force interpretation */
2207 else if (*s == '\\' && s+1 < send) {
2208 if (index("lLuUE",s[1]))
2214 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2216 if ((*s == '$' && s+1 < send &&
2217 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2218 (*s == '@' && s+1 < send) ) {
2219 len = scanident(s,send,tokenbuf) - s;
2220 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2221 || strEQ(tokenbuf,"ENV")
2222 || strEQ(tokenbuf,"SIG")
2223 || strEQ(tokenbuf,"INC") )
2224 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2229 else if (*s == '\\' && s+1 < send) {
2233 if (!makesingle && (!leave || (*s && index(leave,*s))))
2237 case '0': case '1': case '2': case '3':
2238 case '4': case '5': case '6': case '7':
2239 *d++ = scanoct(s, 3, &len);
2243 *d++ = scanhex(++s, 2, &len);
2282 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2283 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2285 tmpstr->str_cur = d - tmpstr->str_ptr;
2286 arg[1].arg_ptr.arg_str = tmpstr;
2302 register FCMD *fprev = &froot;
2303 register FCMD *fcmd;
2310 Zero(&froot, 1, FCMD);
2312 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2314 if (in_eval && !rsfp) {
2315 eol = index(s,'\n');
2320 eol = bufend = linestr->str_ptr + linestr->str_cur;
2322 STR *tmpstr = Str_new(89,0);
2324 str_nset(tmpstr, s, eol-s);
2325 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2328 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2331 return froot.f_next;
2338 flinebeg = Nullfcmd;
2342 Newz(804,fcmd,1,FCMD);
2343 fprev->f_next = fcmd;
2345 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2355 fcmd->f_pre = nsavestr(s, t-s);
2356 fcmd->f_presize = t-s;
2360 fcmd->f_flags |= FC_NOBLANK;
2362 fcmd->f_flags |= FC_REPEAT;
2366 flinebeg = fcmd; /* start values here */
2368 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2371 fcmd->f_type = F_LINES;
2375 fcmd->f_type = F_LEFT;
2380 fcmd->f_type = F_RIGHT;
2385 fcmd->f_type = F_CENTER;
2391 /* Catch the special case @... and handle it as a string
2393 if (*s == '.' && s[1] == '.') {
2394 goto default_format;
2396 fcmd->f_type = F_DECIMAL;
2400 /* Read a format in the form @####.####, where either group
2401 of ### may be empty, or the final .### may be missing. */
2409 fcmd->f_decimals = s-p;
2410 fcmd->f_flags |= FC_DP;
2412 fcmd->f_decimals = 0;
2418 fcmd->f_type = F_LEFT;
2421 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2422 fcmd->f_flags |= FC_MORE;
2431 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2434 if (in_eval && !rsfp) {
2435 eol = index(s,'\n');
2440 eol = bufend = linestr->str_ptr + linestr->str_cur;
2442 STR *tmpstr = Str_new(90,0);
2444 str_nset(tmpstr, s, eol-s);
2445 astore(stab_xarray(curcmd->c_filestab),
2446 (int)curcmd->c_line,tmpstr);
2448 if (strnEQ(s,".\n",2)) {
2450 yyerror("Missing values line");
2451 return froot.f_next;
2457 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2458 str->str_u.str_hash = curstash;
2459 str_nset(str,"(",1);
2460 flinebeg->f_line = curcmd->c_line;
2462 if (!flinebeg->f_next->f_type || index(s, ',')) {
2464 str_ncat(str, s, eol - s - 1);
2465 str_ncat(str,",$$);",5);
2470 while (s < eol && isspace(*s))
2475 case ' ': case '\t': case '\n': case ';':
2476 str_ncat(str, t, s - t);
2477 str_ncat(str, "," ,1);
2478 while (s < eol && (isspace(*s) || *s == ';'))
2483 str_ncat(str, t, s - t);
2485 s = scanident(s,eol,tokenbuf);
2486 str_ncat(str, t, s - t);
2488 if (s < eol && *s && index("$'\"",*s))
2489 str_ncat(str, ",", 1);
2491 case '"': case '\'':
2492 str_ncat(str, t, s - t);
2495 while (s < eol && (*s != *t || s[-1] == '\\'))
2499 str_ncat(str, t, s - t);
2501 if (s < eol && *s && index("$'\"",*s))
2502 str_ncat(str, ",", 1);
2505 yyerror("Please use commas to separate fields");
2508 str_ncat(str,"$$);",4);
2513 bufptr = str_get(linestr);
2514 yyerror("Format not terminated");
2515 return froot.f_next;
2522 cshlen = strlen(cshname);