1 /* $Header: toke.c,v 4.0 91/03/20 01:42:14 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 4.0 91/03/20 01:42:14 lwall
25 /* which backslash sequences to keep in m// or s// */
27 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";
29 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
36 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
38 #define META(c) ((c) | 128)
40 #define RETURN(retval) return (bufptr = s,(int)retval)
41 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
42 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
43 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
44 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
45 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
46 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
47 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
48 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
49 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
50 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
51 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
52 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
53 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
54 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
55 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
56 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
57 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
58 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
59 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
60 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
61 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
62 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
63 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
64 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
65 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
66 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
68 /* This bit of chicanery makes a unary function followed by
69 * a parenthesis into a function with one argument, highest precedence.
71 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
72 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
74 /* This does similarly for list operators, merely by pretending that the
75 * paren came before the listop rather than after.
77 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
78 (*s = META('('), bufptr = oldbufptr, '(') : \
79 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
80 /* grandfather return to old style */
81 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
87 while (s < bufend && isascii(*s) && isspace(*s))
96 #define UNI(f) return uni(f,s)
97 #define LOP(f) return lop(f,s)
136 #endif /* CRIPPLED_CC */
140 register char *s = bufptr;
143 static bool in_format = FALSE;
144 static bool firstline = TRUE;
145 extern int yychar; /* last token */
147 oldoldbufptr = oldbufptr;
154 fprintf(stderr,"Tokener at %s",s);
156 fprintf(stderr,"Tokener at %s\n",s);
160 if ((*s & 127) == '(')
163 warn("Unrecognized character \\%03o ignored", *s++ & 255);
169 if ((*s & 127) == '(')
172 warn("Unrecognized character \\%03o ignored", *s++ & 255);
176 goto fake_eof; /* emulate EOF on ^D or ^Z */
181 goto retry; /* ignore stray nulls */
184 if (minus_n || minus_p || perldb) {
188 char *pdb = getenv("PERLDB");
190 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
191 str_cat(linestr, ";");
193 if (minus_n || minus_p) {
194 str_cat(linestr,"line: while (<>) {");
196 str_cat(linestr,"chop;");
198 str_cat(linestr,"@F=split(' ');");
200 oldoldbufptr = oldbufptr = s = str_get(linestr);
201 bufend = linestr->str_ptr + linestr->str_cur;
207 yylval.formval = load_format();
209 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
210 bufend = linestr->str_ptr + linestr->str_cur;
216 #endif /* CRYPTSCRIPT */
218 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
222 (void)mypclose(rsfp);
223 else if (rsfp == stdin)
229 if (minus_n || minus_p) {
230 str_set(linestr,minus_p ? ";}continue{print" : "");
231 str_cat(linestr,";}");
232 oldoldbufptr = oldbufptr = s = str_get(linestr);
233 bufend = linestr->str_ptr + linestr->str_cur;
234 minus_n = minus_p = 0;
237 oldoldbufptr = oldbufptr = s = str_get(linestr);
239 RETURN(';'); /* not infinite loop because rsfp is NULL now */
241 if (doextract && *linestr->str_ptr == '#')
244 oldoldbufptr = oldbufptr = bufptr = s;
246 STR *str = Str_new(85,0);
248 str_sset(str,linestr);
249 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
257 bufend = linestr->str_ptr + linestr->str_cur;
258 if (curcmd->c_line == 1) {
259 if (*s == '#' && s[1] == '!') {
260 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
268 while (s < bufend && !isspace(*s))
271 while (s < bufend && isspace(*s))
274 Newz(899,newargv,origargc+3,char*);
276 while (s < bufend && !isspace(*s))
279 Copy(origargv+1, newargv+2, origargc+1, char*);
285 fatal("Can't exec %s", cmd);
289 while (s < bufend && isspace(*s))
291 if (*s == ':') /* for csh's that have to exec sh scripts */
296 case ' ': case '\t': case '\f': case '\r': case 013:
300 if (preprocess && s == str_get(linestr) &&
301 s[1] == ' ' && isdigit(s[2])) {
302 curcmd->c_line = atoi(s+2)-1;
303 for (s += 2; isdigit(*s); s++) ;
305 while (s < d && isspace(*s)) s++;
306 s[strlen(s)-1] = '\0'; /* wipe out newline */
309 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
312 curcmd->c_filestab = fstab(s);
314 curcmd->c_filestab = fstab(origfilename);
315 oldoldbufptr = oldbufptr = s = str_get(linestr);
319 if (in_eval && !rsfp) {
321 while (s < d && *s != '\n')
326 STR *str = Str_new(85,0);
328 str_nset(str,linestr->str_ptr, s - linestr->str_ptr);
329 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
330 str_chop(linestr, s);
334 yylval.formval = load_format();
336 oldoldbufptr = oldbufptr = s = bufptr + 1;
347 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
350 case 'r': FTST(O_FTEREAD);
351 case 'w': FTST(O_FTEWRITE);
352 case 'x': FTST(O_FTEEXEC);
353 case 'o': FTST(O_FTEOWNED);
354 case 'R': FTST(O_FTRREAD);
355 case 'W': FTST(O_FTRWRITE);
356 case 'X': FTST(O_FTREXEC);
357 case 'O': FTST(O_FTROWNED);
358 case 'e': FTST(O_FTIS);
359 case 'z': FTST(O_FTZERO);
360 case 's': FTST(O_FTSIZE);
361 case 'f': FTST(O_FTFILE);
362 case 'd': FTST(O_FTDIR);
363 case 'l': FTST(O_FTLINK);
364 case 'p': FTST(O_FTPIPE);
365 case 'S': FTST(O_FTSOCK);
366 case 'u': FTST(O_FTSUID);
367 case 'g': FTST(O_FTSGID);
368 case 'k': FTST(O_FTSVTX);
369 case 'b': FTST(O_FTBLK);
370 case 'c': FTST(O_FTCHR);
371 case 't': FTST(O_FTTTY);
372 case 'T': FTST(O_FTTEXT);
373 case 'B': FTST(O_FTBINARY);
374 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
375 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
376 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
404 s = scanident(s,bufend,tokenbuf);
405 yylval.stabval = stabent(tokenbuf,TRUE);
416 s = scanident(s,bufend,tokenbuf);
417 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
433 if (isspace(*s) || *s == '#')
434 cmdline = NOLINE; /* invalidate current command line number */
437 if (curcmd->c_line < cmdline)
438 cmdline = curcmd->c_line;
456 while (s < d && isspace(*s))
458 if (isalpha(*s) || *s == '_' || *s == '\'')
459 *(--s) = '\\'; /* force next ident to WORD */
518 while (isascii(*s) && \
519 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
521 while (d[-1] == '\'') \
527 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
529 s = scanident(s,bufend,tokenbuf);
530 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
534 s = scanident(s,bufend,tokenbuf);
535 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
543 yylval.stabval = stabent(tokenbuf,TRUE);
548 s = scanident(s,bufend,tokenbuf);
551 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
554 case '/': /* may either be division or pattern */
555 case '?': /* may either be conditional or pattern */
566 if (!expectterm || !isdigit(s[1])) {
575 case '0': case '1': case '2': case '3': case '4':
576 case '5': case '6': case '7': case '8': case '9':
577 case '\'': case '"': case '`':
581 case '\\': /* some magic to force next word to be a WORD */
582 s++; /* used by do and sub to force a separate namespace */
587 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
588 ARG *arg = op_new(1);
591 arg->arg_type = O_ITEM;
593 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
595 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
596 arg[1].arg_type = A_SINGLE;
597 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
600 else if (strEQ(d,"__END__")) {
605 if (stab = stabent("DATA",FALSE)) {
606 stab->str_pok |= SP_MULTI;
607 stab_io(stab) = stio_new();
608 stab_io(stab)->ifp = rsfp;
609 #if defined(HAS_FCNTL) && defined(F_SETFD)
611 fcntl(fd,F_SETFD,fd >= 3);
614 stab_io(stab)->type = '|';
615 else if (rsfp == stdin)
616 stab_io(stab)->type = '-';
618 stab_io(stab)->type = '<';
628 if (strEQ(d,"alarm"))
630 if (strEQ(d,"accept"))
632 if (strEQ(d,"atan2"))
639 if (strEQ(d,"binmode"))
646 if (strEQ(d,"continue"))
648 if (strEQ(d,"chdir")) {
649 (void)stabent("ENV",TRUE); /* may use HOME */
652 if (strEQ(d,"close"))
654 if (strEQ(d,"closedir"))
658 if (strEQ(d,"caller"))
660 if (strEQ(d,"crypt")) {
666 if (strEQ(d,"chmod"))
668 if (strEQ(d,"chown"))
670 if (strEQ(d,"connect"))
674 if (strEQ(d,"chroot"))
681 while (s < d && isspace(*s))
683 if (isalpha(*s) || *s == '_')
684 *(--s) = '\\'; /* force next ident to WORD */
689 if (strEQ(d,"defined"))
691 if (strEQ(d,"delete"))
693 if (strEQ(d,"dbmopen"))
695 if (strEQ(d,"dbmclose"))
704 if (strEQ(d,"elsif")) {
705 yylval.ival = curcmd->c_line;
708 if (strEQ(d,"eq") || strEQ(d,"EQ"))
712 if (strEQ(d,"eval")) {
713 allstabs = TRUE; /* must initialize everything since */
714 UNI(O_EVAL); /* we don't know what will be used */
722 if (strEQ(d,"exec")) {
726 if (strEQ(d,"endhostent"))
728 if (strEQ(d,"endnetent"))
730 if (strEQ(d,"endservent"))
732 if (strEQ(d,"endprotoent"))
734 if (strEQ(d,"endpwent"))
736 if (strEQ(d,"endgrent"))
741 if (strEQ(d,"for") || strEQ(d,"foreach")) {
742 yylval.ival = curcmd->c_line;
745 if (strEQ(d,"format")) {
747 while (s < d && isspace(*s))
749 if (isalpha(*s) || *s == '_')
750 *(--s) = '\\'; /* force next ident to WORD */
752 allstabs = TRUE; /* must initialize everything since */
753 OPERATOR(FORMAT); /* we don't know what will be used */
757 if (strEQ(d,"fcntl"))
759 if (strEQ(d,"fileno"))
761 if (strEQ(d,"flock"))
766 if (strEQ(d,"gt") || strEQ(d,"GT"))
768 if (strEQ(d,"ge") || strEQ(d,"GE"))
774 if (strEQ(d,"gmtime"))
778 if (strnEQ(d,"get",3)) {
785 if (strEQ(d,"priority"))
787 if (strEQ(d,"protobyname"))
789 if (strEQ(d,"protobynumber"))
791 if (strEQ(d,"protoent"))
793 if (strEQ(d,"pwent"))
795 if (strEQ(d,"pwnam"))
797 if (strEQ(d,"pwuid"))
799 if (strEQ(d,"peername"))
802 else if (*d == 'h') {
803 if (strEQ(d,"hostbyname"))
805 if (strEQ(d,"hostbyaddr"))
807 if (strEQ(d,"hostent"))
810 else if (*d == 'n') {
811 if (strEQ(d,"netbyname"))
813 if (strEQ(d,"netbyaddr"))
815 if (strEQ(d,"netent"))
818 else if (*d == 's') {
819 if (strEQ(d,"servbyname"))
821 if (strEQ(d,"servbyport"))
823 if (strEQ(d,"servent"))
825 if (strEQ(d,"sockname"))
827 if (strEQ(d,"sockopt"))
830 else if (*d == 'g') {
831 if (strEQ(d,"grent"))
833 if (strEQ(d,"grnam"))
835 if (strEQ(d,"grgid"))
838 else if (*d == 'l') {
839 if (strEQ(d,"login"))
853 yylval.ival = curcmd->c_line;
856 if (strEQ(d,"index"))
860 if (strEQ(d,"ioctl"))
879 if (strEQ(d,"local"))
881 if (strEQ(d,"length"))
883 if (strEQ(d,"lt") || strEQ(d,"LT"))
885 if (strEQ(d,"le") || strEQ(d,"LE"))
887 if (strEQ(d,"localtime"))
893 if (strEQ(d,"listen"))
895 if (strEQ(d,"lstat"))
911 RETURN(1); /* force error */
915 if (strEQ(d,"mkdir"))
919 if (strEQ(d,"msgctl"))
921 if (strEQ(d,"msgget"))
923 if (strEQ(d,"msgrcv"))
925 if (strEQ(d,"msgsnd"))
934 if (strEQ(d,"ne") || strEQ(d,"NE"))
945 if (strEQ(d,"opendir"))
950 if (strEQ(d,"print")) {
951 checkcomma(s,"filehandle");
954 if (strEQ(d,"printf")) {
955 checkcomma(s,"filehandle");
958 if (strEQ(d,"push")) {
959 yylval.ival = O_PUSH;
966 if (strEQ(d,"package"))
988 if (strEQ(d,"return"))
990 if (strEQ(d,"require")) {
991 allstabs = TRUE; /* must initialize everything since */
992 UNI(O_REQUIRE); /* we don't know what will be used */
994 if (strEQ(d,"reset"))
998 if (strEQ(d,"rename"))
1000 if (strEQ(d,"rand"))
1002 if (strEQ(d,"rmdir"))
1004 if (strEQ(d,"rindex"))
1006 if (strEQ(d,"read"))
1008 if (strEQ(d,"readdir"))
1010 if (strEQ(d,"rewinddir"))
1012 if (strEQ(d,"recv"))
1014 if (strEQ(d,"reverse"))
1016 if (strEQ(d,"readlink"))
1032 RETURN(1); /* force error */
1039 if (strEQ(d,"scalar"))
1045 if (strEQ(d,"select"))
1047 if (strEQ(d,"seek"))
1049 if (strEQ(d,"semctl"))
1051 if (strEQ(d,"semget"))
1053 if (strEQ(d,"semop"))
1055 if (strEQ(d,"send"))
1057 if (strEQ(d,"setpgrp"))
1059 if (strEQ(d,"setpriority"))
1060 FUN3(O_SETPRIORITY);
1061 if (strEQ(d,"sethostent"))
1063 if (strEQ(d,"setnetent"))
1065 if (strEQ(d,"setservent"))
1067 if (strEQ(d,"setprotoent"))
1069 if (strEQ(d,"setpwent"))
1071 if (strEQ(d,"setgrent"))
1073 if (strEQ(d,"seekdir"))
1075 if (strEQ(d,"setsockopt"))
1082 if (strEQ(d,"shift"))
1084 if (strEQ(d,"shmctl"))
1086 if (strEQ(d,"shmget"))
1088 if (strEQ(d,"shmread"))
1090 if (strEQ(d,"shmwrite"))
1092 if (strEQ(d,"shutdown"))
1103 if (strEQ(d,"sleep"))
1110 if (strEQ(d,"socket"))
1112 if (strEQ(d,"socketpair"))
1114 if (strEQ(d,"sort")) {
1115 checkcomma(s,"subroutine name");
1117 while (s < d && isascii(*s) && isspace(*s)) s++;
1118 if (*s == ';' || *s == ')') /* probably a close */
1119 fatal("sort is now a reserved word");
1120 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1121 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1122 strncpy(tokenbuf,s,d-s);
1123 if (strNE(tokenbuf,"keys") &&
1124 strNE(tokenbuf,"values") &&
1125 strNE(tokenbuf,"split") &&
1126 strNE(tokenbuf,"grep") &&
1127 strNE(tokenbuf,"readdir") &&
1128 strNE(tokenbuf,"unpack") &&
1129 strNE(tokenbuf,"do") &&
1130 (d >= bufend || isspace(*d)) )
1131 *(--s) = '\\'; /* force next ident to WORD */
1137 if (strEQ(d,"split"))
1139 if (strEQ(d,"sprintf"))
1141 if (strEQ(d,"splice")) {
1142 yylval.ival = O_SPLICE;
1147 if (strEQ(d,"sqrt"))
1151 if (strEQ(d,"srand"))
1157 if (strEQ(d,"stat"))
1159 if (strEQ(d,"study")) {
1165 if (strEQ(d,"substr"))
1167 if (strEQ(d,"sub")) {
1168 subline = curcmd->c_line;
1170 while (s < d && isspace(*s))
1172 if (isalpha(*s) || *s == '_' || *s == '\'') {
1174 str_sset(subname,curstname);
1175 str_ncat(subname,"'",1);
1177 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1181 str_ncat(subname,s,d-s);
1183 *(--s) = '\\'; /* force next ident to WORD */
1186 str_set(subname,"?");
1195 if (strEQ(d,"system")) {
1199 if (strEQ(d,"symlink"))
1201 if (strEQ(d,"syscall"))
1203 if (strEQ(d,"sysread"))
1205 if (strEQ(d,"syswrite"))
1214 if (strEQ(d,"tr")) {
1219 RETURN(1); /* force error */
1221 if (strEQ(d,"tell"))
1223 if (strEQ(d,"telldir"))
1225 if (strEQ(d,"time"))
1227 if (strEQ(d,"times"))
1229 if (strEQ(d,"truncate"))
1234 if (strEQ(d,"using"))
1236 if (strEQ(d,"until")) {
1237 yylval.ival = curcmd->c_line;
1240 if (strEQ(d,"unless")) {
1241 yylval.ival = curcmd->c_line;
1244 if (strEQ(d,"unlink"))
1246 if (strEQ(d,"undef"))
1248 if (strEQ(d,"unpack"))
1250 if (strEQ(d,"utime"))
1252 if (strEQ(d,"umask"))
1254 if (strEQ(d,"unshift")) {
1255 yylval.ival = O_UNSHIFT;
1261 if (strEQ(d,"values"))
1263 if (strEQ(d,"vec")) {
1270 if (strEQ(d,"while")) {
1271 yylval.ival = curcmd->c_line;
1274 if (strEQ(d,"warn"))
1276 if (strEQ(d,"wait"))
1278 if (strEQ(d,"waitpid"))
1280 if (strEQ(d,"wantarray")) {
1281 yylval.arg = op_new(1);
1282 yylval.arg->arg_type = O_ITEM;
1283 yylval.arg[1].arg_type = A_WANTARRAY;
1286 if (strEQ(d,"write"))
1291 if (!expectterm && strEQ(d,"x"))
1311 yylval.cval = savestr(d);
1313 if (oldoldbufptr && oldoldbufptr < bufptr) {
1314 while (isspace(*oldoldbufptr))
1316 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1318 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1321 return (CLINE, bufptr = s, (int)WORD);
1333 while (s < bufend && isascii(*s) && isspace(*s))
1335 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1337 while (isalpha(*s) || isdigit(*s) || *s == '_')
1339 while (s < bufend && isspace(*s))
1344 "tell eof times getlogin wait length shift umask getppid \
1345 cos exp int log rand sin sqrt ord wantarray",
1350 fatal("No comma allowed after %s", what);
1356 scanident(s,send,dest)
1358 register char *send;
1372 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1375 while (d > dest+1 && d[-1] == '\'')
1381 if (*d == '{' /* } */ ) {
1384 while (s < send && brackets) {
1385 if (!reparse && (d == dest || (*s && isascii(*s) &&
1386 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1396 if (reparse && reparse == s - 1)
1410 if (*d == '^' && (isupper(*s) || index("[\\]^_?",*s)))
1416 scanconst(string,len)
1420 register STR *retstr;
1425 if (index(string,'|')) {
1428 retstr = Str_new(86,len);
1429 str_nset(retstr,string,len);
1430 t = str_get(retstr);
1432 retstr->str_u.str_useful = 100;
1433 for (d=t; d < e; ) {
1441 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1445 if (d[1] && index("wWbB0123456789sSdDlLuUE",d[1])) {
1449 (void)bcopy(d+1,d,e-d);
1474 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1486 retstr->str_cur = d - t;
1494 register SPAT *spat;
1499 STR *str = Str_new(93,0);
1501 Newz(801,spat,1,SPAT);
1502 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1503 curstash->tbl_spatroot = spat;
1512 spat->spat_flags |= SPAT_ONCE;
1515 fatal("panic: scanpat");
1517 s = str_append_till(str,s,bufend,s[-1],patleave);
1520 yyerror("Search pattern not terminated");
1521 yylval.arg = Nullarg;
1525 while (*s == 'i' || *s == 'o') {
1529 spat->spat_flags |= SPAT_FOLD;
1533 spat->spat_flags |= SPAT_KEEP;
1537 e = str->str_ptr + len;
1538 for (d = str->str_ptr; d < e; d++) {
1541 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1545 spat->spat_runtime = arg = op_new(1);
1546 arg->arg_type = O_ITEM;
1547 arg[1].arg_type = A_DOUBLE;
1548 arg[1].arg_ptr.arg_str = str_smake(str);
1549 d = scanident(d,bufend,buf);
1550 (void)stabent(buf,TRUE); /* make sure it's created */
1551 for (; d < e; d++) {
1554 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1555 d = scanident(d,bufend,buf);
1556 (void)stabent(buf,TRUE);
1558 else if (*d == '@') {
1559 d = scanident(d,bufend,buf);
1560 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1561 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1562 (void)stabent(buf,TRUE);
1565 goto got_pat; /* skip compiling for now */
1568 if (spat->spat_flags & SPAT_FOLD)
1572 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1574 if (*str->str_ptr == '^') {
1575 spat->spat_short = scanconst(str->str_ptr+1,len-1);
1576 if (spat->spat_short) {
1577 spat->spat_slen = spat->spat_short->str_cur;
1578 if (spat->spat_slen == len - 1)
1579 spat->spat_flags |= SPAT_ALL;
1583 spat->spat_flags |= SPAT_SCANFIRST;
1584 spat->spat_short = scanconst(str->str_ptr,len);
1585 if (spat->spat_short) {
1586 spat->spat_slen = spat->spat_short->str_cur;
1587 if (spat->spat_slen == len)
1588 spat->spat_flags |= SPAT_ALL;
1591 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1592 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1593 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1594 spat->spat_flags & SPAT_FOLD);
1595 /* Note that this regexp can still be used if someone says
1596 * something like /a/ && s//b/; so we can't delete it.
1600 if (spat->spat_flags & SPAT_FOLD)
1604 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1606 if (spat->spat_short)
1607 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1608 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1609 spat->spat_flags & SPAT_FOLD);
1614 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1622 register SPAT *spat;
1626 STR *str = Str_new(93,0);
1628 Newz(802,spat,1,SPAT);
1629 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1630 curstash->tbl_spatroot = spat;
1632 s = str_append_till(str,s+1,bufend,*s,patleave);
1635 yyerror("Substitution pattern not terminated");
1636 yylval.arg = Nullarg;
1640 e = str->str_ptr + len;
1641 for (d = str->str_ptr; d < e; d++) {
1644 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1648 spat->spat_runtime = arg = op_new(1);
1649 arg->arg_type = O_ITEM;
1650 arg[1].arg_type = A_DOUBLE;
1651 arg[1].arg_ptr.arg_str = str_smake(str);
1652 d = scanident(d,bufend,buf);
1653 (void)stabent(buf,TRUE); /* make sure it's created */
1655 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1656 d = scanident(d,bufend,buf);
1657 (void)stabent(buf,TRUE);
1659 else if (*d == '@' && d[-1] != '\\') {
1660 d = scanident(d,bufend,buf);
1661 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1662 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1663 (void)stabent(buf,TRUE);
1666 goto get_repl; /* skip compiling for now */
1669 if (*str->str_ptr == '^') {
1670 spat->spat_short = scanconst(str->str_ptr+1,len-1);
1671 if (spat->spat_short)
1672 spat->spat_slen = spat->spat_short->str_cur;
1675 spat->spat_flags |= SPAT_SCANFIRST;
1676 spat->spat_short = scanconst(str->str_ptr,len);
1677 if (spat->spat_short)
1678 spat->spat_slen = spat->spat_short->str_cur;
1684 yyerror("Substitution replacement not terminated");
1685 yylval.arg = Nullarg;
1688 spat->spat_repl = yylval.arg;
1689 spat->spat_flags |= SPAT_ONCE;
1690 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1691 spat->spat_flags |= SPAT_CONST;
1692 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1696 spat->spat_flags |= SPAT_CONST;
1697 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1698 e = tmpstr->str_ptr + tmpstr->str_cur;
1699 for (t = tmpstr->str_ptr; t < e; t++) {
1700 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1701 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1702 spat->spat_flags &= ~SPAT_CONST;
1705 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1708 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1709 spat->spat_repl[1].arg_type = A_SINGLE;
1710 spat->spat_repl = make_op(O_EVAL,2,
1714 spat->spat_flags &= ~SPAT_CONST;
1718 spat->spat_flags &= ~SPAT_ONCE;
1723 spat->spat_flags |= SPAT_FOLD;
1724 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1725 str_free(spat->spat_short); /* anchored opt doesn't do */
1726 spat->spat_short = Nullstr; /* case insensitive match */
1727 spat->spat_slen = 0;
1732 spat->spat_flags |= SPAT_KEEP;
1735 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1736 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1737 if (!spat->spat_runtime) {
1738 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1739 spat->spat_flags & SPAT_FOLD);
1742 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1748 register SPAT *spat;
1750 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1751 if (spat->spat_short &&
1752 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1754 if (spat->spat_flags & SPAT_SCANFIRST) {
1755 str_free(spat->spat_short);
1756 spat->spat_short = Nullstr;
1759 str_free(spat->spat_regexp->regmust);
1760 spat->spat_regexp->regmust = Nullstr;
1764 if (!spat->spat_short || /* promote the better string */
1765 ((spat->spat_flags & SPAT_SCANFIRST) &&
1766 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1767 str_free(spat->spat_short); /* ok if null */
1768 spat->spat_short = spat->spat_regexp->regmust;
1769 spat->spat_regexp->regmust = Nullstr;
1770 spat->spat_flags |= SPAT_SCANFIRST;
1776 expand_charset(s,len,retlen)
1782 register char *d = t;
1784 register char *send = s + len;
1786 while (s < send && d - t <= 256) {
1787 if (s[1] == '-' && s+2 < send) {
1788 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1797 return nsavestr(t,d-t);
1805 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1808 register short *tbl;
1816 New(803,tbl,256,short);
1817 arg[2].arg_type = A_NULL;
1818 arg[2].arg_ptr.arg_cval = (char*) tbl;
1821 yyerror("Translation pattern not terminated");
1822 yylval.arg = Nullarg;
1825 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1826 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1827 arg_free(yylval.arg);
1830 yyerror("Translation replacement not terminated");
1831 yylval.arg = Nullarg;
1834 complement = delete = squash = 0;
1835 while (*s == 'c' || *s == 'd' || *s == 's') {
1844 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1845 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1846 arg_free(yylval.arg);
1847 arg[2].arg_len = delete|squash;
1849 if (!rlen && !delete) {
1854 Zero(tbl, 256, short);
1855 for (i = 0; i < tlen; i++)
1856 tbl[t[i] & 0377] = -1;
1857 for (i = 0, j = 0; i < 256; i++) {
1871 for (i = 0; i < 256; i++)
1873 for (i = 0, j = 0; i < tlen; i++,j++) {
1876 if (tbl[t[i] & 0377] == -1)
1877 tbl[t[i] & 0377] = -2;
1882 if (tbl[t[i] & 0377] == -1)
1883 tbl[t[i] & 0377] = r[j] & 0377;
1899 register char *send;
1900 register bool makesingle = FALSE;
1901 register STAB *stab;
1902 bool alwaysdollar = FALSE;
1903 bool hereis = FALSE;
1906 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1911 arg->arg_type = O_ITEM;
1914 default: /* a substitution replacement */
1915 arg[1].arg_type = A_DOUBLE;
1916 makesingle = TRUE; /* maybe disable runtime scanning */
1926 arg[1].arg_type = A_SINGLE;
1931 else if (s[1] == '.')
1942 yyerror("Illegal octal digit");
1944 case '0': case '1': case '2': case '3': case '4':
1945 case '5': case '6': case '7':
1949 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1950 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1954 i += (*s++ & 7) + 9;
1959 str = Str_new(92,0);
1960 str_numset(str,(double)i);
1962 Safefree(str->str_ptr);
1963 str->str_ptr = Nullch;
1964 str->str_len = str->str_cur = 0;
1966 arg[1].arg_ptr.arg_str = str;
1969 case '1': case '2': case '3': case '4': case '5':
1970 case '6': case '7': case '8': case '9': case '.':
1972 arg[1].arg_type = A_SINGLE;
1974 while (isdigit(*s) || *s == '_') {
1980 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1982 while (isdigit(*s) || *s == '_') {
1989 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1991 if (*s == '+' || *s == '-')
1997 str = Str_new(92,0);
1998 str_numset(str,atof(tokenbuf));
2000 Safefree(str->str_ptr);
2001 str->str_ptr = Nullch;
2002 str->str_len = str->str_cur = 0;
2004 arg[1].arg_ptr.arg_str = str;
2012 if (*++s && index("`'\"",*s)) {
2014 s = cpytill(d,s,bufend,term,&len);
2024 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
2026 } /* assuming tokenbuf won't clobber */
2031 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2032 herewas = str_make(s,bufend-s);
2034 s--, herewas = str_make(s,d-s);
2035 s += herewas->str_cur;
2043 s = cpytill(d,s,bufend,'>',&len);
2048 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
2050 if (d - tokenbuf != len) {
2052 arg[1].arg_type = A_GLOB;
2053 d = nsavestr(d,len);
2054 arg[1].arg_ptr.arg_stab = stab = genstab();
2055 stab_io(stab) = stio_new();
2056 stab_val(stab) = str_make(d,len);
2063 (void)strcpy(d,"ARGV");
2065 arg[1].arg_type = A_INDREAD;
2066 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2069 arg[1].arg_type = A_READ;
2070 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2071 if (!stab_io(arg[1].arg_ptr.arg_stab))
2072 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2073 if (strEQ(d,"ARGV")) {
2074 (void)aadd(arg[1].arg_ptr.arg_stab);
2075 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2096 arg[1].arg_type = A_SINGLE;
2103 arg[1].arg_type = A_DOUBLE;
2104 makesingle = TRUE; /* maybe disable runtime scanning */
2105 alwaysdollar = TRUE; /* treat $) and $| as variables */
2110 arg[1].arg_type = A_BACKTICK;
2112 alwaysdollar = TRUE; /* treat $) and $| as variables */
2118 multi_start = curcmd->c_line;
2120 multi_open = multi_close = '<';
2123 if (term && (tmps = index("([{< )]}> )]}>",term)))
2127 tmpstr = Str_new(87,80);
2132 while (s < bufend &&
2133 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2138 curcmd->c_line = multi_start;
2139 fatal("EOF in string");
2141 str_nset(tmpstr,d+1,s-d);
2143 str_ncat(herewas,s,bufend-s);
2144 str_replace(linestr,herewas);
2145 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2146 bufend = linestr->str_ptr + linestr->str_cur;
2150 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2153 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2154 while (s >= bufend) { /* multiple line string? */
2156 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2157 curcmd->c_line = multi_start;
2158 fatal("EOF in string");
2162 STR *str = Str_new(88,0);
2164 str_sset(str,linestr);
2165 astore(stab_xarray(curcmd->c_filestab),
2166 (int)curcmd->c_line,str);
2168 bufend = linestr->str_ptr + linestr->str_cur;
2170 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2173 str_scat(linestr,herewas);
2174 bufend = linestr->str_ptr + linestr->str_cur;
2178 str_scat(tmpstr,linestr);
2182 s = str_append_till(tmpstr,s,bufend,term,leave);
2184 multi_end = curcmd->c_line;
2186 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2187 tmpstr->str_len = tmpstr->str_cur + 1;
2188 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2190 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2191 arg[1].arg_ptr.arg_str = tmpstr;
2195 s = tmpstr->str_ptr;
2196 send = s + tmpstr->str_cur;
2197 while (s < send) { /* see if we can make SINGLE */
2198 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2199 !alwaysdollar && s[1] != '0')
2200 *s = '$'; /* grandfather \digit in subst */
2201 if ((*s == '$' || *s == '@') && s+1 < send &&
2202 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2203 makesingle = FALSE; /* force interpretation */
2205 else if (*s == '\\' && s+1 < send) {
2206 if (index("lLuUE",s[1]))
2212 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2214 if ((*s == '$' && s+1 < send &&
2215 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2216 (*s == '@' && s+1 < send) ) {
2217 len = scanident(s,send,tokenbuf) - s;
2218 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2219 || strEQ(tokenbuf,"ENV")
2220 || strEQ(tokenbuf,"SIG")
2221 || strEQ(tokenbuf,"INC") )
2222 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2227 else if (*s == '\\' && s+1 < send) {
2231 if (!makesingle && (!leave || (*s && index(leave,*s))))
2235 case '0': case '1': case '2': case '3':
2236 case '4': case '5': case '6': case '7':
2237 *d++ = scanoct(s, 3, &len);
2241 *d++ = scanhex(++s, 2, &len);
2280 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2281 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2283 tmpstr->str_cur = d - tmpstr->str_ptr;
2284 arg[1].arg_ptr.arg_str = tmpstr;
2300 register FCMD *fprev = &froot;
2301 register FCMD *fcmd;
2308 Zero(&froot, 1, FCMD);
2310 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2312 if (in_eval && !rsfp) {
2313 eol = index(s,'\n');
2318 eol = bufend = linestr->str_ptr + linestr->str_cur;
2320 STR *tmpstr = Str_new(89,0);
2322 str_nset(tmpstr, s, eol-s);
2323 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2326 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2329 return froot.f_next;
2336 flinebeg = Nullfcmd;
2340 Newz(804,fcmd,1,FCMD);
2341 fprev->f_next = fcmd;
2343 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2353 fcmd->f_pre = nsavestr(s, t-s);
2354 fcmd->f_presize = t-s;
2358 fcmd->f_flags |= FC_NOBLANK;
2360 fcmd->f_flags |= FC_REPEAT;
2364 flinebeg = fcmd; /* start values here */
2366 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2369 fcmd->f_type = F_LINES;
2373 fcmd->f_type = F_LEFT;
2378 fcmd->f_type = F_RIGHT;
2383 fcmd->f_type = F_CENTER;
2389 /* Catch the special case @... and handle it as a string
2391 if (*s == '.' && s[1] == '.') {
2392 goto default_format;
2394 fcmd->f_type = F_DECIMAL;
2398 /* Read a format in the form @####.####, where either group
2399 of ### may be empty, or the final .### may be missing. */
2407 fcmd->f_decimals = s-p;
2408 fcmd->f_flags |= FC_DP;
2410 fcmd->f_decimals = 0;
2416 fcmd->f_type = F_LEFT;
2419 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2420 fcmd->f_flags |= FC_MORE;
2429 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2432 if (in_eval && !rsfp) {
2433 eol = index(s,'\n');
2438 eol = bufend = linestr->str_ptr + linestr->str_cur;
2440 STR *tmpstr = Str_new(90,0);
2442 str_nset(tmpstr, s, eol-s);
2443 astore(stab_xarray(curcmd->c_filestab),
2444 (int)curcmd->c_line,tmpstr);
2446 if (strnEQ(s,".\n",2)) {
2448 yyerror("Missing values line");
2449 return froot.f_next;
2455 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2456 str->str_u.str_hash = curstash;
2457 str_nset(str,"(",1);
2458 flinebeg->f_line = curcmd->c_line;
2460 if (!flinebeg->f_next->f_type || index(s, ',')) {
2462 str_ncat(str, s, eol - s - 1);
2463 str_ncat(str,",$$);",5);
2468 while (s < eol && isspace(*s))
2473 case ' ': case '\t': case '\n': case ';':
2474 str_ncat(str, t, s - t);
2475 str_ncat(str, "," ,1);
2476 while (s < eol && (isspace(*s) || *s == ';'))
2481 str_ncat(str, t, s - t);
2483 s = scanident(s,eol,tokenbuf);
2484 str_ncat(str, t, s - t);
2486 if (s < eol && *s && index("$'\"",*s))
2487 str_ncat(str, ",", 1);
2489 case '"': case '\'':
2490 str_ncat(str, t, s - t);
2493 while (s < eol && (*s != *t || s[-1] == '\\'))
2497 str_ncat(str, t, s - t);
2499 if (s < eol && *s && index("$'\"",*s))
2500 str_ncat(str, ",", 1);
2503 yyerror("Please use commas to separate fields");
2506 str_ncat(str,"$$);",4);
2511 bufptr = str_get(linestr);
2512 yyerror("Format not terminated");
2513 return froot.f_next;
2520 cshlen = strlen(cshname);