1 /* $Header: toke.c,v 3.0.1.4 89/12/21 20:26:56 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.4 89/12/21 20:26:56 lwall
10 * patch7: -d switch incompatible with -p or -n
11 * patch7: " ''$foo'' " didn't parse right
12 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
14 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
15 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
16 * patch5: } misadjusted expection of subsequent term or operator
17 * patch5: y/abcde// didn't work
19 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
20 * patch2: fixed a CLINE macro conflict
22 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
23 * patch1: disambiguated word after "sort" better
25 * Revision 3.0 89/10/18 15:32:33 lwall
34 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
39 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
41 #define META(c) ((c) | 128)
43 #define RETURN(retval) return (bufptr = s,(int)retval)
44 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
45 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
46 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
47 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
48 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
49 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
50 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
51 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
52 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
53 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
54 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
55 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
56 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
57 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
58 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
59 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
60 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
61 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
62 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
63 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
64 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
65 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
66 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
67 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
69 /* This bit of chicanery makes a unary function followed by
70 * a parenthesis into a function with one argument, highest precedence.
72 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
73 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
75 /* This does similarly for list operators, merely by pretending that the
76 * paren came before the listop rather than after.
78 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
79 (*s = META('('), bufptr = oldbufptr, '(') : \
80 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
86 while (s < bufend && isascii(*s) && isspace(*s))
95 #define UNI(f) return uni(f,s)
96 #define LOP(f) return lop(f,s)
135 #endif /* CRIPPLED_CC */
139 register char *s = bufptr;
142 static bool in_format = FALSE;
143 static bool firstline = TRUE;
144 extern int yychar; /* last token */
146 oldoldbufptr = oldbufptr;
153 fprintf(stderr,"Tokener at %s",s);
155 fprintf(stderr,"Tokener at %s\n",s);
159 if ((*s & 127) == '(')
162 warn("Unrecognized character \\%03o ignored", *s++);
168 goto retry; /* ignore stray nulls */
171 if (minus_n || minus_p || perldb) {
174 str_cat(linestr,"do 'perldb.pl'; print $@;");
175 if (minus_n || minus_p) {
176 str_cat(linestr,"line: while (<>) {");
178 str_cat(linestr,"@F=split(' ');");
180 oldoldbufptr = oldbufptr = s = str_get(linestr);
181 bufend = linestr->str_ptr + linestr->str_cur;
186 yylval.formval = load_format();
188 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
189 bufend = linestr->str_ptr + linestr->str_cur;
193 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
195 (void)mypclose(rsfp);
196 else if (rsfp != stdin)
199 if (minus_n || minus_p) {
200 str_set(linestr,minus_p ? "}continue{print;" : "");
201 str_cat(linestr,"}");
202 oldoldbufptr = oldbufptr = s = str_get(linestr);
203 bufend = linestr->str_ptr + linestr->str_cur;
204 minus_n = minus_p = 0;
207 oldoldbufptr = oldbufptr = s = str_get(linestr);
211 oldoldbufptr = oldbufptr = bufptr = s;
213 STR *str = Str_new(85,0);
215 str_sset(str,linestr);
216 astore(lineary,(int)line,str);
224 bufend = linestr->str_ptr + linestr->str_cur;
226 while (s < bufend && isspace(*s))
228 if (*s == ':') /* for csh's that have to exec sh scripts */
233 case ' ': case '\t': case '\f':
238 if (preprocess && s == str_get(linestr) &&
239 s[1] == ' ' && isdigit(s[2])) {
241 for (s += 2; isdigit(*s); s++) ;
243 while (s < d && isspace(*s)) s++;
246 s[strlen(s)-1] = '\0'; /* wipe out newline */
249 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
252 filename = savestr(s);
254 filename = savestr(origfilename);
255 oldoldbufptr = oldbufptr = s = str_get(linestr);
257 if (in_eval && !rsfp) {
259 while (s < d && *s != '\n')
272 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
275 case 'r': FTST(O_FTEREAD);
276 case 'w': FTST(O_FTEWRITE);
277 case 'x': FTST(O_FTEEXEC);
278 case 'o': FTST(O_FTEOWNED);
279 case 'R': FTST(O_FTRREAD);
280 case 'W': FTST(O_FTRWRITE);
281 case 'X': FTST(O_FTREXEC);
282 case 'O': FTST(O_FTROWNED);
283 case 'e': FTST(O_FTIS);
284 case 'z': FTST(O_FTZERO);
285 case 's': FTST(O_FTSIZE);
286 case 'f': FTST(O_FTFILE);
287 case 'd': FTST(O_FTDIR);
288 case 'l': FTST(O_FTLINK);
289 case 'p': FTST(O_FTPIPE);
290 case 'S': FTST(O_FTSOCK);
291 case 'u': FTST(O_FTSUID);
292 case 'g': FTST(O_FTSGID);
293 case 'k': FTST(O_FTSVTX);
294 case 'b': FTST(O_FTBLK);
295 case 'c': FTST(O_FTCHR);
296 case 't': FTST(O_FTTTY);
297 case 'T': FTST(O_FTTEXT);
298 case 'B': FTST(O_FTBINARY);
326 s = scanreg(s,bufend,tokenbuf);
327 yylval.stabval = stabent(tokenbuf,TRUE);
338 s = scanreg(s,bufend,tokenbuf);
339 yylval.stabval = stabent(tokenbuf,TRUE);
355 if (isspace(*s) || *s == '#')
356 cmdline = NOLINE; /* invalidate current command line number */
378 while (s < d && isspace(*s))
380 if (isalpha(*s) || *s == '_' || *s == '\'')
381 *(--s) = '\\'; /* force next ident to WORD */
435 while (isascii(*s) && \
436 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
438 while (d[-1] == '\'') \
444 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
446 s = scanreg(s,bufend,tokenbuf);
447 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
451 s = scanreg(s,bufend,tokenbuf);
452 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
460 yylval.stabval = stabent(tokenbuf,TRUE);
465 s = scanreg(s,bufend,tokenbuf);
468 yylval.stabval = stabent(tokenbuf,TRUE);
471 case '/': /* may either be division or pattern */
472 case '?': /* may either be conditional or pattern */
483 if (!expectterm || !isdigit(s[1])) {
492 case '0': case '1': case '2': case '3': case '4':
493 case '5': case '6': case '7': case '8': case '9':
494 case '\'': case '"': case '`':
498 case '\\': /* some magic to force next word to be a WORD */
499 s++; /* used by do and sub to force a separate namespace */
506 if (strEQ(d,"accept"))
508 if (strEQ(d,"atan2"))
520 if (strEQ(d,"continue"))
522 if (strEQ(d,"chdir"))
524 if (strEQ(d,"close"))
526 if (strEQ(d,"closedir"))
528 if (strEQ(d,"crypt")) {
534 if (strEQ(d,"chmod"))
536 if (strEQ(d,"chown"))
538 if (strEQ(d,"connect"))
542 if (strEQ(d,"chroot"))
549 while (s < d && isspace(*s))
551 if (isalpha(*s) || *s == '_')
552 *(--s) = '\\'; /* force next ident to WORD */
557 if (strEQ(d,"defined"))
559 if (strEQ(d,"delete"))
561 if (strEQ(d,"dbmopen"))
563 if (strEQ(d,"dbmclose"))
572 if (strEQ(d,"elsif")) {
576 if (strEQ(d,"eq") || strEQ(d,"EQ"))
580 if (strEQ(d,"eval")) {
581 allstabs = TRUE; /* must initialize everything since */
582 UNI(O_EVAL); /* we don't know what will be used */
590 if (strEQ(d,"exec")) {
594 if (strEQ(d,"endhostent"))
596 if (strEQ(d,"endnetent"))
598 if (strEQ(d,"endservent"))
600 if (strEQ(d,"endprotoent"))
602 if (strEQ(d,"endpwent"))
604 if (strEQ(d,"endgrent"))
611 if (strEQ(d,"foreach"))
613 if (strEQ(d,"format")) {
615 while (s < d && isspace(*s))
617 if (isalpha(*s) || *s == '_')
618 *(--s) = '\\'; /* force next ident to WORD */
620 allstabs = TRUE; /* must initialize everything since */
621 OPERATOR(FORMAT); /* we don't know what will be used */
625 if (strEQ(d,"fcntl"))
627 if (strEQ(d,"fileno"))
629 if (strEQ(d,"flock"))
634 if (strEQ(d,"gt") || strEQ(d,"GT"))
636 if (strEQ(d,"ge") || strEQ(d,"GE"))
642 if (strEQ(d,"gmtime"))
646 if (strnEQ(d,"get",3)) {
653 if (strEQ(d,"priority"))
655 if (strEQ(d,"protobyname"))
657 if (strEQ(d,"protobynumber"))
659 if (strEQ(d,"protoent"))
661 if (strEQ(d,"pwent"))
663 if (strEQ(d,"pwnam"))
665 if (strEQ(d,"pwuid"))
667 if (strEQ(d,"peername"))
670 else if (*d == 'h') {
671 if (strEQ(d,"hostbyname"))
673 if (strEQ(d,"hostbyaddr"))
675 if (strEQ(d,"hostent"))
678 else if (*d == 'n') {
679 if (strEQ(d,"netbyname"))
681 if (strEQ(d,"netbyaddr"))
683 if (strEQ(d,"netent"))
686 else if (*d == 's') {
687 if (strEQ(d,"servbyname"))
689 if (strEQ(d,"servbyport"))
691 if (strEQ(d,"servent"))
693 if (strEQ(d,"sockname"))
695 if (strEQ(d,"sockopt"))
698 else if (*d == 'g') {
699 if (strEQ(d,"grent"))
701 if (strEQ(d,"grnam"))
703 if (strEQ(d,"grgid"))
706 else if (*d == 'l') {
707 if (strEQ(d,"login"))
724 if (strEQ(d,"index"))
728 if (strEQ(d,"ioctl"))
747 if (strEQ(d,"local"))
749 if (strEQ(d,"length"))
751 if (strEQ(d,"lt") || strEQ(d,"LT"))
753 if (strEQ(d,"le") || strEQ(d,"LE"))
755 if (strEQ(d,"localtime"))
761 if (strEQ(d,"listen"))
763 if (strEQ(d,"lstat"))
779 RETURN(1); /* force error */
781 if (strEQ(d,"mkdir"))
788 if (strEQ(d,"ne") || strEQ(d,"NE"))
799 if (strEQ(d,"opendir"))
804 if (strEQ(d,"print")) {
805 checkcomma(s,"filehandle");
808 if (strEQ(d,"printf")) {
809 checkcomma(s,"filehandle");
812 if (strEQ(d,"push")) {
813 yylval.ival = O_PUSH;
820 if (strEQ(d,"package"))
836 if (strEQ(d,"return"))
838 if (strEQ(d,"reset"))
842 if (strEQ(d,"rename"))
846 if (strEQ(d,"rmdir"))
848 if (strEQ(d,"rindex"))
852 if (strEQ(d,"readdir"))
854 if (strEQ(d,"rewinddir"))
858 if (strEQ(d,"reverse"))
860 if (strEQ(d,"readlink"))
876 RETURN(1); /* force error */
885 if (strEQ(d,"select"))
891 if (strEQ(d,"setpgrp"))
893 if (strEQ(d,"setpriority"))
895 if (strEQ(d,"sethostent"))
897 if (strEQ(d,"setnetent"))
899 if (strEQ(d,"setservent"))
901 if (strEQ(d,"setprotoent"))
903 if (strEQ(d,"setpwent"))
905 if (strEQ(d,"setgrent"))
907 if (strEQ(d,"seekdir"))
909 if (strEQ(d,"setsockopt"))
916 if (strEQ(d,"shift"))
918 if (strEQ(d,"shutdown"))
929 if (strEQ(d,"sleep"))
936 if (strEQ(d,"socket"))
938 if (strEQ(d,"socketpair"))
940 if (strEQ(d,"sort")) {
941 checkcomma(s,"subroutine name");
943 while (s < d && isascii(*s) && isspace(*s)) s++;
944 if (*s == ';' || *s == ')') /* probably a close */
945 fatal("sort is now a reserved word");
946 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
947 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
948 strncpy(tokenbuf,s,d-s);
949 if (strNE(tokenbuf,"keys") &&
950 strNE(tokenbuf,"values") &&
951 strNE(tokenbuf,"split") &&
952 strNE(tokenbuf,"grep") &&
953 strNE(tokenbuf,"readdir") &&
954 strNE(tokenbuf,"unpack") &&
955 strNE(tokenbuf,"do") &&
956 (d >= bufend || isspace(*d)) )
957 *(--s) = '\\'; /* force next ident to WORD */
963 if (strEQ(d,"split"))
965 if (strEQ(d,"sprintf"))
973 if (strEQ(d,"srand"))
981 if (strEQ(d,"study")) {
987 if (strEQ(d,"substr"))
989 if (strEQ(d,"sub")) {
992 while (s < d && isspace(*s))
994 if (isalpha(*s) || *s == '_' || *s == '\'') {
996 str_sset(subname,curstname);
997 str_ncat(subname,"'",1);
999 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1003 str_ncat(subname,s,d-s);
1005 *(--s) = '\\'; /* force next ident to WORD */
1008 str_set(subname,"?");
1017 if (strEQ(d,"system")) {
1021 if (strEQ(d,"symlink"))
1023 if (strEQ(d,"syscall"))
1032 if (strEQ(d,"tr")) {
1037 RETURN(1); /* force error */
1039 if (strEQ(d,"tell"))
1041 if (strEQ(d,"telldir"))
1043 if (strEQ(d,"time"))
1045 if (strEQ(d,"times"))
1050 if (strEQ(d,"using"))
1052 if (strEQ(d,"until")) {
1056 if (strEQ(d,"unless")) {
1060 if (strEQ(d,"unlink"))
1062 if (strEQ(d,"undef"))
1064 if (strEQ(d,"unpack"))
1066 if (strEQ(d,"utime"))
1068 if (strEQ(d,"umask"))
1070 if (strEQ(d,"unshift")) {
1071 yylval.ival = O_UNSHIFT;
1077 if (strEQ(d,"values"))
1079 if (strEQ(d,"vec")) {
1086 if (strEQ(d,"while")) {
1090 if (strEQ(d,"warn"))
1092 if (strEQ(d,"wait"))
1094 if (strEQ(d,"wantarray")) {
1095 yylval.arg = op_new(1);
1096 yylval.arg->arg_type = O_ITEM;
1097 yylval.arg[1].arg_type = A_WANTARRAY;
1100 if (strEQ(d,"write"))
1105 if (!expectterm && strEQ(d,"x"))
1125 yylval.cval = savestr(d);
1127 if (oldoldbufptr && oldoldbufptr < bufptr) {
1128 while (isspace(*oldoldbufptr))
1130 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1132 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1135 return (CLINE, bufptr = s, (int)WORD);
1145 while (s < bufend && isascii(*s) && isspace(*s))
1147 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1149 while (isalpha(*s) || isdigit(*s) || *s == '_')
1151 while (s < bufend && isspace(*s))
1154 fatal("No comma allowed after %s", what);
1159 scanreg(s,send,dest)
1161 register char *send;
1175 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1178 while (d > dest+1 && d[-1] == '\'')
1184 if (*d == '{' /* } */ ) {
1187 while (s < send && brackets) {
1188 if (!reparse && (d == dest || (*s && isascii(*s) &&
1189 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1199 if (reparse && reparse == s - 1)
1213 if (*d == '^' && !isspace(*s))
1219 scanconst(string,len)
1223 register STR *retstr;
1228 if (index(string,'|')) {
1231 retstr = Str_new(86,len);
1232 str_nset(retstr,string,len);
1233 t = str_get(retstr);
1235 retstr->str_u.str_useful = 100;
1236 for (d=t; d < e; ) {
1244 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1248 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1252 (void)bcopy(d+1,d,e-d);
1271 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1283 retstr->str_cur = d - t;
1291 register SPAT *spat;
1297 Newz(801,spat,1,SPAT);
1298 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1299 curstash->tbl_spatroot = spat;
1308 spat->spat_flags |= SPAT_ONCE;
1311 fatal("panic: scanpat");
1313 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1315 yyerror("Search pattern not terminated");
1316 yylval.arg = Nullarg;
1320 while (*s == 'i' || *s == 'o') {
1324 spat->spat_flags |= SPAT_FOLD;
1328 spat->spat_flags |= SPAT_KEEP;
1332 for (d=tokenbuf; d < e; d++) {
1333 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1334 (*d == '@' && d[-1] != '\\')) {
1337 spat->spat_runtime = arg = op_new(1);
1338 arg->arg_type = O_ITEM;
1339 arg[1].arg_type = A_DOUBLE;
1340 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1341 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1342 d = scanreg(d,bufend,buf);
1343 (void)stabent(buf,TRUE); /* make sure it's created */
1344 for (; d < e; d++) {
1345 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1346 d = scanreg(d,bufend,buf);
1347 (void)stabent(buf,TRUE);
1349 else if (*d == '@' && d[-1] != '\\') {
1350 d = scanreg(d,bufend,buf);
1351 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1352 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1353 (void)stabent(buf,TRUE);
1356 goto got_pat; /* skip compiling for now */
1359 if (spat->spat_flags & SPAT_FOLD)
1363 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1365 if (*tokenbuf == '^') {
1366 spat->spat_short = scanconst(tokenbuf+1,len-1);
1367 if (spat->spat_short) {
1368 spat->spat_slen = spat->spat_short->str_cur;
1369 if (spat->spat_slen == len - 1)
1370 spat->spat_flags |= SPAT_ALL;
1374 spat->spat_flags |= SPAT_SCANFIRST;
1375 spat->spat_short = scanconst(tokenbuf,len);
1376 if (spat->spat_short) {
1377 spat->spat_slen = spat->spat_short->str_cur;
1378 if (spat->spat_slen == len)
1379 spat->spat_flags |= SPAT_ALL;
1382 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1383 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1384 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1385 spat->spat_flags & SPAT_FOLD,1);
1386 /* Note that this regexp can still be used if someone says
1387 * something like /a/ && s//b/; so we can't delete it.
1391 if (spat->spat_flags & SPAT_FOLD)
1395 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1397 if (spat->spat_short)
1398 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1399 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1400 spat->spat_flags & SPAT_FOLD,1);
1404 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1412 register SPAT *spat;
1417 Newz(802,spat,1,SPAT);
1418 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1419 curstash->tbl_spatroot = spat;
1421 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1423 yyerror("Substitution pattern not terminated");
1424 yylval.arg = Nullarg;
1428 for (d=tokenbuf; d < e; d++) {
1429 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1430 (*d == '@' && d[-1] != '\\')) {
1433 spat->spat_runtime = arg = op_new(1);
1434 arg->arg_type = O_ITEM;
1435 arg[1].arg_type = A_DOUBLE;
1436 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1437 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1438 d = scanreg(d,bufend,buf);
1439 (void)stabent(buf,TRUE); /* make sure it's created */
1441 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1442 d = scanreg(d,bufend,buf);
1443 (void)stabent(buf,TRUE);
1445 else if (*d == '@' && d[-1] != '\\') {
1446 d = scanreg(d,bufend,buf);
1447 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1448 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1449 (void)stabent(buf,TRUE);
1452 goto get_repl; /* skip compiling for now */
1455 if (*tokenbuf == '^') {
1456 spat->spat_short = scanconst(tokenbuf+1,len-1);
1457 if (spat->spat_short)
1458 spat->spat_slen = spat->spat_short->str_cur;
1461 spat->spat_flags |= SPAT_SCANFIRST;
1462 spat->spat_short = scanconst(tokenbuf,len);
1463 if (spat->spat_short)
1464 spat->spat_slen = spat->spat_short->str_cur;
1466 d = nsavestr(tokenbuf,len);
1470 yyerror("Substitution replacement not terminated");
1471 yylval.arg = Nullarg;
1474 spat->spat_repl = yylval.arg;
1475 spat->spat_flags |= SPAT_ONCE;
1476 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1477 spat->spat_flags |= SPAT_CONST;
1478 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1482 spat->spat_flags |= SPAT_CONST;
1483 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1484 e = tmpstr->str_ptr + tmpstr->str_cur;
1485 for (t = tmpstr->str_ptr; t < e; t++) {
1486 if (*t == '$' && t[1] && index("`'&+0123456789",t[1]))
1487 spat->spat_flags &= ~SPAT_CONST;
1490 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1493 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1494 spat->spat_repl[1].arg_type = A_SINGLE;
1495 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1499 spat->spat_flags &= ~SPAT_CONST;
1503 spat->spat_flags &= ~SPAT_ONCE;
1508 spat->spat_flags |= SPAT_FOLD;
1509 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1510 str_free(spat->spat_short); /* anchored opt doesn't do */
1511 spat->spat_short = Nullstr; /* case insensitive match */
1512 spat->spat_slen = 0;
1517 spat->spat_flags |= SPAT_KEEP;
1520 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1521 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1522 if (!spat->spat_runtime) {
1523 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1527 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1532 register SPAT *spat;
1534 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1535 if (spat->spat_short &&
1536 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1538 if (spat->spat_flags & SPAT_SCANFIRST) {
1539 str_free(spat->spat_short);
1540 spat->spat_short = Nullstr;
1543 str_free(spat->spat_regexp->regmust);
1544 spat->spat_regexp->regmust = Nullstr;
1548 if (!spat->spat_short || /* promote the better string */
1549 ((spat->spat_flags & SPAT_SCANFIRST) &&
1550 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1551 str_free(spat->spat_short); /* ok if null */
1552 spat->spat_short = spat->spat_regexp->regmust;
1553 spat->spat_regexp->regmust = Nullstr;
1554 spat->spat_flags |= SPAT_SCANFIRST;
1560 expand_charset(s,len,retlen)
1566 register char *d = t;
1568 register char *send = s + len;
1571 if (s[1] == '-' && s+2 < send) {
1572 for (i = s[0]; i <= s[2]; i++)
1581 return nsavestr(t,d-t);
1589 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1597 Newz(803,tbl,256,char);
1598 arg[2].arg_type = A_NULL;
1599 arg[2].arg_ptr.arg_cval = tbl;
1602 yyerror("Translation pattern not terminated");
1603 yylval.arg = Nullarg;
1606 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1607 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1608 free_arg(yylval.arg);
1611 yyerror("Translation replacement not terminated");
1612 yylval.arg = Nullarg;
1615 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1616 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1617 free_arg(yylval.arg);
1623 for (i = 0, j = 0; i < tlen; i++,j++) {
1626 tbl[t[i] & 0377] = r[j];
1641 register char *send;
1642 register bool makesingle = FALSE;
1643 register STAB *stab;
1644 bool alwaysdollar = FALSE;
1645 bool hereis = FALSE;
1647 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1652 arg->arg_type = O_ITEM;
1655 default: /* a substitution replacement */
1656 arg[1].arg_type = A_DOUBLE;
1657 makesingle = TRUE; /* maybe disable runtime scanning */
1667 arg[1].arg_type = A_SINGLE;
1672 else if (s[1] == '.')
1683 yyerror("Illegal octal digit");
1685 case '0': case '1': case '2': case '3': case '4':
1686 case '5': case '6': case '7':
1690 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1691 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1695 i += (*s++ & 7) + 9;
1700 (void)sprintf(tokenbuf,"%ld",i);
1701 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1702 #ifdef MICROPORT /* Microport 2.4 hack */
1703 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1705 (void)str_2num(arg[1].arg_ptr.arg_str);
1706 #endif /* Microport 2.4 hack */
1709 case '1': case '2': case '3': case '4': case '5':
1710 case '6': case '7': case '8': case '9': case '.':
1712 arg[1].arg_type = A_SINGLE;
1714 while (isdigit(*s) || *s == '_') {
1720 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1722 while (isdigit(*s) || *s == '_') {
1729 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1731 if (*s == '+' || *s == '-')
1737 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1738 #ifdef MICROPORT /* Microport 2.4 hack */
1739 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1741 (void)str_2num(arg[1].arg_ptr.arg_str);
1742 #endif /* Microport 2.4 hack */
1750 if (*++s && index("`'\"",*s)) {
1752 s = cpytill(d,s,bufend,term,&len);
1762 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1764 } /* assuming tokenbuf won't clobber */
1769 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1770 herewas = str_make(s,bufend-s);
1772 s--, herewas = str_make(s,d-s);
1773 s += herewas->str_cur;
1781 s = cpytill(d,s,bufend,'>',&len);
1786 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1788 if (d - tokenbuf != len) {
1790 arg[1].arg_type = A_GLOB;
1791 d = nsavestr(d,len);
1792 arg[1].arg_ptr.arg_stab = stab = genstab();
1793 stab_io(stab) = stio_new();
1794 stab_val(stab) = str_make(d,len);
1795 stab_val(stab)->str_u.str_hash = curstash;
1802 (void)strcpy(d,"ARGV");
1804 arg[1].arg_type = A_INDREAD;
1805 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1808 arg[1].arg_type = A_READ;
1809 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1810 yyerror("Can't get both program and data from <STDIN>");
1811 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1812 if (!stab_io(arg[1].arg_ptr.arg_stab))
1813 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1814 if (strEQ(d,"ARGV")) {
1815 (void)aadd(arg[1].arg_ptr.arg_stab);
1816 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1833 arg[1].arg_type = A_SINGLE;
1840 arg[1].arg_type = A_DOUBLE;
1841 makesingle = TRUE; /* maybe disable runtime scanning */
1842 alwaysdollar = TRUE; /* treat $) and $| as variables */
1847 arg[1].arg_type = A_BACKTICK;
1849 alwaysdollar = TRUE; /* treat $) and $| as variables */
1857 multi_open = multi_close = '<';
1860 if (tmps = index("([{< )]}> )]}>",term))
1864 tmpstr = Str_new(87,0);
1869 while (s < bufend &&
1870 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1876 fatal("EOF in string");
1878 str_nset(tmpstr,d+1,s-d);
1880 str_ncat(herewas,s,bufend-s);
1881 str_replace(linestr,herewas);
1882 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1883 bufend = linestr->str_ptr + linestr->str_cur;
1888 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1889 while (s >= bufend) { /* multiple line string? */
1891 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1893 fatal("EOF in string");
1897 STR *str = Str_new(88,0);
1899 str_sset(str,linestr);
1900 astore(lineary,(int)line,str);
1902 bufend = linestr->str_ptr + linestr->str_cur;
1904 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1907 str_scat(linestr,herewas);
1908 bufend = linestr->str_ptr + linestr->str_cur;
1912 str_scat(tmpstr,linestr);
1916 s = str_append_till(tmpstr,s,bufend,term,leave);
1920 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1921 tmpstr->str_len = tmpstr->str_cur + 1;
1922 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1924 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1925 arg[1].arg_ptr.arg_str = tmpstr;
1929 s = tmpstr->str_ptr;
1930 send = s + tmpstr->str_cur;
1931 while (s < send) { /* see if we can make SINGLE */
1932 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1934 *s = '$'; /* grandfather \digit in subst */
1935 if ((*s == '$' || *s == '@') && s+1 < send &&
1936 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
1937 makesingle = FALSE; /* force interpretation */
1939 else if (*s == '\\' && s+1 < send) {
1944 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1946 if ((*s == '$' && s+1 < send &&
1947 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
1948 (*s == '@' && s+1 < send) ) {
1949 len = scanreg(s,bufend,tokenbuf) - s;
1950 if (*s == '$' || strEQ(tokenbuf,"ARGV")
1951 || strEQ(tokenbuf,"ENV")
1952 || strEQ(tokenbuf,"SIG")
1953 || strEQ(tokenbuf,"INC") )
1954 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
1959 else if (*s == '\\' && s+1 < send) {
1963 if (!makesingle && (!leave || (*s && index(leave,*s))))
1967 case '0': case '1': case '2': case '3':
1968 case '4': case '5': case '6': case '7':
1970 if (s < send && *s && index("01234567",*s)) {
1974 if (s < send && *s && index("01234567",*s)) {
2003 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2004 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2006 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
2008 tmpstr->str_cur = d - tmpstr->str_ptr;
2009 arg[1].arg_ptr.arg_str = tmpstr;
2024 register FCMD *fprev = &froot;
2025 register FCMD *fcmd;
2032 Zero(&froot, 1, FCMD);
2033 while ((s = str_gets(linestr,rsfp, 0)) != Nullch) {
2036 STR *tmpstr = Str_new(89,0);
2038 str_sset(tmpstr,linestr);
2039 astore(lineary,(int)line,tmpstr);
2041 bufend = linestr->str_ptr + linestr->str_cur;
2042 if (strEQ(s,".\n")) {
2044 return froot.f_next;
2048 flinebeg = Nullfcmd;
2051 while (s < bufend) {
2052 Newz(804,fcmd,1,FCMD);
2053 fprev->f_next = fcmd;
2055 for (t=s; t < bufend && *t != '@' && *t != '^'; t++) {
2065 fcmd->f_pre = nsavestr(s, t-s);
2066 fcmd->f_presize = t-s;
2070 fcmd->f_flags |= FC_NOBLANK;
2072 fcmd->f_flags |= FC_REPEAT;
2076 flinebeg = fcmd; /* start values here */
2078 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2081 fcmd->f_type = F_LINES;
2085 fcmd->f_type = F_LEFT;
2090 fcmd->f_type = F_RIGHT;
2095 fcmd->f_type = F_CENTER;
2100 fcmd->f_type = F_LEFT;
2103 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2104 fcmd->f_flags |= FC_MORE;
2112 if ((s = str_gets(linestr, rsfp, 0)) == Nullch)
2116 STR *tmpstr = Str_new(90,0);
2118 str_sset(tmpstr,linestr);
2119 astore(lineary,(int)line,tmpstr);
2121 if (strEQ(s,".\n")) {
2123 yyerror("Missing values line");
2124 return froot.f_next;
2128 bufend = linestr->str_ptr + linestr->str_cur;
2129 str = flinebeg->f_unparsed = Str_new(91,bufend - bufptr);
2130 str->str_u.str_hash = curstash;
2131 str_nset(str,"(",1);
2132 flinebeg->f_line = line;
2133 if (!flinebeg->f_next->f_type || index(linestr->str_ptr, ',')) {
2134 str_scat(str,linestr);
2135 str_ncat(str,",$$);",5);
2138 while (s < bufend && isspace(*s))
2141 while (s < bufend) {
2143 case ' ': case '\t': case '\n': case ';':
2144 str_ncat(str, t, s - t);
2145 str_ncat(str, "," ,1);
2146 while (s < bufend && (isspace(*s) || *s == ';'))
2151 str_ncat(str, t, s - t);
2153 s = scanreg(s,bufend,tokenbuf);
2154 str_ncat(str, t, s - t);
2156 if (s < bufend && *s && index("$'\"",*s))
2157 str_ncat(str, ",", 1);
2159 case '"': case '\'':
2160 str_ncat(str, t, s - t);
2163 while (s < bufend && (*s != *t || s[-1] == '\\'))
2167 str_ncat(str, t, s - t);
2169 if (s < bufend && *s && index("$'\"",*s))
2170 str_ncat(str, ",", 1);
2173 yyerror("Please use commas to separate fields");
2176 str_ncat(str,"$$);",4);
2181 bufptr = str_get(linestr);
2182 yyerror("Format not terminated");
2183 return froot.f_next;
2190 cshlen = strlen(cshname);