1 /* $Header: toke.c,v 3.0.1.5 90/02/28 18:47:06 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.5 90/02/28 18:47:06 lwall
10 * patch9: return grandfathered to never be function call
11 * patch9: non-existent perldb.pl now gives reasonable error message
12 * patch9: perl can now start up other interpreters scripts
13 * patch9: line numbers were bogus during certain portions of foreach evaluation
14 * patch9: null hereis core dumped
16 * Revision 3.0.1.4 89/12/21 20:26:56 lwall
17 * patch7: -d switch incompatible with -p or -n
18 * patch7: " ''$foo'' " didn't parse right
19 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
21 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
22 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
23 * patch5: } misadjusted expection of subsequent term or operator
24 * patch5: y/abcde// didn't work
26 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
27 * patch2: fixed a CLINE macro conflict
29 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
30 * patch1: disambiguated word after "sort" better
32 * Revision 3.0 89/10/18 15:32:33 lwall
41 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
46 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
48 #define META(c) ((c) | 128)
50 #define RETURN(retval) return (bufptr = s,(int)retval)
51 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
52 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
53 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
54 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
55 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
56 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
57 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
58 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
59 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
60 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
61 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
62 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
63 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
64 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
65 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
66 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
67 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
68 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
69 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
70 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
71 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
72 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
73 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
74 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
76 /* This bit of chicanery makes a unary function followed by
77 * a parenthesis into a function with one argument, highest precedence.
79 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
80 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
82 /* This does similarly for list operators, merely by pretending that the
83 * paren came before the listop rather than after.
85 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
86 (*s = META('('), bufptr = oldbufptr, '(') : \
87 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
88 /* grandfather return to old style */
89 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
95 while (s < bufend && isascii(*s) && isspace(*s))
104 #define UNI(f) return uni(f,s)
105 #define LOP(f) return lop(f,s)
144 #endif /* CRIPPLED_CC */
148 register char *s = bufptr;
151 static bool in_format = FALSE;
152 static bool firstline = TRUE;
153 extern int yychar; /* last token */
155 oldoldbufptr = oldbufptr;
162 fprintf(stderr,"Tokener at %s",s);
164 fprintf(stderr,"Tokener at %s\n",s);
168 if ((*s & 127) == '(')
171 warn("Unrecognized character \\%03o ignored", *s++);
177 goto retry; /* ignore stray nulls */
180 if (minus_n || minus_p || perldb) {
184 "do 'perldb.pl' || die \"Can't find perldb.pl in @INC\"; print $@;");
185 if (minus_n || minus_p) {
186 str_cat(linestr,"line: while (<>) {");
188 str_cat(linestr,"@F=split(' ');");
190 oldoldbufptr = oldbufptr = s = str_get(linestr);
191 bufend = linestr->str_ptr + linestr->str_cur;
196 yylval.formval = load_format();
198 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
199 bufend = linestr->str_ptr + linestr->str_cur;
203 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
205 (void)mypclose(rsfp);
206 else if (rsfp != stdin)
209 if (minus_n || minus_p) {
210 str_set(linestr,minus_p ? "}continue{print;" : "");
211 str_cat(linestr,"}");
212 oldoldbufptr = oldbufptr = s = str_get(linestr);
213 bufend = linestr->str_ptr + linestr->str_cur;
214 minus_n = minus_p = 0;
217 oldoldbufptr = oldbufptr = s = str_get(linestr);
221 oldoldbufptr = oldbufptr = bufptr = s;
223 STR *str = Str_new(85,0);
225 str_sset(str,linestr);
226 astore(lineary,(int)line,str);
234 bufend = linestr->str_ptr + linestr->str_cur;
236 if (*s == '#' && s[1] == '!') {
237 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
245 while (s < bufend && !isspace(*s))
248 while (s < bufend && isspace(*s))
251 Newz(899,newargv,origargc+3,char*);
253 while (s < bufend && !isspace(*s))
256 Copy(origargv+1, newargv+2, origargc+1, char*);
262 fatal("Can't exec %s", cmd);
266 while (s < bufend && isspace(*s))
268 if (*s == ':') /* for csh's that have to exec sh scripts */
273 case ' ': case '\t': case '\f':
278 if (preprocess && s == str_get(linestr) &&
279 s[1] == ' ' && isdigit(s[2])) {
281 for (s += 2; isdigit(*s); s++) ;
283 while (s < d && isspace(*s)) s++;
286 s[strlen(s)-1] = '\0'; /* wipe out newline */
289 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
292 filename = savestr(s);
294 filename = savestr(origfilename);
295 oldoldbufptr = oldbufptr = s = str_get(linestr);
297 if (in_eval && !rsfp) {
299 while (s < d && *s != '\n')
312 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
315 case 'r': FTST(O_FTEREAD);
316 case 'w': FTST(O_FTEWRITE);
317 case 'x': FTST(O_FTEEXEC);
318 case 'o': FTST(O_FTEOWNED);
319 case 'R': FTST(O_FTRREAD);
320 case 'W': FTST(O_FTRWRITE);
321 case 'X': FTST(O_FTREXEC);
322 case 'O': FTST(O_FTROWNED);
323 case 'e': FTST(O_FTIS);
324 case 'z': FTST(O_FTZERO);
325 case 's': FTST(O_FTSIZE);
326 case 'f': FTST(O_FTFILE);
327 case 'd': FTST(O_FTDIR);
328 case 'l': FTST(O_FTLINK);
329 case 'p': FTST(O_FTPIPE);
330 case 'S': FTST(O_FTSOCK);
331 case 'u': FTST(O_FTSUID);
332 case 'g': FTST(O_FTSGID);
333 case 'k': FTST(O_FTSVTX);
334 case 'b': FTST(O_FTBLK);
335 case 'c': FTST(O_FTCHR);
336 case 't': FTST(O_FTTTY);
337 case 'T': FTST(O_FTTEXT);
338 case 'B': FTST(O_FTBINARY);
366 s = scanreg(s,bufend,tokenbuf);
367 yylval.stabval = stabent(tokenbuf,TRUE);
378 s = scanreg(s,bufend,tokenbuf);
379 yylval.stabval = stabent(tokenbuf,TRUE);
395 if (isspace(*s) || *s == '#')
396 cmdline = NOLINE; /* invalidate current command line number */
418 while (s < d && isspace(*s))
420 if (isalpha(*s) || *s == '_' || *s == '\'')
421 *(--s) = '\\'; /* force next ident to WORD */
475 while (isascii(*s) && \
476 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
478 while (d[-1] == '\'') \
484 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
486 s = scanreg(s,bufend,tokenbuf);
487 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
491 s = scanreg(s,bufend,tokenbuf);
492 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
500 yylval.stabval = stabent(tokenbuf,TRUE);
505 s = scanreg(s,bufend,tokenbuf);
508 yylval.stabval = stabent(tokenbuf,TRUE);
511 case '/': /* may either be division or pattern */
512 case '?': /* may either be conditional or pattern */
523 if (!expectterm || !isdigit(s[1])) {
532 case '0': case '1': case '2': case '3': case '4':
533 case '5': case '6': case '7': case '8': case '9':
534 case '\'': case '"': case '`':
538 case '\\': /* some magic to force next word to be a WORD */
539 s++; /* used by do and sub to force a separate namespace */
546 if (strEQ(d,"accept"))
548 if (strEQ(d,"atan2"))
560 if (strEQ(d,"continue"))
562 if (strEQ(d,"chdir")) {
563 (void)stabent("ENV",TRUE); /* may use HOME */
566 if (strEQ(d,"close"))
568 if (strEQ(d,"closedir"))
570 if (strEQ(d,"crypt")) {
576 if (strEQ(d,"chmod"))
578 if (strEQ(d,"chown"))
580 if (strEQ(d,"connect"))
584 if (strEQ(d,"chroot"))
591 while (s < d && isspace(*s))
593 if (isalpha(*s) || *s == '_')
594 *(--s) = '\\'; /* force next ident to WORD */
599 if (strEQ(d,"defined"))
601 if (strEQ(d,"delete"))
603 if (strEQ(d,"dbmopen"))
605 if (strEQ(d,"dbmclose"))
614 if (strEQ(d,"elsif")) {
618 if (strEQ(d,"eq") || strEQ(d,"EQ"))
622 if (strEQ(d,"eval")) {
623 allstabs = TRUE; /* must initialize everything since */
624 UNI(O_EVAL); /* we don't know what will be used */
632 if (strEQ(d,"exec")) {
636 if (strEQ(d,"endhostent"))
638 if (strEQ(d,"endnetent"))
640 if (strEQ(d,"endservent"))
642 if (strEQ(d,"endprotoent"))
644 if (strEQ(d,"endpwent"))
646 if (strEQ(d,"endgrent"))
651 if (strEQ(d,"for") || strEQ(d,"foreach")) {
655 if (strEQ(d,"format")) {
657 while (s < d && isspace(*s))
659 if (isalpha(*s) || *s == '_')
660 *(--s) = '\\'; /* force next ident to WORD */
662 allstabs = TRUE; /* must initialize everything since */
663 OPERATOR(FORMAT); /* we don't know what will be used */
667 if (strEQ(d,"fcntl"))
669 if (strEQ(d,"fileno"))
671 if (strEQ(d,"flock"))
676 if (strEQ(d,"gt") || strEQ(d,"GT"))
678 if (strEQ(d,"ge") || strEQ(d,"GE"))
684 if (strEQ(d,"gmtime"))
688 if (strnEQ(d,"get",3)) {
695 if (strEQ(d,"priority"))
697 if (strEQ(d,"protobyname"))
699 if (strEQ(d,"protobynumber"))
701 if (strEQ(d,"protoent"))
703 if (strEQ(d,"pwent"))
705 if (strEQ(d,"pwnam"))
707 if (strEQ(d,"pwuid"))
709 if (strEQ(d,"peername"))
712 else if (*d == 'h') {
713 if (strEQ(d,"hostbyname"))
715 if (strEQ(d,"hostbyaddr"))
717 if (strEQ(d,"hostent"))
720 else if (*d == 'n') {
721 if (strEQ(d,"netbyname"))
723 if (strEQ(d,"netbyaddr"))
725 if (strEQ(d,"netent"))
728 else if (*d == 's') {
729 if (strEQ(d,"servbyname"))
731 if (strEQ(d,"servbyport"))
733 if (strEQ(d,"servent"))
735 if (strEQ(d,"sockname"))
737 if (strEQ(d,"sockopt"))
740 else if (*d == 'g') {
741 if (strEQ(d,"grent"))
743 if (strEQ(d,"grnam"))
745 if (strEQ(d,"grgid"))
748 else if (*d == 'l') {
749 if (strEQ(d,"login"))
766 if (strEQ(d,"index"))
770 if (strEQ(d,"ioctl"))
789 if (strEQ(d,"local"))
791 if (strEQ(d,"length"))
793 if (strEQ(d,"lt") || strEQ(d,"LT"))
795 if (strEQ(d,"le") || strEQ(d,"LE"))
797 if (strEQ(d,"localtime"))
803 if (strEQ(d,"listen"))
805 if (strEQ(d,"lstat"))
821 RETURN(1); /* force error */
823 if (strEQ(d,"mkdir"))
830 if (strEQ(d,"ne") || strEQ(d,"NE"))
841 if (strEQ(d,"opendir"))
846 if (strEQ(d,"print")) {
847 checkcomma(s,"filehandle");
850 if (strEQ(d,"printf")) {
851 checkcomma(s,"filehandle");
854 if (strEQ(d,"push")) {
855 yylval.ival = O_PUSH;
862 if (strEQ(d,"package"))
880 if (strEQ(d,"return"))
882 if (strEQ(d,"reset"))
886 if (strEQ(d,"rename"))
890 if (strEQ(d,"rmdir"))
892 if (strEQ(d,"rindex"))
896 if (strEQ(d,"readdir"))
898 if (strEQ(d,"rewinddir"))
902 if (strEQ(d,"reverse"))
904 if (strEQ(d,"readlink"))
920 RETURN(1); /* force error */
929 if (strEQ(d,"select"))
935 if (strEQ(d,"setpgrp"))
937 if (strEQ(d,"setpriority"))
939 if (strEQ(d,"sethostent"))
941 if (strEQ(d,"setnetent"))
943 if (strEQ(d,"setservent"))
945 if (strEQ(d,"setprotoent"))
947 if (strEQ(d,"setpwent"))
949 if (strEQ(d,"setgrent"))
951 if (strEQ(d,"seekdir"))
953 if (strEQ(d,"setsockopt"))
960 if (strEQ(d,"shift"))
962 if (strEQ(d,"shutdown"))
973 if (strEQ(d,"sleep"))
980 if (strEQ(d,"socket"))
982 if (strEQ(d,"socketpair"))
984 if (strEQ(d,"sort")) {
985 checkcomma(s,"subroutine name");
987 while (s < d && isascii(*s) && isspace(*s)) s++;
988 if (*s == ';' || *s == ')') /* probably a close */
989 fatal("sort is now a reserved word");
990 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
991 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
992 strncpy(tokenbuf,s,d-s);
993 if (strNE(tokenbuf,"keys") &&
994 strNE(tokenbuf,"values") &&
995 strNE(tokenbuf,"split") &&
996 strNE(tokenbuf,"grep") &&
997 strNE(tokenbuf,"readdir") &&
998 strNE(tokenbuf,"unpack") &&
999 strNE(tokenbuf,"do") &&
1000 (d >= bufend || isspace(*d)) )
1001 *(--s) = '\\'; /* force next ident to WORD */
1007 if (strEQ(d,"split"))
1009 if (strEQ(d,"sprintf"))
1013 if (strEQ(d,"sqrt"))
1017 if (strEQ(d,"srand"))
1023 if (strEQ(d,"stat"))
1025 if (strEQ(d,"study")) {
1031 if (strEQ(d,"substr"))
1033 if (strEQ(d,"sub")) {
1036 while (s < d && isspace(*s))
1038 if (isalpha(*s) || *s == '_' || *s == '\'') {
1040 str_sset(subname,curstname);
1041 str_ncat(subname,"'",1);
1043 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1047 str_ncat(subname,s,d-s);
1049 *(--s) = '\\'; /* force next ident to WORD */
1052 str_set(subname,"?");
1061 if (strEQ(d,"system")) {
1065 if (strEQ(d,"symlink"))
1067 if (strEQ(d,"syscall"))
1076 if (strEQ(d,"tr")) {
1081 RETURN(1); /* force error */
1083 if (strEQ(d,"tell"))
1085 if (strEQ(d,"telldir"))
1087 if (strEQ(d,"time"))
1089 if (strEQ(d,"times"))
1094 if (strEQ(d,"using"))
1096 if (strEQ(d,"until")) {
1100 if (strEQ(d,"unless")) {
1104 if (strEQ(d,"unlink"))
1106 if (strEQ(d,"undef"))
1108 if (strEQ(d,"unpack"))
1110 if (strEQ(d,"utime"))
1112 if (strEQ(d,"umask"))
1114 if (strEQ(d,"unshift")) {
1115 yylval.ival = O_UNSHIFT;
1121 if (strEQ(d,"values"))
1123 if (strEQ(d,"vec")) {
1130 if (strEQ(d,"while")) {
1134 if (strEQ(d,"warn"))
1136 if (strEQ(d,"wait"))
1138 if (strEQ(d,"wantarray")) {
1139 yylval.arg = op_new(1);
1140 yylval.arg->arg_type = O_ITEM;
1141 yylval.arg[1].arg_type = A_WANTARRAY;
1144 if (strEQ(d,"write"))
1149 if (!expectterm && strEQ(d,"x"))
1169 yylval.cval = savestr(d);
1171 if (oldoldbufptr && oldoldbufptr < bufptr) {
1172 while (isspace(*oldoldbufptr))
1174 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1176 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1179 return (CLINE, bufptr = s, (int)WORD);
1189 while (s < bufend && isascii(*s) && isspace(*s))
1191 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1193 while (isalpha(*s) || isdigit(*s) || *s == '_')
1195 while (s < bufend && isspace(*s))
1198 fatal("No comma allowed after %s", what);
1203 scanreg(s,send,dest)
1205 register char *send;
1219 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1222 while (d > dest+1 && d[-1] == '\'')
1228 if (*d == '{' /* } */ ) {
1231 while (s < send && brackets) {
1232 if (!reparse && (d == dest || (*s && isascii(*s) &&
1233 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1243 if (reparse && reparse == s - 1)
1257 if (*d == '^' && !isspace(*s))
1263 scanconst(string,len)
1267 register STR *retstr;
1272 if (index(string,'|')) {
1275 retstr = Str_new(86,len);
1276 str_nset(retstr,string,len);
1277 t = str_get(retstr);
1279 retstr->str_u.str_useful = 100;
1280 for (d=t; d < e; ) {
1288 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1292 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1296 (void)bcopy(d+1,d,e-d);
1315 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1327 retstr->str_cur = d - t;
1335 register SPAT *spat;
1341 Newz(801,spat,1,SPAT);
1342 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1343 curstash->tbl_spatroot = spat;
1352 spat->spat_flags |= SPAT_ONCE;
1355 fatal("panic: scanpat");
1357 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1359 yyerror("Search pattern not terminated");
1360 yylval.arg = Nullarg;
1364 while (*s == 'i' || *s == 'o') {
1368 spat->spat_flags |= SPAT_FOLD;
1372 spat->spat_flags |= SPAT_KEEP;
1376 for (d=tokenbuf; d < e; d++) {
1377 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1378 (*d == '@' && d[-1] != '\\')) {
1381 spat->spat_runtime = arg = op_new(1);
1382 arg->arg_type = O_ITEM;
1383 arg[1].arg_type = A_DOUBLE;
1384 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1385 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1386 d = scanreg(d,bufend,buf);
1387 (void)stabent(buf,TRUE); /* make sure it's created */
1388 for (; d < e; d++) {
1389 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1390 d = scanreg(d,bufend,buf);
1391 (void)stabent(buf,TRUE);
1393 else if (*d == '@' && d[-1] != '\\') {
1394 d = scanreg(d,bufend,buf);
1395 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1396 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1397 (void)stabent(buf,TRUE);
1400 goto got_pat; /* skip compiling for now */
1403 if (spat->spat_flags & SPAT_FOLD)
1407 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1409 if (*tokenbuf == '^') {
1410 spat->spat_short = scanconst(tokenbuf+1,len-1);
1411 if (spat->spat_short) {
1412 spat->spat_slen = spat->spat_short->str_cur;
1413 if (spat->spat_slen == len - 1)
1414 spat->spat_flags |= SPAT_ALL;
1418 spat->spat_flags |= SPAT_SCANFIRST;
1419 spat->spat_short = scanconst(tokenbuf,len);
1420 if (spat->spat_short) {
1421 spat->spat_slen = spat->spat_short->str_cur;
1422 if (spat->spat_slen == len)
1423 spat->spat_flags |= SPAT_ALL;
1426 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1427 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1428 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1429 spat->spat_flags & SPAT_FOLD,1);
1430 /* Note that this regexp can still be used if someone says
1431 * something like /a/ && s//b/; so we can't delete it.
1435 if (spat->spat_flags & SPAT_FOLD)
1439 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1441 if (spat->spat_short)
1442 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1443 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1444 spat->spat_flags & SPAT_FOLD,1);
1448 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1456 register SPAT *spat;
1461 Newz(802,spat,1,SPAT);
1462 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1463 curstash->tbl_spatroot = spat;
1465 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1467 yyerror("Substitution pattern not terminated");
1468 yylval.arg = Nullarg;
1472 for (d=tokenbuf; d < e; d++) {
1473 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1474 (*d == '@' && d[-1] != '\\')) {
1477 spat->spat_runtime = arg = op_new(1);
1478 arg->arg_type = O_ITEM;
1479 arg[1].arg_type = A_DOUBLE;
1480 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1481 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1482 d = scanreg(d,bufend,buf);
1483 (void)stabent(buf,TRUE); /* make sure it's created */
1485 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1486 d = scanreg(d,bufend,buf);
1487 (void)stabent(buf,TRUE);
1489 else if (*d == '@' && d[-1] != '\\') {
1490 d = scanreg(d,bufend,buf);
1491 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1492 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1493 (void)stabent(buf,TRUE);
1496 goto get_repl; /* skip compiling for now */
1499 if (*tokenbuf == '^') {
1500 spat->spat_short = scanconst(tokenbuf+1,len-1);
1501 if (spat->spat_short)
1502 spat->spat_slen = spat->spat_short->str_cur;
1505 spat->spat_flags |= SPAT_SCANFIRST;
1506 spat->spat_short = scanconst(tokenbuf,len);
1507 if (spat->spat_short)
1508 spat->spat_slen = spat->spat_short->str_cur;
1510 d = nsavestr(tokenbuf,len);
1514 yyerror("Substitution replacement not terminated");
1515 yylval.arg = Nullarg;
1518 spat->spat_repl = yylval.arg;
1519 spat->spat_flags |= SPAT_ONCE;
1520 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1521 spat->spat_flags |= SPAT_CONST;
1522 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1526 spat->spat_flags |= SPAT_CONST;
1527 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1528 e = tmpstr->str_ptr + tmpstr->str_cur;
1529 for (t = tmpstr->str_ptr; t < e; t++) {
1530 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1531 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1532 spat->spat_flags &= ~SPAT_CONST;
1535 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1538 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1539 spat->spat_repl[1].arg_type = A_SINGLE;
1540 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1544 spat->spat_flags &= ~SPAT_CONST;
1548 spat->spat_flags &= ~SPAT_ONCE;
1553 spat->spat_flags |= SPAT_FOLD;
1554 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1555 str_free(spat->spat_short); /* anchored opt doesn't do */
1556 spat->spat_short = Nullstr; /* case insensitive match */
1557 spat->spat_slen = 0;
1562 spat->spat_flags |= SPAT_KEEP;
1565 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1566 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1567 if (!spat->spat_runtime) {
1568 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1572 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1577 register SPAT *spat;
1579 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1580 if (spat->spat_short &&
1581 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1583 if (spat->spat_flags & SPAT_SCANFIRST) {
1584 str_free(spat->spat_short);
1585 spat->spat_short = Nullstr;
1588 str_free(spat->spat_regexp->regmust);
1589 spat->spat_regexp->regmust = Nullstr;
1593 if (!spat->spat_short || /* promote the better string */
1594 ((spat->spat_flags & SPAT_SCANFIRST) &&
1595 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1596 str_free(spat->spat_short); /* ok if null */
1597 spat->spat_short = spat->spat_regexp->regmust;
1598 spat->spat_regexp->regmust = Nullstr;
1599 spat->spat_flags |= SPAT_SCANFIRST;
1605 expand_charset(s,len,retlen)
1611 register char *d = t;
1613 register char *send = s + len;
1616 if (s[1] == '-' && s+2 < send) {
1617 for (i = s[0]; i <= s[2]; i++)
1626 return nsavestr(t,d-t);
1634 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1642 Newz(803,tbl,256,char);
1643 arg[2].arg_type = A_NULL;
1644 arg[2].arg_ptr.arg_cval = tbl;
1647 yyerror("Translation pattern not terminated");
1648 yylval.arg = Nullarg;
1651 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1652 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1653 free_arg(yylval.arg);
1656 yyerror("Translation replacement not terminated");
1657 yylval.arg = Nullarg;
1660 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1661 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1662 free_arg(yylval.arg);
1668 for (i = 0, j = 0; i < tlen; i++,j++) {
1671 tbl[t[i] & 0377] = r[j];
1686 register char *send;
1687 register bool makesingle = FALSE;
1688 register STAB *stab;
1689 bool alwaysdollar = FALSE;
1690 bool hereis = FALSE;
1692 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1697 arg->arg_type = O_ITEM;
1700 default: /* a substitution replacement */
1701 arg[1].arg_type = A_DOUBLE;
1702 makesingle = TRUE; /* maybe disable runtime scanning */
1712 arg[1].arg_type = A_SINGLE;
1717 else if (s[1] == '.')
1728 yyerror("Illegal octal digit");
1730 case '0': case '1': case '2': case '3': case '4':
1731 case '5': case '6': case '7':
1735 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1736 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1740 i += (*s++ & 7) + 9;
1745 (void)sprintf(tokenbuf,"%ld",i);
1746 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1747 #ifdef MICROPORT /* Microport 2.4 hack */
1748 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1750 (void)str_2num(arg[1].arg_ptr.arg_str);
1751 #endif /* Microport 2.4 hack */
1754 case '1': case '2': case '3': case '4': case '5':
1755 case '6': case '7': case '8': case '9': case '.':
1757 arg[1].arg_type = A_SINGLE;
1759 while (isdigit(*s) || *s == '_') {
1765 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1767 while (isdigit(*s) || *s == '_') {
1774 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1776 if (*s == '+' || *s == '-')
1782 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1783 #ifdef MICROPORT /* Microport 2.4 hack */
1784 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1786 (void)str_2num(arg[1].arg_ptr.arg_str);
1787 #endif /* Microport 2.4 hack */
1795 if (*++s && index("`'\"",*s)) {
1797 s = cpytill(d,s,bufend,term,&len);
1807 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1809 } /* assuming tokenbuf won't clobber */
1814 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1815 herewas = str_make(s,bufend-s);
1817 s--, herewas = str_make(s,d-s);
1818 s += herewas->str_cur;
1826 s = cpytill(d,s,bufend,'>',&len);
1831 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1833 if (d - tokenbuf != len) {
1835 arg[1].arg_type = A_GLOB;
1836 d = nsavestr(d,len);
1837 arg[1].arg_ptr.arg_stab = stab = genstab();
1838 stab_io(stab) = stio_new();
1839 stab_val(stab) = str_make(d,len);
1840 stab_val(stab)->str_u.str_hash = curstash;
1847 (void)strcpy(d,"ARGV");
1849 arg[1].arg_type = A_INDREAD;
1850 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1853 arg[1].arg_type = A_READ;
1854 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1855 yyerror("Can't get both program and data from <STDIN>");
1856 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1857 if (!stab_io(arg[1].arg_ptr.arg_stab))
1858 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1859 if (strEQ(d,"ARGV")) {
1860 (void)aadd(arg[1].arg_ptr.arg_stab);
1861 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1878 arg[1].arg_type = A_SINGLE;
1885 arg[1].arg_type = A_DOUBLE;
1886 makesingle = TRUE; /* maybe disable runtime scanning */
1887 alwaysdollar = TRUE; /* treat $) and $| as variables */
1892 arg[1].arg_type = A_BACKTICK;
1894 alwaysdollar = TRUE; /* treat $) and $| as variables */
1902 multi_open = multi_close = '<';
1905 if (tmps = index("([{< )]}> )]}>",term))
1909 tmpstr = Str_new(87,80);
1914 while (s < bufend &&
1915 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1921 fatal("EOF in string");
1923 str_nset(tmpstr,d+1,s-d);
1925 str_ncat(herewas,s,bufend-s);
1926 str_replace(linestr,herewas);
1927 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1928 bufend = linestr->str_ptr + linestr->str_cur;
1933 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1934 while (s >= bufend) { /* multiple line string? */
1936 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1938 fatal("EOF in string");
1942 STR *str = Str_new(88,0);
1944 str_sset(str,linestr);
1945 astore(lineary,(int)line,str);
1947 bufend = linestr->str_ptr + linestr->str_cur;
1949 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1952 str_scat(linestr,herewas);
1953 bufend = linestr->str_ptr + linestr->str_cur;
1957 str_scat(tmpstr,linestr);
1961 s = str_append_till(tmpstr,s,bufend,term,leave);
1965 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1966 tmpstr->str_len = tmpstr->str_cur + 1;
1967 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1969 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1970 arg[1].arg_ptr.arg_str = tmpstr;
1974 s = tmpstr->str_ptr;
1975 send = s + tmpstr->str_cur;
1976 while (s < send) { /* see if we can make SINGLE */
1977 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1979 *s = '$'; /* grandfather \digit in subst */
1980 if ((*s == '$' || *s == '@') && s+1 < send &&
1981 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
1982 makesingle = FALSE; /* force interpretation */
1984 else if (*s == '\\' && s+1 < send) {
1989 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1991 if ((*s == '$' && s+1 < send &&
1992 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
1993 (*s == '@' && s+1 < send) ) {
1994 len = scanreg(s,send,tokenbuf) - s;
1995 if (*s == '$' || strEQ(tokenbuf,"ARGV")
1996 || strEQ(tokenbuf,"ENV")
1997 || strEQ(tokenbuf,"SIG")
1998 || strEQ(tokenbuf,"INC") )
1999 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2004 else if (*s == '\\' && s+1 < send) {
2008 if (!makesingle && (!leave || (*s && index(leave,*s))))
2012 case '0': case '1': case '2': case '3':
2013 case '4': case '5': case '6': case '7':
2015 if (s < send && *s && index("01234567",*s)) {
2019 if (s < send && *s && index("01234567",*s)) {
2048 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2049 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2051 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
2053 tmpstr->str_cur = d - tmpstr->str_ptr;
2054 arg[1].arg_ptr.arg_str = tmpstr;
2069 register FCMD *fprev = &froot;
2070 register FCMD *fcmd;
2077 Zero(&froot, 1, FCMD);
2078 while ((s = str_gets(linestr,rsfp, 0)) != Nullch) {
2081 STR *tmpstr = Str_new(89,0);
2083 str_sset(tmpstr,linestr);
2084 astore(lineary,(int)line,tmpstr);
2086 bufend = linestr->str_ptr + linestr->str_cur;
2087 if (strEQ(s,".\n")) {
2089 return froot.f_next;
2093 flinebeg = Nullfcmd;
2096 while (s < bufend) {
2097 Newz(804,fcmd,1,FCMD);
2098 fprev->f_next = fcmd;
2100 for (t=s; t < bufend && *t != '@' && *t != '^'; t++) {
2110 fcmd->f_pre = nsavestr(s, t-s);
2111 fcmd->f_presize = t-s;
2115 fcmd->f_flags |= FC_NOBLANK;
2117 fcmd->f_flags |= FC_REPEAT;
2121 flinebeg = fcmd; /* start values here */
2123 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2126 fcmd->f_type = F_LINES;
2130 fcmd->f_type = F_LEFT;
2135 fcmd->f_type = F_RIGHT;
2140 fcmd->f_type = F_CENTER;
2145 fcmd->f_type = F_LEFT;
2148 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2149 fcmd->f_flags |= FC_MORE;
2157 if ((s = str_gets(linestr, rsfp, 0)) == Nullch)
2161 STR *tmpstr = Str_new(90,0);
2163 str_sset(tmpstr,linestr);
2164 astore(lineary,(int)line,tmpstr);
2166 if (strEQ(s,".\n")) {
2168 yyerror("Missing values line");
2169 return froot.f_next;
2173 bufend = linestr->str_ptr + linestr->str_cur;
2174 str = flinebeg->f_unparsed = Str_new(91,bufend - bufptr);
2175 str->str_u.str_hash = curstash;
2176 str_nset(str,"(",1);
2177 flinebeg->f_line = line;
2178 if (!flinebeg->f_next->f_type || index(linestr->str_ptr, ',')) {
2179 str_scat(str,linestr);
2180 str_ncat(str,",$$);",5);
2183 while (s < bufend && isspace(*s))
2186 while (s < bufend) {
2188 case ' ': case '\t': case '\n': case ';':
2189 str_ncat(str, t, s - t);
2190 str_ncat(str, "," ,1);
2191 while (s < bufend && (isspace(*s) || *s == ';'))
2196 str_ncat(str, t, s - t);
2198 s = scanreg(s,bufend,tokenbuf);
2199 str_ncat(str, t, s - t);
2201 if (s < bufend && *s && index("$'\"",*s))
2202 str_ncat(str, ",", 1);
2204 case '"': case '\'':
2205 str_ncat(str, t, s - t);
2208 while (s < bufend && (*s != *t || s[-1] == '\\'))
2212 str_ncat(str, t, s - t);
2214 if (s < bufend && *s && index("$'\"",*s))
2215 str_ncat(str, ",", 1);
2218 yyerror("Please use commas to separate fields");
2221 str_ncat(str,"$$);",4);
2226 bufptr = str_get(linestr);
2227 yyerror("Format not terminated");
2228 return froot.f_next;
2235 cshlen = strlen(cshname);