1 /* $Header: toke.c,v 3.0.1.3 89/11/17 15:43:15 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
10 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
11 * patch5: } misadjusted expection of subsequent term or operator
12 * patch5: y/abcde// didn't work
14 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
15 * patch2: fixed a CLINE macro conflict
17 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
18 * patch1: disambiguated word after "sort" better
20 * Revision 3.0 89/10/18 15:32:33 lwall
29 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
34 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
36 #define META(c) ((c) | 128)
38 #define RETURN(retval) return (bufptr = s,(int)retval)
39 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
40 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
41 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
42 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
43 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
44 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
45 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
46 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
47 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
48 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
49 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
50 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
51 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
52 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
53 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
54 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
55 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
56 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
57 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
58 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
59 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
60 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
61 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
62 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
64 /* This bit of chicanery makes a unary function followed by
65 * a parenthesis into a function with one argument, highest precedence.
67 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
68 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
70 /* This does similarly for list operators, merely by pretending that the
71 * paren came before the listop rather than after.
73 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
74 (*s = META('('), bufptr = oldbufptr, '(') : \
75 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
81 while (s < bufend && isascii(*s) && isspace(*s))
90 #define UNI(f) return uni(f,s)
91 #define LOP(f) return lop(f,s)
130 #endif /* CRIPPLED_CC */
134 register char *s = bufptr;
137 static bool in_format = FALSE;
138 static bool firstline = TRUE;
139 extern int yychar; /* last token */
141 oldoldbufptr = oldbufptr;
148 fprintf(stderr,"Tokener at %s",s);
150 fprintf(stderr,"Tokener at %s\n",s);
154 if ((*s & 127) == '(')
157 warn("Unrecognized character \\%03o ignored", *s++);
163 goto retry; /* ignore stray nulls */
166 if (minus_n || minus_p || perldb) {
169 str_cat(linestr,"do 'perldb.pl'; print $@;");
170 if (minus_n || minus_p) {
171 str_cat(linestr,"line: while (<>) {");
173 str_cat(linestr,"@F=split(' ');");
175 oldoldbufptr = oldbufptr = s = str_get(linestr);
176 bufend = linestr->str_ptr + linestr->str_cur;
181 yylval.formval = load_format();
183 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
184 bufend = linestr->str_ptr + linestr->str_cur;
188 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
190 (void)mypclose(rsfp);
191 else if (rsfp != stdin)
194 if (minus_n || minus_p) {
195 str_set(linestr,minus_p ? "}continue{print;" : "");
196 str_cat(linestr,"}");
197 oldoldbufptr = oldbufptr = s = str_get(linestr);
198 bufend = linestr->str_ptr + linestr->str_cur;
201 oldoldbufptr = oldbufptr = s = str_get(linestr);
205 oldoldbufptr = oldbufptr = bufptr = s;
207 STR *str = Str_new(85,0);
209 str_sset(str,linestr);
210 astore(lineary,(int)line,str);
218 bufend = linestr->str_ptr + linestr->str_cur;
220 while (s < bufend && isspace(*s))
222 if (*s == ':') /* for csh's that have to exec sh scripts */
227 case ' ': case '\t': case '\f':
232 if (preprocess && s == str_get(linestr) &&
233 s[1] == ' ' && isdigit(s[2])) {
235 for (s += 2; isdigit(*s); s++) ;
237 while (s < d && isspace(*s)) s++;
240 s[strlen(s)-1] = '\0'; /* wipe out newline */
243 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
246 filename = savestr(s);
248 filename = savestr(origfilename);
249 oldoldbufptr = oldbufptr = s = str_get(linestr);
251 if (in_eval && !rsfp) {
253 while (s < d && *s != '\n')
266 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
269 case 'r': FTST(O_FTEREAD);
270 case 'w': FTST(O_FTEWRITE);
271 case 'x': FTST(O_FTEEXEC);
272 case 'o': FTST(O_FTEOWNED);
273 case 'R': FTST(O_FTRREAD);
274 case 'W': FTST(O_FTRWRITE);
275 case 'X': FTST(O_FTREXEC);
276 case 'O': FTST(O_FTROWNED);
277 case 'e': FTST(O_FTIS);
278 case 'z': FTST(O_FTZERO);
279 case 's': FTST(O_FTSIZE);
280 case 'f': FTST(O_FTFILE);
281 case 'd': FTST(O_FTDIR);
282 case 'l': FTST(O_FTLINK);
283 case 'p': FTST(O_FTPIPE);
284 case 'S': FTST(O_FTSOCK);
285 case 'u': FTST(O_FTSUID);
286 case 'g': FTST(O_FTSGID);
287 case 'k': FTST(O_FTSVTX);
288 case 'b': FTST(O_FTBLK);
289 case 'c': FTST(O_FTCHR);
290 case 't': FTST(O_FTTTY);
291 case 'T': FTST(O_FTTEXT);
292 case 'B': FTST(O_FTBINARY);
320 s = scanreg(s,bufend,tokenbuf);
321 yylval.stabval = stabent(tokenbuf,TRUE);
332 s = scanreg(s,bufend,tokenbuf);
333 yylval.stabval = stabent(tokenbuf,TRUE);
349 if (isspace(*s) || *s == '#')
350 cmdline = NOLINE; /* invalidate current command line number */
372 while (s < d && isspace(*s))
374 if (isalpha(*s) || *s == '_' || *s == '\'')
375 *(--s) = '\\'; /* force next ident to WORD */
429 while (isascii(*s) && \
430 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
438 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
440 s = scanreg(s,bufend,tokenbuf);
441 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
445 s = scanreg(s,bufend,tokenbuf);
446 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
454 yylval.stabval = stabent(tokenbuf,TRUE);
459 s = scanreg(s,bufend,tokenbuf);
462 yylval.stabval = stabent(tokenbuf,TRUE);
465 case '/': /* may either be division or pattern */
466 case '?': /* may either be conditional or pattern */
477 if (!expectterm || !isdigit(s[1])) {
486 case '0': case '1': case '2': case '3': case '4':
487 case '5': case '6': case '7': case '8': case '9':
488 case '\'': case '"': case '`':
492 case '\\': /* some magic to force next word to be a WORD */
493 s++; /* used by do and sub to force a separate namespace */
500 if (strEQ(d,"accept"))
502 if (strEQ(d,"atan2"))
514 if (strEQ(d,"continue"))
516 if (strEQ(d,"chdir"))
518 if (strEQ(d,"close"))
520 if (strEQ(d,"closedir"))
522 if (strEQ(d,"crypt")) {
528 if (strEQ(d,"chmod"))
530 if (strEQ(d,"chown"))
532 if (strEQ(d,"connect"))
536 if (strEQ(d,"chroot"))
543 while (s < d && isspace(*s))
545 if (isalpha(*s) || *s == '_')
546 *(--s) = '\\'; /* force next ident to WORD */
551 if (strEQ(d,"defined"))
553 if (strEQ(d,"delete"))
555 if (strEQ(d,"dbmopen"))
557 if (strEQ(d,"dbmclose"))
566 if (strEQ(d,"elsif")) {
570 if (strEQ(d,"eq") || strEQ(d,"EQ"))
574 if (strEQ(d,"eval")) {
575 allstabs = TRUE; /* must initialize everything since */
576 UNI(O_EVAL); /* we don't know what will be used */
584 if (strEQ(d,"exec")) {
588 if (strEQ(d,"endhostent"))
590 if (strEQ(d,"endnetent"))
592 if (strEQ(d,"endservent"))
594 if (strEQ(d,"endprotoent"))
596 if (strEQ(d,"endpwent"))
598 if (strEQ(d,"endgrent"))
605 if (strEQ(d,"foreach"))
607 if (strEQ(d,"format")) {
609 while (s < d && isspace(*s))
611 if (isalpha(*s) || *s == '_')
612 *(--s) = '\\'; /* force next ident to WORD */
614 allstabs = TRUE; /* must initialize everything since */
615 OPERATOR(FORMAT); /* we don't know what will be used */
619 if (strEQ(d,"fcntl"))
621 if (strEQ(d,"fileno"))
623 if (strEQ(d,"flock"))
628 if (strEQ(d,"gt") || strEQ(d,"GT"))
630 if (strEQ(d,"ge") || strEQ(d,"GE"))
636 if (strEQ(d,"gmtime"))
640 if (strnEQ(d,"get",3)) {
647 if (strEQ(d,"priority"))
649 if (strEQ(d,"protobyname"))
651 if (strEQ(d,"protobynumber"))
653 if (strEQ(d,"protoent"))
655 if (strEQ(d,"pwent"))
657 if (strEQ(d,"pwnam"))
659 if (strEQ(d,"pwuid"))
661 if (strEQ(d,"peername"))
664 else if (*d == 'h') {
665 if (strEQ(d,"hostbyname"))
667 if (strEQ(d,"hostbyaddr"))
669 if (strEQ(d,"hostent"))
672 else if (*d == 'n') {
673 if (strEQ(d,"netbyname"))
675 if (strEQ(d,"netbyaddr"))
677 if (strEQ(d,"netent"))
680 else if (*d == 's') {
681 if (strEQ(d,"servbyname"))
683 if (strEQ(d,"servbyport"))
685 if (strEQ(d,"servent"))
687 if (strEQ(d,"sockname"))
689 if (strEQ(d,"sockopt"))
692 else if (*d == 'g') {
693 if (strEQ(d,"grent"))
695 if (strEQ(d,"grnam"))
697 if (strEQ(d,"grgid"))
700 else if (*d == 'l') {
701 if (strEQ(d,"login"))
718 if (strEQ(d,"index"))
722 if (strEQ(d,"ioctl"))
741 if (strEQ(d,"local"))
743 if (strEQ(d,"length"))
745 if (strEQ(d,"lt") || strEQ(d,"LT"))
747 if (strEQ(d,"le") || strEQ(d,"LE"))
749 if (strEQ(d,"localtime"))
755 if (strEQ(d,"listen"))
757 if (strEQ(d,"lstat"))
767 RETURN(1); /* force error */
769 if (strEQ(d,"mkdir"))
776 if (strEQ(d,"ne") || strEQ(d,"NE"))
787 if (strEQ(d,"opendir"))
792 if (strEQ(d,"print")) {
793 checkcomma(s,"filehandle");
796 if (strEQ(d,"printf")) {
797 checkcomma(s,"filehandle");
800 if (strEQ(d,"push")) {
801 yylval.ival = O_PUSH;
808 if (strEQ(d,"package"))
824 if (strEQ(d,"return"))
826 if (strEQ(d,"reset"))
830 if (strEQ(d,"rename"))
834 if (strEQ(d,"rmdir"))
836 if (strEQ(d,"rindex"))
840 if (strEQ(d,"readdir"))
842 if (strEQ(d,"rewinddir"))
846 if (strEQ(d,"reverse"))
848 if (strEQ(d,"readlink"))
858 RETURN(1); /* force error */
867 if (strEQ(d,"select"))
873 if (strEQ(d,"setpgrp"))
875 if (strEQ(d,"setpriority"))
877 if (strEQ(d,"sethostent"))
879 if (strEQ(d,"setnetent"))
881 if (strEQ(d,"setservent"))
883 if (strEQ(d,"setprotoent"))
885 if (strEQ(d,"setpwent"))
887 if (strEQ(d,"setgrent"))
889 if (strEQ(d,"seekdir"))
891 if (strEQ(d,"setsockopt"))
898 if (strEQ(d,"shift"))
900 if (strEQ(d,"shutdown"))
911 if (strEQ(d,"sleep"))
918 if (strEQ(d,"socket"))
920 if (strEQ(d,"socketpair"))
922 if (strEQ(d,"sort")) {
923 checkcomma(s,"subroutine name");
925 while (s < d && isascii(*s) && isspace(*s)) s++;
926 if (*s == ';' || *s == ')') /* probably a close */
927 fatal("sort is now a reserved word");
928 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
929 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
930 strncpy(tokenbuf,s,d-s);
931 if (strNE(tokenbuf,"keys") &&
932 strNE(tokenbuf,"values") &&
933 strNE(tokenbuf,"split") &&
934 strNE(tokenbuf,"grep") &&
935 strNE(tokenbuf,"readdir") &&
936 strNE(tokenbuf,"unpack") &&
937 strNE(tokenbuf,"do") &&
938 (d >= bufend || isspace(*d)) )
939 *(--s) = '\\'; /* force next ident to WORD */
945 if (strEQ(d,"split"))
947 if (strEQ(d,"sprintf"))
955 if (strEQ(d,"srand"))
963 if (strEQ(d,"study")) {
969 if (strEQ(d,"substr"))
971 if (strEQ(d,"sub")) {
974 while (s < d && isspace(*s))
976 if (isalpha(*s) || *s == '_' || *s == '\'') {
978 str_sset(subname,curstname);
979 str_ncat(subname,"'",1);
981 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
985 str_ncat(subname,s,d-s);
987 *(--s) = '\\'; /* force next ident to WORD */
990 str_set(subname,"?");
999 if (strEQ(d,"system")) {
1003 if (strEQ(d,"symlink"))
1005 if (strEQ(d,"syscall"))
1014 if (strEQ(d,"tr")) {
1019 RETURN(1); /* force error */
1021 if (strEQ(d,"tell"))
1023 if (strEQ(d,"telldir"))
1025 if (strEQ(d,"time"))
1027 if (strEQ(d,"times"))
1032 if (strEQ(d,"using"))
1034 if (strEQ(d,"until")) {
1038 if (strEQ(d,"unless")) {
1042 if (strEQ(d,"unlink"))
1044 if (strEQ(d,"undef"))
1046 if (strEQ(d,"unpack"))
1048 if (strEQ(d,"utime"))
1050 if (strEQ(d,"umask"))
1052 if (strEQ(d,"unshift")) {
1053 yylval.ival = O_UNSHIFT;
1059 if (strEQ(d,"values"))
1061 if (strEQ(d,"vec")) {
1068 if (strEQ(d,"while")) {
1072 if (strEQ(d,"warn"))
1074 if (strEQ(d,"wait"))
1076 if (strEQ(d,"wantarray")) {
1077 yylval.arg = op_new(1);
1078 yylval.arg->arg_type = O_ITEM;
1079 yylval.arg[1].arg_type = A_WANTARRAY;
1082 if (strEQ(d,"write"))
1087 if (!expectterm && strEQ(d,"x"))
1101 yylval.cval = savestr(d);
1103 if (oldoldbufptr && oldoldbufptr < bufptr) {
1104 while (isspace(*oldoldbufptr))
1106 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1108 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1111 return (CLINE, bufptr = s, (int)WORD);
1121 while (s < bufend && isascii(*s) && isspace(*s))
1123 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1125 while (isalpha(*s) || isdigit(*s) || *s == '_')
1127 while (s < bufend && isspace(*s))
1130 fatal("No comma allowed after %s", what);
1135 scanreg(s,send,dest)
1137 register char *send;
1151 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1154 if (d > dest+1 && d[-1] == '\'')
1160 if (*d == '{' /* } */ ) {
1163 while (s < send && brackets) {
1164 if (!reparse && (d == dest || (*s && isascii(*s) &&
1165 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1175 if (reparse && reparse == s - 1)
1189 if (*d == '^' && !isspace(*s))
1195 scanconst(string,len)
1199 register STR *retstr;
1204 if (index(string,'|')) {
1207 retstr = Str_new(86,len);
1208 str_nset(retstr,string,len);
1209 t = str_get(retstr);
1211 retstr->str_u.str_useful = 100;
1212 for (d=t; d < e; ) {
1220 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1224 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1228 (void)bcopy(d+1,d,e-d);
1247 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1259 retstr->str_cur = d - t;
1267 register SPAT *spat;
1273 Newz(801,spat,1,SPAT);
1274 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1275 curstash->tbl_spatroot = spat;
1284 spat->spat_flags |= SPAT_ONCE;
1287 fatal("panic: scanpat");
1289 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1291 yyerror("Search pattern not terminated");
1292 yylval.arg = Nullarg;
1296 while (*s == 'i' || *s == 'o') {
1300 spat->spat_flags |= SPAT_FOLD;
1304 spat->spat_flags |= SPAT_KEEP;
1308 for (d=tokenbuf; d < e; d++) {
1309 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1310 (*d == '@' && d[-1] != '\\')) {
1313 spat->spat_runtime = arg = op_new(1);
1314 arg->arg_type = O_ITEM;
1315 arg[1].arg_type = A_DOUBLE;
1316 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1317 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1318 d = scanreg(d,bufend,buf);
1319 (void)stabent(buf,TRUE); /* make sure it's created */
1320 for (; d < e; d++) {
1321 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1322 d = scanreg(d,bufend,buf);
1323 (void)stabent(buf,TRUE);
1325 else if (*d == '@' && d[-1] != '\\') {
1326 d = scanreg(d,bufend,buf);
1327 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1328 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1329 (void)stabent(buf,TRUE);
1332 goto got_pat; /* skip compiling for now */
1335 if (spat->spat_flags & SPAT_FOLD)
1339 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1341 if (*tokenbuf == '^') {
1342 spat->spat_short = scanconst(tokenbuf+1,len-1);
1343 if (spat->spat_short) {
1344 spat->spat_slen = spat->spat_short->str_cur;
1345 if (spat->spat_slen == len - 1)
1346 spat->spat_flags |= SPAT_ALL;
1350 spat->spat_flags |= SPAT_SCANFIRST;
1351 spat->spat_short = scanconst(tokenbuf,len);
1352 if (spat->spat_short) {
1353 spat->spat_slen = spat->spat_short->str_cur;
1354 if (spat->spat_slen == len)
1355 spat->spat_flags |= SPAT_ALL;
1358 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1359 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1360 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1361 spat->spat_flags & SPAT_FOLD,1);
1362 /* Note that this regexp can still be used if someone says
1363 * something like /a/ && s//b/; so we can't delete it.
1367 if (spat->spat_flags & SPAT_FOLD)
1371 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1373 if (spat->spat_short)
1374 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1375 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1376 spat->spat_flags & SPAT_FOLD,1);
1380 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1388 register SPAT *spat;
1393 Newz(802,spat,1,SPAT);
1394 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1395 curstash->tbl_spatroot = spat;
1397 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1399 yyerror("Substitution pattern not terminated");
1400 yylval.arg = Nullarg;
1404 for (d=tokenbuf; d < e; d++) {
1405 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1406 (*d == '@' && d[-1] != '\\')) {
1409 spat->spat_runtime = arg = op_new(1);
1410 arg->arg_type = O_ITEM;
1411 arg[1].arg_type = A_DOUBLE;
1412 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1413 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1414 d = scanreg(d,bufend,buf);
1415 (void)stabent(buf,TRUE); /* make sure it's created */
1417 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1418 d = scanreg(d,bufend,buf);
1419 (void)stabent(buf,TRUE);
1421 else if (*d == '@' && d[-1] != '\\') {
1422 d = scanreg(d,bufend,buf);
1423 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1424 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1425 (void)stabent(buf,TRUE);
1428 goto get_repl; /* skip compiling for now */
1431 if (*tokenbuf == '^') {
1432 spat->spat_short = scanconst(tokenbuf+1,len-1);
1433 if (spat->spat_short)
1434 spat->spat_slen = spat->spat_short->str_cur;
1437 spat->spat_flags |= SPAT_SCANFIRST;
1438 spat->spat_short = scanconst(tokenbuf,len);
1439 if (spat->spat_short)
1440 spat->spat_slen = spat->spat_short->str_cur;
1442 d = nsavestr(tokenbuf,len);
1446 yyerror("Substitution replacement not terminated");
1447 yylval.arg = Nullarg;
1450 spat->spat_repl = yylval.arg;
1451 spat->spat_flags |= SPAT_ONCE;
1452 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1453 spat->spat_flags |= SPAT_CONST;
1454 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1458 spat->spat_flags |= SPAT_CONST;
1459 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1460 e = tmpstr->str_ptr + tmpstr->str_cur;
1461 for (t = tmpstr->str_ptr; t < e; t++) {
1462 if (*t == '$' && t[1] && index("`'&+0123456789",t[1]))
1463 spat->spat_flags &= ~SPAT_CONST;
1466 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1469 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1470 spat->spat_repl[1].arg_type = A_SINGLE;
1471 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1475 spat->spat_flags &= ~SPAT_CONST;
1479 spat->spat_flags &= ~SPAT_ONCE;
1484 spat->spat_flags |= SPAT_FOLD;
1485 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1486 str_free(spat->spat_short); /* anchored opt doesn't do */
1487 spat->spat_short = Nullstr; /* case insensitive match */
1488 spat->spat_slen = 0;
1493 spat->spat_flags |= SPAT_KEEP;
1496 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1497 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1498 if (!spat->spat_runtime) {
1499 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1503 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1508 register SPAT *spat;
1510 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1511 if (spat->spat_short &&
1512 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1514 if (spat->spat_flags & SPAT_SCANFIRST) {
1515 str_free(spat->spat_short);
1516 spat->spat_short = Nullstr;
1519 str_free(spat->spat_regexp->regmust);
1520 spat->spat_regexp->regmust = Nullstr;
1524 if (!spat->spat_short || /* promote the better string */
1525 ((spat->spat_flags & SPAT_SCANFIRST) &&
1526 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1527 str_free(spat->spat_short); /* ok if null */
1528 spat->spat_short = spat->spat_regexp->regmust;
1529 spat->spat_regexp->regmust = Nullstr;
1530 spat->spat_flags |= SPAT_SCANFIRST;
1536 expand_charset(s,len,retlen)
1542 register char *d = t;
1544 register char *send = s + len;
1547 if (s[1] == '-' && s+2 < send) {
1548 for (i = s[0]; i <= s[2]; i++)
1557 return nsavestr(t,d-t);
1565 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1573 Newz(803,tbl,256,char);
1574 arg[2].arg_type = A_NULL;
1575 arg[2].arg_ptr.arg_cval = tbl;
1578 yyerror("Translation pattern not terminated");
1579 yylval.arg = Nullarg;
1582 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1583 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1584 free_arg(yylval.arg);
1587 yyerror("Translation replacement not terminated");
1588 yylval.arg = Nullarg;
1591 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1592 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1593 free_arg(yylval.arg);
1599 for (i = 0, j = 0; i < tlen; i++,j++) {
1602 tbl[t[i] & 0377] = r[j];
1617 register char *send;
1618 register bool makesingle = FALSE;
1619 register STAB *stab;
1620 bool alwaysdollar = FALSE;
1621 bool hereis = FALSE;
1623 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1628 arg->arg_type = O_ITEM;
1631 default: /* a substitution replacement */
1632 arg[1].arg_type = A_DOUBLE;
1633 makesingle = TRUE; /* maybe disable runtime scanning */
1643 arg[1].arg_type = A_SINGLE;
1648 else if (s[1] == '.')
1659 yyerror("Illegal octal digit");
1661 case '0': case '1': case '2': case '3': case '4':
1662 case '5': case '6': case '7':
1666 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1667 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1671 i += (*s++ & 7) + 9;
1676 (void)sprintf(tokenbuf,"%ld",i);
1677 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1678 (void)str_2num(arg[1].arg_ptr.arg_str);
1681 case '1': case '2': case '3': case '4': case '5':
1682 case '6': case '7': case '8': case '9': case '.':
1684 arg[1].arg_type = A_SINGLE;
1686 while (isdigit(*s) || *s == '_') {
1692 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1694 while (isdigit(*s) || *s == '_') {
1701 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1703 if (*s == '+' || *s == '-')
1709 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1710 (void)str_2num(arg[1].arg_ptr.arg_str);
1718 if (*++s && index("`'\"",*s)) {
1720 s = cpytill(d,s,bufend,term,&len);
1730 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1732 } /* assuming tokenbuf won't clobber */
1737 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1738 herewas = str_make(s,bufend-s);
1740 s--, herewas = str_make(s,d-s);
1741 s += herewas->str_cur;
1749 s = cpytill(d,s,bufend,'>',&len);
1754 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1756 if (d - tokenbuf != len) {
1758 arg[1].arg_type = A_GLOB;
1759 d = nsavestr(d,len);
1760 arg[1].arg_ptr.arg_stab = stab = genstab();
1761 stab_io(stab) = stio_new();
1762 stab_val(stab) = str_make(d,len);
1763 stab_val(stab)->str_u.str_hash = curstash;
1770 (void)strcpy(d,"ARGV");
1772 arg[1].arg_type = A_INDREAD;
1773 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1776 arg[1].arg_type = A_READ;
1777 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1778 yyerror("Can't get both program and data from <STDIN>");
1779 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1780 if (!stab_io(arg[1].arg_ptr.arg_stab))
1781 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1782 if (strEQ(d,"ARGV")) {
1783 (void)aadd(arg[1].arg_ptr.arg_stab);
1784 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1801 arg[1].arg_type = A_SINGLE;
1808 arg[1].arg_type = A_DOUBLE;
1809 makesingle = TRUE; /* maybe disable runtime scanning */
1810 alwaysdollar = TRUE; /* treat $) and $| as variables */
1815 arg[1].arg_type = A_BACKTICK;
1817 alwaysdollar = TRUE; /* treat $) and $| as variables */
1825 multi_open = multi_close = '<';
1828 if (tmps = index("([{< )]}> )]}>",term))
1832 tmpstr = Str_new(87,0);
1837 while (s < bufend &&
1838 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1844 fatal("EOF in string");
1846 str_nset(tmpstr,d+1,s-d);
1848 str_ncat(herewas,s,bufend-s);
1849 str_replace(linestr,herewas);
1850 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1851 bufend = linestr->str_ptr + linestr->str_cur;
1856 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1857 while (s >= bufend) { /* multiple line string? */
1859 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1861 fatal("EOF in string");
1865 STR *str = Str_new(88,0);
1867 str_sset(str,linestr);
1868 astore(lineary,(int)line,str);
1870 bufend = linestr->str_ptr + linestr->str_cur;
1872 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1875 str_scat(linestr,herewas);
1876 bufend = linestr->str_ptr + linestr->str_cur;
1880 str_scat(tmpstr,linestr);
1884 s = str_append_till(tmpstr,s,bufend,term,leave);
1888 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1889 tmpstr->str_len = tmpstr->str_cur + 1;
1890 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1892 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1893 arg[1].arg_ptr.arg_str = tmpstr;
1897 s = tmpstr->str_ptr;
1898 send = s + tmpstr->str_cur;
1899 while (s < send) { /* see if we can make SINGLE */
1900 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1902 *s = '$'; /* grandfather \digit in subst */
1903 if ((*s == '$' || *s == '@') && s+1 < send &&
1904 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
1905 makesingle = FALSE; /* force interpretation */
1907 else if (*s == '\\' && s+1 < send) {
1912 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1914 if ((*s == '$' && s+1 < send &&
1915 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
1916 (*s == '@' && s+1 < send) ) {
1917 len = scanreg(s,bufend,tokenbuf) - s;
1918 if (*s == '$' || strEQ(tokenbuf,"ARGV")
1919 || strEQ(tokenbuf,"ENV")
1920 || strEQ(tokenbuf,"SIG")
1921 || strEQ(tokenbuf,"INC") )
1922 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
1927 else if (*s == '\\' && s+1 < send) {
1931 if (!makesingle && (!leave || (*s && index(leave,*s))))
1935 case '0': case '1': case '2': case '3':
1936 case '4': case '5': case '6': case '7':
1938 if (s < send && *s && index("01234567",*s)) {
1942 if (s < send && *s && index("01234567",*s)) {
1971 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
1972 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
1974 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
1976 tmpstr->str_cur = d - tmpstr->str_ptr;
1977 arg[1].arg_ptr.arg_str = tmpstr;
1992 register FCMD *fprev = &froot;
1993 register FCMD *fcmd;
2000 Zero(&froot, 1, FCMD);
2001 while ((s = str_gets(linestr,rsfp, 0)) != Nullch) {
2004 STR *tmpstr = Str_new(89,0);
2006 str_sset(tmpstr,linestr);
2007 astore(lineary,(int)line,tmpstr);
2009 bufend = linestr->str_ptr + linestr->str_cur;
2010 if (strEQ(s,".\n")) {
2012 return froot.f_next;
2016 flinebeg = Nullfcmd;
2019 while (s < bufend) {
2020 Newz(804,fcmd,1,FCMD);
2021 fprev->f_next = fcmd;
2023 for (t=s; t < bufend && *t != '@' && *t != '^'; t++) {
2033 fcmd->f_pre = nsavestr(s, t-s);
2034 fcmd->f_presize = t-s;
2038 fcmd->f_flags |= FC_NOBLANK;
2040 fcmd->f_flags |= FC_REPEAT;
2044 flinebeg = fcmd; /* start values here */
2046 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2049 fcmd->f_type = F_LINES;
2053 fcmd->f_type = F_LEFT;
2058 fcmd->f_type = F_RIGHT;
2063 fcmd->f_type = F_CENTER;
2068 fcmd->f_type = F_LEFT;
2071 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2072 fcmd->f_flags |= FC_MORE;
2080 if ((s = str_gets(linestr, rsfp, 0)) == Nullch)
2084 STR *tmpstr = Str_new(90,0);
2086 str_sset(tmpstr,linestr);
2087 astore(lineary,(int)line,tmpstr);
2089 if (strEQ(s,".\n")) {
2091 yyerror("Missing values line");
2092 return froot.f_next;
2096 bufend = linestr->str_ptr + linestr->str_cur;
2097 str = flinebeg->f_unparsed = Str_new(91,bufend - bufptr);
2098 str->str_u.str_hash = curstash;
2099 str_nset(str,"(",1);
2100 flinebeg->f_line = line;
2101 if (!flinebeg->f_next->f_type || index(linestr->str_ptr, ',')) {
2102 str_scat(str,linestr);
2103 str_ncat(str,",$$);",5);
2106 while (s < bufend && isspace(*s))
2109 while (s < bufend) {
2111 case ' ': case '\t': case '\n': case ';':
2112 str_ncat(str, t, s - t);
2113 str_ncat(str, "," ,1);
2114 while (s < bufend && (isspace(*s) || *s == ';'))
2119 str_ncat(str, t, s - t);
2121 s = scanreg(s,bufend,tokenbuf);
2122 str_ncat(str, t, s - t);
2124 if (s < bufend && *s && index("$'\"",*s))
2125 str_ncat(str, ",", 1);
2127 case '"': case '\'':
2128 str_ncat(str, t, s - t);
2131 while (s < bufend && (*s != *t || s[-1] == '\\'))
2135 str_ncat(str, t, s - t);
2137 if (s < bufend && *s && index("$'\"",*s))
2138 str_ncat(str, ",", 1);
2141 yyerror("Please use commas to separate fields");
2144 str_ncat(str,"$$);",4);
2149 bufptr = str_get(linestr);
2150 yyerror("Format not terminated");
2151 return froot.f_next;
2158 cshlen = strlen(cshname);