1 /* $Header: toke.c,v 3.0.1.7 90/03/27 16:32:37 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.7 90/03/27 16:32:37 lwall
10 * patch16: MSDOS support
11 * patch16: formats didn't work inside eval
12 * patch16: final semicolon in program wasn't optional with -p or -n
14 * Revision 3.0.1.6 90/03/12 17:06:36 lwall
15 * patch13: last semicolon of program is now optional, just for Randal
16 * patch13: added splice operator: @oldelems = splice(@array,$offset,$len,LIST)
18 * Revision 3.0.1.5 90/02/28 18:47:06 lwall
19 * patch9: return grandfathered to never be function call
20 * patch9: non-existent perldb.pl now gives reasonable error message
21 * patch9: perl can now start up other interpreters scripts
22 * patch9: line numbers were bogus during certain portions of foreach evaluation
23 * patch9: null hereis core dumped
25 * Revision 3.0.1.4 89/12/21 20:26:56 lwall
26 * patch7: -d switch incompatible with -p or -n
27 * patch7: " ''$foo'' " didn't parse right
28 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
30 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
31 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
32 * patch5: } misadjusted expection of subsequent term or operator
33 * patch5: y/abcde// didn't work
35 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
36 * patch2: fixed a CLINE macro conflict
38 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
39 * patch1: disambiguated word after "sort" better
41 * Revision 3.0 89/10/18 15:32:33 lwall
50 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
55 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
57 #define META(c) ((c) | 128)
59 #define RETURN(retval) return (bufptr = s,(int)retval)
60 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
61 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
62 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
63 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
64 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
65 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
66 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
67 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
68 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
69 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
70 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
71 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
72 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
73 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
74 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
75 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
76 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
77 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
78 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
79 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
80 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
81 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
82 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
83 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
85 /* This bit of chicanery makes a unary function followed by
86 * a parenthesis into a function with one argument, highest precedence.
88 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
89 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
91 /* This does similarly for list operators, merely by pretending that the
92 * paren came before the listop rather than after.
94 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
95 (*s = META('('), bufptr = oldbufptr, '(') : \
96 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
97 /* grandfather return to old style */
98 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
104 while (s < bufend && isascii(*s) && isspace(*s))
113 #define UNI(f) return uni(f,s)
114 #define LOP(f) return lop(f,s)
153 #endif /* CRIPPLED_CC */
157 register char *s = bufptr;
160 static bool in_format = FALSE;
161 static bool firstline = TRUE;
162 extern int yychar; /* last token */
164 oldoldbufptr = oldbufptr;
171 fprintf(stderr,"Tokener at %s",s);
173 fprintf(stderr,"Tokener at %s\n",s);
177 if ((*s & 127) == '(')
180 warn("Unrecognized character \\%03o ignored", *s++);
186 goto retry; /* ignore stray nulls */
189 if (minus_n || minus_p || perldb) {
193 "do 'perldb.pl' || die \"Can't find perldb.pl in @INC\"; print $@;");
194 if (minus_n || minus_p) {
195 str_cat(linestr,"line: while (<>) {");
197 str_cat(linestr,"@F=split(' ');");
199 oldoldbufptr = oldbufptr = s = str_get(linestr);
200 bufend = linestr->str_ptr + linestr->str_cur;
206 yylval.formval = load_format();
208 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
209 bufend = linestr->str_ptr + linestr->str_cur;
213 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
215 (void)mypclose(rsfp);
216 else if (rsfp != stdin)
219 if (minus_n || minus_p) {
220 str_set(linestr,minus_p ? ";}continue{print" : "");
221 str_cat(linestr,";}");
222 oldoldbufptr = oldbufptr = s = str_get(linestr);
223 bufend = linestr->str_ptr + linestr->str_cur;
224 minus_n = minus_p = 0;
227 oldoldbufptr = oldbufptr = s = str_get(linestr);
229 RETURN(';'); /* not infinite loop because rsfp is NULL now */
231 oldoldbufptr = oldbufptr = bufptr = s;
233 STR *str = Str_new(85,0);
235 str_sset(str,linestr);
236 astore(lineary,(int)line,str);
244 bufend = linestr->str_ptr + linestr->str_cur;
246 if (*s == '#' && s[1] == '!') {
247 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
255 while (s < bufend && !isspace(*s))
258 while (s < bufend && isspace(*s))
261 Newz(899,newargv,origargc+3,char*);
263 while (s < bufend && !isspace(*s))
266 Copy(origargv+1, newargv+2, origargc+1, char*);
272 fatal("Can't exec %s", cmd);
276 while (s < bufend && isspace(*s))
278 if (*s == ':') /* for csh's that have to exec sh scripts */
283 case ' ': case '\t': case '\f':
288 if (preprocess && s == str_get(linestr) &&
289 s[1] == ' ' && isdigit(s[2])) {
291 for (s += 2; isdigit(*s); s++) ;
293 while (s < d && isspace(*s)) s++;
296 s[strlen(s)-1] = '\0'; /* wipe out newline */
299 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
302 filename = savestr(s);
304 filename = savestr(origfilename);
305 oldoldbufptr = oldbufptr = s = str_get(linestr);
307 if (in_eval && !rsfp) {
309 while (s < d && *s != '\n')
315 yylval.formval = load_format();
317 oldoldbufptr = oldbufptr = s = bufptr + 1;
328 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
331 case 'r': FTST(O_FTEREAD);
332 case 'w': FTST(O_FTEWRITE);
333 case 'x': FTST(O_FTEEXEC);
334 case 'o': FTST(O_FTEOWNED);
335 case 'R': FTST(O_FTRREAD);
336 case 'W': FTST(O_FTRWRITE);
337 case 'X': FTST(O_FTREXEC);
338 case 'O': FTST(O_FTROWNED);
339 case 'e': FTST(O_FTIS);
340 case 'z': FTST(O_FTZERO);
341 case 's': FTST(O_FTSIZE);
342 case 'f': FTST(O_FTFILE);
343 case 'd': FTST(O_FTDIR);
344 case 'l': FTST(O_FTLINK);
345 case 'p': FTST(O_FTPIPE);
346 case 'S': FTST(O_FTSOCK);
347 case 'u': FTST(O_FTSUID);
348 case 'g': FTST(O_FTSGID);
349 case 'k': FTST(O_FTSVTX);
350 case 'b': FTST(O_FTBLK);
351 case 'c': FTST(O_FTCHR);
352 case 't': FTST(O_FTTTY);
353 case 'T': FTST(O_FTTEXT);
354 case 'B': FTST(O_FTBINARY);
382 s = scanreg(s,bufend,tokenbuf);
383 yylval.stabval = stabent(tokenbuf,TRUE);
394 s = scanreg(s,bufend,tokenbuf);
395 yylval.stabval = stabent(tokenbuf,TRUE);
411 if (isspace(*s) || *s == '#')
412 cmdline = NOLINE; /* invalidate current command line number */
434 while (s < d && isspace(*s))
436 if (isalpha(*s) || *s == '_' || *s == '\'')
437 *(--s) = '\\'; /* force next ident to WORD */
491 while (isascii(*s) && \
492 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
494 while (d[-1] == '\'') \
500 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
502 s = scanreg(s,bufend,tokenbuf);
503 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
507 s = scanreg(s,bufend,tokenbuf);
508 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
516 yylval.stabval = stabent(tokenbuf,TRUE);
521 s = scanreg(s,bufend,tokenbuf);
524 yylval.stabval = stabent(tokenbuf,TRUE);
527 case '/': /* may either be division or pattern */
528 case '?': /* may either be conditional or pattern */
539 if (!expectterm || !isdigit(s[1])) {
548 case '0': case '1': case '2': case '3': case '4':
549 case '5': case '6': case '7': case '8': case '9':
550 case '\'': case '"': case '`':
554 case '\\': /* some magic to force next word to be a WORD */
555 s++; /* used by do and sub to force a separate namespace */
562 if (strEQ(d,"accept"))
564 if (strEQ(d,"atan2"))
571 if (strEQ(d,"binmode"))
578 if (strEQ(d,"continue"))
580 if (strEQ(d,"chdir")) {
581 (void)stabent("ENV",TRUE); /* may use HOME */
584 if (strEQ(d,"close"))
586 if (strEQ(d,"closedir"))
588 if (strEQ(d,"crypt")) {
594 if (strEQ(d,"chmod"))
596 if (strEQ(d,"chown"))
598 if (strEQ(d,"connect"))
602 if (strEQ(d,"chroot"))
609 while (s < d && isspace(*s))
611 if (isalpha(*s) || *s == '_')
612 *(--s) = '\\'; /* force next ident to WORD */
617 if (strEQ(d,"defined"))
619 if (strEQ(d,"delete"))
621 if (strEQ(d,"dbmopen"))
623 if (strEQ(d,"dbmclose"))
632 if (strEQ(d,"elsif")) {
636 if (strEQ(d,"eq") || strEQ(d,"EQ"))
640 if (strEQ(d,"eval")) {
641 allstabs = TRUE; /* must initialize everything since */
642 UNI(O_EVAL); /* we don't know what will be used */
650 if (strEQ(d,"exec")) {
654 if (strEQ(d,"endhostent"))
656 if (strEQ(d,"endnetent"))
658 if (strEQ(d,"endservent"))
660 if (strEQ(d,"endprotoent"))
662 if (strEQ(d,"endpwent"))
664 if (strEQ(d,"endgrent"))
669 if (strEQ(d,"for") || strEQ(d,"foreach")) {
673 if (strEQ(d,"format")) {
675 while (s < d && isspace(*s))
677 if (isalpha(*s) || *s == '_')
678 *(--s) = '\\'; /* force next ident to WORD */
680 allstabs = TRUE; /* must initialize everything since */
681 OPERATOR(FORMAT); /* we don't know what will be used */
685 if (strEQ(d,"fcntl"))
687 if (strEQ(d,"fileno"))
689 if (strEQ(d,"flock"))
694 if (strEQ(d,"gt") || strEQ(d,"GT"))
696 if (strEQ(d,"ge") || strEQ(d,"GE"))
702 if (strEQ(d,"gmtime"))
706 if (strnEQ(d,"get",3)) {
713 if (strEQ(d,"priority"))
715 if (strEQ(d,"protobyname"))
717 if (strEQ(d,"protobynumber"))
719 if (strEQ(d,"protoent"))
721 if (strEQ(d,"pwent"))
723 if (strEQ(d,"pwnam"))
725 if (strEQ(d,"pwuid"))
727 if (strEQ(d,"peername"))
730 else if (*d == 'h') {
731 if (strEQ(d,"hostbyname"))
733 if (strEQ(d,"hostbyaddr"))
735 if (strEQ(d,"hostent"))
738 else if (*d == 'n') {
739 if (strEQ(d,"netbyname"))
741 if (strEQ(d,"netbyaddr"))
743 if (strEQ(d,"netent"))
746 else if (*d == 's') {
747 if (strEQ(d,"servbyname"))
749 if (strEQ(d,"servbyport"))
751 if (strEQ(d,"servent"))
753 if (strEQ(d,"sockname"))
755 if (strEQ(d,"sockopt"))
758 else if (*d == 'g') {
759 if (strEQ(d,"grent"))
761 if (strEQ(d,"grnam"))
763 if (strEQ(d,"grgid"))
766 else if (*d == 'l') {
767 if (strEQ(d,"login"))
784 if (strEQ(d,"index"))
788 if (strEQ(d,"ioctl"))
807 if (strEQ(d,"local"))
809 if (strEQ(d,"length"))
811 if (strEQ(d,"lt") || strEQ(d,"LT"))
813 if (strEQ(d,"le") || strEQ(d,"LE"))
815 if (strEQ(d,"localtime"))
821 if (strEQ(d,"listen"))
823 if (strEQ(d,"lstat"))
839 RETURN(1); /* force error */
841 if (strEQ(d,"mkdir"))
848 if (strEQ(d,"ne") || strEQ(d,"NE"))
859 if (strEQ(d,"opendir"))
864 if (strEQ(d,"print")) {
865 checkcomma(s,"filehandle");
868 if (strEQ(d,"printf")) {
869 checkcomma(s,"filehandle");
872 if (strEQ(d,"push")) {
873 yylval.ival = O_PUSH;
880 if (strEQ(d,"package"))
898 if (strEQ(d,"return"))
900 if (strEQ(d,"reset"))
904 if (strEQ(d,"rename"))
908 if (strEQ(d,"rmdir"))
910 if (strEQ(d,"rindex"))
914 if (strEQ(d,"readdir"))
916 if (strEQ(d,"rewinddir"))
920 if (strEQ(d,"reverse"))
922 if (strEQ(d,"readlink"))
938 RETURN(1); /* force error */
947 if (strEQ(d,"select"))
953 if (strEQ(d,"setpgrp"))
955 if (strEQ(d,"setpriority"))
957 if (strEQ(d,"sethostent"))
959 if (strEQ(d,"setnetent"))
961 if (strEQ(d,"setservent"))
963 if (strEQ(d,"setprotoent"))
965 if (strEQ(d,"setpwent"))
967 if (strEQ(d,"setgrent"))
969 if (strEQ(d,"seekdir"))
971 if (strEQ(d,"setsockopt"))
978 if (strEQ(d,"shift"))
980 if (strEQ(d,"shutdown"))
991 if (strEQ(d,"sleep"))
998 if (strEQ(d,"socket"))
1000 if (strEQ(d,"socketpair"))
1001 FOP25(O_SOCKETPAIR);
1002 if (strEQ(d,"sort")) {
1003 checkcomma(s,"subroutine name");
1005 while (s < d && isascii(*s) && isspace(*s)) s++;
1006 if (*s == ';' || *s == ')') /* probably a close */
1007 fatal("sort is now a reserved word");
1008 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1009 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1010 strncpy(tokenbuf,s,d-s);
1011 if (strNE(tokenbuf,"keys") &&
1012 strNE(tokenbuf,"values") &&
1013 strNE(tokenbuf,"split") &&
1014 strNE(tokenbuf,"grep") &&
1015 strNE(tokenbuf,"readdir") &&
1016 strNE(tokenbuf,"unpack") &&
1017 strNE(tokenbuf,"do") &&
1018 (d >= bufend || isspace(*d)) )
1019 *(--s) = '\\'; /* force next ident to WORD */
1025 if (strEQ(d,"split"))
1027 if (strEQ(d,"sprintf"))
1029 if (strEQ(d,"splice")) {
1030 yylval.ival = O_SPLICE;
1035 if (strEQ(d,"sqrt"))
1039 if (strEQ(d,"srand"))
1045 if (strEQ(d,"stat"))
1047 if (strEQ(d,"study")) {
1053 if (strEQ(d,"substr"))
1055 if (strEQ(d,"sub")) {
1058 while (s < d && isspace(*s))
1060 if (isalpha(*s) || *s == '_' || *s == '\'') {
1062 str_sset(subname,curstname);
1063 str_ncat(subname,"'",1);
1065 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1069 str_ncat(subname,s,d-s);
1071 *(--s) = '\\'; /* force next ident to WORD */
1074 str_set(subname,"?");
1083 if (strEQ(d,"system")) {
1087 if (strEQ(d,"symlink"))
1089 if (strEQ(d,"syscall"))
1098 if (strEQ(d,"tr")) {
1103 RETURN(1); /* force error */
1105 if (strEQ(d,"tell"))
1107 if (strEQ(d,"telldir"))
1109 if (strEQ(d,"time"))
1111 if (strEQ(d,"times"))
1116 if (strEQ(d,"using"))
1118 if (strEQ(d,"until")) {
1122 if (strEQ(d,"unless")) {
1126 if (strEQ(d,"unlink"))
1128 if (strEQ(d,"undef"))
1130 if (strEQ(d,"unpack"))
1132 if (strEQ(d,"utime"))
1134 if (strEQ(d,"umask"))
1136 if (strEQ(d,"unshift")) {
1137 yylval.ival = O_UNSHIFT;
1143 if (strEQ(d,"values"))
1145 if (strEQ(d,"vec")) {
1152 if (strEQ(d,"while")) {
1156 if (strEQ(d,"warn"))
1158 if (strEQ(d,"wait"))
1160 if (strEQ(d,"wantarray")) {
1161 yylval.arg = op_new(1);
1162 yylval.arg->arg_type = O_ITEM;
1163 yylval.arg[1].arg_type = A_WANTARRAY;
1166 if (strEQ(d,"write"))
1171 if (!expectterm && strEQ(d,"x"))
1191 yylval.cval = savestr(d);
1193 if (oldoldbufptr && oldoldbufptr < bufptr) {
1194 while (isspace(*oldoldbufptr))
1196 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1198 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1201 return (CLINE, bufptr = s, (int)WORD);
1211 while (s < bufend && isascii(*s) && isspace(*s))
1213 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1215 while (isalpha(*s) || isdigit(*s) || *s == '_')
1217 while (s < bufend && isspace(*s))
1220 fatal("No comma allowed after %s", what);
1225 scanreg(s,send,dest)
1227 register char *send;
1241 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1244 while (d > dest+1 && d[-1] == '\'')
1250 if (*d == '{' /* } */ ) {
1253 while (s < send && brackets) {
1254 if (!reparse && (d == dest || (*s && isascii(*s) &&
1255 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1265 if (reparse && reparse == s - 1)
1279 if (*d == '^' && !isspace(*s))
1285 scanconst(string,len)
1289 register STR *retstr;
1294 if (index(string,'|')) {
1297 retstr = Str_new(86,len);
1298 str_nset(retstr,string,len);
1299 t = str_get(retstr);
1301 retstr->str_u.str_useful = 100;
1302 for (d=t; d < e; ) {
1310 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1314 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1318 (void)bcopy(d+1,d,e-d);
1337 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1349 retstr->str_cur = d - t;
1357 register SPAT *spat;
1363 Newz(801,spat,1,SPAT);
1364 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1365 curstash->tbl_spatroot = spat;
1374 spat->spat_flags |= SPAT_ONCE;
1377 fatal("panic: scanpat");
1379 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1381 yyerror("Search pattern not terminated");
1382 yylval.arg = Nullarg;
1386 while (*s == 'i' || *s == 'o') {
1390 spat->spat_flags |= SPAT_FOLD;
1394 spat->spat_flags |= SPAT_KEEP;
1398 for (d=tokenbuf; d < e; d++) {
1399 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1400 (*d == '@' && d[-1] != '\\')) {
1403 spat->spat_runtime = arg = op_new(1);
1404 arg->arg_type = O_ITEM;
1405 arg[1].arg_type = A_DOUBLE;
1406 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1407 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1408 d = scanreg(d,bufend,buf);
1409 (void)stabent(buf,TRUE); /* make sure it's created */
1410 for (; d < e; d++) {
1411 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1412 d = scanreg(d,bufend,buf);
1413 (void)stabent(buf,TRUE);
1415 else if (*d == '@' && d[-1] != '\\') {
1416 d = scanreg(d,bufend,buf);
1417 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1418 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1419 (void)stabent(buf,TRUE);
1422 goto got_pat; /* skip compiling for now */
1425 if (spat->spat_flags & SPAT_FOLD)
1429 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1431 if (*tokenbuf == '^') {
1432 spat->spat_short = scanconst(tokenbuf+1,len-1);
1433 if (spat->spat_short) {
1434 spat->spat_slen = spat->spat_short->str_cur;
1435 if (spat->spat_slen == len - 1)
1436 spat->spat_flags |= SPAT_ALL;
1440 spat->spat_flags |= SPAT_SCANFIRST;
1441 spat->spat_short = scanconst(tokenbuf,len);
1442 if (spat->spat_short) {
1443 spat->spat_slen = spat->spat_short->str_cur;
1444 if (spat->spat_slen == len)
1445 spat->spat_flags |= SPAT_ALL;
1448 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1449 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1450 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1451 spat->spat_flags & SPAT_FOLD,1);
1452 /* Note that this regexp can still be used if someone says
1453 * something like /a/ && s//b/; so we can't delete it.
1457 if (spat->spat_flags & SPAT_FOLD)
1461 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1463 if (spat->spat_short)
1464 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1465 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1466 spat->spat_flags & SPAT_FOLD,1);
1470 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1478 register SPAT *spat;
1483 Newz(802,spat,1,SPAT);
1484 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1485 curstash->tbl_spatroot = spat;
1487 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1489 yyerror("Substitution pattern not terminated");
1490 yylval.arg = Nullarg;
1494 for (d=tokenbuf; d < e; d++) {
1495 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1496 (*d == '@' && d[-1] != '\\')) {
1499 spat->spat_runtime = arg = op_new(1);
1500 arg->arg_type = O_ITEM;
1501 arg[1].arg_type = A_DOUBLE;
1502 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1503 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1504 d = scanreg(d,bufend,buf);
1505 (void)stabent(buf,TRUE); /* make sure it's created */
1507 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1508 d = scanreg(d,bufend,buf);
1509 (void)stabent(buf,TRUE);
1511 else if (*d == '@' && d[-1] != '\\') {
1512 d = scanreg(d,bufend,buf);
1513 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1514 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1515 (void)stabent(buf,TRUE);
1518 goto get_repl; /* skip compiling for now */
1521 if (*tokenbuf == '^') {
1522 spat->spat_short = scanconst(tokenbuf+1,len-1);
1523 if (spat->spat_short)
1524 spat->spat_slen = spat->spat_short->str_cur;
1527 spat->spat_flags |= SPAT_SCANFIRST;
1528 spat->spat_short = scanconst(tokenbuf,len);
1529 if (spat->spat_short)
1530 spat->spat_slen = spat->spat_short->str_cur;
1532 d = nsavestr(tokenbuf,len);
1536 yyerror("Substitution replacement not terminated");
1537 yylval.arg = Nullarg;
1540 spat->spat_repl = yylval.arg;
1541 spat->spat_flags |= SPAT_ONCE;
1542 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1543 spat->spat_flags |= SPAT_CONST;
1544 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1548 spat->spat_flags |= SPAT_CONST;
1549 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1550 e = tmpstr->str_ptr + tmpstr->str_cur;
1551 for (t = tmpstr->str_ptr; t < e; t++) {
1552 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1553 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1554 spat->spat_flags &= ~SPAT_CONST;
1557 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1560 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1561 spat->spat_repl[1].arg_type = A_SINGLE;
1562 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1566 spat->spat_flags &= ~SPAT_CONST;
1570 spat->spat_flags &= ~SPAT_ONCE;
1575 spat->spat_flags |= SPAT_FOLD;
1576 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1577 str_free(spat->spat_short); /* anchored opt doesn't do */
1578 spat->spat_short = Nullstr; /* case insensitive match */
1579 spat->spat_slen = 0;
1584 spat->spat_flags |= SPAT_KEEP;
1587 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1588 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1589 if (!spat->spat_runtime) {
1590 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1594 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1599 register SPAT *spat;
1601 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1602 if (spat->spat_short &&
1603 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1605 if (spat->spat_flags & SPAT_SCANFIRST) {
1606 str_free(spat->spat_short);
1607 spat->spat_short = Nullstr;
1610 str_free(spat->spat_regexp->regmust);
1611 spat->spat_regexp->regmust = Nullstr;
1615 if (!spat->spat_short || /* promote the better string */
1616 ((spat->spat_flags & SPAT_SCANFIRST) &&
1617 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1618 str_free(spat->spat_short); /* ok if null */
1619 spat->spat_short = spat->spat_regexp->regmust;
1620 spat->spat_regexp->regmust = Nullstr;
1621 spat->spat_flags |= SPAT_SCANFIRST;
1627 expand_charset(s,len,retlen)
1633 register char *d = t;
1635 register char *send = s + len;
1638 if (s[1] == '-' && s+2 < send) {
1639 for (i = s[0]; i <= s[2]; i++)
1648 return nsavestr(t,d-t);
1656 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1664 Newz(803,tbl,256,char);
1665 arg[2].arg_type = A_NULL;
1666 arg[2].arg_ptr.arg_cval = tbl;
1669 yyerror("Translation pattern not terminated");
1670 yylval.arg = Nullarg;
1673 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1674 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1675 free_arg(yylval.arg);
1678 yyerror("Translation replacement not terminated");
1679 yylval.arg = Nullarg;
1682 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1683 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1684 free_arg(yylval.arg);
1690 for (i = 0, j = 0; i < tlen; i++,j++) {
1693 tbl[t[i] & 0377] = r[j];
1708 register char *send;
1709 register bool makesingle = FALSE;
1710 register STAB *stab;
1711 bool alwaysdollar = FALSE;
1712 bool hereis = FALSE;
1714 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1719 arg->arg_type = O_ITEM;
1722 default: /* a substitution replacement */
1723 arg[1].arg_type = A_DOUBLE;
1724 makesingle = TRUE; /* maybe disable runtime scanning */
1734 arg[1].arg_type = A_SINGLE;
1739 else if (s[1] == '.')
1750 yyerror("Illegal octal digit");
1752 case '0': case '1': case '2': case '3': case '4':
1753 case '5': case '6': case '7':
1757 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1758 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1762 i += (*s++ & 7) + 9;
1767 (void)sprintf(tokenbuf,"%ld",i);
1768 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1769 #ifdef MICROPORT /* Microport 2.4 hack */
1770 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1772 (void)str_2num(arg[1].arg_ptr.arg_str);
1773 #endif /* Microport 2.4 hack */
1776 case '1': case '2': case '3': case '4': case '5':
1777 case '6': case '7': case '8': case '9': case '.':
1779 arg[1].arg_type = A_SINGLE;
1781 while (isdigit(*s) || *s == '_') {
1787 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1789 while (isdigit(*s) || *s == '_') {
1796 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1798 if (*s == '+' || *s == '-')
1804 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1805 #ifdef MICROPORT /* Microport 2.4 hack */
1806 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1808 (void)str_2num(arg[1].arg_ptr.arg_str);
1809 #endif /* Microport 2.4 hack */
1817 if (*++s && index("`'\"",*s)) {
1819 s = cpytill(d,s,bufend,term,&len);
1829 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1831 } /* assuming tokenbuf won't clobber */
1836 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1837 herewas = str_make(s,bufend-s);
1839 s--, herewas = str_make(s,d-s);
1840 s += herewas->str_cur;
1848 s = cpytill(d,s,bufend,'>',&len);
1853 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1855 if (d - tokenbuf != len) {
1857 arg[1].arg_type = A_GLOB;
1858 d = nsavestr(d,len);
1859 arg[1].arg_ptr.arg_stab = stab = genstab();
1860 stab_io(stab) = stio_new();
1861 stab_val(stab) = str_make(d,len);
1862 stab_val(stab)->str_u.str_hash = curstash;
1869 (void)strcpy(d,"ARGV");
1871 arg[1].arg_type = A_INDREAD;
1872 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1875 arg[1].arg_type = A_READ;
1876 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1877 yyerror("Can't get both program and data from <STDIN>");
1878 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1879 if (!stab_io(arg[1].arg_ptr.arg_stab))
1880 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1881 if (strEQ(d,"ARGV")) {
1882 (void)aadd(arg[1].arg_ptr.arg_stab);
1883 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1900 arg[1].arg_type = A_SINGLE;
1907 arg[1].arg_type = A_DOUBLE;
1908 makesingle = TRUE; /* maybe disable runtime scanning */
1909 alwaysdollar = TRUE; /* treat $) and $| as variables */
1914 arg[1].arg_type = A_BACKTICK;
1916 alwaysdollar = TRUE; /* treat $) and $| as variables */
1924 multi_open = multi_close = '<';
1927 if (tmps = index("([{< )]}> )]}>",term))
1931 tmpstr = Str_new(87,80);
1936 while (s < bufend &&
1937 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1943 fatal("EOF in string");
1945 str_nset(tmpstr,d+1,s-d);
1947 str_ncat(herewas,s,bufend-s);
1948 str_replace(linestr,herewas);
1949 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1950 bufend = linestr->str_ptr + linestr->str_cur;
1955 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1956 while (s >= bufend) { /* multiple line string? */
1958 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1960 fatal("EOF in string");
1964 STR *str = Str_new(88,0);
1966 str_sset(str,linestr);
1967 astore(lineary,(int)line,str);
1969 bufend = linestr->str_ptr + linestr->str_cur;
1971 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1974 str_scat(linestr,herewas);
1975 bufend = linestr->str_ptr + linestr->str_cur;
1979 str_scat(tmpstr,linestr);
1983 s = str_append_till(tmpstr,s,bufend,term,leave);
1987 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1988 tmpstr->str_len = tmpstr->str_cur + 1;
1989 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1991 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1992 arg[1].arg_ptr.arg_str = tmpstr;
1996 s = tmpstr->str_ptr;
1997 send = s + tmpstr->str_cur;
1998 while (s < send) { /* see if we can make SINGLE */
1999 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2001 *s = '$'; /* grandfather \digit in subst */
2002 if ((*s == '$' || *s == '@') && s+1 < send &&
2003 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2004 makesingle = FALSE; /* force interpretation */
2006 else if (*s == '\\' && s+1 < send) {
2011 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2013 if ((*s == '$' && s+1 < send &&
2014 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2015 (*s == '@' && s+1 < send) ) {
2016 len = scanreg(s,send,tokenbuf) - s;
2017 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2018 || strEQ(tokenbuf,"ENV")
2019 || strEQ(tokenbuf,"SIG")
2020 || strEQ(tokenbuf,"INC") )
2021 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2026 else if (*s == '\\' && s+1 < send) {
2030 if (!makesingle && (!leave || (*s && index(leave,*s))))
2034 case '0': case '1': case '2': case '3':
2035 case '4': case '5': case '6': case '7':
2037 if (s < send && *s && index("01234567",*s)) {
2041 if (s < send && *s && index("01234567",*s)) {
2070 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2071 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2073 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
2075 tmpstr->str_cur = d - tmpstr->str_ptr;
2076 arg[1].arg_ptr.arg_str = tmpstr;
2092 register FCMD *fprev = &froot;
2093 register FCMD *fcmd;
2100 Zero(&froot, 1, FCMD);
2102 while (s < bufend || (s = str_gets(linestr,rsfp, 0)) != Nullch) {
2105 STR *tmpstr = Str_new(89,0);
2107 str_sset(tmpstr,linestr);
2108 astore(lineary,(int)line,tmpstr);
2110 if (in_eval && !rsfp) {
2111 eol = index(s,'\n');
2116 eol = bufend = linestr->str_ptr + linestr->str_cur;
2117 if (strnEQ(s,".\n",2)) {
2119 return froot.f_next;
2125 flinebeg = Nullfcmd;
2129 Newz(804,fcmd,1,FCMD);
2130 fprev->f_next = fcmd;
2132 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2142 fcmd->f_pre = nsavestr(s, t-s);
2143 fcmd->f_presize = t-s;
2147 fcmd->f_flags |= FC_NOBLANK;
2149 fcmd->f_flags |= FC_REPEAT;
2153 flinebeg = fcmd; /* start values here */
2155 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2158 fcmd->f_type = F_LINES;
2162 fcmd->f_type = F_LEFT;
2167 fcmd->f_type = F_RIGHT;
2172 fcmd->f_type = F_CENTER;
2177 fcmd->f_type = F_LEFT;
2180 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2181 fcmd->f_flags |= FC_MORE;
2189 if (s >= bufend && (s = str_gets(linestr, rsfp, 0)) == Nullch)
2193 STR *tmpstr = Str_new(90,0);
2195 str_sset(tmpstr,linestr);
2196 astore(lineary,(int)line,tmpstr);
2198 if (in_eval && !rsfp) {
2199 eol = index(s,'\n');
2204 eol = bufend = linestr->str_ptr + linestr->str_cur;
2205 if (strnEQ(s,".\n",2)) {
2207 yyerror("Missing values line");
2208 return froot.f_next;
2214 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2215 str->str_u.str_hash = curstash;
2216 str_nset(str,"(",1);
2217 flinebeg->f_line = line;
2219 if (!flinebeg->f_next->f_type || index(s, ',')) {
2221 str_ncat(str, s, eol - s - 1);
2222 str_ncat(str,",$$);",5);
2227 while (s < eol && isspace(*s))
2232 case ' ': case '\t': case '\n': case ';':
2233 str_ncat(str, t, s - t);
2234 str_ncat(str, "," ,1);
2235 while (s < eol && (isspace(*s) || *s == ';'))
2240 str_ncat(str, t, s - t);
2242 s = scanreg(s,eol,tokenbuf);
2243 str_ncat(str, t, s - t);
2245 if (s < eol && *s && index("$'\"",*s))
2246 str_ncat(str, ",", 1);
2248 case '"': case '\'':
2249 str_ncat(str, t, s - t);
2252 while (s < eol && (*s != *t || s[-1] == '\\'))
2256 str_ncat(str, t, s - t);
2258 if (s < eol && *s && index("$'\"",*s))
2259 str_ncat(str, ",", 1);
2262 yyerror("Please use commas to separate fields");
2265 str_ncat(str,"$$);",4);
2270 bufptr = str_get(linestr);
2271 yyerror("Format not terminated");
2272 return froot.f_next;
2279 cshlen = strlen(cshname);