1 /* $Header: toke.c,v 3.0.1.9 90/08/13 22:37:25 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.9 90/08/13 22:37:25 lwall
10 * patch28: defined(@array) and defined(%array) didn't work right
12 * Revision 3.0.1.8 90/08/09 05:39:58 lwall
13 * patch19: added require operator
14 * patch19: added -x switch to extract script from input trash
15 * patch19: bare @name didn't add array to symbol table
16 * patch19: Added __LINE__ and __FILE__ tokens
17 * patch19: Added __END__ token
18 * patch19: Numeric literals are now stored only in floating point
19 * patch19: some support for FPS compiler misfunction
20 * patch19: "\\$foo" not handled right
21 * patch19: program and data can now both come from STDIN
22 * patch19: "here" strings caused warnings about uninitialized variables
24 * Revision 3.0.1.7 90/03/27 16:32:37 lwall
25 * patch16: MSDOS support
26 * patch16: formats didn't work inside eval
27 * patch16: final semicolon in program wasn't optional with -p or -n
29 * Revision 3.0.1.6 90/03/12 17:06:36 lwall
30 * patch13: last semicolon of program is now optional, just for Randal
31 * patch13: added splice operator: @oldelems = splice(@array,$offset,$len,LIST)
33 * Revision 3.0.1.5 90/02/28 18:47:06 lwall
34 * patch9: return grandfathered to never be function call
35 * patch9: non-existent perldb.pl now gives reasonable error message
36 * patch9: perl can now start up other interpreters scripts
37 * patch9: line numbers were bogus during certain portions of foreach evaluation
38 * patch9: null hereis core dumped
40 * Revision 3.0.1.4 89/12/21 20:26:56 lwall
41 * patch7: -d switch incompatible with -p or -n
42 * patch7: " ''$foo'' " didn't parse right
43 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
45 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
46 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
47 * patch5: } misadjusted expection of subsequent term or operator
48 * patch5: y/abcde// didn't work
50 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
51 * patch2: fixed a CLINE macro conflict
53 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
54 * patch1: disambiguated word after "sort" better
56 * Revision 3.0 89/10/18 15:32:33 lwall
65 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
70 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
72 #define META(c) ((c) | 128)
74 #define RETURN(retval) return (bufptr = s,(int)retval)
75 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
76 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
77 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
78 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
79 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
80 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
81 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
82 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
83 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
84 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
85 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
86 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
87 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
88 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
89 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
90 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
91 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
92 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
93 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
94 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
95 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
96 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
97 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
98 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
100 /* This bit of chicanery makes a unary function followed by
101 * a parenthesis into a function with one argument, highest precedence.
103 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
104 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
106 /* This does similarly for list operators, merely by pretending that the
107 * paren came before the listop rather than after.
109 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
110 (*s = META('('), bufptr = oldbufptr, '(') : \
111 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
112 /* grandfather return to old style */
113 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
119 while (s < bufend && isascii(*s) && isspace(*s))
128 #define UNI(f) return uni(f,s)
129 #define LOP(f) return lop(f,s)
168 #endif /* CRIPPLED_CC */
172 register char *s = bufptr;
175 static bool in_format = FALSE;
176 static bool firstline = TRUE;
177 extern int yychar; /* last token */
179 oldoldbufptr = oldbufptr;
186 fprintf(stderr,"Tokener at %s",s);
188 fprintf(stderr,"Tokener at %s\n",s);
192 if ((*s & 127) == '(')
195 warn("Unrecognized character \\%03o ignored", *s++);
201 if ((*s & 127) == '(')
204 warn("Unrecognized character \\%03o ignored", *s++);
208 goto fake_eof; /* emulate EOF on ^D or ^Z */
213 goto retry; /* ignore stray nulls */
216 if (minus_n || minus_p || perldb) {
219 str_cat(linestr, "require 'perldb.pl';");
220 if (minus_n || minus_p) {
221 str_cat(linestr,"line: while (<>) {");
223 str_cat(linestr,"@F=split(' ');");
225 oldoldbufptr = oldbufptr = s = str_get(linestr);
226 bufend = linestr->str_ptr + linestr->str_cur;
232 yylval.formval = load_format();
234 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
235 bufend = linestr->str_ptr + linestr->str_cur;
241 #endif /* CRYPTSCRIPT */
243 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
246 (void)mypclose(rsfp);
247 else if (rsfp == stdin)
252 if (minus_n || minus_p) {
253 str_set(linestr,minus_p ? ";}continue{print" : "");
254 str_cat(linestr,";}");
255 oldoldbufptr = oldbufptr = s = str_get(linestr);
256 bufend = linestr->str_ptr + linestr->str_cur;
257 minus_n = minus_p = 0;
260 oldoldbufptr = oldbufptr = s = str_get(linestr);
262 RETURN(';'); /* not infinite loop because rsfp is NULL now */
264 if (doextract && *linestr->str_ptr == '#')
267 oldoldbufptr = oldbufptr = bufptr = s;
269 STR *str = Str_new(85,0);
271 str_sset(str,linestr);
272 astore(lineary,(int)curcmd->c_line,str);
280 bufend = linestr->str_ptr + linestr->str_cur;
281 if (curcmd->c_line == 1) {
282 if (*s == '#' && s[1] == '!') {
283 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
291 while (s < bufend && !isspace(*s))
294 while (s < bufend && isspace(*s))
297 Newz(899,newargv,origargc+3,char*);
299 while (s < bufend && !isspace(*s))
302 Copy(origargv+1, newargv+2, origargc+1, char*);
308 fatal("Can't exec %s", cmd);
312 while (s < bufend && isspace(*s))
314 if (*s == ':') /* for csh's that have to exec sh scripts */
319 case ' ': case '\t': case '\f':
323 if (preprocess && s == str_get(linestr) &&
324 s[1] == ' ' && isdigit(s[2])) {
325 curcmd->c_line = atoi(s+2)-1;
326 for (s += 2; isdigit(*s); s++) ;
328 while (s < d && isspace(*s)) s++;
329 s[strlen(s)-1] = '\0'; /* wipe out newline */
332 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
335 filename = savestr(s);
337 filename = origfilename;
338 oldoldbufptr = oldbufptr = s = str_get(linestr);
342 if (in_eval && !rsfp) {
344 while (s < d && *s != '\n')
350 yylval.formval = load_format();
352 oldoldbufptr = oldbufptr = s = bufptr + 1;
363 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
366 case 'r': FTST(O_FTEREAD);
367 case 'w': FTST(O_FTEWRITE);
368 case 'x': FTST(O_FTEEXEC);
369 case 'o': FTST(O_FTEOWNED);
370 case 'R': FTST(O_FTRREAD);
371 case 'W': FTST(O_FTRWRITE);
372 case 'X': FTST(O_FTREXEC);
373 case 'O': FTST(O_FTROWNED);
374 case 'e': FTST(O_FTIS);
375 case 'z': FTST(O_FTZERO);
376 case 's': FTST(O_FTSIZE);
377 case 'f': FTST(O_FTFILE);
378 case 'd': FTST(O_FTDIR);
379 case 'l': FTST(O_FTLINK);
380 case 'p': FTST(O_FTPIPE);
381 case 'S': FTST(O_FTSOCK);
382 case 'u': FTST(O_FTSUID);
383 case 'g': FTST(O_FTSGID);
384 case 'k': FTST(O_FTSVTX);
385 case 'b': FTST(O_FTBLK);
386 case 'c': FTST(O_FTCHR);
387 case 't': FTST(O_FTTTY);
388 case 'T': FTST(O_FTTEXT);
389 case 'B': FTST(O_FTBINARY);
417 s = scanreg(s,bufend,tokenbuf);
418 yylval.stabval = stabent(tokenbuf,TRUE);
429 s = scanreg(s,bufend,tokenbuf);
430 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
446 if (isspace(*s) || *s == '#')
447 cmdline = NOLINE; /* invalidate current command line number */
450 if (curcmd->c_line < cmdline)
451 cmdline = curcmd->c_line;
469 while (s < d && isspace(*s))
471 if (isalpha(*s) || *s == '_' || *s == '\'')
472 *(--s) = '\\'; /* force next ident to WORD */
526 while (isascii(*s) && \
527 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
529 while (d[-1] == '\'') \
535 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
537 s = scanreg(s,bufend,tokenbuf);
538 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
542 s = scanreg(s,bufend,tokenbuf);
543 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
551 yylval.stabval = stabent(tokenbuf,TRUE);
556 s = scanreg(s,bufend,tokenbuf);
559 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
562 case '/': /* may either be division or pattern */
563 case '?': /* may either be conditional or pattern */
574 if (!expectterm || !isdigit(s[1])) {
583 case '0': case '1': case '2': case '3': case '4':
584 case '5': case '6': case '7': case '8': case '9':
585 case '\'': case '"': case '`':
589 case '\\': /* some magic to force next word to be a WORD */
590 s++; /* used by do and sub to force a separate namespace */
595 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
596 ARG *arg = op_new(1);
599 arg->arg_type = O_ITEM;
601 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
603 strcpy(tokenbuf, filename);
604 arg[1].arg_type = A_SINGLE;
605 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
608 else if (strEQ(d,"__END__"))
614 if (strEQ(d,"accept"))
616 if (strEQ(d,"atan2"))
623 if (strEQ(d,"binmode"))
630 if (strEQ(d,"continue"))
632 if (strEQ(d,"chdir")) {
633 (void)stabent("ENV",TRUE); /* may use HOME */
636 if (strEQ(d,"close"))
638 if (strEQ(d,"closedir"))
640 if (strEQ(d,"crypt")) {
646 if (strEQ(d,"chmod"))
648 if (strEQ(d,"chown"))
650 if (strEQ(d,"connect"))
654 if (strEQ(d,"chroot"))
661 while (s < d && isspace(*s))
663 if (isalpha(*s) || *s == '_')
664 *(--s) = '\\'; /* force next ident to WORD */
669 if (strEQ(d,"defined"))
671 if (strEQ(d,"delete"))
673 if (strEQ(d,"dbmopen"))
675 if (strEQ(d,"dbmclose"))
684 if (strEQ(d,"elsif")) {
685 yylval.ival = curcmd->c_line;
688 if (strEQ(d,"eq") || strEQ(d,"EQ"))
692 if (strEQ(d,"eval")) {
693 allstabs = TRUE; /* must initialize everything since */
694 UNI(O_EVAL); /* we don't know what will be used */
702 if (strEQ(d,"exec")) {
706 if (strEQ(d,"endhostent"))
708 if (strEQ(d,"endnetent"))
710 if (strEQ(d,"endservent"))
712 if (strEQ(d,"endprotoent"))
714 if (strEQ(d,"endpwent"))
716 if (strEQ(d,"endgrent"))
721 if (strEQ(d,"for") || strEQ(d,"foreach")) {
722 yylval.ival = curcmd->c_line;
725 if (strEQ(d,"format")) {
727 while (s < d && isspace(*s))
729 if (isalpha(*s) || *s == '_')
730 *(--s) = '\\'; /* force next ident to WORD */
732 allstabs = TRUE; /* must initialize everything since */
733 OPERATOR(FORMAT); /* we don't know what will be used */
737 if (strEQ(d,"fcntl"))
739 if (strEQ(d,"fileno"))
741 if (strEQ(d,"flock"))
746 if (strEQ(d,"gt") || strEQ(d,"GT"))
748 if (strEQ(d,"ge") || strEQ(d,"GE"))
754 if (strEQ(d,"gmtime"))
758 if (strnEQ(d,"get",3)) {
765 if (strEQ(d,"priority"))
767 if (strEQ(d,"protobyname"))
769 if (strEQ(d,"protobynumber"))
771 if (strEQ(d,"protoent"))
773 if (strEQ(d,"pwent"))
775 if (strEQ(d,"pwnam"))
777 if (strEQ(d,"pwuid"))
779 if (strEQ(d,"peername"))
782 else if (*d == 'h') {
783 if (strEQ(d,"hostbyname"))
785 if (strEQ(d,"hostbyaddr"))
787 if (strEQ(d,"hostent"))
790 else if (*d == 'n') {
791 if (strEQ(d,"netbyname"))
793 if (strEQ(d,"netbyaddr"))
795 if (strEQ(d,"netent"))
798 else if (*d == 's') {
799 if (strEQ(d,"servbyname"))
801 if (strEQ(d,"servbyport"))
803 if (strEQ(d,"servent"))
805 if (strEQ(d,"sockname"))
807 if (strEQ(d,"sockopt"))
810 else if (*d == 'g') {
811 if (strEQ(d,"grent"))
813 if (strEQ(d,"grnam"))
815 if (strEQ(d,"grgid"))
818 else if (*d == 'l') {
819 if (strEQ(d,"login"))
833 yylval.ival = curcmd->c_line;
836 if (strEQ(d,"index"))
840 if (strEQ(d,"ioctl"))
859 if (strEQ(d,"local"))
861 if (strEQ(d,"length"))
863 if (strEQ(d,"lt") || strEQ(d,"LT"))
865 if (strEQ(d,"le") || strEQ(d,"LE"))
867 if (strEQ(d,"localtime"))
873 if (strEQ(d,"listen"))
875 if (strEQ(d,"lstat"))
891 RETURN(1); /* force error */
893 if (strEQ(d,"mkdir"))
900 if (strEQ(d,"ne") || strEQ(d,"NE"))
911 if (strEQ(d,"opendir"))
916 if (strEQ(d,"print")) {
917 checkcomma(s,"filehandle");
920 if (strEQ(d,"printf")) {
921 checkcomma(s,"filehandle");
924 if (strEQ(d,"push")) {
925 yylval.ival = O_PUSH;
932 if (strEQ(d,"package"))
950 if (strEQ(d,"return"))
952 if (strEQ(d,"require")) {
953 allstabs = TRUE; /* must initialize everything since */
954 UNI(O_REQUIRE); /* we don't know what will be used */
956 if (strEQ(d,"reset"))
960 if (strEQ(d,"rename"))
964 if (strEQ(d,"rmdir"))
966 if (strEQ(d,"rindex"))
970 if (strEQ(d,"readdir"))
972 if (strEQ(d,"rewinddir"))
976 if (strEQ(d,"reverse"))
978 if (strEQ(d,"readlink"))
994 RETURN(1); /* force error */
1003 if (strEQ(d,"select"))
1005 if (strEQ(d,"seek"))
1007 if (strEQ(d,"send"))
1009 if (strEQ(d,"setpgrp"))
1011 if (strEQ(d,"setpriority"))
1012 FUN3(O_SETPRIORITY);
1013 if (strEQ(d,"sethostent"))
1015 if (strEQ(d,"setnetent"))
1017 if (strEQ(d,"setservent"))
1019 if (strEQ(d,"setprotoent"))
1021 if (strEQ(d,"setpwent"))
1023 if (strEQ(d,"setgrent"))
1025 if (strEQ(d,"seekdir"))
1027 if (strEQ(d,"setsockopt"))
1034 if (strEQ(d,"shift"))
1036 if (strEQ(d,"shutdown"))
1047 if (strEQ(d,"sleep"))
1054 if (strEQ(d,"socket"))
1056 if (strEQ(d,"socketpair"))
1058 if (strEQ(d,"sort")) {
1059 checkcomma(s,"subroutine name");
1061 while (s < d && isascii(*s) && isspace(*s)) s++;
1062 if (*s == ';' || *s == ')') /* probably a close */
1063 fatal("sort is now a reserved word");
1064 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1065 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1066 strncpy(tokenbuf,s,d-s);
1067 if (strNE(tokenbuf,"keys") &&
1068 strNE(tokenbuf,"values") &&
1069 strNE(tokenbuf,"split") &&
1070 strNE(tokenbuf,"grep") &&
1071 strNE(tokenbuf,"readdir") &&
1072 strNE(tokenbuf,"unpack") &&
1073 strNE(tokenbuf,"do") &&
1074 (d >= bufend || isspace(*d)) )
1075 *(--s) = '\\'; /* force next ident to WORD */
1081 if (strEQ(d,"split"))
1083 if (strEQ(d,"sprintf"))
1085 if (strEQ(d,"splice")) {
1086 yylval.ival = O_SPLICE;
1091 if (strEQ(d,"sqrt"))
1095 if (strEQ(d,"srand"))
1101 if (strEQ(d,"stat"))
1103 if (strEQ(d,"study")) {
1109 if (strEQ(d,"substr"))
1111 if (strEQ(d,"sub")) {
1112 subline = curcmd->c_line;
1114 while (s < d && isspace(*s))
1116 if (isalpha(*s) || *s == '_' || *s == '\'') {
1118 str_sset(subname,curstname);
1119 str_ncat(subname,"'",1);
1121 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1125 str_ncat(subname,s,d-s);
1127 *(--s) = '\\'; /* force next ident to WORD */
1130 str_set(subname,"?");
1139 if (strEQ(d,"system")) {
1143 if (strEQ(d,"symlink"))
1145 if (strEQ(d,"syscall"))
1154 if (strEQ(d,"tr")) {
1159 RETURN(1); /* force error */
1161 if (strEQ(d,"tell"))
1163 if (strEQ(d,"telldir"))
1165 if (strEQ(d,"time"))
1167 if (strEQ(d,"times"))
1169 if (strEQ(d,"truncate"))
1174 if (strEQ(d,"using"))
1176 if (strEQ(d,"until")) {
1177 yylval.ival = curcmd->c_line;
1180 if (strEQ(d,"unless")) {
1181 yylval.ival = curcmd->c_line;
1184 if (strEQ(d,"unlink"))
1186 if (strEQ(d,"undef"))
1188 if (strEQ(d,"unpack"))
1190 if (strEQ(d,"utime"))
1192 if (strEQ(d,"umask"))
1194 if (strEQ(d,"unshift")) {
1195 yylval.ival = O_UNSHIFT;
1201 if (strEQ(d,"values"))
1203 if (strEQ(d,"vec")) {
1210 if (strEQ(d,"while")) {
1211 yylval.ival = curcmd->c_line;
1214 if (strEQ(d,"warn"))
1216 if (strEQ(d,"wait"))
1218 if (strEQ(d,"wantarray")) {
1219 yylval.arg = op_new(1);
1220 yylval.arg->arg_type = O_ITEM;
1221 yylval.arg[1].arg_type = A_WANTARRAY;
1224 if (strEQ(d,"write"))
1229 if (!expectterm && strEQ(d,"x"))
1249 yylval.cval = savestr(d);
1251 if (oldoldbufptr && oldoldbufptr < bufptr) {
1252 while (isspace(*oldoldbufptr))
1254 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1256 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1259 return (CLINE, bufptr = s, (int)WORD);
1271 while (s < bufend && isascii(*s) && isspace(*s))
1273 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1275 while (isalpha(*s) || isdigit(*s) || *s == '_')
1277 while (s < bufend && isspace(*s))
1282 "tell eof times getlogin wait length shift umask getppid \
1283 cos exp int log rand sin sqrt ord wantarray",
1288 fatal("No comma allowed after %s", what);
1294 scanreg(s,send,dest)
1296 register char *send;
1310 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1313 while (d > dest+1 && d[-1] == '\'')
1319 if (*d == '{' /* } */ ) {
1322 while (s < send && brackets) {
1323 if (!reparse && (d == dest || (*s && isascii(*s) &&
1324 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1334 if (reparse && reparse == s - 1)
1348 if (*d == '^' && !isspace(*s))
1354 scanconst(string,len)
1358 register STR *retstr;
1363 if (index(string,'|')) {
1366 retstr = Str_new(86,len);
1367 str_nset(retstr,string,len);
1368 t = str_get(retstr);
1370 retstr->str_u.str_useful = 100;
1371 for (d=t; d < e; ) {
1379 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1383 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1387 (void)bcopy(d+1,d,e-d);
1406 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1418 retstr->str_cur = d - t;
1426 register SPAT *spat;
1432 Newz(801,spat,1,SPAT);
1433 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1434 curstash->tbl_spatroot = spat;
1443 spat->spat_flags |= SPAT_ONCE;
1446 fatal("panic: scanpat");
1448 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1450 yyerror("Search pattern not terminated");
1451 yylval.arg = Nullarg;
1455 while (*s == 'i' || *s == 'o') {
1459 spat->spat_flags |= SPAT_FOLD;
1463 spat->spat_flags |= SPAT_KEEP;
1467 for (d=tokenbuf; d < e; d++) {
1470 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1474 spat->spat_runtime = arg = op_new(1);
1475 arg->arg_type = O_ITEM;
1476 arg[1].arg_type = A_DOUBLE;
1477 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1478 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1479 d = scanreg(d,bufend,buf);
1480 (void)stabent(buf,TRUE); /* make sure it's created */
1481 for (; d < e; d++) {
1484 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1485 d = scanreg(d,bufend,buf);
1486 (void)stabent(buf,TRUE);
1488 else if (*d == '@') {
1489 d = scanreg(d,bufend,buf);
1490 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1491 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1492 (void)stabent(buf,TRUE);
1495 goto got_pat; /* skip compiling for now */
1498 if (spat->spat_flags & SPAT_FOLD)
1502 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1504 if (*tokenbuf == '^') {
1505 spat->spat_short = scanconst(tokenbuf+1,len-1);
1506 if (spat->spat_short) {
1507 spat->spat_slen = spat->spat_short->str_cur;
1508 if (spat->spat_slen == len - 1)
1509 spat->spat_flags |= SPAT_ALL;
1513 spat->spat_flags |= SPAT_SCANFIRST;
1514 spat->spat_short = scanconst(tokenbuf,len);
1515 if (spat->spat_short) {
1516 spat->spat_slen = spat->spat_short->str_cur;
1517 if (spat->spat_slen == len)
1518 spat->spat_flags |= SPAT_ALL;
1521 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1522 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1523 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1524 spat->spat_flags & SPAT_FOLD);
1525 /* Note that this regexp can still be used if someone says
1526 * something like /a/ && s//b/; so we can't delete it.
1530 if (spat->spat_flags & SPAT_FOLD)
1534 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1536 if (spat->spat_short)
1537 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1538 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1539 spat->spat_flags & SPAT_FOLD,1);
1543 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1551 register SPAT *spat;
1556 Newz(802,spat,1,SPAT);
1557 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1558 curstash->tbl_spatroot = spat;
1560 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1562 yyerror("Substitution pattern not terminated");
1563 yylval.arg = Nullarg;
1567 for (d=tokenbuf; d < e; d++) {
1568 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1569 (*d == '@' && d[-1] != '\\')) {
1572 spat->spat_runtime = arg = op_new(1);
1573 arg->arg_type = O_ITEM;
1574 arg[1].arg_type = A_DOUBLE;
1575 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1576 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1577 d = scanreg(d,bufend,buf);
1578 (void)stabent(buf,TRUE); /* make sure it's created */
1580 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1581 d = scanreg(d,bufend,buf);
1582 (void)stabent(buf,TRUE);
1584 else if (*d == '@' && d[-1] != '\\') {
1585 d = scanreg(d,bufend,buf);
1586 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1587 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1588 (void)stabent(buf,TRUE);
1591 goto get_repl; /* skip compiling for now */
1594 if (*tokenbuf == '^') {
1595 spat->spat_short = scanconst(tokenbuf+1,len-1);
1596 if (spat->spat_short)
1597 spat->spat_slen = spat->spat_short->str_cur;
1600 spat->spat_flags |= SPAT_SCANFIRST;
1601 spat->spat_short = scanconst(tokenbuf,len);
1602 if (spat->spat_short)
1603 spat->spat_slen = spat->spat_short->str_cur;
1605 d = nsavestr(tokenbuf,len);
1609 yyerror("Substitution replacement not terminated");
1610 yylval.arg = Nullarg;
1613 spat->spat_repl = yylval.arg;
1614 spat->spat_flags |= SPAT_ONCE;
1615 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1616 spat->spat_flags |= SPAT_CONST;
1617 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1621 spat->spat_flags |= SPAT_CONST;
1622 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1623 e = tmpstr->str_ptr + tmpstr->str_cur;
1624 for (t = tmpstr->str_ptr; t < e; t++) {
1625 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1626 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1627 spat->spat_flags &= ~SPAT_CONST;
1630 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1633 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1634 spat->spat_repl[1].arg_type = A_SINGLE;
1635 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1639 spat->spat_flags &= ~SPAT_CONST;
1643 spat->spat_flags &= ~SPAT_ONCE;
1648 spat->spat_flags |= SPAT_FOLD;
1649 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1650 str_free(spat->spat_short); /* anchored opt doesn't do */
1651 spat->spat_short = Nullstr; /* case insensitive match */
1652 spat->spat_slen = 0;
1657 spat->spat_flags |= SPAT_KEEP;
1660 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1661 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1662 if (!spat->spat_runtime) {
1663 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1667 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1672 register SPAT *spat;
1674 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1675 if (spat->spat_short &&
1676 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1678 if (spat->spat_flags & SPAT_SCANFIRST) {
1679 str_free(spat->spat_short);
1680 spat->spat_short = Nullstr;
1683 str_free(spat->spat_regexp->regmust);
1684 spat->spat_regexp->regmust = Nullstr;
1688 if (!spat->spat_short || /* promote the better string */
1689 ((spat->spat_flags & SPAT_SCANFIRST) &&
1690 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1691 str_free(spat->spat_short); /* ok if null */
1692 spat->spat_short = spat->spat_regexp->regmust;
1693 spat->spat_regexp->regmust = Nullstr;
1694 spat->spat_flags |= SPAT_SCANFIRST;
1700 expand_charset(s,len,retlen)
1706 register char *d = t;
1708 register char *send = s + len;
1710 while (s < send && d - t <= 256) {
1711 if (s[1] == '-' && s+2 < send) {
1712 for (i = s[0]; i <= s[2]; i++)
1721 return nsavestr(t,d-t);
1729 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1737 Newz(803,tbl,256,char);
1738 arg[2].arg_type = A_NULL;
1739 arg[2].arg_ptr.arg_cval = tbl;
1742 yyerror("Translation pattern not terminated");
1743 yylval.arg = Nullarg;
1746 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1747 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1748 free_arg(yylval.arg);
1751 yyerror("Translation replacement not terminated");
1752 yylval.arg = Nullarg;
1755 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1756 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1757 free_arg(yylval.arg);
1763 for (i = 0, j = 0; i < tlen; i++,j++) {
1766 tbl[t[i] & 0377] = r[j];
1781 register char *send;
1782 register bool makesingle = FALSE;
1783 register STAB *stab;
1784 bool alwaysdollar = FALSE;
1785 bool hereis = FALSE;
1788 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1793 arg->arg_type = O_ITEM;
1796 default: /* a substitution replacement */
1797 arg[1].arg_type = A_DOUBLE;
1798 makesingle = TRUE; /* maybe disable runtime scanning */
1808 arg[1].arg_type = A_SINGLE;
1813 else if (s[1] == '.')
1824 yyerror("Illegal octal digit");
1826 case '0': case '1': case '2': case '3': case '4':
1827 case '5': case '6': case '7':
1831 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1832 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1836 i += (*s++ & 7) + 9;
1841 str = Str_new(92,0);
1842 str_numset(str,(double)i);
1844 Safefree(str->str_ptr);
1845 str->str_ptr = Nullch;
1846 str->str_len = str->str_cur = 0;
1848 arg[1].arg_ptr.arg_str = str;
1851 case '1': case '2': case '3': case '4': case '5':
1852 case '6': case '7': case '8': case '9': case '.':
1854 arg[1].arg_type = A_SINGLE;
1856 while (isdigit(*s) || *s == '_') {
1862 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1864 while (isdigit(*s) || *s == '_') {
1871 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1873 if (*s == '+' || *s == '-')
1879 str = Str_new(92,0);
1880 str_numset(str,atof(tokenbuf));
1882 Safefree(str->str_ptr);
1883 str->str_ptr = Nullch;
1884 str->str_len = str->str_cur = 0;
1886 arg[1].arg_ptr.arg_str = str;
1894 if (*++s && index("`'\"",*s)) {
1896 s = cpytill(d,s,bufend,term,&len);
1906 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1908 } /* assuming tokenbuf won't clobber */
1913 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1914 herewas = str_make(s,bufend-s);
1916 s--, herewas = str_make(s,d-s);
1917 s += herewas->str_cur;
1925 s = cpytill(d,s,bufend,'>',&len);
1930 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1932 if (d - tokenbuf != len) {
1934 arg[1].arg_type = A_GLOB;
1935 d = nsavestr(d,len);
1936 arg[1].arg_ptr.arg_stab = stab = genstab();
1937 stab_io(stab) = stio_new();
1938 stab_val(stab) = str_make(d,len);
1939 stab_val(stab)->str_u.str_hash = curstash;
1946 (void)strcpy(d,"ARGV");
1948 arg[1].arg_type = A_INDREAD;
1949 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1952 arg[1].arg_type = A_READ;
1954 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1955 yyerror("Can't get both program and data from <STDIN>");
1957 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1958 if (!stab_io(arg[1].arg_ptr.arg_stab))
1959 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1960 if (strEQ(d,"ARGV")) {
1961 (void)aadd(arg[1].arg_ptr.arg_stab);
1962 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1979 arg[1].arg_type = A_SINGLE;
1986 arg[1].arg_type = A_DOUBLE;
1987 makesingle = TRUE; /* maybe disable runtime scanning */
1988 alwaysdollar = TRUE; /* treat $) and $| as variables */
1993 arg[1].arg_type = A_BACKTICK;
1995 alwaysdollar = TRUE; /* treat $) and $| as variables */
2001 multi_start = curcmd->c_line;
2003 multi_open = multi_close = '<';
2006 if (tmps = index("([{< )]}> )]}>",term))
2010 tmpstr = Str_new(87,80);
2015 while (s < bufend &&
2016 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2021 curcmd->c_line = multi_start;
2022 fatal("EOF in string");
2024 str_nset(tmpstr,d+1,s-d);
2026 str_ncat(herewas,s,bufend-s);
2027 str_replace(linestr,herewas);
2028 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2029 bufend = linestr->str_ptr + linestr->str_cur;
2033 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2036 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2037 while (s >= bufend) { /* multiple line string? */
2039 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2040 curcmd->c_line = multi_start;
2041 fatal("EOF in string");
2045 STR *str = Str_new(88,0);
2047 str_sset(str,linestr);
2048 astore(lineary,(int)curcmd->c_line,str);
2050 bufend = linestr->str_ptr + linestr->str_cur;
2052 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2055 str_scat(linestr,herewas);
2056 bufend = linestr->str_ptr + linestr->str_cur;
2060 str_scat(tmpstr,linestr);
2064 s = str_append_till(tmpstr,s,bufend,term,leave);
2066 multi_end = curcmd->c_line;
2068 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2069 tmpstr->str_len = tmpstr->str_cur + 1;
2070 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2072 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2073 arg[1].arg_ptr.arg_str = tmpstr;
2077 s = tmpstr->str_ptr;
2078 send = s + tmpstr->str_cur;
2079 while (s < send) { /* see if we can make SINGLE */
2080 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2081 !alwaysdollar && s[1] != '0')
2082 *s = '$'; /* grandfather \digit in subst */
2083 if ((*s == '$' || *s == '@') && s+1 < send &&
2084 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2085 makesingle = FALSE; /* force interpretation */
2087 else if (*s == '\\' && s+1 < send) {
2092 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2094 if ((*s == '$' && s+1 < send &&
2095 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2096 (*s == '@' && s+1 < send) ) {
2097 len = scanreg(s,send,tokenbuf) - s;
2098 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2099 || strEQ(tokenbuf,"ENV")
2100 || strEQ(tokenbuf,"SIG")
2101 || strEQ(tokenbuf,"INC") )
2102 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2107 else if (*s == '\\' && s+1 < send) {
2111 if (!makesingle && (!leave || (*s && index(leave,*s))))
2115 case '0': case '1': case '2': case '3':
2116 case '4': case '5': case '6': case '7':
2118 if (s < send && *s && index("01234567",*s)) {
2122 if (s < send && *s && index("01234567",*s)) {
2151 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2152 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2154 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
2156 tmpstr->str_cur = d - tmpstr->str_ptr;
2157 arg[1].arg_ptr.arg_str = tmpstr;
2173 register FCMD *fprev = &froot;
2174 register FCMD *fcmd;
2181 Zero(&froot, 1, FCMD);
2183 while (s < bufend || (s = str_gets(linestr,rsfp, 0)) != Nullch) {
2186 STR *tmpstr = Str_new(89,0);
2188 str_sset(tmpstr,linestr);
2189 astore(lineary,(int)curcmd->c_line,tmpstr);
2191 if (in_eval && !rsfp) {
2192 eol = index(s,'\n');
2197 eol = bufend = linestr->str_ptr + linestr->str_cur;
2198 if (strnEQ(s,".\n",2)) {
2200 return froot.f_next;
2206 flinebeg = Nullfcmd;
2210 Newz(804,fcmd,1,FCMD);
2211 fprev->f_next = fcmd;
2213 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2223 fcmd->f_pre = nsavestr(s, t-s);
2224 fcmd->f_presize = t-s;
2228 fcmd->f_flags |= FC_NOBLANK;
2230 fcmd->f_flags |= FC_REPEAT;
2234 flinebeg = fcmd; /* start values here */
2236 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2239 fcmd->f_type = F_LINES;
2243 fcmd->f_type = F_LEFT;
2248 fcmd->f_type = F_RIGHT;
2253 fcmd->f_type = F_CENTER;
2258 fcmd->f_type = F_LEFT;
2261 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2262 fcmd->f_flags |= FC_MORE;
2270 if (s >= bufend && (s = str_gets(linestr, rsfp, 0)) == Nullch)
2274 STR *tmpstr = Str_new(90,0);
2276 str_sset(tmpstr,linestr);
2277 astore(lineary,(int)curcmd->c_line,tmpstr);
2279 if (in_eval && !rsfp) {
2280 eol = index(s,'\n');
2285 eol = bufend = linestr->str_ptr + linestr->str_cur;
2286 if (strnEQ(s,".\n",2)) {
2288 yyerror("Missing values line");
2289 return froot.f_next;
2295 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2296 str->str_u.str_hash = curstash;
2297 str_nset(str,"(",1);
2298 flinebeg->f_line = curcmd->c_line;
2300 if (!flinebeg->f_next->f_type || index(s, ',')) {
2302 str_ncat(str, s, eol - s - 1);
2303 str_ncat(str,",$$);",5);
2308 while (s < eol && isspace(*s))
2313 case ' ': case '\t': case '\n': case ';':
2314 str_ncat(str, t, s - t);
2315 str_ncat(str, "," ,1);
2316 while (s < eol && (isspace(*s) || *s == ';'))
2321 str_ncat(str, t, s - t);
2323 s = scanreg(s,eol,tokenbuf);
2324 str_ncat(str, t, s - t);
2326 if (s < eol && *s && index("$'\"",*s))
2327 str_ncat(str, ",", 1);
2329 case '"': case '\'':
2330 str_ncat(str, t, s - t);
2333 while (s < eol && (*s != *t || s[-1] == '\\'))
2337 str_ncat(str, t, s - t);
2339 if (s < eol && *s && index("$'\"",*s))
2340 str_ncat(str, ",", 1);
2343 yyerror("Please use commas to separate fields");
2346 str_ncat(str,"$$);",4);
2351 bufptr = str_get(linestr);
2352 yyerror("Format not terminated");
2353 return froot.f_next;
2360 cshlen = strlen(cshname);