1 /* $RCSfile: toke.c,v $$Revision: 4.0.1.5 $$Date: 91/11/11 16:45:51 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.0.1.5 91/11/11 16:45:51 lwall
10 * patch19: default arg for shift was wrong after first subroutine definition
12 * Revision 4.0.1.4 91/11/05 19:02:48 lwall
13 * patch11: \x and \c were subject to double interpretation in regexps
14 * patch11: prepared for ctype implementations that don't define isascii()
15 * patch11: nested list operators could miscount parens
16 * patch11: once-thru blocks didn't display right in the debugger
17 * patch11: sort eval "whatever" didn't work
18 * patch11: underscore is now allowed within literal octal and hex numbers
20 * Revision 4.0.1.3 91/06/10 01:32:26 lwall
21 * patch10: m'$foo' now treats string as single quoted
22 * patch10: certain pattern optimizations were botched
24 * Revision 4.0.1.2 91/06/07 12:05:56 lwall
25 * patch4: new copyright notice
26 * patch4: debugger lost track of lines in eval
27 * patch4: //o and s///o now optimize themselves fully at runtime
28 * patch4: added global modifier for pattern matches
30 * Revision 4.0.1.1 91/04/12 09:18:18 lwall
31 * patch1: perl -de "print" wouldn't stop at the first statement
33 * Revision 4.0 91/03/20 01:42:14 lwall
53 /* which backslash sequences to keep in m// or s// */
55 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtfeaxc0123456789[{]}";
57 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
64 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
66 #define META(c) ((c) | 128)
68 #define RETURN(retval) return (bufptr = s,(int)retval)
69 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
70 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
71 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
72 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
73 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
74 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
75 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
76 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
77 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
78 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
79 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
80 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
81 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
82 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
83 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
84 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
85 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
86 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
87 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
88 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
89 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
90 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
91 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
92 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
93 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
94 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
96 /* This bit of chicanery makes a unary function followed by
97 * a parenthesis into a function with one argument, highest precedence.
99 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
100 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
102 /* This does similarly for list operators, merely by pretending that the
103 * paren came before the listop rather than after.
105 #define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
106 (*s = (char) META('('), bufptr = oldbufptr, '(') : \
107 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
108 /* grandfather return to old style */
109 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
115 while (s < bufend && isSPACE(*s))
124 #define UNI(f) return uni(f,s)
125 #define LOP(f) return lop(f,s)
165 #endif /* CRIPPLED_CC */
169 register char *s = bufptr;
172 static bool in_format = FALSE;
173 static bool firstline = TRUE;
174 extern int yychar; /* last token */
176 oldoldbufptr = oldbufptr;
183 fprintf(stderr,"Tokener at %s",s);
185 fprintf(stderr,"Tokener at %s\n",s);
189 if ((*s & 127) == '(') {
194 warn("Unrecognized character \\%03o ignored", *s++ & 255);
200 if ((*s & 127) == '(') {
205 warn("Unrecognized character \\%03o ignored", *s++ & 255);
209 goto fake_eof; /* emulate EOF on ^D or ^Z */
214 goto retry; /* ignore stray nulls */
217 if (minus_n || minus_p || perldb) {
221 char *pdb = getenv("PERLDB");
223 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
224 str_cat(linestr, ";");
226 if (minus_n || minus_p) {
227 str_cat(linestr,"line: while (<>) {");
229 str_cat(linestr,"chop;");
231 str_cat(linestr,"@F=split(' ');");
233 oldoldbufptr = oldbufptr = s = str_get(linestr);
234 bufend = linestr->str_ptr + linestr->str_cur;
240 yylval.formval = load_format();
242 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
243 bufend = linestr->str_ptr + linestr->str_cur;
249 #endif /* CRYPTSCRIPT */
251 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
255 (void)mypclose(rsfp);
256 else if ((FILE*)rsfp == stdin)
262 if (minus_n || minus_p) {
263 str_set(linestr,minus_p ? ";}continue{print" : "");
264 str_cat(linestr,";}");
265 oldoldbufptr = oldbufptr = s = str_get(linestr);
266 bufend = linestr->str_ptr + linestr->str_cur;
267 minus_n = minus_p = 0;
270 oldoldbufptr = oldbufptr = s = str_get(linestr);
272 RETURN(';'); /* not infinite loop because rsfp is NULL now */
274 if (doextract && *linestr->str_ptr == '#')
277 oldoldbufptr = oldbufptr = bufptr = s;
279 STR *str = Str_new(85,0);
281 str_sset(str,linestr);
282 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
290 bufend = linestr->str_ptr + linestr->str_cur;
291 if (curcmd->c_line == 1) {
292 if (*s == '#' && s[1] == '!') {
293 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
301 while (s < bufend && !isSPACE(*s))
304 while (s < bufend && isSPACE(*s))
307 Newz(899,newargv,origargc+3,char*);
309 while (s < bufend && !isSPACE(*s))
312 Copy(origargv+1, newargv+2, origargc+1, char*);
318 fatal("Can't exec %s", cmd);
322 while (s < bufend && isSPACE(*s))
324 if (*s == ':') /* for csh's that have to exec sh scripts */
329 case ' ': case '\t': case '\f': case '\r': case 013:
333 if (preprocess && s == str_get(linestr) &&
334 s[1] == ' ' && (isDIGIT(s[2]) || strnEQ(s+2,"line ",5)) ) {
335 while (*s && !isDIGIT(*s))
337 curcmd->c_line = atoi(s)-1;
341 while (s < d && isSPACE(*s)) s++;
342 s[strlen(s)-1] = '\0'; /* wipe out newline */
345 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
348 curcmd->c_filestab = fstab(s);
350 curcmd->c_filestab = fstab(origfilename);
351 oldoldbufptr = oldbufptr = s = str_get(linestr);
355 if (in_eval && !rsfp) {
357 while (s < d && *s != '\n')
363 yylval.formval = load_format();
365 oldoldbufptr = oldbufptr = s = bufptr + 1;
376 if (s[1] && isALPHA(s[1]) && !isALPHA(s[2])) {
379 case 'r': FTST(O_FTEREAD);
380 case 'w': FTST(O_FTEWRITE);
381 case 'x': FTST(O_FTEEXEC);
382 case 'o': FTST(O_FTEOWNED);
383 case 'R': FTST(O_FTRREAD);
384 case 'W': FTST(O_FTRWRITE);
385 case 'X': FTST(O_FTREXEC);
386 case 'O': FTST(O_FTROWNED);
387 case 'e': FTST(O_FTIS);
388 case 'z': FTST(O_FTZERO);
389 case 's': FTST(O_FTSIZE);
390 case 'f': FTST(O_FTFILE);
391 case 'd': FTST(O_FTDIR);
392 case 'l': FTST(O_FTLINK);
393 case 'p': FTST(O_FTPIPE);
394 case 'S': FTST(O_FTSOCK);
395 case 'u': FTST(O_FTSUID);
396 case 'g': FTST(O_FTSGID);
397 case 'k': FTST(O_FTSVTX);
398 case 'b': FTST(O_FTBLK);
399 case 'c': FTST(O_FTCHR);
400 case 't': FTST(O_FTTTY);
401 case 'T': FTST(O_FTTEXT);
402 case 'B': FTST(O_FTBINARY);
403 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
404 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
405 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
433 s = scanident(s,bufend,tokenbuf);
434 yylval.stabval = stabent(tokenbuf,TRUE);
445 s = scanident(s,bufend,tokenbuf);
446 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
462 yylval.ival = curcmd->c_line;
463 if (isSPACE(*s) || *s == '#')
464 cmdline = NOLINE; /* invalidate current command line number */
467 if (curcmd->c_line < cmdline)
468 cmdline = curcmd->c_line;
486 while (s < d && isSPACE(*s))
488 if (isALPHA(*s) || *s == '_' || *s == '\'')
489 *(--s) = '\\'; /* force next ident to WORD */
548 while (isALNUM(*s) || *s == '\'') \
550 while (d[-1] == '\'') \
556 if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_')) {
558 s = scanident(s,bufend,tokenbuf);
559 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
563 s = scanident(s,bufend,tokenbuf);
564 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
572 yylval.stabval = stabent(tokenbuf,TRUE);
577 s = scanident(s,bufend,tokenbuf);
580 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
583 case '/': /* may either be division or pattern */
584 case '?': /* may either be conditional or pattern */
595 if (!expectterm || !isDIGIT(s[1])) {
604 case '0': case '1': case '2': case '3': case '4':
605 case '5': case '6': case '7': case '8': case '9':
606 case '\'': case '"': case '`':
610 case '\\': /* some magic to force next word to be a WORD */
611 s++; /* used by do and sub to force a separate namespace */
616 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
617 ARG *arg = op_new(1);
620 arg->arg_type = O_ITEM;
622 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
624 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
625 arg[1].arg_type = A_SINGLE;
626 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
629 else if (strEQ(d,"__END__")) {
635 if (stab = stabent("DATA",FALSE)) {
636 stab->str_pok |= SP_MULTI;
637 stab_io(stab) = stio_new();
638 stab_io(stab)->ifp = rsfp;
639 #if defined(HAS_FCNTL) && defined(F_SETFD)
641 fcntl(fd,F_SETFD,fd >= 3);
644 stab_io(stab)->type = '|';
645 else if ((FILE*)rsfp == stdin)
646 stab_io(stab)->type = '-';
648 stab_io(stab)->type = '<';
658 if (strEQ(d,"alarm"))
660 if (strEQ(d,"accept"))
662 if (strEQ(d,"atan2"))
669 if (strEQ(d,"binmode"))
676 if (strEQ(d,"continue"))
678 if (strEQ(d,"chdir")) {
679 (void)stabent("ENV",TRUE); /* may use HOME */
682 if (strEQ(d,"close"))
684 if (strEQ(d,"closedir"))
688 if (strEQ(d,"caller"))
690 if (strEQ(d,"crypt")) {
692 static int cryptseen = 0;
699 if (strEQ(d,"chmod"))
701 if (strEQ(d,"chown"))
703 if (strEQ(d,"connect"))
707 if (strEQ(d,"chroot"))
714 while (s < d && isSPACE(*s))
716 if (isALPHA(*s) || *s == '_')
717 *(--s) = '\\'; /* force next ident to WORD */
722 if (strEQ(d,"defined"))
724 if (strEQ(d,"delete"))
726 if (strEQ(d,"dbmopen"))
728 if (strEQ(d,"dbmclose"))
737 if (strEQ(d,"elsif")) {
738 yylval.ival = curcmd->c_line;
741 if (strEQ(d,"eq") || strEQ(d,"EQ"))
745 if (strEQ(d,"eval")) {
746 allstabs = TRUE; /* must initialize everything since */
747 UNI(O_EVAL); /* we don't know what will be used */
755 if (strEQ(d,"exec")) {
759 if (strEQ(d,"endhostent"))
761 if (strEQ(d,"endnetent"))
763 if (strEQ(d,"endservent"))
765 if (strEQ(d,"endprotoent"))
767 if (strEQ(d,"endpwent"))
769 if (strEQ(d,"endgrent"))
774 if (strEQ(d,"for") || strEQ(d,"foreach")) {
775 yylval.ival = curcmd->c_line;
778 if (strEQ(d,"format")) {
780 while (s < d && isSPACE(*s))
782 if (isALPHA(*s) || *s == '_')
783 *(--s) = '\\'; /* force next ident to WORD */
785 allstabs = TRUE; /* must initialize everything since */
786 OPERATOR(FORMAT); /* we don't know what will be used */
790 if (strEQ(d,"fcntl"))
792 if (strEQ(d,"fileno"))
794 if (strEQ(d,"flock"))
799 if (strEQ(d,"gt") || strEQ(d,"GT"))
801 if (strEQ(d,"ge") || strEQ(d,"GE"))
807 if (strEQ(d,"gmtime"))
811 if (strnEQ(d,"get",3)) {
818 if (strEQ(d,"priority"))
820 if (strEQ(d,"protobyname"))
822 if (strEQ(d,"protobynumber"))
824 if (strEQ(d,"protoent"))
826 if (strEQ(d,"pwent"))
828 if (strEQ(d,"pwnam"))
830 if (strEQ(d,"pwuid"))
832 if (strEQ(d,"peername"))
835 else if (*d == 'h') {
836 if (strEQ(d,"hostbyname"))
838 if (strEQ(d,"hostbyaddr"))
840 if (strEQ(d,"hostent"))
843 else if (*d == 'n') {
844 if (strEQ(d,"netbyname"))
846 if (strEQ(d,"netbyaddr"))
848 if (strEQ(d,"netent"))
851 else if (*d == 's') {
852 if (strEQ(d,"servbyname"))
854 if (strEQ(d,"servbyport"))
856 if (strEQ(d,"servent"))
858 if (strEQ(d,"sockname"))
860 if (strEQ(d,"sockopt"))
863 else if (*d == 'g') {
864 if (strEQ(d,"grent"))
866 if (strEQ(d,"grnam"))
868 if (strEQ(d,"grgid"))
871 else if (*d == 'l') {
872 if (strEQ(d,"login"))
886 yylval.ival = curcmd->c_line;
889 if (strEQ(d,"index"))
893 if (strEQ(d,"ioctl"))
912 if (strEQ(d,"local"))
914 if (strEQ(d,"length"))
916 if (strEQ(d,"lt") || strEQ(d,"LT"))
918 if (strEQ(d,"le") || strEQ(d,"LE"))
920 if (strEQ(d,"localtime"))
926 if (strEQ(d,"listen"))
928 if (strEQ(d,"lstat"))
944 RETURN(1); /* force error */
948 if (strEQ(d,"mkdir"))
952 if (strEQ(d,"msgctl"))
954 if (strEQ(d,"msgget"))
956 if (strEQ(d,"msgrcv"))
958 if (strEQ(d,"msgsnd"))
967 if (strEQ(d,"ne") || strEQ(d,"NE"))
978 if (strEQ(d,"opendir"))
983 if (strEQ(d,"print")) {
984 checkcomma(s,"filehandle");
987 if (strEQ(d,"printf")) {
988 checkcomma(s,"filehandle");
991 if (strEQ(d,"push")) {
992 yylval.ival = O_PUSH;
999 if (strEQ(d,"package"))
1001 if (strEQ(d,"pipe"))
1010 if (strEQ(d,"qq")) {
1014 if (strEQ(d,"qx")) {
1021 if (strEQ(d,"return"))
1023 if (strEQ(d,"require")) {
1024 allstabs = TRUE; /* must initialize everything since */
1025 UNI(O_REQUIRE); /* we don't know what will be used */
1027 if (strEQ(d,"reset"))
1029 if (strEQ(d,"redo"))
1031 if (strEQ(d,"rename"))
1033 if (strEQ(d,"rand"))
1035 if (strEQ(d,"rmdir"))
1037 if (strEQ(d,"rindex"))
1039 if (strEQ(d,"read"))
1041 if (strEQ(d,"readdir"))
1043 if (strEQ(d,"rewinddir"))
1045 if (strEQ(d,"recv"))
1047 if (strEQ(d,"reverse"))
1049 if (strEQ(d,"readlink"))
1065 RETURN(1); /* force error */
1072 if (strEQ(d,"scalar"))
1078 if (strEQ(d,"select"))
1080 if (strEQ(d,"seek"))
1082 if (strEQ(d,"semctl"))
1084 if (strEQ(d,"semget"))
1086 if (strEQ(d,"semop"))
1088 if (strEQ(d,"send"))
1090 if (strEQ(d,"setpgrp"))
1092 if (strEQ(d,"setpriority"))
1093 FUN3(O_SETPRIORITY);
1094 if (strEQ(d,"sethostent"))
1096 if (strEQ(d,"setnetent"))
1098 if (strEQ(d,"setservent"))
1100 if (strEQ(d,"setprotoent"))
1102 if (strEQ(d,"setpwent"))
1104 if (strEQ(d,"setgrent"))
1106 if (strEQ(d,"seekdir"))
1108 if (strEQ(d,"setsockopt"))
1115 if (strEQ(d,"shift"))
1117 if (strEQ(d,"shmctl"))
1119 if (strEQ(d,"shmget"))
1121 if (strEQ(d,"shmread"))
1123 if (strEQ(d,"shmwrite"))
1125 if (strEQ(d,"shutdown"))
1136 if (strEQ(d,"sleep"))
1143 if (strEQ(d,"socket"))
1145 if (strEQ(d,"socketpair"))
1147 if (strEQ(d,"sort")) {
1148 checkcomma(s,"subroutine name");
1150 while (s < d && isSPACE(*s)) s++;
1151 if (*s == ';' || *s == ')') /* probably a close */
1152 fatal("sort is now a reserved word");
1153 if (isALPHA(*s) || *s == '_') {
1155 for (d = s; isALNUM(*d); d++) ;
1156 strncpy(tokenbuf,s,d-s);
1157 if (strNE(tokenbuf,"keys") &&
1158 strNE(tokenbuf,"values") &&
1159 strNE(tokenbuf,"split") &&
1160 strNE(tokenbuf,"grep") &&
1161 strNE(tokenbuf,"readdir") &&
1162 strNE(tokenbuf,"unpack") &&
1163 strNE(tokenbuf,"do") &&
1164 strNE(tokenbuf,"eval") &&
1165 (d >= bufend || isSPACE(*d)) )
1166 *(--s) = '\\'; /* force next ident to WORD */
1172 if (strEQ(d,"split"))
1174 if (strEQ(d,"sprintf"))
1176 if (strEQ(d,"splice")) {
1177 yylval.ival = O_SPLICE;
1182 if (strEQ(d,"sqrt"))
1186 if (strEQ(d,"srand"))
1192 if (strEQ(d,"stat"))
1194 if (strEQ(d,"study")) {
1200 if (strEQ(d,"substr"))
1202 if (strEQ(d,"sub")) {
1203 yylval.ival = savestack->ary_fill; /* restore stuff on reduce */
1207 subline = curcmd->c_line;
1209 while (s < d && isSPACE(*s))
1211 if (isALPHA(*s) || *s == '_' || *s == '\'') {
1212 str_sset(subname,curstname);
1213 str_ncat(subname,"'",1);
1214 for (d = s+1; isALNUM(*d) || *d == '\''; d++)
1219 str_ncat(subname,s,d-s);
1220 *(--s) = '\\'; /* force next ident to WORD */
1223 str_set(subname,"?");
1232 if (strEQ(d,"system")) {
1236 if (strEQ(d,"symlink"))
1238 if (strEQ(d,"syscall"))
1240 if (strEQ(d,"sysread"))
1242 if (strEQ(d,"syswrite"))
1251 if (strEQ(d,"tr")) {
1256 RETURN(1); /* force error */
1258 if (strEQ(d,"tell"))
1260 if (strEQ(d,"telldir"))
1262 if (strEQ(d,"time"))
1264 if (strEQ(d,"times"))
1266 if (strEQ(d,"truncate"))
1271 if (strEQ(d,"using"))
1273 if (strEQ(d,"until")) {
1274 yylval.ival = curcmd->c_line;
1277 if (strEQ(d,"unless")) {
1278 yylval.ival = curcmd->c_line;
1281 if (strEQ(d,"unlink"))
1283 if (strEQ(d,"undef"))
1285 if (strEQ(d,"unpack"))
1287 if (strEQ(d,"utime"))
1289 if (strEQ(d,"umask"))
1291 if (strEQ(d,"unshift")) {
1292 yylval.ival = O_UNSHIFT;
1298 if (strEQ(d,"values"))
1300 if (strEQ(d,"vec")) {
1307 if (strEQ(d,"while")) {
1308 yylval.ival = curcmd->c_line;
1311 if (strEQ(d,"warn"))
1313 if (strEQ(d,"wait"))
1315 if (strEQ(d,"waitpid"))
1317 if (strEQ(d,"wantarray")) {
1318 yylval.arg = op_new(1);
1319 yylval.arg->arg_type = O_ITEM;
1320 yylval.arg[1].arg_type = A_WANTARRAY;
1323 if (strEQ(d,"write"))
1328 if (!expectterm && strEQ(d,"x"))
1348 yylval.cval = savestr(d);
1350 if (oldoldbufptr && oldoldbufptr < bufptr) {
1351 while (isSPACE(*oldoldbufptr))
1353 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1355 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1358 return (CLINE, bufptr = s, (int)WORD);
1370 while (s < bufend && isSPACE(*s))
1372 if (isALPHA(*s) || *s == '_') {
1376 while (s < bufend && isSPACE(*s))
1381 "tell eof times getlogin wait length shift umask getppid \
1382 cos exp int log rand sin sqrt ord wantarray",
1387 fatal("No comma allowed after %s", what);
1393 scanident(s,send,dest)
1395 register char *send;
1409 while (isALNUM(*s) || *s == '\'')
1412 while (d > dest+1 && d[-1] == '\'')
1418 if (*d == '{' /* } */ ) {
1421 while (s < send && brackets) {
1422 if (!reparse && (d == dest || (*s && isALNUM(*s) ))) {
1432 if (reparse && reparse == s - 1)
1446 if (*d == '^' && (isUPPER(*s) || index("[\\]^_?", *s))) {
1457 scanconst(spat,string,len)
1462 register STR *tmpstr;
1466 char *origstring = string;
1467 static char *vert = "|";
1469 if (ninstr(string, string+len, vert, vert+1))
1473 tmpstr = Str_new(86,len);
1474 str_nset(tmpstr,string,len);
1475 t = str_get(tmpstr);
1477 tmpstr->str_u.str_useful = 100;
1478 for (d=t; d < e; ) {
1486 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1491 if (d[1] && index("wWbB0123456789sSdDlLuUExc",d[1])) {
1495 (void)bcopy(d+1,d,e-d);
1520 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1532 tmpstr->str_cur = d - t;
1534 spat->spat_flags |= SPAT_ALL;
1535 if (*origstring != '^')
1536 spat->spat_flags |= SPAT_SCANFIRST;
1537 spat->spat_short = tmpstr;
1538 spat->spat_slen = d - t;
1545 register SPAT *spat;
1550 STR *str = Str_new(93,0);
1553 Newz(801,spat,1,SPAT);
1554 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1555 curstash->tbl_spatroot = spat;
1564 spat->spat_flags |= SPAT_ONCE;
1567 fatal("panic: scanpat");
1569 s = str_append_till(str,s,bufend,s[-1],patleave);
1572 yyerror("Search pattern not terminated");
1573 yylval.arg = Nullarg;
1577 while (*s == 'i' || *s == 'o' || *s == 'g') {
1581 spat->spat_flags |= SPAT_FOLD;
1585 spat->spat_flags |= SPAT_KEEP;
1589 spat->spat_flags |= SPAT_GLOBAL;
1593 e = str->str_ptr + len;
1598 for (; d < e; d++) {
1601 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1605 spat->spat_runtime = arg = op_new(1);
1606 arg->arg_type = O_ITEM;
1607 arg[1].arg_type = A_DOUBLE;
1608 arg[1].arg_ptr.arg_str = str_smake(str);
1609 d = scanident(d,bufend,buf);
1610 (void)stabent(buf,TRUE); /* make sure it's created */
1611 for (; d < e; d++) {
1614 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1615 d = scanident(d,bufend,buf);
1616 (void)stabent(buf,TRUE);
1618 else if (*d == '@') {
1619 d = scanident(d,bufend,buf);
1620 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1621 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1622 (void)stabent(buf,TRUE);
1625 goto got_pat; /* skip compiling for now */
1628 if (spat->spat_flags & SPAT_FOLD)
1632 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1634 scanconst(spat,str->str_ptr,len);
1635 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1636 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1637 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1638 spat->spat_flags & SPAT_FOLD);
1639 /* Note that this regexp can still be used if someone says
1640 * something like /a/ && s//b/; so we can't delete it.
1644 if (spat->spat_flags & SPAT_FOLD)
1648 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1650 if (spat->spat_short)
1651 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1652 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1653 spat->spat_flags & SPAT_FOLD);
1658 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1666 register SPAT *spat;
1670 STR *str = Str_new(93,0);
1672 Newz(802,spat,1,SPAT);
1673 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1674 curstash->tbl_spatroot = spat;
1676 s = str_append_till(str,s+1,bufend,*s,patleave);
1679 yyerror("Substitution pattern not terminated");
1680 yylval.arg = Nullarg;
1684 e = str->str_ptr + len;
1685 for (d = str->str_ptr; d < e; d++) {
1688 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1692 spat->spat_runtime = arg = op_new(1);
1693 arg->arg_type = O_ITEM;
1694 arg[1].arg_type = A_DOUBLE;
1695 arg[1].arg_ptr.arg_str = str_smake(str);
1696 d = scanident(d,e,buf);
1697 (void)stabent(buf,TRUE); /* make sure it's created */
1699 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1700 d = scanident(d,e,buf);
1701 (void)stabent(buf,TRUE);
1703 else if (*d == '@' && d[-1] != '\\') {
1704 d = scanident(d,e,buf);
1705 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1706 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1707 (void)stabent(buf,TRUE);
1710 goto get_repl; /* skip compiling for now */
1713 scanconst(spat,str->str_ptr,len);
1718 yyerror("Substitution replacement not terminated");
1719 yylval.arg = Nullarg;
1722 spat->spat_repl = yylval.arg;
1723 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1724 spat->spat_flags |= SPAT_CONST;
1725 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1729 spat->spat_flags |= SPAT_CONST;
1730 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1731 e = tmpstr->str_ptr + tmpstr->str_cur;
1732 for (t = tmpstr->str_ptr; t < e; t++) {
1733 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1734 (t[1] == '{' /*}*/ && isDIGIT(t[2])) ))
1735 spat->spat_flags &= ~SPAT_CONST;
1738 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1741 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1742 spat->spat_repl[1].arg_type = A_SINGLE;
1743 spat->spat_repl = make_op(
1744 (spat->spat_repl[1].arg_type == A_SINGLE ? O_EVALONCE : O_EVAL),
1749 spat->spat_flags &= ~SPAT_CONST;
1753 spat->spat_flags |= SPAT_GLOBAL;
1758 spat->spat_flags |= SPAT_FOLD;
1759 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1760 str_free(spat->spat_short); /* anchored opt doesn't do */
1761 spat->spat_short = Nullstr; /* case insensitive match */
1762 spat->spat_slen = 0;
1767 spat->spat_flags |= SPAT_KEEP;
1770 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1771 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1772 if (!spat->spat_runtime) {
1773 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1774 spat->spat_flags & SPAT_FOLD);
1777 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1784 register SPAT *spat;
1786 if (!spat->spat_short && spat->spat_regexp->regstart &&
1787 (!spat->spat_regexp->regmust || spat->spat_regexp->reganch & ROPT_ANCH)
1789 if (!(spat->spat_regexp->reganch & ROPT_ANCH))
1790 spat->spat_flags |= SPAT_SCANFIRST;
1791 else if (spat->spat_flags & SPAT_FOLD)
1793 spat->spat_short = str_smake(spat->spat_regexp->regstart);
1795 else if (spat->spat_regexp->regmust) {/* is there a better short-circuit? */
1796 if (spat->spat_short &&
1797 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1799 if (spat->spat_flags & SPAT_SCANFIRST) {
1800 str_free(spat->spat_short);
1801 spat->spat_short = Nullstr;
1804 str_free(spat->spat_regexp->regmust);
1805 spat->spat_regexp->regmust = Nullstr;
1809 if (!spat->spat_short || /* promote the better string */
1810 ((spat->spat_flags & SPAT_SCANFIRST) &&
1811 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1812 str_free(spat->spat_short); /* ok if null */
1813 spat->spat_short = spat->spat_regexp->regmust;
1814 spat->spat_regexp->regmust = Nullstr;
1815 spat->spat_flags |= SPAT_SCANFIRST;
1821 expand_charset(s,len,retlen)
1827 register char *d = t;
1829 register char *send = s + len;
1831 while (s < send && d - t <= 256) {
1832 if (s[1] == '-' && s+2 < send) {
1833 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1842 return nsavestr(t,d-t);
1850 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1853 register short *tbl;
1861 New(803,tbl,256,short);
1862 arg[2].arg_type = A_NULL;
1863 arg[2].arg_ptr.arg_cval = (char*) tbl;
1866 yyerror("Translation pattern not terminated");
1867 yylval.arg = Nullarg;
1870 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1871 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1872 arg_free(yylval.arg);
1875 yyerror("Translation replacement not terminated");
1876 yylval.arg = Nullarg;
1879 complement = delete = squash = 0;
1880 while (*s == 'c' || *s == 'd' || *s == 's') {
1889 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1890 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1891 arg_free(yylval.arg);
1892 arg[2].arg_len = delete|squash;
1894 if (!rlen && !delete) {
1899 Zero(tbl, 256, short);
1900 for (i = 0; i < tlen; i++)
1901 tbl[t[i] & 0377] = -1;
1902 for (i = 0, j = 0; i < 256; i++) {
1916 for (i = 0; i < 256; i++)
1918 for (i = 0, j = 0; i < tlen; i++,j++) {
1921 if (tbl[t[i] & 0377] == -1)
1922 tbl[t[i] & 0377] = -2;
1927 if (tbl[t[i] & 0377] == -1)
1928 tbl[t[i] & 0377] = r[j] & 0377;
1944 register char *send;
1945 register bool makesingle = FALSE;
1946 register STAB *stab;
1947 bool alwaysdollar = FALSE;
1948 bool hereis = FALSE;
1951 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1956 arg->arg_type = O_ITEM;
1959 default: /* a substitution replacement */
1960 arg[1].arg_type = A_DOUBLE;
1961 makesingle = TRUE; /* maybe disable runtime scanning */
1971 arg[1].arg_type = A_SINGLE;
1976 else if (s[1] == '.')
1990 yyerror("Illegal octal digit");
1992 case '0': case '1': case '2': case '3': case '4':
1993 case '5': case '6': case '7':
1997 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1998 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2002 i += (*s++ & 7) + 9;
2007 str = Str_new(92,0);
2008 str_numset(str,(double)i);
2010 Safefree(str->str_ptr);
2011 str->str_ptr = Nullch;
2012 str->str_len = str->str_cur = 0;
2014 arg[1].arg_ptr.arg_str = str;
2017 case '1': case '2': case '3': case '4': case '5':
2018 case '6': case '7': case '8': case '9': case '.':
2020 arg[1].arg_type = A_SINGLE;
2022 while (isDIGIT(*s) || *s == '_') {
2028 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
2030 while (isDIGIT(*s) || *s == '_') {
2037 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
2039 if (*s == '+' || *s == '-')
2045 str = Str_new(92,0);
2046 str_numset(str,atof(tokenbuf));
2048 Safefree(str->str_ptr);
2049 str->str_ptr = Nullch;
2050 str->str_len = str->str_cur = 0;
2052 arg[1].arg_ptr.arg_str = str;
2060 if (*++s && index("`'\"",*s)) {
2062 s = cpytill(d,s,bufend,term,&len);
2074 } /* assuming tokenbuf won't clobber */
2079 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2080 herewas = str_make(s,bufend-s);
2082 s--, herewas = str_make(s,d-s);
2083 s += herewas->str_cur;
2091 s = cpytill(d,s,bufend,'>',&len);
2095 while (*d && (isALNUM(*d) || *d == '\''))
2097 if (d - tokenbuf != len) {
2099 arg[1].arg_type = A_GLOB;
2100 d = nsavestr(d,len);
2101 arg[1].arg_ptr.arg_stab = stab = genstab();
2102 stab_io(stab) = stio_new();
2103 stab_val(stab) = str_make(d,len);
2110 (void)strcpy(d,"ARGV");
2112 arg[1].arg_type = A_INDREAD;
2113 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2116 arg[1].arg_type = A_READ;
2117 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2118 if (!stab_io(arg[1].arg_ptr.arg_stab))
2119 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2120 if (strEQ(d,"ARGV")) {
2121 (void)aadd(arg[1].arg_ptr.arg_stab);
2122 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2143 arg[1].arg_type = A_SINGLE;
2150 arg[1].arg_type = A_DOUBLE;
2151 makesingle = TRUE; /* maybe disable runtime scanning */
2152 alwaysdollar = TRUE; /* treat $) and $| as variables */
2157 arg[1].arg_type = A_BACKTICK;
2159 alwaysdollar = TRUE; /* treat $) and $| as variables */
2166 multi_start = curcmd->c_line;
2168 multi_open = multi_close = '<';
2171 if (term && (tmps = index("([{< )]}> )]}>",term)))
2175 tmpstr = Str_new(87,80);
2180 while (s < bufend &&
2181 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2186 curcmd->c_line = multi_start;
2187 fatal("EOF in string");
2189 str_nset(tmpstr,d+1,s-d);
2191 str_ncat(herewas,s,bufend-s);
2192 str_replace(linestr,herewas);
2193 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2194 bufend = linestr->str_ptr + linestr->str_cur;
2198 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2201 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2202 while (s >= bufend) { /* multiple line string? */
2204 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2205 curcmd->c_line = multi_start;
2206 fatal("EOF in string");
2210 STR *str = Str_new(88,0);
2212 str_sset(str,linestr);
2213 astore(stab_xarray(curcmd->c_filestab),
2214 (int)curcmd->c_line,str);
2216 bufend = linestr->str_ptr + linestr->str_cur;
2218 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2221 str_scat(linestr,herewas);
2222 bufend = linestr->str_ptr + linestr->str_cur;
2226 str_scat(tmpstr,linestr);
2230 s = str_append_till(tmpstr,s,bufend,term,leave);
2232 multi_end = curcmd->c_line;
2234 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2235 tmpstr->str_len = tmpstr->str_cur + 1;
2236 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2238 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2239 arg[1].arg_ptr.arg_str = tmpstr;
2243 s = tmpstr->str_ptr;
2244 send = s + tmpstr->str_cur;
2245 while (s < send) { /* see if we can make SINGLE */
2246 if (*s == '\\' && s[1] && isDIGIT(s[1]) && !isDIGIT(s[2]) &&
2247 !alwaysdollar && s[1] != '0')
2248 *s = '$'; /* grandfather \digit in subst */
2249 if ((*s == '$' || *s == '@') && s+1 < send &&
2250 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2251 makesingle = FALSE; /* force interpretation */
2253 else if (*s == '\\' && s+1 < send) {
2254 if (index("lLuUE",s[1]))
2260 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2262 if ((*s == '$' && s+1 < send &&
2263 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2264 (*s == '@' && s+1 < send) ) {
2265 if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_'))
2267 len = scanident(s,send,tokenbuf) - s;
2268 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2269 || strEQ(tokenbuf,"ENV")
2270 || strEQ(tokenbuf,"SIG")
2271 || strEQ(tokenbuf,"INC") )
2272 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2277 else if (*s == '\\' && s+1 < send) {
2281 if (!makesingle && (!leave || (*s && index(leave,*s))))
2285 case '0': case '1': case '2': case '3':
2286 case '4': case '5': case '6': case '7':
2287 *d++ = scanoct(s, 3, &len);
2291 *d++ = scanhex(++s, 2, &len);
2330 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2331 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2333 tmpstr->str_cur = d - tmpstr->str_ptr;
2334 arg[1].arg_ptr.arg_str = tmpstr;
2350 register FCMD *fprev = &froot;
2351 register FCMD *fcmd;
2358 Zero(&froot, 1, FCMD);
2360 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2362 if (in_eval && !rsfp) {
2363 eol = index(s,'\n');
2368 eol = bufend = linestr->str_ptr + linestr->str_cur;
2370 STR *tmpstr = Str_new(89,0);
2372 str_nset(tmpstr, s, eol-s);
2373 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2377 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2380 return froot.f_next;
2387 flinebeg = Nullfcmd;
2391 Newz(804,fcmd,1,FCMD);
2392 fprev->f_next = fcmd;
2394 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2404 fcmd->f_pre = nsavestr(s, t-s);
2405 fcmd->f_presize = t-s;
2409 fcmd->f_flags |= FC_NOBLANK;
2411 fcmd->f_flags |= FC_REPEAT;
2415 flinebeg = fcmd; /* start values here */
2417 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2420 fcmd->f_type = F_LINES;
2424 fcmd->f_type = F_LEFT;
2429 fcmd->f_type = F_RIGHT;
2434 fcmd->f_type = F_CENTER;
2440 /* Catch the special case @... and handle it as a string
2442 if (*s == '.' && s[1] == '.') {
2443 goto default_format;
2445 fcmd->f_type = F_DECIMAL;
2449 /* Read a format in the form @####.####, where either group
2450 of ### may be empty, or the final .### may be missing. */
2458 fcmd->f_decimals = s-p;
2459 fcmd->f_flags |= FC_DP;
2461 fcmd->f_decimals = 0;
2467 fcmd->f_type = F_LEFT;
2470 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2471 fcmd->f_flags |= FC_MORE;
2480 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2483 if (in_eval && !rsfp) {
2484 eol = index(s,'\n');
2489 eol = bufend = linestr->str_ptr + linestr->str_cur;
2491 STR *tmpstr = Str_new(90,0);
2493 str_nset(tmpstr, s, eol-s);
2494 astore(stab_xarray(curcmd->c_filestab),
2495 (int)curcmd->c_line,tmpstr);
2497 if (strnEQ(s,".\n",2)) {
2499 yyerror("Missing values line");
2500 return froot.f_next;
2506 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2507 str->str_u.str_hash = curstash;
2508 str_nset(str,"(",1);
2509 flinebeg->f_line = curcmd->c_line;
2511 if (!flinebeg->f_next->f_type || index(s, ',')) {
2513 str_ncat(str, s, eol - s - 1);
2514 str_ncat(str,",$$);",5);
2519 while (s < eol && isSPACE(*s))
2524 case ' ': case '\t': case '\n': case ';':
2525 str_ncat(str, t, s - t);
2526 str_ncat(str, "," ,1);
2527 while (s < eol && (isSPACE(*s) || *s == ';'))
2532 str_ncat(str, t, s - t);
2534 s = scanident(s,eol,tokenbuf);
2535 str_ncat(str, t, s - t);
2537 if (s < eol && *s && index("$'\"",*s))
2538 str_ncat(str, ",", 1);
2540 case '"': case '\'':
2541 str_ncat(str, t, s - t);
2544 while (s < eol && (*s != *t || s[-1] == '\\'))
2548 str_ncat(str, t, s - t);
2550 if (s < eol && *s && index("$'\"",*s))
2551 str_ncat(str, ",", 1);
2554 yyerror("Please use commas to separate fields");
2557 str_ncat(str,"$$);",4);
2562 bufptr = str_get(linestr);
2563 yyerror("Format not terminated");
2564 return froot.f_next;
2571 cshlen = strlen(cshname);