1 /* $RCSfile: toke.c,v $$Revision: 4.0.1.1 $$Date: 91/04/12 09:18:18 $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 4.0.1.1 91/04/12 09:18:18 lwall
10 * patch1: perl -de "print" wouldn't stop at the first statement
12 * Revision 4.0 91/03/20 01:42:14 lwall
28 /* which backslash sequences to keep in m// or s// */
30 static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";
32 char *reparse; /* if non-null, scanident found ${foo[$bar]} */
/* CLINE: lower cmdline to the current command's line number
 * (curcmd->c_line) when that is smaller than the value cmdline already
 * holds; otherwise cmdline is left unchanged.  The expression evaluates
 * to the resulting cmdline.
 */
39 #define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
/* META(c): c with the 0200 bit (decimal 128) set.  The LOP macro stores
 * META('(') over a '(' it has looked ahead at, and the tokener tests
 * (*s & 127) == '(' to recognize such a marked parenthesis.
 */
41 #define META(c) ((c) | 128)
/* Basic token-return macros.  Each one saves the scan pointer s into
 * bufptr and returns retval converted to int; they differ only in what
 * they do to expectterm:
 *
 *   RETURN(retval)   - leaves expectterm untouched
 *   OPERATOR(retval) - sets expectterm to TRUE
 *   TERM(retval)     - evaluates CLINE first, then sets expectterm to FALSE
 *
 * All three rely on a variable named s being in scope at the point of
 * use.  Note that retval is not parenthesized inside the cast, so the
 * (int) applies only to the first operand if a compound expression is
 * passed; pass a single token value.
 */
43 #define RETURN(retval) return (bufptr = s,(int)retval)
44 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
45 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
/* Token-return macros that carry an operator code.  Every macro below
 * follows the same pattern: store f in yylval.ival, set expectterm,
 * save the scan pointer s into bufptr, and return a fixed token value
 * converted to int.  The only differences between them are the token
 * returned and whether expectterm becomes TRUE or FALSE:
 *
 *   expectterm = TRUE  : FTST, HFUN, LFUN, AOP, MOP, EOP, ROP
 *   expectterm = FALSE : LOOPX, FUN0..FUN5, FUN2x, FL, FL2, HFUN3,
 *                        FOP, FOP2, FOP3, FOP4, FOP22, FOP25
 *
 * Like RETURN/OPERATOR/TERM, they rely on a variable named s being in
 * scope where they are used.
 */
46 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
47 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
48 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
49 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
50 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
51 #define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
52 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
53 #define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
54 #define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
55 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
56 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
57 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
58 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
59 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
60 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
61 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
62 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
63 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
64 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
65 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
66 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
67 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
68 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
69 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
71 /* This bit of chicanery makes a unary function followed by
72 * a parenthesis into a function with one argument, highest precedence. */
/* UNI(f): return from the tokener for a unary operator with opcode f.
 * In order: stores f in yylval.ival, sets expectterm to TRUE, and saves
 * s into bufptr (this happens before any whitespace is skipped).  It
 * then looks for '(' at *s, or at *s after s = skipspace(s); if one is
 * found the token returned is FUNC1, otherwise UNIOP.  Relies on a
 * variable named s being in scope.
 */
74 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
75 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
77 /* This does similarly for list operators, merely by pretending that the
78 * paren came before the listop rather than after. */
/* LOP(f): return from the tokener for a list operator with opcode f.
 * CLINE is evaluated first in both cases.  The macro then looks for '('
 * at *s, or at *s after s = skipspace(s):
 *
 *   - '(' found: that character in the buffer is overwritten with
 *     META('('), bufptr is reset to oldbufptr, and the token returned
 *     is '('.  f is NOT stored in yylval.ival on this path and
 *     expectterm is not modified.  (Presumably resetting bufptr makes
 *     the operator word get scanned again on the next call -- confirm
 *     against the tokener body.)
 *   - no '(': stores f in yylval.ival, sets expectterm to TRUE, saves
 *     s into bufptr and returns LISTOP.
 *
 * Relies on variables named s and oldbufptr being in scope.
 */
80 #define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
81 (*s = META('('), bufptr = oldbufptr, '(') : \
82 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
83 /* grandfather return to old style */
/* OLDLOP(f): unconditionally store f in yylval.ival, set expectterm to
 * TRUE, save s into bufptr and return LISTOP.  Unlike LOP it performs no
 * look-ahead for '(' and does not evaluate CLINE.
 */
84 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
90 while (s < bufend && isascii(*s) && isspace(*s))
/* Alternate definitions of UNI and LOP that return the result of calling
 * uni(f,s) and lop(f,s) instead of expanding the logic inline.
 * NOTE(review): the functions uni() and lop() and the enclosing
 * conditional are not visible in this excerpt; the nearby
 * "#endif CRIPPLED_CC" suggests these are the forms used when
 * CRIPPLED_CC is defined -- confirm.
 */
99 #define UNI(f) return uni(f,s)
100 #define LOP(f) return lop(f,s)
140 #endif /* CRIPPLED_CC */
144 register char *s = bufptr;
147 static bool in_format = FALSE;
148 static bool firstline = TRUE;
149 extern int yychar; /* last token */
151 oldoldbufptr = oldbufptr;
158 fprintf(stderr,"Tokener at %s",s);
160 fprintf(stderr,"Tokener at %s\n",s);
164 if ((*s & 127) == '(')
167 warn("Unrecognized character \\%03o ignored", *s++ & 255);
173 if ((*s & 127) == '(')
176 warn("Unrecognized character \\%03o ignored", *s++ & 255);
180 goto fake_eof; /* emulate EOF on ^D or ^Z */
185 goto retry; /* ignore stray nulls */
188 if (minus_n || minus_p || perldb) {
192 char *pdb = getenv("PERLDB");
194 str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
195 str_cat(linestr, ";");
197 if (minus_n || minus_p) {
198 str_cat(linestr,"line: while (<>) {");
200 str_cat(linestr,"chop;");
202 str_cat(linestr,"@F=split(' ');");
204 oldoldbufptr = oldbufptr = s = str_get(linestr);
205 bufend = linestr->str_ptr + linestr->str_cur;
211 yylval.formval = load_format();
213 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
214 bufend = linestr->str_ptr + linestr->str_cur;
220 #endif /* CRYPTSCRIPT */
222 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
226 (void)mypclose(rsfp);
227 else if (rsfp == stdin)
233 if (minus_n || minus_p) {
234 str_set(linestr,minus_p ? ";}continue{print" : "");
235 str_cat(linestr,";}");
236 oldoldbufptr = oldbufptr = s = str_get(linestr);
237 bufend = linestr->str_ptr + linestr->str_cur;
238 minus_n = minus_p = 0;
241 oldoldbufptr = oldbufptr = s = str_get(linestr);
243 RETURN(';'); /* not infinite loop because rsfp is NULL now */
245 if (doextract && *linestr->str_ptr == '#')
248 oldoldbufptr = oldbufptr = bufptr = s;
250 STR *str = Str_new(85,0);
252 str_sset(str,linestr);
253 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
261 bufend = linestr->str_ptr + linestr->str_cur;
262 if (curcmd->c_line == 1) {
263 if (*s == '#' && s[1] == '!') {
264 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
272 while (s < bufend && !isspace(*s))
275 while (s < bufend && isspace(*s))
278 Newz(899,newargv,origargc+3,char*);
280 while (s < bufend && !isspace(*s))
283 Copy(origargv+1, newargv+2, origargc+1, char*);
289 fatal("Can't exec %s", cmd);
293 while (s < bufend && isspace(*s))
295 if (*s == ':') /* for csh's that have to exec sh scripts */
300 case ' ': case '\t': case '\f': case '\r': case 013:
304 if (preprocess && s == str_get(linestr) &&
305 s[1] == ' ' && isdigit(s[2])) {
306 curcmd->c_line = atoi(s+2)-1;
307 for (s += 2; isdigit(*s); s++) ;
309 while (s < d && isspace(*s)) s++;
310 s[strlen(s)-1] = '\0'; /* wipe out newline */
313 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
316 curcmd->c_filestab = fstab(s);
318 curcmd->c_filestab = fstab(origfilename);
319 oldoldbufptr = oldbufptr = s = str_get(linestr);
323 if (in_eval && !rsfp) {
325 while (s < d && *s != '\n')
330 STR *str = Str_new(85,0);
332 str_nset(str,linestr->str_ptr, s - linestr->str_ptr);
333 astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
334 str_chop(linestr, s);
338 yylval.formval = load_format();
340 oldoldbufptr = oldbufptr = s = bufptr + 1;
351 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
354 case 'r': FTST(O_FTEREAD);
355 case 'w': FTST(O_FTEWRITE);
356 case 'x': FTST(O_FTEEXEC);
357 case 'o': FTST(O_FTEOWNED);
358 case 'R': FTST(O_FTRREAD);
359 case 'W': FTST(O_FTRWRITE);
360 case 'X': FTST(O_FTREXEC);
361 case 'O': FTST(O_FTROWNED);
362 case 'e': FTST(O_FTIS);
363 case 'z': FTST(O_FTZERO);
364 case 's': FTST(O_FTSIZE);
365 case 'f': FTST(O_FTFILE);
366 case 'd': FTST(O_FTDIR);
367 case 'l': FTST(O_FTLINK);
368 case 'p': FTST(O_FTPIPE);
369 case 'S': FTST(O_FTSOCK);
370 case 'u': FTST(O_FTSUID);
371 case 'g': FTST(O_FTSGID);
372 case 'k': FTST(O_FTSVTX);
373 case 'b': FTST(O_FTBLK);
374 case 'c': FTST(O_FTCHR);
375 case 't': FTST(O_FTTTY);
376 case 'T': FTST(O_FTTEXT);
377 case 'B': FTST(O_FTBINARY);
378 case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
379 case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
380 case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
408 s = scanident(s,bufend,tokenbuf);
409 yylval.stabval = stabent(tokenbuf,TRUE);
420 s = scanident(s,bufend,tokenbuf);
421 yylval.stabval = hadd(stabent(tokenbuf,TRUE));
437 if (isspace(*s) || *s == '#')
438 cmdline = NOLINE; /* invalidate current command line number */
441 if (curcmd->c_line < cmdline)
442 cmdline = curcmd->c_line;
460 while (s < d && isspace(*s))
462 if (isalpha(*s) || *s == '_' || *s == '\'')
463 *(--s) = '\\'; /* force next ident to WORD */
522 while (isascii(*s) && \
523 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
525 while (d[-1] == '\'') \
531 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
533 s = scanident(s,bufend,tokenbuf);
534 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
538 s = scanident(s,bufend,tokenbuf);
539 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
547 yylval.stabval = stabent(tokenbuf,TRUE);
552 s = scanident(s,bufend,tokenbuf);
555 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
558 case '/': /* may either be division or pattern */
559 case '?': /* may either be conditional or pattern */
570 if (!expectterm || !isdigit(s[1])) {
579 case '0': case '1': case '2': case '3': case '4':
580 case '5': case '6': case '7': case '8': case '9':
581 case '\'': case '"': case '`':
585 case '\\': /* some magic to force next word to be a WORD */
586 s++; /* used by do and sub to force a separate namespace */
591 if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
592 ARG *arg = op_new(1);
595 arg->arg_type = O_ITEM;
597 (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
599 strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
600 arg[1].arg_type = A_SINGLE;
601 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
604 else if (strEQ(d,"__END__")) {
609 if (stab = stabent("DATA",FALSE)) {
610 stab->str_pok |= SP_MULTI;
611 stab_io(stab) = stio_new();
612 stab_io(stab)->ifp = rsfp;
613 #if defined(HAS_FCNTL) && defined(F_SETFD)
615 fcntl(fd,F_SETFD,fd >= 3);
618 stab_io(stab)->type = '|';
619 else if (rsfp == stdin)
620 stab_io(stab)->type = '-';
622 stab_io(stab)->type = '<';
632 if (strEQ(d,"alarm"))
634 if (strEQ(d,"accept"))
636 if (strEQ(d,"atan2"))
643 if (strEQ(d,"binmode"))
650 if (strEQ(d,"continue"))
652 if (strEQ(d,"chdir")) {
653 (void)stabent("ENV",TRUE); /* may use HOME */
656 if (strEQ(d,"close"))
658 if (strEQ(d,"closedir"))
662 if (strEQ(d,"caller"))
664 if (strEQ(d,"crypt")) {
670 if (strEQ(d,"chmod"))
672 if (strEQ(d,"chown"))
674 if (strEQ(d,"connect"))
678 if (strEQ(d,"chroot"))
685 while (s < d && isspace(*s))
687 if (isalpha(*s) || *s == '_')
688 *(--s) = '\\'; /* force next ident to WORD */
693 if (strEQ(d,"defined"))
695 if (strEQ(d,"delete"))
697 if (strEQ(d,"dbmopen"))
699 if (strEQ(d,"dbmclose"))
708 if (strEQ(d,"elsif")) {
709 yylval.ival = curcmd->c_line;
712 if (strEQ(d,"eq") || strEQ(d,"EQ"))
716 if (strEQ(d,"eval")) {
717 allstabs = TRUE; /* must initialize everything since */
718 UNI(O_EVAL); /* we don't know what will be used */
726 if (strEQ(d,"exec")) {
730 if (strEQ(d,"endhostent"))
732 if (strEQ(d,"endnetent"))
734 if (strEQ(d,"endservent"))
736 if (strEQ(d,"endprotoent"))
738 if (strEQ(d,"endpwent"))
740 if (strEQ(d,"endgrent"))
745 if (strEQ(d,"for") || strEQ(d,"foreach")) {
746 yylval.ival = curcmd->c_line;
749 if (strEQ(d,"format")) {
751 while (s < d && isspace(*s))
753 if (isalpha(*s) || *s == '_')
754 *(--s) = '\\'; /* force next ident to WORD */
756 allstabs = TRUE; /* must initialize everything since */
757 OPERATOR(FORMAT); /* we don't know what will be used */
761 if (strEQ(d,"fcntl"))
763 if (strEQ(d,"fileno"))
765 if (strEQ(d,"flock"))
770 if (strEQ(d,"gt") || strEQ(d,"GT"))
772 if (strEQ(d,"ge") || strEQ(d,"GE"))
778 if (strEQ(d,"gmtime"))
782 if (strnEQ(d,"get",3)) {
789 if (strEQ(d,"priority"))
791 if (strEQ(d,"protobyname"))
793 if (strEQ(d,"protobynumber"))
795 if (strEQ(d,"protoent"))
797 if (strEQ(d,"pwent"))
799 if (strEQ(d,"pwnam"))
801 if (strEQ(d,"pwuid"))
803 if (strEQ(d,"peername"))
806 else if (*d == 'h') {
807 if (strEQ(d,"hostbyname"))
809 if (strEQ(d,"hostbyaddr"))
811 if (strEQ(d,"hostent"))
814 else if (*d == 'n') {
815 if (strEQ(d,"netbyname"))
817 if (strEQ(d,"netbyaddr"))
819 if (strEQ(d,"netent"))
822 else if (*d == 's') {
823 if (strEQ(d,"servbyname"))
825 if (strEQ(d,"servbyport"))
827 if (strEQ(d,"servent"))
829 if (strEQ(d,"sockname"))
831 if (strEQ(d,"sockopt"))
834 else if (*d == 'g') {
835 if (strEQ(d,"grent"))
837 if (strEQ(d,"grnam"))
839 if (strEQ(d,"grgid"))
842 else if (*d == 'l') {
843 if (strEQ(d,"login"))
857 yylval.ival = curcmd->c_line;
860 if (strEQ(d,"index"))
864 if (strEQ(d,"ioctl"))
883 if (strEQ(d,"local"))
885 if (strEQ(d,"length"))
887 if (strEQ(d,"lt") || strEQ(d,"LT"))
889 if (strEQ(d,"le") || strEQ(d,"LE"))
891 if (strEQ(d,"localtime"))
897 if (strEQ(d,"listen"))
899 if (strEQ(d,"lstat"))
915 RETURN(1); /* force error */
919 if (strEQ(d,"mkdir"))
923 if (strEQ(d,"msgctl"))
925 if (strEQ(d,"msgget"))
927 if (strEQ(d,"msgrcv"))
929 if (strEQ(d,"msgsnd"))
938 if (strEQ(d,"ne") || strEQ(d,"NE"))
949 if (strEQ(d,"opendir"))
954 if (strEQ(d,"print")) {
955 checkcomma(s,"filehandle");
958 if (strEQ(d,"printf")) {
959 checkcomma(s,"filehandle");
962 if (strEQ(d,"push")) {
963 yylval.ival = O_PUSH;
970 if (strEQ(d,"package"))
992 if (strEQ(d,"return"))
994 if (strEQ(d,"require")) {
995 allstabs = TRUE; /* must initialize everything since */
996 UNI(O_REQUIRE); /* we don't know what will be used */
998 if (strEQ(d,"reset"))
1000 if (strEQ(d,"redo"))
1002 if (strEQ(d,"rename"))
1004 if (strEQ(d,"rand"))
1006 if (strEQ(d,"rmdir"))
1008 if (strEQ(d,"rindex"))
1010 if (strEQ(d,"read"))
1012 if (strEQ(d,"readdir"))
1014 if (strEQ(d,"rewinddir"))
1016 if (strEQ(d,"recv"))
1018 if (strEQ(d,"reverse"))
1020 if (strEQ(d,"readlink"))
1036 RETURN(1); /* force error */
1043 if (strEQ(d,"scalar"))
1049 if (strEQ(d,"select"))
1051 if (strEQ(d,"seek"))
1053 if (strEQ(d,"semctl"))
1055 if (strEQ(d,"semget"))
1057 if (strEQ(d,"semop"))
1059 if (strEQ(d,"send"))
1061 if (strEQ(d,"setpgrp"))
1063 if (strEQ(d,"setpriority"))
1064 FUN3(O_SETPRIORITY);
1065 if (strEQ(d,"sethostent"))
1067 if (strEQ(d,"setnetent"))
1069 if (strEQ(d,"setservent"))
1071 if (strEQ(d,"setprotoent"))
1073 if (strEQ(d,"setpwent"))
1075 if (strEQ(d,"setgrent"))
1077 if (strEQ(d,"seekdir"))
1079 if (strEQ(d,"setsockopt"))
1086 if (strEQ(d,"shift"))
1088 if (strEQ(d,"shmctl"))
1090 if (strEQ(d,"shmget"))
1092 if (strEQ(d,"shmread"))
1094 if (strEQ(d,"shmwrite"))
1096 if (strEQ(d,"shutdown"))
1107 if (strEQ(d,"sleep"))
1114 if (strEQ(d,"socket"))
1116 if (strEQ(d,"socketpair"))
1118 if (strEQ(d,"sort")) {
1119 checkcomma(s,"subroutine name");
1121 while (s < d && isascii(*s) && isspace(*s)) s++;
1122 if (*s == ';' || *s == ')') /* probably a close */
1123 fatal("sort is now a reserved word");
1124 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1125 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
1126 strncpy(tokenbuf,s,d-s);
1127 if (strNE(tokenbuf,"keys") &&
1128 strNE(tokenbuf,"values") &&
1129 strNE(tokenbuf,"split") &&
1130 strNE(tokenbuf,"grep") &&
1131 strNE(tokenbuf,"readdir") &&
1132 strNE(tokenbuf,"unpack") &&
1133 strNE(tokenbuf,"do") &&
1134 (d >= bufend || isspace(*d)) )
1135 *(--s) = '\\'; /* force next ident to WORD */
1141 if (strEQ(d,"split"))
1143 if (strEQ(d,"sprintf"))
1145 if (strEQ(d,"splice")) {
1146 yylval.ival = O_SPLICE;
1151 if (strEQ(d,"sqrt"))
1155 if (strEQ(d,"srand"))
1161 if (strEQ(d,"stat"))
1163 if (strEQ(d,"study")) {
1169 if (strEQ(d,"substr"))
1171 if (strEQ(d,"sub")) {
1172 subline = curcmd->c_line;
1174 while (s < d && isspace(*s))
1176 if (isalpha(*s) || *s == '_' || *s == '\'') {
1178 str_sset(subname,curstname);
1179 str_ncat(subname,"'",1);
1181 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1185 str_ncat(subname,s,d-s);
1187 *(--s) = '\\'; /* force next ident to WORD */
1190 str_set(subname,"?");
1199 if (strEQ(d,"system")) {
1203 if (strEQ(d,"symlink"))
1205 if (strEQ(d,"syscall"))
1207 if (strEQ(d,"sysread"))
1209 if (strEQ(d,"syswrite"))
1218 if (strEQ(d,"tr")) {
1223 RETURN(1); /* force error */
1225 if (strEQ(d,"tell"))
1227 if (strEQ(d,"telldir"))
1229 if (strEQ(d,"time"))
1231 if (strEQ(d,"times"))
1233 if (strEQ(d,"truncate"))
1238 if (strEQ(d,"using"))
1240 if (strEQ(d,"until")) {
1241 yylval.ival = curcmd->c_line;
1244 if (strEQ(d,"unless")) {
1245 yylval.ival = curcmd->c_line;
1248 if (strEQ(d,"unlink"))
1250 if (strEQ(d,"undef"))
1252 if (strEQ(d,"unpack"))
1254 if (strEQ(d,"utime"))
1256 if (strEQ(d,"umask"))
1258 if (strEQ(d,"unshift")) {
1259 yylval.ival = O_UNSHIFT;
1265 if (strEQ(d,"values"))
1267 if (strEQ(d,"vec")) {
1274 if (strEQ(d,"while")) {
1275 yylval.ival = curcmd->c_line;
1278 if (strEQ(d,"warn"))
1280 if (strEQ(d,"wait"))
1282 if (strEQ(d,"waitpid"))
1284 if (strEQ(d,"wantarray")) {
1285 yylval.arg = op_new(1);
1286 yylval.arg->arg_type = O_ITEM;
1287 yylval.arg[1].arg_type = A_WANTARRAY;
1290 if (strEQ(d,"write"))
1295 if (!expectterm && strEQ(d,"x"))
1315 yylval.cval = savestr(d);
1317 if (oldoldbufptr && oldoldbufptr < bufptr) {
1318 while (isspace(*oldoldbufptr))
1320 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1322 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1325 return (CLINE, bufptr = s, (int)WORD);
1337 while (s < bufend && isascii(*s) && isspace(*s))
1339 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1341 while (isalpha(*s) || isdigit(*s) || *s == '_')
1343 while (s < bufend && isspace(*s))
1348 "tell eof times getlogin wait length shift umask getppid \
1349 cos exp int log rand sin sqrt ord wantarray",
1354 fatal("No comma allowed after %s", what);
1360 scanident(s,send,dest)
1362 register char *send;
1376 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1379 while (d > dest+1 && d[-1] == '\'')
1385 if (*d == '{' /* } */ ) {
1388 while (s < send && brackets) {
1389 if (!reparse && (d == dest || (*s && isascii(*s) &&
1390 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1400 if (reparse && reparse == s - 1)
1414 if (*d == '^' && (isupper(*s) || index("[\\]^_?",*s)))
1420 scanconst(string,len)
1424 register STR *retstr;
1429 if (index(string,'|')) {
1432 retstr = Str_new(86,len);
1433 str_nset(retstr,string,len);
1434 t = str_get(retstr);
1436 retstr->str_u.str_useful = 100;
1437 for (d=t; d < e; ) {
1445 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1449 if (d[1] && index("wWbB0123456789sSdDlLuUE",d[1])) {
1453 (void)bcopy(d+1,d,e-d);
1478 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1490 retstr->str_cur = d - t;
1498 register SPAT *spat;
1503 STR *str = Str_new(93,0);
1505 Newz(801,spat,1,SPAT);
1506 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1507 curstash->tbl_spatroot = spat;
1516 spat->spat_flags |= SPAT_ONCE;
1519 fatal("panic: scanpat");
1521 s = str_append_till(str,s,bufend,s[-1],patleave);
1524 yyerror("Search pattern not terminated");
1525 yylval.arg = Nullarg;
1529 while (*s == 'i' || *s == 'o') {
1533 spat->spat_flags |= SPAT_FOLD;
1537 spat->spat_flags |= SPAT_KEEP;
1541 e = str->str_ptr + len;
1542 for (d = str->str_ptr; d < e; d++) {
1545 else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
1549 spat->spat_runtime = arg = op_new(1);
1550 arg->arg_type = O_ITEM;
1551 arg[1].arg_type = A_DOUBLE;
1552 arg[1].arg_ptr.arg_str = str_smake(str);
1553 d = scanident(d,bufend,buf);
1554 (void)stabent(buf,TRUE); /* make sure it's created */
1555 for (; d < e; d++) {
1558 else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
1559 d = scanident(d,bufend,buf);
1560 (void)stabent(buf,TRUE);
1562 else if (*d == '@') {
1563 d = scanident(d,bufend,buf);
1564 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1565 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1566 (void)stabent(buf,TRUE);
1569 goto got_pat; /* skip compiling for now */
1572 if (spat->spat_flags & SPAT_FOLD)
1576 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1578 if (*str->str_ptr == '^') {
1579 spat->spat_short = scanconst(str->str_ptr+1,len-1);
1580 if (spat->spat_short) {
1581 spat->spat_slen = spat->spat_short->str_cur;
1582 if (spat->spat_slen == len - 1)
1583 spat->spat_flags |= SPAT_ALL;
1587 spat->spat_flags |= SPAT_SCANFIRST;
1588 spat->spat_short = scanconst(str->str_ptr,len);
1589 if (spat->spat_short) {
1590 spat->spat_slen = spat->spat_short->str_cur;
1591 if (spat->spat_slen == len)
1592 spat->spat_flags |= SPAT_ALL;
1595 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1596 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1597 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1598 spat->spat_flags & SPAT_FOLD);
1599 /* Note that this regexp can still be used if someone says
1600 * something like /a/ && s//b/; so we can't delete it.
1604 if (spat->spat_flags & SPAT_FOLD)
1608 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1610 if (spat->spat_short)
1611 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1612 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1613 spat->spat_flags & SPAT_FOLD);
1618 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1626 register SPAT *spat;
1630 STR *str = Str_new(93,0);
1632 Newz(802,spat,1,SPAT);
1633 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1634 curstash->tbl_spatroot = spat;
1636 s = str_append_till(str,s+1,bufend,*s,patleave);
1639 yyerror("Substitution pattern not terminated");
1640 yylval.arg = Nullarg;
1644 e = str->str_ptr + len;
1645 for (d = str->str_ptr; d < e; d++) {
1648 else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
1652 spat->spat_runtime = arg = op_new(1);
1653 arg->arg_type = O_ITEM;
1654 arg[1].arg_type = A_DOUBLE;
1655 arg[1].arg_ptr.arg_str = str_smake(str);
1656 d = scanident(d,bufend,buf);
1657 (void)stabent(buf,TRUE); /* make sure it's created */
1659 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1660 d = scanident(d,bufend,buf);
1661 (void)stabent(buf,TRUE);
1663 else if (*d == '@' && d[-1] != '\\') {
1664 d = scanident(d,bufend,buf);
1665 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1666 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1667 (void)stabent(buf,TRUE);
1670 goto get_repl; /* skip compiling for now */
1673 if (*str->str_ptr == '^') {
1674 spat->spat_short = scanconst(str->str_ptr+1,len-1);
1675 if (spat->spat_short)
1676 spat->spat_slen = spat->spat_short->str_cur;
1679 spat->spat_flags |= SPAT_SCANFIRST;
1680 spat->spat_short = scanconst(str->str_ptr,len);
1681 if (spat->spat_short)
1682 spat->spat_slen = spat->spat_short->str_cur;
1688 yyerror("Substitution replacement not terminated");
1689 yylval.arg = Nullarg;
1692 spat->spat_repl = yylval.arg;
1693 spat->spat_flags |= SPAT_ONCE;
1694 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1695 spat->spat_flags |= SPAT_CONST;
1696 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1700 spat->spat_flags |= SPAT_CONST;
1701 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1702 e = tmpstr->str_ptr + tmpstr->str_cur;
1703 for (t = tmpstr->str_ptr; t < e; t++) {
1704 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1705 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1706 spat->spat_flags &= ~SPAT_CONST;
1709 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1712 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1713 spat->spat_repl[1].arg_type = A_SINGLE;
1714 spat->spat_repl = make_op(O_EVAL,2,
1718 spat->spat_flags &= ~SPAT_CONST;
1722 spat->spat_flags &= ~SPAT_ONCE;
1727 spat->spat_flags |= SPAT_FOLD;
1728 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1729 str_free(spat->spat_short); /* anchored opt doesn't do */
1730 spat->spat_short = Nullstr; /* case insensitive match */
1731 spat->spat_slen = 0;
1736 spat->spat_flags |= SPAT_KEEP;
1739 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1740 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1741 if (!spat->spat_runtime) {
1742 spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
1743 spat->spat_flags & SPAT_FOLD);
1746 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1752 register SPAT *spat;
1754 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1755 if (spat->spat_short &&
1756 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1758 if (spat->spat_flags & SPAT_SCANFIRST) {
1759 str_free(spat->spat_short);
1760 spat->spat_short = Nullstr;
1763 str_free(spat->spat_regexp->regmust);
1764 spat->spat_regexp->regmust = Nullstr;
1768 if (!spat->spat_short || /* promote the better string */
1769 ((spat->spat_flags & SPAT_SCANFIRST) &&
1770 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1771 str_free(spat->spat_short); /* ok if null */
1772 spat->spat_short = spat->spat_regexp->regmust;
1773 spat->spat_regexp->regmust = Nullstr;
1774 spat->spat_flags |= SPAT_SCANFIRST;
1780 expand_charset(s,len,retlen)
1786 register char *d = t;
1788 register char *send = s + len;
1790 while (s < send && d - t <= 256) {
1791 if (s[1] == '-' && s+2 < send) {
1792 for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
1801 return nsavestr(t,d-t);
1809 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1812 register short *tbl;
1820 New(803,tbl,256,short);
1821 arg[2].arg_type = A_NULL;
1822 arg[2].arg_ptr.arg_cval = (char*) tbl;
1825 yyerror("Translation pattern not terminated");
1826 yylval.arg = Nullarg;
1829 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1830 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1831 arg_free(yylval.arg);
1834 yyerror("Translation replacement not terminated");
1835 yylval.arg = Nullarg;
1838 complement = delete = squash = 0;
1839 while (*s == 'c' || *s == 'd' || *s == 's') {
1848 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1849 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1850 arg_free(yylval.arg);
1851 arg[2].arg_len = delete|squash;
1853 if (!rlen && !delete) {
1858 Zero(tbl, 256, short);
1859 for (i = 0; i < tlen; i++)
1860 tbl[t[i] & 0377] = -1;
1861 for (i = 0, j = 0; i < 256; i++) {
1875 for (i = 0; i < 256; i++)
1877 for (i = 0, j = 0; i < tlen; i++,j++) {
1880 if (tbl[t[i] & 0377] == -1)
1881 tbl[t[i] & 0377] = -2;
1886 if (tbl[t[i] & 0377] == -1)
1887 tbl[t[i] & 0377] = r[j] & 0377;
1903 register char *send;
1904 register bool makesingle = FALSE;
1905 register STAB *stab;
1906 bool alwaysdollar = FALSE;
1907 bool hereis = FALSE;
1910 char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
1915 arg->arg_type = O_ITEM;
1918 default: /* a substitution replacement */
1919 arg[1].arg_type = A_DOUBLE;
1920 makesingle = TRUE; /* maybe disable runtime scanning */
1930 arg[1].arg_type = A_SINGLE;
1935 else if (s[1] == '.')
1946 yyerror("Illegal octal digit");
1948 case '0': case '1': case '2': case '3': case '4':
1949 case '5': case '6': case '7':
1953 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1954 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1958 i += (*s++ & 7) + 9;
1963 str = Str_new(92,0);
1964 str_numset(str,(double)i);
1966 Safefree(str->str_ptr);
1967 str->str_ptr = Nullch;
1968 str->str_len = str->str_cur = 0;
1970 arg[1].arg_ptr.arg_str = str;
1973 case '1': case '2': case '3': case '4': case '5':
1974 case '6': case '7': case '8': case '9': case '.':
1976 arg[1].arg_type = A_SINGLE;
1978 while (isdigit(*s) || *s == '_') {
1984 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1986 while (isdigit(*s) || *s == '_') {
1993 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1995 if (*s == '+' || *s == '-')
2001 str = Str_new(92,0);
2002 str_numset(str,atof(tokenbuf));
2004 Safefree(str->str_ptr);
2005 str->str_ptr = Nullch;
2006 str->str_len = str->str_cur = 0;
2008 arg[1].arg_ptr.arg_str = str;
2016 if (*++s && index("`'\"",*s)) {
2018 s = cpytill(d,s,bufend,term,&len);
2028 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
2030 } /* assuming tokenbuf won't clobber */
2035 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
2036 herewas = str_make(s,bufend-s);
2038 s--, herewas = str_make(s,d-s);
2039 s += herewas->str_cur;
2047 s = cpytill(d,s,bufend,'>',&len);
2052 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
2054 if (d - tokenbuf != len) {
2056 arg[1].arg_type = A_GLOB;
2057 d = nsavestr(d,len);
2058 arg[1].arg_ptr.arg_stab = stab = genstab();
2059 stab_io(stab) = stio_new();
2060 stab_val(stab) = str_make(d,len);
2067 (void)strcpy(d,"ARGV");
2069 arg[1].arg_type = A_INDREAD;
2070 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
2073 arg[1].arg_type = A_READ;
2074 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
2075 if (!stab_io(arg[1].arg_ptr.arg_stab))
2076 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
2077 if (strEQ(d,"ARGV")) {
2078 (void)aadd(arg[1].arg_ptr.arg_stab);
2079 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
2100 arg[1].arg_type = A_SINGLE;
2107 arg[1].arg_type = A_DOUBLE;
2108 makesingle = TRUE; /* maybe disable runtime scanning */
2109 alwaysdollar = TRUE; /* treat $) and $| as variables */
2114 arg[1].arg_type = A_BACKTICK;
2116 alwaysdollar = TRUE; /* treat $) and $| as variables */
2122 multi_start = curcmd->c_line;
2124 multi_open = multi_close = '<';
2127 if (term && (tmps = index("([{< )]}> )]}>",term)))
2131 tmpstr = Str_new(87,80);
2136 while (s < bufend &&
2137 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
2142 curcmd->c_line = multi_start;
2143 fatal("EOF in string");
2145 str_nset(tmpstr,d+1,s-d);
2147 str_ncat(herewas,s,bufend-s);
2148 str_replace(linestr,herewas);
2149 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
2150 bufend = linestr->str_ptr + linestr->str_cur;
2154 str_nset(tmpstr,"",0); /* avoid "uninitialized" warning */
2157 s = str_append_till(tmpstr,s+1,bufend,term,leave);
2158 while (s >= bufend) { /* multiple line string? */
2160 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
2161 curcmd->c_line = multi_start;
2162 fatal("EOF in string");
2166 STR *str = Str_new(88,0);
2168 str_sset(str,linestr);
2169 astore(stab_xarray(curcmd->c_filestab),
2170 (int)curcmd->c_line,str);
2172 bufend = linestr->str_ptr + linestr->str_cur;
2174 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
2177 str_scat(linestr,herewas);
2178 bufend = linestr->str_ptr + linestr->str_cur;
2182 str_scat(tmpstr,linestr);
2186 s = str_append_till(tmpstr,s,bufend,term,leave);
2188 multi_end = curcmd->c_line;
2190 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
2191 tmpstr->str_len = tmpstr->str_cur + 1;
2192 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
2194 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
2195 arg[1].arg_ptr.arg_str = tmpstr;
2199 s = tmpstr->str_ptr;
2200 send = s + tmpstr->str_cur;
2201 while (s < send) { /* see if we can make SINGLE */
2202 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
2203 !alwaysdollar && s[1] != '0')
2204 *s = '$'; /* grandfather \digit in subst */
2205 if ((*s == '$' || *s == '@') && s+1 < send &&
2206 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
2207 makesingle = FALSE; /* force interpretation */
2209 else if (*s == '\\' && s+1 < send) {
2210 if (index("lLuUE",s[1]))
2216 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
2218 if ((*s == '$' && s+1 < send &&
2219 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2220 (*s == '@' && s+1 < send) ) {
2221 len = scanident(s,send,tokenbuf) - s;
2222 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2223 || strEQ(tokenbuf,"ENV")
2224 || strEQ(tokenbuf,"SIG")
2225 || strEQ(tokenbuf,"INC") )
2226 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2231 else if (*s == '\\' && s+1 < send) {
2235 if (!makesingle && (!leave || (*s && index(leave,*s))))
2239 case '0': case '1': case '2': case '3':
2240 case '4': case '5': case '6': case '7':
2241 *d++ = scanoct(s, 3, &len);
2245 *d++ = scanhex(++s, 2, &len);
2284 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2285 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2287 tmpstr->str_cur = d - tmpstr->str_ptr;
2288 arg[1].arg_ptr.arg_str = tmpstr;
2304 register FCMD *fprev = &froot;
2305 register FCMD *fcmd;
2312 Zero(&froot, 1, FCMD);
2314 while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
2316 if (in_eval && !rsfp) {
2317 eol = index(s,'\n');
2322 eol = bufend = linestr->str_ptr + linestr->str_cur;
2324 STR *tmpstr = Str_new(89,0);
2326 str_nset(tmpstr, s, eol-s);
2327 astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
2330 for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
2333 return froot.f_next;
2340 flinebeg = Nullfcmd;
2344 Newz(804,fcmd,1,FCMD);
2345 fprev->f_next = fcmd;
2347 for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
2357 fcmd->f_pre = nsavestr(s, t-s);
2358 fcmd->f_presize = t-s;
2362 fcmd->f_flags |= FC_NOBLANK;
2364 fcmd->f_flags |= FC_REPEAT;
2368 flinebeg = fcmd; /* start values here */
2370 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2373 fcmd->f_type = F_LINES;
2377 fcmd->f_type = F_LEFT;
2382 fcmd->f_type = F_RIGHT;
2387 fcmd->f_type = F_CENTER;
2393 /* Catch the special case @... and handle it as a string
2395 if (*s == '.' && s[1] == '.') {
2396 goto default_format;
2398 fcmd->f_type = F_DECIMAL;
2402 /* Read a format in the form @####.####, where either group
2403 of ### may be empty, or the final .### may be missing. */
2411 fcmd->f_decimals = s-p;
2412 fcmd->f_flags |= FC_DP;
2414 fcmd->f_decimals = 0;
2420 fcmd->f_type = F_LEFT;
2423 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2424 fcmd->f_flags |= FC_MORE;
2433 (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
2436 if (in_eval && !rsfp) {
2437 eol = index(s,'\n');
2442 eol = bufend = linestr->str_ptr + linestr->str_cur;
2444 STR *tmpstr = Str_new(90,0);
2446 str_nset(tmpstr, s, eol-s);
2447 astore(stab_xarray(curcmd->c_filestab),
2448 (int)curcmd->c_line,tmpstr);
2450 if (strnEQ(s,".\n",2)) {
2452 yyerror("Missing values line");
2453 return froot.f_next;
2459 str = flinebeg->f_unparsed = Str_new(91,eol - s);
2460 str->str_u.str_hash = curstash;
2461 str_nset(str,"(",1);
2462 flinebeg->f_line = curcmd->c_line;
2464 if (!flinebeg->f_next->f_type || index(s, ',')) {
2466 str_ncat(str, s, eol - s - 1);
2467 str_ncat(str,",$$);",5);
2472 while (s < eol && isspace(*s))
2477 case ' ': case '\t': case '\n': case ';':
2478 str_ncat(str, t, s - t);
2479 str_ncat(str, "," ,1);
2480 while (s < eol && (isspace(*s) || *s == ';'))
2485 str_ncat(str, t, s - t);
2487 s = scanident(s,eol,tokenbuf);
2488 str_ncat(str, t, s - t);
2490 if (s < eol && *s && index("$'\"",*s))
2491 str_ncat(str, ",", 1);
2493 case '"': case '\'':
2494 str_ncat(str, t, s - t);
2497 while (s < eol && (*s != *t || s[-1] == '\\'))
2501 str_ncat(str, t, s - t);
2503 if (s < eol && *s && index("$'\"",*s))
2504 str_ncat(str, ",", 1);
2507 yyerror("Please use commas to separate fields");
2510 str_ncat(str,"$$);",4);
2515 bufptr = str_get(linestr);
2516 yyerror("Format not terminated");
2517 return froot.f_next;
2524 cshlen = strlen(cshname);