1 /* $Header: toke.c,v 3.0.1.6 90/03/12 17:06:36 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.6 90/03/12 17:06:36 lwall
10 * patch13: last semicolon of program is now optional, just for Randal
11 * patch13: added splice operator: @oldelems = splice(@array,$offset,$len,LIST)
13 * Revision 3.0.1.5 90/02/28 18:47:06 lwall
14 * patch9: return grandfathered to never be function call
15 * patch9: non-existent perldb.pl now gives reasonable error message
16 * patch9: perl can now start up other interpreters scripts
17 * patch9: line numbers were bogus during certain portions of foreach evaluation
18 * patch9: null hereis core dumped
20 * Revision 3.0.1.4 89/12/21 20:26:56 lwall
21 * patch7: -d switch incompatible with -p or -n
22 * patch7: " ''$foo'' " didn't parse right
23 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
25 * Revision 3.0.1.3 89/11/17 15:43:15 lwall
26 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
27 * patch5: } misadjusted expection of subsequent term or operator
28 * patch5: y/abcde// didn't work
30 * Revision 3.0.1.2 89/11/11 05:04:42 lwall
31 * patch2: fixed a CLINE macro conflict
33 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
34 * patch1: disambiguated word after "sort" better
36 * Revision 3.0 89/10/18 15:32:33 lwall
45 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
50 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
52 #define META(c) ((c) | 128)
54 #define RETURN(retval) return (bufptr = s,(int)retval)
55 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
56 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
57 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
58 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
59 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
60 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
61 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
62 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
63 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
64 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
65 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
66 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
67 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
68 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
69 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
70 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
71 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
72 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
73 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
74 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
75 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
76 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
77 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
78 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
80 /* This bit of chicanery makes a unary function followed by
81 * a parenthesis into a function with one argument, highest precedence.
83 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
84 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
86 /* This does similarly for list operators, merely by pretending that the
87 * paren came before the listop rather than after.
89 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
90 (*s = META('('), bufptr = oldbufptr, '(') : \
91 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
92 /* grandfather return to old style */
93 #define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
99 while (s < bufend && isascii(*s) && isspace(*s))
108 #define UNI(f) return uni(f,s)
109 #define LOP(f) return lop(f,s)
148 #endif /* CRIPPLED_CC */
152 register char *s = bufptr;
155 static bool in_format = FALSE;
156 static bool firstline = TRUE;
157 extern int yychar; /* last token */
159 oldoldbufptr = oldbufptr;
166 fprintf(stderr,"Tokener at %s",s);
168 fprintf(stderr,"Tokener at %s\n",s);
172 if ((*s & 127) == '(')
175 warn("Unrecognized character \\%03o ignored", *s++);
181 goto retry; /* ignore stray nulls */
184 if (minus_n || minus_p || perldb) {
188 "do 'perldb.pl' || die \"Can't find perldb.pl in @INC\"; print $@;");
189 if (minus_n || minus_p) {
190 str_cat(linestr,"line: while (<>) {");
192 str_cat(linestr,"@F=split(' ');");
194 oldoldbufptr = oldbufptr = s = str_get(linestr);
195 bufend = linestr->str_ptr + linestr->str_cur;
200 yylval.formval = load_format();
202 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
203 bufend = linestr->str_ptr + linestr->str_cur;
207 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
209 (void)mypclose(rsfp);
210 else if (rsfp != stdin)
213 if (minus_n || minus_p) {
214 str_set(linestr,minus_p ? "}continue{print;" : "");
215 str_cat(linestr,"}");
216 oldoldbufptr = oldbufptr = s = str_get(linestr);
217 bufend = linestr->str_ptr + linestr->str_cur;
218 minus_n = minus_p = 0;
221 oldoldbufptr = oldbufptr = s = str_get(linestr);
223 RETURN(';'); /* not infinite loop because rsfp is NULL now */
225 oldoldbufptr = oldbufptr = bufptr = s;
227 STR *str = Str_new(85,0);
229 str_sset(str,linestr);
230 astore(lineary,(int)line,str);
238 bufend = linestr->str_ptr + linestr->str_cur;
240 if (*s == '#' && s[1] == '!') {
241 if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
249 while (s < bufend && !isspace(*s))
252 while (s < bufend && isspace(*s))
255 Newz(899,newargv,origargc+3,char*);
257 while (s < bufend && !isspace(*s))
260 Copy(origargv+1, newargv+2, origargc+1, char*);
266 fatal("Can't exec %s", cmd);
270 while (s < bufend && isspace(*s))
272 if (*s == ':') /* for csh's that have to exec sh scripts */
277 case ' ': case '\t': case '\f':
282 if (preprocess && s == str_get(linestr) &&
283 s[1] == ' ' && isdigit(s[2])) {
285 for (s += 2; isdigit(*s); s++) ;
287 while (s < d && isspace(*s)) s++;
290 s[strlen(s)-1] = '\0'; /* wipe out newline */
293 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
296 filename = savestr(s);
298 filename = savestr(origfilename);
299 oldoldbufptr = oldbufptr = s = str_get(linestr);
301 if (in_eval && !rsfp) {
303 while (s < d && *s != '\n')
316 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
319 case 'r': FTST(O_FTEREAD);
320 case 'w': FTST(O_FTEWRITE);
321 case 'x': FTST(O_FTEEXEC);
322 case 'o': FTST(O_FTEOWNED);
323 case 'R': FTST(O_FTRREAD);
324 case 'W': FTST(O_FTRWRITE);
325 case 'X': FTST(O_FTREXEC);
326 case 'O': FTST(O_FTROWNED);
327 case 'e': FTST(O_FTIS);
328 case 'z': FTST(O_FTZERO);
329 case 's': FTST(O_FTSIZE);
330 case 'f': FTST(O_FTFILE);
331 case 'd': FTST(O_FTDIR);
332 case 'l': FTST(O_FTLINK);
333 case 'p': FTST(O_FTPIPE);
334 case 'S': FTST(O_FTSOCK);
335 case 'u': FTST(O_FTSUID);
336 case 'g': FTST(O_FTSGID);
337 case 'k': FTST(O_FTSVTX);
338 case 'b': FTST(O_FTBLK);
339 case 'c': FTST(O_FTCHR);
340 case 't': FTST(O_FTTTY);
341 case 'T': FTST(O_FTTEXT);
342 case 'B': FTST(O_FTBINARY);
370 s = scanreg(s,bufend,tokenbuf);
371 yylval.stabval = stabent(tokenbuf,TRUE);
382 s = scanreg(s,bufend,tokenbuf);
383 yylval.stabval = stabent(tokenbuf,TRUE);
399 if (isspace(*s) || *s == '#')
400 cmdline = NOLINE; /* invalidate current command line number */
422 while (s < d && isspace(*s))
424 if (isalpha(*s) || *s == '_' || *s == '\'')
425 *(--s) = '\\'; /* force next ident to WORD */
479 while (isascii(*s) && \
480 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
482 while (d[-1] == '\'') \
488 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
490 s = scanreg(s,bufend,tokenbuf);
491 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
495 s = scanreg(s,bufend,tokenbuf);
496 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
504 yylval.stabval = stabent(tokenbuf,TRUE);
509 s = scanreg(s,bufend,tokenbuf);
512 yylval.stabval = stabent(tokenbuf,TRUE);
515 case '/': /* may either be division or pattern */
516 case '?': /* may either be conditional or pattern */
527 if (!expectterm || !isdigit(s[1])) {
536 case '0': case '1': case '2': case '3': case '4':
537 case '5': case '6': case '7': case '8': case '9':
538 case '\'': case '"': case '`':
542 case '\\': /* some magic to force next word to be a WORD */
543 s++; /* used by do and sub to force a separate namespace */
550 if (strEQ(d,"accept"))
552 if (strEQ(d,"atan2"))
564 if (strEQ(d,"continue"))
566 if (strEQ(d,"chdir")) {
567 (void)stabent("ENV",TRUE); /* may use HOME */
570 if (strEQ(d,"close"))
572 if (strEQ(d,"closedir"))
574 if (strEQ(d,"crypt")) {
580 if (strEQ(d,"chmod"))
582 if (strEQ(d,"chown"))
584 if (strEQ(d,"connect"))
588 if (strEQ(d,"chroot"))
595 while (s < d && isspace(*s))
597 if (isalpha(*s) || *s == '_')
598 *(--s) = '\\'; /* force next ident to WORD */
603 if (strEQ(d,"defined"))
605 if (strEQ(d,"delete"))
607 if (strEQ(d,"dbmopen"))
609 if (strEQ(d,"dbmclose"))
618 if (strEQ(d,"elsif")) {
622 if (strEQ(d,"eq") || strEQ(d,"EQ"))
626 if (strEQ(d,"eval")) {
627 allstabs = TRUE; /* must initialize everything since */
628 UNI(O_EVAL); /* we don't know what will be used */
636 if (strEQ(d,"exec")) {
640 if (strEQ(d,"endhostent"))
642 if (strEQ(d,"endnetent"))
644 if (strEQ(d,"endservent"))
646 if (strEQ(d,"endprotoent"))
648 if (strEQ(d,"endpwent"))
650 if (strEQ(d,"endgrent"))
655 if (strEQ(d,"for") || strEQ(d,"foreach")) {
659 if (strEQ(d,"format")) {
661 while (s < d && isspace(*s))
663 if (isalpha(*s) || *s == '_')
664 *(--s) = '\\'; /* force next ident to WORD */
666 allstabs = TRUE; /* must initialize everything since */
667 OPERATOR(FORMAT); /* we don't know what will be used */
671 if (strEQ(d,"fcntl"))
673 if (strEQ(d,"fileno"))
675 if (strEQ(d,"flock"))
680 if (strEQ(d,"gt") || strEQ(d,"GT"))
682 if (strEQ(d,"ge") || strEQ(d,"GE"))
688 if (strEQ(d,"gmtime"))
692 if (strnEQ(d,"get",3)) {
699 if (strEQ(d,"priority"))
701 if (strEQ(d,"protobyname"))
703 if (strEQ(d,"protobynumber"))
705 if (strEQ(d,"protoent"))
707 if (strEQ(d,"pwent"))
709 if (strEQ(d,"pwnam"))
711 if (strEQ(d,"pwuid"))
713 if (strEQ(d,"peername"))
716 else if (*d == 'h') {
717 if (strEQ(d,"hostbyname"))
719 if (strEQ(d,"hostbyaddr"))
721 if (strEQ(d,"hostent"))
724 else if (*d == 'n') {
725 if (strEQ(d,"netbyname"))
727 if (strEQ(d,"netbyaddr"))
729 if (strEQ(d,"netent"))
732 else if (*d == 's') {
733 if (strEQ(d,"servbyname"))
735 if (strEQ(d,"servbyport"))
737 if (strEQ(d,"servent"))
739 if (strEQ(d,"sockname"))
741 if (strEQ(d,"sockopt"))
744 else if (*d == 'g') {
745 if (strEQ(d,"grent"))
747 if (strEQ(d,"grnam"))
749 if (strEQ(d,"grgid"))
752 else if (*d == 'l') {
753 if (strEQ(d,"login"))
770 if (strEQ(d,"index"))
774 if (strEQ(d,"ioctl"))
793 if (strEQ(d,"local"))
795 if (strEQ(d,"length"))
797 if (strEQ(d,"lt") || strEQ(d,"LT"))
799 if (strEQ(d,"le") || strEQ(d,"LE"))
801 if (strEQ(d,"localtime"))
807 if (strEQ(d,"listen"))
809 if (strEQ(d,"lstat"))
825 RETURN(1); /* force error */
827 if (strEQ(d,"mkdir"))
834 if (strEQ(d,"ne") || strEQ(d,"NE"))
845 if (strEQ(d,"opendir"))
850 if (strEQ(d,"print")) {
851 checkcomma(s,"filehandle");
854 if (strEQ(d,"printf")) {
855 checkcomma(s,"filehandle");
858 if (strEQ(d,"push")) {
859 yylval.ival = O_PUSH;
866 if (strEQ(d,"package"))
884 if (strEQ(d,"return"))
886 if (strEQ(d,"reset"))
890 if (strEQ(d,"rename"))
894 if (strEQ(d,"rmdir"))
896 if (strEQ(d,"rindex"))
900 if (strEQ(d,"readdir"))
902 if (strEQ(d,"rewinddir"))
906 if (strEQ(d,"reverse"))
908 if (strEQ(d,"readlink"))
924 RETURN(1); /* force error */
933 if (strEQ(d,"select"))
939 if (strEQ(d,"setpgrp"))
941 if (strEQ(d,"setpriority"))
943 if (strEQ(d,"sethostent"))
945 if (strEQ(d,"setnetent"))
947 if (strEQ(d,"setservent"))
949 if (strEQ(d,"setprotoent"))
951 if (strEQ(d,"setpwent"))
953 if (strEQ(d,"setgrent"))
955 if (strEQ(d,"seekdir"))
957 if (strEQ(d,"setsockopt"))
964 if (strEQ(d,"shift"))
966 if (strEQ(d,"shutdown"))
977 if (strEQ(d,"sleep"))
984 if (strEQ(d,"socket"))
986 if (strEQ(d,"socketpair"))
988 if (strEQ(d,"sort")) {
989 checkcomma(s,"subroutine name");
991 while (s < d && isascii(*s) && isspace(*s)) s++;
992 if (*s == ';' || *s == ')') /* probably a close */
993 fatal("sort is now a reserved word");
994 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
995 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
996 strncpy(tokenbuf,s,d-s);
997 if (strNE(tokenbuf,"keys") &&
998 strNE(tokenbuf,"values") &&
999 strNE(tokenbuf,"split") &&
1000 strNE(tokenbuf,"grep") &&
1001 strNE(tokenbuf,"readdir") &&
1002 strNE(tokenbuf,"unpack") &&
1003 strNE(tokenbuf,"do") &&
1004 (d >= bufend || isspace(*d)) )
1005 *(--s) = '\\'; /* force next ident to WORD */
1011 if (strEQ(d,"split"))
1013 if (strEQ(d,"sprintf"))
1015 if (strEQ(d,"splice")) {
1016 yylval.ival = O_SPLICE;
1021 if (strEQ(d,"sqrt"))
1025 if (strEQ(d,"srand"))
1031 if (strEQ(d,"stat"))
1033 if (strEQ(d,"study")) {
1039 if (strEQ(d,"substr"))
1041 if (strEQ(d,"sub")) {
1044 while (s < d && isspace(*s))
1046 if (isalpha(*s) || *s == '_' || *s == '\'') {
1048 str_sset(subname,curstname);
1049 str_ncat(subname,"'",1);
1051 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
1055 str_ncat(subname,s,d-s);
1057 *(--s) = '\\'; /* force next ident to WORD */
1060 str_set(subname,"?");
1069 if (strEQ(d,"system")) {
1073 if (strEQ(d,"symlink"))
1075 if (strEQ(d,"syscall"))
1084 if (strEQ(d,"tr")) {
1089 RETURN(1); /* force error */
1091 if (strEQ(d,"tell"))
1093 if (strEQ(d,"telldir"))
1095 if (strEQ(d,"time"))
1097 if (strEQ(d,"times"))
1102 if (strEQ(d,"using"))
1104 if (strEQ(d,"until")) {
1108 if (strEQ(d,"unless")) {
1112 if (strEQ(d,"unlink"))
1114 if (strEQ(d,"undef"))
1116 if (strEQ(d,"unpack"))
1118 if (strEQ(d,"utime"))
1120 if (strEQ(d,"umask"))
1122 if (strEQ(d,"unshift")) {
1123 yylval.ival = O_UNSHIFT;
1129 if (strEQ(d,"values"))
1131 if (strEQ(d,"vec")) {
1138 if (strEQ(d,"while")) {
1142 if (strEQ(d,"warn"))
1144 if (strEQ(d,"wait"))
1146 if (strEQ(d,"wantarray")) {
1147 yylval.arg = op_new(1);
1148 yylval.arg->arg_type = O_ITEM;
1149 yylval.arg[1].arg_type = A_WANTARRAY;
1152 if (strEQ(d,"write"))
1157 if (!expectterm && strEQ(d,"x"))
1177 yylval.cval = savestr(d);
1179 if (oldoldbufptr && oldoldbufptr < bufptr) {
1180 while (isspace(*oldoldbufptr))
1182 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1184 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1187 return (CLINE, bufptr = s, (int)WORD);
1197 while (s < bufend && isascii(*s) && isspace(*s))
1199 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1201 while (isalpha(*s) || isdigit(*s) || *s == '_')
1203 while (s < bufend && isspace(*s))
1206 fatal("No comma allowed after %s", what);
1211 scanreg(s,send,dest)
1213 register char *send;
1227 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1230 while (d > dest+1 && d[-1] == '\'')
1236 if (*d == '{' /* } */ ) {
1239 while (s < send && brackets) {
1240 if (!reparse && (d == dest || (*s && isascii(*s) &&
1241 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1251 if (reparse && reparse == s - 1)
1265 if (*d == '^' && !isspace(*s))
1271 scanconst(string,len)
1275 register STR *retstr;
1280 if (index(string,'|')) {
1283 retstr = Str_new(86,len);
1284 str_nset(retstr,string,len);
1285 t = str_get(retstr);
1287 retstr->str_u.str_useful = 100;
1288 for (d=t; d < e; ) {
1296 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1300 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1304 (void)bcopy(d+1,d,e-d);
1323 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1335 retstr->str_cur = d - t;
1343 register SPAT *spat;
1349 Newz(801,spat,1,SPAT);
1350 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1351 curstash->tbl_spatroot = spat;
1360 spat->spat_flags |= SPAT_ONCE;
1363 fatal("panic: scanpat");
1365 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1367 yyerror("Search pattern not terminated");
1368 yylval.arg = Nullarg;
1372 while (*s == 'i' || *s == 'o') {
1376 spat->spat_flags |= SPAT_FOLD;
1380 spat->spat_flags |= SPAT_KEEP;
1384 for (d=tokenbuf; d < e; d++) {
1385 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1386 (*d == '@' && d[-1] != '\\')) {
1389 spat->spat_runtime = arg = op_new(1);
1390 arg->arg_type = O_ITEM;
1391 arg[1].arg_type = A_DOUBLE;
1392 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1393 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1394 d = scanreg(d,bufend,buf);
1395 (void)stabent(buf,TRUE); /* make sure it's created */
1396 for (; d < e; d++) {
1397 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1398 d = scanreg(d,bufend,buf);
1399 (void)stabent(buf,TRUE);
1401 else if (*d == '@' && d[-1] != '\\') {
1402 d = scanreg(d,bufend,buf);
1403 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1404 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1405 (void)stabent(buf,TRUE);
1408 goto got_pat; /* skip compiling for now */
1411 if (spat->spat_flags & SPAT_FOLD)
1415 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1417 if (*tokenbuf == '^') {
1418 spat->spat_short = scanconst(tokenbuf+1,len-1);
1419 if (spat->spat_short) {
1420 spat->spat_slen = spat->spat_short->str_cur;
1421 if (spat->spat_slen == len - 1)
1422 spat->spat_flags |= SPAT_ALL;
1426 spat->spat_flags |= SPAT_SCANFIRST;
1427 spat->spat_short = scanconst(tokenbuf,len);
1428 if (spat->spat_short) {
1429 spat->spat_slen = spat->spat_short->str_cur;
1430 if (spat->spat_slen == len)
1431 spat->spat_flags |= SPAT_ALL;
1434 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1435 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1436 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1437 spat->spat_flags & SPAT_FOLD,1);
1438 /* Note that this regexp can still be used if someone says
1439 * something like /a/ && s//b/; so we can't delete it.
1443 if (spat->spat_flags & SPAT_FOLD)
1447 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1449 if (spat->spat_short)
1450 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1451 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1452 spat->spat_flags & SPAT_FOLD,1);
1456 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1464 register SPAT *spat;
1469 Newz(802,spat,1,SPAT);
1470 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1471 curstash->tbl_spatroot = spat;
1473 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1475 yyerror("Substitution pattern not terminated");
1476 yylval.arg = Nullarg;
1480 for (d=tokenbuf; d < e; d++) {
1481 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1482 (*d == '@' && d[-1] != '\\')) {
1485 spat->spat_runtime = arg = op_new(1);
1486 arg->arg_type = O_ITEM;
1487 arg[1].arg_type = A_DOUBLE;
1488 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1489 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1490 d = scanreg(d,bufend,buf);
1491 (void)stabent(buf,TRUE); /* make sure it's created */
1493 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1494 d = scanreg(d,bufend,buf);
1495 (void)stabent(buf,TRUE);
1497 else if (*d == '@' && d[-1] != '\\') {
1498 d = scanreg(d,bufend,buf);
1499 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1500 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1501 (void)stabent(buf,TRUE);
1504 goto get_repl; /* skip compiling for now */
1507 if (*tokenbuf == '^') {
1508 spat->spat_short = scanconst(tokenbuf+1,len-1);
1509 if (spat->spat_short)
1510 spat->spat_slen = spat->spat_short->str_cur;
1513 spat->spat_flags |= SPAT_SCANFIRST;
1514 spat->spat_short = scanconst(tokenbuf,len);
1515 if (spat->spat_short)
1516 spat->spat_slen = spat->spat_short->str_cur;
1518 d = nsavestr(tokenbuf,len);
1522 yyerror("Substitution replacement not terminated");
1523 yylval.arg = Nullarg;
1526 spat->spat_repl = yylval.arg;
1527 spat->spat_flags |= SPAT_ONCE;
1528 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1529 spat->spat_flags |= SPAT_CONST;
1530 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1534 spat->spat_flags |= SPAT_CONST;
1535 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1536 e = tmpstr->str_ptr + tmpstr->str_cur;
1537 for (t = tmpstr->str_ptr; t < e; t++) {
1538 if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
1539 (t[1] == '{' /*}*/ && isdigit(t[2])) ))
1540 spat->spat_flags &= ~SPAT_CONST;
1543 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1546 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1547 spat->spat_repl[1].arg_type = A_SINGLE;
1548 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1552 spat->spat_flags &= ~SPAT_CONST;
1556 spat->spat_flags &= ~SPAT_ONCE;
1561 spat->spat_flags |= SPAT_FOLD;
1562 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1563 str_free(spat->spat_short); /* anchored opt doesn't do */
1564 spat->spat_short = Nullstr; /* case insensitive match */
1565 spat->spat_slen = 0;
1570 spat->spat_flags |= SPAT_KEEP;
1573 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1574 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1575 if (!spat->spat_runtime) {
1576 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1580 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1585 register SPAT *spat;
1587 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1588 if (spat->spat_short &&
1589 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1591 if (spat->spat_flags & SPAT_SCANFIRST) {
1592 str_free(spat->spat_short);
1593 spat->spat_short = Nullstr;
1596 str_free(spat->spat_regexp->regmust);
1597 spat->spat_regexp->regmust = Nullstr;
1601 if (!spat->spat_short || /* promote the better string */
1602 ((spat->spat_flags & SPAT_SCANFIRST) &&
1603 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1604 str_free(spat->spat_short); /* ok if null */
1605 spat->spat_short = spat->spat_regexp->regmust;
1606 spat->spat_regexp->regmust = Nullstr;
1607 spat->spat_flags |= SPAT_SCANFIRST;
1613 expand_charset(s,len,retlen)
1619 register char *d = t;
1621 register char *send = s + len;
1624 if (s[1] == '-' && s+2 < send) {
1625 for (i = s[0]; i <= s[2]; i++)
1634 return nsavestr(t,d-t);
1642 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1650 Newz(803,tbl,256,char);
1651 arg[2].arg_type = A_NULL;
1652 arg[2].arg_ptr.arg_cval = tbl;
1655 yyerror("Translation pattern not terminated");
1656 yylval.arg = Nullarg;
1659 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1660 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1661 free_arg(yylval.arg);
1664 yyerror("Translation replacement not terminated");
1665 yylval.arg = Nullarg;
1668 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1669 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1670 free_arg(yylval.arg);
1676 for (i = 0, j = 0; i < tlen; i++,j++) {
1679 tbl[t[i] & 0377] = r[j];
1694 register char *send;
1695 register bool makesingle = FALSE;
1696 register STAB *stab;
1697 bool alwaysdollar = FALSE;
1698 bool hereis = FALSE;
1700 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1705 arg->arg_type = O_ITEM;
1708 default: /* a substitution replacement */
1709 arg[1].arg_type = A_DOUBLE;
1710 makesingle = TRUE; /* maybe disable runtime scanning */
1720 arg[1].arg_type = A_SINGLE;
1725 else if (s[1] == '.')
1736 yyerror("Illegal octal digit");
1738 case '0': case '1': case '2': case '3': case '4':
1739 case '5': case '6': case '7':
1743 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1744 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1748 i += (*s++ & 7) + 9;
1753 (void)sprintf(tokenbuf,"%ld",i);
1754 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1755 #ifdef MICROPORT /* Microport 2.4 hack */
1756 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1758 (void)str_2num(arg[1].arg_ptr.arg_str);
1759 #endif /* Microport 2.4 hack */
1762 case '1': case '2': case '3': case '4': case '5':
1763 case '6': case '7': case '8': case '9': case '.':
1765 arg[1].arg_type = A_SINGLE;
1767 while (isdigit(*s) || *s == '_') {
1773 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1775 while (isdigit(*s) || *s == '_') {
1782 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1784 if (*s == '+' || *s == '-')
1790 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1791 #ifdef MICROPORT /* Microport 2.4 hack */
1792 { double zz = str_2num(arg[1].arg_ptr.arg_str); }
1794 (void)str_2num(arg[1].arg_ptr.arg_str);
1795 #endif /* Microport 2.4 hack */
1803 if (*++s && index("`'\"",*s)) {
1805 s = cpytill(d,s,bufend,term,&len);
1815 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1817 } /* assuming tokenbuf won't clobber */
1822 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1823 herewas = str_make(s,bufend-s);
1825 s--, herewas = str_make(s,d-s);
1826 s += herewas->str_cur;
1834 s = cpytill(d,s,bufend,'>',&len);
1839 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1841 if (d - tokenbuf != len) {
1843 arg[1].arg_type = A_GLOB;
1844 d = nsavestr(d,len);
1845 arg[1].arg_ptr.arg_stab = stab = genstab();
1846 stab_io(stab) = stio_new();
1847 stab_val(stab) = str_make(d,len);
1848 stab_val(stab)->str_u.str_hash = curstash;
1855 (void)strcpy(d,"ARGV");
1857 arg[1].arg_type = A_INDREAD;
1858 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1861 arg[1].arg_type = A_READ;
1862 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1863 yyerror("Can't get both program and data from <STDIN>");
1864 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1865 if (!stab_io(arg[1].arg_ptr.arg_stab))
1866 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1867 if (strEQ(d,"ARGV")) {
1868 (void)aadd(arg[1].arg_ptr.arg_stab);
1869 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1886 arg[1].arg_type = A_SINGLE;
1893 arg[1].arg_type = A_DOUBLE;
1894 makesingle = TRUE; /* maybe disable runtime scanning */
1895 alwaysdollar = TRUE; /* treat $) and $| as variables */
1900 arg[1].arg_type = A_BACKTICK;
1902 alwaysdollar = TRUE; /* treat $) and $| as variables */
1910 multi_open = multi_close = '<';
1913 if (tmps = index("([{< )]}> )]}>",term))
1917 tmpstr = Str_new(87,80);
1922 while (s < bufend &&
1923 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1929 fatal("EOF in string");
1931 str_nset(tmpstr,d+1,s-d);
1933 str_ncat(herewas,s,bufend-s);
1934 str_replace(linestr,herewas);
1935 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1936 bufend = linestr->str_ptr + linestr->str_cur;
1941 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1942 while (s >= bufend) { /* multiple line string? */
1944 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1946 fatal("EOF in string");
1950 STR *str = Str_new(88,0);
1952 str_sset(str,linestr);
1953 astore(lineary,(int)line,str);
1955 bufend = linestr->str_ptr + linestr->str_cur;
1957 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1960 str_scat(linestr,herewas);
1961 bufend = linestr->str_ptr + linestr->str_cur;
1965 str_scat(tmpstr,linestr);
1969 s = str_append_till(tmpstr,s,bufend,term,leave);
1973 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1974 tmpstr->str_len = tmpstr->str_cur + 1;
1975 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1977 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1978 arg[1].arg_ptr.arg_str = tmpstr;
1982 s = tmpstr->str_ptr;
1983 send = s + tmpstr->str_cur;
1984 while (s < send) { /* see if we can make SINGLE */
1985 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1987 *s = '$'; /* grandfather \digit in subst */
1988 if ((*s == '$' || *s == '@') && s+1 < send &&
1989 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
1990 makesingle = FALSE; /* force interpretation */
1992 else if (*s == '\\' && s+1 < send) {
1997 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1999 if ((*s == '$' && s+1 < send &&
2000 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
2001 (*s == '@' && s+1 < send) ) {
2002 len = scanreg(s,send,tokenbuf) - s;
2003 if (*s == '$' || strEQ(tokenbuf,"ARGV")
2004 || strEQ(tokenbuf,"ENV")
2005 || strEQ(tokenbuf,"SIG")
2006 || strEQ(tokenbuf,"INC") )
2007 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
2012 else if (*s == '\\' && s+1 < send) {
2016 if (!makesingle && (!leave || (*s && index(leave,*s))))
2020 case '0': case '1': case '2': case '3':
2021 case '4': case '5': case '6': case '7':
2023 if (s < send && *s && index("01234567",*s)) {
2027 if (s < send && *s && index("01234567",*s)) {
2056 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
2057 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
2059 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
2061 tmpstr->str_cur = d - tmpstr->str_ptr;
2062 arg[1].arg_ptr.arg_str = tmpstr;
2077 register FCMD *fprev = &froot;
2078 register FCMD *fcmd;
2085 Zero(&froot, 1, FCMD);
2086 while ((s = str_gets(linestr,rsfp, 0)) != Nullch) {
2089 STR *tmpstr = Str_new(89,0);
2091 str_sset(tmpstr,linestr);
2092 astore(lineary,(int)line,tmpstr);
2094 bufend = linestr->str_ptr + linestr->str_cur;
2095 if (strEQ(s,".\n")) {
2097 return froot.f_next;
2101 flinebeg = Nullfcmd;
2104 while (s < bufend) {
2105 Newz(804,fcmd,1,FCMD);
2106 fprev->f_next = fcmd;
2108 for (t=s; t < bufend && *t != '@' && *t != '^'; t++) {
2118 fcmd->f_pre = nsavestr(s, t-s);
2119 fcmd->f_presize = t-s;
2123 fcmd->f_flags |= FC_NOBLANK;
2125 fcmd->f_flags |= FC_REPEAT;
2129 flinebeg = fcmd; /* start values here */
2131 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
2134 fcmd->f_type = F_LINES;
2138 fcmd->f_type = F_LEFT;
2143 fcmd->f_type = F_RIGHT;
2148 fcmd->f_type = F_CENTER;
2153 fcmd->f_type = F_LEFT;
2156 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2157 fcmd->f_flags |= FC_MORE;
2165 if ((s = str_gets(linestr, rsfp, 0)) == Nullch)
2169 STR *tmpstr = Str_new(90,0);
2171 str_sset(tmpstr,linestr);
2172 astore(lineary,(int)line,tmpstr);
2174 if (strEQ(s,".\n")) {
2176 yyerror("Missing values line");
2177 return froot.f_next;
2181 bufend = linestr->str_ptr + linestr->str_cur;
2182 str = flinebeg->f_unparsed = Str_new(91,bufend - bufptr);
2183 str->str_u.str_hash = curstash;
2184 str_nset(str,"(",1);
2185 flinebeg->f_line = line;
2186 if (!flinebeg->f_next->f_type || index(linestr->str_ptr, ',')) {
2187 str_scat(str,linestr);
2188 str_ncat(str,",$$);",5);
2191 while (s < bufend && isspace(*s))
2194 while (s < bufend) {
2196 case ' ': case '\t': case '\n': case ';':
2197 str_ncat(str, t, s - t);
2198 str_ncat(str, "," ,1);
2199 while (s < bufend && (isspace(*s) || *s == ';'))
2204 str_ncat(str, t, s - t);
2206 s = scanreg(s,bufend,tokenbuf);
2207 str_ncat(str, t, s - t);
2209 if (s < bufend && *s && index("$'\"",*s))
2210 str_ncat(str, ",", 1);
2212 case '"': case '\'':
2213 str_ncat(str, t, s - t);
2216 while (s < bufend && (*s != *t || s[-1] == '\\'))
2220 str_ncat(str, t, s - t);
2222 if (s < bufend && *s && index("$'\"",*s))
2223 str_ncat(str, ",", 1);
2226 yyerror("Please use commas to separate fields");
2229 str_ncat(str,"$$);",4);
2234 bufptr = str_get(linestr);
2235 yyerror("Format not terminated");
2236 return froot.f_next;
2243 cshlen = strlen(cshname);