1 /* $Header: toke.c,v 2.0.1.1 88/06/28 16:39:50 root Exp $
4 * Revision 2.0.1.1 88/06/28 16:39:50 root
5 * patch1: tr/x/y/ can dump core if y is shorter than x
7 * Revision 2.0 88/06/05 00:11:16 root
8 * Baseline version 2.0.
 */
/*
 * Token-return helpers for the tokener.  Apart from CLINE, every macro
 * below expands to a complete `return` statement built from a comma
 * expression: it writes the local scan pointer `s` back to bufptr, adjusts
 * expectterm as listed, and yields the token code cast to int.  A variable
 * named `s` must be in scope wherever one of them is used.
 * NOTE(review): macro arguments are substituted without parentheses, so
 * pass only plain identifiers or constants.
 */

/* CLINE: if `line` is below cmdline, lower cmdline to `line`; yields cmdline. */
16 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
/* RETURN: save the scan position and return retval; expectterm is untouched. */
18 #define RETURN(retval) return (bufptr = s,(int)retval)
/* OPERATOR: as RETURN, but first sets expectterm = TRUE. */
19 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
/* TERM: evaluates CLINE, sets expectterm = FALSE, then returns retval. */
20 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
/* LOOPX: stores f in yylval.ival and returns LOOPEX with expectterm = FALSE. */
21 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
/* UNI, FTST: store f in yylval.ival and return UNIOP or FILETEST with expectterm = TRUE. */
22 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s,(int)UNIOP)
23 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
/*
 * FUN0..FUN3, SFUN, LFUN: store f in yylval.ival and return FUNC0..FUNC3,
 * STABFUN or LVALFUN respectively, each with expectterm = FALSE.
 */
24 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
25 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
26 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
27 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
28 #define SFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)STABFUN)
29 #define LFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LVALFUN)
/*
 * Tokener entry: scanning resumes at bufptr, which is where the
 * token-return macros store the scan pointer before returning.
 */
33 register char *s = bufptr;
/* State kept across calls (both are function-static). */
36 static bool in_format = FALSE;
37 static bool firstline = TRUE;
/*
 * Trace output: the two calls differ only in the trailing newline.  The
 * condition that selects between them is not visible in this excerpt.
 */
43 fprintf(stderr,"Tokener at %s",s);
45 fprintf(stderr,"Tokener at %s\n",s);
50 "Unrecognized character %c in file %s line %ld--ignoring.\n",
51 *s++,filename,(long)line);
/*
 * On the first line, when minus_n or minus_p is set, linestr is preloaded
 * with the header of an implicit input loop.  The "@F=split(' ');" text is
 * appended under a condition that is not visible here.
 */
56 if (firstline && (minus_n || minus_p)) {
58 str_set(linestr,"line: while (<>) {");
60 str_cat(linestr,"@F=split(' ');");
/* Format bodies are read by load_format, not tokenised here. */
67 yylval.formval = load_format(); /* leaves . in buffer */
/* Fetch the next input line; Nullch means no more input from rsfp. */
73 if ((s = str_gets(linestr, rsfp)) == Nullch) {
76 else if (rsfp != stdin)
/*
 * At end of input under minus_n / minus_p, linestr is set to
 * "}continue{print;" when minus_p is set and to the empty string otherwise.
 */
79 if (minus_n || minus_p) {
80 str_set(linestr,minus_p ? "}continue{print;" : "");
96 case ' ': case '\t': case '\f':
/*
 * Line-directive recognition: only when preprocess is set, s sits at the
 * very start of the linestr buffer, s[1] is a space and s[2] is a digit.
 * (Presumably s[0] is '#', i.e. preprocessor output of the form
 * `# 123 "file"` -- the case label is not visible; confirm.)
 */
101 if (preprocess && s == str_get(linestr) &&
102 s[1] == ' ' && isdigit(s[2])) {
/* Step over the line number digits, then any whitespace after them. */
104 for (s += 2; isdigit(*s); s++) ;
105 while (*s && isspace(*s)) s++;
/*
 * NOTE(review): the loop above can leave s on the terminating NUL, in which
 * case strlen(s)-1 wraps around.  The guards on the intervening lines are
 * not visible here -- confirm that s is non-empty at both stores.
 */
108 s[strlen(s)-1] = '\0'; /* wipe out newline */
111 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
/*
 * filename becomes a saved copy of either the directive's name or
 * origfilename; which branch applies is decided on lines not shown.
 */
114 filename = savestr(s);
116 filename = savestr(origfilename);
/* Rewind to the start of the buffer and skip the rest of the line. */
117 s = str_get(linestr);
120 while (*s && *s != '\n')
/*
 * One-letter file test operators.  The guard accepts exactly one alphabetic
 * character at s[1] that is not followed by another letter.  (Presumably
 * s[0] is the leading '-'; the enclosing case label is not visible in this
 * excerpt -- confirm.)  Every recognised letter returns through FTST, which
 * stores the opcode in yylval.ival, sets expectterm = TRUE and yields a
 * FILETEST token.
 */
132 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
/* Lower-case r, w, x, o select the O_FTE... opcodes. */
135 case 'r': FTST(O_FTEREAD);
136 case 'w': FTST(O_FTEWRITE);
137 case 'x': FTST(O_FTEEXEC);
138 case 'o': FTST(O_FTEOWNED);
/* Upper-case R, W, X, O select the O_FTR... opcodes. */
139 case 'R': FTST(O_FTRREAD);
140 case 'W': FTST(O_FTRWRITE);
141 case 'X': FTST(O_FTREXEC);
142 case 'O': FTST(O_FTROWNED);
/* Remaining letters map one-to-one onto a single opcode each. */
143 case 'e': FTST(O_FTIS);
144 case 'z': FTST(O_FTZERO);
145 case 's': FTST(O_FTSIZE);
146 case 'f': FTST(O_FTFILE);
147 case 'd': FTST(O_FTDIR);
148 case 'l': FTST(O_FTLINK);
149 case 'p': FTST(O_FTPIPE);
150 case 'S': FTST(O_FTSOCK);
151 case 'u': FTST(O_FTSUID);
152 case 'g': FTST(O_FTSGID);
153 case 'k': FTST(O_FTSVTX);
154 case 'b': FTST(O_FTBLK);
155 case 'c': FTST(O_FTCHR);
156 case 't': FTST(O_FTTTY);
157 case 'T': FTST(O_FTTEXT);
158 case 'B': FTST(O_FTBINARY);
/* Whitespace or the start of a comment resets cmdline to NOLINE. */
186 if (isspace(*s) || *s == '#')
187 cmdline = NOLINE; /* invalidate current command line number */
/*
 * Look past blanks and tabs: if only a newline or a comment follows, the
 * token is returned with OPERATOR (expectterm = TRUE), otherwise with TERM
 * (expectterm = FALSE).  (Presumably this disambiguates a closing brace;
 * the case label is not visible -- confirm.)
 */
200 for (d = s; *d == ' ' || *d == '\t'; d++) ;
201 if (*d == '\n' || *d == '#')
202 OPERATOR(tmp); /* block end */
204 TERM(tmp); /* associative array end */
/*
 * The next line is a continuation line of a multi-line macro whose head is
 * not visible; it consumes a run of letters, digits and underscores.
 *
 * After it come three variable forms, each scanned by scanreg into
 * tokenbuf and looked up with stabent(tokenbuf,TRUE).  Two of them also
 * pass the result through aadd; the first of those requires s[1] == '#'
 * followed by a letter or underscore.  (Presumably these are the array
 * length, scalar and array sigils -- confirm against the case labels.)
 */
262 while (isalpha(*s) || isdigit(*s) || *s == '_') \
268 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
270 s = scanreg(s,tokenbuf);
271 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
274 s = scanreg(s,tokenbuf);
275 yylval.stabval = stabent(tokenbuf,TRUE);
279 s = scanreg(s,tokenbuf);
280 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
283 case '/': /* may either be division or pattern */
284 case '?': /* may either be conditional or pattern */
/* Taken unless a term is expected AND the next character is a digit. */
293 if (!expectterm || !isdigit(s[1])) {
/* Digits and the three quote characters share a single case body. */
302 case '0': case '1': case '2': case '3': case '4':
303 case '5': case '6': case '7': case '8': case '9':
304 case '\'': case '"': case '`':
/* The scanned word at d is duplicated with savestr into yylval.cval. */
310 yylval.cval = savestr(d);
314 yylval.cval = savestr(d);
318 yylval.cval = savestr(d);
/*
 * Reserved-word recognition.  d points at the word just scanned and is
 * compared against each keyword with strEQ.  The statements executed on a
 * match are mostly on lines not shown in this excerpt.  Where a match
 * stores an O_... opcode in yylval.ival, the token returned afterwards is
 * not visible.  The recurring `yylval.cval = savestr(d)` lines duplicate
 * the word itself (presumably the fall-through for a non-keyword in each
 * first-letter group -- confirm).
 */
322 if (strEQ(d,"continue"))
324 if (strEQ(d,"chdir"))
326 if (strEQ(d,"close"))
328 if (strEQ(d,"crypt"))
332 if (strEQ(d,"chmod")) {
333 yylval.ival = O_CHMOD;
336 if (strEQ(d,"chown")) {
337 yylval.ival = O_CHOWN;
340 yylval.cval = savestr(d);
348 if (strEQ(d,"delete"))
350 yylval.cval = savestr(d);
356 if (strEQ(d,"elsif")) {
/* String comparison words are accepted in all-lower or all-upper case only. */
360 if (strEQ(d,"eq") || strEQ(d,"EQ"))
/* eval sets allstabs before returning a UNIOP token carrying O_EVAL. */
364 if (strEQ(d,"eval")) {
365 allstabs = TRUE; /* must initialize everything since */
366 UNI(O_EVAL); /* we don't know what will be used */
374 if (strEQ(d,"exec")) {
375 yylval.ival = O_EXEC;
378 yylval.cval = savestr(d);
384 if (strEQ(d,"foreach"))
386 if (strEQ(d,"format")) {
392 yylval.cval = savestr(d);
396 if (strEQ(d,"gt") || strEQ(d,"GT"))
398 if (strEQ(d,"ge") || strEQ(d,"GE"))
402 if (strEQ(d,"gmtime"))
404 yylval.cval = savestr(d);
410 yylval.cval = savestr(d);
418 if (strEQ(d,"index"))
422 yylval.cval = savestr(d);
428 yylval.cval = savestr(d);
434 if (strEQ(d,"kill")) {
435 yylval.ival = O_KILL;
438 yylval.cval = savestr(d);
444 if (strEQ(d,"local"))
446 if (strEQ(d,"length"))
448 if (strEQ(d,"lt") || strEQ(d,"LT"))
450 if (strEQ(d,"le") || strEQ(d,"LE"))
452 if (strEQ(d,"localtime"))
458 yylval.cval = savestr(d);
466 yylval.cval = savestr(d);
472 if (strEQ(d,"ne") || strEQ(d,"NE"))
474 yylval.cval = savestr(d);
484 yylval.cval = savestr(d);
488 if (strEQ(d,"print")) {
489 yylval.ival = O_PRINT;
492 if (strEQ(d,"printf")) {
493 yylval.ival = O_PRTF;
496 if (strEQ(d,"push")) {
497 yylval.ival = O_PUSH;
502 yylval.cval = savestr(d);
506 yylval.cval = savestr(d);
510 if (strEQ(d,"reset"))
514 if (strEQ(d,"rename"))
516 yylval.cval = savestr(d);
524 if (strEQ(d,"shift"))
526 if (strEQ(d,"split"))
528 if (strEQ(d,"substr"))
530 if (strEQ(d,"sprintf"))
534 if (strEQ(d,"select"))
540 if (strEQ(d,"study")) {
546 if (strEQ(d,"sleep"))
548 if (strEQ(d,"system")) {
549 yylval.ival = O_SYSTEM;
552 if (strEQ(d,"symlink"))
554 if (strEQ(d,"sort")) {
555 yylval.ival = O_SORT;
558 yylval.cval = savestr(d);
570 if (strEQ(d,"times"))
572 yylval.cval = savestr(d);
576 if (strEQ(d,"using"))
578 if (strEQ(d,"until")) {
582 if (strEQ(d,"unless")) {
586 if (strEQ(d,"umask"))
588 if (strEQ(d,"unshift")) {
589 yylval.ival = O_UNSHIFT;
592 if (strEQ(d,"unlink")) {
593 yylval.ival = O_UNLINK;
596 if (strEQ(d,"utime")) {
597 yylval.ival = O_UTIME;
600 yylval.cval = savestr(d);
604 if (strEQ(d,"values"))
606 yylval.cval = savestr(d);
610 if (strEQ(d,"write"))
612 if (strEQ(d,"while")) {
618 yylval.cval = savestr(d);
/* "x" is matched as a keyword only while expectterm is false. */
622 if (!expectterm && strEQ(d,"x"))
624 yylval.cval = savestr(d);
632 yylval.cval = savestr(d);
636 yylval.cval = savestr(d);
/*
 * Name-scanning loops (presumably the body of scanreg; its header is not
 * visible in this excerpt).  The loop bodies are on lines not shown.
 */
/* Run of digits and underscores. */
651 while (isdigit(*s) || *s == '_')
/* Run of letters, digits and underscores. */
655 while (isalpha(*s) || isdigit(*s) || *s == '_')
/* Advance to a closing '}' or to the end of the string, whichever is first. */
664 while (*s && *s != '}')
/* A '^' at *d counts only when the character at *s is not whitespace. */
674 if (*d == '^' && !isspace(*s))
/* Working STR; it is created from `string` by str_make below. */
683 register STR *retstr;
/* A '|' anywhere in the text is tested first; the branch body is not shown. */
687 if (index(string,'|')) {
690 retstr = str_make(string);
/*
 * NOTE(review): this stores 100 through a long* alias of str_nval.  If
 * str_nval is not declared as long, this is type punning through an
 * incompatible pointer -- confirm the field's type and the intent.
 */
692 *(long*)&retstr->str_nval = 100;
/* Characters that get special handling while walking the copy. */
695 case '.': case '[': case '$': case '(': case ')': case '|':
/*
 * NOTE(review): index() also matches the terminating NUL of its first
 * argument, so this test succeeds when d[1] == '\0' (a trailing backslash).
 */
699 if (index("wWbB0123456789sSdD",d[1])) {
/* A following '*', '+' or '?' is checked for here. */
720 if (d[1] == '*' || d[1] == '+' || d[1] == '?') {
/* Recompute the stored length from the NUL-terminated text. */
731 retstr->str_cur = strlen(retstr->str_ptr);
/*
 * Build a SPAT for a match pattern: allocate it, zero it, and point its
 * spat_next at the current spat_root.
 */
739 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
742 bzero((char *)spat, sizeof(SPAT));
743 spat->spat_next = spat_root; /* link into spat list */
753 spat->spat_flags |= SPAT_ONCE;
756 fatal("panic: scanpat");
/*
 * The delimiter handed to cpytill is s[-1], the character just before the
 * scan pointer; the text goes into tokenbuf.  (cpytill is defined
 * elsewhere; presumably it copies up to the delimiter -- confirm.)
 */
758 s = cpytill(tokenbuf,s,s[-1]);
760 fatal("Search pattern not terminated");
764 spat->spat_flags |= SPAT_FOLD;
/*
 * Look for a '$' that is followed by something other than NUL or '|' and is
 * not preceded by a backslash.  If one is found, the raw text is wrapped in
 * an O_ITEM / A_DOUBLE arg stored in spat_runtime and compilation is
 * skipped via got_pat.
 * NOTE(review): on the first iteration d == tokenbuf, so when the pattern
 * starts with '$' the d[-1] test reads the byte before tokenbuf.
 */
766 for (d=tokenbuf; *d; d++) {
767 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
770 spat->spat_runtime = arg = op_new(1);
771 arg->arg_type = O_ITEM;
772 arg[1].arg_type = A_DOUBLE;
773 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
774 goto got_pat; /* skip compiling for now */
/*
 * Without SPAT_FOLD, try to extract a constant string with scanconst.  A
 * pattern starting with '^' is scanned from tokenbuf+1.  The other visible
 * branch sets SPAT_SCANFIRST and scans the whole text.  In both branches,
 * when the constant is as long as the text scanned, SPAT_ALL is set.
 */
777 if (!(spat->spat_flags & SPAT_FOLD)) {
778 if (*tokenbuf == '^') {
779 spat->spat_short = scanconst(tokenbuf+1);
780 if (spat->spat_short) {
781 spat->spat_slen = strlen(spat->spat_short->str_ptr);
782 if (spat->spat_slen == strlen(tokenbuf+1))
783 spat->spat_flags |= SPAT_ALL;
787 spat->spat_flags |= SPAT_SCANFIRST;
788 spat->spat_short = scanconst(tokenbuf);
789 if (spat->spat_short) {
790 spat->spat_slen = strlen(spat->spat_short->str_ptr);
791 if (spat->spat_slen == strlen(tokenbuf))
792 spat->spat_flags |= SPAT_ALL;
/* regcomp here takes three arguments: text, fold flag, and a constant 1. */
796 spat->spat_regexp = regcomp(tokenbuf,spat->spat_flags & SPAT_FOLD,1);
/* The result is an O_MATCH node built with make_match against defstab. */
799 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
/*
 * Build a SPAT for a substitution: allocate it, zero it, and point its
 * spat_next at the current spat_root.
 */
807 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
810 bzero((char *)spat, sizeof(SPAT));
811 spat->spat_next = spat_root; /* link into spat list */
/* The delimiter is the character at *s; copying into tokenbuf starts at s+1. */
814 s = cpytill(tokenbuf,s+1,*s);
816 fatal("Substitution pattern not terminated");
/*
 * Look for a '$' that is followed by something other than NUL or '|' and is
 * not preceded by a backslash.  If one is found, the raw text is wrapped in
 * an O_ITEM / A_DOUBLE arg stored in spat_runtime and control jumps to
 * get_repl without compiling.
 * NOTE(review): on the first iteration d == tokenbuf, so when the pattern
 * starts with '$' the d[-1] test reads the byte before tokenbuf.
 */
817 for (d=tokenbuf; *d; d++) {
818 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
821 spat->spat_runtime = arg = op_new(1);
822 arg->arg_type = O_ITEM;
823 arg[1].arg_type = A_DOUBLE;
824 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
825 goto get_repl; /* skip compiling for now */
/*
 * Constant-string extraction: a pattern starting with '^' is scanned from
 * tokenbuf+1; the other visible branch sets SPAT_SCANFIRST and scans the
 * whole text.
 */
828 if (*tokenbuf == '^') {
829 spat->spat_short = scanconst(tokenbuf+1);
830 if (spat->spat_short)
831 spat->spat_slen = strlen(spat->spat_short->str_ptr);
834 spat->spat_flags |= SPAT_SCANFIRST;
835 spat->spat_short = scanconst(tokenbuf);
836 if (spat->spat_short)
837 spat->spat_slen = strlen(spat->spat_short->str_ptr);
/*
 * The pattern text is duplicated here and compiled from d further down.
 * (Presumably because tokenbuf is reused while the replacement is scanned
 * -- confirm.)
 */
839 d = savestr(tokenbuf);
843 fatal("Substitution replacement not terminated");
844 spat->spat_repl = yylval.arg;
/*
 * SPAT_ONCE is set by default.  The loop over trailing 'g' / 'i'
 * characters can clear SPAT_ONCE and set SPAT_FOLD; which letter does
 * which is decided on lines not shown.
 */
845 spat->spat_flags |= SPAT_ONCE;
846 while (*s == 'g' || *s == 'i') {
849 spat->spat_flags &= ~SPAT_ONCE;
853 spat->spat_flags |= SPAT_FOLD;
/* Compile now only if no runtime pattern was recorded above. */
856 if (!spat->spat_runtime) {
857 spat->spat_regexp = regcomp(d, spat->spat_flags & SPAT_FOLD,1);
/* With SPAT_FOLD set, the constant string found earlier is released. */
861 if (spat->spat_flags & SPAT_FOLD) { /* Oops, disable optimization */
862 str_free(spat->spat_short);
863 spat->spat_short = Nullstr;
/* The result is an O_SUBST node built with make_match against defstab. */
866 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
/*
 * Reconcile the scanner's constant string (spat_short) with the compiled
 * pattern's regmust string.  Nothing is done when regmust is null.
 */
873 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
/*
 * Both strings have identical text.  The visible statements free
 * spat_short when SPAT_SCANFIRST is set, and free regmust; the exact
 * branch structure between them is on lines not shown.
 */
874 if (spat->spat_short &&
875 strEQ(spat->spat_short->str_ptr,spat->spat_regexp->regmust->str_ptr)){
876 if (spat->spat_flags & SPAT_SCANFIRST) {
877 str_free(spat->spat_short);
878 spat->spat_short = Nullstr;
881 str_free(spat->spat_regexp->regmust);
882 spat->spat_regexp->regmust = Nullstr;
/*
 * Otherwise regmust replaces spat_short when there is no spat_short, or
 * when SPAT_SCANFIRST is set and regmust is longer by str_cur.  Ownership
 * of the string moves to spat_short, regmust is nulled, and SPAT_SCANFIRST
 * is set.
 */
886 if (!spat->spat_short || /* promote the better string */
887 ((spat->spat_flags & SPAT_SCANFIRST) &&
888 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
889 str_free(spat->spat_short); /* ok if null */
890 spat->spat_short = spat->spat_regexp->regmust;
891 spat->spat_regexp->regmust = Nullstr;
892 spat->spat_flags |= SPAT_SCANFIRST;
/* Output cursor; it starts at t. */
902 register char *d = t;
/*
 * A range: s[1] is '-' and a character follows it.  The loop visits every
 * value from s[0] up to and including s[2]; its body is not shown.
 * NOTE(review): the declaration of i is not visible.  If plain char is
 * signed, bytes of 0x80 and above compare as negative here -- confirm.
 */
906 if (s[1] == '-' && s[2]) {
907 for (i = s[0]; i <= s[2]; i++)
923 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg,0));
/*
 * The translation is driven by a 256-entry byte table.  The table pointer
 * is stored in arg[2].arg_ptr.arg_cval, and arg[2] is typed A_NULL.
 */
926 register char *tbl = safemalloc(256);
930 arg[2].arg_type = A_NULL;
931 arg[2].arg_ptr.arg_cval = tbl;
/* Visits every table slot; the statement executed per slot is not shown. */
932 for (i=0; i<256; i++)
936 fatal("Translation pattern not terminated");
/* t: the search list after expand_charset; the scanned arg is then freed. */
937 t = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
938 free_arg(yylval.arg);
941 fatal("Translation replacement not terminated");
/* r: the replacement list after expand_charset; the scanned arg is then freed. */
942 r = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
943 free_arg(yylval.arg);
/*
 * Map each character of t to a character of r.  The `& 0377` keeps the
 * table index in 0..255 even if plain char is signed.
 * NOTE(review): i and j advance together here.  The file header mentions a
 * fix for r being shorter than t; the lines between this loop header and
 * the store are not visible -- confirm that j cannot run past the end of r.
 */
949 for (i = 0, j = 0; t[i]; i++,j++) {
952 tbl[t[i] & 0377] = r[j];
/*
 * Scanner for literals: numbers, <...> input forms and quoted strings.  The
 * result is delivered in arg[1] of an O_ITEM arg.
 */
/* Set when the string may later be downgraded from A_DOUBLE to A_SINGLE. */
967 register bool makesingle = FALSE;
969 char *leave = "\\$nrtfb0123456789"; /* which backslash sequences to keep */
973 arg->arg_type = O_ITEM;
976 default: /* a substitution replacement */
977 arg[1].arg_type = A_DOUBLE;
978 makesingle = TRUE; /* maybe disable runtime scanning */
988 arg[1].arg_type = A_SINGLE;
993 else if (s[1] == '.')
1004 fatal("Illegal octal digit");
1006 case '0': case '1': case '2': case '3': case '4':
1007 case '5': case '6': case '7':
/*
 * Hex letters: in ASCII the low three bits of a-f and A-F are 1..6, so
 * (c & 7) + 9 yields 10..15.
 */
1011 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1012 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1016 i += (*s++ & 7) + 9;
/*
 * The accumulated value is rendered in decimal and stored as a string.
 * The %ld conversion requires i to be a long; its declaration is not
 * visible here -- confirm.
 */
1021 sprintf(tokenbuf,"%ld",i);
1022 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
/* Decimal numbers, including a leading '.'. */
1025 case '1': case '2': case '3': case '4': case '5':
1026 case '6': case '7': case '8': case '9': case '.':
1028 arg[1].arg_type = A_SINGLE;
/* Underscores are accepted among the digits; the loop body is not shown. */
1030 while (isdigit(*s) || *s == '_') {
/*
 * NOTE(review): index() also matches the terminating NUL of its first
 * argument.  The fraction test below is therefore true when s[1] == '\0'.
 * The exponent test is true when *s == '\0', and then goes on to read s[1],
 * one byte past the terminator.
 */
1036 if (*s == '.' && index("0123456789eE",s[1])) {
1038 while (isdigit(*s) || *s == '_') {
1045 if (index("eE",*s) && index("+-0123456789",s[1])) {
1047 if (*s == '+' || *s == '-')
1053 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1056 arg[1].arg_type = A_SINGLE;
/* Angle-bracket form: the text up to '>' is handed to cpytill with d as target. */
1063 s = cpytill(d,s+1,'>');
/* Skip a leading run of identifier characters in the bracketed text. */
1067 while (*d && (isalpha(*d) || isdigit(*d) || *d == '_')) d++;
/*
 * A_GLOB: a stab from genstab gets a new stio, and its stab_val holds a
 * copy of the bracketed text.
 */
1070 arg[1].arg_type = A_GLOB;
1072 arg[1].arg_ptr.arg_stab = stab = genstab();
1073 stab->stab_io = stio_new();
1074 stab->stab_val = str_make(d);
/* A_INDREAD: the stab is looked up by the name starting at d+1. */
1081 arg[1].arg_type = A_INDREAD;
1082 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
/*
 * A_READ: the stab is looked up by name and given a new stio.  Reading from
 * "stdin" is fatal when the program itself is being read from stdin.  The
 * "ARGV" handle is additionally passed to aadd.
 */
1085 arg[1].arg_type = A_READ;
1086 if (rsfp == stdin && strEQ(d,"stdin"))
1087 fatal("Can't get both program and data from <stdin>");
1088 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1089 arg[1].arg_ptr.arg_stab->stab_io = stio_new();
1090 if (strEQ(d,"ARGV")) {
1091 aadd(arg[1].arg_ptr.arg_stab);
1092 arg[1].arg_ptr.arg_stab->stab_io->flags |=
1099 arg[1].arg_type = A_DOUBLE;
1100 makesingle = TRUE; /* maybe disable runtime scanning */
1104 arg[1].arg_type = A_BACKTICK;
/*
 * Quoted text is collected into tmpstr.  While the scan pointer comes back
 * resting on a NUL, another line is fetched with str_gets and collection
 * continues; the "EOF in string" fatal lies on this path.
 * (str_append_till is defined elsewhere; presumably it stops at `term` or
 * at the end of the buffer -- confirm.)
 */
1112 tmpstr = str_new(strlen(s));
1113 s = str_append_till(tmpstr,s+1,term,leave);
1114 while (!*s) { /* multiple line string? */
1115 s = str_gets(linestr, rsfp);
1118 fatal("EOF in string");
1121 s = str_append_till(tmpstr,s,term,leave);
1125 arg[1].arg_ptr.arg_str = tmpstr;
/*
 * First pass over the collected text.  A backslash followed by exactly one
 * digit is rewritten to '$', unless the terminator is a backquote or a
 * double quote.  A '$' followed by anything other than NUL, ')' or '|'
 * clears makesingle.
 */
1129 s = tmpstr->str_ptr;
1130 while (*s) { /* see if we can make SINGLE */
1131 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1132 !index("`\"",term) )
1133 *s = '$'; /* grandfather \digit in subst */
1134 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1135 makesingle = FALSE; /* force interpretation */
1137 else if (*s == '\\' && s[1]) {
/* Second pass rewrites the text in place; s reads and d writes. */
1142 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1144 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1147 len = scanreg(s,tokenbuf) - s;
1148 stabent(tokenbuf,TRUE); /* make sure it's created */
1153 else if (*s == '\\' && s[1]) {
/*
 * Tested while the string is still to be interpolated (makesingle false):
 * true when `leave` is null or the escaped character appears in `leave`.
 */
1157 if (!makesingle && (!leave || index(leave,*s)))
1161 case '0': case '1': case '2': case '3':
1162 case '4': case '5': case '6': case '7':
/*
 * NOTE(review): index() matches the terminating NUL too, so these octal
 * digit tests are true when *s == '\0' -- confirm that cannot occur here.
 */
1164 if (index("01234567",*s)) {
1168 if (index("01234567",*s)) {
/* Nothing required interpolation, so the string is downgraded to A_SINGLE. */
1197 if (arg[1].arg_type == A_DOUBLE && makesingle)
1198 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
/* The length is set directly from the write pointer's final position. */
1200 tmpstr->str_cur = d - tmpstr->str_ptr; /* XXX cheat */
1201 arg[1].arg_ptr.arg_str = tmpstr;
/*
 * Read a format definition from rsfp and build a list of FCMD records
 * linked through f_next.  The list hangs off froot, and every visible
 * return hands back froot.f_next.
 */
1214 register FCMD *fprev = &froot;
1215 register FCMD *fcmd;
1218 register char tmpchar;
/* One iteration per input line; a line holding only "." ends the format. */
1221 while ((s = str_gets(linestr,rsfp)) != Nullch) {
1223 if (strEQ(s,".\n")) {
1225 return froot.f_next;
1229 flinebeg = Nullfcmd;
/* Each record is allocated, zeroed, and linked after fprev. */
1232 fcmd = (FCMD *)safemalloc(sizeof (FCMD));
1233 bzero((char*)fcmd, sizeof (FCMD));
1234 fprev->f_next = fcmd;
/* Advance t to the first '@' or '^', or to the end of the line. */
1236 for (t=s; *t && *t != '@' && *t != '^'; t++) {
/* f_pre holds a copy of the text at s, and f_presize holds its length. */
1244 fcmd->f_pre = savestr(s);
1245 fcmd->f_presize = strlen(s);
1250 fcmd->f_flags |= FC_NOBLANK;
1254 flinebeg = fcmd; /* start values here */
1256 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
/* Field type; the character that selects each type is on lines not shown. */
1259 fcmd->f_type = F_LINES;
1263 fcmd->f_type = F_LEFT;
1268 fcmd->f_type = F_RIGHT;
1273 fcmd->f_type = F_CENTER;
1278 fcmd->f_type = F_LEFT;
/* `&` binds tighter than `&&`: this is (f_flags & FC_CHOP) && (*s == '.'). */
1281 if (fcmd->f_flags & FC_CHOP && *s == '.') {
1282 fcmd->f_flags |= FC_MORE;
/*
 * The next line is read into bufptr.  (Presumably it is the values line for
 * the picture line just parsed -- confirm.)  If it is the terminator
 * instead, "Missing values line" is reported.
 */
1290 if ((bufptr = str_gets(linestr ,rsfp)) == Nullch)
1293 if (strEQ(bufptr,".\n")) {
1294 yyerror("Missing values line");
1295 return froot.f_next;
/*
 * lex_newlines is TRUE while this loop runs and is reset afterwards.  The
 * loop continues while a field is still waiting for a value or text
 * remains at bufptr.  Each accepted value is stored in f_expr of the
 * current field, after which flinebeg advances past fields whose f_size
 * is 0.
 */
1299 lex_newlines = TRUE;
1300 while (flinebeg || *bufptr) {
1303 yyerror("Bad value in format");
1308 yyerror("Missing value in format");
1312 yylval.arg = stab2arg(A_LVAL,yylval.stabval);
1316 yyerror("Extra value in format");
1318 flinebeg->f_expr = yylval.arg;
1320 flinebeg = flinebeg->f_next;
1321 } while (flinebeg && flinebeg->f_size == 0);
1328 lex_newlines = FALSE;
/*
 * Error exit: bufptr is reset to the start of linestr,
 * "Format not terminated" is reported, and the list built so far is
 * returned.
 */
1332 bufptr = str_get(linestr);
1333 yyerror("Format not terminated");
1334 return froot.f_next;