1 /* $Header: toke.c,v 2.0 88/06/05 00:11:16 root Exp $
4 * Revision 2.0 88/06/05 00:11:16 root
5 * Baseline version 2.0.
/*
 * Token-return helpers for the tokener.
 *
 * Every macro except CLINE expands to a complete `return` statement, so
 * using one ends the enclosing function immediately.  Each of them stores
 * the local scan pointer `s` into `bufptr` before returning, so they may
 * only be used where a variable named `s` is in scope.  The returned token
 * code is always cast to int.
 */

/* CLINE: lower cmdline to line when line is smaller; otherwise leave it
 * unchanged.  Both names are evaluated more than once. */
13 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
/* RETURN: save s in bufptr and return retval; expectterm is not touched. */
15 #define RETURN(retval) return (bufptr = s,(int)retval)
/* OPERATOR: like RETURN, but first sets expectterm to TRUE. */
16 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
/* TERM: evaluates CLINE, sets expectterm to FALSE, then returns retval. */
17 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
/*
 * The remaining macros store their argument f in yylval.ival and return a
 * fixed token code.  LOOPX, FUN0-FUN3, SFUN and LFUN set expectterm to
 * FALSE; UNI and FTST set it to TRUE.
 */
/* LOOPX: returns LOOPEX. */
18 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
/* UNI: returns UNIOP. */
19 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s,(int)UNIOP)
/* FTST: returns FILETEST. */
20 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
/* FUN0..FUN3: return FUNC0..FUNC3 respectively. */
21 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
22 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
23 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
24 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
/* SFUN: returns STABFUN. */
25 #define SFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)STABFUN)
/* LFUN: returns LVALFUN. */
26 #define LFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LVALFUN)
30 register char *s = bufptr;
33 static bool in_format = FALSE;
34 static bool firstline = TRUE;
40 fprintf(stderr,"Tokener at %s",s);
42 fprintf(stderr,"Tokener at %s\n",s);
47 "Unrecognized character %c in file %s line %ld--ignoring.\n",
48 *s++,filename,(long)line);
53 if (firstline && (minus_n || minus_p)) {
55 str_set(linestr,"line: while (<>) {");
57 str_cat(linestr,"@F=split(' ');");
64 yylval.formval = load_format(); /* leaves . in buffer */
70 if ((s = str_gets(linestr, rsfp)) == Nullch) {
73 else if (rsfp != stdin)
76 if (minus_n || minus_p) {
77 str_set(linestr,minus_p ? "}continue{print;" : "");
93 case ' ': case '\t': case '\f':
98 if (preprocess && s == str_get(linestr) &&
99 s[1] == ' ' && isdigit(s[2])) {
101 for (s += 2; isdigit(*s); s++) ;
102 while (*s && isspace(*s)) s++;
105 s[strlen(s)-1] = '\0'; /* wipe out newline */
108 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
111 filename = savestr(s);
113 filename = savestr(origfilename);
114 s = str_get(linestr);
117 while (*s && *s != '\n')
129 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
132 case 'r': FTST(O_FTEREAD);
133 case 'w': FTST(O_FTEWRITE);
134 case 'x': FTST(O_FTEEXEC);
135 case 'o': FTST(O_FTEOWNED);
136 case 'R': FTST(O_FTRREAD);
137 case 'W': FTST(O_FTRWRITE);
138 case 'X': FTST(O_FTREXEC);
139 case 'O': FTST(O_FTROWNED);
140 case 'e': FTST(O_FTIS);
141 case 'z': FTST(O_FTZERO);
142 case 's': FTST(O_FTSIZE);
143 case 'f': FTST(O_FTFILE);
144 case 'd': FTST(O_FTDIR);
145 case 'l': FTST(O_FTLINK);
146 case 'p': FTST(O_FTPIPE);
147 case 'S': FTST(O_FTSOCK);
148 case 'u': FTST(O_FTSUID);
149 case 'g': FTST(O_FTSGID);
150 case 'k': FTST(O_FTSVTX);
151 case 'b': FTST(O_FTBLK);
152 case 'c': FTST(O_FTCHR);
153 case 't': FTST(O_FTTTY);
154 case 'T': FTST(O_FTTEXT);
155 case 'B': FTST(O_FTBINARY);
183 if (isspace(*s) || *s == '#')
184 cmdline = NOLINE; /* invalidate current command line number */
197 for (d = s; *d == ' ' || *d == '\t'; d++) ;
198 if (*d == '\n' || *d == '#')
199 OPERATOR(tmp); /* block end */
201 TERM(tmp); /* associative array end */
259 while (isalpha(*s) || isdigit(*s) || *s == '_') \
265 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
267 s = scanreg(s,tokenbuf);
268 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
271 s = scanreg(s,tokenbuf);
272 yylval.stabval = stabent(tokenbuf,TRUE);
276 s = scanreg(s,tokenbuf);
277 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
280 case '/': /* may either be division or pattern */
281 case '?': /* may either be conditional or pattern */
290 if (!expectterm || !isdigit(s[1])) {
299 case '0': case '1': case '2': case '3': case '4':
300 case '5': case '6': case '7': case '8': case '9':
301 case '\'': case '"': case '`':
307 yylval.cval = savestr(d);
311 yylval.cval = savestr(d);
315 yylval.cval = savestr(d);
319 if (strEQ(d,"continue"))
321 if (strEQ(d,"chdir"))
323 if (strEQ(d,"close"))
325 if (strEQ(d,"crypt"))
329 if (strEQ(d,"chmod")) {
330 yylval.ival = O_CHMOD;
333 if (strEQ(d,"chown")) {
334 yylval.ival = O_CHOWN;
337 yylval.cval = savestr(d);
345 if (strEQ(d,"delete"))
347 yylval.cval = savestr(d);
353 if (strEQ(d,"elsif")) {
357 if (strEQ(d,"eq") || strEQ(d,"EQ"))
361 if (strEQ(d,"eval")) {
362 allstabs = TRUE; /* must initialize everything since */
363 UNI(O_EVAL); /* we don't know what will be used */
371 if (strEQ(d,"exec")) {
372 yylval.ival = O_EXEC;
375 yylval.cval = savestr(d);
381 if (strEQ(d,"foreach"))
383 if (strEQ(d,"format")) {
389 yylval.cval = savestr(d);
393 if (strEQ(d,"gt") || strEQ(d,"GT"))
395 if (strEQ(d,"ge") || strEQ(d,"GE"))
399 if (strEQ(d,"gmtime"))
401 yylval.cval = savestr(d);
407 yylval.cval = savestr(d);
415 if (strEQ(d,"index"))
419 yylval.cval = savestr(d);
425 yylval.cval = savestr(d);
431 if (strEQ(d,"kill")) {
432 yylval.ival = O_KILL;
435 yylval.cval = savestr(d);
441 if (strEQ(d,"local"))
443 if (strEQ(d,"length"))
445 if (strEQ(d,"lt") || strEQ(d,"LT"))
447 if (strEQ(d,"le") || strEQ(d,"LE"))
449 if (strEQ(d,"localtime"))
455 yylval.cval = savestr(d);
463 yylval.cval = savestr(d);
469 if (strEQ(d,"ne") || strEQ(d,"NE"))
471 yylval.cval = savestr(d);
481 yylval.cval = savestr(d);
485 if (strEQ(d,"print")) {
486 yylval.ival = O_PRINT;
489 if (strEQ(d,"printf")) {
490 yylval.ival = O_PRTF;
493 if (strEQ(d,"push")) {
494 yylval.ival = O_PUSH;
499 yylval.cval = savestr(d);
503 yylval.cval = savestr(d);
507 if (strEQ(d,"reset"))
511 if (strEQ(d,"rename"))
513 yylval.cval = savestr(d);
521 if (strEQ(d,"shift"))
523 if (strEQ(d,"split"))
525 if (strEQ(d,"substr"))
527 if (strEQ(d,"sprintf"))
531 if (strEQ(d,"select"))
537 if (strEQ(d,"study")) {
543 if (strEQ(d,"sleep"))
545 if (strEQ(d,"system")) {
546 yylval.ival = O_SYSTEM;
549 if (strEQ(d,"symlink"))
551 if (strEQ(d,"sort")) {
552 yylval.ival = O_SORT;
555 yylval.cval = savestr(d);
567 if (strEQ(d,"times"))
569 yylval.cval = savestr(d);
573 if (strEQ(d,"using"))
575 if (strEQ(d,"until")) {
579 if (strEQ(d,"unless")) {
583 if (strEQ(d,"umask"))
585 if (strEQ(d,"unshift")) {
586 yylval.ival = O_UNSHIFT;
589 if (strEQ(d,"unlink")) {
590 yylval.ival = O_UNLINK;
593 if (strEQ(d,"utime")) {
594 yylval.ival = O_UTIME;
597 yylval.cval = savestr(d);
601 if (strEQ(d,"values"))
603 yylval.cval = savestr(d);
607 if (strEQ(d,"write"))
609 if (strEQ(d,"while")) {
615 yylval.cval = savestr(d);
619 if (!expectterm && strEQ(d,"x"))
621 yylval.cval = savestr(d);
629 yylval.cval = savestr(d);
633 yylval.cval = savestr(d);
648 while (isdigit(*s) || *s == '_')
652 while (isalpha(*s) || isdigit(*s) || *s == '_')
661 while (*s && *s != '}')
671 if (*d == '^' && !isspace(*s))
680 register STR *retstr;
684 if (index(string,'|')) {
687 retstr = str_make(string);
689 *(long*)&retstr->str_nval = 100;
692 case '.': case '[': case '$': case '(': case ')': case '|':
696 if (index("wWbB0123456789sSdD",d[1])) {
717 if (d[1] == '*' || d[1] == '+' || d[1] == '?') {
728 retstr->str_cur = strlen(retstr->str_ptr);
736 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
739 bzero((char *)spat, sizeof(SPAT));
740 spat->spat_next = spat_root; /* link into spat list */
750 spat->spat_flags |= SPAT_ONCE;
753 fatal("panic: scanpat");
755 s = cpytill(tokenbuf,s,s[-1]);
757 fatal("Search pattern not terminated");
761 spat->spat_flags |= SPAT_FOLD;
763 for (d=tokenbuf; *d; d++) {
764 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
767 spat->spat_runtime = arg = op_new(1);
768 arg->arg_type = O_ITEM;
769 arg[1].arg_type = A_DOUBLE;
770 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
771 goto got_pat; /* skip compiling for now */
774 if (!(spat->spat_flags & SPAT_FOLD)) {
775 if (*tokenbuf == '^') {
776 spat->spat_short = scanconst(tokenbuf+1);
777 if (spat->spat_short) {
778 spat->spat_slen = strlen(spat->spat_short->str_ptr);
779 if (spat->spat_slen == strlen(tokenbuf+1))
780 spat->spat_flags |= SPAT_ALL;
784 spat->spat_flags |= SPAT_SCANFIRST;
785 spat->spat_short = scanconst(tokenbuf);
786 if (spat->spat_short) {
787 spat->spat_slen = strlen(spat->spat_short->str_ptr);
788 if (spat->spat_slen == strlen(tokenbuf))
789 spat->spat_flags |= SPAT_ALL;
793 spat->spat_regexp = regcomp(tokenbuf,spat->spat_flags & SPAT_FOLD,1);
796 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
804 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
807 bzero((char *)spat, sizeof(SPAT));
808 spat->spat_next = spat_root; /* link into spat list */
811 s = cpytill(tokenbuf,s+1,*s);
813 fatal("Substitution pattern not terminated");
814 for (d=tokenbuf; *d; d++) {
815 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
818 spat->spat_runtime = arg = op_new(1);
819 arg->arg_type = O_ITEM;
820 arg[1].arg_type = A_DOUBLE;
821 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
822 goto get_repl; /* skip compiling for now */
825 if (*tokenbuf == '^') {
826 spat->spat_short = scanconst(tokenbuf+1);
827 if (spat->spat_short)
828 spat->spat_slen = strlen(spat->spat_short->str_ptr);
831 spat->spat_flags |= SPAT_SCANFIRST;
832 spat->spat_short = scanconst(tokenbuf);
833 if (spat->spat_short)
834 spat->spat_slen = strlen(spat->spat_short->str_ptr);
836 d = savestr(tokenbuf);
840 fatal("Substitution replacement not terminated");
841 spat->spat_repl = yylval.arg;
842 spat->spat_flags |= SPAT_ONCE;
843 while (*s == 'g' || *s == 'i') {
846 spat->spat_flags &= ~SPAT_ONCE;
850 spat->spat_flags |= SPAT_FOLD;
853 if (!spat->spat_runtime) {
854 spat->spat_regexp = regcomp(d, spat->spat_flags & SPAT_FOLD,1);
858 if (spat->spat_flags & SPAT_FOLD) { /* Oops, disable optimization */
859 str_free(spat->spat_short);
860 spat->spat_short = Nullstr;
863 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
870 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
871 if (spat->spat_short &&
872 strEQ(spat->spat_short->str_ptr,spat->spat_regexp->regmust->str_ptr)){
873 if (spat->spat_flags & SPAT_SCANFIRST) {
874 str_free(spat->spat_short);
875 spat->spat_short = Nullstr;
878 str_free(spat->spat_regexp->regmust);
879 spat->spat_regexp->regmust = Nullstr;
883 if (!spat->spat_short || /* promote the better string */
884 ((spat->spat_flags & SPAT_SCANFIRST) &&
885 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
886 str_free(spat->spat_short); /* ok if null */
887 spat->spat_short = spat->spat_regexp->regmust;
888 spat->spat_regexp->regmust = Nullstr;
889 spat->spat_flags |= SPAT_SCANFIRST;
899 register char *d = t;
903 if (s[1] == '-' && s[2]) {
904 for (i = s[0]; i <= s[2]; i++)
920 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg,0));
923 register char *tbl = safemalloc(256);
926 arg[2].arg_type = A_NULL;
927 arg[2].arg_ptr.arg_cval = tbl;
928 for (i=0; i<256; i++)
932 fatal("Translation pattern not terminated");
933 t = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
934 free_arg(yylval.arg);
937 fatal("Translation replacement not terminated");
938 r = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
939 free_arg(yylval.arg);
945 for (i = 0; t[i]; i++) {
948 tbl[t[i] & 0377] = r[i];
963 register bool makesingle = FALSE;
965 char *leave = "\\$nrtfb0123456789"; /* which backslash sequences to keep */
969 arg->arg_type = O_ITEM;
972 default: /* a substitution replacement */
973 arg[1].arg_type = A_DOUBLE;
974 makesingle = TRUE; /* maybe disable runtime scanning */
984 arg[1].arg_type = A_SINGLE;
989 else if (s[1] == '.')
1000 fatal("Illegal octal digit");
1002 case '0': case '1': case '2': case '3': case '4':
1003 case '5': case '6': case '7':
1007 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1008 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1012 i += (*s++ & 7) + 9;
1017 sprintf(tokenbuf,"%ld",i);
1018 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1021 case '1': case '2': case '3': case '4': case '5':
1022 case '6': case '7': case '8': case '9': case '.':
1024 arg[1].arg_type = A_SINGLE;
1026 while (isdigit(*s) || *s == '_') {
1032 if (*s == '.' && index("0123456789eE",s[1])) {
1034 while (isdigit(*s) || *s == '_') {
1041 if (index("eE",*s) && index("+-0123456789",s[1])) {
1043 if (*s == '+' || *s == '-')
1049 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1052 arg[1].arg_type = A_SINGLE;
1059 s = cpytill(d,s+1,'>');
1063 while (*d && (isalpha(*d) || isdigit(*d) || *d == '_')) d++;
1066 arg[1].arg_type = A_GLOB;
1068 arg[1].arg_ptr.arg_stab = stab = genstab();
1069 stab->stab_io = stio_new();
1070 stab->stab_val = str_make(d);
1077 arg[1].arg_type = A_INDREAD;
1078 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1081 arg[1].arg_type = A_READ;
1082 if (rsfp == stdin && strEQ(d,"stdin"))
1083 fatal("Can't get both program and data from <stdin>");
1084 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1085 arg[1].arg_ptr.arg_stab->stab_io = stio_new();
1086 if (strEQ(d,"ARGV")) {
1087 aadd(arg[1].arg_ptr.arg_stab);
1088 arg[1].arg_ptr.arg_stab->stab_io->flags |=
1095 arg[1].arg_type = A_DOUBLE;
1096 makesingle = TRUE; /* maybe disable runtime scanning */
1100 arg[1].arg_type = A_BACKTICK;
1108 tmpstr = str_new(strlen(s));
1109 s = str_append_till(tmpstr,s+1,term,leave);
1110 while (!*s) { /* multiple line string? */
1111 s = str_gets(linestr, rsfp);
1114 fatal("EOF in string");
1117 s = str_append_till(tmpstr,s,term,leave);
1121 arg[1].arg_ptr.arg_str = tmpstr;
1125 s = tmpstr->str_ptr;
1126 while (*s) { /* see if we can make SINGLE */
1127 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1128 !index("`\"",term) )
1129 *s = '$'; /* grandfather \digit in subst */
1130 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1131 makesingle = FALSE; /* force interpretation */
1133 else if (*s == '\\' && s[1]) {
1138 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1140 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1143 len = scanreg(s,tokenbuf) - s;
1144 stabent(tokenbuf,TRUE); /* make sure it's created */
1149 else if (*s == '\\' && s[1]) {
1153 if (!makesingle && (!leave || index(leave,*s)))
1157 case '0': case '1': case '2': case '3':
1158 case '4': case '5': case '6': case '7':
1160 if (index("01234567",*s)) {
1164 if (index("01234567",*s)) {
1193 if (arg[1].arg_type == A_DOUBLE && makesingle)
1194 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
1196 tmpstr->str_cur = d - tmpstr->str_ptr; /* XXX cheat */
1197 arg[1].arg_ptr.arg_str = tmpstr;
1210 register FCMD *fprev = &froot;
1211 register FCMD *fcmd;
1214 register char tmpchar;
1217 while ((s = str_gets(linestr,rsfp)) != Nullch) {
1219 if (strEQ(s,".\n")) {
1221 return froot.f_next;
1225 flinebeg = Nullfcmd;
1228 fcmd = (FCMD *)safemalloc(sizeof (FCMD));
1229 bzero((char*)fcmd, sizeof (FCMD));
1230 fprev->f_next = fcmd;
1232 for (t=s; *t && *t != '@' && *t != '^'; t++) {
1240 fcmd->f_pre = savestr(s);
1241 fcmd->f_presize = strlen(s);
1246 fcmd->f_flags |= FC_NOBLANK;
1250 flinebeg = fcmd; /* start values here */
1252 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
1255 fcmd->f_type = F_LINES;
1259 fcmd->f_type = F_LEFT;
1264 fcmd->f_type = F_RIGHT;
1269 fcmd->f_type = F_CENTER;
1274 fcmd->f_type = F_LEFT;
1277 if (fcmd->f_flags & FC_CHOP && *s == '.') {
1278 fcmd->f_flags |= FC_MORE;
1286 if ((bufptr = str_gets(linestr ,rsfp)) == Nullch)
1289 if (strEQ(bufptr,".\n")) {
1290 yyerror("Missing values line");
1291 return froot.f_next;
1295 lex_newlines = TRUE;
1296 while (flinebeg || *bufptr) {
1299 yyerror("Bad value in format");
1304 yyerror("Missing value in format");
1308 yylval.arg = stab2arg(A_LVAL,yylval.stabval);
1312 yyerror("Extra value in format");
1314 flinebeg->f_expr = yylval.arg;
1316 flinebeg = flinebeg->f_next;
1317 } while (flinebeg && flinebeg->f_size == 0);
1324 lex_newlines = FALSE;
1328 bufptr = str_get(linestr);
1329 yyerror("Format not terminated");
1330 return froot.f_next;