1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32)
15 #include "../patchlevel.h"
/*
 * Forward declarations.
 *
 * oper1..oper5 each take a node type followed by that many int operands
 * and return an int; their definitions appear further down in this file.
 * walk() returns a STR * and is not defined in the visible part of this
 * file.
 */
25 int oper1(int type, int arg1);
26 int oper2(int type, int arg1, int arg2);
27 int oper3(int type, int arg1, int arg2, int arg3);
28 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
29 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
30 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
32 #if defined(OS2) || defined(WIN32)
33 static void usage(void);
38 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
39 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
40 printf("\n -D<number> sets debugging flags."
41 "\n -F<character> the awk script to translate is always invoked with"
43 "\n -n<fieldlist> specifies the names of the input fields if input does"
44 "\n not have to be split into an array."
45 "\n -<number> causes a2p to assume that input will always have that"
/*
 * main - entry point of the awk-to-perl translator.
 *
 * Visible steps, in order:
 *   - allocate the line buffer (linestr) and a scratch STR;
 *   - walk argv, handling arguments that start with '-' and have at least
 *     one more character: a numeric argument after "-X" is parsed into
 *     debug (yydebug takes its low bit), a leading digit sets maxfld, and
 *     the text after "-X" may be saved as namelist;
 *   - pick the script file name, open it for reading into rsfp, and call
 *     fatal() when it "doesn't seem to exist";
 *   - abort through fatal() on syntax errors;
 *   - run prewalk() and then walk() over the tree rooted at `root`;
 *   - build the perl preamble in `str`, append the translated program, and
 *     print a reminder about lines marked with "#???" when `checkers` is
 *     reported.
 *
 * NOTE(review): many lines of this function (switch labels, the parser
 * call, output of the result, the return) are not visible in this excerpt;
 * the summary above covers only what the visible lines show.
 */
52 main(register int argc, register char **argv, register char **env)
59 linestr = str_new(80);
60 str = str_new(0); /* first used for -I flags */
/* Skip the program name, then examine one argument per iteration. */
61 for (argc--,argv++; argc; argc--,argv++) {
62 if (argv[0][0] != '-' || !argv[0][1])
68 debug = atoi(argv[0]+2);
70 yydebug = (debug & 1);
74 case '0': case '1': case '2': case '3': case '4':
75 case '5': case '6': case '7': case '8': case '9':
76 maxfld = atoi(argv[0]+1);
83 namelist = savestr(argv[0]+2);
94 #if defined(OS2) || defined(WIN32)
95 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
98 fatal("Unrecognized switch: %s\n",argv[0]);
/* No more arguments: argv[0] is the null pointer that ends the vector. */
106 if (argv[0] == Nullch) {
107 #if defined(OS2) || defined(WIN32)
108 if ( isatty(fileno(stdin)) )
113 filename = savestr(argv[0]);
115 filename = savestr(argv[0]);
116 if (strEQ(filename,"-"))
121 rsfp = fopen(argv[0],"r");
123 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
127 bufptr = str_get(linestr);
131 /* now parse the report spec */
134 fatal("Translation aborted due to syntax errors.\n");
/* Dump of ops[]: index, type, operand count, name, then the operands. */
144 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
146 printf("\t\"%s\"\n",ops[i].cval),i++;
149 printf("\t%d",ops[i].ival),i++;
159 /* first pass to look for numeric variables */
161 prewalk(0,0,root,&i);
163 /* second pass to produce new program */
165 tmpstr = walk(0,0,root,&i,P_MIN);
/* Start the output with STARTPERL followed by a shell "eval 'exec" fallback. */
166 str = str_make(STARTPERL);
167 str_cat(str, "\neval 'exec ");
169 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
170 if $running_under_some_shell;\n\
171 # this emulates #! processing on NIH machines.\n\
172 # (remove #! line above if indigestible)\n\n");
174 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
176 " # process any FOO=bar switches\n\n");
177 if (do_opens && opens) {
182 str_scat(str,tmpstr);
191 "Please check my work on the %d line%s I've marked with \"#???\".\n",
192 checkers, checkers == 1 ? "" : "s" );
194 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Return helpers for a function that keeps its scan position in a local
 * named `s`.  Each macro expands to a complete `return` statement that
 * first stores `s` back into bufptr.
 *
 *   RETURN(v) - return v.
 *   XTERM(v)  - set expectterm to TRUE, then return v.
 *   XOP(v)    - set expectterm to FALSE, then return v.
 *   ID(x)     - store string(x,0) in yylval, set expectterm to FALSE and
 *               return idtype (which must be in scope at the use site).
 */
199 #define RETURN(retval) return (bufptr = s,retval)
200 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
201 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
202 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
209 register char *s = bufptr;
217 fprintf(stderr,"Tokener at %s",s);
219 fprintf(stderr,"Tokener at %s\n",s);
224 "Unrecognized character %c in file %s line %d--ignoring.\n",
229 if (*s && *s != '\n') {
230 yyerror("Ignoring spurious backslash");
235 s = str_get(linestr);
240 if ((s = str_gets(linestr, rsfp)) == Nullch) {
244 s = str_get(linestr);
255 yylval = string(s,0);
284 for (d = s + 1; isspace(*d); d++) ;
294 yylval = string("~",1);
312 yylval = string("**=",3);
314 yylval = string(s-1,2);
332 while (*s == ' ' || *s == '\t')
334 if (strnEQ(s,"getline",7))
342 yylval = string("==",2);
346 yylval = string("=",1);
352 yylval = string("!=",2);
356 yylval = string("!~",2);
365 yylval = string("<=",2);
374 yylval = string(">>",2);
378 yylval = string(">=",2);
386 while (isalpha(*s) || isdigit(*s) || *s == '_') \
406 for (d = s; isdigit(*s); s++) ;
407 yylval = string(d,s-d);
413 split_to_array = set_array_base = TRUE;
416 case '/': /* may either be division or pattern */
423 yylval = string("/=",2);
429 case '0': case '1': case '2': case '3': case '4':
430 case '5': case '6': case '7': case '8': case '9': case '.':
435 s = cpy2(tokenbuf,s,s[-1]);
437 fatal("String not terminated:\n%s",str_get(linestr));
439 yylval = string(tokenbuf,0);
445 set_array_base = TRUE;
446 if (strEQ(d,"ARGV")) {
447 yylval=numary(string("ARGV",0));
450 if (strEQ(d,"atan2")) {
457 if (strEQ(d,"break"))
459 if (strEQ(d,"BEGIN"))
464 if (strEQ(d,"continue"))
466 if (strEQ(d,"cos")) {
470 if (strEQ(d,"close")) {
475 if (strEQ(d,"chdir"))
477 else if (strEQ(d,"crypt"))
479 else if (strEQ(d,"chop"))
481 else if (strEQ(d,"chmod"))
483 else if (strEQ(d,"chown"))
490 if (strEQ(d,"delete"))
501 if (strEQ(d,"exit")) {
505 if (strEQ(d,"exp")) {
509 if (strEQ(d,"elsif"))
511 else if (strEQ(d,"eq"))
513 else if (strEQ(d,"eval"))
515 else if (strEQ(d,"eof"))
517 else if (strEQ(d,"each"))
519 else if (strEQ(d,"exec"))
526 if (saw_FS == 1 && in_begin) {
527 for (d = s; *d && isspace(*d); d++) ;
529 for (d++; *d && isspace(*d); d++) ;
530 if (*d == '"' && d[2] == '"')
538 else if (strEQ(d,"function"))
540 if (strEQ(d,"FILENAME"))
542 if (strEQ(d,"foreach"))
544 else if (strEQ(d,"format"))
546 else if (strEQ(d,"fork"))
548 else if (strEQ(d,"fh"))
553 if (strEQ(d,"getline"))
559 else if (strEQ(d,"gt"))
561 else if (strEQ(d,"goto"))
563 else if (strEQ(d,"gmtime"))
577 if (strEQ(d,"index")) {
578 set_array_base = TRUE;
581 if (strEQ(d,"int")) {
595 else if (strEQ(d,"kill"))
600 if (strEQ(d,"length")) {
604 if (strEQ(d,"log")) {
610 else if (strEQ(d,"local"))
612 else if (strEQ(d,"lt"))
614 else if (strEQ(d,"le"))
616 else if (strEQ(d,"locatime"))
618 else if (strEQ(d,"link"))
623 if (strEQ(d,"match")) {
624 set_array_base = TRUE;
633 do_chop = do_split = split_to_array = set_array_base = TRUE;
634 if (strEQ(d,"next")) {
643 if (strEQ(d,"ORS")) {
647 if (strEQ(d,"OFS")) {
651 if (strEQ(d,"OFMT")) {
656 else if (strEQ(d,"ord"))
658 else if (strEQ(d,"oct"))
663 if (strEQ(d,"print")) {
666 if (strEQ(d,"printf")) {
671 else if (strEQ(d,"pop"))
683 if (strEQ(d,"rand")) {
687 if (strEQ(d,"return"))
689 if (strEQ(d,"reset"))
691 else if (strEQ(d,"redo"))
693 else if (strEQ(d,"rename"))
698 if (strEQ(d,"split")) {
699 set_array_base = TRUE;
702 if (strEQ(d,"substr")) {
703 set_array_base = TRUE;
708 if (strEQ(d,"sprintf")) {
709 /* In old awk, { print sprintf("str%sg"),"in" } prints
710 * "string"; in new awk, "in" is not considered an argument to
711 * sprintf, so the statement breaks. To support both, the
712 * grammar treats arguments to SPRINTF_OLD like old awk,
713 * SPRINTF_NEW like new. Here we return the appropriate one.
715 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
717 if (strEQ(d,"sqrt")) {
721 if (strEQ(d,"SUBSEP")) {
724 if (strEQ(d,"sin")) {
728 if (strEQ(d,"srand")) {
732 if (strEQ(d,"system")) {
738 else if (strEQ(d,"shift"))
740 else if (strEQ(d,"select"))
742 else if (strEQ(d,"seek"))
744 else if (strEQ(d,"stat"))
746 else if (strEQ(d,"study"))
748 else if (strEQ(d,"sleep"))
750 else if (strEQ(d,"symlink"))
752 else if (strEQ(d,"sort"))
759 else if (strEQ(d,"tell"))
761 else if (strEQ(d,"time"))
763 else if (strEQ(d,"times"))
768 if (strEQ(d,"until"))
770 else if (strEQ(d,"unless"))
772 else if (strEQ(d,"umask"))
774 else if (strEQ(d,"unshift"))
776 else if (strEQ(d,"unlink"))
778 else if (strEQ(d,"utime"))
783 if (strEQ(d,"values"))
788 if (strEQ(d,"while"))
790 if (strEQ(d,"write"))
792 else if (strEQ(d,"wait"))
/*
 * scanpat - scan a search pattern beginning at s.
 *
 * Walks the input one character at a time, giving special treatment to
 * backslashes and to bracketed sections (from '[' up to ']').  The
 * collected text is taken from tokenbuf and stored in yylval through
 * string(tokenbuf,0).
 *
 * Errors: calls fatal() with "Search pattern not found" or "Search pattern
 * not terminated", each followed by the current contents of linestr.
 *
 * NOTE(review): the loop bodies, the declaration of `d` and the return
 * value are on lines not visible in this excerpt.
 */
812 scanpat(register char *s)
820 fatal("Search pattern not found:\n%s",str_get(linestr));
/* Advance source (s) and destination (d) together until the NUL. */
824 for (; *s; s++,d++) {
828 else if (s[1] == '\\')
830 else if (s[1] == '[')
833 else if (*s == '[') {
836 if (*s == '\\' && s[1])
838 if (*s == '/' || (*s == '-' && s[1] == ']'))
841 } while (*s && *s != ']');
850 fatal("Search pattern not terminated:\n%s",str_get(linestr));
852 yylval = string(tokenbuf,0);
859 fprintf(stderr,"%s in file %s at line %d\n",
/*
 * scannum - scan a numeric literal beginning at s.
 *
 * Handles input that starts with a digit or '.', consumes two runs of
 * digits, and then an optional exponent: 'e' or 'E' followed by a sign or
 * a digit.  The text is taken from tokenbuf and stored in yylval through
 * string(tokenbuf,0).
 *
 * NOTE(review): strchr() treats the terminating NUL as part of the searched
 * string, so strchr("eE",*s) is non-NULL when *s is '\0'; in that case the
 * test goes on to read s[1], one past the terminator.  Confirm the input
 * buffer always has a character (such as a newline) after a number.
 * NOTE(review): isdigit() is given a plain char; a value outside the
 * unsigned char range would be undefined behavior.
 * NOTE(review): the copy statements and the return value are on lines not
 * visible in this excerpt.
 */
864 scannum(register char *s)
869 case '1': case '2': case '3': case '4': case '5':
870 case '6': case '7': case '8': case '9': case '0' : case '.':
872 while (isdigit(*s)) {
878 while (isdigit(*s)) {
885 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
887 if (*s == '+' || *s == '-')
893 yylval = string(tokenbuf,0);
/*
 * string - append an OSTRING node holding a copy of ptr to the ops[] array.
 *
 * The header word is OSTRING plus an operand count of 1 (1<<8).  The next
 * slot receives a freshly allocated buffer of len+1 bytes into which len
 * bytes of ptr are copied and then NUL-terminated.
 *
 * ptr - source text.
 * len - number of bytes to copy.  Callers in this file often pass 0 with a
 *       NUL-terminated string; presumably a missing line replaces 0 with
 *       the string's length -- TODO confirm.
 *
 * NOTE(review): the return statement and the condition guarding the OPSMAX
 * diagnostic are on lines not visible in this excerpt.
 */
900 string(char *ptr, int len)
904 ops[mop++].ival = OSTRING + (1<<8);
907 ops[mop].cval = (char *) safemalloc(len+1);
908 strncpy(ops[mop].cval,ptr,len);
/* strncpy does not terminate when it copies exactly len bytes. */
909 ops[mop++].cval[len] = '\0';
911 fatal("Recompile a2p with larger OPSMAX\n");
921 fatal("type > 255 (%d)\n",type);
922 ops[mop++].ival = type;
924 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper1 - append an operator node with one operand to the ops[] array.
 *
 * The header word holds the node type plus the operand count shifted left
 * by 8 bits (1<<8); arg1 goes into the following slot.  Because the count
 * starts at bit 8, a type above 255 is reported through fatal().
 *
 * NOTE(review): the guard conditions and the return statement are on lines
 * not visible in this excerpt.  In the visible lines the OPSMAX diagnostic
 * comes after the stores -- confirm a store cannot land past the end of
 * ops[] before that check runs.
 */
929 oper1(int type, int arg1)
934 fatal("type > 255 (%d)\n",type);
935 ops[mop++].ival = type + (1<<8);
936 ops[mop++].ival = arg1;
938 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 - append an operator node with two operands to the ops[] array.
 *
 * The header word holds the node type plus the operand count shifted left
 * by 8 bits (2<<8); arg1 and arg2 go into the following slots in order.
 * Because the count starts at bit 8, a type above 255 is reported through
 * fatal().
 *
 * NOTE(review): the guard conditions and the return statement are on lines
 * not visible in this excerpt.  In the visible lines the OPSMAX diagnostic
 * comes after the stores -- confirm a store cannot land past the end of
 * ops[] before that check runs.
 */
943 oper2(int type, int arg1, int arg2)
948 fatal("type > 255 (%d)\n",type);
949 ops[mop++].ival = type + (2<<8);
950 ops[mop++].ival = arg1;
951 ops[mop++].ival = arg2;
953 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 - append an operator node with three operands to the ops[] array.
 *
 * The header word holds the node type plus the operand count shifted left
 * by 8 bits (3<<8); arg1..arg3 go into the following slots in order.
 * Because the count starts at bit 8, a type above 255 is reported through
 * fatal().
 *
 * NOTE(review): the guard conditions and the return statement are on lines
 * not visible in this excerpt.  In the visible lines the OPSMAX diagnostic
 * comes after the stores -- confirm a store cannot land past the end of
 * ops[] before that check runs.
 */
958 oper3(int type, int arg1, int arg2, int arg3)
963 fatal("type > 255 (%d)\n",type);
964 ops[mop++].ival = type + (3<<8);
965 ops[mop++].ival = arg1;
966 ops[mop++].ival = arg2;
967 ops[mop++].ival = arg3;
969 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 - append an operator node with four operands to the ops[] array.
 *
 * The header word holds the node type plus the operand count shifted left
 * by 8 bits (4<<8); arg1..arg4 go into the following slots in order.
 * Because the count starts at bit 8, a type above 255 is reported through
 * fatal().
 *
 * NOTE(review): the guard conditions and the return statement are on lines
 * not visible in this excerpt.  In the visible lines the OPSMAX diagnostic
 * comes after the stores -- confirm a store cannot land past the end of
 * ops[] before that check runs.
 */
974 oper4(int type, int arg1, int arg2, int arg3, int arg4)
979 fatal("type > 255 (%d)\n",type);
980 ops[mop++].ival = type + (4<<8);
981 ops[mop++].ival = arg1;
982 ops[mop++].ival = arg2;
983 ops[mop++].ival = arg3;
984 ops[mop++].ival = arg4;
986 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 - append an operator node with five operands to the ops[] array.
 *
 * The header word holds the node type plus the operand count shifted left
 * by 8 bits (5<<8); arg1..arg5 go into the following slots in order.
 * Because the count starts at bit 8, a type above 255 is reported through
 * fatal().
 *
 * NOTE(review): the guard conditions and the return statement are on lines
 * not visible in this excerpt.  In the visible lines the OPSMAX diagnostic
 * comes after the stores -- confirm a store cannot land past the end of
 * ops[] before that check runs.
 */
991 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
996 fatal("type > 255 (%d)\n",type);
997 ops[mop++].ival = type + (5<<8);
998 ops[mop++].ival = arg1;
999 ops[mop++].ival = arg2;
1000 ops[mop++].ival = arg3;
1001 ops[mop++].ival = arg4;
1002 ops[mop++].ival = arg5;
1004 fatal("Recompile a2p with larger OPSMAX\n");
1017 type = ops[branch].ival;
1020 for (i=depth; i; i--)
1022 if (type == OSTRING) {
1023 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1026 printf("(%-5d%s %d\n",branch,opname[type],len);
1028 for (i=1; i<=len; i++)
1029 dump(ops[branch+i].ival);
1031 for (i=depth; i; i--)
/*
 * bl - make sure node `arg` is represented as an OBLOCK node.
 *
 * The low 8 bits of a node's header word are its type and the bits above
 * them its operand count.  In the visible branches:
 *   - a node that is not an OBLOCK is wrapped: oper2(OBLOCK,arg,maybe);
 *   - an OBLOCK with fewer than two operands is rebuilt from its first
 *     operand and `maybe`.
 *
 * NOTE(review): the first branch of the if-chain and the final fall-through
 * result are on lines not visible in this excerpt.
 */
1038 bl(int arg, int maybe)
1042 else if ((ops[arg].ival & 255) != OBLOCK)
1043 return oper2(OBLOCK,arg,maybe);
1044 else if ((ops[arg].ival >> 8) < 2)
1045 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1056 for (s = str->str_ptr; *s; s++) {
1057 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1061 else if (*s == '\n') {
1062 for (t = s+1; isspace(*t & 127); t++) ;
1064 while (isspace(*t & 127) && *t != '\n') t--;
1065 if (*t == '\n' && t-s > 1) {
1078 register char *d, *s, *t, *e;
1079 register int pos, newpos;
1083 for (s = str->str_ptr; *s; s++) {
1092 else if (*s == '\t')
1094 if (pos > 78) { /* split a long line? */
1097 for (t = tokenbuf; isspace(*t & 127); t++) {
1104 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1108 while (d > tokenbuf &&
1109 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1114 while (d > tokenbuf &&
1115 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1120 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1125 while (d > tokenbuf && *d != ' ')
1135 if (d[-1] != ';' && !(newpos % 4)) {
1141 newpos += strlen(t);
1156 for (t = tokenbuf; *t; t++) {
1160 strcpy(t+strlen(t)-1, "\t#???\n");
1166 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1168 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1171 fputs(tokenbuf,stdout);
1180 key = walk(0,0,arg,&dummy,P_MIN);
1182 hstore(symtab,key->str_ptr,str_make("1"));
1184 set_array_base = TRUE;
/*
 * rememberargs - record the variables of an argument list in curarghash.
 *
 * The node type is the low 8 bits of ops[arg].ival.
 *   - OCOMMA: recurse into the operands stored at arg+1 and arg+3.
 *   - OVAR:   store `str` in curarghash under the name held by the string
 *             node that the variable's first operand refers to.
 *   - other:  fatal() with "panic: unknown argument type".
 *
 * NOTE(review): how `str` is created, any early exit for an empty argument
 * and the return value are on lines not visible in this excerpt.
 */
1189 rememberargs(int arg)
1196 type = ops[arg].ival & 255;
1197 if (type == OCOMMA) {
1198 rememberargs(ops[arg+1].ival);
1199 rememberargs(ops[arg+3].ival);
1201 else if (type == OVAR) {
1203 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1206 fatal("panic: unknown argument type %d, line %d\n",type,line);
1213 int type = ops[arg].ival & 255;
1216 if (type != OSTRING)
1217 fatal("panic: aryrefarg %d, line %d\n",type,line);
1218 str = hfetch(curarghash,ops[arg+1].cval);
/*
 * fixfargs - walk an argument list, counting arguments and converting
 * those marked "*" in curarghash.
 *
 * name     - index of a node whose next slot (name+1) holds the name text
 *            used to build hash keys.
 * arg      - index of the argument node to process.
 * prevargs - number of arguments already counted before this one.
 *
 * The node type is the low 8 bits of ops[arg].ival.
 *   - OCOMMA: recurse into the operands at arg+1 and arg+3, threading the
 *             running count through.
 *   - OVAR:   look the variable up in curarghash; when its value is "*",
 *             clear that value, retag the node as OSTAR, and store `str`
 *             under the key "<name>:<prevargs>".  The count becomes
 *             prevargs + 1.
 *   - other:  fatal() with "panic: unknown argument type".
 *
 * NOTE(review): sprintf() into tmpbuf is unbounded and tmpbuf's declaration
 * is not visible here; confirm it is large enough for the name plus ':' and
 * a decimal int.
 * NOTE(review): the "Adding ..." message goes to stderr with no visible
 * debug guard -- confirm this is intended.
 * NOTE(review): the lines that prepare `str` before hstore() and the return
 * statement are not visible in this excerpt.
 */
1225 fixfargs(int name, int arg, int prevargs)
1233 type = ops[arg].ival & 255;
1234 if (type == OCOMMA) {
1235 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1236 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1238 else if (type == OVAR) {
1239 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1240 if (strEQ(str_get(str),"*")) {
1243 str_set(str,""); /* in case another routine has this */
/* Replace the type bits (low 8) with OSTAR, keeping the operand count. */
1244 ops[arg].ival &= ~255;
1245 ops[arg].ival |= OSTAR;
1246 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1247 fprintf(stderr,"Adding %s\n",tmpbuf);
1250 hstore(curarghash,tmpbuf,str);
1252 numargs = prevargs + 1;
1255 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1256 type,prevargs+1,line);
/*
 * fixrargs - walk an argument list, retagging as OSTAR each argument whose
 * position is marked "*" in curarghash.
 *
 * name     - name text used to build the "<name>:<position>" hash key.
 * arg      - index of the argument node to process.
 * prevargs - number of arguments already counted before this one.
 *
 * The node type is the low 8 bits of ops[arg].ival.  An OCOMMA node
 * recurses into the operands at arg+1 and arg+3, threading the running
 * count through.  Otherwise the key is looked up; if the entry exists and
 * is "*", an OVAR or OSTAR node has its type bits set to OSTAR, and any
 * other node type leads to fatal() with "Can't pass expression by
 * reference".  The count becomes prevargs + 1.
 *
 * NOTE(review): the release of tmpbuf and the return statement are on lines
 * not visible in this excerpt -- confirm tmpbuf is freed after the lookup.
 */
1261 fixrargs(char *name, int arg, int prevargs)
1269 type = ops[arg].ival & 255;
1270 if (type == OCOMMA) {
1271 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1272 numargs = fixrargs(name,ops[arg+3].ival,numargs);
/* Room for name, ':', the decimal digits of an int, and the NUL. */
1275 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1276 sprintf(tmpbuf,"%s:%d",name,prevargs);
1277 str = hfetch(curarghash,tmpbuf);
1279 if (str && strEQ(str->str_ptr,"*")) {
1280 if (type == OVAR || type == OSTAR) {
1281 ops[arg].ival &= ~255;
1282 ops[arg].ival |= OSTAR;
1285 fatal("Can't pass expression by reference as arg %d of %s\n",
1288 numargs = prevargs + 1;