1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
22 int oper1(int type, int arg1);
23 int oper2(int type, int arg1, int arg2);
24 int oper3(int type, int arg1, int arg2, int arg3);
25 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
26 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
27 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
33 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
34 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
35 printf("\n -D<number> sets debugging flags."
36 "\n -F<character> the awk script to translate is always invoked with"
38 "\n -n<fieldlist> specifies the names of the input fields if input does"
39 "\n not have to be split into an array."
40 "\n -<number> causes a2p to assume that input will always have that"
/*
 * main - driver for the translator.  Only part of the body is visible in
 * this excerpt (the return type, braces and many statements are missing).
 *
 * Visible steps, in order:
 *   - walk argv while it holds switches (debug flags, a numeric field
 *     count, a field name list); anything else is reported as an
 *     unrecognized switch;
 *   - pick up the script file name (the name "-" is special-cased and
 *     stdin is tested with isatty when no name is given), open it for
 *     reading and fail if it cannot be opened;
 *   - parse the script; a syntax error aborts the translation;
 *   - print the contents of the ops[] table (index, type, length, name);
 *   - pass 1 (prewalk) looks for numeric variables, pass 2 (walk) produces
 *     the translated program text;
 *   - build the output in str: the STARTPERL line, an "eval 'exec ...'"
 *     shell preamble, the FOO=bar switch loop, then the translated text;
 *   - report how many lines were marked "#???" for manual checking.
 */
47 main(register int argc, register char **argv, register char **env)
54 linestr = str_new(80);
55 str = str_new(0); /* first used for -I flags */
/* Step through the arguments; the test below detects the first one that is
 * not a switch (a bare "-" does not count as a switch). */
56 for (argc--,argv++; argc; argc--,argv++) {
57 if (argv[0][0] != '-' || !argv[0][1])
/* Debug flags are the number following the two-character switch; bit 0 is
 * copied into yydebug. */
63 debug = atoi(argv[0]+2);
65 yydebug = (debug & 1);
/* A switch that starts with a digit is taken whole as the field count. */
69 case '0': case '1': case '2': case '3': case '4':
70 case '5': case '6': case '7': case '8': case '9':
71 maxfld = atoi(argv[0]+1);
78 namelist = savestr(argv[0]+2);
89 fatal("Unrecognized switch: %s\n",argv[0]);
99 if (argv[0] == Nullch) {
101 if ( isatty(fileno(stdin)) )
106 filename = savestr(argv[0]);
108 filename = savestr(argv[0]);
109 if (strEQ(filename,"-"))
114 rsfp = fopen(argv[0],"r");
116 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
120 bufptr = str_get(linestr);
124 /* now parse the report spec */
127 fatal("Translation aborted due to syntax errors.\n");
137 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
139 printf("\t\"%s\"\n",ops[i].cval),i++;
142 printf("\t%d",ops[i].ival),i++;
152 /* first pass to look for numeric variables */
154 prewalk(0,0,root,&i);
156 /* second pass to produce new program */
158 tmpstr = walk(0,0,root,&i,P_MIN);
159 str = str_make(STARTPERL);
160 str_cat(str, "\neval 'exec ");
162 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
163 if $running_under_some_shell;\n\
164 # this emulates #! processing on NIH machines.\n\
165 # (remove #! line above if indigestible)\n\n");
167 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
169 " # process any FOO=bar switches\n\n");
170 if (do_opens && opens) {
/* Append the text produced by the second pass to the preamble. */
175 str_scat(str,tmpstr);
184 "Please check my work on the %d line%s I've marked with \"#???\".\n",
185 checkers, checkers == 1 ? "" : "s" );
187 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Token-return helpers for the tokenizer.  Each expands to a complete
 * return statement and relies on a local scan pointer named s being in
 * scope; the current position is written back to bufptr before returning.
 *   RETURN(v) - save position, return v.
 *   XTERM(v)  - as RETURN, and set expectterm to TRUE.
 *   XOP(v)    - as RETURN, and set expectterm to FALSE.
 *   ID(x)     - store string(x,0) in yylval, set expectterm to FALSE,
 *               save position and return idtype.
 */
192 #define RETURN(retval) return (bufptr = s,retval)
193 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
194 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
195 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
/*
 * Tokenizer body.  The function header is outside this excerpt; the use of
 * yylval and yyerror suggests this is yylex -- TODO confirm.
 *
 * Scanning starts at bufptr (local s).  Visible behaviour:
 *   - a debug trace "Tokener at ..." is written to stderr;
 *   - unrecognized characters and stray backslashes are reported;
 *   - when the current line is used up, the next one is read from rsfp
 *     into linestr;
 *   - operator spellings are stored in yylval through string();
 *   - a long series of strEQ tests compares the current word d against awk
 *     keywords and builtins and against Perl keywords and builtins; what
 *     each match does is mostly not visible here;
 *   - several awk builtins (index, match, split, substr, ...) set
 *     set_array_base.
 */
202 register char *s = bufptr;
210 fprintf(stderr,"Tokener at %s",s);
212 fprintf(stderr,"Tokener at %s\n",s);
217 "Unrecognized character %c in file %s line %d--ignoring.\n",
222 if (*s && *s != '\n') {
223 yyerror("Ignoring spurious backslash");
228 s = str_get(linestr);
233 if ((s = str_gets(linestr, rsfp)) == Nullch) {
237 s = str_get(linestr);
248 yylval = string(s,0);
273 for (d = s + 1; isspace(*d); d++) ;
283 yylval = string("~",1);
301 yylval = string("**=",3);
303 yylval = string(s-1,2);
321 while (*s == ' ' || *s == '\t')
323 if (strnEQ(s,"getline",7))
331 yylval = string("==",2);
335 yylval = string("=",1);
341 yylval = string("!=",2);
345 yylval = string("!~",2);
354 yylval = string("<=",2);
363 yylval = string(">>",2);
367 yylval = string(">=",2);
375 while (isalpha(*s) || isdigit(*s) || *s == '_') \
395 for (d = s; isdigit(*s); s++) ;
396 yylval = string(d,s-d);
402 split_to_array = set_array_base = TRUE;
405 case '/': /* may either be division or pattern */
412 yylval = string("/=",2);
418 case '0': case '1': case '2': case '3': case '4':
419 case '5': case '6': case '7': case '8': case '9': case '.':
424 s = cpy2(tokenbuf,s,s[-1]);
426 fatal("String not terminated:\n%s",str_get(linestr));
428 yylval = string(tokenbuf,0);
434 set_array_base = TRUE;
435 if (strEQ(d,"ARGV")) {
436 yylval=numary(string("ARGV",0));
439 if (strEQ(d,"atan2")) {
446 if (strEQ(d,"break"))
448 if (strEQ(d,"BEGIN"))
453 if (strEQ(d,"continue"))
455 if (strEQ(d,"cos")) {
459 if (strEQ(d,"close")) {
464 if (strEQ(d,"chdir"))
466 else if (strEQ(d,"crypt"))
468 else if (strEQ(d,"chop"))
470 else if (strEQ(d,"chmod"))
472 else if (strEQ(d,"chown"))
479 if (strEQ(d,"delete"))
490 if (strEQ(d,"exit")) {
494 if (strEQ(d,"exp")) {
498 if (strEQ(d,"elsif"))
500 else if (strEQ(d,"eq"))
502 else if (strEQ(d,"eval"))
504 else if (strEQ(d,"eof"))
506 else if (strEQ(d,"each"))
508 else if (strEQ(d,"exec"))
515 if (saw_FS == 1 && in_begin) {
516 for (d = s; *d && isspace(*d); d++) ;
518 for (d++; *d && isspace(*d); d++) ;
/* NOTE(review): d[2] is read without first checking that d[1] is not the
 * terminating NUL; if the line ends right after the opening quote this
 * reads one byte past the string -- confirm the buffer guarantees. */
519 if (*d == '"' && d[2] == '"')
527 else if (strEQ(d,"function"))
529 if (strEQ(d,"FILENAME"))
531 if (strEQ(d,"foreach"))
533 else if (strEQ(d,"format"))
535 else if (strEQ(d,"fork"))
537 else if (strEQ(d,"fh"))
542 if (strEQ(d,"getline"))
548 else if (strEQ(d,"gt"))
550 else if (strEQ(d,"goto"))
552 else if (strEQ(d,"gmtime"))
566 if (strEQ(d,"index")) {
567 set_array_base = TRUE;
570 if (strEQ(d,"int")) {
584 else if (strEQ(d,"kill"))
589 if (strEQ(d,"length")) {
593 if (strEQ(d,"log")) {
599 else if (strEQ(d,"local"))
601 else if (strEQ(d,"lt"))
603 else if (strEQ(d,"le"))
/* NOTE(review): "locatime" looks like a typo for "localtime".  Every other
 * word in this chain is a Perl keyword or builtin (compare "gmtime" above),
 * so as written an identifier spelled localtime would not match here --
 * confirm and fix the literal. */
605 else if (strEQ(d,"locatime"))
607 else if (strEQ(d,"link"))
612 if (strEQ(d,"match")) {
613 set_array_base = TRUE;
622 do_chop = do_split = split_to_array = set_array_base = TRUE;
623 if (strEQ(d,"next")) {
632 if (strEQ(d,"ORS")) {
636 if (strEQ(d,"OFS")) {
640 if (strEQ(d,"OFMT")) {
645 else if (strEQ(d,"ord"))
647 else if (strEQ(d,"oct"))
652 if (strEQ(d,"print")) {
655 if (strEQ(d,"printf")) {
660 else if (strEQ(d,"pop"))
672 if (strEQ(d,"rand")) {
676 if (strEQ(d,"return"))
678 if (strEQ(d,"reset"))
680 else if (strEQ(d,"redo"))
682 else if (strEQ(d,"rename"))
687 if (strEQ(d,"split")) {
688 set_array_base = TRUE;
691 if (strEQ(d,"substr")) {
692 set_array_base = TRUE;
697 if (strEQ(d,"sprintf"))
699 if (strEQ(d,"sqrt")) {
703 if (strEQ(d,"SUBSEP")) {
706 if (strEQ(d,"sin")) {
710 if (strEQ(d,"srand")) {
714 if (strEQ(d,"system")) {
720 else if (strEQ(d,"shift"))
722 else if (strEQ(d,"select"))
724 else if (strEQ(d,"seek"))
726 else if (strEQ(d,"stat"))
728 else if (strEQ(d,"study"))
730 else if (strEQ(d,"sleep"))
732 else if (strEQ(d,"symlink"))
734 else if (strEQ(d,"sort"))
741 else if (strEQ(d,"tell"))
743 else if (strEQ(d,"time"))
745 else if (strEQ(d,"times"))
750 if (strEQ(d,"until"))
752 else if (strEQ(d,"unless"))
754 else if (strEQ(d,"umask"))
756 else if (strEQ(d,"unshift"))
758 else if (strEQ(d,"unlink"))
760 else if (strEQ(d,"utime"))
765 if (strEQ(d,"values"))
770 if (strEQ(d,"while"))
772 if (strEQ(d,"write"))
774 else if (strEQ(d,"wait"))
/*
 * scanpat - scan a search pattern starting at s.
 *
 * Visible behaviour: aborts with "Search pattern not found" or "Search
 * pattern not terminated" on malformed input; otherwise walks the pattern
 * with a source pointer s and a destination pointer d and finally stores
 * the contents of tokenbuf in yylval as a string op.  Backslash sequences
 * and bracket classes get special handling.  The return value and most of
 * the copy logic are not visible in this excerpt.
 */
794 scanpat(register char *s)
802 fatal("Search pattern not found:\n%s",str_get(linestr));
806 for (; *s; s++,d++) {
810 else if (s[1] == '\\')
812 else if (s[1] == '[')
/* Bracket class: consume up to the closing ']' in an inner loop. */
815 else if (*s == '[') {
818 if (*s == '\\' && s[1])
/* A '/' inside the class, and a '-' directly before ']', are singled out;
 * the action taken for them is not visible here. */
820 if (*s == '/' || (*s == '-' && s[1] == ']'))
823 } while (*s && *s != ']');
832 fatal("Search pattern not terminated:\n%s",str_get(linestr));
834 yylval = string(tokenbuf,0);
841 fprintf(stderr,"%s in file %s at line %d\n",
/*
 * scannum - scan a numeric literal starting at s.
 *
 * Handles input that begins with a digit or '.', consumes two runs of
 * digits and an optional exponent, then stores the contents of tokenbuf in
 * yylval as a string op.  The copy into tokenbuf and the return value are
 * not visible in this excerpt.
 */
846 scannum(register char *s)
851 case '1': case '2': case '3': case '4': case '5':
852 case '6': case '7': case '8': case '9': case '0' : case '.':
854 while (isdigit(*s)) {
860 while (isdigit(*s)) {
/* An exponent is accepted only when 'e' or 'E' is followed by a sign or a
 * digit.
 * NOTE(review): strchr also matches the terminating NUL, so when *s is
 * '\0' the first test succeeds and s[1] is read past the end of the
 * string -- confirm that the input is always newline-terminated here. */
867 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
869 if (*s == '+' || *s == '-')
875 yylval = string(tokenbuf,0);
/*
 * string - append a string op to the ops[] table.
 *
 * Writes a header slot of OSTRING + (1<<8) followed by a slot whose cval
 * points at a freshly allocated, NUL-terminated copy of the first len bytes
 * of ptr.  Aborts with a "larger OPSMAX" message when the table is full
 * (the test itself is not visible here).
 *
 * Callers in this file pass len == 0 for whole NUL-terminated strings; the
 * code that handles that case is not visible in this excerpt -- presumably
 * the length is computed from ptr.  The return value is assigned to yylval
 * by callers; what it holds is not visible here.
 */
882 string(char *ptr, int len)
886 ops[mop++].ival = OSTRING + (1<<8);
889 ops[mop].cval = (char *) safemalloc(len+1);
/* strncpy does not terminate the copy itself; the next line does. */
890 strncpy(ops[mop].cval,ptr,len);
891 ops[mop++].cval[len] = '\0';
893 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Fragment of an op builder whose header is outside this excerpt --
 * presumably the zero-argument sibling of oper1..oper5 (TODO confirm).
 * It stores the bare type in the next ops[] slot, and carries the same
 * "type > 255" and "larger OPSMAX" aborts as the other builders.
 */
903 fatal("type > 255 (%d)\n",type);
904 ops[mop++].ival = type;
906 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper1 - append a one-argument op to the ops[] table.
 *
 * Writes a header slot of type + (1<<8), i.e. the op type with an argument
 * count of 1 above the low eight bits, followed by arg1.  Aborts when type
 * exceeds 255 or when the table is full; the tests and the return statement
 * are not visible in this excerpt.
 */
911 oper1(int type, int arg1)
916 fatal("type > 255 (%d)\n",type);
917 ops[mop++].ival = type + (1<<8);
918 ops[mop++].ival = arg1;
920 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 - append a two-argument op to the ops[] table.
 *
 * Writes a header slot of type + (2<<8), i.e. the op type with an argument
 * count of 2 above the low eight bits, followed by arg1 and arg2.  Aborts
 * when type exceeds 255 or when the table is full; the tests and the return
 * statement are not visible in this excerpt.
 */
925 oper2(int type, int arg1, int arg2)
930 fatal("type > 255 (%d)\n",type);
931 ops[mop++].ival = type + (2<<8);
932 ops[mop++].ival = arg1;
933 ops[mop++].ival = arg2;
935 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 - append a three-argument op to the ops[] table.
 *
 * Writes a header slot of type + (3<<8), i.e. the op type with an argument
 * count of 3 above the low eight bits, followed by arg1..arg3.  Aborts when
 * type exceeds 255 or when the table is full; the tests and the return
 * statement are not visible in this excerpt.
 */
940 oper3(int type, int arg1, int arg2, int arg3)
945 fatal("type > 255 (%d)\n",type);
946 ops[mop++].ival = type + (3<<8);
947 ops[mop++].ival = arg1;
948 ops[mop++].ival = arg2;
949 ops[mop++].ival = arg3;
951 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 - append a four-argument op to the ops[] table.
 *
 * Writes a header slot of type + (4<<8), i.e. the op type with an argument
 * count of 4 above the low eight bits, followed by arg1..arg4.  Aborts when
 * type exceeds 255 or when the table is full; the tests and the return
 * statement are not visible in this excerpt.
 */
956 oper4(int type, int arg1, int arg2, int arg3, int arg4)
961 fatal("type > 255 (%d)\n",type);
962 ops[mop++].ival = type + (4<<8);
963 ops[mop++].ival = arg1;
964 ops[mop++].ival = arg2;
965 ops[mop++].ival = arg3;
966 ops[mop++].ival = arg4;
968 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 - append a five-argument op to the ops[] table.
 *
 * Writes a header slot of type + (5<<8), i.e. the op type with an argument
 * count of 5 above the low eight bits, followed by arg1..arg5.  Aborts when
 * type exceeds 255 or when the table is full; the tests and the return
 * statement are not visible in this excerpt.
 */
973 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (5<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
983 ops[mop++].ival = arg4;
984 ops[mop++].ival = arg5;
986 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Fragment of a recursive op-tree printer (it calls dump on its children,
 * so this is presumably the body of dump; the header is outside this
 * excerpt).
 *
 * For the op at index branch: an OSTRING op is printed as its index and the
 * quoted string held in the following slot; any other op is printed as
 * "(index name len" and each of the len slots after the header is dumped
 * in turn.  The two loops over depth have bodies that are not visible here
 * -- presumably indentation.  How type and len are split out of the header
 * word is also not visible.
 */
999 type = ops[branch].ival;
1002 for (i=depth; i; i--)
1004 if (type == OSTRING) {
1005 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1008 printf("(%-5d%s %d\n",branch,opname[type],len);
1010 for (i=1; i<=len; i++)
1011 dump(ops[branch+i].ival);
1013 for (i=depth; i; i--)
/*
 * bl - make sure op arg is wrapped in an OBLOCK op carrying maybe.
 *
 * Visible cases (a leading case and a final else are outside this excerpt):
 *   - arg is not an OBLOCK: return a new OBLOCK op with arg and maybe as
 *     its two arguments;
 *   - arg is an OBLOCK with fewer than two arguments: return a new OBLOCK
 *     op built from its first argument plus maybe.
 * The op type lives in the low eight bits of the header word and the
 * argument count in the bits above.
 */
1020 bl(int arg, int maybe)
1024 else if ((ops[arg].ival & 255) != OBLOCK)
1025 return oper2(OBLOCK,arg,maybe);
1026 else if ((ops[arg].ival >> 8) < 2)
1027 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1038 for (s = str->str_ptr; *s; s++) {
1039 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1043 else if (*s == '\n') {
1044 for (t = s+1; isspace(*t & 127); t++) ;
1046 while (isspace(*t & 127) && *t != '\n') t--;
1047 if (*t == '\n' && t-s > 1) {
1060 register char *d, *s, *t, *e;
1061 register int pos, newpos;
1065 for (s = str->str_ptr; *s; s++) {
1074 else if (*s == '\t')
1076 if (pos > 78) { /* split a long line? */
1079 for (t = tokenbuf; isspace(*t & 127); t++) {
1086 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1090 while (d > tokenbuf &&
1091 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1096 while (d > tokenbuf &&
1097 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1102 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1107 while (d > tokenbuf && *d != ' ')
1117 if (d[-1] != ';' && !(newpos % 4)) {
1123 newpos += strlen(t);
1138 for (t = tokenbuf; *t; t++) {
1142 strcpy(t+strlen(t)-1, "\t#???\n");
1148 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1150 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1153 fputs(tokenbuf,stdout);
1162 key = walk(0,0,arg,&dummy,P_MIN);
1164 hstore(symtab,key->str_ptr,str_make("1"));
1166 set_array_base = TRUE;
/*
 * rememberargs - record the variables of an argument list in curarghash.
 *
 * arg is an op index.  An OCOMMA op is walked recursively through the ops
 * referenced by slots arg+1 and arg+3 (slot arg+2 is not used here).  An
 * OVAR op stores str in curarghash under the variable name, which is the
 * string held by the op that slot arg+1 refers to.  Any other op type is a
 * fatal "unknown argument type" panic.  How str is initialized is not
 * visible in this excerpt.
 */
1171 rememberargs(int arg)
1178 type = ops[arg].ival & 255;
1179 if (type == OCOMMA) {
1180 rememberargs(ops[arg+1].ival);
1181 rememberargs(ops[arg+3].ival);
1183 else if (type == OVAR) {
1185 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1188 fatal("panic: unknown argument type %d, line %d\n",type,line);
/*
 * Fragment of aryrefarg (name taken from the panic message below; the
 * header is outside this excerpt).  It requires the op at arg to be an
 * OSTRING and looks its string up in curarghash.  What is done with the
 * fetched entry is not visible here.
 */
1195 int type = ops[arg].ival & 255;
1198 if (type != OSTRING)
1199 fatal("panic: aryrefarg %d, line %d\n",type,line);
1200 str = hfetch(curarghash,ops[arg+1].cval);
/*
 * fixfargs - walk the argument list arg of the function whose name op is
 * name, counting arguments from prevargs.
 *
 * An OCOMMA op is walked recursively through slots arg+1 and arg+3, with
 * the count threaded through the return value.  For an OVAR op the entry
 * for the variable name is fetched from curarghash; if its value is "*" the
 * entry is reset to "", the op's type bits are rewritten to OSTAR, and an
 * entry is stored in curarghash under the key "<function name>:<prevargs>".
 * Each non-comma argument advances the count to prevargs + 1.  Any other
 * op type is a fatal panic.  The return statement is not visible here; the
 * recursive calls show that the result is the running argument count.
 */
1207 fixfargs(int name, int arg, int prevargs)
1215 type = ops[arg].ival & 255;
1216 if (type == OCOMMA) {
1217 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1218 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1220 else if (type == OVAR) {
1221 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1222 if (strEQ(str_get(str),"*")) {
1225 str_set(str,""); /* in case another routine has this */
/* Replace the low eight type bits, keeping the argument count above them. */
1226 ops[arg].ival &= ~255;
1227 ops[arg].ival |= OSTAR;
/* NOTE(review): unbounded sprintf; tmpbuf is declared outside this excerpt,
 * so whether a long function name can overflow it cannot be checked here. */
1228 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1229 fprintf(stderr,"Adding %s\n",tmpbuf);
1232 hstore(curarghash,tmpbuf,str);
1234 numargs = prevargs + 1;
1237 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1238 type,prevargs+1,line);
/*
 * fixrargs - walk the argument list arg of a use of the function called
 * name, counting arguments from prevargs.
 *
 * An OCOMMA op is walked recursively through slots arg+1 and arg+3, with
 * the count threaded through the return value.  For any other op the key
 * "<name>:<prevargs>" is looked up in curarghash; when an entry exists and
 * its value is "*", the argument must be an OVAR or OSTAR op and its type
 * bits are rewritten to OSTAR, otherwise the translation aborts with
 * "Can't pass expression by reference".  Each non-comma argument advances
 * the count to prevargs + 1.  The return statement and the release of
 * tmpbuf are not visible in this excerpt.
 */
1243 fixrargs(char *name, int arg, int prevargs)
1251 type = ops[arg].ival & 255;
1252 if (type == OCOMMA) {
1253 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1254 numargs = fixrargs(name,ops[arg+3].ival,numargs);
/* Key buffer: the name, plus three bytes per byte of int for the decimal
 * digits, plus five spare bytes that cover ':' and the terminating NUL. */
1257 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1258 sprintf(tmpbuf,"%s:%d",name,prevargs);
1259 str = hfetch(curarghash,tmpbuf);
1261 if (str && strEQ(str->str_ptr,"*")) {
1262 if (type == OVAR || type == OSTAR) {
/* Replace the low eight type bits, keeping the argument count above them. */
1263 ops[arg].ival &= ~255;
1264 ops[arg].ival |= OSTAR;
1267 fatal("Can't pass expression by reference as arg %d of %s\n",
1270 numargs = prevargs + 1;