1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
33 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
34 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
35 printf("\n -D<number> sets debugging flags."
36 "\n -F<character> the awk script to translate is always invoked with"
38 "\n -n<fieldlist> specifies the names of the input fields if input does"
39 "\n not have to be split into an array."
40 "\n -<number> causes a2p to assume that input will always have that"
57 linestr = str_new(80);
58 str = str_new(0); /* first used for -I flags */
59 for (argc--,argv++; argc; argc--,argv++) {
60 if (argv[0][0] != '-' || !argv[0][1])
66 debug = atoi(argv[0]+2);
68 yydebug = (debug & 1);
72 case '0': case '1': case '2': case '3': case '4':
73 case '5': case '6': case '7': case '8': case '9':
74 maxfld = atoi(argv[0]+1);
81 namelist = savestr(argv[0]+2);
89 fatal("Unrecognized switch: %s\n",argv[0]);
99 if (argv[0] == Nullch) {
101 if ( isatty(fileno(stdin)) )
106 filename = savestr(argv[0]);
108 filename = savestr(argv[0]);
109 if (strEQ(filename,"-"))
114 rsfp = fopen(argv[0],"r");
116 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
120 bufptr = str_get(linestr);
124 /* now parse the report spec */
127 fatal("Translation aborted due to syntax errors.\n");
137 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
139 printf("\t\"%s\"\n",ops[i].cval),i++;
142 printf("\t%d",ops[i].ival),i++;
152 /* first pass to look for numeric variables */
154 prewalk(0,0,root,&i);
156 /* second pass to produce new program */
158 tmpstr = walk(0,0,root,&i,P_MIN);
159 str = str_make(STARTPERL);
160 str_cat(str, "\neval 'exec ");
162 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
163 if $running_under_some_shell;\n\
164 # this emulates #! processing on NIH machines.\n\
165 # (remove #! line above if indigestible)\n\n");
167 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
169 " # process any FOO=bar switches\n\n");
170 if (do_opens && opens) {
175 str_scat(str,tmpstr);
184 "Please check my work on the %d line%s I've marked with \"#???\".\n",
185 checkers, checkers == 1 ? "" : "s" );
187 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Return helpers.  Each macro expands to a complete `return` statement and
 * reads a variable named `s` from the scope in which it is expanded (it is
 * not a macro argument), so it can only be used where such an `s` exists.
 *
 *   RETURN(retval) - store s in bufptr, then return retval.
 *   XTERM(retval)  - set expectterm to TRUE, store s in bufptr, return retval.
 *   XOP(retval)    - set expectterm to FALSE, store s in bufptr, return retval.
 *   ID(x)          - set yylval to string(x,0), set expectterm to FALSE,
 *                    store s in bufptr, and return idtype.  idtype is not a
 *                    macro argument; it is resolved at the expansion site.
 *
 * The comma expressions evaluate left to right, so every assignment happens
 * before the returned value is evaluated.
 *
 * NOTE(review): expectterm presumably records whether a term or an operator
 * is expected next -- confirm against the code that reads it.
 */
192 #define RETURN(retval) return (bufptr = s,retval)
193 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
194 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
195 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
202 register char *s = bufptr;
210 fprintf(stderr,"Tokener at %s",s);
212 fprintf(stderr,"Tokener at %s\n",s);
217 "Unrecognized character %c in file %s line %d--ignoring.\n",
222 if (*s && *s != '\n') {
223 yyerror("Ignoring spurious backslash");
228 s = str_get(linestr);
233 if ((s = str_gets(linestr, rsfp)) == Nullch) {
237 s = str_get(linestr);
248 yylval = string(s,0);
273 for (d = s + 1; isspace(*d); d++) ;
283 yylval = string("~",1);
301 yylval = string("**=",3);
303 yylval = string(s-1,2);
321 while (*s == ' ' || *s == '\t')
323 if (strnEQ(s,"getline",7))
331 yylval = string("==",2);
335 yylval = string("=",1);
341 yylval = string("!=",2);
345 yylval = string("!~",2);
354 yylval = string("<=",2);
363 yylval = string(">>",2);
367 yylval = string(">=",2);
375 while (isalpha(*s) || isdigit(*s) || *s == '_') \
395 for (d = s; isdigit(*s); s++) ;
396 yylval = string(d,s-d);
402 split_to_array = set_array_base = TRUE;
405 case '/': /* may either be division or pattern */
412 yylval = string("/=",2);
418 case '0': case '1': case '2': case '3': case '4':
419 case '5': case '6': case '7': case '8': case '9': case '.':
424 s = cpy2(tokenbuf,s,s[-1]);
426 fatal("String not terminated:\n%s",str_get(linestr));
428 yylval = string(tokenbuf,0);
434 set_array_base = TRUE;
435 if (strEQ(d,"ARGV")) {
436 yylval=numary(string("ARGV",0));
439 if (strEQ(d,"atan2")) {
446 if (strEQ(d,"break"))
448 if (strEQ(d,"BEGIN"))
453 if (strEQ(d,"continue"))
455 if (strEQ(d,"cos")) {
459 if (strEQ(d,"close")) {
464 if (strEQ(d,"chdir"))
466 else if (strEQ(d,"crypt"))
468 else if (strEQ(d,"chop"))
470 else if (strEQ(d,"chmod"))
472 else if (strEQ(d,"chown"))
479 if (strEQ(d,"delete"))
490 if (strEQ(d,"exit")) {
494 if (strEQ(d,"exp")) {
498 if (strEQ(d,"elsif"))
500 else if (strEQ(d,"eq"))
502 else if (strEQ(d,"eval"))
504 else if (strEQ(d,"eof"))
506 else if (strEQ(d,"each"))
508 else if (strEQ(d,"exec"))
515 if (saw_FS == 1 && in_begin) {
516 for (d = s; *d && isspace(*d); d++) ;
518 for (d++; *d && isspace(*d); d++) ;
519 if (*d == '"' && d[2] == '"')
527 else if (strEQ(d,"function"))
529 if (strEQ(d,"FILENAME"))
531 if (strEQ(d,"foreach"))
533 else if (strEQ(d,"format"))
535 else if (strEQ(d,"fork"))
537 else if (strEQ(d,"fh"))
542 if (strEQ(d,"getline"))
548 else if (strEQ(d,"gt"))
550 else if (strEQ(d,"goto"))
552 else if (strEQ(d,"gmtime"))
566 if (strEQ(d,"index")) {
567 set_array_base = TRUE;
570 if (strEQ(d,"int")) {
584 else if (strEQ(d,"kill"))
589 if (strEQ(d,"length")) {
593 if (strEQ(d,"log")) {
599 else if (strEQ(d,"local"))
601 else if (strEQ(d,"lt"))
603 else if (strEQ(d,"le"))
605 else if (strEQ(d,"localtime")) /* Perl builtin is "localtime"; the old "locatime" spelling never matched it */
607 else if (strEQ(d,"link"))
612 if (strEQ(d,"match")) {
613 set_array_base = TRUE;
622 do_chop = do_split = split_to_array = set_array_base = TRUE;
623 if (strEQ(d,"next")) {
632 if (strEQ(d,"ORS")) {
636 if (strEQ(d,"OFS")) {
640 if (strEQ(d,"OFMT")) {
645 else if (strEQ(d,"ord"))
647 else if (strEQ(d,"oct"))
652 if (strEQ(d,"print")) {
655 if (strEQ(d,"printf")) {
660 else if (strEQ(d,"pop"))
672 if (strEQ(d,"rand")) {
676 if (strEQ(d,"return"))
678 if (strEQ(d,"reset"))
680 else if (strEQ(d,"redo"))
682 else if (strEQ(d,"rename"))
687 if (strEQ(d,"split")) {
688 set_array_base = TRUE;
691 if (strEQ(d,"substr")) {
692 set_array_base = TRUE;
697 if (strEQ(d,"sprintf"))
699 if (strEQ(d,"sqrt")) {
703 if (strEQ(d,"SUBSEP")) {
706 if (strEQ(d,"sin")) {
710 if (strEQ(d,"srand")) {
714 if (strEQ(d,"system")) {
720 else if (strEQ(d,"shift"))
722 else if (strEQ(d,"select"))
724 else if (strEQ(d,"seek"))
726 else if (strEQ(d,"stat"))
728 else if (strEQ(d,"study"))
730 else if (strEQ(d,"sleep"))
732 else if (strEQ(d,"symlink"))
734 else if (strEQ(d,"sort"))
741 else if (strEQ(d,"tell"))
743 else if (strEQ(d,"time"))
745 else if (strEQ(d,"times"))
750 if (strEQ(d,"until"))
752 else if (strEQ(d,"unless"))
754 else if (strEQ(d,"umask"))
756 else if (strEQ(d,"unshift"))
758 else if (strEQ(d,"unlink"))
760 else if (strEQ(d,"utime"))
765 if (strEQ(d,"values"))
770 if (strEQ(d,"while"))
772 if (strEQ(d,"write"))
774 else if (strEQ(d,"wait"))
803 fatal("Search pattern not found:\n%s",str_get(linestr));
807 for (; *s; s++,d++) {
811 else if (s[1] == '\\')
813 else if (s[1] == '[')
816 else if (*s == '[') {
819 if (*s == '\\' && s[1])
821 if (*s == '/' || (*s == '-' && s[1] == ']'))
824 } while (*s && *s != ']');
833 fatal("Search pattern not terminated:\n%s",str_get(linestr));
835 yylval = string(tokenbuf,0);
843 fprintf(stderr,"%s in file %s at line %d\n",
854 case '1': case '2': case '3': case '4': case '5':
855 case '6': case '7': case '8': case '9': case '0' : case '.':
857 while (isdigit(*s)) {
863 while (isdigit(*s)) {
870 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
872 if (*s == '+' || *s == '-')
878 yylval = string(tokenbuf,0);
891 ops[mop++].ival = OSTRING + (1<<8);
894 ops[mop].cval = safemalloc(len+1);
895 strncpy(ops[mop].cval,ptr,len);
896 ops[mop++].cval[len] = '\0';
898 fatal("Recompile a2p with larger OPSMAX\n");
909 fatal("type > 255 (%d)\n",type);
910 ops[mop++].ival = type;
912 fatal("Recompile a2p with larger OPSMAX\n");
924 fatal("type > 255 (%d)\n",type);
925 ops[mop++].ival = type + (1<<8);
926 ops[mop++].ival = arg1;
928 fatal("Recompile a2p with larger OPSMAX\n");
933 oper2(type,arg1,arg2)
941 fatal("type > 255 (%d)\n",type);
942 ops[mop++].ival = type + (2<<8);
943 ops[mop++].ival = arg1;
944 ops[mop++].ival = arg2;
946 fatal("Recompile a2p with larger OPSMAX\n");
951 oper3(type,arg1,arg2,arg3)
960 fatal("type > 255 (%d)\n",type);
961 ops[mop++].ival = type + (3<<8);
962 ops[mop++].ival = arg1;
963 ops[mop++].ival = arg2;
964 ops[mop++].ival = arg3;
966 fatal("Recompile a2p with larger OPSMAX\n");
971 oper4(type,arg1,arg2,arg3,arg4)
981 fatal("type > 255 (%d)\n",type);
982 ops[mop++].ival = type + (4<<8);
983 ops[mop++].ival = arg1;
984 ops[mop++].ival = arg2;
985 ops[mop++].ival = arg3;
986 ops[mop++].ival = arg4;
988 fatal("Recompile a2p with larger OPSMAX\n");
993 oper5(type,arg1,arg2,arg3,arg4,arg5)
1004 fatal("type > 255 (%d)\n",type);
1005 ops[mop++].ival = type + (5<<8);
1006 ops[mop++].ival = arg1;
1007 ops[mop++].ival = arg2;
1008 ops[mop++].ival = arg3;
1009 ops[mop++].ival = arg4;
1010 ops[mop++].ival = arg5;
1012 fatal("Recompile a2p with larger OPSMAX\n");
1026 type = ops[branch].ival;
1029 for (i=depth; i; i--)
1031 if (type == OSTRING) {
1032 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1035 printf("(%-5d%s %d\n",branch,opname[type],len);
1037 for (i=1; i<=len; i++)
1038 dump(ops[branch+i].ival);
1040 for (i=depth; i; i--)
1053 else if ((ops[arg].ival & 255) != OBLOCK)
1054 return oper2(OBLOCK,arg,maybe);
1055 else if ((ops[arg].ival >> 8) < 2)
1056 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1068 for (s = str->str_ptr; *s; s++) {
1069 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1073 else if (*s == '\n') {
1074 for (t = s+1; isspace(*t & 127); t++) ;
1076 while (isspace(*t & 127) && *t != '\n') t--;
1077 if (*t == '\n' && t-s > 1) {
1091 register char *d, *s, *t, *e;
1092 register int pos, newpos;
1096 for (s = str->str_ptr; *s; s++) {
1105 else if (*s == '\t')
1107 if (pos > 78) { /* split a long line? */
1110 for (t = tokenbuf; isspace(*t & 127); t++) {
1117 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1121 while (d > tokenbuf &&
1122 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1127 while (d > tokenbuf &&
1128 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1133 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1138 while (d > tokenbuf && *d != ' ')
1148 if (d[-1] != ';' && !(newpos % 4)) {
1154 newpos += strlen(t);
1169 for (t = tokenbuf; *t; t++) {
1173 strcpy(t+strlen(t)-1, "\t#???\n");
1179 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1181 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1184 fputs(tokenbuf,stdout);
1194 key = walk(0,0,arg,&dummy,P_MIN);
1196 hstore(symtab,key->str_ptr,str_make("1"));
1198 set_array_base = TRUE;
1211 type = ops[arg].ival & 255;
1212 if (type == OCOMMA) {
1213 rememberargs(ops[arg+1].ival);
1214 rememberargs(ops[arg+3].ival);
1216 else if (type == OVAR) {
1218 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1221 fatal("panic: unknown argument type %d, line %d\n",type,line);
1229 int type = ops[arg].ival & 255;
1232 if (type != OSTRING)
1233 fatal("panic: aryrefarg %d, line %d\n",type,line);
1234 str = hfetch(curarghash,ops[arg+1].cval);
1241 fixfargs(name,arg,prevargs)
1252 type = ops[arg].ival & 255;
1253 if (type == OCOMMA) {
1254 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1255 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1257 else if (type == OVAR) {
1258 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1259 if (strEQ(str_get(str),"*")) {
1262 str_set(str,""); /* in case another routine has this */
1263 ops[arg].ival &= ~255;
1264 ops[arg].ival |= OSTAR;
1265 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1266 fprintf(stderr,"Adding %s\n",tmpbuf);
1269 hstore(curarghash,tmpbuf,str);
1271 numargs = prevargs + 1;
1274 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1275 type,prevargs+1,line);
1280 fixrargs(name,arg,prevargs)
1291 type = ops[arg].ival & 255;
1292 if (type == OCOMMA) {
1293 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1294 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1299 sprintf(tmpbuf,"%s:%d",name,prevargs);
1300 str = hfetch(curarghash,tmpbuf);
1301 if (str && strEQ(str->str_ptr,"*")) {
1302 if (type == OVAR || type == OSTAR) {
1303 ops[arg].ival &= ~255;
1304 ops[arg].ival |= OSTAR;
1307 fatal("Can't pass expression by reference as arg %d of %s\n",
1310 numargs = prevargs + 1;