1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
33 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
34 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
35 printf("\n -D<number> sets debugging flags."
36 "\n -F<character> the awk script to translate is always invoked with"
38 "\n -n<fieldlist> specifies the names of the input fields if input does"
39 "\n not have to be split into an array."
40 "\n -<number> causes a2p to assume that input will always have that"
57 linestr = str_new(80);
58 str = str_new(0); /* first used for -I flags */
59 for (argc--,argv++; argc; argc--,argv++) {
60 if (argv[0][0] != '-' || !argv[0][1])
66 debug = atoi(argv[0]+2);
68 yydebug = (debug & 1);
72 case '0': case '1': case '2': case '3': case '4':
73 case '5': case '6': case '7': case '8': case '9':
74 maxfld = atoi(argv[0]+1);
81 namelist = savestr(argv[0]+2);
92 fatal("Unrecognized switch: %s\n",argv[0]);
102 if (argv[0] == Nullch) {
104 if ( isatty(fileno(stdin)) )
109 filename = savestr(argv[0]);
111 filename = savestr(argv[0]);
112 if (strEQ(filename,"-"))
117 rsfp = fopen(argv[0],"r");
119 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
123 bufptr = str_get(linestr);
127 /* now parse the report spec */
130 fatal("Translation aborted due to syntax errors.\n");
140 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
142 printf("\t\"%s\"\n",ops[i].cval),i++;
145 printf("\t%d",ops[i].ival),i++;
155 /* first pass to look for numeric variables */
157 prewalk(0,0,root,&i);
159 /* second pass to produce new program */
161 tmpstr = walk(0,0,root,&i,P_MIN);
162 str = str_make(STARTPERL);
163 str_cat(str, "\neval 'exec ");
165 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
166 if $running_under_some_shell;\n\
167 # this emulates #! processing on NIH machines.\n\
168 # (remove #! line above if indigestible)\n\n");
170 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
172 " # process any FOO=bar switches\n\n");
173 if (do_opens && opens) {
178 str_scat(str,tmpstr);
187 "Please check my work on the %d line%s I've marked with \"#???\".\n",
188 checkers, checkers == 1 ? "" : "s" );
190 "The operation I've selected may be wrong for the operand types.\n");
195 #define RETURN(retval) return (bufptr = s,retval)
196 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
197 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
198 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
205 register char *s = bufptr;
213 fprintf(stderr,"Tokener at %s",s);
215 fprintf(stderr,"Tokener at %s\n",s);
220 "Unrecognized character %c in file %s line %d--ignoring.\n",
225 if (*s && *s != '\n') {
226 yyerror("Ignoring spurious backslash");
231 s = str_get(linestr);
236 if ((s = str_gets(linestr, rsfp)) == Nullch) {
240 s = str_get(linestr);
251 yylval = string(s,0);
276 for (d = s + 1; isspace(*d); d++) ;
286 yylval = string("~",1);
304 yylval = string("**=",3);
306 yylval = string(s-1,2);
324 while (*s == ' ' || *s == '\t')
326 if (strnEQ(s,"getline",7))
334 yylval = string("==",2);
338 yylval = string("=",1);
344 yylval = string("!=",2);
348 yylval = string("!~",2);
357 yylval = string("<=",2);
366 yylval = string(">>",2);
370 yylval = string(">=",2);
378 while (isalpha(*s) || isdigit(*s) || *s == '_') \
398 for (d = s; isdigit(*s); s++) ;
399 yylval = string(d,s-d);
405 split_to_array = set_array_base = TRUE;
408 case '/': /* may either be division or pattern */
415 yylval = string("/=",2);
421 case '0': case '1': case '2': case '3': case '4':
422 case '5': case '6': case '7': case '8': case '9': case '.':
427 s = cpy2(tokenbuf,s,s[-1]);
429 fatal("String not terminated:\n%s",str_get(linestr));
431 yylval = string(tokenbuf,0);
437 set_array_base = TRUE;
438 if (strEQ(d,"ARGV")) {
439 yylval=numary(string("ARGV",0));
442 if (strEQ(d,"atan2")) {
449 if (strEQ(d,"break"))
451 if (strEQ(d,"BEGIN"))
456 if (strEQ(d,"continue"))
458 if (strEQ(d,"cos")) {
462 if (strEQ(d,"close")) {
467 if (strEQ(d,"chdir"))
469 else if (strEQ(d,"crypt"))
471 else if (strEQ(d,"chop"))
473 else if (strEQ(d,"chmod"))
475 else if (strEQ(d,"chown"))
482 if (strEQ(d,"delete"))
493 if (strEQ(d,"exit")) {
497 if (strEQ(d,"exp")) {
501 if (strEQ(d,"elsif"))
503 else if (strEQ(d,"eq"))
505 else if (strEQ(d,"eval"))
507 else if (strEQ(d,"eof"))
509 else if (strEQ(d,"each"))
511 else if (strEQ(d,"exec"))
518 if (saw_FS == 1 && in_begin) {
519 for (d = s; *d && isspace(*d); d++) ;
521 for (d++; *d && isspace(*d); d++) ;
522 if (*d == '"' && d[1] && d[2] == '"') /* d[1] guard: never index past the terminator when the line ends right after the opening quote */
530 else if (strEQ(d,"function"))
532 if (strEQ(d,"FILENAME"))
534 if (strEQ(d,"foreach"))
536 else if (strEQ(d,"format"))
538 else if (strEQ(d,"fork"))
540 else if (strEQ(d,"fh"))
545 if (strEQ(d,"getline"))
551 else if (strEQ(d,"gt"))
553 else if (strEQ(d,"goto"))
555 else if (strEQ(d,"gmtime"))
569 if (strEQ(d,"index")) {
570 set_array_base = TRUE;
573 if (strEQ(d,"int")) {
587 else if (strEQ(d,"kill"))
592 if (strEQ(d,"length")) {
596 if (strEQ(d,"log")) {
602 else if (strEQ(d,"local"))
604 else if (strEQ(d,"lt"))
606 else if (strEQ(d,"le"))
608 else if (strEQ(d,"localtime")) /* NOTE(review): previously compared against "locatime"; siblings in this chain are all Perl words, so the builtin's name is presumably what was meant */
610 else if (strEQ(d,"link"))
615 if (strEQ(d,"match")) {
616 set_array_base = TRUE;
625 do_chop = do_split = split_to_array = set_array_base = TRUE;
626 if (strEQ(d,"next")) {
635 if (strEQ(d,"ORS")) {
639 if (strEQ(d,"OFS")) {
643 if (strEQ(d,"OFMT")) {
648 else if (strEQ(d,"ord"))
650 else if (strEQ(d,"oct"))
655 if (strEQ(d,"print")) {
658 if (strEQ(d,"printf")) {
663 else if (strEQ(d,"pop"))
675 if (strEQ(d,"rand")) {
679 if (strEQ(d,"return"))
681 if (strEQ(d,"reset"))
683 else if (strEQ(d,"redo"))
685 else if (strEQ(d,"rename"))
690 if (strEQ(d,"split")) {
691 set_array_base = TRUE;
694 if (strEQ(d,"substr")) {
695 set_array_base = TRUE;
700 if (strEQ(d,"sprintf"))
702 if (strEQ(d,"sqrt")) {
706 if (strEQ(d,"SUBSEP")) {
709 if (strEQ(d,"sin")) {
713 if (strEQ(d,"srand")) {
717 if (strEQ(d,"system")) {
723 else if (strEQ(d,"shift"))
725 else if (strEQ(d,"select"))
727 else if (strEQ(d,"seek"))
729 else if (strEQ(d,"stat"))
731 else if (strEQ(d,"study"))
733 else if (strEQ(d,"sleep"))
735 else if (strEQ(d,"symlink"))
737 else if (strEQ(d,"sort"))
744 else if (strEQ(d,"tell"))
746 else if (strEQ(d,"time"))
748 else if (strEQ(d,"times"))
753 if (strEQ(d,"until"))
755 else if (strEQ(d,"unless"))
757 else if (strEQ(d,"umask"))
759 else if (strEQ(d,"unshift"))
761 else if (strEQ(d,"unlink"))
763 else if (strEQ(d,"utime"))
768 if (strEQ(d,"values"))
773 if (strEQ(d,"while"))
775 if (strEQ(d,"write"))
777 else if (strEQ(d,"wait"))
806 fatal("Search pattern not found:\n%s",str_get(linestr));
810 for (; *s; s++,d++) {
814 else if (s[1] == '\\')
816 else if (s[1] == '[')
819 else if (*s == '[') {
822 if (*s == '\\' && s[1])
824 if (*s == '/' || (*s == '-' && s[1] == ']'))
827 } while (*s && *s != ']');
836 fatal("Search pattern not terminated:\n%s",str_get(linestr));
838 yylval = string(tokenbuf,0);
846 fprintf(stderr,"%s in file %s at line %d\n",
857 case '1': case '2': case '3': case '4': case '5':
858 case '6': case '7': case '8': case '9': case '0' : case '.':
860 while (isdigit(*s)) {
866 while (isdigit(*s)) {
873 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) { /* strchr() also matches the terminating NUL, so test for NUL first to avoid treating end-of-buffer as an exponent and reading s[1] beyond it */
875 if (*s == '+' || *s == '-')
881 yylval = string(tokenbuf,0);
894 ops[mop++].ival = OSTRING + (1<<8);
897 ops[mop].cval = safemalloc(len+1);
898 strncpy(ops[mop].cval,ptr,len);
899 ops[mop++].cval[len] = '\0';
901 fatal("Recompile a2p with larger OPSMAX\n");
912 fatal("type > 255 (%d)\n",type);
913 ops[mop++].ival = type;
915 fatal("Recompile a2p with larger OPSMAX\n");
927 fatal("type > 255 (%d)\n",type);
928 ops[mop++].ival = type + (1<<8);
929 ops[mop++].ival = arg1;
931 fatal("Recompile a2p with larger OPSMAX\n");
936 oper2(type,arg1,arg2)
944 fatal("type > 255 (%d)\n",type);
945 ops[mop++].ival = type + (2<<8);
946 ops[mop++].ival = arg1;
947 ops[mop++].ival = arg2;
949 fatal("Recompile a2p with larger OPSMAX\n");
954 oper3(type,arg1,arg2,arg3)
963 fatal("type > 255 (%d)\n",type);
964 ops[mop++].ival = type + (3<<8);
965 ops[mop++].ival = arg1;
966 ops[mop++].ival = arg2;
967 ops[mop++].ival = arg3;
969 fatal("Recompile a2p with larger OPSMAX\n");
974 oper4(type,arg1,arg2,arg3,arg4)
984 fatal("type > 255 (%d)\n",type);
985 ops[mop++].ival = type + (4<<8);
986 ops[mop++].ival = arg1;
987 ops[mop++].ival = arg2;
988 ops[mop++].ival = arg3;
989 ops[mop++].ival = arg4;
991 fatal("Recompile a2p with larger OPSMAX\n");
996 oper5(type,arg1,arg2,arg3,arg4,arg5)
1007 fatal("type > 255 (%d)\n",type);
1008 ops[mop++].ival = type + (5<<8);
1009 ops[mop++].ival = arg1;
1010 ops[mop++].ival = arg2;
1011 ops[mop++].ival = arg3;
1012 ops[mop++].ival = arg4;
1013 ops[mop++].ival = arg5;
1015 fatal("Recompile a2p with larger OPSMAX\n");
1029 type = ops[branch].ival;
1032 for (i=depth; i; i--)
1034 if (type == OSTRING) {
1035 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1038 printf("(%-5d%s %d\n",branch,opname[type],len);
1040 for (i=1; i<=len; i++)
1041 dump(ops[branch+i].ival);
1043 for (i=depth; i; i--)
1056 else if ((ops[arg].ival & 255) != OBLOCK)
1057 return oper2(OBLOCK,arg,maybe);
1058 else if ((ops[arg].ival >> 8) < 2)
1059 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1071 for (s = str->str_ptr; *s; s++) {
1072 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1076 else if (*s == '\n') {
1077 for (t = s+1; isspace(*t & 127); t++) ;
1079 while (isspace(*t & 127) && *t != '\n') t--;
1080 if (*t == '\n' && t-s > 1) {
1094 register char *d, *s, *t, *e;
1095 register int pos, newpos;
1099 for (s = str->str_ptr; *s; s++) {
1108 else if (*s == '\t')
1110 if (pos > 78) { /* split a long line? */
1113 for (t = tokenbuf; isspace(*t & 127); t++) {
1120 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1124 while (d > tokenbuf &&
1125 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1130 while (d > tokenbuf &&
1131 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1136 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1141 while (d > tokenbuf && *d != ' ')
1151 if (d[-1] != ';' && !(newpos % 4)) {
1157 newpos += strlen(t);
1172 for (t = tokenbuf; *t; t++) {
1176 strcpy(t+strlen(t)-1, "\t#???\n");
1182 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1184 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1187 fputs(tokenbuf,stdout);
1197 key = walk(0,0,arg,&dummy,P_MIN);
1199 hstore(symtab,key->str_ptr,str_make("1"));
1201 set_array_base = TRUE;
1214 type = ops[arg].ival & 255;
1215 if (type == OCOMMA) {
1216 rememberargs(ops[arg+1].ival);
1217 rememberargs(ops[arg+3].ival);
1219 else if (type == OVAR) {
1221 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1224 fatal("panic: unknown argument type %d, line %d\n",type,line);
1232 int type = ops[arg].ival & 255;
1235 if (type != OSTRING)
1236 fatal("panic: aryrefarg %d, line %d\n",type,line);
1237 str = hfetch(curarghash,ops[arg+1].cval);
1244 fixfargs(name,arg,prevargs)
1255 type = ops[arg].ival & 255;
1256 if (type == OCOMMA) {
1257 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1258 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1260 else if (type == OVAR) {
1261 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1262 if (strEQ(str_get(str),"*")) {
1265 str_set(str,""); /* in case another routine has this */
1266 ops[arg].ival &= ~255;
1267 ops[arg].ival |= OSTAR;
1268 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1269 fprintf(stderr,"Adding %s\n",tmpbuf);
1272 hstore(curarghash,tmpbuf,str);
1274 numargs = prevargs + 1;
1277 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1278 type,prevargs+1,line);
1283 fixrargs(name,arg,prevargs)
1294 type = ops[arg].ival & 255;
1295 if (type == OCOMMA) {
1296 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1297 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1300 char *tmpbuf = safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1301 sprintf(tmpbuf,"%s:%d",name,prevargs);
1302 str = hfetch(curarghash,tmpbuf);
1304 if (str && strEQ(str->str_ptr,"*")) {
1305 if (type == OVAR || type == OSTAR) {
1306 ops[arg].ival &= ~255;
1307 ops[arg].ival |= OSTAR;
1310 fatal("Can't pass expression by reference as arg %d of %s\n",
1313 numargs = prevargs + 1;