1 /* $Header: a2py.c,v 4.0 91/03/20 01:57:26 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 4.0 91/03/20 01:57:26 lwall
15 #include "../patchlev.h"
29 printf("\nThis is the AWK to PERL translator, version 3.0, patchlevel %d\n", PATCHLEVEL);
30 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
31 printf("\n -D<number> sets debugging flags."
32 "\n -F<character> the awk script to translate is always invoked with"
34 "\n -n<fieldlist> specifies the names of the input fields if input does"
35 "\n not have to be split into an array."
36 "\n -<number> causes a2p to assume that input will always have that"
52 linestr = str_new(80);
53 str = str_new(0); /* first used for -I flags */
54 for (argc--,argv++; argc; argc--,argv++) {
55 if (argv[0][0] != '-' || !argv[0][1])
61 debug = atoi(argv[0]+2);
63 yydebug = (debug & 1);
67 case '0': case '1': case '2': case '3': case '4':
68 case '5': case '6': case '7': case '8': case '9':
69 maxfld = atoi(argv[0]+1);
76 namelist = savestr(argv[0]+2);
84 fatal("Unrecognized switch: %s\n",argv[0]);
94 if (argv[0] == Nullch) {
96 if ( isatty(fileno(stdin)) )
101 filename = savestr(argv[0]);
103 filename = savestr(argv[0]);
104 if (strEQ(filename,"-"))
109 rsfp = fopen(argv[0],"r");
111 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
115 bufptr = str_get(linestr);
119 /* now parse the report spec */
122 fatal("Translation aborted due to syntax errors.\n");
132 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
134 printf("\t\"%s\"\n",ops[i].cval),i++;
137 printf("\t%d",ops[i].ival),i++;
147 /* first pass to look for numeric variables */
149 prewalk(0,0,root,&i);
151 /* second pass to produce new program */
153 tmpstr = walk(0,0,root,&i,P_MIN);
154 str = str_make("#!");
156 str_cat(str, "/perl\neval \"exec ");
158 str_cat(str, "/perl -S $0 $*\"\n\
159 if $running_under_some_shell;\n\
160 # this emulates #! processing on NIH machines.\n\
161 # (remove #! line above if indigestible)\n\n");
163 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift;\n");
165 " # process any FOO=bar switches\n\n");
166 if (do_opens && opens) {
171 str_scat(str,tmpstr);
180 "Please check my work on the %d line%s I've marked with \"#???\".\n",
181 checkers, checkers == 1 ? "" : "s" );
183 "The operation I've selected may be wrong for the operand types.\n");
188 #define RETURN(retval) return (bufptr = s,retval)
189 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
190 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
191 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
197 register char *s = bufptr;
205 fprintf(stderr,"Tokener at %s",s);
207 fprintf(stderr,"Tokener at %s\n",s);
212 "Unrecognized character %c in file %s line %d--ignoring.\n",
217 s = str_get(linestr);
222 if ((s = str_gets(linestr, rsfp)) == Nullch) {
226 s = str_get(linestr);
237 yylval = string(s,0);
262 for (d = s + 1; isspace(*d); d++) ;
272 yylval = string("~",1);
290 yylval = string("**=",3);
292 yylval = string(s-1,2);
310 while (*s == ' ' || *s == '\t')
312 if (strnEQ(s,"getline",7))
320 yylval = string("==",2);
324 yylval = string("=",1);
330 yylval = string("!=",2);
334 yylval = string("!~",2);
343 yylval = string("<=",2);
352 yylval = string(">>",2);
356 yylval = string(">=",2);
364 while (isalpha(*s) || isdigit(*s) || *s == '_') \
384 for (d = s; isdigit(*s); s++) ;
385 yylval = string(d,s-d);
391 split_to_array = set_array_base = TRUE;
394 case '/': /* may either be division or pattern */
401 yylval = string("/=",2);
407 case '0': case '1': case '2': case '3': case '4':
408 case '5': case '6': case '7': case '8': case '9': case '.':
413 s = cpy2(tokenbuf,s,s[-1]);
415 fatal("String not terminated:\n%s",str_get(linestr));
417 yylval = string(tokenbuf,0);
423 set_array_base = TRUE;
424 if (strEQ(d,"ARGV")) {
425 yylval=numary(string("ARGV",0));
428 if (strEQ(d,"atan2")) {
435 if (strEQ(d,"break"))
437 if (strEQ(d,"BEGIN"))
442 if (strEQ(d,"continue"))
444 if (strEQ(d,"cos")) {
448 if (strEQ(d,"close")) {
453 if (strEQ(d,"chdir"))
455 else if (strEQ(d,"crypt"))
457 else if (strEQ(d,"chop"))
459 else if (strEQ(d,"chmod"))
461 else if (strEQ(d,"chown"))
468 if (strEQ(d,"delete"))
479 if (strEQ(d,"exit")) {
483 if (strEQ(d,"exp")) {
487 if (strEQ(d,"elsif"))
489 else if (strEQ(d,"eq"))
491 else if (strEQ(d,"eval"))
493 else if (strEQ(d,"eof"))
495 else if (strEQ(d,"each"))
497 else if (strEQ(d,"exec"))
504 if (saw_FS == 1 && in_begin) {
505 for (d = s; *d && isspace(*d); d++) ;
507 for (d++; *d && isspace(*d); d++) ;
508 if (*d == '"' && d[2] == '"')
516 else if (strEQ(d,"function"))
518 if (strEQ(d,"FILENAME"))
520 if (strEQ(d,"foreach"))
522 else if (strEQ(d,"format"))
524 else if (strEQ(d,"fork"))
526 else if (strEQ(d,"fh"))
531 if (strEQ(d,"getline"))
537 else if (strEQ(d,"gt"))
539 else if (strEQ(d,"goto"))
541 else if (strEQ(d,"gmtime"))
555 if (strEQ(d,"index")) {
556 set_array_base = TRUE;
559 if (strEQ(d,"int")) {
573 else if (strEQ(d,"kill"))
578 if (strEQ(d,"length")) {
582 if (strEQ(d,"log")) {
588 else if (strEQ(d,"local"))
590 else if (strEQ(d,"lt"))
592 else if (strEQ(d,"le"))
594 else if (strEQ(d,"localtime")) /* was "locatime", which is not a Perl word; the sibling tests (local, lt, le, link) all name real Perl words */
596 else if (strEQ(d,"link"))
601 if (strEQ(d,"match")) {
602 set_array_base = TRUE;
611 do_chop = do_split = split_to_array = set_array_base = TRUE;
612 if (strEQ(d,"next")) {
621 if (strEQ(d,"ORS")) {
625 if (strEQ(d,"OFS")) {
629 if (strEQ(d,"OFMT")) {
634 else if (strEQ(d,"ord"))
636 else if (strEQ(d,"oct"))
641 if (strEQ(d,"print")) {
644 if (strEQ(d,"printf")) {
649 else if (strEQ(d,"pop"))
661 if (strEQ(d,"rand")) {
665 if (strEQ(d,"return"))
667 if (strEQ(d,"reset"))
669 else if (strEQ(d,"redo"))
671 else if (strEQ(d,"rename"))
676 if (strEQ(d,"split")) {
677 set_array_base = TRUE;
680 if (strEQ(d,"substr")) {
681 set_array_base = TRUE;
686 if (strEQ(d,"sprintf"))
688 if (strEQ(d,"sqrt")) {
692 if (strEQ(d,"SUBSEP")) {
695 if (strEQ(d,"sin")) {
699 if (strEQ(d,"srand")) {
703 if (strEQ(d,"system")) {
709 else if (strEQ(d,"shift"))
711 else if (strEQ(d,"select"))
713 else if (strEQ(d,"seek"))
715 else if (strEQ(d,"stat"))
717 else if (strEQ(d,"study"))
719 else if (strEQ(d,"sleep"))
721 else if (strEQ(d,"symlink"))
723 else if (strEQ(d,"sort"))
730 else if (strEQ(d,"tell"))
732 else if (strEQ(d,"time"))
734 else if (strEQ(d,"times"))
739 if (strEQ(d,"until"))
741 else if (strEQ(d,"unless"))
743 else if (strEQ(d,"umask"))
745 else if (strEQ(d,"unshift"))
747 else if (strEQ(d,"unlink"))
749 else if (strEQ(d,"utime"))
754 if (strEQ(d,"values"))
759 if (strEQ(d,"while"))
761 if (strEQ(d,"write"))
763 else if (strEQ(d,"wait"))
792 fatal("Search pattern not found:\n%s",str_get(linestr));
796 for (; *s; s++,d++) {
800 else if (s[1] == '\\')
803 else if (*s == '[') {
806 if (*s == '\\' && s[1])
808 if (*s == '/' || (*s == '-' && s[1] == ']'))
811 } while (*s && *s != ']');
820 fatal("Search pattern not terminated:\n%s",str_get(linestr));
822 yylval = string(tokenbuf,0);
829 fprintf(stderr,"%s in file %s at line %d\n",
840 case '1': case '2': case '3': case '4': case '5':
841 case '6': case '7': case '8': case '9': case '0' : case '.':
843 while (isdigit(*s)) {
846 if (*s == '.' && index("0123456789eE",s[1])) {
848 while (isdigit(*s)) {
852 if (index("eE",*s) && index("+-0123456789",s[1])) {
854 if (*s == '+' || *s == '-')
860 yylval = string(tokenbuf,0);
871 ops[mop++].ival = OSTRING + (1<<8);
874 ops[mop].cval = safemalloc(len+1);
875 strncpy(ops[mop].cval,ptr,len);
876 ops[mop++].cval[len] = '\0';
878 fatal("Recompile a2p with larger OPSMAX\n");
888 fatal("type > 255 (%d)\n",type);
889 ops[mop++].ival = type;
891 fatal("Recompile a2p with larger OPSMAX\n");
902 fatal("type > 255 (%d)\n",type);
903 ops[mop++].ival = type + (1<<8);
904 ops[mop++].ival = arg1;
906 fatal("Recompile a2p with larger OPSMAX\n");
910 oper2(type,arg1,arg2)
918 fatal("type > 255 (%d)\n",type);
919 ops[mop++].ival = type + (2<<8);
920 ops[mop++].ival = arg1;
921 ops[mop++].ival = arg2;
923 fatal("Recompile a2p with larger OPSMAX\n");
927 oper3(type,arg1,arg2,arg3)
936 fatal("type > 255 (%d)\n",type);
937 ops[mop++].ival = type + (3<<8);
938 ops[mop++].ival = arg1;
939 ops[mop++].ival = arg2;
940 ops[mop++].ival = arg3;
942 fatal("Recompile a2p with larger OPSMAX\n");
946 oper4(type,arg1,arg2,arg3,arg4)
956 fatal("type > 255 (%d)\n",type);
957 ops[mop++].ival = type + (4<<8);
958 ops[mop++].ival = arg1;
959 ops[mop++].ival = arg2;
960 ops[mop++].ival = arg3;
961 ops[mop++].ival = arg4;
963 fatal("Recompile a2p with larger OPSMAX\n");
967 oper5(type,arg1,arg2,arg3,arg4,arg5)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (5<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
983 ops[mop++].ival = arg4;
984 ops[mop++].ival = arg5;
986 fatal("Recompile a2p with larger OPSMAX\n");
999 type = ops[branch].ival;
1002 for (i=depth; i; i--)
1004 if (type == OSTRING) {
1005 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1008 printf("(%-5d%s %d\n",branch,opname[type],len);
1010 for (i=1; i<=len; i++)
1011 dump(ops[branch+i].ival);
1013 for (i=depth; i; i--)
1025 else if ((ops[arg].ival & 255) != OBLOCK)
1026 return oper2(OBLOCK,arg,maybe);
1027 else if ((ops[arg].ival >> 8) < 2)
1028 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1039 for (s = str->str_ptr; *s; s++) {
1040 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1044 else if (*s == '\n') {
1045 for (t = s+1; isspace(*t & 127); t++) ;
1047 while (isspace(*t & 127) && *t != '\n') t--;
1048 if (*t == '\n' && t-s > 1) {
1061 register char *d, *s, *t, *e;
1062 register int pos, newpos;
1066 for (s = str->str_ptr; *s; s++) {
1075 else if (*s == '\t')
1077 if (pos > 78) { /* split a long line? */
1080 for (t = tokenbuf; isspace(*t & 127); t++) {
1087 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1091 while (d > tokenbuf &&
1092 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1097 while (d > tokenbuf &&
1098 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1103 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1108 while (d > tokenbuf && *d != ' ')
1118 if (d[-1] != ';' && !(newpos % 4)) {
1124 newpos += strlen(t);
1138 for (t = tokenbuf; *t; t++) {
1142 strcpy(t+strlen(t)-1, "\t#???\n");
1148 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1150 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1153 fputs(tokenbuf,stdout);
1162 key = walk(0,0,arg,&dummy,P_MIN);
1164 hstore(symtab,key->str_ptr,str_make("1"));
1166 set_array_base = TRUE;
1178 type = ops[arg].ival & 255;
1179 if (type == OCOMMA) {
1180 rememberargs(ops[arg+1].ival);
1181 rememberargs(ops[arg+3].ival);
1183 else if (type == OVAR) {
1185 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1188 fatal("panic: unknown argument type %d, line %d\n",type,line);
1195 int type = ops[arg].ival & 255;
1198 if (type != OSTRING)
1199 fatal("panic: aryrefarg %d, line %d\n",type,line);
1200 str = hfetch(curarghash,ops[arg+1].cval);
1206 fixfargs(name,arg,prevargs)
1217 type = ops[arg].ival & 255;
1218 if (type == OCOMMA) {
1219 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1220 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1222 else if (type == OVAR) {
1223 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1224 if (strEQ(str_get(str),"*")) {
1227 str_set(str,""); /* in case another routine has this */
1228 ops[arg].ival &= ~255;
1229 ops[arg].ival |= OSTAR;
1230 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1231 fprintf(stderr,"Adding %s\n",tmpbuf);
1234 hstore(curarghash,tmpbuf,str);
1236 numargs = prevargs + 1;
1239 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1240 type,prevargs+1,line);
1244 fixrargs(name,arg,prevargs)
1255 type = ops[arg].ival & 255;
1256 if (type == OCOMMA) {
1257 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1258 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1263 sprintf(tmpbuf,"%s:%d",name,prevargs);
1264 str = hfetch(curarghash,tmpbuf);
1265 fprintf(stderr,"Looking for %s\n",tmpbuf);
1266 if (str && strEQ(str->str_ptr,"*")) {
1267 if (type == OVAR || type == OSTAR) {
1268 ops[arg].ival &= ~255;
1269 ops[arg].ival |= OSTAR;
1272 fatal("Can't pass expression by reference as arg %d of %s\n",
1275 numargs = prevargs + 1;