1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
26 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
27 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
28 printf("\n -D<number> sets debugging flags."
29 "\n -F<character> the awk script to translate is always invoked with"
31 "\n -n<fieldlist> specifies the names of the input fields if input does"
32 "\n not have to be split into an array."
33 "\n -<number> causes a2p to assume that input will always have that"
48 linestr = str_new(80);
49 str = str_new(0); /* first used for -I flags */
50 for (argc--,argv++; argc; argc--,argv++) {
51 if (argv[0][0] != '-' || !argv[0][1])
57 debug = atoi(argv[0]+2);
59 yydebug = (debug & 1);
63 case '0': case '1': case '2': case '3': case '4':
64 case '5': case '6': case '7': case '8': case '9':
65 maxfld = atoi(argv[0]+1);
72 namelist = savestr(argv[0]+2);
80 fatal("Unrecognized switch: %s\n",argv[0]);
90 if (argv[0] == Nullch) {
92 if ( isatty(fileno(stdin)) )
97 filename = savestr(argv[0]);
99 filename = savestr(argv[0]);
100 if (strEQ(filename,"-"))
105 rsfp = fopen(argv[0],"r");
107 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
111 bufptr = str_get(linestr);
115 /* now parse the report spec */
118 fatal("Translation aborted due to syntax errors.\n");
128 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
130 printf("\t\"%s\"\n",ops[i].cval),i++;
133 printf("\t%d",ops[i].ival),i++;
143 /* first pass to look for numeric variables */
145 prewalk(0,0,root,&i);
147 /* second pass to produce new program */
149 tmpstr = walk(0,0,root,&i,P_MIN);
150 str = str_make("#!");
152 str_cat(str, "/perl\neval \"exec ");
154 str_cat(str, "/perl -S $0 $*\"\n\
155 if $running_under_some_shell;\n\
156 # this emulates #! processing on NIH machines.\n\
157 # (remove #! line above if indigestible)\n\n");
159 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
161 " # process any FOO=bar switches\n\n");
162 if (do_opens && opens) {
167 str_scat(str,tmpstr);
176 "Please check my work on the %d line%s I've marked with \"#???\".\n",
177 checkers, checkers == 1 ? "" : "s" );
179 "The operation I've selected may be wrong for the operand types.\n");
184 #define RETURN(retval) return (bufptr = s,retval)
185 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
186 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
187 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
193 register char *s = bufptr;
201 fprintf(stderr,"Tokener at %s",s);
203 fprintf(stderr,"Tokener at %s\n",s);
208 "Unrecognized character %c in file %s line %d--ignoring.\n",
213 if (*s && *s != '\n') {
214 yyerror("Ignoring spurious backslash");
219 s = str_get(linestr);
224 if ((s = str_gets(linestr, rsfp)) == Nullch) {
228 s = str_get(linestr);
239 yylval = string(s,0);
264 for (d = s + 1; isspace(*d); d++) ;
274 yylval = string("~",1);
292 yylval = string("**=",3);
294 yylval = string(s-1,2);
312 while (*s == ' ' || *s == '\t')
314 if (strnEQ(s,"getline",7))
322 yylval = string("==",2);
326 yylval = string("=",1);
332 yylval = string("!=",2);
336 yylval = string("!~",2);
345 yylval = string("<=",2);
354 yylval = string(">>",2);
358 yylval = string(">=",2);
366 while (isalpha(*s) || isdigit(*s) || *s == '_') \
386 for (d = s; isdigit(*s); s++) ;
387 yylval = string(d,s-d);
393 split_to_array = set_array_base = TRUE;
396 case '/': /* may either be division or pattern */
403 yylval = string("/=",2);
409 case '0': case '1': case '2': case '3': case '4':
410 case '5': case '6': case '7': case '8': case '9': case '.':
415 s = cpy2(tokenbuf,s,s[-1]);
417 fatal("String not terminated:\n%s",str_get(linestr));
419 yylval = string(tokenbuf,0);
425 set_array_base = TRUE;
426 if (strEQ(d,"ARGV")) {
427 yylval=numary(string("ARGV",0));
430 if (strEQ(d,"atan2")) {
437 if (strEQ(d,"break"))
439 if (strEQ(d,"BEGIN"))
444 if (strEQ(d,"continue"))
446 if (strEQ(d,"cos")) {
450 if (strEQ(d,"close")) {
455 if (strEQ(d,"chdir"))
457 else if (strEQ(d,"crypt"))
459 else if (strEQ(d,"chop"))
461 else if (strEQ(d,"chmod"))
463 else if (strEQ(d,"chown"))
470 if (strEQ(d,"delete"))
481 if (strEQ(d,"exit")) {
485 if (strEQ(d,"exp")) {
489 if (strEQ(d,"elsif"))
491 else if (strEQ(d,"eq"))
493 else if (strEQ(d,"eval"))
495 else if (strEQ(d,"eof"))
497 else if (strEQ(d,"each"))
499 else if (strEQ(d,"exec"))
506 if (saw_FS == 1 && in_begin) {
507 for (d = s; *d && isspace(*d); d++) ;
509 for (d++; *d && isspace(*d); d++) ;
510 if (*d == '"' && d[2] == '"')
518 else if (strEQ(d,"function"))
520 if (strEQ(d,"FILENAME"))
522 if (strEQ(d,"foreach"))
524 else if (strEQ(d,"format"))
526 else if (strEQ(d,"fork"))
528 else if (strEQ(d,"fh"))
533 if (strEQ(d,"getline"))
539 else if (strEQ(d,"gt"))
541 else if (strEQ(d,"goto"))
543 else if (strEQ(d,"gmtime"))
557 if (strEQ(d,"index")) {
558 set_array_base = TRUE;
561 if (strEQ(d,"int")) {
575 else if (strEQ(d,"kill"))
580 if (strEQ(d,"length")) {
584 if (strEQ(d,"log")) {
590 else if (strEQ(d,"local"))
592 else if (strEQ(d,"lt"))
594 else if (strEQ(d,"le"))
596 else if (strEQ(d,"locatime"))
598 else if (strEQ(d,"link"))
603 if (strEQ(d,"match")) {
604 set_array_base = TRUE;
613 do_chop = do_split = split_to_array = set_array_base = TRUE;
614 if (strEQ(d,"next")) {
623 if (strEQ(d,"ORS")) {
627 if (strEQ(d,"OFS")) {
631 if (strEQ(d,"OFMT")) {
636 else if (strEQ(d,"ord"))
638 else if (strEQ(d,"oct"))
643 if (strEQ(d,"print")) {
646 if (strEQ(d,"printf")) {
651 else if (strEQ(d,"pop"))
663 if (strEQ(d,"rand")) {
667 if (strEQ(d,"return"))
669 if (strEQ(d,"reset"))
671 else if (strEQ(d,"redo"))
673 else if (strEQ(d,"rename"))
678 if (strEQ(d,"split")) {
679 set_array_base = TRUE;
682 if (strEQ(d,"substr")) {
683 set_array_base = TRUE;
688 if (strEQ(d,"sprintf"))
690 if (strEQ(d,"sqrt")) {
694 if (strEQ(d,"SUBSEP")) {
697 if (strEQ(d,"sin")) {
701 if (strEQ(d,"srand")) {
705 if (strEQ(d,"system")) {
711 else if (strEQ(d,"shift"))
713 else if (strEQ(d,"select"))
715 else if (strEQ(d,"seek"))
717 else if (strEQ(d,"stat"))
719 else if (strEQ(d,"study"))
721 else if (strEQ(d,"sleep"))
723 else if (strEQ(d,"symlink"))
725 else if (strEQ(d,"sort"))
732 else if (strEQ(d,"tell"))
734 else if (strEQ(d,"time"))
736 else if (strEQ(d,"times"))
741 if (strEQ(d,"until"))
743 else if (strEQ(d,"unless"))
745 else if (strEQ(d,"umask"))
747 else if (strEQ(d,"unshift"))
749 else if (strEQ(d,"unlink"))
751 else if (strEQ(d,"utime"))
756 if (strEQ(d,"values"))
761 if (strEQ(d,"while"))
763 if (strEQ(d,"write"))
765 else if (strEQ(d,"wait"))
794 fatal("Search pattern not found:\n%s",str_get(linestr));
798 for (; *s; s++,d++) {
802 else if (s[1] == '\\')
804 else if (s[1] == '[')
807 else if (*s == '[') {
810 if (*s == '\\' && s[1])
812 if (*s == '/' || (*s == '-' && s[1] == ']'))
815 } while (*s && *s != ']');
824 fatal("Search pattern not terminated:\n%s",str_get(linestr));
826 yylval = string(tokenbuf,0);
833 fprintf(stderr,"%s in file %s at line %d\n",
844 case '1': case '2': case '3': case '4': case '5':
845 case '6': case '7': case '8': case '9': case '0' : case '.':
847 while (isdigit(*s)) {
853 while (isdigit(*s)) {
860 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
862 if (*s == '+' || *s == '-')
868 yylval = string(tokenbuf,0);
879 ops[mop++].ival = OSTRING + (1<<8);
882 ops[mop].cval = safemalloc(len+1);
883 strncpy(ops[mop].cval,ptr,len);
884 ops[mop++].cval[len] = '\0';
886 fatal("Recompile a2p with larger OPSMAX\n");
896 fatal("type > 255 (%d)\n",type);
897 ops[mop++].ival = type;
899 fatal("Recompile a2p with larger OPSMAX\n");
910 fatal("type > 255 (%d)\n",type);
911 ops[mop++].ival = type + (1<<8);
912 ops[mop++].ival = arg1;
914 fatal("Recompile a2p with larger OPSMAX\n");
918 oper2(type,arg1,arg2)
926 fatal("type > 255 (%d)\n",type);
927 ops[mop++].ival = type + (2<<8);
928 ops[mop++].ival = arg1;
929 ops[mop++].ival = arg2;
931 fatal("Recompile a2p with larger OPSMAX\n");
935 oper3(type,arg1,arg2,arg3)
944 fatal("type > 255 (%d)\n",type);
945 ops[mop++].ival = type + (3<<8);
946 ops[mop++].ival = arg1;
947 ops[mop++].ival = arg2;
948 ops[mop++].ival = arg3;
950 fatal("Recompile a2p with larger OPSMAX\n");
954 oper4(type,arg1,arg2,arg3,arg4)
964 fatal("type > 255 (%d)\n",type);
965 ops[mop++].ival = type + (4<<8);
966 ops[mop++].ival = arg1;
967 ops[mop++].ival = arg2;
968 ops[mop++].ival = arg3;
969 ops[mop++].ival = arg4;
971 fatal("Recompile a2p with larger OPSMAX\n");
975 oper5(type,arg1,arg2,arg3,arg4,arg5)
986 fatal("type > 255 (%d)\n",type);
987 ops[mop++].ival = type + (5<<8);
988 ops[mop++].ival = arg1;
989 ops[mop++].ival = arg2;
990 ops[mop++].ival = arg3;
991 ops[mop++].ival = arg4;
992 ops[mop++].ival = arg5;
994 fatal("Recompile a2p with larger OPSMAX\n");
1007 type = ops[branch].ival;
1010 for (i=depth; i; i--)
1012 if (type == OSTRING) {
1013 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1016 printf("(%-5d%s %d\n",branch,opname[type],len);
1018 for (i=1; i<=len; i++)
1019 dump(ops[branch+i].ival);
1021 for (i=depth; i; i--)
1033 else if ((ops[arg].ival & 255) != OBLOCK)
1034 return oper2(OBLOCK,arg,maybe);
1035 else if ((ops[arg].ival >> 8) < 2)
1036 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1047 for (s = str->str_ptr; *s; s++) {
1048 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1052 else if (*s == '\n') {
1053 for (t = s+1; isspace(*t & 127); t++) ;
1055 while (isspace(*t & 127) && *t != '\n') t--;
1056 if (*t == '\n' && t-s > 1) {
1069 register char *d, *s, *t, *e;
1070 register int pos, newpos;
1074 for (s = str->str_ptr; *s; s++) {
1083 else if (*s == '\t')
1085 if (pos > 78) { /* split a long line? */
1088 for (t = tokenbuf; isspace(*t & 127); t++) {
1095 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1099 while (d > tokenbuf &&
1100 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1105 while (d > tokenbuf &&
1106 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1111 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1116 while (d > tokenbuf && *d != ' ')
1126 if (d[-1] != ';' && !(newpos % 4)) {
1132 newpos += strlen(t);
1146 for (t = tokenbuf; *t; t++) {
1150 strcpy(t+strlen(t)-1, "\t#???\n");
1156 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1158 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1161 fputs(tokenbuf,stdout);
1170 key = walk(0,0,arg,&dummy,P_MIN);
1172 hstore(symtab,key->str_ptr,str_make("1"));
1174 set_array_base = TRUE;
1186 type = ops[arg].ival & 255;
1187 if (type == OCOMMA) {
1188 rememberargs(ops[arg+1].ival);
1189 rememberargs(ops[arg+3].ival);
1191 else if (type == OVAR) {
1193 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1196 fatal("panic: unknown argument type %d, line %d\n",type,line);
1203 int type = ops[arg].ival & 255;
1206 if (type != OSTRING)
1207 fatal("panic: aryrefarg %d, line %d\n",type,line);
1208 str = hfetch(curarghash,ops[arg+1].cval);
1214 fixfargs(name,arg,prevargs)
1225 type = ops[arg].ival & 255;
1226 if (type == OCOMMA) {
1227 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1228 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1230 else if (type == OVAR) {
1231 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1232 if (strEQ(str_get(str),"*")) {
1235 str_set(str,""); /* in case another routine has this */
1236 ops[arg].ival &= ~255;
1237 ops[arg].ival |= OSTAR;
1238 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1239 fprintf(stderr,"Adding %s\n",tmpbuf);
1242 hstore(curarghash,tmpbuf,str);
1244 numargs = prevargs + 1;
1247 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1248 type,prevargs+1,line);
1252 fixrargs(name,arg,prevargs)
1263 type = ops[arg].ival & 255;
1264 if (type == OCOMMA) {
1265 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1266 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1271 sprintf(tmpbuf,"%s:%d",name,prevargs);
1272 str = hfetch(curarghash,tmpbuf);
1273 if (str && strEQ(str->str_ptr,"*")) {
1274 if (type == OVAR || type == OSTAR) {
1275 ops[arg].ival &= ~255;
1276 ops[arg].ival |= OSTAR;
1279 fatal("Can't pass expression by reference as arg %d of %s\n",
1282 numargs = prevargs + 1;