1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32)
12 #include "../patchlevel.h"
/* Forward declarations.  oper1()..oper5() each take a node type followed by
 * one to five int arguments and return int; their definitions appear further
 * down in this file.  walk() returns a STR * and is not defined in the part
 * of the file visible here. */
22 int oper1(int type, int arg1);
23 int oper2(int type, int arg1, int arg2);
24 int oper3(int type, int arg1, int arg2, int arg3);
25 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
26 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
27 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
/* usage(): the prototype sits behind an OS2/WIN32 #if.  The visible statements
 * print a version banner (including PATCHLEVEL), a one-line synopsis built
 * from myname, and a description of the -D, -F, -n and -<number> switches.
 * This excerpt omits several lines, so the string literals below are not a
 * contiguous copy of the help text. */
29 #if defined(OS2) || defined(WIN32)
30 static void usage(void);
35 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
36 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
37 printf("\n -D<number> sets debugging flags."
38 "\n -F<character> the awk script to translate is always invoked with"
40 "\n -n<fieldlist> specifies the names of the input fields if input does"
41 "\n not have to be split into an array."
42 "\n -<number> causes a2p to assume that input will always have that"
/* main(): entry point of the translator.  From the statements visible here it
 *   - walks argv, stopping at the first argument that is not a switch;
 *   - reads numeric switch values with atoi() into debug and maxfld, and
 *     saves a switch's text into namelist;
 *   - saves the script name in filename and opens it on rsfp;
 *   - aborts with fatal() on a missing script or on syntax errors;
 *   - can print the contents of ops[] (index, type, length, operator name);
 *   - runs prewalk() and then walk() from root;
 *   - assembles the output program text in str, starting from STARTPERL;
 *   - prints a note about lines marked with "#???", counted in checkers.
 * Many lines (case labels, conditions, braces) are missing from this excerpt,
 * so the control flow connecting these statements is not shown. */
49 main(register int argc, register char **argv, register char **env)
56 linestr = str_new(80);
57 str = str_new(0); /* first used for -I flags */
58 for (argc--,argv++; argc; argc--,argv++) {
/* an argument that does not start with '-', or that is exactly "-", is tested for here */
59 if (argv[0][0] != '-' || !argv[0][1])
/* NOTE(review): atoi() gives no error indication for non-numeric text, so a
 * malformed switch value is not detected here or at the maxfld assignment. */
65 debug = atoi(argv[0]+2);
67 yydebug = (debug & 1);
71 case '0': case '1': case '2': case '3': case '4':
72 case '5': case '6': case '7': case '8': case '9':
73 maxfld = atoi(argv[0]+1);
80 namelist = savestr(argv[0]+2);
91 #if defined(OS2) || defined(WIN32)
92 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
95 fatal("Unrecognized switch: %s\n",argv[0]);
103 if (argv[0] == Nullch) {
104 #if defined(OS2) || defined(WIN32)
105 if ( isatty(fileno(stdin)) )
110 filename = savestr(argv[0]);
112 filename = savestr(argv[0]);
113 if (strEQ(filename,"-"))
/* the open uses argv[0] directly rather than the saved copy in filename */
118 rsfp = fopen(argv[0],"r");
120 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
124 bufptr = str_get(linestr);
128 /* now parse the report spec */
131 fatal("Translation aborted due to syntax errors.\n");
/* listing of ops[]; the ++ on i after each printf steps to the next slot */
141 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
143 printf("\t\"%s\"\n",ops[i].cval),i++;
146 printf("\t%d",ops[i].ival),i++;
156 /* first pass to look for numeric variables */
158 prewalk(0,0,root,&i);
160 /* second pass to produce new program */
162 tmpstr = walk(0,0,root,&i,P_MIN);
/* the generated program starts with STARTPERL followed by a shell-compatible
 * "eval 'exec ...'" preamble; the walked program text (tmpstr) is appended
 * further down */
163 str = str_make(STARTPERL);
164 str_cat(str, "\neval 'exec ");
166 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
167 if $running_under_some_shell;\n\
168 # this emulates #! processing on NIH machines.\n\
169 # (remove #! line above if indigestible)\n\n");
171 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
173 " # process any FOO=bar switches\n\n");
174 if (do_opens && opens) {
179 str_scat(str,tmpstr);
/* message arguments: the count in checkers and a plural "s" unless it is 1 */
188 "Please check my work on the %d line%s I've marked with \"#???\".\n",
189 checkers, checkers == 1 ? "" : "s" );
191 "The operation I've selected may be wrong for the operand types.\n");
/* Tokener return helpers.  Each macro expands to a single return statement
 * whose comma expression stores the scan pointer s back into bufptr before
 * yielding the value:
 *   RETURN(v) - just save s and return v;
 *   XTERM(v)  - additionally set expectterm = TRUE;
 *   XOP(v)    - additionally set expectterm = FALSE;
 *   ID(x)     - set yylval = string(x,0), clear expectterm, return idtype.
 * They can only be used where s (and, for ID, idtype) is in scope. */
196 #define RETURN(retval) return (bufptr = s,retval)
197 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
198 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
199 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
/* Body of the tokener.  The function header is not part of this excerpt; the
 * debug output below calls it "Tokener" (presumably yylex() - confirm).
 * Visible behaviour:
 *   - scanning starts from bufptr through the local pointer s;
 *   - more input is fetched with str_gets(linestr, rsfp);
 *   - operator and literal tokens are stored with yylval = string(...);
 *   - each scanned word d is compared with strEQ() against awk keywords and
 *     builtins and against a list of other names.
 * What happens after each strEQ() test is on lines not shown here. */
206 register char *s = bufptr;
214 fprintf(stderr,"Tokener at %s",s);
216 fprintf(stderr,"Tokener at %s\n",s);
221 "Unrecognized character %c in file %s line %d--ignoring.\n",
226 if (*s && *s != '\n') {
227 yyerror("Ignoring spurious backslash");
232 s = str_get(linestr);
237 if ((s = str_gets(linestr, rsfp)) == Nullch) {
241 s = str_get(linestr);
252 yylval = string(s,0);
/* NOTE(review): isspace() is called on a plain char here and below; if char is
 * signed and the script contains bytes >= 0x80 the argument is out of the
 * range <ctype.h> accepts.  Other code in this file masks with "& 127". */
277 for (d = s + 1; isspace(*d); d++) ;
287 yylval = string("~",1);
305 yylval = string("**=",3);
307 yylval = string(s-1,2);
325 while (*s == ' ' || *s == '\t')
327 if (strnEQ(s,"getline",7))
335 yylval = string("==",2);
339 yylval = string("=",1);
345 yylval = string("!=",2);
349 yylval = string("!~",2);
358 yylval = string("<=",2);
367 yylval = string(">>",2);
371 yylval = string(">=",2);
/* identifier characters: letters, digits and underscore (this line is part of
 * a multi-line macro, hence the trailing backslash) */
379 while (isalpha(*s) || isdigit(*s) || *s == '_') \
399 for (d = s; isdigit(*s); s++) ;
400 yylval = string(d,s-d);
406 split_to_array = set_array_base = TRUE;
409 case '/': /* may either be division or pattern */
416 yylval = string("/=",2);
422 case '0': case '1': case '2': case '3': case '4':
423 case '5': case '6': case '7': case '8': case '9': case '.':
/* copy into tokenbuf using s[-1] (the character just consumed) as delimiter */
428 s = cpy2(tokenbuf,s,s[-1]);
430 fatal("String not terminated:\n%s",str_get(linestr));
432 yylval = string(tokenbuf,0);
438 set_array_base = TRUE;
439 if (strEQ(d,"ARGV")) {
440 yylval=numary(string("ARGV",0));
443 if (strEQ(d,"atan2")) {
450 if (strEQ(d,"break"))
452 if (strEQ(d,"BEGIN"))
457 if (strEQ(d,"continue"))
459 if (strEQ(d,"cos")) {
463 if (strEQ(d,"close")) {
468 if (strEQ(d,"chdir"))
470 else if (strEQ(d,"crypt"))
472 else if (strEQ(d,"chop"))
474 else if (strEQ(d,"chmod"))
476 else if (strEQ(d,"chown"))
483 if (strEQ(d,"delete"))
494 if (strEQ(d,"exit")) {
498 if (strEQ(d,"exp")) {
502 if (strEQ(d,"elsif"))
504 else if (strEQ(d,"eq"))
506 else if (strEQ(d,"eval"))
508 else if (strEQ(d,"eof"))
510 else if (strEQ(d,"each"))
512 else if (strEQ(d,"exec"))
/* look-ahead from s, done only while saw_FS == 1 and in_begin is set: skip
 * blanks, step over one character, skip blanks again, then test for a quoted
 * single character */
519 if (saw_FS == 1 && in_begin) {
520 for (d = s; *d && isspace(*d); d++) ;
522 for (d++; *d && isspace(*d); d++) ;
/* NOTE(review): if d[1] is the terminating NUL, d[2] is read past the end of
 * the string - confirm the buffer always has a character after the quote. */
523 if (*d == '"' && d[2] == '"')
531 else if (strEQ(d,"function"))
533 if (strEQ(d,"FILENAME"))
535 if (strEQ(d,"foreach"))
537 else if (strEQ(d,"format"))
539 else if (strEQ(d,"fork"))
541 else if (strEQ(d,"fh"))
546 if (strEQ(d,"getline"))
552 else if (strEQ(d,"gt"))
554 else if (strEQ(d,"goto"))
556 else if (strEQ(d,"gmtime"))
570 if (strEQ(d,"index")) {
571 set_array_base = TRUE;
574 if (strEQ(d,"int")) {
588 else if (strEQ(d,"kill"))
593 if (strEQ(d,"length")) {
597 if (strEQ(d,"log")) {
603 else if (strEQ(d,"local"))
605 else if (strEQ(d,"lt"))
607 else if (strEQ(d,"le"))
/* NOTE(review): "locatime" looks like a typo for "localtime" (compare the
 * "gmtime" entry above); as written the word "localtime" is never matched by
 * this chain - confirm and correct the literal. */
609 else if (strEQ(d,"locatime"))
611 else if (strEQ(d,"link"))
616 if (strEQ(d,"match")) {
617 set_array_base = TRUE;
626 do_chop = do_split = split_to_array = set_array_base = TRUE;
627 if (strEQ(d,"next")) {
636 if (strEQ(d,"ORS")) {
640 if (strEQ(d,"OFS")) {
644 if (strEQ(d,"OFMT")) {
649 else if (strEQ(d,"ord"))
651 else if (strEQ(d,"oct"))
656 if (strEQ(d,"print")) {
659 if (strEQ(d,"printf")) {
664 else if (strEQ(d,"pop"))
676 if (strEQ(d,"rand")) {
680 if (strEQ(d,"return"))
682 if (strEQ(d,"reset"))
684 else if (strEQ(d,"redo"))
686 else if (strEQ(d,"rename"))
691 if (strEQ(d,"split")) {
692 set_array_base = TRUE;
695 if (strEQ(d,"substr")) {
696 set_array_base = TRUE;
701 if (strEQ(d,"sprintf"))
703 if (strEQ(d,"sqrt")) {
707 if (strEQ(d,"SUBSEP")) {
710 if (strEQ(d,"sin")) {
714 if (strEQ(d,"srand")) {
718 if (strEQ(d,"system")) {
724 else if (strEQ(d,"shift"))
726 else if (strEQ(d,"select"))
728 else if (strEQ(d,"seek"))
730 else if (strEQ(d,"stat"))
732 else if (strEQ(d,"study"))
734 else if (strEQ(d,"sleep"))
736 else if (strEQ(d,"symlink"))
738 else if (strEQ(d,"sort"))
745 else if (strEQ(d,"tell"))
747 else if (strEQ(d,"time"))
749 else if (strEQ(d,"times"))
754 if (strEQ(d,"until"))
756 else if (strEQ(d,"unless"))
758 else if (strEQ(d,"umask"))
760 else if (strEQ(d,"unshift"))
762 else if (strEQ(d,"unlink"))
764 else if (strEQ(d,"utime"))
769 if (strEQ(d,"values"))
774 if (strEQ(d,"while"))
776 if (strEQ(d,"write"))
778 else if (strEQ(d,"wait"))
/* scanpat(s): scan a search pattern starting at s.
 * Visible behaviour: the routine aborts through fatal() when the pattern is
 * "not found" or "not terminated"; it walks the text with s while advancing a
 * second pointer d in step; a '[' starts an inner loop that runs until ']' or
 * end of string and tests for a backslash followed by another character and
 * for '/' or a '-' directly before ']'.  The actions taken on those tests are
 * on lines not shown.  The result is published as yylval = string(tokenbuf,0). */
798 scanpat(register char *s)
806 fatal("Search pattern not found:\n%s",str_get(linestr));
810 for (; *s; s++,d++) {
814 else if (s[1] == '\\')
816 else if (s[1] == '[')
819 else if (*s == '[') {
822 if (*s == '\\' && s[1])
824 if (*s == '/' || (*s == '-' && s[1] == ']'))
827 } while (*s && *s != ']');
836 fatal("Search pattern not terminated:\n%s",str_get(linestr));
838 yylval = string(tokenbuf,0);
/* Error report written to stderr: a message, a file name and a line number.
 * The enclosing function header and the argument list are not in this excerpt
 * (presumably yyerror(), which the tokener calls - confirm). */
845 fprintf(stderr,"%s in file %s at line %d\n",
/* scannum(s): scan a numeric literal starting at s.
 * Visible behaviour: dispatches on a leading digit or '.', consumes two
 * separate runs of digits (presumably the integer and fractional parts), then
 * an exponent introduced by 'e' or 'E' with an optional sign, and publishes
 * the text as yylval = string(tokenbuf,0). */
850 scannum(register char *s)
855 case '1': case '2': case '3': case '4': case '5':
856 case '6': case '7': case '8': case '9': case '0' : case '.':
858 while (isdigit(*s)) {
864 while (isdigit(*s)) {
/* NOTE(review): strchr() treats the terminating NUL as part of the string, so
 * strchr("eE",*s) is also non-null when *s == '\0'; in that case s[1] is read
 * past the end of the input.  Confirm the buffer never ends right after the
 * digits, or test *s first. */
871 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
873 if (*s == '+' || *s == '-')
879 yylval = string(tokenbuf,0);
/* string(ptr,len): append an OSTRING node to ops[].
 * The header slot holds OSTRING + (1<<8): node type in the low byte, operand
 * count above it (bl() in this file decodes the word with "& 255" and ">> 8").
 * The following slot owns a freshly safemalloc'ed copy of len bytes from ptr.
 * Callers in this file also pass len == 0 with a NUL-terminated ptr; how that
 * case gets its real length is on a line not shown.
 * The fatal() call is presumably reached when ops[] is full (condition not
 * visible). */
886 string(char *ptr, int len)
890 ops[mop++].ival = OSTRING + (1<<8);
893 ops[mop].cval = (char *) safemalloc(len+1);
/* strncpy() does not terminate when ptr has at least len characters, so the
 * terminator is written explicitly on the next line */
894 strncpy(ops[mop].cval,ptr,len);
895 ops[mop++].cval[len] = '\0';
897 fatal("Recompile a2p with larger OPSMAX\n");
/* Tail of the zero-operand node builder (header not in this excerpt;
 * presumably oper0(type) - confirm).  It stores type alone as the header word
 * in ops[mop++].  The conditions guarding the two fatal() calls are on lines
 * not shown. */
907 fatal("type > 255 (%d)\n",type);
908 ops[mop++].ival = type;
910 fatal("Recompile a2p with larger OPSMAX\n");
/* oper1(type, arg1): append a one-operand node to ops[].
 * Slots written: header word type + (1<<8), then arg1.  The conditions
 * guarding the two fatal() calls are on lines not shown. */
915 oper1(int type, int arg1)
920 fatal("type > 255 (%d)\n",type);
921 ops[mop++].ival = type + (1<<8);
922 ops[mop++].ival = arg1;
924 fatal("Recompile a2p with larger OPSMAX\n");
/* oper2(type, arg1, arg2): append a two-operand node to ops[].
 * Slots written: header word type + (2<<8), then arg1 and arg2 in order.  The
 * conditions guarding the two fatal() calls are on lines not shown. */
929 oper2(int type, int arg1, int arg2)
934 fatal("type > 255 (%d)\n",type);
935 ops[mop++].ival = type + (2<<8);
936 ops[mop++].ival = arg1;
937 ops[mop++].ival = arg2;
939 fatal("Recompile a2p with larger OPSMAX\n");
/* oper3(type, arg1, arg2, arg3): append a three-operand node to ops[].
 * Slots written: header word type + (3<<8), then arg1..arg3 in order.  The
 * conditions guarding the two fatal() calls are on lines not shown. */
944 oper3(int type, int arg1, int arg2, int arg3)
949 fatal("type > 255 (%d)\n",type);
950 ops[mop++].ival = type + (3<<8);
951 ops[mop++].ival = arg1;
952 ops[mop++].ival = arg2;
953 ops[mop++].ival = arg3;
955 fatal("Recompile a2p with larger OPSMAX\n");
/* oper4(type, arg1, arg2, arg3, arg4): append a four-operand node to ops[].
 * Slots written: header word type + (4<<8), then arg1..arg4 in order.  The
 * conditions guarding the two fatal() calls are on lines not shown. */
960 oper4(int type, int arg1, int arg2, int arg3, int arg4)
965 fatal("type > 255 (%d)\n",type);
966 ops[mop++].ival = type + (4<<8);
967 ops[mop++].ival = arg1;
968 ops[mop++].ival = arg2;
969 ops[mop++].ival = arg3;
970 ops[mop++].ival = arg4;
972 fatal("Recompile a2p with larger OPSMAX\n");
/* oper5(type, arg1, ..., arg5): append a five-operand node to ops[].
 * Slots written: header word type + (5<<8), then arg1..arg5 in order.  The
 * conditions guarding the two fatal() calls are on lines not shown. */
977 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
982 fatal("type > 255 (%d)\n",type);
983 ops[mop++].ival = type + (5<<8);
984 ops[mop++].ival = arg1;
985 ops[mop++].ival = arg2;
986 ops[mop++].ival = arg3;
987 ops[mop++].ival = arg4;
988 ops[mop++].ival = arg5;
990 fatal("Recompile a2p with larger OPSMAX\n");
/* Body of dump() (header not in this excerpt; the recursive call below names
 * it).  Prints the node stored at index branch: an OSTRING node is printed as
 * its index followed by the quoted text held in the next slot; any other node
 * is printed as "(", its index, its operator name and len, after which each of
 * its len operand slots is dumped recursively.  The two loops over depth
 * presumably emit indentation - their bodies are on lines not shown. */
1003 type = ops[branch].ival;
1006 for (i=depth; i; i--)
1008 if (type == OSTRING) {
1009 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1012 printf("(%-5d%s %d\n",branch,opname[type],len);
1014 for (i=1; i<=len; i++)
1015 dump(ops[branch+i].ival);
1017 for (i=depth; i; i--)
/* bl(arg, maybe): produce an OBLOCK node for arg.
 * If the node at arg is not an OBLOCK (type is the low byte of the header
 * word), it is wrapped as oper2(OBLOCK,arg,maybe).  If it is an OBLOCK whose
 * operand count (header word >> 8) is below 2, a new OBLOCK is built from its
 * first operand and maybe.  The branch before the first "else if" and the
 * final fall-through are on lines not shown. */
1024 bl(int arg, int maybe)
1028 else if ((ops[arg].ival & 255) != OBLOCK)
1029 return oper2(OBLOCK,arg,maybe);
1030 else if ((ops[arg].ival >> 8) < 2)
1031 return oper2(OBLOCK,ops[arg+1].ival,maybe);
/* Body of a clean-up pass over the text in str->str_ptr (function header not
 * in this excerpt).  It looks for the sequence ';', ' ', newline, and at each
 * newline it skips forward over the following whitespace, backs up while on
 * non-newline whitespace, and tests whether it stopped on a later newline
 * (t-s > 1).  The edits made when these tests succeed are on lines not shown.
 * Characters are masked with "& 127" before being passed to isspace(). */
1042 for (s = str->str_ptr; *s; s++) {
1043 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1047 else if (*s == '\n') {
1048 for (t = s+1; isspace(*t & 127); t++) ;
1050 while (isspace(*t & 127) && *t != '\n') t--;
1051 if (*t == '\n' && t-s > 1) {
/* Locals and main loop of the long-line splitter (function header not in this
 * excerpt).  It walks str->str_ptr while tracking a column in pos; once pos
 * exceeds 78 it scans tokenbuf backwards with d for a place to break, trying
 * in order: a space after ';', a space after "||", a space after "&&", a
 * space after ',', and finally any space.  newpos tracks the column of the
 * continuation text.  How d is initialised before each backward scan and what
 * is emitted at the break are on lines not shown. */
1064 register char *d, *s, *t, *e;
1065 register int pos, newpos;
1069 for (s = str->str_ptr; *s; s++) {
1078 else if (*s == '\t')
1080 if (pos > 78) { /* split a long line? */
1083 for (t = tokenbuf; isspace(*t & 127); t++) {
1090 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
/* NOTE(review): the loop guard only ensures d > tokenbuf, so when
 * d == tokenbuf+1 with *d == ' ' and d[-1] == '|' (or '&' below), d[-2] reads
 * one byte before tokenbuf - confirm this cannot happen or guard with
 * d > tokenbuf+1. */
1094 while (d > tokenbuf &&
1095 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1100 while (d > tokenbuf &&
1101 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1106 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1111 while (d > tokenbuf && *d != ' ')
1121 if (d[-1] != ';' && !(newpos % 4)) {
1127 newpos += strlen(t);
/* Body of the routine that emits one line held in tokenbuf (function header
 * not in this excerpt).  Visible steps: scan tokenbuf character by character;
 * overwrite the last character of t with a tab, the "#???" marker and a
 * newline; test for "#!" lines naming /bin/awk or /usr/bin/awk (the action is
 * on lines not shown); write tokenbuf to stdout. */
1142 for (t = tokenbuf; *t; t++) {
/* NOTE(review): this grows the text by five characters in place; the size of
 * tokenbuf is not visible here - confirm there is room. */
1146 strcpy(t+strlen(t)-1, "\t#???\n");
1152 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1154 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1157 fputs(tokenbuf,stdout);
/* Body of the routine that records an array name (function header not in this
 * excerpt; presumably numary(), which the tokener calls for ARGV - confirm).
 * It walks node arg to obtain its text, stores the string "1" under that text
 * in symtab, and sets set_array_base. */
1166 key = walk(0,0,arg,&dummy,P_MIN);
1168 hstore(symtab,key->str_ptr,str_make("1"));
1170 set_array_base = TRUE;
/* rememberargs(arg): record the variables of an argument list in curarghash.
 * The node type is the low byte of ops[arg].ival.  An OCOMMA node recurses
 * into the operands at arg+1 and arg+3.  An OVAR node stores str (set up on a
 * line not shown) under the variable's name, which is the string held by the
 * node that the first operand points to.  Any other type reaches the fatal()
 * "panic" call. */
1175 rememberargs(int arg)
1182 type = ops[arg].ival & 255;
1183 if (type == OCOMMA) {
1184 rememberargs(ops[arg+1].ival);
1185 rememberargs(ops[arg+3].ival);
1187 else if (type == OVAR) {
1189 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1192 fatal("panic: unknown argument type %d, line %d\n",type,line);
/* Body of aryrefarg() (header not in this excerpt; the panic message names
 * it).  The node at arg must be an OSTRING, otherwise the routine aborts; its
 * text is then looked up in curarghash.  What is done with the fetched str is
 * on lines not shown. */
1199 int type = ops[arg].ival & 255;
1202 if (type != OSTRING)
1203 fatal("panic: aryrefarg %d, line %d\n",type,line);
1204 str = hfetch(curarghash,ops[arg+1].cval);
/* fixfargs(name, arg, prevargs): walk an argument list, counting arguments
 * from prevargs and retagging those recorded as "*" in curarghash.
 *   OCOMMA: process the operand at arg+1 with prevargs, then the operand at
 *           arg+3 with the count produced by the first call.
 *   OVAR:   fetch the variable's entry from curarghash; if its value is "*",
 *           reset it to "", change the node's type byte to OSTAR, format the
 *           key "<function name>:<prevargs>" into tmpbuf, log it to stderr and
 *           store an entry under that key.  numargs becomes prevargs + 1.
 *   other:  fatal "panic".
 * name is an ops[] index; the function's name string is ops[name+1].cval.
 * The return statement is on a line not shown. */
1211 fixfargs(int name, int arg, int prevargs)
1219 type = ops[arg].ival & 255;
1220 if (type == OCOMMA) {
1221 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1222 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1224 else if (type == OVAR) {
/* NOTE(review): no NULL check on the hfetch() result is visible before
 * str_get(str); fixrargs() in this file tests "str &&" first - confirm a
 * missing key cannot occur here. */
1225 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1226 if (strEQ(str_get(str),"*")) {
1229 str_set(str,""); /* in case another routine has this */
/* replace only the type byte, keeping the operand count in the upper bits */
1230 ops[arg].ival &= ~255;
1231 ops[arg].ival |= OSTAR;
/* NOTE(review): unbounded sprintf(); the declaration of tmpbuf is not visible
 * here - confirm it can hold the longest function name plus ":<number>". */
1232 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1233 fprintf(stderr,"Adding %s\n",tmpbuf);
1236 hstore(curarghash,tmpbuf,str);
1238 numargs = prevargs + 1;
1241 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1242 type,prevargs+1,line);
1247 fixrargs(char *name, int arg, int prevargs)
1255 type = ops[arg].ival & 255;
1256 if (type == OCOMMA) {
1257 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1258 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1261 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1262 sprintf(tmpbuf,"%s:%d",name,prevargs);
1263 str = hfetch(curarghash,tmpbuf);
1265 if (str && strEQ(str->str_ptr,"*")) {
1266 if (type == OVAR || type == OSTAR) {
1267 ops[arg].ival &= ~255;
1268 ops[arg].ival |= OSTAR;
1271 fatal("Can't pass expression by reference as arg %d of %s\n",
1274 numargs = prevargs + 1;