1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
17 #include "../netware/clibstuf.h"
19 #include "../patchlevel.h"
29 int oper1(int type, int arg1);
30 int oper2(int type, int arg1, int arg2);
31 int oper3(int type, int arg1, int arg2, int arg3);
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
34 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
36 char *savestr(char *str);
37 char *cpy2(register char *to, register char *from, register int delim);
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
46 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48 printf("\n -D<number> sets debugging flags."
49 "\n -F<character> the awk script to translate is always invoked with"
51 "\n -n<fieldlist> specifies the names of the input fields if input does"
52 "\n not have to be split into an array."
53 "\n -<number> causes a2p to assume that input will always have that"
60 main(register int argc, register char **argv, register char **env)
68 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
72 linestr = str_new(80);
73 str = str_new(0); /* first used for -I flags */
74 for (argc--,argv++; argc; argc--,argv++) {
75 if (argv[0][0] != '-' || !argv[0][1])
80 debug = atoi(argv[0]+2);
82 yydebug = (debug & 1);
86 case '0': case '1': case '2': case '3': case '4':
87 case '5': case '6': case '7': case '8': case '9':
88 maxfld = atoi(argv[0]+1);
95 namelist = savestr(argv[0]+2);
106 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
107 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
110 fatal("Unrecognized switch: %s\n",argv[0]);
118 if (argv[0] == Nullch) {
119 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
120 if ( isatty(fileno(stdin)) )
125 filename = savestr(argv[0]);
127 filename = savestr(argv[0]);
128 if (strEQ(filename,"-"))
133 rsfp = fopen(argv[0],"r");
135 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
139 bufptr = str_get(linestr);
143 /* now parse the report spec */
146 fatal("Translation aborted due to syntax errors.\n");
156 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
158 printf("\t\"%s\"\n",ops[i].cval),i++;
161 printf("\t%d",ops[i].ival),i++;
171 /* first pass to look for numeric variables */
173 prewalk(0,0,root,&i);
175 /* second pass to produce new program */
177 tmpstr = walk(0,0,root,&i,P_MIN);
178 str = str_make(STARTPERL);
179 str_cat(str, "\neval 'exec ");
181 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
182 if $running_under_some_shell;\n\
183 # this emulates #! processing on NIH machines.\n\
184 # (remove #! line above if indigestible)\n\n");
186 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
188 " # process any FOO=bar switches\n\n");
189 if (do_opens && opens) {
194 str_scat(str,tmpstr);
203 "Please check my work on the %d line%s I've marked with \"#???\".\n",
204 checkers, checkers == 1 ? "" : "s" );
206 "The operation I've selected may be wrong for the operand types.\n");
209 /* ANSI C requires a return here. This also silences VC++ warnings. */
213 #define RETURN(retval) return (bufptr = s,retval)
214 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
215 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
216 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
223 register char *s = bufptr;
231 fprintf(stderr,"Tokener at %s",s);
233 fprintf(stderr,"Tokener at %s\n",s);
239 "Unrecognized character %c in file %s line %d--ignoring.\n",
244 if (*s && *s != '\n') {
245 yyerror("Ignoring spurious backslash");
250 s = str_get(linestr);
255 if ((s = str_gets(linestr, rsfp)) == Nullch) {
259 s = str_get(linestr);
270 yylval = string(s,0);
299 for (d = s + 1; isSPACE(*d); d++) ;
309 yylval = string("~",1);
327 yylval = string("**=",3);
329 yylval = string(s-1,2);
347 while (*s == ' ' || *s == '\t')
349 if (strnEQ(s,"getline",7))
357 yylval = string("==",2);
361 yylval = string("=",1);
367 yylval = string("!=",2);
371 yylval = string("!~",2);
380 yylval = string("<=",2);
389 yylval = string(">>",2);
393 yylval = string(">=",2);
401 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
421 for (d = s; isDIGIT(*s); s++) ;
422 yylval = string(d,s-d);
428 split_to_array = set_array_base = TRUE;
431 case '/': /* may either be division or pattern */
438 yylval = string("/=",2);
444 case '0': case '1': case '2': case '3': case '4':
445 case '5': case '6': case '7': case '8': case '9': case '.':
450 s = cpy2(tokenbuf,s,s[-1]);
452 fatal("String not terminated:\n%s",str_get(linestr));
454 yylval = string(tokenbuf,0);
460 set_array_base = TRUE;
461 if (strEQ(d,"ARGV")) {
462 yylval=numary(string("ARGV",0));
465 if (strEQ(d,"atan2")) {
472 if (strEQ(d,"break"))
474 if (strEQ(d,"BEGIN"))
479 if (strEQ(d,"continue"))
481 if (strEQ(d,"cos")) {
485 if (strEQ(d,"close")) {
490 if (strEQ(d,"chdir"))
492 else if (strEQ(d,"crypt"))
494 else if (strEQ(d,"chop"))
496 else if (strEQ(d,"chmod"))
498 else if (strEQ(d,"chown"))
505 if (strEQ(d,"delete"))
516 if (strEQ(d,"exit")) {
520 if (strEQ(d,"exp")) {
524 if (strEQ(d,"elsif"))
526 else if (strEQ(d,"eq"))
528 else if (strEQ(d,"eval"))
530 else if (strEQ(d,"eof"))
532 else if (strEQ(d,"each"))
534 else if (strEQ(d,"exec"))
541 if (saw_FS == 1 && in_begin) {
542 for (d = s; *d && isSPACE(*d); d++) ;
544 for (d++; *d && isSPACE(*d); d++) ;
545 if (*d == '"' && d[2] == '"')
553 else if (strEQ(d,"function"))
555 if (strEQ(d,"FILENAME"))
557 if (strEQ(d,"foreach"))
559 else if (strEQ(d,"format"))
561 else if (strEQ(d,"fork"))
563 else if (strEQ(d,"fh"))
568 if (strEQ(d,"getline"))
574 else if (strEQ(d,"gt"))
576 else if (strEQ(d,"goto"))
578 else if (strEQ(d,"gmtime"))
592 if (strEQ(d,"index")) {
593 set_array_base = TRUE;
596 if (strEQ(d,"int")) {
610 else if (strEQ(d,"kill"))
615 if (strEQ(d,"length")) {
619 if (strEQ(d,"log")) {
625 else if (strEQ(d,"local"))
627 else if (strEQ(d,"lt"))
629 else if (strEQ(d,"le"))
631 else if (strEQ(d,"localtime")) /* spelled out in full, like the gmtime/time/times tests */
633 else if (strEQ(d,"link"))
638 if (strEQ(d,"match")) {
639 set_array_base = TRUE;
648 do_chop = do_split = split_to_array = set_array_base = TRUE;
649 if (strEQ(d,"next")) {
658 if (strEQ(d,"ORS")) {
662 if (strEQ(d,"OFS")) {
666 if (strEQ(d,"OFMT")) {
671 else if (strEQ(d,"ord"))
673 else if (strEQ(d,"oct"))
678 if (strEQ(d,"print")) {
681 if (strEQ(d,"printf")) {
686 else if (strEQ(d,"pop"))
698 if (strEQ(d,"rand")) {
702 if (strEQ(d,"return"))
704 if (strEQ(d,"reset"))
706 else if (strEQ(d,"redo"))
708 else if (strEQ(d,"rename"))
713 if (strEQ(d,"split")) {
714 set_array_base = TRUE;
717 if (strEQ(d,"substr")) {
718 set_array_base = TRUE;
723 if (strEQ(d,"sprintf")) {
724 /* In old awk, { print sprintf("str%sg"),"in" } prints
725 * "string"; in new awk, "in" is not considered an argument to
726 * sprintf, so the statement breaks. To support both, the
727 * grammar treats arguments to SPRINTF_OLD like old awk,
728 * SPRINTF_NEW like new. Here we return the appropriate one.
730 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
732 if (strEQ(d,"sqrt")) {
736 if (strEQ(d,"SUBSEP")) {
739 if (strEQ(d,"sin")) {
743 if (strEQ(d,"srand")) {
747 if (strEQ(d,"system")) {
753 else if (strEQ(d,"shift"))
755 else if (strEQ(d,"select"))
757 else if (strEQ(d,"seek"))
759 else if (strEQ(d,"stat"))
761 else if (strEQ(d,"study"))
763 else if (strEQ(d,"sleep"))
765 else if (strEQ(d,"symlink"))
767 else if (strEQ(d,"sort"))
774 else if (strEQ(d,"tell"))
776 else if (strEQ(d,"time"))
778 else if (strEQ(d,"times"))
783 if (strEQ(d,"until"))
785 else if (strEQ(d,"unless"))
787 else if (strEQ(d,"umask"))
789 else if (strEQ(d,"unshift"))
791 else if (strEQ(d,"unlink"))
793 else if (strEQ(d,"utime"))
798 if (strEQ(d,"values"))
803 if (strEQ(d,"while"))
805 if (strEQ(d,"write"))
807 else if (strEQ(d,"wait"))
827 scanpat(register char *s)
835 fatal("Search pattern not found:\n%s",str_get(linestr));
839 for (; *s; s++,d++) {
843 else if (s[1] == '\\')
845 else if (s[1] == '[')
848 else if (*s == '[') {
851 if (*s == '\\' && s[1])
853 if (*s == '/' || (*s == '-' && s[1] == ']'))
856 } while (*s && *s != ']');
865 fatal("Search pattern not terminated:\n%s",str_get(linestr));
867 yylval = string(tokenbuf,0);
874 fprintf(stderr,"%s in file %s at line %d\n",
879 scannum(register char *s)
884 case '1': case '2': case '3': case '4': case '5':
885 case '6': case '7': case '8': case '9': case '0' : case '.':
887 while (isDIGIT(*s)) {
893 while (isDIGIT(*s)) {
900 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) { /* strchr() also matches the terminating NUL, so rule it out before each lookup */
902 if (*s == '+' || *s == '-')
908 yylval = string(tokenbuf,0);
915 string(char *ptr, int len)
919 ops[mop++].ival = OSTRING + (1<<8);
922 ops[mop].cval = (char *) safemalloc(len+1);
923 strncpy(ops[mop].cval,ptr,len);
924 ops[mop++].cval[len] = '\0';
926 fatal("Recompile a2p with larger OPSMAX\n");
936 fatal("type > 255 (%d)\n",type);
937 ops[mop++].ival = type;
939 fatal("Recompile a2p with larger OPSMAX\n");
944 oper1(int type, int arg1)
949 fatal("type > 255 (%d)\n",type);
950 ops[mop++].ival = type + (1<<8);
951 ops[mop++].ival = arg1;
953 fatal("Recompile a2p with larger OPSMAX\n");
958 oper2(int type, int arg1, int arg2)
963 fatal("type > 255 (%d)\n",type);
964 ops[mop++].ival = type + (2<<8);
965 ops[mop++].ival = arg1;
966 ops[mop++].ival = arg2;
968 fatal("Recompile a2p with larger OPSMAX\n");
973 oper3(int type, int arg1, int arg2, int arg3)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (3<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
984 fatal("Recompile a2p with larger OPSMAX\n");
989 oper4(int type, int arg1, int arg2, int arg3, int arg4)
994 fatal("type > 255 (%d)\n",type);
995 ops[mop++].ival = type + (4<<8);
996 ops[mop++].ival = arg1;
997 ops[mop++].ival = arg2;
998 ops[mop++].ival = arg3;
999 ops[mop++].ival = arg4;
1001 fatal("Recompile a2p with larger OPSMAX\n");
1006 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1011 fatal("type > 255 (%d)\n",type);
1012 ops[mop++].ival = type + (5<<8);
1013 ops[mop++].ival = arg1;
1014 ops[mop++].ival = arg2;
1015 ops[mop++].ival = arg3;
1016 ops[mop++].ival = arg4;
1017 ops[mop++].ival = arg5;
1019 fatal("Recompile a2p with larger OPSMAX\n");
1032 type = ops[branch].ival;
1035 for (i=depth; i; i--)
1037 if (type == OSTRING) {
1038 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1041 printf("(%-5d%s %d\n",branch,opname[type],len);
1043 for (i=1; i<=len; i++)
1044 dump(ops[branch+i].ival);
1046 for (i=depth; i; i--)
1053 bl(int arg, int maybe)
1057 else if ((ops[arg].ival & 255) != OBLOCK)
1058 return oper2(OBLOCK,arg,maybe);
1059 else if ((ops[arg].ival >> 8) < 2)
1060 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1071 for (s = str->str_ptr; *s; s++) {
1072 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1076 else if (*s == '\n') {
1077 for (t = s+1; isSPACE(*t & 127); t++) ;
1079 while (isSPACE(*t & 127) && *t != '\n') t--;
1080 if (*t == '\n' && t-s > 1) {
1093 register char *d, *s, *t, *e;
1094 register int pos, newpos;
1098 for (s = str->str_ptr; *s; s++) {
1107 else if (*s == '\t')
1109 if (pos > 78) { /* split a long line? */
1112 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1119 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1123 while (d > tokenbuf &&
1124 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1129 while (d > tokenbuf &&
1130 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1135 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1140 while (d > tokenbuf && *d != ' ')
1150 if (d[-1] != ';' && !(newpos % 4)) {
1156 newpos += strlen(t);
1171 for (t = tokenbuf; *t; t++) {
1175 strcpy(t+strlen(t)-1, "\t#???\n");
1181 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1183 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1186 fputs(tokenbuf,stdout);
1195 key = walk(0,0,arg,&dummy,P_MIN);
1197 hstore(symtab,key->str_ptr,str_make("1"));
1199 set_array_base = TRUE;
1204 rememberargs(int arg)
1211 type = ops[arg].ival & 255;
1212 if (type == OCOMMA) {
1213 rememberargs(ops[arg+1].ival);
1214 rememberargs(ops[arg+3].ival);
1216 else if (type == OVAR) {
1218 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1221 fatal("panic: unknown argument type %d, line %d\n",type,line);
1228 int type = ops[arg].ival & 255;
1231 if (type != OSTRING)
1232 fatal("panic: aryrefarg %d, line %d\n",type,line);
1233 str = hfetch(curarghash,ops[arg+1].cval);
1240 fixfargs(int name, int arg, int prevargs)
1248 type = ops[arg].ival & 255;
1249 if (type == OCOMMA) {
1250 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1251 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1253 else if (type == OVAR) {
1254 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1255 if (strEQ(str_get(str),"*")) {
1258 str_set(str,""); /* in case another routine has this */
1259 ops[arg].ival &= ~255;
1260 ops[arg].ival |= OSTAR;
1261 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1262 fprintf(stderr,"Adding %s\n",tmpbuf);
1265 hstore(curarghash,tmpbuf,str);
1267 numargs = prevargs + 1;
1270 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1271 type,prevargs+1,line);
1276 fixrargs(char *name, int arg, int prevargs)
1284 type = ops[arg].ival & 255;
1285 if (type == OCOMMA) {
1286 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1287 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1290 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1291 sprintf(tmpbuf,"%s:%d",name,prevargs);
1292 str = hfetch(curarghash,tmpbuf);
1294 if (str && strEQ(str->str_ptr,"*")) {
1295 if (type == OVAR || type == OSTAR) {
1296 ops[arg].ival &= ~255;
1297 ops[arg].ival |= OSTAR;
1300 fatal("Can't pass expression by reference as arg %d of %s\n",
1303 numargs = prevargs + 1;