1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
16 #include "../netware/clibstuf.h"
18 #include "../patchlevel.h"
/*
 * Forward declarations for the op-table node builders and the tree walker.
 *
 * Each operN() definition further down stores one word `type + (N<<8)` into
 * ops[] followed by its N argument words (arg1..argN), and contains fatal()
 * calls with the messages "type > 255" and "Recompile a2p with larger OPSMAX".
 * NOTE(review): the return value is not visible in this excerpt -- callers
 * such as bl() use it as a node index; confirm against the full definitions.
 */
28 int oper1(int type, int arg1);
29 int oper2(int type, int arg1, int arg2);
30 int oper3(int type, int arg1, int arg2, int arg3);
31 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
32 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
/*
 * walk() is not defined in the visible part of this file.  main() calls it as
 * walk(0,0,root,&i,P_MIN) for the "second pass to produce new program", and
 * it hands back a STR* that is appended to the output string.
 */
33 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
35 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
36 static void usage(void);
41 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
42 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
43 printf("\n -D<number> sets debugging flags."
44 "\n -F<character> the awk script to translate is always invoked with"
46 "\n -n<fieldlist> specifies the names of the input fields if input does"
47 "\n not have to be split into an array."
48 "\n -<number> causes a2p to assume that input will always have that"
55 main(register int argc, register char **argv, register char **env)
62 fnInitGpfGlobals(); // For importing the CLIB calls in place of Watcom calls
66 linestr = str_new(80);
67 str = str_new(0); /* first used for -I flags */
68 for (argc--,argv++; argc; argc--,argv++) {
69 if (argv[0][0] != '-' || !argv[0][1])
74 debug = atoi(argv[0]+2);
76 yydebug = (debug & 1);
80 case '0': case '1': case '2': case '3': case '4':
81 case '5': case '6': case '7': case '8': case '9':
82 maxfld = atoi(argv[0]+1);
89 namelist = savestr(argv[0]+2);
100 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
101 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
104 fatal("Unrecognized switch: %s\n",argv[0]);
112 if (argv[0] == Nullch) {
113 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
114 if ( isatty(fileno(stdin)) )
119 filename = savestr(argv[0]);
121 filename = savestr(argv[0]);
122 if (strEQ(filename,"-"))
127 rsfp = fopen(argv[0],"r");
129 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
133 bufptr = str_get(linestr);
137 /* now parse the report spec */
140 fatal("Translation aborted due to syntax errors.\n");
150 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
152 printf("\t\"%s\"\n",ops[i].cval),i++;
155 printf("\t%d",ops[i].ival),i++;
165 /* first pass to look for numeric variables */
167 prewalk(0,0,root,&i);
169 /* second pass to produce new program */
171 tmpstr = walk(0,0,root,&i,P_MIN);
172 str = str_make(STARTPERL);
173 str_cat(str, "\neval 'exec ");
175 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
176 if $running_under_some_shell;\n\
177 # this emulates #! processing on NIH machines.\n\
178 # (remove #! line above if indigestible)\n\n");
180 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
182 " # process any FOO=bar switches\n\n");
183 if (do_opens && opens) {
188 str_scat(str,tmpstr);
197 "Please check my work on the %d line%s I've marked with \"#???\".\n",
198 checkers, checkers == 1 ? "" : "s" );
200 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Return helpers for the tokener.  Every one of them stores the local scan
 * pointer `s` back into bufptr before returning, so the next call resumes
 * where this one stopped:
 *   RETURN(retval) - returns retval and nothing else.
 *   XTERM(retval)  - additionally sets expectterm to TRUE.
 *   XOP(retval)    - additionally sets expectterm to FALSE.
 *   ID(x)          - sets yylval to string(x,0), sets expectterm to FALSE and
 *                    returns idtype.
 * They expand to a bare `return` statement and rely on `s` (and, for ID,
 * `idtype`) being in scope at the point of use.
 */
205 #define RETURN(retval) return (bufptr = s,retval)
206 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
207 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
208 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
215 register char *s = bufptr;
223 fprintf(stderr,"Tokener at %s",s);
225 fprintf(stderr,"Tokener at %s\n",s);
231 "Unrecognized character %c in file %s line %d--ignoring.\n",
236 if (*s && *s != '\n') {
237 yyerror("Ignoring spurious backslash");
242 s = str_get(linestr);
247 if ((s = str_gets(linestr, rsfp)) == Nullch) {
251 s = str_get(linestr);
262 yylval = string(s,0);
291 for (d = s + 1; isSPACE(*d); d++) ;
301 yylval = string("~",1);
319 yylval = string("**=",3);
321 yylval = string(s-1,2);
339 while (*s == ' ' || *s == '\t')
341 if (strnEQ(s,"getline",7))
349 yylval = string("==",2);
353 yylval = string("=",1);
359 yylval = string("!=",2);
363 yylval = string("!~",2);
372 yylval = string("<=",2);
381 yylval = string(">>",2);
385 yylval = string(">=",2);
393 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
413 for (d = s; isDIGIT(*s); s++) ;
414 yylval = string(d,s-d);
420 split_to_array = set_array_base = TRUE;
423 case '/': /* may either be division or pattern */
430 yylval = string("/=",2);
436 case '0': case '1': case '2': case '3': case '4':
437 case '5': case '6': case '7': case '8': case '9': case '.':
442 s = cpy2(tokenbuf,s,s[-1]);
444 fatal("String not terminated:\n%s",str_get(linestr));
446 yylval = string(tokenbuf,0);
452 set_array_base = TRUE;
453 if (strEQ(d,"ARGV")) {
454 yylval=numary(string("ARGV",0));
457 if (strEQ(d,"atan2")) {
464 if (strEQ(d,"break"))
466 if (strEQ(d,"BEGIN"))
471 if (strEQ(d,"continue"))
473 if (strEQ(d,"cos")) {
477 if (strEQ(d,"close")) {
482 if (strEQ(d,"chdir"))
484 else if (strEQ(d,"crypt"))
486 else if (strEQ(d,"chop"))
488 else if (strEQ(d,"chmod"))
490 else if (strEQ(d,"chown"))
497 if (strEQ(d,"delete"))
508 if (strEQ(d,"exit")) {
512 if (strEQ(d,"exp")) {
516 if (strEQ(d,"elsif"))
518 else if (strEQ(d,"eq"))
520 else if (strEQ(d,"eval"))
522 else if (strEQ(d,"eof"))
524 else if (strEQ(d,"each"))
526 else if (strEQ(d,"exec"))
533 if (saw_FS == 1 && in_begin) {
534 for (d = s; *d && isSPACE(*d); d++) ;
536 for (d++; *d && isSPACE(*d); d++) ;
537 if (*d == '"' && d[2] == '"')
545 else if (strEQ(d,"function"))
547 if (strEQ(d,"FILENAME"))
549 if (strEQ(d,"foreach"))
551 else if (strEQ(d,"format"))
553 else if (strEQ(d,"fork"))
555 else if (strEQ(d,"fh"))
560 if (strEQ(d,"getline"))
566 else if (strEQ(d,"gt"))
568 else if (strEQ(d,"goto"))
570 else if (strEQ(d,"gmtime"))
584 if (strEQ(d,"index")) {
585 set_array_base = TRUE;
588 if (strEQ(d,"int")) {
602 else if (strEQ(d,"kill"))
607 if (strEQ(d,"length")) {
611 if (strEQ(d,"log")) {
617 else if (strEQ(d,"local"))
619 else if (strEQ(d,"lt"))
621 else if (strEQ(d,"le"))
623 else if (strEQ(d,"locatime"))
625 else if (strEQ(d,"link"))
630 if (strEQ(d,"match")) {
631 set_array_base = TRUE;
640 do_chop = do_split = split_to_array = set_array_base = TRUE;
641 if (strEQ(d,"next")) {
650 if (strEQ(d,"ORS")) {
654 if (strEQ(d,"OFS")) {
658 if (strEQ(d,"OFMT")) {
663 else if (strEQ(d,"ord"))
665 else if (strEQ(d,"oct"))
670 if (strEQ(d,"print")) {
673 if (strEQ(d,"printf")) {
678 else if (strEQ(d,"pop"))
690 if (strEQ(d,"rand")) {
694 if (strEQ(d,"return"))
696 if (strEQ(d,"reset"))
698 else if (strEQ(d,"redo"))
700 else if (strEQ(d,"rename"))
705 if (strEQ(d,"split")) {
706 set_array_base = TRUE;
709 if (strEQ(d,"substr")) {
710 set_array_base = TRUE;
715 if (strEQ(d,"sprintf")) {
716 /* In old awk, { print sprintf("str%sg"),"in" } prints
717 * "string"; in new awk, "in" is not considered an argument to
718 * sprintf, so the statement breaks. To support both, the
719 * grammar treats arguments to SPRINTF_OLD like old awk,
720 * SPRINTF_NEW like new. Here we return the appropriate one.
722 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
724 if (strEQ(d,"sqrt")) {
728 if (strEQ(d,"SUBSEP")) {
731 if (strEQ(d,"sin")) {
735 if (strEQ(d,"srand")) {
739 if (strEQ(d,"system")) {
745 else if (strEQ(d,"shift"))
747 else if (strEQ(d,"select"))
749 else if (strEQ(d,"seek"))
751 else if (strEQ(d,"stat"))
753 else if (strEQ(d,"study"))
755 else if (strEQ(d,"sleep"))
757 else if (strEQ(d,"symlink"))
759 else if (strEQ(d,"sort"))
766 else if (strEQ(d,"tell"))
768 else if (strEQ(d,"time"))
770 else if (strEQ(d,"times"))
775 if (strEQ(d,"until"))
777 else if (strEQ(d,"unless"))
779 else if (strEQ(d,"umask"))
781 else if (strEQ(d,"unshift"))
783 else if (strEQ(d,"unlink"))
785 else if (strEQ(d,"utime"))
790 if (strEQ(d,"values"))
795 if (strEQ(d,"while"))
797 if (strEQ(d,"write"))
799 else if (strEQ(d,"wait"))
819 scanpat(register char *s)
827 fatal("Search pattern not found:\n%s",str_get(linestr));
831 for (; *s; s++,d++) {
835 else if (s[1] == '\\')
837 else if (s[1] == '[')
840 else if (*s == '[') {
843 if (*s == '\\' && s[1])
845 if (*s == '/' || (*s == '-' && s[1] == ']'))
848 } while (*s && *s != ']');
857 fatal("Search pattern not terminated:\n%s",str_get(linestr));
859 yylval = string(tokenbuf,0);
866 fprintf(stderr,"%s in file %s at line %d\n",
871 scannum(register char *s)
876 case '1': case '2': case '3': case '4': case '5':
877 case '6': case '7': case '8': case '9': case '0' : case '.':
879 while (isDIGIT(*s)) {
885 while (isDIGIT(*s)) {
892 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
894 if (*s == '+' || *s == '-')
900 yylval = string(tokenbuf,0);
907 string(char *ptr, int len)
911 ops[mop++].ival = OSTRING + (1<<8);
914 ops[mop].cval = (char *) safemalloc(len+1);
915 strncpy(ops[mop].cval,ptr,len);
916 ops[mop++].cval[len] = '\0';
918 fatal("Recompile a2p with larger OPSMAX\n");
928 fatal("type > 255 (%d)\n",type);
929 ops[mop++].ival = type;
931 fatal("Recompile a2p with larger OPSMAX\n");
936 oper1(int type, int arg1)
941 fatal("type > 255 (%d)\n",type);
942 ops[mop++].ival = type + (1<<8);
943 ops[mop++].ival = arg1;
945 fatal("Recompile a2p with larger OPSMAX\n");
950 oper2(int type, int arg1, int arg2)
955 fatal("type > 255 (%d)\n",type);
956 ops[mop++].ival = type + (2<<8);
957 ops[mop++].ival = arg1;
958 ops[mop++].ival = arg2;
960 fatal("Recompile a2p with larger OPSMAX\n");
965 oper3(int type, int arg1, int arg2, int arg3)
970 fatal("type > 255 (%d)\n",type);
971 ops[mop++].ival = type + (3<<8);
972 ops[mop++].ival = arg1;
973 ops[mop++].ival = arg2;
974 ops[mop++].ival = arg3;
976 fatal("Recompile a2p with larger OPSMAX\n");
981 oper4(int type, int arg1, int arg2, int arg3, int arg4)
986 fatal("type > 255 (%d)\n",type);
987 ops[mop++].ival = type + (4<<8);
988 ops[mop++].ival = arg1;
989 ops[mop++].ival = arg2;
990 ops[mop++].ival = arg3;
991 ops[mop++].ival = arg4;
993 fatal("Recompile a2p with larger OPSMAX\n");
998 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1003 fatal("type > 255 (%d)\n",type);
1004 ops[mop++].ival = type + (5<<8);
1005 ops[mop++].ival = arg1;
1006 ops[mop++].ival = arg2;
1007 ops[mop++].ival = arg3;
1008 ops[mop++].ival = arg4;
1009 ops[mop++].ival = arg5;
1011 fatal("Recompile a2p with larger OPSMAX\n");
1024 type = ops[branch].ival;
1027 for (i=depth; i; i--)
1029 if (type == OSTRING) {
1030 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1033 printf("(%-5d%s %d\n",branch,opname[type],len);
1035 for (i=1; i<=len; i++)
1036 dump(ops[branch+i].ival);
1038 for (i=depth; i; i--)
1045 bl(int arg, int maybe)
1049 else if ((ops[arg].ival & 255) != OBLOCK)
1050 return oper2(OBLOCK,arg,maybe);
1051 else if ((ops[arg].ival >> 8) < 2)
1052 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1063 for (s = str->str_ptr; *s; s++) {
1064 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1068 else if (*s == '\n') {
1069 for (t = s+1; isSPACE(*t & 127); t++) ;
1071 while (isSPACE(*t & 127) && *t != '\n') t--;
1072 if (*t == '\n' && t-s > 1) {
1085 register char *d, *s, *t, *e;
1086 register int pos, newpos;
1090 for (s = str->str_ptr; *s; s++) {
1099 else if (*s == '\t')
1101 if (pos > 78) { /* split a long line? */
1104 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1111 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1115 while (d > tokenbuf &&
1116 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1121 while (d > tokenbuf &&
1122 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1127 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1132 while (d > tokenbuf && *d != ' ')
1142 if (d[-1] != ';' && !(newpos % 4)) {
1148 newpos += strlen(t);
1163 for (t = tokenbuf; *t; t++) {
1167 strcpy(t+strlen(t)-1, "\t#???\n");
1173 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1175 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1178 fputs(tokenbuf,stdout);
1187 key = walk(0,0,arg,&dummy,P_MIN);
1189 hstore(symtab,key->str_ptr,str_make("1"));
1191 set_array_base = TRUE;
1196 rememberargs(int arg)
1203 type = ops[arg].ival & 255;
1204 if (type == OCOMMA) {
1205 rememberargs(ops[arg+1].ival);
1206 rememberargs(ops[arg+3].ival);
1208 else if (type == OVAR) {
1210 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1213 fatal("panic: unknown argument type %d, line %d\n",type,line);
1220 int type = ops[arg].ival & 255;
1223 if (type != OSTRING)
1224 fatal("panic: aryrefarg %d, line %d\n",type,line);
1225 str = hfetch(curarghash,ops[arg+1].cval);
1232 fixfargs(int name, int arg, int prevargs)
1240 type = ops[arg].ival & 255;
1241 if (type == OCOMMA) {
1242 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1243 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1245 else if (type == OVAR) {
1246 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1247 if (strEQ(str_get(str),"*")) {
1250 str_set(str,""); /* in case another routine has this */
1251 ops[arg].ival &= ~255;
1252 ops[arg].ival |= OSTAR;
1253 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1254 fprintf(stderr,"Adding %s\n",tmpbuf);
1257 hstore(curarghash,tmpbuf,str);
1259 numargs = prevargs + 1;
1262 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1263 type,prevargs+1,line);
1268 fixrargs(char *name, int arg, int prevargs)
1276 type = ops[arg].ival & 255;
1277 if (type == OCOMMA) {
1278 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1279 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1282 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1283 sprintf(tmpbuf,"%s:%d",name,prevargs);
1284 str = hfetch(curarghash,tmpbuf);
1286 if (str && strEQ(str->str_ptr,"*")) {
1287 if (type == OVAR || type == OSTAR) {
1288 ops[arg].ival &= ~255;
1289 ops[arg].ival |= OSTAR;
1292 fatal("Can't pass expression by reference as arg %d of %s\n",
1295 numargs = prevargs + 1;