1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
16 #include "../netware/clibstuf.h"
18 #include "../patchlevel.h"
/*
 * Forward declarations.  Each operN takes a node type plus N integer
 * arguments; walk is the tree walker that main calls for the second pass.
 * usage is declared after the OS2/WIN32/NETWARE conditional below.
 */
28 int oper1(int type, int arg1);
29 int oper2(int type, int arg1, int arg2);
30 int oper3(int type, int arg1, int arg2, int arg3);
31 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
32 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
33 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
35 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
36 static void usage(void);
/*
 * Body of the usage message (the function header is not visible here).
 * Prints the translator banner with PERL_REVISION and PERL_VERSION, then
 * the command synopsis with myname as the program name, then one
 * description per switch.
 */
41 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
42 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
43 printf("\n -D<number> sets debugging flags."
44 "\n -F<character> the awk script to translate is always invoked with"
46 "\n -n<fieldlist> specifies the names of the input fields if input does"
47 "\n not have to be split into an array."
48 "\n -<number> causes a2p to assume that input will always have that"
/*
 * Program entry point.  The visible lines parse the command-line switches,
 * open the awk script, run two passes over the parsed program (prewalk,
 * then walk) and assemble the generated Perl text in str.
 * NOTE(review): many statements between the lines below are not visible,
 * so the comments describe only what the shown lines do.
 */
55 main(register int argc, register char **argv, register char **env)
62 fnInitGpfGlobals(); // For importing the CLIB calls in place of Watcom calls
66 linestr = str_new(80);
67 str = str_new(0); /* first used for -I flags */
/* Step through the arguments; the test below singles out any argument that
 * is not a dash followed by at least one more character. */
68 for (argc--,argv++; argc; argc--,argv++) {
69 if (argv[0][0] != '-' || !argv[0][1])
/* Debug flags come from the text after the two-character switch; bit 0 is
 * copied into yydebug. */
74 debug = atoi(argv[0]+2);
76 yydebug = (debug & 1);
/* A switch that starts with a digit: the number after the dash is stored
 * in maxfld. */
80 case '0': case '1': case '2': case '3': case '4':
81 case '5': case '6': case '7': case '8': case '9':
82 maxfld = atoi(argv[0]+1);
/* Keeps a copy of the text after the two-character switch as namelist
 * (presumably the -n switch described in the usage text; the case label is
 * not visible). */
89 namelist = savestr(argv[0]+2);
/* Unknown switch: a message on stderr under the OS2/WIN32/NETWARE
 * conditional, and a fatal error on the other visible line (presumably the
 * alternative branch of the conditional). */
100 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
101 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
104 fatal("Unrecognized switch: %s\n",argv[0]);
/* No script name left on the command line. */
112 if (argv[0] == Nullch) {
113 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
114 if ( isatty(fileno(stdin)) )
119 filename = savestr(argv[0]);
121 filename = savestr(argv[0]);
/* A file name consisting of a single dash is tested for here; the action
 * taken is not visible. */
122 if (strEQ(filename,"-"))
127 rsfp = fopen(argv[0],"r");
129 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
133 bufptr = str_get(linestr);
137 /* now parse the report spec */
140 fatal("Translation aborted due to syntax errors.\n");
/* Listing of ops[]: index, type, length and operator name, followed by a
 * string payload or integer arguments (presumably debug-only output; the
 * guard is not visible). */
150 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
152 printf("\t\"%s\"\n",ops[i].cval),i++;
155 printf("\t%d",ops[i].ival),i++;
165 /* first pass to look for numeric variables */
167 prewalk(0,0,root,&i);
169 /* second pass to produce new program */
171 tmpstr = walk(0,0,root,&i,P_MIN);
/* Build the output: interpreter line from STARTPERL, a shell fallback that
 * re-executes the script under perl, the FOO=bar switch handling, and then
 * the translated program text from the second pass. */
172 str = str_make(STARTPERL);
173 str_cat(str, "\neval 'exec ");
175 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
176 if $running_under_some_shell;\n\
177 # this emulates #! processing on NIH machines.\n\
178 # (remove #! line above if indigestible)\n\n");
180 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
182 " # process any FOO=bar switches\n\n");
183 if (do_opens && opens) {
188 str_scat(str,tmpstr);
/* Report how many generated lines were marked for manual checking. */
197 "Please check my work on the %d line%s I've marked with \"#???\".\n",
198 checkers, checkers == 1 ? "" : "s" );
200 "The operation I've selected may be wrong for the operand types.\n");
203 /* ANSI C requires main to return a value; this also silences the VC++ warnings */
/*
 * Tokenizer return helpers.  Each one stores the local scan pointer s back
 * into bufptr and then returns a token value.
 *   RETURN - returns retval, leaves expectterm untouched.
 *   XTERM  - sets expectterm to TRUE, returns retval.
 *   XOP    - sets expectterm to FALSE, returns retval.
 *   ID     - records x as a string node in yylval via string(x,0), sets
 *            expectterm to FALSE and returns idtype.
 */
207 #define RETURN(retval) return (bufptr = s,retval)
208 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
209 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
210 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
/*
 * Start of the tokenizer body (the function header is not visible here).
 * Scanning resumes at bufptr, where the return macros left it.
 */
217 register char *s = bufptr;
/* Trace of the remaining input on stderr (presumably only when debugging
 * is enabled; the guard is not visible). */
225 fprintf(stderr,"Tokener at %s",s);
227 fprintf(stderr,"Tokener at %s\n",s);
233 "Unrecognized character %c in file %s line %d--ignoring.\n",
/* A backslash followed by anything other than end of string or a newline
 * is reported through yyerror. */
238 if (*s && *s != '\n') {
239 yyerror("Ignoring spurious backslash");
244 s = str_get(linestr);
/* Read the next line of the script from rsfp into linestr; Nullch from
 * str_gets presumably means end of input. */
249 if ((s = str_gets(linestr, rsfp)) == Nullch) {
253 s = str_get(linestr);
264 yylval = string(s,0);
/*
 * Operator tokens.  For each operator the visible line records the operator
 * text as a string node in yylval; the case labels and the returned token
 * codes are not visible here.
 */
/* Look ahead from the character after s, skipping white space. */
293 for (d = s + 1; isSPACE(*d); d++) ;
303 yylval = string("~",1);
321 yylval = string("**=",3);
/* Two characters starting one before s (presumably an operator followed by
 * an equals sign). */
323 yylval = string(s-1,2);
/* Skip blanks and tabs, then test whether the word getline comes next. */
341 while (*s == ' ' || *s == '\t')
343 if (strnEQ(s,"getline",7))
351 yylval = string("==",2);
355 yylval = string("=",1);
361 yylval = string("!=",2);
365 yylval = string("!~",2);
374 yylval = string("<=",2);
383 yylval = string(">>",2);
387 yylval = string(">=",2);
/*
 * Word scan: s is advanced over letters, digits and underscores.  The
 * trailing backslash shows that this line belongs to a multi-line macro
 * whose define line is not visible.  The loop that follows collects a run
 * of decimal digits starting at d and records it as a string node.
 */
395 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
415 for (d = s; isDIGIT(*s); s++) ;
416 yylval = string(d,s-d);
422 split_to_array = set_array_base = TRUE;
425 case '/': /* may either be division or pattern */
432 yylval = string("/=",2);
/* Numeric literal: starts with a digit or a decimal point. */
438 case '0': case '1': case '2': case '3': case '4':
439 case '5': case '6': case '7': case '8': case '9': case '.':
/* Quoted string: copy into tokenbuf with the character just before s as
 * the third argument (presumably the delimiter to stop at); the fatal call
 * reports a string with no closing delimiter. */
444 s = cpy2(tokenbuf,s,s[-1]);
446 fatal("String not terminated:\n%s",str_get(linestr));
448 yylval = string(tokenbuf,0);
/*
 * Keyword recognition for words starting with a through e.  The scanned
 * word is in d.  The plain if tests look like awk keywords and built-ins;
 * the if / else if chains look like Perl reserved words.
 * NOTE(review): the action taken on each match is not visible here.
 */
454 set_array_base = TRUE;
/* ARGV is wrapped by numary around a fresh string node. */
455 if (strEQ(d,"ARGV")) {
456 yylval=numary(string("ARGV",0));
459 if (strEQ(d,"atan2")) {
466 if (strEQ(d,"break"))
468 if (strEQ(d,"BEGIN"))
473 if (strEQ(d,"continue"))
475 if (strEQ(d,"cos")) {
479 if (strEQ(d,"close")) {
484 if (strEQ(d,"chdir"))
486 else if (strEQ(d,"crypt"))
488 else if (strEQ(d,"chop"))
490 else if (strEQ(d,"chmod"))
492 else if (strEQ(d,"chown"))
499 if (strEQ(d,"delete"))
510 if (strEQ(d,"exit")) {
514 if (strEQ(d,"exp")) {
518 if (strEQ(d,"elsif"))
520 else if (strEQ(d,"eq"))
522 else if (strEQ(d,"eval"))
524 else if (strEQ(d,"eof"))
526 else if (strEQ(d,"each"))
528 else if (strEQ(d,"exec"))
/*
 * Keyword recognition for words starting with f through k (the word is in
 * d).  NOTE(review): the action taken on each match is not visible here.
 */
/* When saw_FS is 1 inside the BEGIN block, look ahead from s: skip white
 * space, step over one character, skip white space again, then test for a
 * double quote at d and another at d[2], i.e. a quoted value exactly one
 * character long. */
535 if (saw_FS == 1 && in_begin) {
536 for (d = s; *d && isSPACE(*d); d++) ;
538 for (d++; *d && isSPACE(*d); d++) ;
539 if (*d == '"' && d[2] == '"')
547 else if (strEQ(d,"function"))
549 if (strEQ(d,"FILENAME"))
551 if (strEQ(d,"foreach"))
553 else if (strEQ(d,"format"))
555 else if (strEQ(d,"fork"))
557 else if (strEQ(d,"fh"))
562 if (strEQ(d,"getline"))
568 else if (strEQ(d,"gt"))
570 else if (strEQ(d,"goto"))
572 else if (strEQ(d,"gmtime"))
/* index sets set_array_base. */
586 if (strEQ(d,"index")) {
587 set_array_base = TRUE;
590 if (strEQ(d,"int")) {
604 else if (strEQ(d,"kill"))
/*
 * Keyword recognition for words starting with l (the word is in d).
 * length and log are tested first; the else if chain then lists Perl
 * reserved words.  NOTE(review): the action taken on each match is not
 * visible here.
 */
609 if (strEQ(d,"length")) {
613 if (strEQ(d,"log")) {
619 else if (strEQ(d,"local"))
621 else if (strEQ(d,"lt"))
623 else if (strEQ(d,"le"))
/* This entry used to be spelled locatime, which can never match the Perl
 * builtin localtime; the g list spells its counterpart gmtime correctly. */
625 else if (strEQ(d,"localtime"))
627 else if (strEQ(d,"link"))
/*
 * Keyword recognition for words starting with m through r (the word is in
 * d).  NOTE(review): the action taken on each match is not visible here.
 */
/* match sets set_array_base. */
632 if (strEQ(d,"match")) {
633 set_array_base = TRUE;
/* Turns on chopping, splitting, split-to-array and the array base in one
 * statement; the test that guards this line is not visible. */
642 do_chop = do_split = split_to_array = set_array_base = TRUE;
643 if (strEQ(d,"next")) {
652 if (strEQ(d,"ORS")) {
656 if (strEQ(d,"OFS")) {
660 if (strEQ(d,"OFMT")) {
665 else if (strEQ(d,"ord"))
667 else if (strEQ(d,"oct"))
672 if (strEQ(d,"print")) {
675 if (strEQ(d,"printf")) {
680 else if (strEQ(d,"pop"))
692 if (strEQ(d,"rand")) {
696 if (strEQ(d,"return"))
698 if (strEQ(d,"reset"))
700 else if (strEQ(d,"redo"))
702 else if (strEQ(d,"rename"))
/*
 * Words starting with s, first part (the word is in d).  split and substr
 * both set set_array_base.  For sprintf the token returned depends on
 * old_awk: SPRINTF_OLD when it is set, SPRINTF_NEW otherwise, as the
 * existing comment below explains.
 */
707 if (strEQ(d,"split")) {
708 set_array_base = TRUE;
711 if (strEQ(d,"substr")) {
712 set_array_base = TRUE;
717 if (strEQ(d,"sprintf")) {
718 /* In old awk, { print sprintf("str%sg"),"in" } prints
719 * "string"; in new awk, "in" is not considered an argument to
720 * sprintf, so the statement breaks. To support both, the
721 * grammar treats arguments to SPRINTF_OLD like old awk,
722 * SPRINTF_NEW like new. Here we return the appropriate one.
724 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
/*
 * Remaining words starting with s, then words starting with t (the word is
 * in d).  The plain if tests come first, followed by an else if chain of
 * what look like Perl reserved words.
 * NOTE(review): the action taken on each match is not visible here.
 */
726 if (strEQ(d,"sqrt")) {
730 if (strEQ(d,"SUBSEP")) {
733 if (strEQ(d,"sin")) {
737 if (strEQ(d,"srand")) {
741 if (strEQ(d,"system")) {
747 else if (strEQ(d,"shift"))
749 else if (strEQ(d,"select"))
751 else if (strEQ(d,"seek"))
753 else if (strEQ(d,"stat"))
755 else if (strEQ(d,"study"))
757 else if (strEQ(d,"sleep"))
759 else if (strEQ(d,"symlink"))
761 else if (strEQ(d,"sort"))
768 else if (strEQ(d,"tell"))
770 else if (strEQ(d,"time"))
772 else if (strEQ(d,"times"))
/*
 * Keyword recognition for words starting with u, v and w (the word is in
 * d).  NOTE(review): the action taken on each match is not visible here.
 */
777 if (strEQ(d,"until"))
779 else if (strEQ(d,"unless"))
781 else if (strEQ(d,"umask"))
783 else if (strEQ(d,"unshift"))
785 else if (strEQ(d,"unlink"))
787 else if (strEQ(d,"utime"))
792 if (strEQ(d,"values"))
797 if (strEQ(d,"while"))
799 if (strEQ(d,"write"))
801 else if (strEQ(d,"wait"))
/*
 * Scans a search pattern starting at s.  On success the pattern text is
 * taken from tokenbuf and recorded as a string node in yylval.  Two fatal
 * errors are visible: pattern not found and pattern not terminated.
 * NOTE(review): the copying statements and the return value are not
 * visible here.
 */
821 scanpat(register char *s)
829 fatal("Search pattern not found:\n%s",str_get(linestr));
/* Walk the pattern with s as the source and d as the destination. */
833 for (; *s; s++,d++) {
837 else if (s[1] == '\\')
839 else if (s[1] == '[')
/* Bracketed character class: loop until the closing bracket or the end of
 * the string, with separate tests for a backslash that has a following
 * character, for a slash, and for a dash directly before the closing
 * bracket. */
842 else if (*s == '[') {
845 if (*s == '\\' && s[1])
847 if (*s == '/' || (*s == '-' && s[1] == ']'))
850 } while (*s && *s != ']');
859 fatal("Search pattern not terminated:\n%s",str_get(linestr));
861 yylval = string(tokenbuf,0);
/* Error report on stderr: a message followed by a file name and a line
 * number (the function header and the argument list are not visible). */
868 fprintf(stderr,"%s in file %s at line %d\n",
/*
 * Scans a numeric literal starting at s.  The literal text is taken from
 * tokenbuf and recorded as a string node in yylval.
 * NOTE(review): the copying statements and the return value are not
 * visible here.
 */
873 scannum(register char *s)
/* Only a digit or a decimal point starts a number. */
878 case '1': case '2': case '3': case '4': case '5':
879 case '6': case '7': case '8': case '9': case '0' : case '.':
/* Two digit runs (presumably the integer part and the fraction). */
881 while (isDIGIT(*s)) {
887 while (isDIGIT(*s)) {
/* Exponent: e or E followed by a sign or a digit.
 * NOTE(review): strchr also matches the terminating NUL of its first
 * argument, so the first test succeeds when *s is NUL and s[1] is then
 * read; confirm the input always has a character such as a newline after
 * the number. */
894 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
896 if (*s == '+' || *s == '-')
902 yylval = string(tokenbuf,0);
/*
 * Records a string node in ops[].  The first slot holds OSTRING plus a
 * count of 1 in the bits above the low eight; the second slot holds a
 * freshly allocated, NUL-terminated copy of len bytes from ptr.
 * Many callers pass len as 0; presumably a line not visible here replaces
 * 0 with the length of ptr.  The return value is not visible (callers
 * store it in yylval).
 */
909 string(char *ptr, int len)
913 ops[mop++].ival = OSTRING + (1<<8);
916 ops[mop].cval = (char *) safemalloc(len+1);
/* strncpy does not terminate the copy itself, so the next line does. */
917 strncpy(ops[mop].cval,ptr,len);
918 ops[mop++].cval[len] = '\0';
/* Presumably reached when mop has run past OPSMAX; the test is not
 * visible. */
920 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Fragment of a node builder that takes no arguments (the function header
 * is not visible).  It stores the bare type in the next ops[] slot with no
 * count added.  The two fatal calls presumably guard a type above 255 and
 * an ops[] overflow; their tests are not visible.
 */
930 fatal("type > 255 (%d)\n",type);
931 ops[mop++].ival = type;
933 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Records a one-argument node in ops[]: a header slot holding type plus a
 * count of 1 in the bits above the low eight, followed by arg1.
 * The two fatal calls presumably guard a type above 255 and an ops[]
 * overflow; their tests and the return value are not visible.
 */
938 oper1(int type, int arg1)
943 fatal("type > 255 (%d)\n",type);
944 ops[mop++].ival = type + (1<<8);
945 ops[mop++].ival = arg1;
947 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Records a two-argument node in ops[]: a header slot holding type plus a
 * count of 2 in the bits above the low eight, followed by arg1 and arg2.
 * The two fatal calls presumably guard a type above 255 and an ops[]
 * overflow; their tests and the return value are not visible.
 */
952 oper2(int type, int arg1, int arg2)
957 fatal("type > 255 (%d)\n",type);
958 ops[mop++].ival = type + (2<<8);
959 ops[mop++].ival = arg1;
960 ops[mop++].ival = arg2;
962 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Records a three-argument node in ops[]: a header slot holding type plus
 * a count of 3 in the bits above the low eight, followed by arg1 to arg3.
 * The two fatal calls presumably guard a type above 255 and an ops[]
 * overflow; their tests and the return value are not visible.
 */
967 oper3(int type, int arg1, int arg2, int arg3)
972 fatal("type > 255 (%d)\n",type);
973 ops[mop++].ival = type + (3<<8);
974 ops[mop++].ival = arg1;
975 ops[mop++].ival = arg2;
976 ops[mop++].ival = arg3;
978 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Records a four-argument node in ops[]: a header slot holding type plus a
 * count of 4 in the bits above the low eight, followed by arg1 to arg4.
 * The two fatal calls presumably guard a type above 255 and an ops[]
 * overflow; their tests and the return value are not visible.
 */
983 oper4(int type, int arg1, int arg2, int arg3, int arg4)
988 fatal("type > 255 (%d)\n",type);
989 ops[mop++].ival = type + (4<<8);
990 ops[mop++].ival = arg1;
991 ops[mop++].ival = arg2;
992 ops[mop++].ival = arg3;
993 ops[mop++].ival = arg4;
995 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Records a five-argument node in ops[]: a header slot holding type plus a
 * count of 5 in the bits above the low eight, followed by arg1 to arg5.
 * The two fatal calls presumably guard a type above 255 and an ops[]
 * overflow; their tests and the return value are not visible.
 */
1000 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1005 fatal("type > 255 (%d)\n",type);
1006 ops[mop++].ival = type + (5<<8);
1007 ops[mop++].ival = arg1;
1008 ops[mop++].ival = arg2;
1009 ops[mop++].ival = arg3;
1010 ops[mop++].ival = arg4;
1011 ops[mop++].ival = arg5;
1013 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * Fragment of the recursive node printer (the function header is not
 * visible; the recursive call below shows that it takes the ops[] index of
 * a node).  A string node prints its index and text.  Any other node
 * prints its index, operator name and length, then dumps each of its len
 * arguments in turn.
 * NOTE(review): the statements that split the header word into type and
 * len are not visible here.
 */
1026 type = ops[branch].ival;
/* Loop that runs depth times, presumably to indent the output. */
1029 for (i=depth; i; i--)
1031 if (type == OSTRING) {
1032 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1035 printf("(%-5d%s %d\n",branch,opname[type],len);
1037 for (i=1; i<=len; i++)
1038 dump(ops[branch+i].ival);
1040 for (i=depth; i; i--)
/*
 * Wraps arg in an OBLOCK node when needed.  When arg is not an OBLOCK, a
 * new OBLOCK is built from arg and maybe.  When arg is an OBLOCK whose
 * count (the bits above the low eight) is below 2, a new OBLOCK is built
 * from its first argument and maybe.
 * NOTE(review): the first branch of the chain and the final fall-through
 * are not visible here.
 */
1047 bl(int arg, int maybe)
1051 else if ((ops[arg].ival & 255) != OBLOCK)
1052 return oper2(OBLOCK,arg,maybe);
1053 else if ((ops[arg].ival >> 8) < 2)
1054 return oper2(OBLOCK,ops[arg+1].ival,maybe);
/*
 * Fragment of a pass over the text in str (the function header is not
 * visible).  It looks for a semicolon followed by a space and a newline,
 * and for runs of white space after a newline.
 * NOTE(review): the edits applied at each match are not visible here.
 */
1065 for (s = str->str_ptr; *s; s++) {
1066 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1070 else if (*s == '\n') {
/* Scan forward over white space (high bit masked off), then back up to the
 * last newline in that run. */
1071 for (t = s+1; isSPACE(*t & 127); t++) ;
1073 while (isSPACE(*t & 127) && *t != '\n') t--;
/* True when t ended on a newline that is more than one character after the
 * newline at s. */
1074 if (*t == '\n' && t-s > 1) {
/*
 * Fragment of the routine that writes the text in str to stdout through
 * tokenbuf (the function header is not visible).  The visible lines track
 * an output column in pos and look for a place to break a line once pos
 * passes 78.
 * NOTE(review): the statements between these lines are not visible, so the
 * notes below cover only what is shown.
 */
1087 register char *d, *s, *t, *e;
1088 register int pos, newpos;
1092 for (s = str->str_ptr; *s; s++) {
1101 else if (*s == '\t')
1103 if (pos > 78) { /* split a long line? */
/* Step over the leading white space of the buffered line. */
1106 for (t = tokenbuf; isSPACE(*t & 127); t++) {
/* Backward searches from d for a break point, each stopping at the start
 * of tokenbuf: a space after a semicolon, after two vertical bars, after
 * two ampersands, after a comma, and finally any space.  They appear in
 * this order in the source; presumably each is tried only if the previous
 * one found nothing. */
1113 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1117 while (d > tokenbuf &&
1118 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1123 while (d > tokenbuf &&
1124 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1129 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1134 while (d > tokenbuf && *d != ' ')
1144 if (d[-1] != ';' && !(newpos % 4)) {
1150 newpos += strlen(t);
1165 for (t = tokenbuf; *t; t++) {
/* Overwrites the last character of t with a tab, the check marker and a
 * newline. */
1169 strcpy(t+strlen(t)-1, "\t#???\n");
/* Tests for an awk interpreter line, with or without a space after the
 * first two characters. */
1175 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1177 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1180 fputs(tokenbuf,stdout);
/*
 * Fragment (the function header is not visible).  Walks the node arg to
 * get its text, stores the string 1 in symtab under that text, and sets
 * set_array_base.
 */
1189 key = walk(0,0,arg,&dummy,P_MIN);
1191 hstore(symtab,key->str_ptr,str_make("1"));
1193 set_array_base = TRUE;
/*
 * Records the names in an argument list in curarghash.  An OCOMMA node is
 * handled by recursing into the nodes referenced at arg+1 and arg+3.  For
 * an OVAR node, str is stored under the name found one slot after the node
 * that the first argument refers to.  Any other node type is a fatal
 * error.
 * NOTE(review): the value assigned to str is not visible here.
 */
1198 rememberargs(int arg)
/* The node type is the low eight bits of the header slot. */
1205 type = ops[arg].ival & 255;
1206 if (type == OCOMMA) {
1207 rememberargs(ops[arg+1].ival);
1208 rememberargs(ops[arg+3].ival);
1210 else if (type == OVAR) {
1212 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1215 fatal("panic: unknown argument type %d, line %d\n",type,line);
/*
 * Fragment (the function header is not visible; the panic message names
 * aryrefarg).  The node at arg must be an OSTRING, otherwise it is a fatal
 * error.  Its text is then looked up in curarghash.
 * NOTE(review): what is done with the fetched entry is not visible here.
 */
1222 int type = ops[arg].ival & 255;
1225 if (type != OSTRING)
1226 fatal("panic: aryrefarg %d, line %d\n",type,line);
1227 str = hfetch(curarghash,ops[arg+1].cval);
/*
 * Walks the argument list arg of the function whose name node is at name.
 * prevargs is the number of arguments already counted.  An OCOMMA node is
 * handled by recursing into the nodes referenced at arg+1 and arg+3,
 * threading the running count through.  For an OVAR whose curarghash entry
 * is a single asterisk, the node is retagged as OSTAR and the entry is
 * stored again under the key name:position.  Any other node type is a
 * fatal error.
 * NOTE(review): the return statement is not visible here; numargs is
 * presumably returned.
 */
1234 fixfargs(int name, int arg, int prevargs)
1242 type = ops[arg].ival & 255;
1243 if (type == OCOMMA) {
1244 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1245 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1247 else if (type == OVAR) {
1248 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1249 if (strEQ(str_get(str),"*")) {
1252 str_set(str,""); /* in case another routine has this */
/* Replace the low eight bits (the node type) with OSTAR and keep the
 * count bits. */
1253 ops[arg].ival &= ~255;
1254 ops[arg].ival |= OSTAR;
/* NOTE(review): the declaration of tmpbuf is not visible; confirm it can
 * hold the function name, a colon and the number.  fixrargs sizes its
 * buffer from strlen(name) for the same format. */
1255 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1256 fprintf(stderr,"Adding %s\n",tmpbuf);
1259 hstore(curarghash,tmpbuf,str);
1261 numargs = prevargs + 1;
1264 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1265 type,prevargs+1,line);
/*
 * Walks the argument list arg of a call to the function called name.
 * prevargs is the number of arguments already counted.  An OCOMMA node is
 * handled by recursing into the nodes referenced at arg+1 and arg+3,
 * threading the running count through.  Otherwise the key name:position is
 * looked up in curarghash (the same key format that fixfargs stores).
 * When the entry is a single asterisk, an OVAR or OSTAR node is retagged
 * as OSTAR; the fatal call presumably covers any other node type.
 * NOTE(review): the end of this function, including the return statement
 * and any release of tmpbuf, is not visible here.
 */
1270 fixrargs(char *name, int arg, int prevargs)
1278 type = ops[arg].ival & 255;
1279 if (type == OCOMMA) {
1280 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1281 numargs = fixrargs(name,ops[arg+3].ival,numargs);
/* Buffer size: the name, three characters for every byte of an int, plus
 * five more for the colon, a sign and the terminator. */
1284 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1285 sprintf(tmpbuf,"%s:%d",name,prevargs);
1286 str = hfetch(curarghash,tmpbuf);
1288 if (str && strEQ(str->str_ptr,"*")) {
1289 if (type == OVAR || type == OSTAR) {
/* Replace the low eight bits (the node type) with OSTAR and keep the
 * count bits. */
1290 ops[arg].ival &= ~255;
1291 ops[arg].ival |= OSTAR;
1294 fatal("Can't pass expression by reference as arg %d of %s\n",
1297 numargs = prevargs + 1;