1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
16 #include "../netware/clibstuf.h"
18 #include "../patchlevel.h"
/* Forward declarations.  oper1..oper5 are the op-node builders defined later
 * in this file (one per operand count).  walk, savestr and cpy2 are used
 * below but their definitions are not part of the visible excerpt. */
28 int oper1(int type, int arg1);
29 int oper2(int type, int arg1, int arg2);
30 int oper3(int type, int arg1, int arg2, int arg3);
31 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
32 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
33 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
35 char *savestr(char *str);
36 char *cpy2(register char *to, register char *from, register int delim);
/* usage(): help text, declared under the OS2/WIN32/NETWARE guard.  The printf
 * calls below print the translator revision/version banner, the command
 * synopsis (using myname) and a description of each switch to stdout.
 * NOTE(review): the definition header and the matching #endif are not visible
 * in this excerpt; the printf lines are presumed to be the body of usage(). */
39 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
40 static void usage(void);
45 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
46 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
47 printf("\n -D<number> sets debugging flags."
48 "\n -F<character> the awk script to translate is always invoked with"
50 "\n -n<fieldlist> specifies the names of the input fields if input does"
51 "\n not have to be split into an array."
52 "\n -<number> causes a2p to assume that input will always have that"
/* main(): entry point of the awk-to-perl translator.
 * Visible steps: process leading switch arguments, open the awk script named
 * by the first non-switch argument, parse it (fatal on syntax errors),
 * optionally print the ops[] array, run prewalk() and then walk() over the
 * tree rooted at root, and assemble the generated program text in str
 * starting from STARTPERL.  Several intermediate statements are not visible
 * in this excerpt. */
59 main(register int argc, register char **argv, register char **env)
67 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
71 linestr = str_new(80);
72 str = str_new(0); /* first used for -I flags */
/* Step through the arguments after argv[0].  The test below recognises the
 * first argument that is not a switch: one that does not start with '-', or
 * that is a lone "-". */
73 for (argc--,argv++; argc; argc--,argv++) {
74 if (argv[0][0] != '-' || !argv[0][1])
/* Debug level is read from the text after the two-character switch
 * (presumably -D); bit 0 of it also turns on yydebug. */
79 debug = atoi(argv[0]+2);
81 yydebug = (debug & 1);
/* -<number>: the digits following the dash become maxfld. */
85 case '0': case '1': case '2': case '3': case '4':
86 case '5': case '6': case '7': case '8': case '9':
87 maxfld = atoi(argv[0]+1);
/* Field-name list is saved from the text after the two-character switch
 * (presumably -n, per the usage text). */
94 namelist = savestr(argv[0]+2);
105 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
106 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
109 fatal("Unrecognized switch: %s\n",argv[0]);
117 if (argv[0] == Nullch) {
118 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
119 if ( isatty(fileno(stdin)) )
124 filename = savestr(argv[0]);
126 filename = savestr(argv[0]);
127 if (strEQ(filename,"-"))
132 rsfp = fopen(argv[0],"r");
134 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
138 bufptr = str_get(linestr);
142 /* now parse the report spec */
145 fatal("Translation aborted due to syntax errors.\n");
/* Listing of the ops[] array: index, type, length and op name, followed by
 * either a quoted string operand (cval) or integer operands (ival). */
155 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
157 printf("\t\"%s\"\n",ops[i].cval),i++;
160 printf("\t%d",ops[i].ival),i++;
170 /* first pass to look for numeric variables */
172 prewalk(0,0,root,&i);
174 /* second pass to produce new program */
176 tmpstr = walk(0,0,root,&i,P_MIN);
/* Start the output with STARTPERL followed by an "eval 'exec .../perl -S $0"
 * wrapper, then append the translated program text (tmpstr). */
177 str = str_make(STARTPERL);
178 str_cat(str, "\neval 'exec ");
180 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
181 if $running_under_some_shell;\n\
182 # this emulates #! processing on NIH machines.\n\
183 # (remove #! line above if indigestible)\n\n");
185 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
187 " # process any FOO=bar switches\n\n");
188 if (do_opens && opens) {
193 str_scat(str,tmpstr);
202 "Please check my work on the %d line%s I've marked with \"#???\".\n",
203 checkers, checkers == 1 ? "" : "s" );
205 "The operation I've selected may be wrong for the operand types.\n");
208 /* by ANSI specs return is needed. This also shuts up VC++ and its warnings */
/* Token-return helpers.  Every one of them writes the local scan pointer s
 * back to bufptr and then returns from the enclosing function:
 *   RETURN(retval) - return retval
 *   XTERM(retval)  - set expectterm = TRUE, return retval
 *   XOP(retval)    - set expectterm = FALSE, return retval
 *   ID(x)          - set yylval = string(x,0), set expectterm = FALSE,
 *                    return idtype
 * They expect s (and, for ID, idtype) to be in scope where they are used. */
212 #define RETURN(retval) return (bufptr = s,retval)
213 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
214 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
215 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
/* Tokenizer body.  NOTE(review): the function header is not visible in this
 * excerpt; the yylval assignments and the token-return macros suggest this is
 * the parser's lexer (yylex) -- confirm.
 * Scanning starts at bufptr (local s).  Operators are stored in yylval via
 * string(); words scanned into d are compared against awk keywords, builtin
 * functions and special variables with strEQ.  Most of the actions taken for
 * each match are on lines that are not visible here. */
222 register char *s = bufptr;
230 fprintf(stderr,"Tokener at %s",s);
232 fprintf(stderr,"Tokener at %s\n",s);
238 "Unrecognized character %c in file %s line %d--ignoring.\n",
243 if (*s && *s != '\n') {
244 yyerror("Ignoring spurious backslash");
249 s = str_get(linestr);
/* Read the next line from rsfp into linestr; a Nullch result means no more
 * input could be read. */
254 if ((s = str_gets(linestr, rsfp)) == Nullch) {
258 s = str_get(linestr);
269 yylval = string(s,0);
298 for (d = s + 1; isSPACE(*d); d++) ;
308 yylval = string("~",1);
326 yylval = string("**=",3);
328 yylval = string(s-1,2);
346 while (*s == ' ' || *s == '\t')
348 if (strnEQ(s,"getline",7))
356 yylval = string("==",2);
360 yylval = string("=",1);
366 yylval = string("!=",2);
370 yylval = string("!~",2);
379 yylval = string("<=",2);
388 yylval = string(">>",2);
392 yylval = string(">=",2);
400 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
420 for (d = s; isDIGIT(*s); s++) ;
421 yylval = string(d,s-d);
427 split_to_array = set_array_base = TRUE;
430 case '/': /* may either be division or pattern */
437 yylval = string("/=",2);
443 case '0': case '1': case '2': case '3': case '4':
444 case '5': case '6': case '7': case '8': case '9': case '.':
/* Quoted string: cpy2 copies into tokenbuf using the character just consumed
 * (s[-1]) as the delimiter; the result becomes a string op in yylval. */
449 s = cpy2(tokenbuf,s,s[-1]);
451 fatal("String not terminated:\n%s",str_get(linestr));
453 yylval = string(tokenbuf,0);
459 set_array_base = TRUE;
/* Word recognition.  The plain "if" tests match awk keywords, builtins and
 * special variables.  The "else if" chains test further words that look like
 * Perl reserved names; their actions are not visible in this excerpt. */
460 if (strEQ(d,"ARGV")) {
461 yylval=numary(string("ARGV",0));
464 if (strEQ(d,"atan2")) {
471 if (strEQ(d,"break"))
473 if (strEQ(d,"BEGIN"))
478 if (strEQ(d,"continue"))
480 if (strEQ(d,"cos")) {
484 if (strEQ(d,"close")) {
489 if (strEQ(d,"chdir"))
491 else if (strEQ(d,"crypt"))
493 else if (strEQ(d,"chop"))
495 else if (strEQ(d,"chmod"))
497 else if (strEQ(d,"chown"))
504 if (strEQ(d,"delete"))
515 if (strEQ(d,"exit")) {
519 if (strEQ(d,"exp")) {
523 if (strEQ(d,"elsif"))
525 else if (strEQ(d,"eq"))
527 else if (strEQ(d,"eval"))
529 else if (strEQ(d,"eof"))
531 else if (strEQ(d,"each"))
533 else if (strEQ(d,"exec"))
/* When saw_FS is 1 inside BEGIN, look ahead past whitespace for a quote,
 * one character, and a closing quote (a one-character string literal). */
540 if (saw_FS == 1 && in_begin) {
541 for (d = s; *d && isSPACE(*d); d++) ;
543 for (d++; *d && isSPACE(*d); d++) ;
544 if (*d == '"' && d[2] == '"')
552 else if (strEQ(d,"function"))
554 if (strEQ(d,"FILENAME"))
556 if (strEQ(d,"foreach"))
558 else if (strEQ(d,"format"))
560 else if (strEQ(d,"fork"))
562 else if (strEQ(d,"fh"))
567 if (strEQ(d,"getline"))
573 else if (strEQ(d,"gt"))
575 else if (strEQ(d,"goto"))
577 else if (strEQ(d,"gmtime"))
591 if (strEQ(d,"index")) {
592 set_array_base = TRUE;
595 if (strEQ(d,"int")) {
609 else if (strEQ(d,"kill"))
614 if (strEQ(d,"length")) {
618 if (strEQ(d,"log")) {
624 else if (strEQ(d,"local"))
626 else if (strEQ(d,"lt"))
628 else if (strEQ(d,"le"))
/* NOTE(review): "locatime" below looks like a misspelling of "localtime";
 * as written the test can never match that name -- confirm and fix. */
630 else if (strEQ(d,"locatime"))
632 else if (strEQ(d,"link"))
637 if (strEQ(d,"match")) {
638 set_array_base = TRUE;
647 do_chop = do_split = split_to_array = set_array_base = TRUE;
648 if (strEQ(d,"next")) {
657 if (strEQ(d,"ORS")) {
661 if (strEQ(d,"OFS")) {
665 if (strEQ(d,"OFMT")) {
670 else if (strEQ(d,"ord"))
672 else if (strEQ(d,"oct"))
677 if (strEQ(d,"print")) {
680 if (strEQ(d,"printf")) {
685 else if (strEQ(d,"pop"))
697 if (strEQ(d,"rand")) {
701 if (strEQ(d,"return"))
703 if (strEQ(d,"reset"))
705 else if (strEQ(d,"redo"))
707 else if (strEQ(d,"rename"))
712 if (strEQ(d,"split")) {
713 set_array_base = TRUE;
716 if (strEQ(d,"substr")) {
717 set_array_base = TRUE;
722 if (strEQ(d,"sprintf")) {
723 /* In old awk, { print sprintf("str%sg"),"in" } prints
724 * "string"; in new awk, "in" is not considered an argument to
725 * sprintf, so the statement breaks. To support both, the
726 * grammar treats arguments to SPRINTF_OLD like old awk,
727 * SPRINTF_NEW like new. Here we return the appropriate one.
729 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
731 if (strEQ(d,"sqrt")) {
735 if (strEQ(d,"SUBSEP")) {
738 if (strEQ(d,"sin")) {
742 if (strEQ(d,"srand")) {
746 if (strEQ(d,"system")) {
752 else if (strEQ(d,"shift"))
754 else if (strEQ(d,"select"))
756 else if (strEQ(d,"seek"))
758 else if (strEQ(d,"stat"))
760 else if (strEQ(d,"study"))
762 else if (strEQ(d,"sleep"))
764 else if (strEQ(d,"symlink"))
766 else if (strEQ(d,"sort"))
773 else if (strEQ(d,"tell"))
775 else if (strEQ(d,"time"))
777 else if (strEQ(d,"times"))
782 if (strEQ(d,"until"))
784 else if (strEQ(d,"unless"))
786 else if (strEQ(d,"umask"))
788 else if (strEQ(d,"unshift"))
790 else if (strEQ(d,"unlink"))
792 else if (strEQ(d,"utime"))
797 if (strEQ(d,"values"))
802 if (strEQ(d,"while"))
804 if (strEQ(d,"write"))
806 else if (strEQ(d,"wait"))
/* scanpat(): scan a search pattern beginning at s.
 * The main loop advances s and d together (d is presumably the write cursor
 * into tokenbuf -- confirm).  A '[' starts an inner do/while that runs until
 * the closing ']' or end of string, stepping over backslash escapes and
 * testing for '/' and for '-' immediately before ']'.
 * On success the text collected in tokenbuf is stored as a string op in
 * yylval.  Calls fatal() when the pattern is not found or not terminated.
 * The return value is on lines not visible in this excerpt. */
826 scanpat(register char *s)
834 fatal("Search pattern not found:\n%s",str_get(linestr));
838 for (; *s; s++,d++) {
842 else if (s[1] == '\\')
844 else if (s[1] == '[')
847 else if (*s == '[') {
850 if (*s == '\\' && s[1])
852 if (*s == '/' || (*s == '-' && s[1] == ']'))
855 } while (*s && *s != ']');
864 fatal("Search pattern not terminated:\n%s",str_get(linestr));
866 yylval = string(tokenbuf,0);
/* Error report written to stderr in the form "<message> in file <name> at
 * line <n>".  NOTE(review): the enclosing function header and the argument
 * list are not visible in this excerpt; presumably this is yyerror(), which
 * the tokenizer calls -- confirm. */
873 fprintf(stderr,"%s in file %s at line %d\n",
/* scannum(): scan a numeric literal starting at s (first character a digit
 * or '.').  Two digit loops are visible (presumably the integer and the
 * fractional part), followed by an optional exponent: 'e' or 'E' followed by
 * a sign or a digit.  The collected text in tokenbuf is stored as a string op
 * in yylval.  The return value is on lines not visible in this excerpt.
 * NOTE(review): strchr("eE",*s) also succeeds when *s is '\0', because strchr
 * treats the terminating NUL as part of the searched string; s[1] is then
 * examined.  Confirm s cannot be at the end of the buffer at that point. */
878 scannum(register char *s)
883 case '1': case '2': case '3': case '4': case '5':
884 case '6': case '7': case '8': case '9': case '0' : case '.':
886 while (isDIGIT(*s)) {
892 while (isDIGIT(*s)) {
899 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
901 if (*s == '+' || *s == '-')
907 yylval = string(tokenbuf,0);
/* string(): append a string node to ops[].
 * Writes a header slot OSTRING + (1<<8) (type in the low 8 bits, operand
 * count 1 above them), then one slot whose cval is a newly allocated,
 * NUL-terminated copy of the first len bytes of ptr.
 * Callers elsewhere in this file pass len == 0 (e.g. string(s,0)); how that
 * case is handled is on lines not visible in this excerpt.
 * Calls fatal() asking for a larger OPSMAX (the guarding condition is not
 * visible here). */
914 string(char *ptr, int len)
918 ops[mop++].ival = OSTRING + (1<<8);
921 ops[mop].cval = (char *) safemalloc(len+1);
/* strncpy does not terminate the copy when ptr has len or more bytes, so the
 * terminator is written explicitly on the next line. */
922 strncpy(ops[mop].cval,ptr,len);
923 ops[mop++].cval[len] = '\0';
925 fatal("Recompile a2p with larger OPSMAX\n");
/* Zero-operand node builder: stores type alone in the next ops[] slot (no
 * operand count added).  NOTE(review): the function header is not visible in
 * this excerpt; by analogy with oper1..oper5 this is presumably oper0 --
 * confirm.  The conditions guarding the two fatal() calls are not visible. */
935 fatal("type > 255 (%d)\n",type);
936 ops[mop++].ival = type;
938 fatal("Recompile a2p with larger OPSMAX\n");
/* oper1(): append a one-operand node to ops[]: a header slot holding
 * type + (1<<8) (operand count above the low 8 type bits), then arg1.
 * fatal() is called for a type above 255 and for ops[] running out of room
 * (OPSMAX); the guarding conditions and the return value are on lines not
 * visible in this excerpt. */
943 oper1(int type, int arg1)
948 fatal("type > 255 (%d)\n",type);
949 ops[mop++].ival = type + (1<<8);
950 ops[mop++].ival = arg1;
952 fatal("Recompile a2p with larger OPSMAX\n");
/* oper2(): append a two-operand node to ops[]: a header slot holding
 * type + (2<<8), then arg1 and arg2 in order.
 * fatal() is called for a type above 255 and for ops[] running out of room
 * (OPSMAX); the guarding conditions and the return value are on lines not
 * visible in this excerpt. */
957 oper2(int type, int arg1, int arg2)
962 fatal("type > 255 (%d)\n",type);
963 ops[mop++].ival = type + (2<<8);
964 ops[mop++].ival = arg1;
965 ops[mop++].ival = arg2;
967 fatal("Recompile a2p with larger OPSMAX\n");
/* oper3(): append a three-operand node to ops[]: a header slot holding
 * type + (3<<8), then arg1..arg3 in order.
 * fatal() is called for a type above 255 and for ops[] running out of room
 * (OPSMAX); the guarding conditions and the return value are on lines not
 * visible in this excerpt. */
972 oper3(int type, int arg1, int arg2, int arg3)
977 fatal("type > 255 (%d)\n",type);
978 ops[mop++].ival = type + (3<<8);
979 ops[mop++].ival = arg1;
980 ops[mop++].ival = arg2;
981 ops[mop++].ival = arg3;
983 fatal("Recompile a2p with larger OPSMAX\n");
/* oper4(): append a four-operand node to ops[]: a header slot holding
 * type + (4<<8), then arg1..arg4 in order.
 * fatal() is called for a type above 255 and for ops[] running out of room
 * (OPSMAX); the guarding conditions and the return value are on lines not
 * visible in this excerpt. */
988 oper4(int type, int arg1, int arg2, int arg3, int arg4)
993 fatal("type > 255 (%d)\n",type);
994 ops[mop++].ival = type + (4<<8);
995 ops[mop++].ival = arg1;
996 ops[mop++].ival = arg2;
997 ops[mop++].ival = arg3;
998 ops[mop++].ival = arg4;
1000 fatal("Recompile a2p with larger OPSMAX\n");
/* oper5(): append a five-operand node to ops[]: a header slot holding
 * type + (5<<8), then arg1..arg5 in order.
 * fatal() is called for a type above 255 and for ops[] running out of room
 * (OPSMAX); the guarding conditions and the return value are on lines not
 * visible in this excerpt. */
1005 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1010 fatal("type > 255 (%d)\n",type);
1011 ops[mop++].ival = type + (5<<8);
1012 ops[mop++].ival = arg1;
1013 ops[mop++].ival = arg2;
1014 ops[mop++].ival = arg3;
1015 ops[mop++].ival = arg4;
1016 ops[mop++].ival = arg5;
1018 fatal("Recompile a2p with larger OPSMAX\n");
/* Recursive printer for the op tree rooted at ops[branch] (header not visible
 * in this excerpt; the recursive call below names it dump).
 * An OSTRING node prints its index and quoted string.  Any other node prints
 * "(", its index, its op name and len, then recurses into each of its len
 * operands.  The two loops over depth presumably emit indentation -- their
 * bodies are not visible. */
1031 type = ops[branch].ival;
1034 for (i=depth; i; i--)
1036 if (type == OSTRING) {
1037 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1040 printf("(%-5d%s %d\n",branch,opname[type],len);
1042 for (i=1; i<=len; i++)
1043 dump(ops[branch+i].ival);
1045 for (i=depth; i; i--)
/* bl(): produce a two-operand OBLOCK node from arg and maybe.
 *  - arg is not an OBLOCK: wrap it, returning oper2(OBLOCK,arg,maybe).
 *  - arg is an OBLOCK with fewer than two operands (count is ival >> 8):
 *    build a new OBLOCK from its first operand and maybe.
 * The first branch of the if-chain and the final fall-through case are on
 * lines not visible in this excerpt. */
1052 bl(int arg, int maybe)
1056 else if ((ops[arg].ival & 255) != OBLOCK)
1057 return oper2(OBLOCK,arg,maybe);
1058 else if ((ops[arg].ival >> 8) < 2)
1059 return oper2(OBLOCK,ops[arg+1].ival,maybe);
/* Scan over the text in str->str_ptr (function header not visible in this
 * excerpt; presumably a clean-up pass over generated text -- confirm).
 * Two patterns are detected: the sequence ';', ' ', '\n', and a newline that
 * is followed by a run of whitespace ending at a later newline more than one
 * character away.  What is done for each match is on lines not visible here.
 * Characters are masked with 127 before the isSPACE test. */
1070 for (s = str->str_ptr; *s; s++) {
1071 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1075 else if (*s == '\n') {
1076 for (t = s+1; isSPACE(*t & 127); t++) ;
1078 while (isSPACE(*t & 127) && *t != '\n') t--;
1079 if (*t == '\n' && t-s > 1) {
/* Output line splitting and emission (function headers not visible in this
 * excerpt; the lines from 1170 on may belong to a separate helper that emits
 * one line -- confirm).
 * The first loop walks str->str_ptr while tracking an output column in pos.
 * When pos exceeds 78 it searches backwards through tokenbuf for a place to
 * split.  The candidates visible here are a space preceded by ';', by "||",
 * by "&&", by ',', and finally any space.
 * The later lines scan tokenbuf, can overwrite its last character with the
 * "\t#???\n" marker, test for awk "#!" interpreter lines, and write tokenbuf
 * to stdout.
 * NOTE(review): in the "||" and "&&" searches d[-2] is read whenever *d is a
 * space and d[-1] matches; with d == tokenbuf + 1 that reads one byte before
 * tokenbuf.  Confirm whether that position is reachable. */
1092 register char *d, *s, *t, *e;
1093 register int pos, newpos;
1097 for (s = str->str_ptr; *s; s++) {
1106 else if (*s == '\t')
1108 if (pos > 78) { /* split a long line? */
1111 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1118 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1122 while (d > tokenbuf &&
1123 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1128 while (d > tokenbuf &&
1129 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1134 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1139 while (d > tokenbuf && *d != ' ')
1149 if (d[-1] != ';' && !(newpos % 4)) {
1155 newpos += strlen(t);
1170 for (t = tokenbuf; *t; t++) {
1174 strcpy(t+strlen(t)-1, "\t#???\n");
1180 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1182 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1185 fputs(tokenbuf,stdout);
/* Register an array name (function header not visible in this excerpt;
 * presumably numary(), which the tokenizer calls for ARGV -- confirm).
 * The node arg is rendered to text with walk(), that text is entered in
 * symtab with the value "1", and set_array_base is turned on. */
1194 key = walk(0,0,arg,&dummy,P_MIN);
1196 hstore(symtab,key->str_ptr,str_make("1"));
1198 set_array_base = TRUE;
/* rememberargs(): record the argument names of a parameter list in
 * curarghash.
 *  - OCOMMA node: recurse into operands 1 and 3 (operand 2 is skipped).
 *  - OVAR node: store str in curarghash under the variable's name, which is
 *    the cval one slot after the node referenced by operand 1.  How str is
 *    created is on a line not visible in this excerpt.
 *  - anything else: fatal "unknown argument type". */
1203 rememberargs(int arg)
1210 type = ops[arg].ival & 255;
1211 if (type == OCOMMA) {
1212 rememberargs(ops[arg+1].ival);
1213 rememberargs(ops[arg+3].ival);
1215 else if (type == OVAR) {
1217 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1220 fatal("panic: unknown argument type %d, line %d\n",type,line);
/* aryrefarg (name taken from the panic message; the function header is not
 * visible in this excerpt): arg must be an OSTRING node, otherwise fatal.
 * Its string is looked up in curarghash; what is done with the resulting
 * entry is on lines not visible here. */
1227 int type = ops[arg].ival & 255;
1230 if (type != OSTRING)
1231 fatal("panic: aryrefarg %d, line %d\n",type,line);
1232 str = hfetch(curarghash,ops[arg+1].cval);
/* fixfargs(): walk a function's formal argument list, counting arguments
 * from prevargs.
 *  - OCOMMA node: process operand 1, then operand 3 with the running count.
 *  - OVAR node: look the variable name up in curarghash.  If its entry is
 *    "*", reset the entry to "", retag the node as OSTAR, and store str under
 *    the key "<function name>:<argument index>".  The count becomes
 *    prevargs + 1.
 *  - anything else: fatal "unknown argument type".
 * The return statement is not visible in this excerpt (presumably numargs).
 * NOTE(review): tmpbuf's declaration is not visible; the sprintf below is
 * unbounded, whereas fixrargs sizes its buffer from strlen(name).  Confirm
 * tmpbuf is large enough for any function name. */
1239 fixfargs(int name, int arg, int prevargs)
1247 type = ops[arg].ival & 255;
1248 if (type == OCOMMA) {
1249 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1250 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1252 else if (type == OVAR) {
1253 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1254 if (strEQ(str_get(str),"*")) {
1257 str_set(str,""); /* in case another routine has this */
1258 ops[arg].ival &= ~255;
1259 ops[arg].ival |= OSTAR;
1260 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1261 fprintf(stderr,"Adding %s\n",tmpbuf);
1264 hstore(curarghash,tmpbuf,str);
1266 numargs = prevargs + 1;
1269 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1270 type,prevargs+1,line);
/* fixrargs(): walk the actual arguments of a call to function name, counting
 * arguments from prevargs.
 *  - OCOMMA node: process operand 1, then operand 3 with the running count.
 *  - otherwise: build the key "<name>:<argument index>" in a buffer sized
 *    for the name plus a decimal int, and look it up in curarghash.  If the
 *    entry is "*", an OVAR or OSTAR argument is retagged as OSTAR; for other
 *    node types the visible fatal() reports that an expression cannot be
 *    passed by reference.  The count becomes prevargs + 1.
 * The end of the function (release of tmpbuf, return value) is not visible
 * in this excerpt. */
1275 fixrargs(char *name, int arg, int prevargs)
1283 type = ops[arg].ival & 255;
1284 if (type == OCOMMA) {
1285 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1286 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1289 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1290 sprintf(tmpbuf,"%s:%d",name,prevargs);
1291 str = hfetch(curarghash,tmpbuf);
1293 if (str && strEQ(str->str_ptr,"*")) {
1294 if (type == OVAR || type == OSTAR) {
1295 ops[arg].ival &= ~255;
1296 ops[arg].ival |= OSTAR;
1299 fatal("Can't pass expression by reference as arg %d of %s\n",
1302 numargs = prevargs + 1;