1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
17 #include "../netware/clibstuf.h"
19 #include "../patchlevel.h"
29 int oper1(int type, int arg1);
30 int oper2(int type, int arg1, int arg2);
31 int oper3(int type, int arg1, int arg2, int arg3);
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
34 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
36 char *savestr(char *str);
37 char *cpy2(register char *to, register char *from, register int delim);
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
46 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48 printf("\n -D<number> sets debugging flags."
49 "\n -F<character> the awk script to translate is always invoked with"
51 "\n -n<fieldlist> specifies the names of the input fields if input does"
52 "\n not have to be split into an array."
53 "\n -<number> causes a2p to assume that input will always have that"
60 #pragma message disable (mainparm) /* We have the envp in main(). */
64 main(register int argc, register char **argv, register char **env)
72 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
76 linestr = str_new(80);
77 str = str_new(0); /* first used for -I flags */
78 for (argc--,argv++; argc; argc--,argv++) {
79 if (argv[0][0] != '-' || !argv[0][1])
84 debug = atoi(argv[0]+2);
86 yydebug = (debug & 1);
90 case '0': case '1': case '2': case '3': case '4':
91 case '5': case '6': case '7': case '8': case '9':
92 maxfld = atoi(argv[0]+1);
99 namelist = savestr(argv[0]+2);
110 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
111 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
114 fatal("Unrecognized switch: %s\n",argv[0]);
122 if (argv[0] == Nullch) {
123 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
124 if ( isatty(fileno(stdin)) )
129 filename = savestr(argv[0]);
131 filename = savestr(argv[0]);
132 if (strEQ(filename,"-"))
137 rsfp = fopen(argv[0],"r");
139 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
143 bufptr = str_get(linestr);
147 /* now parse the report spec */
150 fatal("Translation aborted due to syntax errors.\n");
160 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
162 printf("\t\"%s\"\n",ops[i].cval),i++;
165 printf("\t%d",ops[i].ival),i++;
175 /* first pass to look for numeric variables */
177 prewalk(0,0,root,&i);
179 /* second pass to produce new program */
181 tmpstr = walk(0,0,root,&i,P_MIN);
182 str = str_make(STARTPERL);
183 str_cat(str, "\neval 'exec ");
185 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
186 if $running_under_some_shell;\n\
187 # this emulates #! processing on NIH machines.\n\
188 # (remove #! line above if indigestible)\n\n");
190 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
192 " # process any FOO=bar switches\n\n");
193 if (do_opens && opens) {
198 str_scat(str,tmpstr);
207 "Please check my work on the %d line%s I've marked with \"#???\".\n",
208 checkers, checkers == 1 ? "" : "s" );
210 "The operation I've selected may be wrong for the operand types.\n");
213 /* The ANSI spec requires a return here. This also silences the VC++ warnings. */
217 #define RETURN(retval) return (bufptr = s,retval)
218 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
219 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
220 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
227 register char *s = bufptr;
235 fprintf(stderr,"Tokener at %s",s);
237 fprintf(stderr,"Tokener at %s\n",s);
243 "Unrecognized character %c in file %s line %d--ignoring.\n",
248 if (*s && *s != '\n') {
249 yyerror("Ignoring spurious backslash");
254 s = str_get(linestr);
259 if ((s = str_gets(linestr, rsfp)) == Nullch) {
263 s = str_get(linestr);
274 yylval = string(s,0);
303 for (d = s + 1; isSPACE(*d); d++) ;
313 yylval = string("~",1);
331 yylval = string("**=",3);
333 yylval = string(s-1,2);
351 while (*s == ' ' || *s == '\t')
353 if (strnEQ(s,"getline",7))
361 yylval = string("==",2);
365 yylval = string("=",1);
371 yylval = string("!=",2);
375 yylval = string("!~",2);
384 yylval = string("<=",2);
393 yylval = string(">>",2);
397 yylval = string(">=",2);
405 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
425 for (d = s; isDIGIT(*s); s++) ;
426 yylval = string(d,s-d);
432 split_to_array = set_array_base = TRUE;
435 case '/': /* may either be division or pattern */
442 yylval = string("/=",2);
448 case '0': case '1': case '2': case '3': case '4':
449 case '5': case '6': case '7': case '8': case '9': case '.':
454 s = cpy2(tokenbuf,s,s[-1]);
456 fatal("String not terminated:\n%s",str_get(linestr));
458 yylval = string(tokenbuf,0);
464 set_array_base = TRUE;
465 if (strEQ(d,"ARGV")) {
466 yylval=numary(string("ARGV",0));
469 if (strEQ(d,"atan2")) {
476 if (strEQ(d,"break"))
478 if (strEQ(d,"BEGIN"))
483 if (strEQ(d,"continue"))
485 if (strEQ(d,"cos")) {
489 if (strEQ(d,"close")) {
494 if (strEQ(d,"chdir"))
496 else if (strEQ(d,"crypt"))
498 else if (strEQ(d,"chop"))
500 else if (strEQ(d,"chmod"))
502 else if (strEQ(d,"chown"))
509 if (strEQ(d,"delete"))
520 if (strEQ(d,"exit")) {
524 if (strEQ(d,"exp")) {
528 if (strEQ(d,"elsif"))
530 else if (strEQ(d,"eq"))
532 else if (strEQ(d,"eval"))
534 else if (strEQ(d,"eof"))
536 else if (strEQ(d,"each"))
538 else if (strEQ(d,"exec"))
545 if (saw_FS == 1 && in_begin) {
546 for (d = s; *d && isSPACE(*d); d++) ;
548 for (d++; *d && isSPACE(*d); d++) ;
549 if (*d == '"' && d[2] == '"')
557 else if (strEQ(d,"function"))
559 if (strEQ(d,"FILENAME"))
561 if (strEQ(d,"foreach"))
563 else if (strEQ(d,"format"))
565 else if (strEQ(d,"fork"))
567 else if (strEQ(d,"fh"))
572 if (strEQ(d,"getline"))
578 else if (strEQ(d,"gt"))
580 else if (strEQ(d,"goto"))
582 else if (strEQ(d,"gmtime"))
596 if (strEQ(d,"index")) {
597 set_array_base = TRUE;
600 if (strEQ(d,"int")) {
614 else if (strEQ(d,"kill"))
619 if (strEQ(d,"length")) {
623 if (strEQ(d,"log")) {
629 else if (strEQ(d,"local"))
631 else if (strEQ(d,"lt"))
633 else if (strEQ(d,"le"))
635 else if (strEQ(d,"locatime"))
637 else if (strEQ(d,"link"))
642 if (strEQ(d,"match")) {
643 set_array_base = TRUE;
652 do_chop = do_split = split_to_array = set_array_base = TRUE;
653 if (strEQ(d,"next")) {
662 if (strEQ(d,"ORS")) {
666 if (strEQ(d,"OFS")) {
670 if (strEQ(d,"OFMT")) {
675 else if (strEQ(d,"ord"))
677 else if (strEQ(d,"oct"))
682 if (strEQ(d,"print")) {
685 if (strEQ(d,"printf")) {
690 else if (strEQ(d,"pop"))
702 if (strEQ(d,"rand")) {
706 if (strEQ(d,"return"))
708 if (strEQ(d,"reset"))
710 else if (strEQ(d,"redo"))
712 else if (strEQ(d,"rename"))
717 if (strEQ(d,"split")) {
718 set_array_base = TRUE;
721 if (strEQ(d,"substr")) {
722 set_array_base = TRUE;
727 if (strEQ(d,"sprintf")) {
728 /* In old awk, { print sprintf("str%sg"),"in" } prints
729 * "string"; in new awk, "in" is not considered an argument to
730 * sprintf, so the statement breaks. To support both, the
731 * grammar treats arguments to SPRINTF_OLD like old awk,
732 * SPRINTF_NEW like new. Here we return the appropriate one.
734 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
736 if (strEQ(d,"sqrt")) {
740 if (strEQ(d,"SUBSEP")) {
743 if (strEQ(d,"sin")) {
747 if (strEQ(d,"srand")) {
751 if (strEQ(d,"system")) {
757 else if (strEQ(d,"shift"))
759 else if (strEQ(d,"select"))
761 else if (strEQ(d,"seek"))
763 else if (strEQ(d,"stat"))
765 else if (strEQ(d,"study"))
767 else if (strEQ(d,"sleep"))
769 else if (strEQ(d,"symlink"))
771 else if (strEQ(d,"sort"))
778 else if (strEQ(d,"tell"))
780 else if (strEQ(d,"time"))
782 else if (strEQ(d,"times"))
787 if (strEQ(d,"until"))
789 else if (strEQ(d,"unless"))
791 else if (strEQ(d,"umask"))
793 else if (strEQ(d,"unshift"))
795 else if (strEQ(d,"unlink"))
797 else if (strEQ(d,"utime"))
802 if (strEQ(d,"values"))
807 if (strEQ(d,"while"))
809 if (strEQ(d,"write"))
811 else if (strEQ(d,"wait"))
831 scanpat(register char *s)
839 fatal("Search pattern not found:\n%s",str_get(linestr));
843 for (; *s; s++,d++) {
847 else if (s[1] == '\\')
849 else if (s[1] == '[')
852 else if (*s == '[') {
855 if (*s == '\\' && s[1])
857 if (*s == '/' || (*s == '-' && s[1] == ']'))
860 } while (*s && *s != ']');
869 fatal("Search pattern not terminated:\n%s",str_get(linestr));
871 yylval = string(tokenbuf,0);
878 fprintf(stderr,"%s in file %s at line %d\n",
883 scannum(register char *s)
888 case '1': case '2': case '3': case '4': case '5':
889 case '6': case '7': case '8': case '9': case '0' : case '.':
891 while (isDIGIT(*s)) {
897 while (isDIGIT(*s)) {
904 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
906 if (*s == '+' || *s == '-')
912 yylval = string(tokenbuf,0);
919 string(char *ptr, int len)
923 ops[mop++].ival = OSTRING + (1<<8);
926 ops[mop].cval = (char *) safemalloc(len+1);
927 strncpy(ops[mop].cval,ptr,len);
928 ops[mop++].cval[len] = '\0';
930 fatal("Recompile a2p with larger OPSMAX\n");
940 fatal("type > 255 (%d)\n",type);
941 ops[mop++].ival = type;
943 fatal("Recompile a2p with larger OPSMAX\n");
948 oper1(int type, int arg1)
953 fatal("type > 255 (%d)\n",type);
954 ops[mop++].ival = type + (1<<8);
955 ops[mop++].ival = arg1;
957 fatal("Recompile a2p with larger OPSMAX\n");
962 oper2(int type, int arg1, int arg2)
967 fatal("type > 255 (%d)\n",type);
968 ops[mop++].ival = type + (2<<8);
969 ops[mop++].ival = arg1;
970 ops[mop++].ival = arg2;
972 fatal("Recompile a2p with larger OPSMAX\n");
977 oper3(int type, int arg1, int arg2, int arg3)
982 fatal("type > 255 (%d)\n",type);
983 ops[mop++].ival = type + (3<<8);
984 ops[mop++].ival = arg1;
985 ops[mop++].ival = arg2;
986 ops[mop++].ival = arg3;
988 fatal("Recompile a2p with larger OPSMAX\n");
993 oper4(int type, int arg1, int arg2, int arg3, int arg4)
998 fatal("type > 255 (%d)\n",type);
999 ops[mop++].ival = type + (4<<8);
1000 ops[mop++].ival = arg1;
1001 ops[mop++].ival = arg2;
1002 ops[mop++].ival = arg3;
1003 ops[mop++].ival = arg4;
1005 fatal("Recompile a2p with larger OPSMAX\n");
1010 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1015 fatal("type > 255 (%d)\n",type);
1016 ops[mop++].ival = type + (5<<8);
1017 ops[mop++].ival = arg1;
1018 ops[mop++].ival = arg2;
1019 ops[mop++].ival = arg3;
1020 ops[mop++].ival = arg4;
1021 ops[mop++].ival = arg5;
1023 fatal("Recompile a2p with larger OPSMAX\n");
1036 type = ops[branch].ival;
1039 for (i=depth; i; i--)
1041 if (type == OSTRING) {
1042 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1045 printf("(%-5d%s %d\n",branch,opname[type],len);
1047 for (i=1; i<=len; i++)
1048 dump(ops[branch+i].ival);
1050 for (i=depth; i; i--)
1057 bl(int arg, int maybe)
1061 else if ((ops[arg].ival & 255) != OBLOCK)
1062 return oper2(OBLOCK,arg,maybe);
1063 else if ((ops[arg].ival >> 8) < 2)
1064 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1075 for (s = str->str_ptr; *s; s++) {
1076 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1080 else if (*s == '\n') {
1081 for (t = s+1; isSPACE(*t & 127); t++) ;
1083 while (isSPACE(*t & 127) && *t != '\n') t--;
1084 if (*t == '\n' && t-s > 1) {
1097 register char *d, *s, *t, *e;
1098 register int pos, newpos;
1102 for (s = str->str_ptr; *s; s++) {
1111 else if (*s == '\t')
1113 if (pos > 78) { /* split a long line? */
1116 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1123 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1127 while (d > tokenbuf &&
1128 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1133 while (d > tokenbuf &&
1134 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1139 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1144 while (d > tokenbuf && *d != ' ')
1154 if (d[-1] != ';' && !(newpos % 4)) {
1160 newpos += strlen(t);
1175 for (t = tokenbuf; *t; t++) {
1179 strcpy(t+strlen(t)-1, "\t#???\n");
1185 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1187 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1190 fputs(tokenbuf,stdout);
1199 key = walk(0,0,arg,&dummy,P_MIN);
1201 hstore(symtab,key->str_ptr,str_make("1"));
1203 set_array_base = TRUE;
1208 rememberargs(int arg)
1215 type = ops[arg].ival & 255;
1216 if (type == OCOMMA) {
1217 rememberargs(ops[arg+1].ival);
1218 rememberargs(ops[arg+3].ival);
1220 else if (type == OVAR) {
1222 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1225 fatal("panic: unknown argument type %d, line %d\n",type,line);
1232 int type = ops[arg].ival & 255;
1235 if (type != OSTRING)
1236 fatal("panic: aryrefarg %d, line %d\n",type,line);
1237 str = hfetch(curarghash,ops[arg+1].cval);
1244 fixfargs(int name, int arg, int prevargs)
1252 type = ops[arg].ival & 255;
1253 if (type == OCOMMA) {
1254 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1255 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1257 else if (type == OVAR) {
1258 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1259 if (strEQ(str_get(str),"*")) {
1262 str_set(str,""); /* in case another routine has this */
1263 ops[arg].ival &= ~255;
1264 ops[arg].ival |= OSTAR;
1265 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1266 fprintf(stderr,"Adding %s\n",tmpbuf);
1269 hstore(curarghash,tmpbuf,str);
1271 numargs = prevargs + 1;
1274 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1275 type,prevargs+1,line);
1280 fixrargs(char *name, int arg, int prevargs)
1288 type = ops[arg].ival & 255;
1289 if (type == OCOMMA) {
1290 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1291 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1294 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1295 sprintf(tmpbuf,"%s:%d",name,prevargs);
1296 str = hfetch(curarghash,tmpbuf);
1298 if (str && strEQ(str->str_ptr,"*")) {
1299 if (type == OVAR || type == OSTAR) {
1300 ops[arg].ival &= ~255;
1301 ops[arg].ival |= OSTAR;
1304 fatal("Can't pass expression by reference as arg %d of %s\n",
1307 numargs = prevargs + 1;