1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
17 #include "../netware/clibstuf.h"
19 #include "../patchlevel.h"
29 int oper1(int type, int arg1);
30 int oper2(int type, int arg1, int arg2);
31 int oper3(int type, int arg1, int arg2, int arg3);
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
34 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
36 char *savestr(char *str);
37 char *cpy2(register char *to, register char *from, register int delim);
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
46 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48 printf("\n -D<number> sets debugging flags."
49 "\n -F<character> the awk script to translate is always invoked with"
51 "\n -n<fieldlist> specifies the names of the input fields if input does"
52 "\n not have to be split into an array."
53 "\n -<number> causes a2p to assume that input will always have that"
60 #pragma message disable (mainparm) /* We have the envp in main(). */
/*
 * main -- entry point of the awk-to-perl translator.
 *
 * As far as the lines in this listing show, it consumes leading switches
 * from argv, opens the awk script for reading, parses it, walks the
 * resulting op tree twice (prewalk, then walk) and assembles the generated
 * Perl program in str.  Short lines of the original are absent from this
 * listing, so several branch conditions and case labels cannot be seen.
 */
64 main(register int argc, register char **argv, register char **env)
72 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
76 linestr = str_new(80);
77 str = str_new(0); /* first used for -I flags */
/* Switch loop: argv[0] is always the argument currently being examined. */
78 for (argc--,argv++; argc; argc--,argv++) {
/* True for an argument with no leading dash, or one that is only a dash. */
79 if (argv[0][0] != '-' || !argv[0][1])
/* The number after the two-character switch is the debug level; its low bit drives yydebug. */
84 debug = atoi(argv[0]+2);
86 yydebug = (debug & 1);
/* A switch that starts with a digit supplies maxfld. */
90 case '0': case '1': case '2': case '3': case '4':
91 case '5': case '6': case '7': case '8': case '9':
92 maxfld = atoi(argv[0]+1);
/* Text after the two-character switch becomes namelist (presumably the -n fieldlist option; the case label is not visible here). */
99 namelist = savestr(argv[0]+2);
110 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
111 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
114 fatal("Unrecognized switch: %s\n",argv[0]);
/* After the switches: argv[0] is the script name, or null when none was given. */
122 if (argv[0] == Nullch) {
123 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
124 if ( isatty(fileno(stdin)) )
129 filename = savestr(argv[0]);
131 filename = savestr(argv[0]);
132 if (strEQ(filename,"-"))
137 rsfp = fopen(argv[0],"r");
139 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
143 bufptr = str_get(linestr);
147 /* now parse the report spec */
150 fatal("Translation aborted due to syntax errors.\n");
/* Debug listing of ops[]: index, type, length and opcode name, then the operand(s). */
160 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
162 printf("\t\"%s\"\n",ops[i].cval),i++;
165 printf("\t%d",ops[i].ival),i++;
175 /* first pass to look for numeric variables */
177 prewalk(0,0,root,&i);
179 /* second pass to produce new program */
181 tmpstr = walk(0,0,root,&i,P_MIN);
/* Build the output in str: start-of-script text first, translated program (tmpstr) appended further down. */
182 str = str_make(STARTPERL);
183 str_cat(str, "\neval 'exec ");
185 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
186 if $running_under_some_shell;\n\
187 # this emulates #! processing on NIH machines.\n\
188 # (remove #! line above if indigestible)\n\n");
190 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
192 " # process any FOO=bar switches\n\n");
193 if (do_opens && opens) {
198 str_scat(str,tmpstr);
/* Message about the lines flagged for manual review; checkers is the count and selects singular or plural. */
207 "Please check my work on the %d line%s I've marked with \"#???\".\n",
208 checkers, checkers == 1 ? "" : "s" );
210 "The operation I've selected may be wrong for the operand types.\n");
213 /* ANSI C requires main to return a value; returning one also silences the VC++ warnings. */
217 #define RETURN(retval) return (bufptr = s,retval)
218 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
219 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
220 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
227 register char *s = bufptr;
235 fprintf(stderr,"Tokener at %s",s);
237 fprintf(stderr,"Tokener at %s\n",s);
243 "Unrecognized character %c in file %s line %d--ignoring.\n",
248 if (*s && *s != '\n') {
249 yyerror("Ignoring spurious backslash");
254 s = str_get(linestr);
259 if ((s = str_gets(linestr, rsfp)) == Nullch) {
263 s = str_get(linestr);
274 yylval = string(s,0);
303 for (d = s + 1; isSPACE(*d); d++) ;
313 yylval = string("~",1);
331 yylval = string("**=",3);
333 yylval = string(s-1,2);
351 while (*s == ' ' || *s == '\t')
353 if (strnEQ(s,"getline",7))
361 yylval = string("==",2);
365 yylval = string("=",1);
371 yylval = string("!=",2);
375 yylval = string("!~",2);
384 yylval = string("<=",2);
393 yylval = string(">>",2);
397 yylval = string(">=",2);
405 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
425 for (d = s; isDIGIT(*s); s++) ;
426 yylval = string(d,s-d);
432 for (d = s; isALPHA(*s) || isDIGIT(*s) || *s == '_'; )
434 split_to_array = set_array_base = TRUE;
437 yylval = string(d,s-d);
442 case '/': /* may either be division or pattern */
449 yylval = string("/=",2);
455 case '0': case '1': case '2': case '3': case '4':
456 case '5': case '6': case '7': case '8': case '9': case '.':
461 s = cpy2(tokenbuf,s,s[-1]);
463 fatal("String not terminated:\n%s",str_get(linestr));
465 yylval = string(tokenbuf,0);
471 set_array_base = TRUE;
472 if (strEQ(d,"ARGV")) {
473 yylval=numary(string("ARGV",0));
476 if (strEQ(d,"atan2")) {
483 if (strEQ(d,"break"))
485 if (strEQ(d,"BEGIN"))
490 if (strEQ(d,"continue"))
492 if (strEQ(d,"cos")) {
496 if (strEQ(d,"close")) {
501 if (strEQ(d,"chdir"))
503 else if (strEQ(d,"crypt"))
505 else if (strEQ(d,"chop"))
507 else if (strEQ(d,"chmod"))
509 else if (strEQ(d,"chown"))
516 if (strEQ(d,"delete"))
527 if (strEQ(d,"exit")) {
531 if (strEQ(d,"exp")) {
535 if (strEQ(d,"elsif"))
537 else if (strEQ(d,"eq"))
539 else if (strEQ(d,"eval"))
541 else if (strEQ(d,"eof"))
543 else if (strEQ(d,"each"))
545 else if (strEQ(d,"exec"))
552 if (saw_FS == 1 && in_begin) {
553 for (d = s; *d && isSPACE(*d); d++) ;
555 for (d++; *d && isSPACE(*d); d++) ;
556 if (*d == '"' && d[2] == '"')
564 else if (strEQ(d,"function"))
566 if (strEQ(d,"FILENAME"))
568 if (strEQ(d,"foreach"))
570 else if (strEQ(d,"format"))
572 else if (strEQ(d,"fork"))
574 else if (strEQ(d,"fh"))
579 if (strEQ(d,"getline"))
585 else if (strEQ(d,"gt"))
587 else if (strEQ(d,"goto"))
589 else if (strEQ(d,"gmtime"))
603 if (strEQ(d,"index")) {
604 set_array_base = TRUE;
607 if (strEQ(d,"int")) {
621 else if (strEQ(d,"kill"))
626 if (strEQ(d,"length")) {
630 if (strEQ(d,"log")) {
/*
 * Perl words starting with l.  Note the spelling of localtime (the g
 * group has the matching gmtime): with a letter missing the comparison
 * would never fire for the real builtin name.
 */
636 else if (strEQ(d,"local"))
638 else if (strEQ(d,"lt"))
640 else if (strEQ(d,"le"))
642 else if (strEQ(d,"localtime"))
644 else if (strEQ(d,"link"))
649 if (strEQ(d,"match")) {
650 set_array_base = TRUE;
659 do_chop = do_split = split_to_array = set_array_base = TRUE;
660 if (strEQ(d,"next")) {
669 if (strEQ(d,"ORS")) {
673 if (strEQ(d,"OFS")) {
677 if (strEQ(d,"OFMT")) {
682 else if (strEQ(d,"ord"))
684 else if (strEQ(d,"oct"))
689 if (strEQ(d,"print")) {
692 if (strEQ(d,"printf")) {
697 else if (strEQ(d,"pop"))
709 if (strEQ(d,"rand")) {
713 if (strEQ(d,"return"))
715 if (strEQ(d,"reset"))
717 else if (strEQ(d,"redo"))
719 else if (strEQ(d,"rename"))
724 if (strEQ(d,"split")) {
725 set_array_base = TRUE;
728 if (strEQ(d,"substr")) {
729 set_array_base = TRUE;
734 if (strEQ(d,"sprintf")) {
735 /* In old awk, { print sprintf("str%sg"),"in" } prints
736 * "string"; in new awk, "in" is not considered an argument to
737 * sprintf, so the statement breaks. To support both, the
738 * grammar treats arguments to SPRINTF_OLD like old awk,
739 * SPRINTF_NEW like new. Here we return the appropriate one.
741 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
743 if (strEQ(d,"sqrt")) {
747 if (strEQ(d,"SUBSEP")) {
750 if (strEQ(d,"sin")) {
754 if (strEQ(d,"srand")) {
758 if (strEQ(d,"system")) {
764 else if (strEQ(d,"shift"))
766 else if (strEQ(d,"select"))
768 else if (strEQ(d,"seek"))
770 else if (strEQ(d,"stat"))
772 else if (strEQ(d,"study"))
774 else if (strEQ(d,"sleep"))
776 else if (strEQ(d,"symlink"))
778 else if (strEQ(d,"sort"))
785 else if (strEQ(d,"tell"))
787 else if (strEQ(d,"time"))
789 else if (strEQ(d,"times"))
794 if (strEQ(d,"until"))
796 else if (strEQ(d,"unless"))
798 else if (strEQ(d,"umask"))
800 else if (strEQ(d,"unshift"))
802 else if (strEQ(d,"unlink"))
804 else if (strEQ(d,"utime"))
809 if (strEQ(d,"values"))
814 if (strEQ(d,"while"))
816 if (strEQ(d,"write"))
818 else if (strEQ(d,"wait"))
/*
 * scanpat -- scan a search pattern beginning at s.
 *
 * Visible behaviour: the pattern is walked one character at a time with
 * separate handling for a backslash and for a bracket expression, the
 * two fatal() calls report a pattern that was not found or not
 * terminated (quoting the current input line), and the result of
 * string(tokenbuf,0) is left in yylval.  The copy into tokenbuf and the
 * return value are on lines that are not part of this listing.
 */
838 scanpat(register char *s)
846 fatal("Search pattern not found:\n%s",str_get(linestr));
850 for (; *s; s++,d++) {
854 else if (s[1] == '\\')
856 else if (s[1] == '[')
/* Bracket expression: the do/while below runs until the closing bracket or the end of the string. */
859 else if (*s == '[') {
/* A backslash counts only when another character follows it. */
862 if (*s == '\\' && s[1])
864 if (*s == '/' || (*s == '-' && s[1] == ']'))
867 } while (*s && *s != ']');
876 fatal("Search pattern not terminated:\n%s",str_get(linestr));
878 yylval = string(tokenbuf,0);
885 fprintf(stderr,"%s in file %s at line %d\n",
/*
 * scannum -- scan a numeric literal beginning at s.
 *
 * Visible behaviour: handles text starting with a digit or a dot, reads
 * two runs of digits, optionally an exponent (e or E, an optional sign,
 * more digits), and leaves the result of string(tokenbuf,0) in yylval.
 * The copy into tokenbuf and the return value are on lines that are not
 * part of this listing.
 */
890 scannum(register char *s)
895 case '1': case '2': case '3': case '4': case '5':
896 case '6': case '7': case '8': case '9': case '0' : case '.':
898 while (isDIGIT(*s)) {
904 while (isDIGIT(*s)) {
/*
 * strchr() also matches the terminating NUL of the string it searches,
 * so both characters are tested for NUL first.  Without that, the end of
 * the buffer would be taken for an exponent marker and s[1] would be
 * read beyond the terminator.
 */
911 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) {
913 if (*s == '+' || *s == '-')
919 yylval = string(tokenbuf,0);
/*
 * string -- append a string node to ops[].
 *
 * The first slot written holds OSTRING plus a count of 1 shifted left by
 * eight bits; the next slot receives a freshly allocated, NUL-terminated
 * copy of the first len bytes of ptr.  Callers in this file also pass a
 * len of 0 together with NUL-terminated text; how that case is handled is
 * on a line that is not part of this listing.
 * NOTE(review): the table-full diagnostic appears after the stores here;
 * confirm the guard keeps the writes inside ops[].
 */
926 string(char *ptr, int len)
930 ops[mop++].ival = OSTRING + (1<<8);
933 ops[mop].cval = (char *) safemalloc(len+1);
/* strncpy adds no terminator when it copies exactly len bytes, hence the explicit one below. */
934 strncpy(ops[mop].cval,ptr,len);
935 ops[mop++].cval[len] = '\0';
937 fatal("Recompile a2p with larger OPSMAX\n");
947 fatal("type > 255 (%d)\n",type);
948 ops[mop++].ival = type;
950 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper1 -- append a one-argument operator node to ops[].
 *
 * The first slot written holds type plus the argument count (1) shifted
 * left by eight bits; arg1 goes in the following slot.  Other code in
 * this file reads the low byte of such a word back as the node type and
 * the bits above it as the count.  The two fatal() calls report a type
 * that does not fit in that byte and a full ops[] table; their guarding
 * conditions are not part of this listing.
 */
955 oper1(int type, int arg1)
960 fatal("type > 255 (%d)\n",type);
961 ops[mop++].ival = type + (1<<8);
962 ops[mop++].ival = arg1;
964 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 -- append a two-argument operator node to ops[].
 *
 * The first slot written holds type plus the argument count (2) shifted
 * left by eight bits; arg1 and arg2 follow in the next two slots.  The
 * two fatal() calls report a type above 255 and a full ops[] table;
 * their guarding conditions are not part of this listing.
 */
969 oper2(int type, int arg1, int arg2)
974 fatal("type > 255 (%d)\n",type);
975 ops[mop++].ival = type + (2<<8);
976 ops[mop++].ival = arg1;
977 ops[mop++].ival = arg2;
979 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 -- append a three-argument operator node to ops[].
 *
 * The first slot written holds type plus the argument count (3) shifted
 * left by eight bits; arg1 through arg3 follow in order.  The two
 * fatal() calls report a type above 255 and a full ops[] table; their
 * guarding conditions are not part of this listing.
 */
984 oper3(int type, int arg1, int arg2, int arg3)
989 fatal("type > 255 (%d)\n",type);
990 ops[mop++].ival = type + (3<<8);
991 ops[mop++].ival = arg1;
992 ops[mop++].ival = arg2;
993 ops[mop++].ival = arg3;
995 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 -- append a four-argument operator node to ops[].
 *
 * The first slot written holds type plus the argument count (4) shifted
 * left by eight bits; arg1 through arg4 follow in order.  The two
 * fatal() calls report a type above 255 and a full ops[] table; their
 * guarding conditions are not part of this listing.
 */
1000 oper4(int type, int arg1, int arg2, int arg3, int arg4)
1005 fatal("type > 255 (%d)\n",type);
1006 ops[mop++].ival = type + (4<<8);
1007 ops[mop++].ival = arg1;
1008 ops[mop++].ival = arg2;
1009 ops[mop++].ival = arg3;
1010 ops[mop++].ival = arg4;
1012 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 -- append a five-argument operator node to ops[].
 *
 * The first slot written holds type plus the argument count (5) shifted
 * left by eight bits; arg1 through arg5 follow in order.  The two
 * fatal() calls report a type above 255 and a full ops[] table; their
 * guarding conditions are not part of this listing.
 * NOTE(review): six slots are written before the table-full diagnostic
 * appears; confirm the guard keeps all of them inside ops[].
 */
1017 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1022 fatal("type > 255 (%d)\n",type);
1023 ops[mop++].ival = type + (5<<8);
1024 ops[mop++].ival = arg1;
1025 ops[mop++].ival = arg2;
1026 ops[mop++].ival = arg3;
1027 ops[mop++].ival = arg4;
1028 ops[mop++].ival = arg5;
1030 fatal("Recompile a2p with larger OPSMAX\n");
1043 type = ops[branch].ival;
1046 for (i=depth; i; i--)
1048 if (type == OSTRING) {
1049 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1052 printf("(%-5d%s %d\n",branch,opname[type],len);
1054 for (i=1; i<=len; i++)
1055 dump(ops[branch+i].ival);
1057 for (i=depth; i; i--)
/*
 * bl -- produce an OBLOCK node for arg, with maybe as its second argument.
 *
 * Visible cases: a node that is not an OBLOCK is wrapped in a new
 * two-argument OBLOCK; an OBLOCK whose argument count is below two is
 * replaced by a new OBLOCK built from its first argument and maybe.
 * The remaining branches are not part of this listing.
 */
1064 bl(int arg, int maybe)
/* Low byte of the header word is the node type. */
1068 else if ((ops[arg].ival & 255) != OBLOCK)
1069 return oper2(OBLOCK,arg,maybe);
/* Bits above the low byte are the argument count. */
1070 else if ((ops[arg].ival >> 8) < 2)
1071 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1082 for (s = str->str_ptr; *s; s++) {
1083 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1087 else if (*s == '\n') {
1088 for (t = s+1; isSPACE(*t & 127); t++) ;
1090 while (isSPACE(*t & 127) && *t != '\n') t--;
1091 if (*t == '\n' && t-s > 1) {
1104 register char *d, *s, *t, *e;
1105 register int pos, newpos;
1109 for (s = str->str_ptr; *s; s++) {
1118 else if (*s == '\t')
1120 if (pos > 78) { /* split a long line? */
1123 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1130 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1134 while (d > tokenbuf &&
1135 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1140 while (d > tokenbuf &&
1141 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1146 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1151 while (d > tokenbuf && *d != ' ')
1161 if (d[-1] != ';' && !(newpos % 4)) {
1167 newpos += strlen(t);
1182 for (t = tokenbuf; *t; t++) {
1186 strcpy(t+strlen(t)-1, "\t#???\n");
1192 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1194 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1197 fputs(tokenbuf,stdout);
1206 key = walk(0,0,arg,&dummy,P_MIN);
1208 hstore(symtab,key->str_ptr,str_make("1"));
1210 set_array_base = TRUE;
/*
 * rememberargs -- record the variables of an argument list in curarghash.
 *
 * An OCOMMA node is walked recursively through the nodes referenced at
 * arg+1 and arg+3.  For an OVAR node, str is stored in curarghash under
 * the cval of the node that the first operand points to.  The fatal()
 * call reports an unexpected node type together with the current line.
 * Where str comes from is not visible in this listing.
 */
1215 rememberargs(int arg)
/* Low byte of the header word is the node type. */
1222 type = ops[arg].ival & 255;
1223 if (type == OCOMMA) {
1224 rememberargs(ops[arg+1].ival);
1225 rememberargs(ops[arg+3].ival);
1227 else if (type == OVAR) {
1229 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1232 fatal("panic: unknown argument type %d, line %d\n",type,line);
1239 int type = ops[arg].ival & 255;
1242 if (type != OSTRING)
1243 fatal("panic: aryrefarg %d, line %d\n",type,line);
1244 str = hfetch(curarghash,ops[arg+1].cval);
/*
 * fixfargs -- walk an argument list, counting arguments onward from prevargs.
 *
 * OCOMMA nodes recurse through arg+1 and then arg+3, passing the running
 * count along.  For an OVAR whose curarghash entry holds the string "*",
 * the entry is emptied, the node type byte is rewritten to OSTAR, and
 * the entry is stored again under a key made of the cval at name+1, a
 * colon and prevargs.  The fatal() call reports an unexpected node type.
 * numargs is presumably the return value; the return statement is not
 * part of this listing.
 */
1251 fixfargs(int name, int arg, int prevargs)
1259 type = ops[arg].ival & 255;
1260 if (type == OCOMMA) {
/* Left operand first; its count seeds the right operand. */
1261 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1262 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1264 else if (type == OVAR) {
1265 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1266 if (strEQ(str_get(str),"*")) {
1269 str_set(str,""); /* in case another routine has this */
/* Replace only the type byte, keeping the bits above it. */
1270 ops[arg].ival &= ~255;
1271 ops[arg].ival |= OSTAR;
1272 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
/* Progress message written to stderr. */
1273 fprintf(stderr,"Adding %s\n",tmpbuf);
1276 hstore(curarghash,tmpbuf,str);
1278 numargs = prevargs + 1;
1281 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1282 type,prevargs+1,line);
1287 fixrargs(char *name, int arg, int prevargs)
1295 type = ops[arg].ival & 255;
1296 if (type == OCOMMA) {
1297 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1298 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1301 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1302 sprintf(tmpbuf,"%s:%d",name,prevargs);
1303 str = hfetch(curarghash,tmpbuf);
1305 if (str && strEQ(str->str_ptr,"*")) {
1306 if (type == OVAR || type == OSTAR) {
1307 ops[arg].ival &= ~255;
1308 ops[arg].ival |= OSTAR;
1311 fatal("Can't pass expression by reference as arg %d of %s\n",
1314 numargs = prevargs + 1;