3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
10 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
15 #include "../netware/clibstuf.h"
17 #include "../patchlevel.h"
/*
 * Forward declarations.
 * oper1() .. oper5() each take a node type followed by one to five integer
 * operands and return an int.  walk() returns a STR pointer; savestr() and
 * cpy2() return char pointers.
 */
27 int oper1(int type, int arg1);
28 int oper2(int type, int arg1, int arg2);
29 int oper3(int type, int arg1, int arg2, int arg3);
30 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
31 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
32 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
34 char *savestr(char *str);
35 char *cpy2(register char *to, register char *from, register int delim);
38 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
39 static void usage(void);
44 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
45 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
46 printf("\n -D<number> sets debugging flags."
47 "\n -F<character> the awk script to translate is always invoked with"
49 "\n -n<fieldlist> specifies the names of the input fields if input does"
50 "\n not have to be split into an array."
51 "\n -<number> causes a2p to assume that input will always have that"
58 #pragma message disable (mainparm) /* We have the envp in main(). */
/*
 * main -- command-line entry point of the translator.
 *
 * Summary of the statements visible below (the summary is necessarily
 * partial; conditions that are not shown are marked as assumptions):
 *   - each argument is tested for a leading '-' followed by at least one
 *     more character; among the switch handlers, one stores atoi() of its
 *     value in debug and the low bit of debug in yydebug, a leading digit
 *     sets maxfld, and one saves its value as namelist.  Anything else is
 *     reported as an unrecognized switch.
 *   - the script name is saved in filename and opened with fopen(); the
 *     "doesn't seem to exist" fatal() presumably fires when that open
 *     fails -- confirm.
 *   - the script is parsed (fatal on syntax errors), then prewalk() and
 *     walk() are run over the tree rooted at root.
 *   - the output text is built from STARTPERL, an "eval 'exec" preamble and
 *     the translated program (tmpstr), and the user is told how many lines
 *     were marked with "#???" (the checkers count).
 */
62 main(register int argc, register char **argv, register char **env)
70 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
74 linestr = str_new(80);
75 str = str_new(0); /* first used for -I flags */
76 for (argc--,argv++; argc; argc--,argv++) {
77 if (argv[0][0] != '-' || !argv[0][1])
82 debug = atoi(argv[0]+2);
84 yydebug = (debug & 1);
88 case '0': case '1': case '2': case '3': case '4':
89 case '5': case '6': case '7': case '8': case '9':
90 maxfld = atoi(argv[0]+1);
97 namelist = savestr(argv[0]+2);
108 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
109 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
112 fatal("Unrecognized switch: %s\n",argv[0]);
120 if (argv[0] == Nullch) {
121 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
122 if ( isatty(fileno(stdin)) )
127 filename = savestr(argv[0]);
129 filename = savestr(argv[0]);
130 if (strEQ(filename,"-"))
135 rsfp = fopen(argv[0],"r");
137 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
141 bufptr = str_get(linestr);
145 /* now parse the report spec */
148 fatal("Translation aborted due to syntax errors.\n");
158 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
160 printf("\t\"%s\"\n",ops[i].cval),i++;
163 printf("\t%d",ops[i].ival),i++;
173 /* first pass to look for numeric variables */
175 prewalk(0,0,root,&i);
177 /* second pass to produce new program */
179 tmpstr = walk(0,0,root,&i,P_MIN);
180 str = str_make(STARTPERL);
181 str_cat(str, "\neval 'exec ");
183 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
184 if $running_under_some_shell;\n\
185 # this emulates #! processing on NIH machines.\n\
186 # (remove #! line above if indigestible)\n\n");
188 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
190 " # process any FOO=bar switches\n\n");
191 if (do_opens && opens) {
196 str_scat(str,tmpstr);
205 "Please check my work on the %d line%s I've marked with \"#???\".\n",
206 checkers, checkers == 1 ? "" : "s" );
208 "The operation I've selected may be wrong for the operand types.\n");
211 /* The ANSI C spec requires a return here. This also silences VC++'s warnings. */
/*
 * Token-return helpers.  Each macro expands to a `return` of a comma
 * expression and refers to a variable named `s`, so it can only be used in
 * a function that has such a variable in scope (the scan pointer).  All of
 * them store `s` back into bufptr before yielding the result.
 */
/* RETURN: save the scan position in bufptr and return retval. */
215 #define RETURN(retval) return (bufptr = s,retval)
/* XTERM: set expectterm to TRUE, save the scan position, return retval. */
216 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
/* XOP: set expectterm to FALSE, save the scan position, return retval. */
217 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
/* ID: set yylval to string(x,0), clear expectterm, save the scan position and return idtype. */
218 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
225 register char *s = bufptr;
233 fprintf(stderr,"Tokener at %s",s);
235 fprintf(stderr,"Tokener at %s\n",s);
241 "Unrecognized character %c in file %s line %d--ignoring.\n",
246 if (*s && *s != '\n') {
247 yyerror("Ignoring spurious backslash");
252 s = str_get(linestr);
257 if ((s = str_gets(linestr, rsfp)) == Nullch) {
261 s = str_get(linestr);
272 yylval = string(s,0);
301 for (d = s + 1; isSPACE(*d); d++) ;
311 yylval = string("~",1);
329 yylval = string("**=",3);
331 yylval = string(s-1,2);
349 while (*s == ' ' || *s == '\t')
351 if (strnEQ(s,"getline",7))
359 yylval = string("==",2);
363 yylval = string("=",1);
369 yylval = string("!=",2);
373 yylval = string("!~",2);
382 yylval = string("<=",2);
391 yylval = string(">>",2);
395 yylval = string(">=",2);
403 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
423 for (d = s; isDIGIT(*s); s++) ;
424 yylval = string(d,s-d);
430 for (d = s; isALPHA(*s) || isDIGIT(*s) || *s == '_'; )
432 split_to_array = set_array_base = TRUE;
435 yylval = string(d,s-d);
440 case '/': /* may either be division or pattern */
447 yylval = string("/=",2);
453 case '0': case '1': case '2': case '3': case '4':
454 case '5': case '6': case '7': case '8': case '9': case '.':
459 s = cpy2(tokenbuf,s,s[-1]);
461 fatal("String not terminated:\n%s",str_get(linestr));
463 yylval = string(tokenbuf,0);
469 set_array_base = TRUE;
470 if (strEQ(d,"ARGV")) {
471 yylval=numary(string("ARGV",0));
474 if (strEQ(d,"atan2")) {
481 if (strEQ(d,"break"))
483 if (strEQ(d,"BEGIN"))
488 if (strEQ(d,"continue"))
490 if (strEQ(d,"cos")) {
494 if (strEQ(d,"close")) {
499 if (strEQ(d,"chdir"))
501 else if (strEQ(d,"crypt"))
503 else if (strEQ(d,"chop"))
505 else if (strEQ(d,"chmod"))
507 else if (strEQ(d,"chown"))
514 if (strEQ(d,"delete"))
525 if (strEQ(d,"exit")) {
529 if (strEQ(d,"exp")) {
533 if (strEQ(d,"elsif"))
535 else if (strEQ(d,"eq"))
537 else if (strEQ(d,"eval"))
539 else if (strEQ(d,"eof"))
541 else if (strEQ(d,"each"))
543 else if (strEQ(d,"exec"))
550 if (saw_FS == 1 && in_begin) {
551 for (d = s; *d && isSPACE(*d); d++) ;
553 for (d++; *d && isSPACE(*d); d++) ;
554 if (*d == '"' && d[2] == '"')
562 else if (strEQ(d,"function"))
564 if (strEQ(d,"FILENAME"))
566 if (strEQ(d,"foreach"))
568 else if (strEQ(d,"format"))
570 else if (strEQ(d,"fork"))
572 else if (strEQ(d,"fh"))
577 if (strEQ(d,"getline"))
583 else if (strEQ(d,"gt"))
585 else if (strEQ(d,"goto"))
587 else if (strEQ(d,"gmtime"))
601 if (strEQ(d,"index")) {
602 set_array_base = TRUE;
605 if (strEQ(d,"int")) {
619 else if (strEQ(d,"kill"))
624 if (strEQ(d,"length")) {
628 if (strEQ(d,"log")) {
634 else if (strEQ(d,"local"))
636 else if (strEQ(d,"lt"))
638 else if (strEQ(d,"le"))
/* "localtime" is the Perl builtin name; the previous spelling "locatime" could never match it. */
640 else if (strEQ(d,"localtime"))
642 else if (strEQ(d,"link"))
647 if (strEQ(d,"match")) {
648 set_array_base = TRUE;
657 do_chop = do_split = split_to_array = set_array_base = TRUE;
658 if (strEQ(d,"next")) {
667 if (strEQ(d,"ORS")) {
671 if (strEQ(d,"OFS")) {
675 if (strEQ(d,"OFMT")) {
680 else if (strEQ(d,"ord"))
682 else if (strEQ(d,"oct"))
687 if (strEQ(d,"print")) {
690 if (strEQ(d,"printf")) {
695 else if (strEQ(d,"pop"))
707 if (strEQ(d,"rand")) {
711 if (strEQ(d,"return"))
713 if (strEQ(d,"reset"))
715 else if (strEQ(d,"redo"))
717 else if (strEQ(d,"rename"))
722 if (strEQ(d,"split")) {
723 set_array_base = TRUE;
726 if (strEQ(d,"substr")) {
727 set_array_base = TRUE;
732 if (strEQ(d,"sprintf")) {
733 /* In old awk, { print sprintf("str%sg"),"in" } prints
734 * "string"; in new awk, "in" is not considered an argument to
735 * sprintf, so the statement breaks. To support both, the
736 * grammar treats arguments to SPRINTF_OLD like old awk,
737 * SPRINTF_NEW like new. Here we return the appropriate one.
739 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
741 if (strEQ(d,"sqrt")) {
745 if (strEQ(d,"SUBSEP")) {
748 if (strEQ(d,"sin")) {
752 if (strEQ(d,"srand")) {
756 if (strEQ(d,"system")) {
762 else if (strEQ(d,"shift"))
764 else if (strEQ(d,"select"))
766 else if (strEQ(d,"seek"))
768 else if (strEQ(d,"stat"))
770 else if (strEQ(d,"study"))
772 else if (strEQ(d,"sleep"))
774 else if (strEQ(d,"symlink"))
776 else if (strEQ(d,"sort"))
783 else if (strEQ(d,"tell"))
785 else if (strEQ(d,"time"))
787 else if (strEQ(d,"times"))
792 if (strEQ(d,"until"))
794 else if (strEQ(d,"unless"))
796 else if (strEQ(d,"umask"))
798 else if (strEQ(d,"unshift"))
800 else if (strEQ(d,"unlink"))
802 else if (strEQ(d,"utime"))
807 if (strEQ(d,"values"))
812 if (strEQ(d,"while"))
814 if (strEQ(d,"write"))
816 else if (strEQ(d,"wait"))
/*
 * scanpat -- scan a search pattern starting at s and turn it into a string
 * node.
 *
 * Visible behaviour (parts of the body are not shown):
 *   - a fatal "Search pattern not found" error can be raised before the
 *     scan (its condition is not visible);
 *   - the main loop advances s and d together until the NUL at the end of
 *     the input; backslash sequences and '[' ... ']' bracket groups get
 *     separate handling, and inside a bracket group a '/' or a '-' that
 *     directly precedes ']' is treated specially;
 *   - a fatal "Search pattern not terminated" error can be raised after
 *     the loop (condition not visible);
 *   - the text collected in tokenbuf is stored via string(tokenbuf,0) and
 *     the resulting value is assigned to yylval.
 * d presumably walks tokenbuf -- its initialisation is not visible.
 */
836 scanpat(register char *s)
844 fatal("Search pattern not found:\n%s",str_get(linestr));
848 for (; *s; s++,d++) {
852 else if (s[1] == '\\')
854 else if (s[1] == '[')
857 else if (*s == '[') {
860 if (*s == '\\' && s[1])
862 if (*s == '/' || (*s == '-' && s[1] == ']'))
865 } while (*s && *s != ']');
874 fatal("Search pattern not terminated:\n%s",str_get(linestr));
876 yylval = string(tokenbuf,0);
883 fprintf(stderr,"%s in file %s at line %d\n",
/*
 * scannum -- scan a numeric literal starting at s and turn it into a string
 * node.
 *
 * Visible behaviour: when the first character is a digit or '.', two runs
 * of digits are consumed (presumably the integer and fractional parts --
 * the code between them is not shown), followed by an optional exponent:
 * an 'e' or 'E' whose next character is a sign or a digit, with an optional
 * '+' or '-' accepted after it.  The collected text in tokenbuf is stored
 * via string(tokenbuf,0) and the result assigned to yylval.
 *
 * NOTE(review): strchr() also matches the terminating NUL of its first
 * argument, so both strchr() tests in the exponent check succeed when the
 * character being examined is '\0'; confirm the input can never end right
 * after the digits.
 */
888 scannum(register char *s)
893 case '1': case '2': case '3': case '4': case '5':
894 case '6': case '7': case '8': case '9': case '0' : case '.':
896 while (isDIGIT(*s)) {
902 while (isDIGIT(*s)) {
909 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
911 if (*s == '+' || *s == '-')
917 yylval = string(tokenbuf,0);
/*
 * string -- append a string node to the ops[] table.
 *
 * The visible statements write a header word of OSTRING + (1<<8) into the
 * next free slot (node type in the low byte, an operand count of 1 above
 * it), then fill the following slot's cval with a safemalloc'ed buffer of
 * len+1 bytes holding the first len bytes of ptr plus an explicit '\0'.
 *
 * ptr: source text; only the first len bytes are copied, so it does not
 *      have to be NUL-terminated at that point.
 * len: number of bytes to copy.  Callers in this file also pass 0
 *      (e.g. string(s,0)); presumably that requests strlen(ptr), but the
 *      handling is not visible here -- confirm.
 *
 * The fatal() call reports a full table ("larger OPSMAX"); its guarding
 * condition and the function's return value are not visible here.
 */
924 string(char *ptr, int len)
928 ops[mop++].ival = OSTRING + (1<<8);
931 ops[mop].cval = (char *) safemalloc(len+1);
932 strncpy(ops[mop].cval,ptr,len);
933 ops[mop++].cval[len] = '\0';
935 fatal("Recompile a2p with larger OPSMAX\n");
945 fatal("type > 255 (%d)\n",type);
946 ops[mop++].ival = type;
948 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper1 -- append a one-operand node to the ops[] table.
 *
 * Writes the header word type + (1<<8) (node type in the low byte, operand
 * count above it) into the next free slot at mop, followed by arg1.
 * The two fatal() calls report a type that does not fit in the low byte
 * ("type > 255") and a full table ("larger OPSMAX"); the conditions that
 * guard them and the value returned are not visible here.
 */
953 oper1(int type, int arg1)
958 fatal("type > 255 (%d)\n",type);
959 ops[mop++].ival = type + (1<<8);
960 ops[mop++].ival = arg1;
962 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 -- append a two-operand node to the ops[] table.
 *
 * Writes the header word type + (2<<8) (node type in the low byte, operand
 * count above it) into the next free slot at mop, followed by arg1 and
 * arg2 in that order.
 * The two fatal() calls report a type that does not fit in the low byte
 * ("type > 255") and a full table ("larger OPSMAX"); the conditions that
 * guard them and the value returned are not visible here.
 */
967 oper2(int type, int arg1, int arg2)
972 fatal("type > 255 (%d)\n",type);
973 ops[mop++].ival = type + (2<<8);
974 ops[mop++].ival = arg1;
975 ops[mop++].ival = arg2;
977 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 -- append a three-operand node to the ops[] table.
 *
 * Writes the header word type + (3<<8) (node type in the low byte, operand
 * count above it) into the next free slot at mop, followed by arg1, arg2
 * and arg3 in that order.
 * The two fatal() calls report a type that does not fit in the low byte
 * ("type > 255") and a full table ("larger OPSMAX"); the conditions that
 * guard them and the value returned are not visible here.
 */
982 oper3(int type, int arg1, int arg2, int arg3)
987 fatal("type > 255 (%d)\n",type);
988 ops[mop++].ival = type + (3<<8);
989 ops[mop++].ival = arg1;
990 ops[mop++].ival = arg2;
991 ops[mop++].ival = arg3;
993 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 -- append a four-operand node to the ops[] table.
 *
 * Writes the header word type + (4<<8) (node type in the low byte, operand
 * count above it) into the next free slot at mop, followed by arg1 through
 * arg4 in order.
 * The two fatal() calls report a type that does not fit in the low byte
 * ("type > 255") and a full table ("larger OPSMAX"); the conditions that
 * guard them and the value returned are not visible here.
 */
998 oper4(int type, int arg1, int arg2, int arg3, int arg4)
1003 fatal("type > 255 (%d)\n",type);
1004 ops[mop++].ival = type + (4<<8);
1005 ops[mop++].ival = arg1;
1006 ops[mop++].ival = arg2;
1007 ops[mop++].ival = arg3;
1008 ops[mop++].ival = arg4;
1010 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 -- append a five-operand node to the ops[] table.
 *
 * Writes the header word type + (5<<8) (node type in the low byte, operand
 * count above it) into the next free slot at mop, followed by arg1 through
 * arg5 in order.
 * The two fatal() calls report a type that does not fit in the low byte
 * ("type > 255") and a full table ("larger OPSMAX"); the conditions that
 * guard them and the value returned are not visible here.
 */
1015 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1020 fatal("type > 255 (%d)\n",type);
1021 ops[mop++].ival = type + (5<<8);
1022 ops[mop++].ival = arg1;
1023 ops[mop++].ival = arg2;
1024 ops[mop++].ival = arg3;
1025 ops[mop++].ival = arg4;
1026 ops[mop++].ival = arg5;
1028 fatal("Recompile a2p with larger OPSMAX\n");
1041 type = ops[branch].ival;
1044 for (i=depth; i; i--)
1046 if (type == OSTRING) {
1047 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1050 printf("(%-5d%s %d\n",branch,opname[type],len);
1052 for (i=1; i<=len; i++)
1053 dump(ops[branch+i].ival);
1055 for (i=depth; i; i--)
/*
 * bl -- make sure the node at index arg is wrapped as an OBLOCK node.
 *
 * Visible cases (the first branch of the if/else chain is not shown):
 *   - the node is not an OBLOCK (low byte of its header differs): return a
 *     new OBLOCK node built by oper2() from arg and maybe;
 *   - the node is an OBLOCK whose header carries an operand count below 2:
 *     return a new OBLOCK built from that node's first operand and maybe.
 * What is returned for an OBLOCK that already has two or more operands is
 * not visible here.
 */
1062 bl(int arg, int maybe)
1066 else if ((ops[arg].ival & 255) != OBLOCK)
1067 return oper2(OBLOCK,arg,maybe);
1068 else if ((ops[arg].ival >> 8) < 2)
1069 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1080 for (s = str->str_ptr; *s; s++) {
1081 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1085 else if (*s == '\n') {
1086 for (t = s+1; isSPACE(*t & 127); t++) ;
1088 while (isSPACE(*t & 127) && *t != '\n') t--;
1089 if (*t == '\n' && t-s > 1) {
1102 register char *d, *s, *t, *e;
1103 register int pos, newpos;
1107 for (s = str->str_ptr; *s; s++) {
1116 else if (*s == '\t')
1118 if (pos > 78) { /* split a long line? */
1121 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1128 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1132 while (d > tokenbuf &&
1133 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1138 while (d > tokenbuf &&
1139 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1144 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1149 while (d > tokenbuf && *d != ' ')
1159 if (d[-1] != ';' && !(newpos % 4)) {
1165 newpos += strlen(t);
1180 for (t = tokenbuf; *t; t++) {
1184 strcpy(t+strlen(t)-1, "\t#???\n");
1190 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1192 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1195 fputs(tokenbuf,stdout);
1204 key = walk(0,0,arg,&dummy,P_MIN);
1206 hstore(symtab,key->str_ptr,str_make("1"));
1208 set_array_base = TRUE;
/*
 * rememberargs -- record the variables of an argument list in curarghash.
 *
 * arg is the index of a node in ops[]; its type is the low byte of the
 * header word.
 *   - OCOMMA: recurse into the nodes referenced by slots arg+1 and arg+3.
 *   - OVAR:   store str in curarghash under the string found at
 *             ops[ops[arg+1].ival+1].cval (presumably the variable's
 *             name); how str is set up is not visible here.
 *   - any other type: fatal "panic" reporting the type and the line.
 */
1213 rememberargs(int arg)
1220 type = ops[arg].ival & 255;
1221 if (type == OCOMMA) {
1222 rememberargs(ops[arg+1].ival);
1223 rememberargs(ops[arg+3].ival);
1225 else if (type == OVAR) {
1227 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1230 fatal("panic: unknown argument type %d, line %d\n",type,line);
1237 int type = ops[arg].ival & 255;
1240 if (type != OSTRING)
1241 fatal("panic: aryrefarg %d, line %d\n",type,line);
1242 str = hfetch(curarghash,ops[arg+1].cval);
/*
 * fixfargs -- walk an argument list, counting arguments and re-keying the
 * ones whose curarghash entry is "*".
 *
 * name:     index of a node whose ops[name+1].cval supplies the key prefix
 *           (presumably the function's name -- confirm against callers).
 * arg:      index of the argument-list node to examine.
 * prevargs: number of arguments already counted before this node.
 *
 * The result is used by the recursive calls as the running argument count.
 *   - OCOMMA: recurse into slots arg+1 and arg+3, feeding the count from
 *     the first call into the second.
 *   - OVAR:   fetch the entry for the variable from curarghash; when its
 *     text is "*", reset that entry to "", rewrite the node's type byte to
 *     OSTAR, format the key "<name>:<prevargs>" into tmpbuf, log it to
 *     stderr and store a value under that key in curarghash.  numargs is
 *     set to prevargs + 1.
 *   - any other type: fatal "panic" reporting the type, argument number
 *     and line.
 *
 * NOTE(review): the sprintf() into tmpbuf is unbounded; the size of tmpbuf
 * is not visible here -- confirm it is large enough for the name.
 */
1249 fixfargs(int name, int arg, int prevargs)
1257 type = ops[arg].ival & 255;
1258 if (type == OCOMMA) {
1259 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1260 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1262 else if (type == OVAR) {
1263 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1264 if (strEQ(str_get(str),"*")) {
1267 str_set(str,""); /* in case another routine has this */
1268 ops[arg].ival &= ~255;
1269 ops[arg].ival |= OSTAR;
1270 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1271 fprintf(stderr,"Adding %s\n",tmpbuf);
1274 hstore(curarghash,tmpbuf,str);
1276 numargs = prevargs + 1;
1279 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1280 type,prevargs+1,line);
1285 fixrargs(char *name, int arg, int prevargs)
1293 type = ops[arg].ival & 255;
1294 if (type == OCOMMA) {
1295 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1296 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1299 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1300 sprintf(tmpbuf,"%s:%d",name,prevargs);
1301 str = hfetch(curarghash,tmpbuf);
1303 if (str && strEQ(str->str_ptr,"*")) {
1304 if (type == OVAR || type == OSTAR) {
1305 ops[arg].ival &= ~255;
1306 ops[arg].ival |= OSTAR;
1309 fatal("Can't pass expression by reference as arg %d of %s\n",
1312 numargs = prevargs + 1;