3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
10 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
15 #include "../netware/clibstuf.h"
17 #include "../patchlevel.h"
27 int oper1(int type, int arg1);
28 int oper2(int type, int arg1, int arg2);
29 int oper3(int type, int arg1, int arg2, int arg3);
30 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
31 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
32 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
34 char *savestr(char *str);
35 char *cpy2(register char *to, register char *from, register int delim);
38 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
39 static void usage(void);
44 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
45 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
46 printf("\n -D<number> sets debugging flags."
47 "\n -F<character> the awk script to translate is always invoked with"
49 "\n -n<fieldlist> specifies the names of the input fields if input does"
50 "\n not have to be split into an array."
51 "\n -<number> causes a2p to assume that input will always have that"
58 #pragma message disable (mainparm) /* We have the envp in main(). */
62 main(register int argc, register const char **argv, register const char **env)
70 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
74 linestr = str_new(80);
75 str = str_new(0); /* first used for -I flags */
76 for (argc--,argv++; argc; argc--,argv++) {
77 if (argv[0][0] != '-' || !argv[0][1])
82 debug = atoi(argv[0]+2);
84 yydebug = (debug & 1);
88 case '0': case '1': case '2': case '3': case '4':
89 case '5': case '6': case '7': case '8': case '9':
90 maxfld = atoi(argv[0]+1);
97 namelist = savestr(argv[0]+2);
108 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
109 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
112 fatal("Unrecognized switch: %s\n",argv[0]);
120 if (argv[0] == NULL) {
121 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
122 if ( isatty(fileno(stdin)) )
127 filename = savestr(argv[0]);
129 if (strEQ(filename,"-"))
134 rsfp = fopen(argv[0],"r");
136 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
140 bufptr = str_get(linestr);
144 /* now parse the report spec */
147 fatal("Translation aborted due to syntax errors.\n");
157 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
159 printf("\t\"%s\"\n",ops[i].cval),i++;
162 printf("\t%d",ops[i].ival),i++;
172 /* first pass to look for numeric variables */
174 prewalk(0,0,root,&i);
176 /* second pass to produce new program */
178 tmpstr = walk(0,0,root,&i,P_MIN);
179 str = str_make(STARTPERL);
180 str_cat(str, "\neval 'exec ");
182 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
183 if $running_under_some_shell;\n\
184 # this emulates #! processing on NIH machines.\n\
185 # (remove #! line above if indigestible)\n\n");
187 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
189 " # process any FOO=bar switches\n\n");
190 if (do_opens && opens) {
195 str_scat(str,tmpstr);
204 "Please check my work on the %d line%s I've marked with \"#???\".\n",
205 checkers, checkers == 1 ? "" : "s" );
207 "The operation I've selected may be wrong for the operand types.\n");
210 /* by ANSI specs return is needed. This also shuts up VC++ and his warnings */
214 #define RETURN(retval) return (bufptr = s,retval)
215 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
216 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
217 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
224 register char *s = bufptr;
232 fprintf(stderr,"Tokener at %s",s);
234 fprintf(stderr,"Tokener at %s\n",s);
240 "Unrecognized character %c in file %s line %d--ignoring.\n",
245 if (*s && *s != '\n') {
246 yyerror("Ignoring spurious backslash");
251 s = str_get(linestr);
256 if ((s = str_gets(linestr, rsfp)) == NULL) {
260 s = str_get(linestr);
271 yylval = string(s,0);
300 for (d = s + 1; isSPACE(*d); d++) ;
310 yylval = string("~",1);
328 yylval = string("**=",3);
330 yylval = string(s-1,2);
348 while (*s == ' ' || *s == '\t')
350 if (strnEQ(s,"getline",7))
358 yylval = string("==",2);
362 yylval = string("=",1);
368 yylval = string("!=",2);
372 yylval = string("!~",2);
381 yylval = string("<=",2);
390 yylval = string(">>",2);
394 yylval = string(">=",2);
402 while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
422 for (d = s; isDIGIT(*s); s++) ;
423 yylval = string(d,s-d);
429 for (d = s; isALPHA(*s) || isDIGIT(*s) || *s == '_'; )
431 split_to_array = TRUE;
434 yylval = string(d,s-d);
439 case '/': /* may either be division or pattern */
446 yylval = string("/=",2);
452 case '0': case '1': case '2': case '3': case '4':
453 case '5': case '6': case '7': case '8': case '9': case '.':
458 s = cpy2(tokenbuf,s,s[-1]);
460 fatal("String not terminated:\n%s",str_get(linestr));
462 yylval = string(tokenbuf,0);
467 if (strEQ(d,"ARGV")) {
468 yylval=numary(string("ARGV",0));
471 if (strEQ(d,"atan2")) {
478 if (strEQ(d,"break"))
480 if (strEQ(d,"BEGIN"))
485 if (strEQ(d,"continue"))
487 if (strEQ(d,"cos")) {
491 if (strEQ(d,"close")) {
496 if (strEQ(d,"chdir"))
498 else if (strEQ(d,"crypt"))
500 else if (strEQ(d,"chop"))
502 else if (strEQ(d,"chmod"))
504 else if (strEQ(d,"chown"))
511 if (strEQ(d,"delete"))
522 if (strEQ(d,"exit")) {
526 if (strEQ(d,"exp")) {
530 if (strEQ(d,"elsif"))
532 else if (strEQ(d,"eq"))
534 else if (strEQ(d,"eval"))
536 else if (strEQ(d,"eof"))
538 else if (strEQ(d,"each"))
540 else if (strEQ(d,"exec"))
547 if (saw_FS == 1 && in_begin) {
548 for (d = s; *d && isSPACE(*d); d++) ;
550 for (d++; *d && isSPACE(*d); d++) ;
551 if (*d == '"' && d[2] == '"')
559 else if (strEQ(d,"function"))
561 if (strEQ(d,"FILENAME"))
563 if (strEQ(d,"foreach"))
565 else if (strEQ(d,"format"))
567 else if (strEQ(d,"fork"))
569 else if (strEQ(d,"fh"))
574 if (strEQ(d,"getline"))
580 else if (strEQ(d,"gt"))
582 else if (strEQ(d,"goto"))
584 else if (strEQ(d,"gmtime"))
598 if (strEQ(d,"index")) {
601 if (strEQ(d,"int")) {
615 else if (strEQ(d,"kill"))
620 if (strEQ(d,"length")) {
624 if (strEQ(d,"log")) {
630 else if (strEQ(d,"local"))
632 else if (strEQ(d,"lt"))
634 else if (strEQ(d,"le"))
636 else if (strEQ(d,"locatime"))
638 else if (strEQ(d,"link"))
643 if (strEQ(d,"match")) {
652 do_chop = do_split = split_to_array = TRUE;
653 if (strEQ(d,"next")) {
662 if (strEQ(d,"ORS")) {
666 if (strEQ(d,"OFS")) {
670 if (strEQ(d,"OFMT")) {
675 else if (strEQ(d,"ord"))
677 else if (strEQ(d,"oct"))
682 if (strEQ(d,"print")) {
685 if (strEQ(d,"printf")) {
690 else if (strEQ(d,"pop"))
702 if (strEQ(d,"rand")) {
706 if (strEQ(d,"return"))
708 if (strEQ(d,"reset"))
710 else if (strEQ(d,"redo"))
712 else if (strEQ(d,"rename"))
717 if (strEQ(d,"split")) {
720 if (strEQ(d,"substr")) {
725 if (strEQ(d,"sprintf")) {
726 /* In old awk, { print sprintf("str%sg"),"in" } prints
727 * "string"; in new awk, "in" is not considered an argument to
728 * sprintf, so the statement breaks. To support both, the
729 * grammar treats arguments to SPRINTF_OLD like old awk,
730 * SPRINTF_NEW like new. Here we return the appropriate one.
732 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
734 if (strEQ(d,"sqrt")) {
738 if (strEQ(d,"SUBSEP")) {
741 if (strEQ(d,"sin")) {
745 if (strEQ(d,"srand")) {
749 if (strEQ(d,"system")) {
755 else if (strEQ(d,"shift"))
757 else if (strEQ(d,"select"))
759 else if (strEQ(d,"seek"))
761 else if (strEQ(d,"stat"))
763 else if (strEQ(d,"study"))
765 else if (strEQ(d,"sleep"))
767 else if (strEQ(d,"symlink"))
769 else if (strEQ(d,"sort"))
776 else if (strEQ(d,"tell"))
778 else if (strEQ(d,"time"))
780 else if (strEQ(d,"times"))
785 if (strEQ(d,"until"))
787 else if (strEQ(d,"unless"))
789 else if (strEQ(d,"umask"))
791 else if (strEQ(d,"unshift"))
793 else if (strEQ(d,"unlink"))
795 else if (strEQ(d,"utime"))
800 if (strEQ(d,"values"))
805 if (strEQ(d,"while"))
807 if (strEQ(d,"write"))
809 else if (strEQ(d,"wait"))
829 scanpat(register char *s)
837 fatal("Search pattern not found:\n%s",str_get(linestr));
841 for (; *s; s++,d++) {
845 else if (s[1] == '\\')
847 else if (s[1] == '[')
850 else if (*s == '[') {
853 if (*s == '\\' && s[1])
855 if (*s == '/' || (*s == '-' && s[1] == ']'))
858 } while (*s && *s != ']');
867 fatal("Search pattern not terminated:\n%s",str_get(linestr));
869 yylval = string(tokenbuf,0);
874 yyerror(const char *s)
876 fprintf(stderr,"%s in file %s at line %d\n",
881 scannum(register char *s)
886 case '1': case '2': case '3': case '4': case '5':
887 case '6': case '7': case '8': case '9': case '0' : case '.':
889 while (isDIGIT(*s)) {
895 while (isDIGIT(*s)) {
902 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
904 if (*s == '+' || *s == '-')
910 yylval = string(tokenbuf,0);
917 string(const char *ptr, int len)
921 ops[mop++].ival = OSTRING + (1<<8);
924 ops[mop].cval = (char *) safemalloc(len+1);
925 strncpy(ops[mop].cval,ptr,len);
926 ops[mop++].cval[len] = '\0';
928 fatal("Recompile a2p with larger OPSMAX\n");
938 fatal("type > 255 (%d)\n",type);
939 ops[mop++].ival = type;
941 fatal("Recompile a2p with larger OPSMAX\n");
946 oper1(int type, int arg1)
951 fatal("type > 255 (%d)\n",type);
952 ops[mop++].ival = type + (1<<8);
953 ops[mop++].ival = arg1;
955 fatal("Recompile a2p with larger OPSMAX\n");
960 oper2(int type, int arg1, int arg2)
965 fatal("type > 255 (%d)\n",type);
966 ops[mop++].ival = type + (2<<8);
967 ops[mop++].ival = arg1;
968 ops[mop++].ival = arg2;
970 fatal("Recompile a2p with larger OPSMAX\n");
975 oper3(int type, int arg1, int arg2, int arg3)
980 fatal("type > 255 (%d)\n",type);
981 ops[mop++].ival = type + (3<<8);
982 ops[mop++].ival = arg1;
983 ops[mop++].ival = arg2;
984 ops[mop++].ival = arg3;
986 fatal("Recompile a2p with larger OPSMAX\n");
991 oper4(int type, int arg1, int arg2, int arg3, int arg4)
996 fatal("type > 255 (%d)\n",type);
997 ops[mop++].ival = type + (4<<8);
998 ops[mop++].ival = arg1;
999 ops[mop++].ival = arg2;
1000 ops[mop++].ival = arg3;
1001 ops[mop++].ival = arg4;
1003 fatal("Recompile a2p with larger OPSMAX\n");
1008 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1013 fatal("type > 255 (%d)\n",type);
1014 ops[mop++].ival = type + (5<<8);
1015 ops[mop++].ival = arg1;
1016 ops[mop++].ival = arg2;
1017 ops[mop++].ival = arg3;
1018 ops[mop++].ival = arg4;
1019 ops[mop++].ival = arg5;
1021 fatal("Recompile a2p with larger OPSMAX\n");
1034 type = ops[branch].ival;
1037 for (i=depth; i; i--)
1039 if (type == OSTRING) {
1040 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1043 printf("(%-5d%s %d\n",branch,opname[type],len);
1045 for (i=1; i<=len; i++)
1046 dump(ops[branch+i].ival);
1048 for (i=depth; i; i--)
1055 bl(int arg, int maybe)
1059 else if ((ops[arg].ival & 255) != OBLOCK)
1060 return oper2(OBLOCK,arg,maybe);
1061 else if ((ops[arg].ival >> 8) < 2)
1062 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1073 for (s = str->str_ptr; *s; s++) {
1074 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1078 else if (*s == '\n') {
1079 for (t = s+1; isSPACE(*t & 127); t++) ;
1081 while (isSPACE(*t & 127) && *t != '\n') t--;
1082 if (*t == '\n' && t-s > 1) {
1095 register char *d, *s, *t, *e;
1096 register int pos, newpos;
1100 for (s = str->str_ptr; *s; s++) {
1109 else if (*s == '\t')
1111 if (pos > 78) { /* split a long line? */
1114 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1121 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1125 while (d > tokenbuf &&
1126 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1131 while (d > tokenbuf &&
1132 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1137 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1142 while (d > tokenbuf && *d != ' ')
1152 if (d[-1] != ';' && !(newpos % 4)) {
1158 newpos += strlen(t);
1173 for (t = tokenbuf; *t; t++) {
1177 strcpy(t+strlen(t)-1, "\t#???\n");
1183 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1185 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1188 fputs(tokenbuf,stdout);
1197 key = walk(0,0,arg,&dummy,P_MIN);
1199 hstore(symtab,key->str_ptr,str_make("1"));
1205 rememberargs(int arg)
1212 type = ops[arg].ival & 255;
1213 if (type == OCOMMA) {
1214 rememberargs(ops[arg+1].ival);
1215 rememberargs(ops[arg+3].ival);
1217 else if (type == OVAR) {
1219 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1222 fatal("panic: unknown argument type %d, line %d\n",type,line);
1229 int type = ops[arg].ival & 255;
1232 if (type != OSTRING)
1233 fatal("panic: aryrefarg %d, line %d\n",type,line);
1234 str = hfetch(curarghash,ops[arg+1].cval);
1241 fixfargs(int name, int arg, int prevargs)
1249 type = ops[arg].ival & 255;
1250 if (type == OCOMMA) {
1251 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1252 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1254 else if (type == OVAR) {
1255 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1256 if (strEQ(str_get(str),"*")) {
1259 str_set(str,""); /* in case another routine has this */
1260 ops[arg].ival &= ~255;
1261 ops[arg].ival |= OSTAR;
1262 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1263 fprintf(stderr,"Adding %s\n",tmpbuf);
1266 hstore(curarghash,tmpbuf,str);
1268 numargs = prevargs + 1;
1271 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1272 type,prevargs+1,line);
1277 fixrargs(char *name, int arg, int prevargs)
1285 type = ops[arg].ival & 255;
1286 if (type == OCOMMA) {
1287 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1288 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1291 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1292 sprintf(tmpbuf,"%s:%d",name,prevargs);
1293 str = hfetch(curarghash,tmpbuf);
1295 if (str && strEQ(str->str_ptr,"*")) {
1296 if (type == OVAR || type == OSTAR) {
1297 ops[arg].ival &= ~255;
1298 ops[arg].ival |= OSTAR;
1301 fatal("Can't pass expression by reference as arg %d of %s\n",
1304 numargs = prevargs + 1;