1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32)
15 #include "../patchlevel.h"
/*
 * Forward declarations for the ops[] node builders and the tree walker.
 * In the parts of the operN() definitions visible later in this file,
 * each one stores a header word of `type + (N<<8)` in ops[mop++].ival
 * and then its N argument words in the following slots; the fatal()
 * messages there mention "type > 255" and a too-small OPSMAX.
 * walk() is invoked further down as walk(0,0,root,&i,P_MIN) and its
 * STR * result is appended to the output with str_scat().
 */
25 int oper1(int type, int arg1);
26 int oper2(int type, int arg1, int arg2);
27 int oper3(int type, int arg1, int arg2, int arg3);
28 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
29 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
30 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
32 #if defined(OS2) || defined(WIN32)
33 static void usage(void);
38 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
39 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
40 printf("\n -D<number> sets debugging flags."
41 "\n -F<character> the awk script to translate is always invoked with"
43 "\n -n<fieldlist> specifies the names of the input fields if input does"
44 "\n not have to be split into an array."
45 "\n -<number> causes a2p to assume that input will always have that"
52 main(register int argc, register char **argv, register char **env)
59 linestr = str_new(80);
60 str = str_new(0); /* first used for -I flags */
61 for (argc--,argv++; argc; argc--,argv++) {
62 if (argv[0][0] != '-' || !argv[0][1])
68 debug = atoi(argv[0]+2);
70 yydebug = (debug & 1);
74 case '0': case '1': case '2': case '3': case '4':
75 case '5': case '6': case '7': case '8': case '9':
76 maxfld = atoi(argv[0]+1);
83 namelist = savestr(argv[0]+2);
94 #if defined(OS2) || defined(WIN32)
95 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
98 fatal("Unrecognized switch: %s\n",argv[0]);
106 if (argv[0] == Nullch) {
107 #if defined(OS2) || defined(WIN32)
108 if ( isatty(fileno(stdin)) )
113 filename = savestr(argv[0]);
115 filename = savestr(argv[0]);
116 if (strEQ(filename,"-"))
121 rsfp = fopen(argv[0],"r");
123 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
127 bufptr = str_get(linestr);
131 /* now parse the report spec */
134 fatal("Translation aborted due to syntax errors.\n");
144 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
146 printf("\t\"%s\"\n",ops[i].cval),i++;
149 printf("\t%d",ops[i].ival),i++;
159 /* first pass to look for numeric variables */
161 prewalk(0,0,root,&i);
163 /* second pass to produce new program */
165 tmpstr = walk(0,0,root,&i,P_MIN);
166 str = str_make(STARTPERL);
167 str_cat(str, "\neval 'exec ");
169 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
170 if $running_under_some_shell;\n\
171 # this emulates #! processing on NIH machines.\n\
172 # (remove #! line above if indigestible)\n\n");
174 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
176 " # process any FOO=bar switches\n\n");
177 if (do_opens && opens) {
182 str_scat(str,tmpstr);
191 "Please check my work on the %d line%s I've marked with \"#???\".\n",
192 checkers, checkers == 1 ? "" : "s" );
194 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Token-return helpers.  Each expands to a complete `return` statement
 * and relies on a local variable named `s` being in scope where it is
 * used; every variant writes s back to bufptr before yielding the
 * returned value.
 *   RETURN(retval) - only updates bufptr.
 *   XTERM(retval)  - additionally sets expectterm to TRUE.
 *   XOP(retval)    - additionally sets expectterm to FALSE.
 *   ID(x)          - sets yylval to string(x,0), sets expectterm to
 *                    FALSE, and returns idtype instead of a value
 *                    supplied by the caller.
 * NOTE(review): the macro arguments are not parenthesised, so callers
 * should pass simple expressions only.
 */
199 #define RETURN(retval) return (bufptr = s,retval)
200 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
201 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
202 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
209 register char *s = bufptr;
217 fprintf(stderr,"Tokener at %s",s);
219 fprintf(stderr,"Tokener at %s\n",s);
224 "Unrecognized character %c in file %s line %d--ignoring.\n",
229 if (*s && *s != '\n') {
230 yyerror("Ignoring spurious backslash");
235 s = str_get(linestr);
240 if ((s = str_gets(linestr, rsfp)) == Nullch) {
244 s = str_get(linestr);
255 yylval = string(s,0);
284 for (d = s + 1; isspace(*d); d++) ;
294 yylval = string("~",1);
312 yylval = string("**=",3);
314 yylval = string(s-1,2);
332 while (*s == ' ' || *s == '\t')
334 if (strnEQ(s,"getline",7))
342 yylval = string("==",2);
346 yylval = string("=",1);
352 yylval = string("!=",2);
356 yylval = string("!~",2);
365 yylval = string("<=",2);
374 yylval = string(">>",2);
378 yylval = string(">=",2);
386 while (isalpha(*s) || isdigit(*s) || *s == '_') \
406 for (d = s; isdigit(*s); s++) ;
407 yylval = string(d,s-d);
413 split_to_array = set_array_base = TRUE;
416 case '/': /* may either be division or pattern */
423 yylval = string("/=",2);
429 case '0': case '1': case '2': case '3': case '4':
430 case '5': case '6': case '7': case '8': case '9': case '.':
435 s = cpy2(tokenbuf,s,s[-1]);
437 fatal("String not terminated:\n%s",str_get(linestr));
439 yylval = string(tokenbuf,0);
445 set_array_base = TRUE;
446 if (strEQ(d,"ARGV")) {
447 yylval=numary(string("ARGV",0));
450 if (strEQ(d,"atan2")) {
457 if (strEQ(d,"break"))
459 if (strEQ(d,"BEGIN"))
464 if (strEQ(d,"continue"))
466 if (strEQ(d,"cos")) {
470 if (strEQ(d,"close")) {
475 if (strEQ(d,"chdir"))
477 else if (strEQ(d,"crypt"))
479 else if (strEQ(d,"chop"))
481 else if (strEQ(d,"chmod"))
483 else if (strEQ(d,"chown"))
490 if (strEQ(d,"delete"))
501 if (strEQ(d,"exit")) {
505 if (strEQ(d,"exp")) {
509 if (strEQ(d,"elsif"))
511 else if (strEQ(d,"eq"))
513 else if (strEQ(d,"eval"))
515 else if (strEQ(d,"eof"))
517 else if (strEQ(d,"each"))
519 else if (strEQ(d,"exec"))
526 if (saw_FS == 1 && in_begin) {
527 for (d = s; *d && isspace(*d); d++) ;
529 for (d++; *d && isspace(*d); d++) ;
530 if (*d == '"' && d[2] == '"')
538 else if (strEQ(d,"function"))
540 if (strEQ(d,"FILENAME"))
542 if (strEQ(d,"foreach"))
544 else if (strEQ(d,"format"))
546 else if (strEQ(d,"fork"))
548 else if (strEQ(d,"fh"))
553 if (strEQ(d,"getline"))
559 else if (strEQ(d,"gt"))
561 else if (strEQ(d,"goto"))
563 else if (strEQ(d,"gmtime"))
577 if (strEQ(d,"index")) {
578 set_array_base = TRUE;
581 if (strEQ(d,"int")) {
595 else if (strEQ(d,"kill"))
600 if (strEQ(d,"length")) {
604 if (strEQ(d,"log")) {
610 else if (strEQ(d,"local"))
612 else if (strEQ(d,"lt"))
614 else if (strEQ(d,"le"))
616 else if (strEQ(d,"localtime")) /* was misspelled "locatime", which is not a Perl word and so never matched the builtin this chain is meant to catch */
618 else if (strEQ(d,"link"))
623 if (strEQ(d,"match")) {
624 set_array_base = TRUE;
633 do_chop = do_split = split_to_array = set_array_base = TRUE;
634 if (strEQ(d,"next")) {
643 if (strEQ(d,"ORS")) {
647 if (strEQ(d,"OFS")) {
651 if (strEQ(d,"OFMT")) {
656 else if (strEQ(d,"ord"))
658 else if (strEQ(d,"oct"))
663 if (strEQ(d,"print")) {
666 if (strEQ(d,"printf")) {
671 else if (strEQ(d,"pop"))
683 if (strEQ(d,"rand")) {
687 if (strEQ(d,"return"))
689 if (strEQ(d,"reset"))
691 else if (strEQ(d,"redo"))
693 else if (strEQ(d,"rename"))
698 if (strEQ(d,"split")) {
699 set_array_base = TRUE;
702 if (strEQ(d,"substr")) {
703 set_array_base = TRUE;
708 if (strEQ(d,"sprintf"))
710 if (strEQ(d,"sqrt")) {
714 if (strEQ(d,"SUBSEP")) {
717 if (strEQ(d,"sin")) {
721 if (strEQ(d,"srand")) {
725 if (strEQ(d,"system")) {
731 else if (strEQ(d,"shift"))
733 else if (strEQ(d,"select"))
735 else if (strEQ(d,"seek"))
737 else if (strEQ(d,"stat"))
739 else if (strEQ(d,"study"))
741 else if (strEQ(d,"sleep"))
743 else if (strEQ(d,"symlink"))
745 else if (strEQ(d,"sort"))
752 else if (strEQ(d,"tell"))
754 else if (strEQ(d,"time"))
756 else if (strEQ(d,"times"))
761 if (strEQ(d,"until"))
763 else if (strEQ(d,"unless"))
765 else if (strEQ(d,"umask"))
767 else if (strEQ(d,"unshift"))
769 else if (strEQ(d,"unlink"))
771 else if (strEQ(d,"utime"))
776 if (strEQ(d,"values"))
781 if (strEQ(d,"while"))
783 if (strEQ(d,"write"))
785 else if (strEQ(d,"wait"))
805 scanpat(register char *s)
813 fatal("Search pattern not found:\n%s",str_get(linestr));
817 for (; *s; s++,d++) {
821 else if (s[1] == '\\')
823 else if (s[1] == '[')
826 else if (*s == '[') {
829 if (*s == '\\' && s[1])
831 if (*s == '/' || (*s == '-' && s[1] == ']'))
834 } while (*s && *s != ']');
843 fatal("Search pattern not terminated:\n%s",str_get(linestr));
845 yylval = string(tokenbuf,0);
852 fprintf(stderr,"%s in file %s at line %d\n",
857 scannum(register char *s)
862 case '1': case '2': case '3': case '4': case '5':
863 case '6': case '7': case '8': case '9': case '0' : case '.':
865 while (isdigit(*s)) {
871 while (isdigit(*s)) {
878 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
880 if (*s == '+' || *s == '-')
886 yylval = string(tokenbuf,0);
893 string(char *ptr, int len)
897 ops[mop++].ival = OSTRING + (1<<8);
900 ops[mop].cval = (char *) safemalloc(len+1);
901 strncpy(ops[mop].cval,ptr,len);
902 ops[mop++].cval[len] = '\0';
904 fatal("Recompile a2p with larger OPSMAX\n");
914 fatal("type > 255 (%d)\n",type);
915 ops[mop++].ival = type;
917 fatal("Recompile a2p with larger OPSMAX\n");
922 oper1(int type, int arg1)
927 fatal("type > 255 (%d)\n",type);
928 ops[mop++].ival = type + (1<<8);
929 ops[mop++].ival = arg1;
931 fatal("Recompile a2p with larger OPSMAX\n");
936 oper2(int type, int arg1, int arg2)
941 fatal("type > 255 (%d)\n",type);
942 ops[mop++].ival = type + (2<<8);
943 ops[mop++].ival = arg1;
944 ops[mop++].ival = arg2;
946 fatal("Recompile a2p with larger OPSMAX\n");
951 oper3(int type, int arg1, int arg2, int arg3)
956 fatal("type > 255 (%d)\n",type);
957 ops[mop++].ival = type + (3<<8);
958 ops[mop++].ival = arg1;
959 ops[mop++].ival = arg2;
960 ops[mop++].ival = arg3;
962 fatal("Recompile a2p with larger OPSMAX\n");
967 oper4(int type, int arg1, int arg2, int arg3, int arg4)
972 fatal("type > 255 (%d)\n",type);
973 ops[mop++].ival = type + (4<<8);
974 ops[mop++].ival = arg1;
975 ops[mop++].ival = arg2;
976 ops[mop++].ival = arg3;
977 ops[mop++].ival = arg4;
979 fatal("Recompile a2p with larger OPSMAX\n");
984 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
989 fatal("type > 255 (%d)\n",type);
990 ops[mop++].ival = type + (5<<8);
991 ops[mop++].ival = arg1;
992 ops[mop++].ival = arg2;
993 ops[mop++].ival = arg3;
994 ops[mop++].ival = arg4;
995 ops[mop++].ival = arg5;
997 fatal("Recompile a2p with larger OPSMAX\n");
1010 type = ops[branch].ival;
1013 for (i=depth; i; i--)
1015 if (type == OSTRING) {
1016 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1019 printf("(%-5d%s %d\n",branch,opname[type],len);
1021 for (i=1; i<=len; i++)
1022 dump(ops[branch+i].ival);
1024 for (i=depth; i; i--)
1031 bl(int arg, int maybe)
1035 else if ((ops[arg].ival & 255) != OBLOCK)
1036 return oper2(OBLOCK,arg,maybe);
1037 else if ((ops[arg].ival >> 8) < 2)
1038 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1049 for (s = str->str_ptr; *s; s++) {
1050 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1054 else if (*s == '\n') {
1055 for (t = s+1; isspace(*t & 127); t++) ;
1057 while (isspace(*t & 127) && *t != '\n') t--;
1058 if (*t == '\n' && t-s > 1) {
1071 register char *d, *s, *t, *e;
1072 register int pos, newpos;
1076 for (s = str->str_ptr; *s; s++) {
1085 else if (*s == '\t')
1087 if (pos > 78) { /* split a long line? */
1090 for (t = tokenbuf; isspace(*t & 127); t++) {
1097 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1101 while (d > tokenbuf &&
1102 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1107 while (d > tokenbuf &&
1108 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1113 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1118 while (d > tokenbuf && *d != ' ')
1128 if (d[-1] != ';' && !(newpos % 4)) {
1134 newpos += strlen(t);
1149 for (t = tokenbuf; *t; t++) {
1153 strcpy(t+strlen(t)-1, "\t#???\n");
1159 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1161 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1164 fputs(tokenbuf,stdout);
1173 key = walk(0,0,arg,&dummy,P_MIN);
1175 hstore(symtab,key->str_ptr,str_make("1"));
1177 set_array_base = TRUE;
1182 rememberargs(int arg)
1189 type = ops[arg].ival & 255;
1190 if (type == OCOMMA) {
1191 rememberargs(ops[arg+1].ival);
1192 rememberargs(ops[arg+3].ival);
1194 else if (type == OVAR) {
1196 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1199 fatal("panic: unknown argument type %d, line %d\n",type,line);
1206 int type = ops[arg].ival & 255;
1209 if (type != OSTRING)
1210 fatal("panic: aryrefarg %d, line %d\n",type,line);
1211 str = hfetch(curarghash,ops[arg+1].cval);
1218 fixfargs(int name, int arg, int prevargs)
1226 type = ops[arg].ival & 255;
1227 if (type == OCOMMA) {
1228 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1229 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1231 else if (type == OVAR) {
1232 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1233 if (strEQ(str_get(str),"*")) {
1236 str_set(str,""); /* in case another routine has this */
1237 ops[arg].ival &= ~255;
1238 ops[arg].ival |= OSTAR;
1239 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1240 fprintf(stderr,"Adding %s\n",tmpbuf);
1243 hstore(curarghash,tmpbuf,str);
1245 numargs = prevargs + 1;
1248 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1249 type,prevargs+1,line);
1254 fixrargs(char *name, int arg, int prevargs)
1262 type = ops[arg].ival & 255;
1263 if (type == OCOMMA) {
1264 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1265 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1268 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1269 sprintf(tmpbuf,"%s:%d",name,prevargs);
1270 str = hfetch(curarghash,tmpbuf);
1272 if (str && strEQ(str->str_ptr,"*")) {
1273 if (type == OVAR || type == OSTAR) {
1274 ops[arg].ival &= ~255;
1275 ops[arg].ival |= OSTAR;
1278 fatal("Can't pass expression by reference as arg %d of %s\n",
1281 numargs = prevargs + 1;