1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991-1997, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 #if defined(OS2) || defined(WIN32)
15 #include "../patchlevel.h"
/*
 * Prototypes for the ops[] node constructors oper1..oper5 (one per operand
 * count) and for walk(), which main() calls for the second pass that
 * produces the new program.
 */
25 int oper1(int type, int arg1);
26 int oper2(int type, int arg1, int arg2);
27 int oper3(int type, int arg1, int arg2, int arg3);
28 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
29 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
30 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
32 #if defined(OS2) || defined(WIN32)
33 static void usage(void);
38 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
39 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
40 printf("\n -D<number> sets debugging flags."
41 "\n -F<character> the awk script to translate is always invoked with"
43 "\n -n<fieldlist> specifies the names of the input fields if input does"
44 "\n not have to be split into an array."
45 "\n -<number> causes a2p to assume that input will always have that"
52 main(register int argc, register char **argv, register char **env)
59 linestr = str_new(80);
60 str = str_new(0); /* first used for -I flags */
61 for (argc--,argv++; argc; argc--,argv++) {
62 if (argv[0][0] != '-' || !argv[0][1])
68 debug = atoi(argv[0]+2);
70 yydebug = (debug & 1);
74 case '0': case '1': case '2': case '3': case '4':
75 case '5': case '6': case '7': case '8': case '9':
76 maxfld = atoi(argv[0]+1);
83 namelist = savestr(argv[0]+2);
94 #if defined(OS2) || defined(WIN32)
95 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
98 fatal("Unrecognized switch: %s\n",argv[0]);
106 if (argv[0] == Nullch) {
107 #if defined(OS2) || defined(WIN32)
108 if ( isatty(fileno(stdin)) )
113 filename = savestr(argv[0]);
115 filename = savestr(argv[0]);
116 if (strEQ(filename,"-"))
121 rsfp = fopen(argv[0],"r");
123 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
127 bufptr = str_get(linestr);
131 /* now parse the awk script */
134 fatal("Translation aborted due to syntax errors.\n");
144 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
146 printf("\t\"%s\"\n",ops[i].cval),i++;
149 printf("\t%d",ops[i].ival),i++;
159 /* first pass to look for numeric variables */
161 prewalk(0,0,root,&i);
163 /* second pass to produce new program */
165 tmpstr = walk(0,0,root,&i,P_MIN);
166 str = str_make(STARTPERL);
167 str_cat(str, "\neval 'exec ");
169 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
170 if $running_under_some_shell;\n\
171 # this emulates #! processing on NIH machines.\n\
172 # (remove #! line above if indigestible)\n\n");
174 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
176 " # process any FOO=bar switches\n\n");
177 if (do_opens && opens) {
182 str_scat(str,tmpstr);
191 "Please check my work on the %d line%s I've marked with \"#???\".\n",
192 checkers, checkers == 1 ? "" : "s" );
194 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Tokener exit macros.  Each one expands to a complete `return` statement
 * and relies on a local scan pointer named `s` being in scope at the point
 * of use: the current value of `s` is written back to the global `bufptr`
 * before the token code is returned.
 *
 *   RETURN(retval) - save the scan position, return retval.
 *   XTERM(retval)  - as RETURN, and set expectterm to TRUE first.
 *   XOP(retval)    - as RETURN, and set expectterm to FALSE first.
 *   ID(x)          - set yylval to string(x,0), set expectterm to FALSE,
 *                    save the scan position, and return idtype.
 *
 * The comma expressions evaluate left to right, so the side effects happen
 * before the returned value is produced.
 */
199 #define RETURN(retval) return (bufptr = s,retval)
200 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
201 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
202 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
209 register char *s = bufptr;
217 fprintf(stderr,"Tokener at %s",s);
219 fprintf(stderr,"Tokener at %s\n",s);
224 "Unrecognized character %c in file %s line %d--ignoring.\n",
229 if (*s && *s != '\n') {
230 yyerror("Ignoring spurious backslash");
235 s = str_get(linestr);
240 if ((s = str_gets(linestr, rsfp)) == Nullch) {
244 s = str_get(linestr);
255 yylval = string(s,0);
280 for (d = s + 1; isspace(*d); d++) ;
290 yylval = string("~",1);
308 yylval = string("**=",3);
310 yylval = string(s-1,2);
328 while (*s == ' ' || *s == '\t')
330 if (strnEQ(s,"getline",7))
338 yylval = string("==",2);
342 yylval = string("=",1);
348 yylval = string("!=",2);
352 yylval = string("!~",2);
361 yylval = string("<=",2);
370 yylval = string(">>",2);
374 yylval = string(">=",2);
382 while (isalpha(*s) || isdigit(*s) || *s == '_') \
402 for (d = s; isdigit(*s); s++) ;
403 yylval = string(d,s-d);
409 split_to_array = set_array_base = TRUE;
412 case '/': /* may either be division or pattern */
419 yylval = string("/=",2);
425 case '0': case '1': case '2': case '3': case '4':
426 case '5': case '6': case '7': case '8': case '9': case '.':
431 s = cpy2(tokenbuf,s,s[-1]);
433 fatal("String not terminated:\n%s",str_get(linestr));
435 yylval = string(tokenbuf,0);
441 set_array_base = TRUE;
442 if (strEQ(d,"ARGV")) {
443 yylval=numary(string("ARGV",0));
446 if (strEQ(d,"atan2")) {
453 if (strEQ(d,"break"))
455 if (strEQ(d,"BEGIN"))
460 if (strEQ(d,"continue"))
462 if (strEQ(d,"cos")) {
466 if (strEQ(d,"close")) {
471 if (strEQ(d,"chdir"))
473 else if (strEQ(d,"crypt"))
475 else if (strEQ(d,"chop"))
477 else if (strEQ(d,"chmod"))
479 else if (strEQ(d,"chown"))
486 if (strEQ(d,"delete"))
497 if (strEQ(d,"exit")) {
501 if (strEQ(d,"exp")) {
505 if (strEQ(d,"elsif"))
507 else if (strEQ(d,"eq"))
509 else if (strEQ(d,"eval"))
511 else if (strEQ(d,"eof"))
513 else if (strEQ(d,"each"))
515 else if (strEQ(d,"exec"))
522 if (saw_FS == 1 && in_begin) {
523 for (d = s; *d && isspace(*d); d++) ;
525 for (d++; *d && isspace(*d); d++) ;
526 if (*d == '"' && d[2] == '"')
534 else if (strEQ(d,"function"))
536 if (strEQ(d,"FILENAME"))
538 if (strEQ(d,"foreach"))
540 else if (strEQ(d,"format"))
542 else if (strEQ(d,"fork"))
544 else if (strEQ(d,"fh"))
549 if (strEQ(d,"getline"))
555 else if (strEQ(d,"gt"))
557 else if (strEQ(d,"goto"))
559 else if (strEQ(d,"gmtime"))
573 if (strEQ(d,"index")) {
574 set_array_base = TRUE;
577 if (strEQ(d,"int")) {
591 else if (strEQ(d,"kill"))
596 if (strEQ(d,"length")) {
600 if (strEQ(d,"log")) {
606 else if (strEQ(d,"local"))
608 else if (strEQ(d,"lt"))
610 else if (strEQ(d,"le"))
612 else if (strEQ(d,"localtime")) /* Perl builtin name (cf. the "gmtime" test); "locatime" was a misspelling */
614 else if (strEQ(d,"link"))
619 if (strEQ(d,"match")) {
620 set_array_base = TRUE;
629 do_chop = do_split = split_to_array = set_array_base = TRUE;
630 if (strEQ(d,"next")) {
639 if (strEQ(d,"ORS")) {
643 if (strEQ(d,"OFS")) {
647 if (strEQ(d,"OFMT")) {
652 else if (strEQ(d,"ord"))
654 else if (strEQ(d,"oct"))
659 if (strEQ(d,"print")) {
662 if (strEQ(d,"printf")) {
667 else if (strEQ(d,"pop"))
679 if (strEQ(d,"rand")) {
683 if (strEQ(d,"return"))
685 if (strEQ(d,"reset"))
687 else if (strEQ(d,"redo"))
689 else if (strEQ(d,"rename"))
694 if (strEQ(d,"split")) {
695 set_array_base = TRUE;
698 if (strEQ(d,"substr")) {
699 set_array_base = TRUE;
704 if (strEQ(d,"sprintf"))
706 if (strEQ(d,"sqrt")) {
710 if (strEQ(d,"SUBSEP")) {
713 if (strEQ(d,"sin")) {
717 if (strEQ(d,"srand")) {
721 if (strEQ(d,"system")) {
727 else if (strEQ(d,"shift"))
729 else if (strEQ(d,"select"))
731 else if (strEQ(d,"seek"))
733 else if (strEQ(d,"stat"))
735 else if (strEQ(d,"study"))
737 else if (strEQ(d,"sleep"))
739 else if (strEQ(d,"symlink"))
741 else if (strEQ(d,"sort"))
748 else if (strEQ(d,"tell"))
750 else if (strEQ(d,"time"))
752 else if (strEQ(d,"times"))
757 if (strEQ(d,"until"))
759 else if (strEQ(d,"unless"))
761 else if (strEQ(d,"umask"))
763 else if (strEQ(d,"unshift"))
765 else if (strEQ(d,"unlink"))
767 else if (strEQ(d,"utime"))
772 if (strEQ(d,"values"))
777 if (strEQ(d,"while"))
779 if (strEQ(d,"write"))
781 else if (strEQ(d,"wait"))
801 scanpat(register char *s)
809 fatal("Search pattern not found:\n%s",str_get(linestr));
813 for (; *s; s++,d++) {
817 else if (s[1] == '\\')
819 else if (s[1] == '[')
822 else if (*s == '[') {
825 if (*s == '\\' && s[1])
827 if (*s == '/' || (*s == '-' && s[1] == ']'))
830 } while (*s && *s != ']');
839 fatal("Search pattern not terminated:\n%s",str_get(linestr));
841 yylval = string(tokenbuf,0);
848 fprintf(stderr,"%s in file %s at line %d\n",
853 scannum(register char *s)
858 case '1': case '2': case '3': case '4': case '5':
859 case '6': case '7': case '8': case '9': case '0' : case '.':
861 while (isdigit(*s)) {
867 while (isdigit(*s)) {
/*
 * Exponent part: an 'e' or 'E' followed by a sign or a digit.
 * strchr() treats the terminating NUL as part of the searched string, so
 * strchr("eE", '\0') is non-NULL; test for NUL explicitly so that the end
 * of the buffer is never mistaken for an exponent marker and s[1] is only
 * examined when *s is a real character.
 */
874 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) {
876 if (*s == '+' || *s == '-')
882 yylval = string(tokenbuf,0);
/*
 * string - append an OSTRING node to the ops[] table.
 * The head slot holds OSTRING with an operand count of 1 (count << 8); the
 * next slot's cval receives a newly safemalloc'd copy of `len` bytes taken
 * from `ptr`.
 * NOTE(review): callers in this file pass len == 0 together with ordinary
 * strings (e.g. string(tokenbuf,0)); the handling of that case, the OPSMAX
 * test and the return value are on lines not visible here - confirm
 * against the full source.
 */
889 string(char *ptr, int len)
893 ops[mop++].ival = OSTRING + (1<<8);
896 ops[mop].cval = (char *) safemalloc(len+1);
897 strncpy(ops[mop].cval,ptr,len);
898 ops[mop++].cval[len] = '\0'; /* strncpy does not terminate when it copies exactly len bytes */
900 fatal("Recompile a2p with larger OPSMAX\n");
910 fatal("type > 255 (%d)\n",type);
911 ops[mop++].ival = type;
913 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper1 - append a one-operand node to the ops[] table.
 * The head slot packs the operator code `type` together with the operand
 * count 1 shifted left by 8; arg1 is stored in the following slot.
 * Two fatal() diagnostics belong to this function: one for a type above
 * 255 and one asking for a larger OPSMAX.
 * NOTE(review): the conditions guarding the fatal() calls and the return
 * statement are not visible here - confirm against the full source.
 */
918 oper1(int type, int arg1)
923 fatal("type > 255 (%d)\n",type);
924 ops[mop++].ival = type + (1<<8); /* head slot: type plus (operand count << 8) */
925 ops[mop++].ival = arg1;
927 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 - append a two-operand node to the ops[] table.
 * The head slot packs the operator code `type` together with the operand
 * count 2 shifted left by 8; arg1 and arg2 are stored in the two slots
 * that follow, in that order.
 * NOTE(review): the conditions guarding the fatal() calls and the return
 * statement are not visible here - confirm against the full source.
 */
932 oper2(int type, int arg1, int arg2)
937 fatal("type > 255 (%d)\n",type);
938 ops[mop++].ival = type + (2<<8); /* head slot: type plus (operand count << 8) */
939 ops[mop++].ival = arg1;
940 ops[mop++].ival = arg2;
942 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 - append a three-operand node to the ops[] table.
 * The head slot packs the operator code `type` together with the operand
 * count 3 shifted left by 8; arg1..arg3 are stored in the three slots that
 * follow, in that order.
 * NOTE(review): the conditions guarding the fatal() calls and the return
 * statement are not visible here - confirm against the full source.
 */
947 oper3(int type, int arg1, int arg2, int arg3)
952 fatal("type > 255 (%d)\n",type);
953 ops[mop++].ival = type + (3<<8); /* head slot: type plus (operand count << 8) */
954 ops[mop++].ival = arg1;
955 ops[mop++].ival = arg2;
956 ops[mop++].ival = arg3;
958 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 - append a four-operand node to the ops[] table.
 * The head slot packs the operator code `type` together with the operand
 * count 4 shifted left by 8; arg1..arg4 are stored in the four slots that
 * follow, in that order.
 * NOTE(review): the conditions guarding the fatal() calls and the return
 * statement are not visible here - confirm against the full source.
 */
963 oper4(int type, int arg1, int arg2, int arg3, int arg4)
968 fatal("type > 255 (%d)\n",type);
969 ops[mop++].ival = type + (4<<8); /* head slot: type plus (operand count << 8) */
970 ops[mop++].ival = arg1;
971 ops[mop++].ival = arg2;
972 ops[mop++].ival = arg3;
973 ops[mop++].ival = arg4;
975 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 - append a five-operand node to the ops[] table.
 * The head slot packs the operator code `type` together with the operand
 * count 5 shifted left by 8; arg1..arg5 are stored in the five slots that
 * follow, in that order.
 * NOTE(review): the conditions guarding the fatal() calls and the return
 * statement are not visible here - confirm against the full source.
 */
980 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
985 fatal("type > 255 (%d)\n",type);
986 ops[mop++].ival = type + (5<<8); /* head slot: type plus (operand count << 8) */
987 ops[mop++].ival = arg1;
988 ops[mop++].ival = arg2;
989 ops[mop++].ival = arg3;
990 ops[mop++].ival = arg4;
991 ops[mop++].ival = arg5;
993 fatal("Recompile a2p with larger OPSMAX\n");
1006 type = ops[branch].ival;
1009 for (i=depth; i; i--)
1011 if (type == OSTRING) {
1012 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1015 printf("(%-5d%s %d\n",branch,opname[type],len);
1017 for (i=1; i<=len; i++)
1018 dump(ops[branch+i].ival);
1020 for (i=depth; i; i--)
1027 bl(int arg, int maybe)
1031 else if ((ops[arg].ival & 255) != OBLOCK)
1032 return oper2(OBLOCK,arg,maybe);
1033 else if ((ops[arg].ival >> 8) < 2)
1034 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1045 for (s = str->str_ptr; *s; s++) {
1046 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1050 else if (*s == '\n') {
1051 for (t = s+1; isspace(*t & 127); t++) ;
1053 while (isspace(*t & 127) && *t != '\n') t--;
1054 if (*t == '\n' && t-s > 1) {
1067 register char *d, *s, *t, *e;
1068 register int pos, newpos;
1072 for (s = str->str_ptr; *s; s++) {
1081 else if (*s == '\t')
1083 if (pos > 78) { /* split a long line? */
1086 for (t = tokenbuf; isspace(*t & 127); t++) {
1093 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1097 while (d > tokenbuf &&
1098 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1103 while (d > tokenbuf &&
1104 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1109 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1114 while (d > tokenbuf && *d != ' ')
1124 if (d[-1] != ';' && !(newpos % 4)) {
1130 newpos += strlen(t);
1145 for (t = tokenbuf; *t; t++) {
1149 strcpy(t+strlen(t)-1, "\t#???\n");
1155 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1157 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1160 fputs(tokenbuf,stdout);
1169 key = walk(0,0,arg,&dummy,P_MIN);
1171 hstore(symtab,key->str_ptr,str_make("1"));
1173 set_array_base = TRUE;
1178 rememberargs(int arg)
1185 type = ops[arg].ival & 255;
1186 if (type == OCOMMA) {
1187 rememberargs(ops[arg+1].ival);
1188 rememberargs(ops[arg+3].ival);
1190 else if (type == OVAR) {
1192 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1195 fatal("panic: unknown argument type %d, line %d\n",type,line);
1202 int type = ops[arg].ival & 255;
1205 if (type != OSTRING)
1206 fatal("panic: aryrefarg %d, line %d\n",type,line);
1207 str = hfetch(curarghash,ops[arg+1].cval);
1214 fixfargs(int name, int arg, int prevargs)
1222 type = ops[arg].ival & 255;
1223 if (type == OCOMMA) {
1224 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1225 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1227 else if (type == OVAR) {
1228 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1229 if (strEQ(str_get(str),"*")) {
1232 str_set(str,""); /* in case another routine has this */
1233 ops[arg].ival &= ~255;
1234 ops[arg].ival |= OSTAR;
1235 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1236 fprintf(stderr,"Adding %s\n",tmpbuf);
1239 hstore(curarghash,tmpbuf,str);
1241 numargs = prevargs + 1;
1244 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1245 type,prevargs+1,line);
1250 fixrargs(char *name, int arg, int prevargs)
1258 type = ops[arg].ival & 255;
1259 if (type == OCOMMA) {
1260 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1261 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1264 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1265 sprintf(tmpbuf,"%s:%d",name,prevargs);
1266 str = hfetch(curarghash,tmpbuf);
1268 if (str && strEQ(str->str_ptr,"*")) {
1269 if (type == OVAR || type == OSTAR) {
1270 ops[arg].ival &= ~255;
1271 ops[arg].ival |= OSTAR;
1274 fatal("Can't pass expression by reference as arg %d of %s\n",
1277 numargs = prevargs + 1;