1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.1 92/08/07 18:29:14 lwall
11 * Revision 4.0.1.2 92/06/08 16:15:16 lwall
12 * patch20: in a2p, now warns about spurious backslashes
13 * patch20: in a2p, now allows [ to be backslashed in pattern
14 * patch20: in a2p, now allows numbers of the form 2.
16 * Revision 4.0.1.1 91/06/07 12:12:59 lwall
17 * patch4: new copyright notice
19 * Revision 4.0 91/03/20 01:57:26 lwall
25 #include "../patchlevel.h"
39 printf("\nThis is the AWK to PERL translator, version 4.0, patchlevel %d\n", PATCHLEVEL);
40 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
41 printf("\n -D<number> sets debugging flags."
42 "\n -F<character> the awk script to translate is always invoked with"
44 "\n -n<fieldlist> specifies the names of the input fields if input does"
45 "\n not have to be split into an array."
46 "\n -<number> causes a2p to assume that input will always have that"
62 linestr = str_new(80);
63 str = str_new(0); /* first used for -I flags */
64 for (argc--,argv++; argc; argc--,argv++) {
65 if (argv[0][0] != '-' || !argv[0][1])
71 debug = atoi(argv[0]+2);
73 yydebug = (debug & 1);
77 case '0': case '1': case '2': case '3': case '4':
78 case '5': case '6': case '7': case '8': case '9':
79 maxfld = atoi(argv[0]+1);
86 namelist = savestr(argv[0]+2);
94 fatal("Unrecognized switch: %s\n",argv[0]);
104 if (argv[0] == Nullch) {
106 if ( isatty(fileno(stdin)) )
111 filename = savestr(argv[0]);
113 filename = savestr(argv[0]);
114 if (strEQ(filename,"-"))
119 rsfp = fopen(argv[0],"r");
121 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
125 bufptr = str_get(linestr);
129 /* now parse the awk script */
132 fatal("Translation aborted due to syntax errors.\n");
142 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
144 printf("\t\"%s\"\n",ops[i].cval),i++;
147 printf("\t%d",ops[i].ival),i++;
157 /* first pass to look for numeric variables */
159 prewalk(0,0,root,&i);
161 /* second pass to produce new program */
163 tmpstr = walk(0,0,root,&i,P_MIN);
164 str = str_make("#!");
166 str_cat(str, "/perl\neval \"exec ");
168 str_cat(str, "/perl -S $0 $*\"\n\
169 if $running_under_some_shell;\n\
170 # this emulates #! processing on NIH machines.\n\
171 # (remove #! line above if indigestible)\n\n");
173 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift;\n");
175 " # process any FOO=bar switches\n\n");
176 if (do_opens && opens) {
181 str_scat(str,tmpstr);
190 "Please check my work on the %d line%s I've marked with \"#???\".\n",
191 checkers, checkers == 1 ? "" : "s" );
193 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Token-return helpers.  Each one expands to a complete `return` statement
 * that first stores the scan pointer `s` back into `bufptr`, so they can only
 * be used where a variable named `s` is in scope.
 *
 *   RETURN(retval) - save the scan position, then return retval.
 *   XTERM(retval)  - as RETURN, but set expectterm to TRUE first.
 *   XOP(retval)    - as RETURN, but set expectterm to FALSE first.
 *   ID(x)          - set yylval to string(x,0), set expectterm to FALSE,
 *                    save the scan position and return `idtype` (which
 *                    must also be in scope at the point of use).
 *
 * The comma operator is what sequences the assignments before the value
 * that is finally returned.
 */
198 #define RETURN(retval) return (bufptr = s,retval)
199 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
200 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
201 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
207 register char *s = bufptr;
215 fprintf(stderr,"Tokener at %s",s);
217 fprintf(stderr,"Tokener at %s\n",s);
222 "Unrecognized character %c in file %s line %d--ignoring.\n",
227 if (*s && *s != '\n') {
228 yyerror("Ignoring spurious backslash");
233 s = str_get(linestr);
238 if ((s = str_gets(linestr, rsfp)) == Nullch) {
242 s = str_get(linestr);
253 yylval = string(s,0);
278 for (d = s + 1; isspace(*d); d++) ;
288 yylval = string("~",1);
306 yylval = string("**=",3);
308 yylval = string(s-1,2);
326 while (*s == ' ' || *s == '\t')
328 if (strnEQ(s,"getline",7))
336 yylval = string("==",2);
340 yylval = string("=",1);
346 yylval = string("!=",2);
350 yylval = string("!~",2);
359 yylval = string("<=",2);
368 yylval = string(">>",2);
372 yylval = string(">=",2);
380 while (isalpha(*s) || isdigit(*s) || *s == '_') \
400 for (d = s; isdigit(*s); s++) ;
401 yylval = string(d,s-d);
407 split_to_array = set_array_base = TRUE;
410 case '/': /* may either be division or pattern */
417 yylval = string("/=",2);
423 case '0': case '1': case '2': case '3': case '4':
424 case '5': case '6': case '7': case '8': case '9': case '.':
429 s = cpy2(tokenbuf,s,s[-1]);
431 fatal("String not terminated:\n%s",str_get(linestr));
433 yylval = string(tokenbuf,0);
439 set_array_base = TRUE;
440 if (strEQ(d,"ARGV")) {
441 yylval=numary(string("ARGV",0));
444 if (strEQ(d,"atan2")) {
451 if (strEQ(d,"break"))
453 if (strEQ(d,"BEGIN"))
458 if (strEQ(d,"continue"))
460 if (strEQ(d,"cos")) {
464 if (strEQ(d,"close")) {
469 if (strEQ(d,"chdir"))
471 else if (strEQ(d,"crypt"))
473 else if (strEQ(d,"chop"))
475 else if (strEQ(d,"chmod"))
477 else if (strEQ(d,"chown"))
484 if (strEQ(d,"delete"))
495 if (strEQ(d,"exit")) {
499 if (strEQ(d,"exp")) {
503 if (strEQ(d,"elsif"))
505 else if (strEQ(d,"eq"))
507 else if (strEQ(d,"eval"))
509 else if (strEQ(d,"eof"))
511 else if (strEQ(d,"each"))
513 else if (strEQ(d,"exec"))
520 if (saw_FS == 1 && in_begin) {
521 for (d = s; *d && isspace(*d); d++) ;
523 for (d++; *d && isspace(*d); d++) ;
524 if (*d == '"' && d[2] == '"')
532 else if (strEQ(d,"function"))
534 if (strEQ(d,"FILENAME"))
536 if (strEQ(d,"foreach"))
538 else if (strEQ(d,"format"))
540 else if (strEQ(d,"fork"))
542 else if (strEQ(d,"fh"))
547 if (strEQ(d,"getline"))
553 else if (strEQ(d,"gt"))
555 else if (strEQ(d,"goto"))
557 else if (strEQ(d,"gmtime"))
571 if (strEQ(d,"index")) {
572 set_array_base = TRUE;
575 if (strEQ(d,"int")) {
589 else if (strEQ(d,"kill"))
594 if (strEQ(d,"length")) {
598 if (strEQ(d,"log")) {
604 else if (strEQ(d,"local"))
606 else if (strEQ(d,"lt"))
608 else if (strEQ(d,"le"))
610 else if (strEQ(d,"localtime")) /* perl's time builtin is spelled "localtime" */
612 else if (strEQ(d,"link"))
617 if (strEQ(d,"match")) {
618 set_array_base = TRUE;
627 do_chop = do_split = split_to_array = set_array_base = TRUE;
628 if (strEQ(d,"next")) {
637 if (strEQ(d,"ORS")) {
641 if (strEQ(d,"OFS")) {
645 if (strEQ(d,"OFMT")) {
650 else if (strEQ(d,"ord"))
652 else if (strEQ(d,"oct"))
657 if (strEQ(d,"print")) {
660 if (strEQ(d,"printf")) {
665 else if (strEQ(d,"pop"))
677 if (strEQ(d,"rand")) {
681 if (strEQ(d,"return"))
683 if (strEQ(d,"reset"))
685 else if (strEQ(d,"redo"))
687 else if (strEQ(d,"rename"))
692 if (strEQ(d,"split")) {
693 set_array_base = TRUE;
696 if (strEQ(d,"substr")) {
697 set_array_base = TRUE;
702 if (strEQ(d,"sprintf"))
704 if (strEQ(d,"sqrt")) {
708 if (strEQ(d,"SUBSEP")) {
711 if (strEQ(d,"sin")) {
715 if (strEQ(d,"srand")) {
719 if (strEQ(d,"system")) {
725 else if (strEQ(d,"shift"))
727 else if (strEQ(d,"select"))
729 else if (strEQ(d,"seek"))
731 else if (strEQ(d,"stat"))
733 else if (strEQ(d,"study"))
735 else if (strEQ(d,"sleep"))
737 else if (strEQ(d,"symlink"))
739 else if (strEQ(d,"sort"))
746 else if (strEQ(d,"tell"))
748 else if (strEQ(d,"time"))
750 else if (strEQ(d,"times"))
755 if (strEQ(d,"until"))
757 else if (strEQ(d,"unless"))
759 else if (strEQ(d,"umask"))
761 else if (strEQ(d,"unshift"))
763 else if (strEQ(d,"unlink"))
765 else if (strEQ(d,"utime"))
770 if (strEQ(d,"values"))
775 if (strEQ(d,"while"))
777 if (strEQ(d,"write"))
779 else if (strEQ(d,"wait"))
808 fatal("Search pattern not found:\n%s",str_get(linestr));
812 for (; *s; s++,d++) {
816 else if (s[1] == '\\')
818 else if (s[1] == '[')
821 else if (*s == '[') {
824 if (*s == '\\' && s[1])
826 if (*s == '/' || (*s == '-' && s[1] == ']'))
829 } while (*s && *s != ']');
838 fatal("Search pattern not terminated:\n%s",str_get(linestr));
840 yylval = string(tokenbuf,0);
847 fprintf(stderr,"%s in file %s at line %d\n",
858 case '1': case '2': case '3': case '4': case '5':
859 case '6': case '7': case '8': case '9': case '0' : case '.':
861 while (isdigit(*s)) {
867 while (isdigit(*s)) {
874 if (index("eE",*s) && index("+-0123456789",s[1])) {
876 if (*s == '+' || *s == '-')
882 yylval = string(tokenbuf,0);
893 ops[mop++].ival = OSTRING + (1<<8);
896 ops[mop].cval = safemalloc(len+1);
897 strncpy(ops[mop].cval,ptr,len);
898 ops[mop++].cval[len] = '\0';
900 fatal("Recompile a2p with larger OPSMAX\n");
910 fatal("type > 255 (%d)\n",type);
911 ops[mop++].ival = type;
913 fatal("Recompile a2p with larger OPSMAX\n");
924 fatal("type > 255 (%d)\n",type);
925 ops[mop++].ival = type + (1<<8);
926 ops[mop++].ival = arg1;
928 fatal("Recompile a2p with larger OPSMAX\n");
932 oper2(type,arg1,arg2)
940 fatal("type > 255 (%d)\n",type);
941 ops[mop++].ival = type + (2<<8);
942 ops[mop++].ival = arg1;
943 ops[mop++].ival = arg2;
945 fatal("Recompile a2p with larger OPSMAX\n");
949 oper3(type,arg1,arg2,arg3)
958 fatal("type > 255 (%d)\n",type);
959 ops[mop++].ival = type + (3<<8);
960 ops[mop++].ival = arg1;
961 ops[mop++].ival = arg2;
962 ops[mop++].ival = arg3;
964 fatal("Recompile a2p with larger OPSMAX\n");
968 oper4(type,arg1,arg2,arg3,arg4)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (4<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
983 ops[mop++].ival = arg4;
985 fatal("Recompile a2p with larger OPSMAX\n");
989 oper5(type,arg1,arg2,arg3,arg4,arg5)
1000 fatal("type > 255 (%d)\n",type);
1001 ops[mop++].ival = type + (5<<8);
1002 ops[mop++].ival = arg1;
1003 ops[mop++].ival = arg2;
1004 ops[mop++].ival = arg3;
1005 ops[mop++].ival = arg4;
1006 ops[mop++].ival = arg5;
1008 fatal("Recompile a2p with larger OPSMAX\n");
1021 type = ops[branch].ival;
1024 for (i=depth; i; i--)
1026 if (type == OSTRING) {
1027 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1030 printf("(%-5d%s %d\n",branch,opname[type],len);
1032 for (i=1; i<=len; i++)
1033 dump(ops[branch+i].ival);
1035 for (i=depth; i; i--)
1047 else if ((ops[arg].ival & 255) != OBLOCK)
1048 return oper2(OBLOCK,arg,maybe);
1049 else if ((ops[arg].ival >> 8) < 2)
1050 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1061 for (s = str->str_ptr; *s; s++) {
1062 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1066 else if (*s == '\n') {
1067 for (t = s+1; isspace(*t & 127); t++) ;
1069 while (isspace(*t & 127) && *t != '\n') t--;
1070 if (*t == '\n' && t-s > 1) {
1083 register char *d, *s, *t, *e;
1084 register int pos, newpos;
1088 for (s = str->str_ptr; *s; s++) {
1097 else if (*s == '\t')
1099 if (pos > 78) { /* split a long line? */
1102 for (t = tokenbuf; isspace(*t & 127); t++) {
1109 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1113 while (d > tokenbuf &&
1114 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1119 while (d > tokenbuf &&
1120 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1125 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1130 while (d > tokenbuf && *d != ' ')
1140 if (d[-1] != ';' && !(newpos % 4)) {
1146 newpos += strlen(t);
1160 for (t = tokenbuf; *t; t++) {
1164 strcpy(t+strlen(t)-1, "\t#???\n");
1170 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1172 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1175 fputs(tokenbuf,stdout);
1184 key = walk(0,0,arg,&dummy,P_MIN);
1186 hstore(symtab,key->str_ptr,str_make("1"));
1188 set_array_base = TRUE;
1200 type = ops[arg].ival & 255;
1201 if (type == OCOMMA) {
1202 rememberargs(ops[arg+1].ival);
1203 rememberargs(ops[arg+3].ival);
1205 else if (type == OVAR) {
1207 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1210 fatal("panic: unknown argument type %d, line %d\n",type,line);
1217 int type = ops[arg].ival & 255;
1220 if (type != OSTRING)
1221 fatal("panic: aryrefarg %d, line %d\n",type,line);
1222 str = hfetch(curarghash,ops[arg+1].cval);
1228 fixfargs(name,arg,prevargs)
1239 type = ops[arg].ival & 255;
1240 if (type == OCOMMA) {
1241 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1242 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1244 else if (type == OVAR) {
1245 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1246 if (strEQ(str_get(str),"*")) {
1249 str_set(str,""); /* in case another routine has this */
1250 ops[arg].ival &= ~255;
1251 ops[arg].ival |= OSTAR;
1252 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1253 fprintf(stderr,"Adding %s\n",tmpbuf);
1256 hstore(curarghash,tmpbuf,str);
1258 numargs = prevargs + 1;
1261 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1262 type,prevargs+1,line);
1266 fixrargs(name,arg,prevargs)
1277 type = ops[arg].ival & 255;
1278 if (type == OCOMMA) {
1279 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1280 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1285 sprintf(tmpbuf,"%s:%d",name,prevargs);
1286 str = hfetch(curarghash,tmpbuf);
1287 if (str && strEQ(str->str_ptr,"*")) {
1288 if (type == OVAR || type == OSTAR) {
1289 ops[arg].ival &= ~255;
1290 ops[arg].ival |= OSTAR;
1293 fatal("Can't pass expression by reference as arg %d of %s\n",
1296 numargs = prevargs + 1;