1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
32 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
33 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
34 printf("\n -D<number> sets debugging flags."
35 "\n -F<character> the awk script to translate is always invoked with"
37 "\n -n<fieldlist> specifies the names of the input fields if input does"
38 "\n not have to be split into an array."
39 "\n -<number> causes a2p to assume that input will always have that"
54 linestr = str_new(80);
55 str = str_new(0); /* first used for -I flags */
56 for (argc--,argv++; argc; argc--,argv++) {
57 if (argv[0][0] != '-' || !argv[0][1])
63 debug = atoi(argv[0]+2);
65 yydebug = (debug & 1);
69 case '0': case '1': case '2': case '3': case '4':
70 case '5': case '6': case '7': case '8': case '9':
71 maxfld = atoi(argv[0]+1);
78 namelist = savestr(argv[0]+2);
86 fatal("Unrecognized switch: %s\n",argv[0]);
96 if (argv[0] == Nullch) {
98 if ( isatty(fileno(stdin)) )
103 filename = savestr(argv[0]);
105 filename = savestr(argv[0]);
106 if (strEQ(filename,"-"))
111 rsfp = fopen(argv[0],"r");
113 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
117 bufptr = str_get(linestr);
121 /* now parse the report spec */
124 fatal("Translation aborted due to syntax errors.\n");
134 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
136 printf("\t\"%s\"\n",ops[i].cval),i++;
139 printf("\t%d",ops[i].ival),i++;
149 /* first pass to look for numeric variables */
151 prewalk(0,0,root,&i);
153 /* second pass to produce new program */
155 tmpstr = walk(0,0,root,&i,P_MIN);
156 str = str_make(STARTPERL);
157 str_cat(str, "\neval 'exec ");
159 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
160 if $running_under_some_shell;\n\
161 # this emulates #! processing on NIH machines.\n\
162 # (remove #! line above if indigestible)\n\n");
164 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
166 " # process any FOO=bar switches\n\n");
167 if (do_opens && opens) {
172 str_scat(str,tmpstr);
181 "Please check my work on the %d line%s I've marked with \"#???\".\n",
182 checkers, checkers == 1 ? "" : "s" );
184 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Token-return helpers.  Each one expands to a complete `return` statement,
 * so it can only be used inside a function that has a variable named `s` in
 * scope.  Every variant stores `s` back into bufptr before the value is
 * returned; the comma expression fixes the order of those side effects.
 */
/* RETURN: store s in bufptr and return retval; expectterm is not touched. */
189 #define RETURN(retval) return (bufptr = s,retval)
/* XTERM: set expectterm to TRUE, store s in bufptr, then return retval. */
190 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
/* XOP: set expectterm to FALSE, store s in bufptr, then return retval. */
191 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
/*
 * ID: assign string(x,0) to yylval, set expectterm to FALSE, store s in
 * bufptr, and return idtype (a name taken from the expansion site, not a
 * macro parameter).
 */
192 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
199 register char *s = bufptr;
207 fprintf(stderr,"Tokener at %s",s);
209 fprintf(stderr,"Tokener at %s\n",s);
214 "Unrecognized character %c in file %s line %d--ignoring.\n",
219 if (*s && *s != '\n') {
220 yyerror("Ignoring spurious backslash");
225 s = str_get(linestr);
230 if ((s = str_gets(linestr, rsfp)) == Nullch) {
234 s = str_get(linestr);
245 yylval = string(s,0);
270 for (d = s + 1; isspace(*d); d++) ;
280 yylval = string("~",1);
298 yylval = string("**=",3);
300 yylval = string(s-1,2);
318 while (*s == ' ' || *s == '\t')
320 if (strnEQ(s,"getline",7))
328 yylval = string("==",2);
332 yylval = string("=",1);
338 yylval = string("!=",2);
342 yylval = string("!~",2);
351 yylval = string("<=",2);
360 yylval = string(">>",2);
364 yylval = string(">=",2);
372 while (isalpha(*s) || isdigit(*s) || *s == '_') \
392 for (d = s; isdigit(*s); s++) ;
393 yylval = string(d,s-d);
399 split_to_array = set_array_base = TRUE;
402 case '/': /* may either be division or pattern */
409 yylval = string("/=",2);
415 case '0': case '1': case '2': case '3': case '4':
416 case '5': case '6': case '7': case '8': case '9': case '.':
421 s = cpy2(tokenbuf,s,s[-1]);
423 fatal("String not terminated:\n%s",str_get(linestr));
425 yylval = string(tokenbuf,0);
431 set_array_base = TRUE;
432 if (strEQ(d,"ARGV")) {
433 yylval=numary(string("ARGV",0));
436 if (strEQ(d,"atan2")) {
443 if (strEQ(d,"break"))
445 if (strEQ(d,"BEGIN"))
450 if (strEQ(d,"continue"))
452 if (strEQ(d,"cos")) {
456 if (strEQ(d,"close")) {
461 if (strEQ(d,"chdir"))
463 else if (strEQ(d,"crypt"))
465 else if (strEQ(d,"chop"))
467 else if (strEQ(d,"chmod"))
469 else if (strEQ(d,"chown"))
476 if (strEQ(d,"delete"))
487 if (strEQ(d,"exit")) {
491 if (strEQ(d,"exp")) {
495 if (strEQ(d,"elsif"))
497 else if (strEQ(d,"eq"))
499 else if (strEQ(d,"eval"))
501 else if (strEQ(d,"eof"))
503 else if (strEQ(d,"each"))
505 else if (strEQ(d,"exec"))
512 if (saw_FS == 1 && in_begin) {
513 for (d = s; *d && isspace(*d); d++) ;
515 for (d++; *d && isspace(*d); d++) ;
516 if (*d == '"' && d[2] == '"')
524 else if (strEQ(d,"function"))
526 if (strEQ(d,"FILENAME"))
528 if (strEQ(d,"foreach"))
530 else if (strEQ(d,"format"))
532 else if (strEQ(d,"fork"))
534 else if (strEQ(d,"fh"))
539 if (strEQ(d,"getline"))
545 else if (strEQ(d,"gt"))
547 else if (strEQ(d,"goto"))
549 else if (strEQ(d,"gmtime"))
563 if (strEQ(d,"index")) {
564 set_array_base = TRUE;
567 if (strEQ(d,"int")) {
581 else if (strEQ(d,"kill"))
586 if (strEQ(d,"length")) {
590 if (strEQ(d,"log")) {
596 else if (strEQ(d,"local"))
598 else if (strEQ(d,"lt"))
600 else if (strEQ(d,"le"))
/* Was misspelled "locatime", so an identifier named localtime never matched this test. */
602 else if (strEQ(d,"localtime"))
604 else if (strEQ(d,"link"))
609 if (strEQ(d,"match")) {
610 set_array_base = TRUE;
619 do_chop = do_split = split_to_array = set_array_base = TRUE;
620 if (strEQ(d,"next")) {
629 if (strEQ(d,"ORS")) {
633 if (strEQ(d,"OFS")) {
637 if (strEQ(d,"OFMT")) {
642 else if (strEQ(d,"ord"))
644 else if (strEQ(d,"oct"))
649 if (strEQ(d,"print")) {
652 if (strEQ(d,"printf")) {
657 else if (strEQ(d,"pop"))
669 if (strEQ(d,"rand")) {
673 if (strEQ(d,"return"))
675 if (strEQ(d,"reset"))
677 else if (strEQ(d,"redo"))
679 else if (strEQ(d,"rename"))
684 if (strEQ(d,"split")) {
685 set_array_base = TRUE;
688 if (strEQ(d,"substr")) {
689 set_array_base = TRUE;
694 if (strEQ(d,"sprintf"))
696 if (strEQ(d,"sqrt")) {
700 if (strEQ(d,"SUBSEP")) {
703 if (strEQ(d,"sin")) {
707 if (strEQ(d,"srand")) {
711 if (strEQ(d,"system")) {
717 else if (strEQ(d,"shift"))
719 else if (strEQ(d,"select"))
721 else if (strEQ(d,"seek"))
723 else if (strEQ(d,"stat"))
725 else if (strEQ(d,"study"))
727 else if (strEQ(d,"sleep"))
729 else if (strEQ(d,"symlink"))
731 else if (strEQ(d,"sort"))
738 else if (strEQ(d,"tell"))
740 else if (strEQ(d,"time"))
742 else if (strEQ(d,"times"))
747 if (strEQ(d,"until"))
749 else if (strEQ(d,"unless"))
751 else if (strEQ(d,"umask"))
753 else if (strEQ(d,"unshift"))
755 else if (strEQ(d,"unlink"))
757 else if (strEQ(d,"utime"))
762 if (strEQ(d,"values"))
767 if (strEQ(d,"while"))
769 if (strEQ(d,"write"))
771 else if (strEQ(d,"wait"))
800 fatal("Search pattern not found:\n%s",str_get(linestr));
804 for (; *s; s++,d++) {
808 else if (s[1] == '\\')
810 else if (s[1] == '[')
813 else if (*s == '[') {
816 if (*s == '\\' && s[1])
818 if (*s == '/' || (*s == '-' && s[1] == ']'))
821 } while (*s && *s != ']');
830 fatal("Search pattern not terminated:\n%s",str_get(linestr));
832 yylval = string(tokenbuf,0);
840 fprintf(stderr,"%s in file %s at line %d\n",
851 case '1': case '2': case '3': case '4': case '5':
852 case '6': case '7': case '8': case '9': case '0' : case '.':
854 while (isdigit(*s)) {
860 while (isdigit(*s)) {
/*
 * strchr() treats the terminating NUL as part of the searched string, so a
 * bare strchr("eE",*s) succeeds at end of input and s[1] is then read past
 * the terminator.  Test both characters for NUL before looking them up.
 */
867 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) {
869 if (*s == '+' || *s == '-')
875 yylval = string(tokenbuf,0);
888 ops[mop++].ival = OSTRING + (1<<8);
891 ops[mop].cval = safemalloc(len+1);
892 strncpy(ops[mop].cval,ptr,len);
893 ops[mop++].cval[len] = '\0';
895 fatal("Recompile a2p with larger OPSMAX\n");
906 fatal("type > 255 (%d)\n",type);
907 ops[mop++].ival = type;
909 fatal("Recompile a2p with larger OPSMAX\n");
921 fatal("type > 255 (%d)\n",type);
922 ops[mop++].ival = type + (1<<8);
923 ops[mop++].ival = arg1;
925 fatal("Recompile a2p with larger OPSMAX\n");
930 oper2(type,arg1,arg2)
938 fatal("type > 255 (%d)\n",type);
939 ops[mop++].ival = type + (2<<8);
940 ops[mop++].ival = arg1;
941 ops[mop++].ival = arg2;
943 fatal("Recompile a2p with larger OPSMAX\n");
948 oper3(type,arg1,arg2,arg3)
957 fatal("type > 255 (%d)\n",type);
958 ops[mop++].ival = type + (3<<8);
959 ops[mop++].ival = arg1;
960 ops[mop++].ival = arg2;
961 ops[mop++].ival = arg3;
963 fatal("Recompile a2p with larger OPSMAX\n");
968 oper4(type,arg1,arg2,arg3,arg4)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (4<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
983 ops[mop++].ival = arg4;
985 fatal("Recompile a2p with larger OPSMAX\n");
990 oper5(type,arg1,arg2,arg3,arg4,arg5)
1001 fatal("type > 255 (%d)\n",type);
1002 ops[mop++].ival = type + (5<<8);
1003 ops[mop++].ival = arg1;
1004 ops[mop++].ival = arg2;
1005 ops[mop++].ival = arg3;
1006 ops[mop++].ival = arg4;
1007 ops[mop++].ival = arg5;
1009 fatal("Recompile a2p with larger OPSMAX\n");
1023 type = ops[branch].ival;
1026 for (i=depth; i; i--)
1028 if (type == OSTRING) {
1029 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1032 printf("(%-5d%s %d\n",branch,opname[type],len);
1034 for (i=1; i<=len; i++)
1035 dump(ops[branch+i].ival);
1037 for (i=depth; i; i--)
1050 else if ((ops[arg].ival & 255) != OBLOCK)
1051 return oper2(OBLOCK,arg,maybe);
1052 else if ((ops[arg].ival >> 8) < 2)
1053 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1065 for (s = str->str_ptr; *s; s++) {
1066 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1070 else if (*s == '\n') {
1071 for (t = s+1; isspace(*t & 127); t++) ;
1073 while (isspace(*t & 127) && *t != '\n') t--;
1074 if (*t == '\n' && t-s > 1) {
1088 register char *d, *s, *t, *e;
1089 register int pos, newpos;
1093 for (s = str->str_ptr; *s; s++) {
1102 else if (*s == '\t')
1104 if (pos > 78) { /* split a long line? */
1107 for (t = tokenbuf; isspace(*t & 127); t++) {
1114 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1118 while (d > tokenbuf &&
1119 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1124 while (d > tokenbuf &&
1125 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1130 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1135 while (d > tokenbuf && *d != ' ')
1145 if (d[-1] != ';' && !(newpos % 4)) {
1151 newpos += strlen(t);
1166 for (t = tokenbuf; *t; t++) {
1170 strcpy(t+strlen(t)-1, "\t#???\n");
1176 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1178 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1181 fputs(tokenbuf,stdout);
1191 key = walk(0,0,arg,&dummy,P_MIN);
1193 hstore(symtab,key->str_ptr,str_make("1"));
1195 set_array_base = TRUE;
1208 type = ops[arg].ival & 255;
1209 if (type == OCOMMA) {
1210 rememberargs(ops[arg+1].ival);
1211 rememberargs(ops[arg+3].ival);
1213 else if (type == OVAR) {
1215 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1218 fatal("panic: unknown argument type %d, line %d\n",type,line);
1226 int type = ops[arg].ival & 255;
1229 if (type != OSTRING)
1230 fatal("panic: aryrefarg %d, line %d\n",type,line);
1231 str = hfetch(curarghash,ops[arg+1].cval);
1238 fixfargs(name,arg,prevargs)
1249 type = ops[arg].ival & 255;
1250 if (type == OCOMMA) {
1251 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1252 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1254 else if (type == OVAR) {
1255 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1256 if (strEQ(str_get(str),"*")) {
1259 str_set(str,""); /* in case another routine has this */
1260 ops[arg].ival &= ~255;
1261 ops[arg].ival |= OSTAR;
1262 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1263 fprintf(stderr,"Adding %s\n",tmpbuf);
1266 hstore(curarghash,tmpbuf,str);
1268 numargs = prevargs + 1;
1271 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1272 type,prevargs+1,line);
1277 fixrargs(name,arg,prevargs)
1288 type = ops[arg].ival & 255;
1289 if (type == OCOMMA) {
1290 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1291 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1296 sprintf(tmpbuf,"%s:%d",name,prevargs);
1297 str = hfetch(curarghash,tmpbuf);
1298 if (str && strEQ(str->str_ptr,"*")) {
1299 if (type == OVAR || type == OSTAR) {
1300 ops[arg].ival &= ~255;
1301 ops[arg].ival |= OSTAR;
1304 fatal("Can't pass expression by reference as arg %d of %s\n",
1307 numargs = prevargs + 1;