1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
32 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
33 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
34 printf("\n -D<number> sets debugging flags."
35 "\n -F<character> the awk script to translate is always invoked with"
37 "\n -n<fieldlist> specifies the names of the input fields if input does"
38 "\n not have to be split into an array."
39 "\n -<number> causes a2p to assume that input will always have that"
54 linestr = str_new(80);
55 str = str_new(0); /* first used for -I flags */
56 for (argc--,argv++; argc; argc--,argv++) {
57 if (argv[0][0] != '-' || !argv[0][1])
63 debug = atoi(argv[0]+2);
65 yydebug = (debug & 1);
69 case '0': case '1': case '2': case '3': case '4':
70 case '5': case '6': case '7': case '8': case '9':
71 maxfld = atoi(argv[0]+1);
78 namelist = savestr(argv[0]+2);
86 fatal("Unrecognized switch: %s\n",argv[0]);
96 if (argv[0] == Nullch) {
98 if ( isatty(fileno(stdin)) )
103 filename = savestr(argv[0]);
105 filename = savestr(argv[0]);
106 if (strEQ(filename,"-"))
111 rsfp = fopen(argv[0],"r");
113 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
117 bufptr = str_get(linestr);
121 /* now parse the report spec */
124 fatal("Translation aborted due to syntax errors.\n");
134 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
136 printf("\t\"%s\"\n",ops[i].cval),i++;
139 printf("\t%d",ops[i].ival),i++;
149 /* first pass to look for numeric variables */
151 prewalk(0,0,root,&i);
153 /* second pass to produce new program */
155 tmpstr = walk(0,0,root,&i,P_MIN);
156 str = str_make("#!");
158 str_cat(str, "/perl\neval \"exec ");
160 str_cat(str, "/perl -S $0 $*\"\n\
161 if $running_under_some_shell;\n\
162 # this emulates #! processing on NIH machines.\n\
163 # (remove #! line above if indigestible)\n\n");
165 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
167 " # process any FOO=bar switches\n\n");
168 if (do_opens && opens) {
173 str_scat(str,tmpstr);
182 "Please check my work on the %d line%s I've marked with \"#???\".\n",
183 checkers, checkers == 1 ? "" : "s" );
185 "The operation I've selected may be wrong for the operand types.\n");
190 #define RETURN(retval) return (bufptr = s,retval)
191 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
192 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
193 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
200 register char *s = bufptr;
208 fprintf(stderr,"Tokener at %s",s);
210 fprintf(stderr,"Tokener at %s\n",s);
215 "Unrecognized character %c in file %s line %d--ignoring.\n",
220 if (*s && *s != '\n') {
221 yyerror("Ignoring spurious backslash");
226 s = str_get(linestr);
231 if ((s = str_gets(linestr, rsfp)) == Nullch) {
235 s = str_get(linestr);
246 yylval = string(s,0);
271 for (d = s + 1; isspace(*d); d++) ;
281 yylval = string("~",1);
299 yylval = string("**=",3);
301 yylval = string(s-1,2);
319 while (*s == ' ' || *s == '\t')
321 if (strnEQ(s,"getline",7))
329 yylval = string("==",2);
333 yylval = string("=",1);
339 yylval = string("!=",2);
343 yylval = string("!~",2);
352 yylval = string("<=",2);
361 yylval = string(">>",2);
365 yylval = string(">=",2);
373 while (isalpha(*s) || isdigit(*s) || *s == '_') \
393 for (d = s; isdigit(*s); s++) ;
394 yylval = string(d,s-d);
400 split_to_array = set_array_base = TRUE;
403 case '/': /* may either be division or pattern */
410 yylval = string("/=",2);
416 case '0': case '1': case '2': case '3': case '4':
417 case '5': case '6': case '7': case '8': case '9': case '.':
422 s = cpy2(tokenbuf,s,s[-1]);
424 fatal("String not terminated:\n%s",str_get(linestr));
426 yylval = string(tokenbuf,0);
432 set_array_base = TRUE;
433 if (strEQ(d,"ARGV")) {
434 yylval=numary(string("ARGV",0));
437 if (strEQ(d,"atan2")) {
444 if (strEQ(d,"break"))
446 if (strEQ(d,"BEGIN"))
451 if (strEQ(d,"continue"))
453 if (strEQ(d,"cos")) {
457 if (strEQ(d,"close")) {
462 if (strEQ(d,"chdir"))
464 else if (strEQ(d,"crypt"))
466 else if (strEQ(d,"chop"))
468 else if (strEQ(d,"chmod"))
470 else if (strEQ(d,"chown"))
477 if (strEQ(d,"delete"))
488 if (strEQ(d,"exit")) {
492 if (strEQ(d,"exp")) {
496 if (strEQ(d,"elsif"))
498 else if (strEQ(d,"eq"))
500 else if (strEQ(d,"eval"))
502 else if (strEQ(d,"eof"))
504 else if (strEQ(d,"each"))
506 else if (strEQ(d,"exec"))
513 if (saw_FS == 1 && in_begin) {
514 for (d = s; *d && isspace(*d); d++) ;
516 for (d++; *d && isspace(*d); d++) ;
517 if (*d == '"' && d[2] == '"')
525 else if (strEQ(d,"function"))
527 if (strEQ(d,"FILENAME"))
529 if (strEQ(d,"foreach"))
531 else if (strEQ(d,"format"))
533 else if (strEQ(d,"fork"))
535 else if (strEQ(d,"fh"))
540 if (strEQ(d,"getline"))
546 else if (strEQ(d,"gt"))
548 else if (strEQ(d,"goto"))
550 else if (strEQ(d,"gmtime"))
564 if (strEQ(d,"index")) {
565 set_array_base = TRUE;
568 if (strEQ(d,"int")) {
582 else if (strEQ(d,"kill"))
587 if (strEQ(d,"length")) {
591 if (strEQ(d,"log")) {
597 else if (strEQ(d,"local"))
599 else if (strEQ(d,"lt"))
601 else if (strEQ(d,"le"))
603 else if (strEQ(d,"localtime")) /* the Perl builtin is spelled "localtime" */
605 else if (strEQ(d,"link"))
610 if (strEQ(d,"match")) {
611 set_array_base = TRUE;
620 do_chop = do_split = split_to_array = set_array_base = TRUE;
621 if (strEQ(d,"next")) {
630 if (strEQ(d,"ORS")) {
634 if (strEQ(d,"OFS")) {
638 if (strEQ(d,"OFMT")) {
643 else if (strEQ(d,"ord"))
645 else if (strEQ(d,"oct"))
650 if (strEQ(d,"print")) {
653 if (strEQ(d,"printf")) {
658 else if (strEQ(d,"pop"))
670 if (strEQ(d,"rand")) {
674 if (strEQ(d,"return"))
676 if (strEQ(d,"reset"))
678 else if (strEQ(d,"redo"))
680 else if (strEQ(d,"rename"))
685 if (strEQ(d,"split")) {
686 set_array_base = TRUE;
689 if (strEQ(d,"substr")) {
690 set_array_base = TRUE;
695 if (strEQ(d,"sprintf"))
697 if (strEQ(d,"sqrt")) {
701 if (strEQ(d,"SUBSEP")) {
704 if (strEQ(d,"sin")) {
708 if (strEQ(d,"srand")) {
712 if (strEQ(d,"system")) {
718 else if (strEQ(d,"shift"))
720 else if (strEQ(d,"select"))
722 else if (strEQ(d,"seek"))
724 else if (strEQ(d,"stat"))
726 else if (strEQ(d,"study"))
728 else if (strEQ(d,"sleep"))
730 else if (strEQ(d,"symlink"))
732 else if (strEQ(d,"sort"))
739 else if (strEQ(d,"tell"))
741 else if (strEQ(d,"time"))
743 else if (strEQ(d,"times"))
748 if (strEQ(d,"until"))
750 else if (strEQ(d,"unless"))
752 else if (strEQ(d,"umask"))
754 else if (strEQ(d,"unshift"))
756 else if (strEQ(d,"unlink"))
758 else if (strEQ(d,"utime"))
763 if (strEQ(d,"values"))
768 if (strEQ(d,"while"))
770 if (strEQ(d,"write"))
772 else if (strEQ(d,"wait"))
801 fatal("Search pattern not found:\n%s",str_get(linestr));
805 for (; *s; s++,d++) {
809 else if (s[1] == '\\')
811 else if (s[1] == '[')
814 else if (*s == '[') {
817 if (*s == '\\' && s[1])
819 if (*s == '/' || (*s == '-' && s[1] == ']'))
822 } while (*s && *s != ']');
831 fatal("Search pattern not terminated:\n%s",str_get(linestr));
833 yylval = string(tokenbuf,0);
841 fprintf(stderr,"%s in file %s at line %d\n",
852 case '1': case '2': case '3': case '4': case '5':
853 case '6': case '7': case '8': case '9': case '0' : case '.':
855 while (isdigit(*s)) {
861 while (isdigit(*s)) {
868 if (*s && strchr("eE",*s) && s[1] && strchr("+-0123456789",s[1])) { /* strchr() also matches the terminating NUL, so rule out '\0' before each lookup */
870 if (*s == '+' || *s == '-')
876 yylval = string(tokenbuf,0);
889 ops[mop++].ival = OSTRING + (1<<8);
892 ops[mop].cval = safemalloc(len+1);
893 strncpy(ops[mop].cval,ptr,len);
894 ops[mop++].cval[len] = '\0';
896 fatal("Recompile a2p with larger OPSMAX\n");
907 fatal("type > 255 (%d)\n",type);
908 ops[mop++].ival = type;
910 fatal("Recompile a2p with larger OPSMAX\n");
922 fatal("type > 255 (%d)\n",type);
923 ops[mop++].ival = type + (1<<8);
924 ops[mop++].ival = arg1;
926 fatal("Recompile a2p with larger OPSMAX\n");
931 oper2(type,arg1,arg2)
939 fatal("type > 255 (%d)\n",type);
940 ops[mop++].ival = type + (2<<8);
941 ops[mop++].ival = arg1;
942 ops[mop++].ival = arg2;
944 fatal("Recompile a2p with larger OPSMAX\n");
949 oper3(type,arg1,arg2,arg3)
958 fatal("type > 255 (%d)\n",type);
959 ops[mop++].ival = type + (3<<8);
960 ops[mop++].ival = arg1;
961 ops[mop++].ival = arg2;
962 ops[mop++].ival = arg3;
964 fatal("Recompile a2p with larger OPSMAX\n");
969 oper4(type,arg1,arg2,arg3,arg4)
979 fatal("type > 255 (%d)\n",type);
980 ops[mop++].ival = type + (4<<8);
981 ops[mop++].ival = arg1;
982 ops[mop++].ival = arg2;
983 ops[mop++].ival = arg3;
984 ops[mop++].ival = arg4;
986 fatal("Recompile a2p with larger OPSMAX\n");
991 oper5(type,arg1,arg2,arg3,arg4,arg5)
1002 fatal("type > 255 (%d)\n",type);
1003 ops[mop++].ival = type + (5<<8);
1004 ops[mop++].ival = arg1;
1005 ops[mop++].ival = arg2;
1006 ops[mop++].ival = arg3;
1007 ops[mop++].ival = arg4;
1008 ops[mop++].ival = arg5;
1010 fatal("Recompile a2p with larger OPSMAX\n");
1024 type = ops[branch].ival;
1027 for (i=depth; i; i--)
1029 if (type == OSTRING) {
1030 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1033 printf("(%-5d%s %d\n",branch,opname[type],len);
1035 for (i=1; i<=len; i++)
1036 dump(ops[branch+i].ival);
1038 for (i=depth; i; i--)
1051 else if ((ops[arg].ival & 255) != OBLOCK)
1052 return oper2(OBLOCK,arg,maybe);
1053 else if ((ops[arg].ival >> 8) < 2)
1054 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1066 for (s = str->str_ptr; *s; s++) {
1067 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1071 else if (*s == '\n') {
1072 for (t = s+1; isspace(*t & 127); t++) ;
1074 while (isspace(*t & 127) && *t != '\n') t--;
1075 if (*t == '\n' && t-s > 1) {
1089 register char *d, *s, *t, *e;
1090 register int pos, newpos;
1094 for (s = str->str_ptr; *s; s++) {
1103 else if (*s == '\t')
1105 if (pos > 78) { /* split a long line? */
1108 for (t = tokenbuf; isspace(*t & 127); t++) {
1115 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1119 while (d > tokenbuf &&
1120 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1125 while (d > tokenbuf &&
1126 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1131 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1136 while (d > tokenbuf && *d != ' ')
1146 if (d[-1] != ';' && !(newpos % 4)) {
1152 newpos += strlen(t);
1167 for (t = tokenbuf; *t; t++) {
1171 strcpy(t+strlen(t)-1, "\t#???\n");
1177 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1179 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1182 fputs(tokenbuf,stdout);
1192 key = walk(0,0,arg,&dummy,P_MIN);
1194 hstore(symtab,key->str_ptr,str_make("1"));
1196 set_array_base = TRUE;
1209 type = ops[arg].ival & 255;
1210 if (type == OCOMMA) {
1211 rememberargs(ops[arg+1].ival);
1212 rememberargs(ops[arg+3].ival);
1214 else if (type == OVAR) {
1216 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1219 fatal("panic: unknown argument type %d, line %d\n",type,line);
1227 int type = ops[arg].ival & 255;
1230 if (type != OSTRING)
1231 fatal("panic: aryrefarg %d, line %d\n",type,line);
1232 str = hfetch(curarghash,ops[arg+1].cval);
1239 fixfargs(name,arg,prevargs)
1250 type = ops[arg].ival & 255;
1251 if (type == OCOMMA) {
1252 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1253 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1255 else if (type == OVAR) {
1256 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1257 if (strEQ(str_get(str),"*")) {
1260 str_set(str,""); /* in case another routine has this */
1261 ops[arg].ival &= ~255;
1262 ops[arg].ival |= OSTAR;
1263 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1264 fprintf(stderr,"Adding %s\n",tmpbuf);
1267 hstore(curarghash,tmpbuf,str);
1269 numargs = prevargs + 1;
1272 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1273 type,prevargs+1,line);
1278 fixrargs(name,arg,prevargs)
1289 type = ops[arg].ival & 255;
1290 if (type == OCOMMA) {
1291 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1292 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1297 sprintf(tmpbuf,"%s:%d",name,prevargs);
1298 str = hfetch(curarghash,tmpbuf);
1299 if (str && strEQ(str->str_ptr,"*")) {
1300 if (type == OVAR || type == OSTAR) {
1301 ops[arg].ival &= ~255;
1302 ops[arg].ival |= OSTAR;
1305 fatal("Can't pass expression by reference as arg %d of %s\n",
1308 numargs = prevargs + 1;