1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
26 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
27 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
28 printf("\n -D<number> sets debugging flags."
29 "\n -F<character> the awk script to translate is always invoked with"
31 "\n -n<fieldlist> specifies the names of the input fields if input does"
32 "\n not have to be split into an array."
33 "\n -<number> causes a2p to assume that input will always have that"
48 linestr = str_new(80);
49 str = str_new(0); /* first used for -I flags */
50 for (argc--,argv++; argc; argc--,argv++) {
51 if (argv[0][0] != '-' || !argv[0][1])
57 debug = atoi(argv[0]+2);
59 yydebug = (debug & 1);
63 case '0': case '1': case '2': case '3': case '4':
64 case '5': case '6': case '7': case '8': case '9':
65 maxfld = atoi(argv[0]+1);
72 namelist = savestr(argv[0]+2);
80 fatal("Unrecognized switch: %s\n",argv[0]);
90 if (argv[0] == Nullch) {
92 if ( isatty(fileno(stdin)) )
97 filename = savestr(argv[0]);
99 filename = savestr(argv[0]);
100 if (strEQ(filename,"-"))
105 rsfp = fopen(argv[0],"r");
107 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
111 bufptr = str_get(linestr);
115 /* now parse the report spec */
118 fatal("Translation aborted due to syntax errors.\n");
128 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
130 printf("\t\"%s\"\n",ops[i].cval),i++;
133 printf("\t%d",ops[i].ival),i++;
143 /* first pass to look for numeric variables */
145 prewalk(0,0,root,&i);
147 /* second pass to produce new program */
149 tmpstr = walk(0,0,root,&i,P_MIN);
150 str = str_make("#!");
152 str_cat(str, "/perl\neval \"exec ");
154 str_cat(str, "/perl -S $0 $*\"\n\
155 if $running_under_some_shell;\n\
156 # this emulates #! processing on NIH machines.\n\
157 # (remove #! line above if indigestible)\n\n");
159 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
161 " # process any FOO=bar switches\n\n");
162 if (do_opens && opens) {
167 str_scat(str,tmpstr);
176 "Please check my work on the %d line%s I've marked with \"#???\".\n",
177 checkers, checkers == 1 ? "" : "s" );
179 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Exit macros for the scanner.  Each expands to a complete `return`
 * statement, so using one leaves the enclosing function immediately.
 * Every macro writes the local scan pointer `s` back into `bufptr`
 * before returning, so `s` must be in scope at the point of use.
 */
/* Save the scan position in bufptr and return retval. */
184 #define RETURN(retval) return (bufptr = s,retval)
/* Like RETURN, but first sets expectterm to TRUE. */
185 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
/* Like RETURN, but first sets expectterm to FALSE. */
186 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
/*
 * Sets yylval to string(x,0), sets expectterm to FALSE, saves the scan
 * position in bufptr and returns idtype (which must also be in scope).
 */
187 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
193 register char *s = bufptr;
201 fprintf(stderr,"Tokener at %s",s);
203 fprintf(stderr,"Tokener at %s\n",s);
208 "Unrecognized character %c in file %s line %d--ignoring.\n",
213 if (*s && *s != '\n') {
214 yyerror("Ignoring spurious backslash");
219 s = str_get(linestr);
224 if ((s = str_gets(linestr, rsfp)) == Nullch) {
228 s = str_get(linestr);
239 yylval = string(s,0);
264 for (d = s + 1; isspace(*d); d++) ;
274 yylval = string("~",1);
292 yylval = string("**=",3);
294 yylval = string(s-1,2);
312 while (*s == ' ' || *s == '\t')
314 if (strnEQ(s,"getline",7))
322 yylval = string("==",2);
326 yylval = string("=",1);
332 yylval = string("!=",2);
336 yylval = string("!~",2);
345 yylval = string("<=",2);
354 yylval = string(">>",2);
358 yylval = string(">=",2);
366 while (isalpha(*s) || isdigit(*s) || *s == '_') \
386 for (d = s; isdigit(*s); s++) ;
387 yylval = string(d,s-d);
393 split_to_array = set_array_base = TRUE;
396 case '/': /* may either be division or pattern */
403 yylval = string("/=",2);
409 case '0': case '1': case '2': case '3': case '4':
410 case '5': case '6': case '7': case '8': case '9': case '.':
415 s = cpy2(tokenbuf,s,s[-1]);
417 fatal("String not terminated:\n%s",str_get(linestr));
419 yylval = string(tokenbuf,0);
425 set_array_base = TRUE;
426 if (strEQ(d,"ARGV")) {
427 yylval=numary(string("ARGV",0));
430 if (strEQ(d,"atan2")) {
437 if (strEQ(d,"break"))
439 if (strEQ(d,"BEGIN"))
444 if (strEQ(d,"continue"))
446 if (strEQ(d,"cos")) {
450 if (strEQ(d,"close")) {
455 if (strEQ(d,"chdir"))
457 else if (strEQ(d,"crypt"))
459 else if (strEQ(d,"chop"))
461 else if (strEQ(d,"chmod"))
463 else if (strEQ(d,"chown"))
470 if (strEQ(d,"delete"))
481 if (strEQ(d,"exit")) {
485 if (strEQ(d,"exp")) {
489 if (strEQ(d,"elsif"))
491 else if (strEQ(d,"eq"))
493 else if (strEQ(d,"eval"))
495 else if (strEQ(d,"eof"))
497 else if (strEQ(d,"each"))
499 else if (strEQ(d,"exec"))
506 if (saw_FS == 1 && in_begin) {
507 for (d = s; *d && isspace(*d); d++) ;
509 for (d++; *d && isspace(*d); d++) ;
510 if (*d == '"' && d[2] == '"')
518 else if (strEQ(d,"function"))
520 if (strEQ(d,"FILENAME"))
522 if (strEQ(d,"foreach"))
524 else if (strEQ(d,"format"))
526 else if (strEQ(d,"fork"))
528 else if (strEQ(d,"fh"))
533 if (strEQ(d,"getline"))
539 else if (strEQ(d,"gt"))
541 else if (strEQ(d,"goto"))
543 else if (strEQ(d,"gmtime"))
557 if (strEQ(d,"index")) {
558 set_array_base = TRUE;
561 if (strEQ(d,"int")) {
575 else if (strEQ(d,"kill"))
580 if (strEQ(d,"length")) {
584 if (strEQ(d,"log")) {
590 else if (strEQ(d,"local"))
592 else if (strEQ(d,"lt"))
594 else if (strEQ(d,"le"))
596 else if (strEQ(d,"locatime"))
598 else if (strEQ(d,"link"))
603 if (strEQ(d,"match")) {
604 set_array_base = TRUE;
613 do_chop = do_split = split_to_array = set_array_base = TRUE;
614 if (strEQ(d,"next")) {
623 if (strEQ(d,"ORS")) {
627 if (strEQ(d,"OFS")) {
631 if (strEQ(d,"OFMT")) {
636 else if (strEQ(d,"ord"))
638 else if (strEQ(d,"oct"))
643 if (strEQ(d,"print")) {
646 if (strEQ(d,"printf")) {
651 else if (strEQ(d,"pop"))
663 if (strEQ(d,"rand")) {
667 if (strEQ(d,"return"))
669 if (strEQ(d,"reset"))
671 else if (strEQ(d,"redo"))
673 else if (strEQ(d,"rename"))
678 if (strEQ(d,"split")) {
679 set_array_base = TRUE;
682 if (strEQ(d,"substr")) {
683 set_array_base = TRUE;
688 if (strEQ(d,"sprintf"))
690 if (strEQ(d,"sqrt")) {
694 if (strEQ(d,"SUBSEP")) {
697 if (strEQ(d,"sin")) {
701 if (strEQ(d,"srand")) {
705 if (strEQ(d,"system")) {
711 else if (strEQ(d,"shift"))
713 else if (strEQ(d,"select"))
715 else if (strEQ(d,"seek"))
717 else if (strEQ(d,"stat"))
719 else if (strEQ(d,"study"))
721 else if (strEQ(d,"sleep"))
723 else if (strEQ(d,"symlink"))
725 else if (strEQ(d,"sort"))
732 else if (strEQ(d,"tell"))
734 else if (strEQ(d,"time"))
736 else if (strEQ(d,"times"))
741 if (strEQ(d,"until"))
743 else if (strEQ(d,"unless"))
745 else if (strEQ(d,"umask"))
747 else if (strEQ(d,"unshift"))
749 else if (strEQ(d,"unlink"))
751 else if (strEQ(d,"utime"))
756 if (strEQ(d,"values"))
761 if (strEQ(d,"while"))
763 if (strEQ(d,"write"))
765 else if (strEQ(d,"wait"))
794 fatal("Search pattern not found:\n%s",str_get(linestr));
798 for (; *s; s++,d++) {
802 else if (s[1] == '\\')
804 else if (s[1] == '[')
807 else if (*s == '[') {
810 if (*s == '\\' && s[1])
812 if (*s == '/' || (*s == '-' && s[1] == ']'))
815 } while (*s && *s != ']');
824 fatal("Search pattern not terminated:\n%s",str_get(linestr));
826 yylval = string(tokenbuf,0);
834 fprintf(stderr,"%s in file %s at line %d\n",
845 case '1': case '2': case '3': case '4': case '5':
846 case '6': case '7': case '8': case '9': case '0' : case '.':
848 while (isdigit(*s)) {
854 while (isdigit(*s)) {
861 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
863 if (*s == '+' || *s == '-')
869 yylval = string(tokenbuf,0);
880 ops[mop++].ival = OSTRING + (1<<8);
883 ops[mop].cval = safemalloc(len+1);
884 strncpy(ops[mop].cval,ptr,len);
885 ops[mop++].cval[len] = '\0';
887 fatal("Recompile a2p with larger OPSMAX\n");
897 fatal("type > 255 (%d)\n",type);
898 ops[mop++].ival = type;
900 fatal("Recompile a2p with larger OPSMAX\n");
911 fatal("type > 255 (%d)\n",type);
912 ops[mop++].ival = type + (1<<8);
913 ops[mop++].ival = arg1;
915 fatal("Recompile a2p with larger OPSMAX\n");
919 oper2(type,arg1,arg2)
927 fatal("type > 255 (%d)\n",type);
928 ops[mop++].ival = type + (2<<8);
929 ops[mop++].ival = arg1;
930 ops[mop++].ival = arg2;
932 fatal("Recompile a2p with larger OPSMAX\n");
936 oper3(type,arg1,arg2,arg3)
945 fatal("type > 255 (%d)\n",type);
946 ops[mop++].ival = type + (3<<8);
947 ops[mop++].ival = arg1;
948 ops[mop++].ival = arg2;
949 ops[mop++].ival = arg3;
951 fatal("Recompile a2p with larger OPSMAX\n");
955 oper4(type,arg1,arg2,arg3,arg4)
965 fatal("type > 255 (%d)\n",type);
966 ops[mop++].ival = type + (4<<8);
967 ops[mop++].ival = arg1;
968 ops[mop++].ival = arg2;
969 ops[mop++].ival = arg3;
970 ops[mop++].ival = arg4;
972 fatal("Recompile a2p with larger OPSMAX\n");
976 oper5(type,arg1,arg2,arg3,arg4,arg5)
987 fatal("type > 255 (%d)\n",type);
988 ops[mop++].ival = type + (5<<8);
989 ops[mop++].ival = arg1;
990 ops[mop++].ival = arg2;
991 ops[mop++].ival = arg3;
992 ops[mop++].ival = arg4;
993 ops[mop++].ival = arg5;
995 fatal("Recompile a2p with larger OPSMAX\n");
1009 type = ops[branch].ival;
1012 for (i=depth; i; i--)
1014 if (type == OSTRING) {
1015 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1018 printf("(%-5d%s %d\n",branch,opname[type],len);
1020 for (i=1; i<=len; i++)
1021 dump(ops[branch+i].ival);
1023 for (i=depth; i; i--)
1035 else if ((ops[arg].ival & 255) != OBLOCK)
1036 return oper2(OBLOCK,arg,maybe);
1037 else if ((ops[arg].ival >> 8) < 2)
1038 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1050 for (s = str->str_ptr; *s; s++) {
1051 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1055 else if (*s == '\n') {
1056 for (t = s+1; isspace(*t & 127); t++) ;
1058 while (isspace(*t & 127) && *t != '\n') t--;
1059 if (*t == '\n' && t-s > 1) {
1073 register char *d, *s, *t, *e;
1074 register int pos, newpos;
1078 for (s = str->str_ptr; *s; s++) {
1087 else if (*s == '\t')
1089 if (pos > 78) { /* split a long line? */
1092 for (t = tokenbuf; isspace(*t & 127); t++) {
1099 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1103 while (d > tokenbuf &&
1104 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1109 while (d > tokenbuf &&
1110 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1115 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1120 while (d > tokenbuf && *d != ' ')
1130 if (d[-1] != ';' && !(newpos % 4)) {
1136 newpos += strlen(t);
1151 for (t = tokenbuf; *t; t++) {
1155 strcpy(t+strlen(t)-1, "\t#???\n");
1161 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1163 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1166 fputs(tokenbuf,stdout);
1175 key = walk(0,0,arg,&dummy,P_MIN);
1177 hstore(symtab,key->str_ptr,str_make("1"));
1179 set_array_base = TRUE;
1191 type = ops[arg].ival & 255;
1192 if (type == OCOMMA) {
1193 rememberargs(ops[arg+1].ival);
1194 rememberargs(ops[arg+3].ival);
1196 else if (type == OVAR) {
1198 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1201 fatal("panic: unknown argument type %d, line %d\n",type,line);
1208 int type = ops[arg].ival & 255;
1211 if (type != OSTRING)
1212 fatal("panic: aryrefarg %d, line %d\n",type,line);
1213 str = hfetch(curarghash,ops[arg+1].cval);
1219 fixfargs(name,arg,prevargs)
1230 type = ops[arg].ival & 255;
1231 if (type == OCOMMA) {
1232 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1233 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1235 else if (type == OVAR) {
1236 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1237 if (strEQ(str_get(str),"*")) {
1240 str_set(str,""); /* in case another routine has this */
1241 ops[arg].ival &= ~255;
1242 ops[arg].ival |= OSTAR;
1243 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1244 fprintf(stderr,"Adding %s\n",tmpbuf);
1247 hstore(curarghash,tmpbuf,str);
1249 numargs = prevargs + 1;
1252 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1253 type,prevargs+1,line);
1257 fixrargs(name,arg,prevargs)
1268 type = ops[arg].ival & 255;
1269 if (type == OCOMMA) {
1270 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1271 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1276 sprintf(tmpbuf,"%s:%d",name,prevargs);
1277 str = hfetch(curarghash,tmpbuf);
1278 if (str && strEQ(str->str_ptr,"*")) {
1279 if (type == OVAR || type == OSTAR) {
1280 ops[arg].ival &= ~255;
1281 ops[arg].ival |= OSTAR;
1284 fatal("Can't pass expression by reference as arg %d of %s\n",
1287 numargs = prevargs + 1;