1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
26 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
27 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
28 printf("\n -D<number> sets debugging flags."
29 "\n -F<character> the awk script to translate is always invoked with"
31 "\n -n<fieldlist> specifies the names of the input fields if input does"
32 "\n not have to be split into an array."
33 "\n -<number> causes a2p to assume that input will always have that"
49 linestr = str_new(80);
50 str = str_new(0); /* first used for -I flags */
51 for (argc--,argv++; argc; argc--,argv++) {
52 if (argv[0][0] != '-' || !argv[0][1])
58 debug = atoi(argv[0]+2);
60 yydebug = (debug & 1);
64 case '0': case '1': case '2': case '3': case '4':
65 case '5': case '6': case '7': case '8': case '9':
66 maxfld = atoi(argv[0]+1);
73 namelist = savestr(argv[0]+2);
81 fatal("Unrecognized switch: %s\n",argv[0]);
91 if (argv[0] == Nullch) {
93 if ( isatty(fileno(stdin)) )
98 filename = savestr(argv[0]);
100 filename = savestr(argv[0]);
101 if (strEQ(filename,"-"))
106 rsfp = fopen(argv[0],"r");
108 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
112 bufptr = str_get(linestr);
116 /* now parse the report spec */
119 fatal("Translation aborted due to syntax errors.\n");
129 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
131 printf("\t\"%s\"\n",ops[i].cval),i++;
134 printf("\t%d",ops[i].ival),i++;
144 /* first pass to look for numeric variables */
146 prewalk(0,0,root,&i);
148 /* second pass to produce new program */
150 tmpstr = walk(0,0,root,&i,P_MIN);
151 str = str_make("#!");
153 str_cat(str, "/perl\neval \"exec ");
155 str_cat(str, "/perl -S $0 $*\"\n\
156 if $running_under_some_shell;\n\
157 # this emulates #! processing on NIH machines.\n\
158 # (remove #! line above if indigestible)\n\n");
160 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
162 " # process any FOO=bar switches\n\n");
163 if (do_opens && opens) {
168 str_scat(str,tmpstr);
177 "Please check my work on the %d line%s I've marked with \"#???\".\n",
178 checkers, checkers == 1 ? "" : "s" );
180 "The operation I've selected may be wrong for the operand types.\n");
185 #define RETURN(retval) return (bufptr = s,retval)
186 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
187 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
188 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
194 register char *s = bufptr;
202 fprintf(stderr,"Tokener at %s",s);
204 fprintf(stderr,"Tokener at %s\n",s);
209 "Unrecognized character %c in file %s line %d--ignoring.\n",
214 if (*s && *s != '\n') {
215 yyerror("Ignoring spurious backslash");
220 s = str_get(linestr);
225 if ((s = str_gets(linestr, rsfp)) == Nullch) {
229 s = str_get(linestr);
240 yylval = string(s,0);
265 for (d = s + 1; isspace(*d); d++) ;
275 yylval = string("~",1);
293 yylval = string("**=",3);
295 yylval = string(s-1,2);
313 while (*s == ' ' || *s == '\t')
315 if (strnEQ(s,"getline",7))
323 yylval = string("==",2);
327 yylval = string("=",1);
333 yylval = string("!=",2);
337 yylval = string("!~",2);
346 yylval = string("<=",2);
355 yylval = string(">>",2);
359 yylval = string(">=",2);
367 while (isalpha(*s) || isdigit(*s) || *s == '_') \
387 for (d = s; isdigit(*s); s++) ;
388 yylval = string(d,s-d);
394 split_to_array = set_array_base = TRUE;
397 case '/': /* may either be division or pattern */
404 yylval = string("/=",2);
410 case '0': case '1': case '2': case '3': case '4':
411 case '5': case '6': case '7': case '8': case '9': case '.':
416 s = cpy2(tokenbuf,s,s[-1]);
418 fatal("String not terminated:\n%s",str_get(linestr));
420 yylval = string(tokenbuf,0);
426 set_array_base = TRUE;
427 if (strEQ(d,"ARGV")) {
428 yylval=numary(string("ARGV",0));
431 if (strEQ(d,"atan2")) {
438 if (strEQ(d,"break"))
440 if (strEQ(d,"BEGIN"))
445 if (strEQ(d,"continue"))
447 if (strEQ(d,"cos")) {
451 if (strEQ(d,"close")) {
456 if (strEQ(d,"chdir"))
458 else if (strEQ(d,"crypt"))
460 else if (strEQ(d,"chop"))
462 else if (strEQ(d,"chmod"))
464 else if (strEQ(d,"chown"))
471 if (strEQ(d,"delete"))
482 if (strEQ(d,"exit")) {
486 if (strEQ(d,"exp")) {
490 if (strEQ(d,"elsif"))
492 else if (strEQ(d,"eq"))
494 else if (strEQ(d,"eval"))
496 else if (strEQ(d,"eof"))
498 else if (strEQ(d,"each"))
500 else if (strEQ(d,"exec"))
507 if (saw_FS == 1 && in_begin) {
508 for (d = s; *d && isspace(*d); d++) ;
510 for (d++; *d && isspace(*d); d++) ;
511 if (*d == '"' && d[2] == '"')
519 else if (strEQ(d,"function"))
521 if (strEQ(d,"FILENAME"))
523 if (strEQ(d,"foreach"))
525 else if (strEQ(d,"format"))
527 else if (strEQ(d,"fork"))
529 else if (strEQ(d,"fh"))
534 if (strEQ(d,"getline"))
540 else if (strEQ(d,"gt"))
542 else if (strEQ(d,"goto"))
544 else if (strEQ(d,"gmtime"))
558 if (strEQ(d,"index")) {
559 set_array_base = TRUE;
562 if (strEQ(d,"int")) {
576 else if (strEQ(d,"kill"))
581 if (strEQ(d,"length")) {
585 if (strEQ(d,"log")) {
591 else if (strEQ(d,"local"))
593 else if (strEQ(d,"lt"))
595 else if (strEQ(d,"le"))
597 else if (strEQ(d,"localtime"))	/* was misspelled "locatime", which could never match the Perl name localtime */
599 else if (strEQ(d,"link"))
604 if (strEQ(d,"match")) {
605 set_array_base = TRUE;
614 do_chop = do_split = split_to_array = set_array_base = TRUE;
615 if (strEQ(d,"next")) {
624 if (strEQ(d,"ORS")) {
628 if (strEQ(d,"OFS")) {
632 if (strEQ(d,"OFMT")) {
637 else if (strEQ(d,"ord"))
639 else if (strEQ(d,"oct"))
644 if (strEQ(d,"print")) {
647 if (strEQ(d,"printf")) {
652 else if (strEQ(d,"pop"))
664 if (strEQ(d,"rand")) {
668 if (strEQ(d,"return"))
670 if (strEQ(d,"reset"))
672 else if (strEQ(d,"redo"))
674 else if (strEQ(d,"rename"))
679 if (strEQ(d,"split")) {
680 set_array_base = TRUE;
683 if (strEQ(d,"substr")) {
684 set_array_base = TRUE;
689 if (strEQ(d,"sprintf"))
691 if (strEQ(d,"sqrt")) {
695 if (strEQ(d,"SUBSEP")) {
698 if (strEQ(d,"sin")) {
702 if (strEQ(d,"srand")) {
706 if (strEQ(d,"system")) {
712 else if (strEQ(d,"shift"))
714 else if (strEQ(d,"select"))
716 else if (strEQ(d,"seek"))
718 else if (strEQ(d,"stat"))
720 else if (strEQ(d,"study"))
722 else if (strEQ(d,"sleep"))
724 else if (strEQ(d,"symlink"))
726 else if (strEQ(d,"sort"))
733 else if (strEQ(d,"tell"))
735 else if (strEQ(d,"time"))
737 else if (strEQ(d,"times"))
742 if (strEQ(d,"until"))
744 else if (strEQ(d,"unless"))
746 else if (strEQ(d,"umask"))
748 else if (strEQ(d,"unshift"))
750 else if (strEQ(d,"unlink"))
752 else if (strEQ(d,"utime"))
757 if (strEQ(d,"values"))
762 if (strEQ(d,"while"))
764 if (strEQ(d,"write"))
766 else if (strEQ(d,"wait"))
795 fatal("Search pattern not found:\n%s",str_get(linestr));
799 for (; *s; s++,d++) {
803 else if (s[1] == '\\')
805 else if (s[1] == '[')
808 else if (*s == '[') {
811 if (*s == '\\' && s[1])
813 if (*s == '/' || (*s == '-' && s[1] == ']'))
816 } while (*s && *s != ']');
825 fatal("Search pattern not terminated:\n%s",str_get(linestr));
827 yylval = string(tokenbuf,0);
834 fprintf(stderr,"%s in file %s at line %d\n",
845 case '1': case '2': case '3': case '4': case '5':
846 case '6': case '7': case '8': case '9': case '0' : case '.':
848 while (isdigit(*s)) {
854 while (isdigit(*s)) {
861 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
863 if (*s == '+' || *s == '-')
869 yylval = string(tokenbuf,0);
880 ops[mop++].ival = OSTRING + (1<<8);
883 ops[mop].cval = safemalloc(len+1);
884 strncpy(ops[mop].cval,ptr,len);
885 ops[mop++].cval[len] = '\0';
887 fatal("Recompile a2p with larger OPSMAX\n");
897 fatal("type > 255 (%d)\n",type);
898 ops[mop++].ival = type;
900 fatal("Recompile a2p with larger OPSMAX\n");
911 fatal("type > 255 (%d)\n",type);
912 ops[mop++].ival = type + (1<<8);
913 ops[mop++].ival = arg1;
915 fatal("Recompile a2p with larger OPSMAX\n");
919 oper2(type,arg1,arg2)
927 fatal("type > 255 (%d)\n",type);
928 ops[mop++].ival = type + (2<<8);
929 ops[mop++].ival = arg1;
930 ops[mop++].ival = arg2;
932 fatal("Recompile a2p with larger OPSMAX\n");
936 oper3(type,arg1,arg2,arg3)
945 fatal("type > 255 (%d)\n",type);
946 ops[mop++].ival = type + (3<<8);
947 ops[mop++].ival = arg1;
948 ops[mop++].ival = arg2;
949 ops[mop++].ival = arg3;
951 fatal("Recompile a2p with larger OPSMAX\n");
955 oper4(type,arg1,arg2,arg3,arg4)
965 fatal("type > 255 (%d)\n",type);
966 ops[mop++].ival = type + (4<<8);
967 ops[mop++].ival = arg1;
968 ops[mop++].ival = arg2;
969 ops[mop++].ival = arg3;
970 ops[mop++].ival = arg4;
972 fatal("Recompile a2p with larger OPSMAX\n");
976 oper5(type,arg1,arg2,arg3,arg4,arg5)
987 fatal("type > 255 (%d)\n",type);
988 ops[mop++].ival = type + (5<<8);
989 ops[mop++].ival = arg1;
990 ops[mop++].ival = arg2;
991 ops[mop++].ival = arg3;
992 ops[mop++].ival = arg4;
993 ops[mop++].ival = arg5;
995 fatal("Recompile a2p with larger OPSMAX\n");
1008 type = ops[branch].ival;
1011 for (i=depth; i; i--)
1013 if (type == OSTRING) {
1014 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1017 printf("(%-5d%s %d\n",branch,opname[type],len);
1019 for (i=1; i<=len; i++)
1020 dump(ops[branch+i].ival);
1022 for (i=depth; i; i--)
1034 else if ((ops[arg].ival & 255) != OBLOCK)
1035 return oper2(OBLOCK,arg,maybe);
1036 else if ((ops[arg].ival >> 8) < 2)
1037 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1048 for (s = str->str_ptr; *s; s++) {
1049 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1053 else if (*s == '\n') {
1054 for (t = s+1; isspace(*t & 127); t++) ;
1056 while (isspace(*t & 127) && *t != '\n') t--;
1057 if (*t == '\n' && t-s > 1) {
1070 register char *d, *s, *t, *e;
1071 register int pos, newpos;
1075 for (s = str->str_ptr; *s; s++) {
1084 else if (*s == '\t')
1086 if (pos > 78) { /* split a long line? */
1089 for (t = tokenbuf; isspace(*t & 127); t++) {
1096 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1100 while (d > tokenbuf &&
1101 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1106 while (d > tokenbuf &&
1107 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1112 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1117 while (d > tokenbuf && *d != ' ')
1127 if (d[-1] != ';' && !(newpos % 4)) {
1133 newpos += strlen(t);
1147 for (t = tokenbuf; *t; t++) {
1151 strcpy(t+strlen(t)-1, "\t#???\n");
1157 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1159 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1162 fputs(tokenbuf,stdout);
1171 key = walk(0,0,arg,&dummy,P_MIN);
1173 hstore(symtab,key->str_ptr,str_make("1"));
1175 set_array_base = TRUE;
1187 type = ops[arg].ival & 255;
1188 if (type == OCOMMA) {
1189 rememberargs(ops[arg+1].ival);
1190 rememberargs(ops[arg+3].ival);
1192 else if (type == OVAR) {
1194 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1197 fatal("panic: unknown argument type %d, line %d\n",type,line);
1204 int type = ops[arg].ival & 255;
1207 if (type != OSTRING)
1208 fatal("panic: aryrefarg %d, line %d\n",type,line);
1209 str = hfetch(curarghash,ops[arg+1].cval);
1215 fixfargs(name,arg,prevargs)
1226 type = ops[arg].ival & 255;
1227 if (type == OCOMMA) {
1228 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1229 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1231 else if (type == OVAR) {
1232 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1233 if (strEQ(str_get(str),"*")) {
1236 str_set(str,""); /* in case another routine has this */
1237 ops[arg].ival &= ~255;
1238 ops[arg].ival |= OSTAR;
1239 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1240 fprintf(stderr,"Adding %s\n",tmpbuf);
1243 hstore(curarghash,tmpbuf,str);
1245 numargs = prevargs + 1;
1248 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1249 type,prevargs+1,line);
1253 fixrargs(name,arg,prevargs)
1264 type = ops[arg].ival & 255;
1265 if (type == OCOMMA) {
1266 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1267 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1272 sprintf(tmpbuf,"%s:%d",name,prevargs);
1273 str = hfetch(curarghash,tmpbuf);
1274 if (str && strEQ(str->str_ptr,"*")) {
1275 if (type == OVAR || type == OSTAR) {
1276 ops[arg].ival &= ~255;
1277 ops[arg].ival |= OSTAR;
1280 fatal("Can't pass expression by reference as arg %d of %s\n",
1283 numargs = prevargs + 1;