1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
25 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
26 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
27 printf("\n -D<number> sets debugging flags."
28 "\n -F<character> the awk script to translate is always invoked with"
30 "\n -n<fieldlist> specifies the names of the input fields if input does"
31 "\n not have to be split into an array."
32 "\n -<number> causes a2p to assume that input will always have that"
47 linestr = str_new(80);
48 str = str_new(0); /* first used for -I flags */
49 for (argc--,argv++; argc; argc--,argv++) {
50 if (argv[0][0] != '-' || !argv[0][1])
56 debug = atoi(argv[0]+2);
58 yydebug = (debug & 1);
62 case '0': case '1': case '2': case '3': case '4':
63 case '5': case '6': case '7': case '8': case '9':
64 maxfld = atoi(argv[0]+1);
71 namelist = savestr(argv[0]+2);
79 fatal("Unrecognized switch: %s\n",argv[0]);
89 if (argv[0] == Nullch) {
91 if ( isatty(fileno(stdin)) )
96 filename = savestr(argv[0]);
98 filename = savestr(argv[0]);
99 if (strEQ(filename,"-"))
104 rsfp = fopen(argv[0],"r");
106 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
110 bufptr = str_get(linestr);
114 /* now parse the report spec */
117 fatal("Translation aborted due to syntax errors.\n");
127 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
129 printf("\t\"%s\"\n",ops[i].cval),i++;
132 printf("\t%d",ops[i].ival),i++;
142 /* first pass to look for numeric variables */
144 prewalk(0,0,root,&i);
146 /* second pass to produce new program */
148 tmpstr = walk(0,0,root,&i,P_MIN);
149 str = str_make("#!");
151 str_cat(str, "/perl\neval \"exec ");
153 str_cat(str, "/perl -S $0 $*\"\n\
154 if $running_under_some_shell;\n\
155 # this emulates #! processing on NIH machines.\n\
156 # (remove #! line above if indigestible)\n\n");
158 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
160 " # process any FOO=bar switches\n\n");
161 if (do_opens && opens) {
166 str_scat(str,tmpstr);
175 "Please check my work on the %d line%s I've marked with \"#???\".\n",
176 checkers, checkers == 1 ? "" : "s" );
178 "The operation I've selected may be wrong for the operand types.\n");
183 #define RETURN(retval) return (bufptr = s,retval)
184 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
185 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
186 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
192 register char *s = bufptr;
200 fprintf(stderr,"Tokener at %s",s);
202 fprintf(stderr,"Tokener at %s\n",s);
207 "Unrecognized character %c in file %s line %d--ignoring.\n",
212 if (*s && *s != '\n') {
213 yyerror("Ignoring spurious backslash");
218 s = str_get(linestr);
223 if ((s = str_gets(linestr, rsfp)) == Nullch) {
227 s = str_get(linestr);
238 yylval = string(s,0);
263 for (d = s + 1; isspace(*d); d++) ;
273 yylval = string("~",1);
291 yylval = string("**=",3);
293 yylval = string(s-1,2);
311 while (*s == ' ' || *s == '\t')
313 if (strnEQ(s,"getline",7))
321 yylval = string("==",2);
325 yylval = string("=",1);
331 yylval = string("!=",2);
335 yylval = string("!~",2);
344 yylval = string("<=",2);
353 yylval = string(">>",2);
357 yylval = string(">=",2);
365 while (isalpha(*s) || isdigit(*s) || *s == '_') \
385 for (d = s; isdigit(*s); s++) ;
386 yylval = string(d,s-d);
392 split_to_array = set_array_base = TRUE;
395 case '/': /* may either be division or pattern */
402 yylval = string("/=",2);
408 case '0': case '1': case '2': case '3': case '4':
409 case '5': case '6': case '7': case '8': case '9': case '.':
414 s = cpy2(tokenbuf,s,s[-1]);
416 fatal("String not terminated:\n%s",str_get(linestr));
418 yylval = string(tokenbuf,0);
424 set_array_base = TRUE;
425 if (strEQ(d,"ARGV")) {
426 yylval=numary(string("ARGV",0));
429 if (strEQ(d,"atan2")) {
436 if (strEQ(d,"break"))
438 if (strEQ(d,"BEGIN"))
443 if (strEQ(d,"continue"))
445 if (strEQ(d,"cos")) {
449 if (strEQ(d,"close")) {
454 if (strEQ(d,"chdir"))
456 else if (strEQ(d,"crypt"))
458 else if (strEQ(d,"chop"))
460 else if (strEQ(d,"chmod"))
462 else if (strEQ(d,"chown"))
469 if (strEQ(d,"delete"))
480 if (strEQ(d,"exit")) {
484 if (strEQ(d,"exp")) {
488 if (strEQ(d,"elsif"))
490 else if (strEQ(d,"eq"))
492 else if (strEQ(d,"eval"))
494 else if (strEQ(d,"eof"))
496 else if (strEQ(d,"each"))
498 else if (strEQ(d,"exec"))
505 if (saw_FS == 1 && in_begin) {
506 for (d = s; *d && isspace(*d); d++) ;
508 for (d++; *d && isspace(*d); d++) ;
509 if (*d == '"' && d[2] == '"')
517 else if (strEQ(d,"function"))
519 if (strEQ(d,"FILENAME"))
521 if (strEQ(d,"foreach"))
523 else if (strEQ(d,"format"))
525 else if (strEQ(d,"fork"))
527 else if (strEQ(d,"fh"))
532 if (strEQ(d,"getline"))
538 else if (strEQ(d,"gt"))
540 else if (strEQ(d,"goto"))
542 else if (strEQ(d,"gmtime"))
556 if (strEQ(d,"index")) {
557 set_array_base = TRUE;
560 if (strEQ(d,"int")) {
574 else if (strEQ(d,"kill"))
579 if (strEQ(d,"length")) {
583 if (strEQ(d,"log")) {
589 else if (strEQ(d,"local"))
591 else if (strEQ(d,"lt"))
593 else if (strEQ(d,"le"))
595 else if (strEQ(d,"locatime"))
597 else if (strEQ(d,"link"))
602 if (strEQ(d,"match")) {
603 set_array_base = TRUE;
612 do_chop = do_split = split_to_array = set_array_base = TRUE;
613 if (strEQ(d,"next")) {
622 if (strEQ(d,"ORS")) {
626 if (strEQ(d,"OFS")) {
630 if (strEQ(d,"OFMT")) {
635 else if (strEQ(d,"ord"))
637 else if (strEQ(d,"oct"))
642 if (strEQ(d,"print")) {
645 if (strEQ(d,"printf")) {
650 else if (strEQ(d,"pop"))
662 if (strEQ(d,"rand")) {
666 if (strEQ(d,"return"))
668 if (strEQ(d,"reset"))
670 else if (strEQ(d,"redo"))
672 else if (strEQ(d,"rename"))
677 if (strEQ(d,"split")) {
678 set_array_base = TRUE;
681 if (strEQ(d,"substr")) {
682 set_array_base = TRUE;
687 if (strEQ(d,"sprintf"))
689 if (strEQ(d,"sqrt")) {
693 if (strEQ(d,"SUBSEP")) {
696 if (strEQ(d,"sin")) {
700 if (strEQ(d,"srand")) {
704 if (strEQ(d,"system")) {
710 else if (strEQ(d,"shift"))
712 else if (strEQ(d,"select"))
714 else if (strEQ(d,"seek"))
716 else if (strEQ(d,"stat"))
718 else if (strEQ(d,"study"))
720 else if (strEQ(d,"sleep"))
722 else if (strEQ(d,"symlink"))
724 else if (strEQ(d,"sort"))
731 else if (strEQ(d,"tell"))
733 else if (strEQ(d,"time"))
735 else if (strEQ(d,"times"))
740 if (strEQ(d,"until"))
742 else if (strEQ(d,"unless"))
744 else if (strEQ(d,"umask"))
746 else if (strEQ(d,"unshift"))
748 else if (strEQ(d,"unlink"))
750 else if (strEQ(d,"utime"))
755 if (strEQ(d,"values"))
760 if (strEQ(d,"while"))
762 if (strEQ(d,"write"))
764 else if (strEQ(d,"wait"))
793 fatal("Search pattern not found:\n%s",str_get(linestr));
797 for (; *s; s++,d++) {
801 else if (s[1] == '\\')
803 else if (s[1] == '[')
806 else if (*s == '[') {
809 if (*s == '\\' && s[1])
811 if (*s == '/' || (*s == '-' && s[1] == ']'))
814 } while (*s && *s != ']');
823 fatal("Search pattern not terminated:\n%s",str_get(linestr));
825 yylval = string(tokenbuf,0);
833 fprintf(stderr,"%s in file %s at line %d\n",
844 case '1': case '2': case '3': case '4': case '5':
845 case '6': case '7': case '8': case '9': case '0' : case '.':
847 while (isdigit(*s)) {
853 while (isdigit(*s)) {
860 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
862 if (*s == '+' || *s == '-')
868 yylval = string(tokenbuf,0);
879 ops[mop++].ival = OSTRING + (1<<8);
882 ops[mop].cval = safemalloc(len+1);
883 strncpy(ops[mop].cval,ptr,len);
884 ops[mop++].cval[len] = '\0';
886 fatal("Recompile a2p with larger OPSMAX\n");
896 fatal("type > 255 (%d)\n",type);
897 ops[mop++].ival = type;
899 fatal("Recompile a2p with larger OPSMAX\n");
910 fatal("type > 255 (%d)\n",type);
911 ops[mop++].ival = type + (1<<8);
912 ops[mop++].ival = arg1;
914 fatal("Recompile a2p with larger OPSMAX\n");
918 oper2(type,arg1,arg2)
926 fatal("type > 255 (%d)\n",type);
927 ops[mop++].ival = type + (2<<8);
928 ops[mop++].ival = arg1;
929 ops[mop++].ival = arg2;
931 fatal("Recompile a2p with larger OPSMAX\n");
935 oper3(type,arg1,arg2,arg3)
944 fatal("type > 255 (%d)\n",type);
945 ops[mop++].ival = type + (3<<8);
946 ops[mop++].ival = arg1;
947 ops[mop++].ival = arg2;
948 ops[mop++].ival = arg3;
950 fatal("Recompile a2p with larger OPSMAX\n");
954 oper4(type,arg1,arg2,arg3,arg4)
964 fatal("type > 255 (%d)\n",type);
965 ops[mop++].ival = type + (4<<8);
966 ops[mop++].ival = arg1;
967 ops[mop++].ival = arg2;
968 ops[mop++].ival = arg3;
969 ops[mop++].ival = arg4;
971 fatal("Recompile a2p with larger OPSMAX\n");
975 oper5(type,arg1,arg2,arg3,arg4,arg5)
986 fatal("type > 255 (%d)\n",type);
987 ops[mop++].ival = type + (5<<8);
988 ops[mop++].ival = arg1;
989 ops[mop++].ival = arg2;
990 ops[mop++].ival = arg3;
991 ops[mop++].ival = arg4;
992 ops[mop++].ival = arg5;
994 fatal("Recompile a2p with larger OPSMAX\n");
1008 type = ops[branch].ival;
1011 for (i=depth; i; i--)
1013 if (type == OSTRING) {
1014 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1017 printf("(%-5d%s %d\n",branch,opname[type],len);
1019 for (i=1; i<=len; i++)
1020 dump(ops[branch+i].ival);
1022 for (i=depth; i; i--)
1034 else if ((ops[arg].ival & 255) != OBLOCK)
1035 return oper2(OBLOCK,arg,maybe);
1036 else if ((ops[arg].ival >> 8) < 2)
1037 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1049 for (s = str->str_ptr; *s; s++) {
1050 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1054 else if (*s == '\n') {
1055 for (t = s+1; isspace(*t & 127); t++) ;
1057 while (isspace(*t & 127) && *t != '\n') t--;
1058 if (*t == '\n' && t-s > 1) {
1072 register char *d, *s, *t, *e;
1073 register int pos, newpos;
1077 for (s = str->str_ptr; *s; s++) {
1086 else if (*s == '\t')
1088 if (pos > 78) { /* split a long line? */
1091 for (t = tokenbuf; isspace(*t & 127); t++) {
1098 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1102 while (d > tokenbuf &&
1103 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1108 while (d > tokenbuf &&
1109 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1114 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1119 while (d > tokenbuf && *d != ' ')
1129 if (d[-1] != ';' && !(newpos % 4)) {
1135 newpos += strlen(t);
1150 for (t = tokenbuf; *t; t++) {
1154 strcpy(t+strlen(t)-1, "\t#???\n");
1160 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1162 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1165 fputs(tokenbuf,stdout);
1174 key = walk(0,0,arg,&dummy,P_MIN);
1176 hstore(symtab,key->str_ptr,str_make("1"));
1178 set_array_base = TRUE;
1190 type = ops[arg].ival & 255;
1191 if (type == OCOMMA) {
1192 rememberargs(ops[arg+1].ival);
1193 rememberargs(ops[arg+3].ival);
1195 else if (type == OVAR) {
1197 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1200 fatal("panic: unknown argument type %d, line %d\n",type,line);
1207 int type = ops[arg].ival & 255;
1210 if (type != OSTRING)
1211 fatal("panic: aryrefarg %d, line %d\n",type,line);
1212 str = hfetch(curarghash,ops[arg+1].cval);
1218 fixfargs(name,arg,prevargs)
1229 type = ops[arg].ival & 255;
1230 if (type == OCOMMA) {
1231 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1232 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1234 else if (type == OVAR) {
1235 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1236 if (strEQ(str_get(str),"*")) {
1239 str_set(str,""); /* in case another routine has this */
1240 ops[arg].ival &= ~255;
1241 ops[arg].ival |= OSTAR;
1242 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1243 fprintf(stderr,"Adding %s\n",tmpbuf);
1246 hstore(curarghash,tmpbuf,str);
1248 numargs = prevargs + 1;
1251 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1252 type,prevargs+1,line);
1256 fixrargs(name,arg,prevargs)
1267 type = ops[arg].ival & 255;
1268 if (type == OCOMMA) {
1269 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1270 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1275 sprintf(tmpbuf,"%s:%d",name,prevargs);
1276 str = hfetch(curarghash,tmpbuf);
1277 if (str && strEQ(str->str_ptr,"*")) {
1278 if (type == OVAR || type == OSTAR) {
1279 ops[arg].ival &= ~255;
1280 ops[arg].ival |= OSTAR;
1283 fatal("Can't pass expression by reference as arg %d of %s\n",
1286 numargs = prevargs + 1;