1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
12 #include "../patchlevel.h"
32 printf("\nThis is the AWK to PERL translator, version 5.0, patchlevel %d\n", PATCHLEVEL);
33 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
34 printf("\n -D<number> sets debugging flags."
35 "\n -F<character> the awk script to translate is always invoked with"
37 "\n -n<fieldlist> specifies the names of the input fields if input does"
38 "\n not have to be split into an array."
39 "\n -<number> causes a2p to assume that input will always have that"
54 linestr = str_new(80);
55 str = str_new(0); /* first used for -I flags */
56 for (argc--,argv++; argc; argc--,argv++) {
57 if (argv[0][0] != '-' || !argv[0][1])
63 debug = atoi(argv[0]+2);
65 yydebug = (debug & 1);
69 case '0': case '1': case '2': case '3': case '4':
70 case '5': case '6': case '7': case '8': case '9':
71 maxfld = atoi(argv[0]+1);
78 namelist = savestr(argv[0]+2);
86 fatal("Unrecognized switch: %s\n",argv[0]);
96 if (argv[0] == Nullch) {
98 if ( isatty(fileno(stdin)) )
103 filename = savestr(argv[0]);
105 filename = savestr(argv[0]);
106 if (strEQ(filename,"-"))
111 rsfp = fopen(argv[0],"r");
113 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
117 bufptr = str_get(linestr);
121 /* now parse the report spec */
124 fatal("Translation aborted due to syntax errors.\n");
134 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
136 printf("\t\"%s\"\n",ops[i].cval),i++;
139 printf("\t%d",ops[i].ival),i++;
149 /* first pass to look for numeric variables */
151 prewalk(0,0,root,&i);
153 /* second pass to produce new program */
155 tmpstr = walk(0,0,root,&i,P_MIN);
156 str = str_make(STARTPERL);
157 str_cat(str, "\neval 'exec perl -S $0 \"$@\"'\n\
158 if $running_under_some_shell;\n\
159 # this emulates #! processing on NIH machines.\n\
160 # (remove #! line above if indigestible)\n\n");
162 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
164 " # process any FOO=bar switches\n\n");
165 if (do_opens && opens) {
170 str_scat(str,tmpstr);
179 "Please check my work on the %d line%s I've marked with \"#???\".\n",
180 checkers, checkers == 1 ? "" : "s" );
182 "The operation I've selected may be wrong for the operand types.\n");
/*
 * Tokener exit macros.  Each one expands to a `return` of a comma
 * expression and refers to a variable named s, which must therefore be in
 * scope wherever the macro is used; s is copied into bufptr before the
 * value is returned.
 *
 *   RETURN(retval) - store s in bufptr, return retval.
 *   XTERM(retval)  - set expectterm to TRUE, store s in bufptr, return retval.
 *   XOP(retval)    - set expectterm to FALSE, store s in bufptr, return retval.
 *   ID(x)          - set yylval to string(x,0), set expectterm to FALSE,
 *                    store s in bufptr, return idtype.
 *
 * The expansions carry no trailing semicolon; the invocation supplies it.
 */
187 #define RETURN(retval) return (bufptr = s,retval)
188 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
189 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
190 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
197 register char *s = bufptr;
205 fprintf(stderr,"Tokener at %s",s);
207 fprintf(stderr,"Tokener at %s\n",s);
212 "Unrecognized character %c in file %s line %d--ignoring.\n",
217 if (*s && *s != '\n') {
218 yyerror("Ignoring spurious backslash");
223 s = str_get(linestr);
228 if ((s = str_gets(linestr, rsfp)) == Nullch) {
232 s = str_get(linestr);
243 yylval = string(s,0);
268 for (d = s + 1; isspace(*d); d++) ;
278 yylval = string("~",1);
296 yylval = string("**=",3);
298 yylval = string(s-1,2);
316 while (*s == ' ' || *s == '\t')
318 if (strnEQ(s,"getline",7))
326 yylval = string("==",2);
330 yylval = string("=",1);
336 yylval = string("!=",2);
340 yylval = string("!~",2);
349 yylval = string("<=",2);
358 yylval = string(">>",2);
362 yylval = string(">=",2);
370 while (isalpha(*s) || isdigit(*s) || *s == '_') \
390 for (d = s; isdigit(*s); s++) ;
391 yylval = string(d,s-d);
397 split_to_array = set_array_base = TRUE;
400 case '/': /* may either be division or pattern */
407 yylval = string("/=",2);
413 case '0': case '1': case '2': case '3': case '4':
414 case '5': case '6': case '7': case '8': case '9': case '.':
419 s = cpy2(tokenbuf,s,s[-1]);
421 fatal("String not terminated:\n%s",str_get(linestr));
423 yylval = string(tokenbuf,0);
429 set_array_base = TRUE;
430 if (strEQ(d,"ARGV")) {
431 yylval=numary(string("ARGV",0));
434 if (strEQ(d,"atan2")) {
441 if (strEQ(d,"break"))
443 if (strEQ(d,"BEGIN"))
448 if (strEQ(d,"continue"))
450 if (strEQ(d,"cos")) {
454 if (strEQ(d,"close")) {
459 if (strEQ(d,"chdir"))
461 else if (strEQ(d,"crypt"))
463 else if (strEQ(d,"chop"))
465 else if (strEQ(d,"chmod"))
467 else if (strEQ(d,"chown"))
474 if (strEQ(d,"delete"))
485 if (strEQ(d,"exit")) {
489 if (strEQ(d,"exp")) {
493 if (strEQ(d,"elsif"))
495 else if (strEQ(d,"eq"))
497 else if (strEQ(d,"eval"))
499 else if (strEQ(d,"eof"))
501 else if (strEQ(d,"each"))
503 else if (strEQ(d,"exec"))
510 if (saw_FS == 1 && in_begin) {
511 for (d = s; *d && isspace(*d); d++) ;
513 for (d++; *d && isspace(*d); d++) ;
514 if (*d == '"' && d[2] == '"')
522 else if (strEQ(d,"function"))
524 if (strEQ(d,"FILENAME"))
526 if (strEQ(d,"foreach"))
528 else if (strEQ(d,"format"))
530 else if (strEQ(d,"fork"))
532 else if (strEQ(d,"fh"))
537 if (strEQ(d,"getline"))
543 else if (strEQ(d,"gt"))
545 else if (strEQ(d,"goto"))
547 else if (strEQ(d,"gmtime"))
561 if (strEQ(d,"index")) {
562 set_array_base = TRUE;
565 if (strEQ(d,"int")) {
579 else if (strEQ(d,"kill"))
584 if (strEQ(d,"length")) {
588 if (strEQ(d,"log")) {
594 else if (strEQ(d,"local"))
596 else if (strEQ(d,"lt"))
598 else if (strEQ(d,"le"))
600 else if (strEQ(d,"localtime")) /* Perl builtin name is localtime (compare gmtime, time, times) */
602 else if (strEQ(d,"link"))
607 if (strEQ(d,"match")) {
608 set_array_base = TRUE;
617 do_chop = do_split = split_to_array = set_array_base = TRUE;
618 if (strEQ(d,"next")) {
627 if (strEQ(d,"ORS")) {
631 if (strEQ(d,"OFS")) {
635 if (strEQ(d,"OFMT")) {
640 else if (strEQ(d,"ord"))
642 else if (strEQ(d,"oct"))
647 if (strEQ(d,"print")) {
650 if (strEQ(d,"printf")) {
655 else if (strEQ(d,"pop"))
667 if (strEQ(d,"rand")) {
671 if (strEQ(d,"return"))
673 if (strEQ(d,"reset"))
675 else if (strEQ(d,"redo"))
677 else if (strEQ(d,"rename"))
682 if (strEQ(d,"split")) {
683 set_array_base = TRUE;
686 if (strEQ(d,"substr")) {
687 set_array_base = TRUE;
692 if (strEQ(d,"sprintf"))
694 if (strEQ(d,"sqrt")) {
698 if (strEQ(d,"SUBSEP")) {
701 if (strEQ(d,"sin")) {
705 if (strEQ(d,"srand")) {
709 if (strEQ(d,"system")) {
715 else if (strEQ(d,"shift"))
717 else if (strEQ(d,"select"))
719 else if (strEQ(d,"seek"))
721 else if (strEQ(d,"stat"))
723 else if (strEQ(d,"study"))
725 else if (strEQ(d,"sleep"))
727 else if (strEQ(d,"symlink"))
729 else if (strEQ(d,"sort"))
736 else if (strEQ(d,"tell"))
738 else if (strEQ(d,"time"))
740 else if (strEQ(d,"times"))
745 if (strEQ(d,"until"))
747 else if (strEQ(d,"unless"))
749 else if (strEQ(d,"umask"))
751 else if (strEQ(d,"unshift"))
753 else if (strEQ(d,"unlink"))
755 else if (strEQ(d,"utime"))
760 if (strEQ(d,"values"))
765 if (strEQ(d,"while"))
767 if (strEQ(d,"write"))
769 else if (strEQ(d,"wait"))
798 fatal("Search pattern not found:\n%s",str_get(linestr));
802 for (; *s; s++,d++) {
806 else if (s[1] == '\\')
808 else if (s[1] == '[')
811 else if (*s == '[') {
814 if (*s == '\\' && s[1])
816 if (*s == '/' || (*s == '-' && s[1] == ']'))
819 } while (*s && *s != ']');
828 fatal("Search pattern not terminated:\n%s",str_get(linestr));
830 yylval = string(tokenbuf,0);
838 fprintf(stderr,"%s in file %s at line %d\n",
849 case '1': case '2': case '3': case '4': case '5':
850 case '6': case '7': case '8': case '9': case '0' : case '.':
852 while (isdigit(*s)) {
858 while (isdigit(*s)) {
/* Test for 'e'/'E' directly: strchr() treats the terminating NUL as part of
 * the string, so strchr("eE",*s) would succeed at end of input and s[1]
 * would then be read past the terminator.  s[1] is checked for non-NUL for
 * the same reason before it is looked up in the sign/digit set. */
865 if ((*s == 'e' || *s == 'E') && s[1] && strchr("+-0123456789",s[1])) {
867 if (*s == '+' || *s == '-')
873 yylval = string(tokenbuf,0);
886 ops[mop++].ival = OSTRING + (1<<8);
889 ops[mop].cval = safemalloc(len+1);
890 strncpy(ops[mop].cval,ptr,len);
891 ops[mop++].cval[len] = '\0';
893 fatal("Recompile a2p with larger OPSMAX\n");
904 fatal("type > 255 (%d)\n",type);
905 ops[mop++].ival = type;
907 fatal("Recompile a2p with larger OPSMAX\n");
919 fatal("type > 255 (%d)\n",type);
920 ops[mop++].ival = type + (1<<8);
921 ops[mop++].ival = arg1;
923 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper2 - append a two-operand node to the ops[] table.
 *
 * The header slot written at ops[mop] is type + (2<<8); the two slots after
 * it receive arg1 and arg2, and mop advances past all three.  Code elsewhere
 * in this file reads a node's type back as (ival & 255) and the upper field
 * as (ival >> 8), so a type above 255 would not fit in the header.
 * NOTE(review): the parameter declarations, the conditions guarding the two
 * fatal() calls and the return statement are not visible in this excerpt.
 */
928 oper2(type,arg1,arg2)
936 fatal("type > 255 (%d)\n",type);
937 ops[mop++].ival = type + (2<<8); /* header: type in low bits, 2 above bit 8 */
938 ops[mop++].ival = arg1;
939 ops[mop++].ival = arg2;
941 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper3 - append a three-operand node to the ops[] table.
 *
 * Writes type + (3<<8) into ops[mop], then arg1, arg2 and arg3 into the
 * following slots, advancing mop by four in total.
 * NOTE(review): the parameter declarations, the conditions guarding the two
 * fatal() calls and the return statement are not visible in this excerpt.
 */
946 oper3(type,arg1,arg2,arg3)
955 fatal("type > 255 (%d)\n",type);
956 ops[mop++].ival = type + (3<<8); /* header: type in low bits, 3 above bit 8 */
957 ops[mop++].ival = arg1;
958 ops[mop++].ival = arg2;
959 ops[mop++].ival = arg3;
961 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper4 - append a four-operand node to the ops[] table.
 *
 * Writes type + (4<<8) into ops[mop], then arg1 through arg4 into the
 * following slots, advancing mop by five in total.
 * NOTE(review): the parameter declarations, the conditions guarding the two
 * fatal() calls and the return statement are not visible in this excerpt.
 */
966 oper4(type,arg1,arg2,arg3,arg4)
976 fatal("type > 255 (%d)\n",type);
977 ops[mop++].ival = type + (4<<8); /* header: type in low bits, 4 above bit 8 */
978 ops[mop++].ival = arg1;
979 ops[mop++].ival = arg2;
980 ops[mop++].ival = arg3;
981 ops[mop++].ival = arg4;
983 fatal("Recompile a2p with larger OPSMAX\n");
/*
 * oper5 - append a five-operand node to the ops[] table.
 *
 * Writes type + (5<<8) into ops[mop], then arg1 through arg5 into the
 * following slots, advancing mop by six in total.
 * NOTE(review): the parameter declarations, the conditions guarding the two
 * fatal() calls and the return statement are not visible in this excerpt.
 */
988 oper5(type,arg1,arg2,arg3,arg4,arg5)
999 fatal("type > 255 (%d)\n",type);
1000 ops[mop++].ival = type + (5<<8); /* header: type in low bits, 5 above bit 8 */
1001 ops[mop++].ival = arg1;
1002 ops[mop++].ival = arg2;
1003 ops[mop++].ival = arg3;
1004 ops[mop++].ival = arg4;
1005 ops[mop++].ival = arg5;
1007 fatal("Recompile a2p with larger OPSMAX\n");
1021 type = ops[branch].ival;
1024 for (i=depth; i; i--)
1026 if (type == OSTRING) {
1027 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1030 printf("(%-5d%s %d\n",branch,opname[type],len);
1032 for (i=1; i<=len; i++)
1033 dump(ops[branch+i].ival);
1035 for (i=depth; i; i--)
1048 else if ((ops[arg].ival & 255) != OBLOCK)
1049 return oper2(OBLOCK,arg,maybe);
1050 else if ((ops[arg].ival >> 8) < 2)
1051 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1063 for (s = str->str_ptr; *s; s++) {
1064 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1068 else if (*s == '\n') {
1069 for (t = s+1; isspace(*t & 127); t++) ;
1071 while (isspace(*t & 127) && *t != '\n') t--;
1072 if (*t == '\n' && t-s > 1) {
1086 register char *d, *s, *t, *e;
1087 register int pos, newpos;
1091 for (s = str->str_ptr; *s; s++) {
1100 else if (*s == '\t')
1102 if (pos > 78) { /* split a long line? */
1105 for (t = tokenbuf; isspace(*t & 127); t++) {
1112 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1116 while (d > tokenbuf &&
1117 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1122 while (d > tokenbuf &&
1123 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1128 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1133 while (d > tokenbuf && *d != ' ')
1143 if (d[-1] != ';' && !(newpos % 4)) {
1149 newpos += strlen(t);
1164 for (t = tokenbuf; *t; t++) {
1168 strcpy(t+strlen(t)-1, "\t#???\n");
1174 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1176 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1179 fputs(tokenbuf,stdout);
1189 key = walk(0,0,arg,&dummy,P_MIN);
1191 hstore(symtab,key->str_ptr,str_make("1"));
1193 set_array_base = TRUE;
1206 type = ops[arg].ival & 255;
1207 if (type == OCOMMA) {
1208 rememberargs(ops[arg+1].ival);
1209 rememberargs(ops[arg+3].ival);
1211 else if (type == OVAR) {
1213 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1216 fatal("panic: unknown argument type %d, line %d\n",type,line);
1224 int type = ops[arg].ival & 255;
1227 if (type != OSTRING)
1228 fatal("panic: aryrefarg %d, line %d\n",type,line);
1229 str = hfetch(curarghash,ops[arg+1].cval);
1236 fixfargs(name,arg,prevargs)
1247 type = ops[arg].ival & 255;
1248 if (type == OCOMMA) {
1249 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1250 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1252 else if (type == OVAR) {
1253 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1254 if (strEQ(str_get(str),"*")) {
1257 str_set(str,""); /* in case another routine has this */
1258 ops[arg].ival &= ~255;
1259 ops[arg].ival |= OSTAR;
1260 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1261 fprintf(stderr,"Adding %s\n",tmpbuf);
1264 hstore(curarghash,tmpbuf,str);
1266 numargs = prevargs + 1;
1269 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1270 type,prevargs+1,line);
1275 fixrargs(name,arg,prevargs)
1286 type = ops[arg].ival & 255;
1287 if (type == OCOMMA) {
1288 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1289 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1294 sprintf(tmpbuf,"%s:%d",name,prevargs);
1295 str = hfetch(curarghash,tmpbuf);
1296 if (str && strEQ(str->str_ptr,"*")) {
1297 if (type == OVAR || type == OSTAR) {
1298 ops[arg].ival &= ~255;
1299 ops[arg].ival |= OSTAR;
1302 fatal("Can't pass expression by reference as arg %d of %s\n",
1305 numargs = prevargs + 1;