1 /* $RCSfile: a2py.c,v $$Revision: 4.0.1.2 $$Date: 92/06/08 16:15:16 $
3 * Copyright (c) 1991, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
9 * Revision 4.0.1.2 92/06/08 16:15:16 lwall
10 * patch20: in a2p, now warns about spurious backslashes
11 * patch20: in a2p, now allows [ to be backslashed in pattern
12 * patch20: in a2p, now allows numbers of the form 2.
14 * Revision 4.0.1.1 91/06/07 12:12:59 lwall
15 * patch4: new copyright notice
17 * Revision 4.0 91/03/20 01:57:26 lwall
23 #include "../patchlevel.h"
37 printf("\nThis is the AWK to PERL translator, version 4.0, patchlevel %d\n", PATCHLEVEL);
38 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
39 printf("\n -D<number> sets debugging flags."
40 "\n -F<character> the awk script to translate is always invoked with"
42 "\n -n<fieldlist> specifies the names of the input fields if input does"
43 "\n not have to be split into an array."
44 "\n -<number> causes a2p to assume that input will always have that"
60 linestr = str_new(80);
61 str = str_new(0); /* first used for -I flags */
62 for (argc--,argv++; argc; argc--,argv++) {
63 if (argv[0][0] != '-' || !argv[0][1])
69 debug = atoi(argv[0]+2);
71 yydebug = (debug & 1);
75 case '0': case '1': case '2': case '3': case '4':
76 case '5': case '6': case '7': case '8': case '9':
77 maxfld = atoi(argv[0]+1);
84 namelist = savestr(argv[0]+2);
92 fatal("Unrecognized switch: %s\n",argv[0]);
102 if (argv[0] == Nullch) {
104 if ( isatty(fileno(stdin)) )
109 filename = savestr(argv[0]);
111 filename = savestr(argv[0]);
112 if (strEQ(filename,"-"))
117 rsfp = fopen(argv[0],"r");
119 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
123 bufptr = str_get(linestr);
127 /* now parse the awk script */
130 fatal("Translation aborted due to syntax errors.\n");
140 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
142 printf("\t\"%s\"\n",ops[i].cval),i++;
145 printf("\t%d",ops[i].ival),i++;
155 /* first pass to look for numeric variables */
157 prewalk(0,0,root,&i);
159 /* second pass to produce new program */
161 tmpstr = walk(0,0,root,&i,P_MIN);
162 str = str_make("#!");
164 str_cat(str, "/perl\neval \"exec ");
166 str_cat(str, "/perl -S $0 $*\"\n\
167 if $running_under_some_shell;\n\
168 # this emulates #! processing on NIH machines.\n\
169 # (remove #! line above if indigestible)\n\n");
171 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift;\n");
173 " # process any FOO=bar switches\n\n");
174 if (do_opens && opens) {
179 str_scat(str,tmpstr);
188 "Please check my work on the %d line%s I've marked with \"#???\".\n",
189 checkers, checkers == 1 ? "" : "s" );
191 "The operation I've selected may be wrong for the operand types.\n");
196 #define RETURN(retval) return (bufptr = s,retval)
197 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
198 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
199 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
205 register char *s = bufptr;
213 fprintf(stderr,"Tokener at %s",s);
215 fprintf(stderr,"Tokener at %s\n",s);
220 "Unrecognized character %c in file %s line %d--ignoring.\n",
225 if (*s && *s != '\n') {
226 yyerror("Ignoring spurious backslash");
231 s = str_get(linestr);
236 if ((s = str_gets(linestr, rsfp)) == Nullch) {
240 s = str_get(linestr);
251 yylval = string(s,0);
276 for (d = s + 1; isspace(*d); d++) ;
286 yylval = string("~",1);
304 yylval = string("**=",3);
306 yylval = string(s-1,2);
324 while (*s == ' ' || *s == '\t')
326 if (strnEQ(s,"getline",7))
334 yylval = string("==",2);
338 yylval = string("=",1);
344 yylval = string("!=",2);
348 yylval = string("!~",2);
357 yylval = string("<=",2);
366 yylval = string(">>",2);
370 yylval = string(">=",2);
378 while (isalpha(*s) || isdigit(*s) || *s == '_') \
398 for (d = s; isdigit(*s); s++) ;
399 yylval = string(d,s-d);
405 split_to_array = set_array_base = TRUE;
408 case '/': /* may either be division or pattern */
415 yylval = string("/=",2);
421 case '0': case '1': case '2': case '3': case '4':
422 case '5': case '6': case '7': case '8': case '9': case '.':
427 s = cpy2(tokenbuf,s,s[-1]);
429 fatal("String not terminated:\n%s",str_get(linestr));
431 yylval = string(tokenbuf,0);
437 set_array_base = TRUE;
438 if (strEQ(d,"ARGV")) {
439 yylval=numary(string("ARGV",0));
442 if (strEQ(d,"atan2")) {
449 if (strEQ(d,"break"))
451 if (strEQ(d,"BEGIN"))
456 if (strEQ(d,"continue"))
458 if (strEQ(d,"cos")) {
462 if (strEQ(d,"close")) {
467 if (strEQ(d,"chdir"))
469 else if (strEQ(d,"crypt"))
471 else if (strEQ(d,"chop"))
473 else if (strEQ(d,"chmod"))
475 else if (strEQ(d,"chown"))
482 if (strEQ(d,"delete"))
493 if (strEQ(d,"exit")) {
497 if (strEQ(d,"exp")) {
501 if (strEQ(d,"elsif"))
503 else if (strEQ(d,"eq"))
505 else if (strEQ(d,"eval"))
507 else if (strEQ(d,"eof"))
509 else if (strEQ(d,"each"))
511 else if (strEQ(d,"exec"))
518 if (saw_FS == 1 && in_begin) {
519 for (d = s; *d && isspace(*d); d++) ;
521 for (d++; *d && isspace(*d); d++) ;
522 if (*d == '"' && d[2] == '"')
530 else if (strEQ(d,"function"))
532 if (strEQ(d,"FILENAME"))
534 if (strEQ(d,"foreach"))
536 else if (strEQ(d,"format"))
538 else if (strEQ(d,"fork"))
540 else if (strEQ(d,"fh"))
545 if (strEQ(d,"getline"))
551 else if (strEQ(d,"gt"))
553 else if (strEQ(d,"goto"))
555 else if (strEQ(d,"gmtime"))
569 if (strEQ(d,"index")) {
570 set_array_base = TRUE;
573 if (strEQ(d,"int")) {
587 else if (strEQ(d,"kill"))
592 if (strEQ(d,"length")) {
596 if (strEQ(d,"log")) {
602 else if (strEQ(d,"local"))
604 else if (strEQ(d,"lt"))
606 else if (strEQ(d,"le"))
608 else if (strEQ(d,"localtime")) /* Perl's builtin is spelled "localtime" */
610 else if (strEQ(d,"link"))
615 if (strEQ(d,"match")) {
616 set_array_base = TRUE;
625 do_chop = do_split = split_to_array = set_array_base = TRUE;
626 if (strEQ(d,"next")) {
635 if (strEQ(d,"ORS")) {
639 if (strEQ(d,"OFS")) {
643 if (strEQ(d,"OFMT")) {
648 else if (strEQ(d,"ord"))
650 else if (strEQ(d,"oct"))
655 if (strEQ(d,"print")) {
658 if (strEQ(d,"printf")) {
663 else if (strEQ(d,"pop"))
675 if (strEQ(d,"rand")) {
679 if (strEQ(d,"return"))
681 if (strEQ(d,"reset"))
683 else if (strEQ(d,"redo"))
685 else if (strEQ(d,"rename"))
690 if (strEQ(d,"split")) {
691 set_array_base = TRUE;
694 if (strEQ(d,"substr")) {
695 set_array_base = TRUE;
700 if (strEQ(d,"sprintf"))
702 if (strEQ(d,"sqrt")) {
706 if (strEQ(d,"SUBSEP")) {
709 if (strEQ(d,"sin")) {
713 if (strEQ(d,"srand")) {
717 if (strEQ(d,"system")) {
723 else if (strEQ(d,"shift"))
725 else if (strEQ(d,"select"))
727 else if (strEQ(d,"seek"))
729 else if (strEQ(d,"stat"))
731 else if (strEQ(d,"study"))
733 else if (strEQ(d,"sleep"))
735 else if (strEQ(d,"symlink"))
737 else if (strEQ(d,"sort"))
744 else if (strEQ(d,"tell"))
746 else if (strEQ(d,"time"))
748 else if (strEQ(d,"times"))
753 if (strEQ(d,"until"))
755 else if (strEQ(d,"unless"))
757 else if (strEQ(d,"umask"))
759 else if (strEQ(d,"unshift"))
761 else if (strEQ(d,"unlink"))
763 else if (strEQ(d,"utime"))
768 if (strEQ(d,"values"))
773 if (strEQ(d,"while"))
775 if (strEQ(d,"write"))
777 else if (strEQ(d,"wait"))
806 fatal("Search pattern not found:\n%s",str_get(linestr));
810 for (; *s; s++,d++) {
814 else if (s[1] == '\\')
816 else if (s[1] == '[')
819 else if (*s == '[') {
822 if (*s == '\\' && s[1])
824 if (*s == '/' || (*s == '-' && s[1] == ']'))
827 } while (*s && *s != ']');
836 fatal("Search pattern not terminated:\n%s",str_get(linestr));
838 yylval = string(tokenbuf,0);
845 fprintf(stderr,"%s in file %s at line %d\n",
856 case '1': case '2': case '3': case '4': case '5':
857 case '6': case '7': case '8': case '9': case '0' : case '.':
859 while (isdigit(*s)) {
865 while (isdigit(*s)) {
872 if (index("eE",*s) && index("+-0123456789",s[1])) {
874 if (*s == '+' || *s == '-')
880 yylval = string(tokenbuf,0);
891 ops[mop++].ival = OSTRING + (1<<8);
894 ops[mop].cval = safemalloc(len+1);
895 strncpy(ops[mop].cval,ptr,len);
896 ops[mop++].cval[len] = '\0';
898 fatal("Recompile a2p with larger OPSMAX\n");
908 fatal("type > 255 (%d)\n",type);
909 ops[mop++].ival = type;
911 fatal("Recompile a2p with larger OPSMAX\n");
922 fatal("type > 255 (%d)\n",type);
923 ops[mop++].ival = type + (1<<8);
924 ops[mop++].ival = arg1;
926 fatal("Recompile a2p with larger OPSMAX\n");
930 oper2(type,arg1,arg2)
938 fatal("type > 255 (%d)\n",type);
939 ops[mop++].ival = type + (2<<8);
940 ops[mop++].ival = arg1;
941 ops[mop++].ival = arg2;
943 fatal("Recompile a2p with larger OPSMAX\n");
947 oper3(type,arg1,arg2,arg3)
956 fatal("type > 255 (%d)\n",type);
957 ops[mop++].ival = type + (3<<8);
958 ops[mop++].ival = arg1;
959 ops[mop++].ival = arg2;
960 ops[mop++].ival = arg3;
962 fatal("Recompile a2p with larger OPSMAX\n");
966 oper4(type,arg1,arg2,arg3,arg4)
976 fatal("type > 255 (%d)\n",type);
977 ops[mop++].ival = type + (4<<8);
978 ops[mop++].ival = arg1;
979 ops[mop++].ival = arg2;
980 ops[mop++].ival = arg3;
981 ops[mop++].ival = arg4;
983 fatal("Recompile a2p with larger OPSMAX\n");
987 oper5(type,arg1,arg2,arg3,arg4,arg5)
998 fatal("type > 255 (%d)\n",type);
999 ops[mop++].ival = type + (5<<8);
1000 ops[mop++].ival = arg1;
1001 ops[mop++].ival = arg2;
1002 ops[mop++].ival = arg3;
1003 ops[mop++].ival = arg4;
1004 ops[mop++].ival = arg5;
1006 fatal("Recompile a2p with larger OPSMAX\n");
1019 type = ops[branch].ival;
1022 for (i=depth; i; i--)
1024 if (type == OSTRING) {
1025 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1028 printf("(%-5d%s %d\n",branch,opname[type],len);
1030 for (i=1; i<=len; i++)
1031 dump(ops[branch+i].ival);
1033 for (i=depth; i; i--)
1045 else if ((ops[arg].ival & 255) != OBLOCK)
1046 return oper2(OBLOCK,arg,maybe);
1047 else if ((ops[arg].ival >> 8) < 2)
1048 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1059 for (s = str->str_ptr; *s; s++) {
1060 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1064 else if (*s == '\n') {
1065 for (t = s+1; isspace(*t & 127); t++) ;
1067 while (isspace(*t & 127) && *t != '\n') t--;
1068 if (*t == '\n' && t-s > 1) {
1081 register char *d, *s, *t, *e;
1082 register int pos, newpos;
1086 for (s = str->str_ptr; *s; s++) {
1095 else if (*s == '\t')
1097 if (pos > 78) { /* split a long line? */
1100 for (t = tokenbuf; isspace(*t & 127); t++) {
1107 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1111 while (d > tokenbuf &&
1112 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1117 while (d > tokenbuf &&
1118 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1123 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1128 while (d > tokenbuf && *d != ' ')
1138 if (d[-1] != ';' && !(newpos % 4)) {
1144 newpos += strlen(t);
1158 for (t = tokenbuf; *t; t++) {
1162 strcpy(t+strlen(t)-1, "\t#???\n");
1168 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1170 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1173 fputs(tokenbuf,stdout);
1182 key = walk(0,0,arg,&dummy,P_MIN);
1184 hstore(symtab,key->str_ptr,str_make("1"));
1186 set_array_base = TRUE;
1198 type = ops[arg].ival & 255;
1199 if (type == OCOMMA) {
1200 rememberargs(ops[arg+1].ival);
1201 rememberargs(ops[arg+3].ival);
1203 else if (type == OVAR) {
1205 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1208 fatal("panic: unknown argument type %d, line %d\n",type,line);
1215 int type = ops[arg].ival & 255;
1218 if (type != OSTRING)
1219 fatal("panic: aryrefarg %d, line %d\n",type,line);
1220 str = hfetch(curarghash,ops[arg+1].cval);
1226 fixfargs(name,arg,prevargs)
1237 type = ops[arg].ival & 255;
1238 if (type == OCOMMA) {
1239 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1240 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1242 else if (type == OVAR) {
1243 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1244 if (strEQ(str_get(str),"*")) {
1247 str_set(str,""); /* in case another routine has this */
1248 ops[arg].ival &= ~255;
1249 ops[arg].ival |= OSTAR;
1250 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1251 fprintf(stderr,"Adding %s\n",tmpbuf);
1254 hstore(curarghash,tmpbuf,str);
1256 numargs = prevargs + 1;
1259 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1260 type,prevargs+1,line);
1264 fixrargs(name,arg,prevargs)
1275 type = ops[arg].ival & 255;
1276 if (type == OCOMMA) {
1277 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1278 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1283 sprintf(tmpbuf,"%s:%d",name,prevargs);
1284 str = hfetch(curarghash,tmpbuf);
1285 if (str && strEQ(str->str_ptr,"*")) {
1286 if (type == OVAR || type == OSTAR) {
1287 ops[arg].ival &= ~255;
1288 ops[arg].ival |= OSTAR;
1291 fatal("Can't pass expression by reference as arg %d of %s\n",
1294 numargs = prevargs + 1;