1 /* $Header: str.c,v 3.0.1.8 90/08/09 05:22:18 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.8 90/08/09 05:22:18 lwall
10 * patch19: the number to string converter wasn't allocating enough space
11 * patch19: tainting didn't work on setgid scripts
13 * Revision 3.0.1.7 90/03/27 16:24:11 lwall
14 * patch16: strings with prefix chopped off sometimes freed wrong
15 * patch16: taint check blows up on undefined array element
17 * Revision 3.0.1.6 90/03/12 17:02:14 lwall
18 * patch13: substr as lvalue didn't invalidate old numeric value
20 * Revision 3.0.1.5 90/02/28 18:30:38 lwall
21 * patch9: you may now undef $/ to have no input record separator
22 * patch9: nested evals clobbered their longjmp environment
23 * patch9: sometimes perl thought ordinary data was a symbol table entry
24 * patch9: insufficient space allocated for numeric string on sun4
25 * patch9: underscore in an array name in a double-quoted string not recognized
26 * patch9: "@foo{}" not recognized unless %foo defined
27 * patch9: "$foo[$[]" gives error
29 * Revision 3.0.1.4 89/12/21 20:21:35 lwall
30 * patch7: errno may now be a macro with an lvalue
31 * patch7: made nested or recursive foreach work right
33 * Revision 3.0.1.3 89/11/17 15:38:23 lwall
34 * patch5: some machines typedef unchar too
35 * patch5: substitution on leading components occasionally caused <> corruption
37 * Revision 3.0.1.2 89/11/11 04:56:22 lwall
38 * patch2: uchar gives Crays fits
40 * Revision 3.0.1.1 89/10/26 23:23:41 lwall
41 * patch1: string ordering tests were wrong
42 * patch1: $/ now works even when STDSTDIO undefined
44 * Revision 3.0 89/10/18 15:23:38 lwall
53 extern char **environ;
61 tainted |= str->str_tainted;
63 return str->str_pok ? str->str_ptr : str_2ptr(str);
67 /* dlb ... guess we have a "crippled cc".
68 * dlb the following functions are usually macros.
75 if (*Str->str_ptr > '0' ||
77 (Str->str_cur && *Str->str_ptr != '0'))
82 return (Str->str_u.str_nval != 0.0);
92 tainted |= Str->str_tainted;
95 return Str->str_u.str_nval;
99 /* dlb ... end of crutch */
107 unsigned long newlen;
110 register char *s = str->str_ptr;
113 if (newlen >= 0x10000) {
114 fprintf(stderr, "Allocation too large: %lx\n", newlen);
118 if (str->str_state == SS_INCR) { /* data before str_ptr? */
119 str->str_len += str->str_u.str_useful;
120 str->str_ptr -= str->str_u.str_useful;
121 str->str_u.str_useful = 0L;
122 bcopy(s, str->str_ptr, str->str_cur+1);
124 str->str_state = SS_NORM; /* normal again */
125 if (newlen > str->str_len)
126 newlen += 10 * (newlen - str->str_cur); /* avoid copy each time */
128 if (newlen > str->str_len) { /* need more room? */
130 Renew(s,newlen,char);
132 New(703,s,newlen,char);
134 str->str_len = newlen;
144 str->str_pok = 0; /* invalidate pointer */
145 if (str->str_state == SS_INCR)
148 str->str_u.str_nval = num;
149 str->str_state = SS_NORM;
150 str->str_nok = 1; /* validate number */
152 str->str_tainted = tainted;
168 olderrno = errno; /* some Xenix systems wipe out errno here */
169 #if defined(scs) && defined(ns32000)
170 gcvt(str->str_u.str_nval,20,s);
173 if (str->str_u.str_nval == 0.0)
177 (void)sprintf(s,"%.20g",str->str_u.str_nval);
187 if (str == &str_undef)
190 warn("Use of uninitialized variable");
195 str->str_cur = s - str->str_ptr;
199 fprintf(stderr,"0x%lx ptr(%s)\n",str,str->str_ptr);
210 if (str->str_state == SS_INCR)
211 Str_Grow(str,0); /* just force copy down */
212 str->str_state = SS_NORM;
213 if (str->str_len && str->str_pok)
214 str->str_u.str_nval = atof(str->str_ptr);
216 if (str == &str_undef)
219 warn("Use of uninitialized variable");
220 str->str_u.str_nval = 0.0;
225 fprintf(stderr,"0x%lx num(%g)\n",str,str->str_u.str_nval);
227 return str->str_u.str_nval;
236 tainted |= sstr->str_tainted;
241 dstr->str_pok = dstr->str_nok = 0;
242 else if (sstr->str_pok) {
243 str_nset(dstr,sstr->str_ptr,sstr->str_cur);
245 dstr->str_u.str_nval = sstr->str_u.str_nval;
247 dstr->str_state = SS_NORM;
249 else if (sstr->str_cur == sizeof(STBP)) {
250 char *tmps = sstr->str_ptr;
252 if (*tmps == 'S' && bcmp(tmps,"StB",4) == 0) {
253 dstr->str_magic = str_smake(sstr->str_magic);
254 dstr->str_magic->str_rare = 'X';
258 else if (sstr->str_nok)
259 str_numset(dstr,sstr->str_u.str_nval);
261 if (dstr->str_state == SS_INCR)
262 Str_Grow(dstr,0); /* just force copy down */
265 dstr->str_u = sstr->str_u;
267 dstr->str_u.str_nval = sstr->str_u.str_nval;
269 dstr->str_pok = dstr->str_nok = 0;
273 str_nset(str,ptr,len)
278 STR_GROW(str, len + 1);
280 (void)bcopy(ptr,str->str_ptr,len);
282 *(str->str_ptr+str->str_cur) = '\0';
283 str->str_nok = 0; /* invalidate number */
284 str->str_pok = 1; /* validate pointer */
286 str->str_tainted = tainted;
299 STR_GROW(str, len + 1);
300 (void)bcopy(ptr,str->str_ptr,len+1);
302 str->str_nok = 0; /* invalidate number */
303 str->str_pok = 1; /* validate pointer */
305 str->str_tainted = tainted;
309 str_chop(str,ptr) /* like set but assuming ptr is in str */
313 register STRLEN delta;
316 fatal("str_chop: internal inconsistency");
317 delta = ptr - str->str_ptr;
318 str->str_len -= delta;
319 str->str_cur -= delta;
320 str->str_ptr += delta;
321 if (str->str_state == SS_INCR)
322 str->str_u.str_useful += delta;
324 str->str_u.str_useful = delta;
325 str->str_state = SS_INCR;
327 str->str_nok = 0; /* invalidate number */
328 str->str_pok = 1; /* validate pointer (and unstudy str) */
331 str_ncat(str,ptr,len)
338 STR_GROW(str, str->str_cur + len + 1);
339 (void)bcopy(ptr,str->str_ptr+str->str_cur,len);
341 *(str->str_ptr+str->str_cur) = '\0';
342 str->str_nok = 0; /* invalidate number */
343 str->str_pok = 1; /* validate pointer */
345 str->str_tainted |= tainted;
354 tainted |= sstr->str_tainted;
358 if (!(sstr->str_pok))
359 (void)str_2ptr(sstr);
361 str_ncat(dstr,sstr->str_ptr,sstr->str_cur);
375 STR_GROW(str, str->str_cur + len + 1);
376 (void)bcopy(ptr,str->str_ptr+str->str_cur,len+1);
378 str->str_nok = 0; /* invalidate number */
379 str->str_pok = 1; /* validate pointer */
381 str->str_tainted |= tainted;
386 str_append_till(str,from,fromend,delim,keeplist)
389 register char *fromend;
398 len = fromend - from;
399 STR_GROW(str, str->str_cur + len + 1);
400 str->str_nok = 0; /* invalidate number */
401 str->str_pok = 1; /* validate pointer */
402 to = str->str_ptr+str->str_cur;
403 for (; from < fromend; from++,to++) {
404 if (*from == '\\' && from+1 < fromend && delim != '\\') {
406 if (from[1] == delim || from[1] == '\\')
411 else if (from[1] && index(keeplist,from[1]))
416 else if (*from == delim)
421 str->str_cur = to - str->str_ptr;
438 freestrroot = str->str_magic;
439 str->str_magic = Nullstr;
440 str->str_state = SS_NORM;
443 Newz(700+x,str,1,STR);
446 STR_GROW(str, len + 1);
451 str_magic(str, stab, how, name, namlen)
460 str->str_magic = Str_new(75,namlen);
461 str = str->str_magic;
462 str->str_u.str_stab = stab;
465 str_nset(str,name,namlen);
469 str_insert(bigstr,offset,len,little,littlelen)
478 register char *midend;
479 register char *bigend;
483 bigstr->str_pok = SP_VALID; /* disable possible screamer */
486 if (i > 0) { /* string might grow */
487 STR_GROW(bigstr, bigstr->str_cur + i + 1);
488 big = bigstr->str_ptr;
489 mid = big + offset + len;
490 midend = bigend = big + bigstr->str_cur;
493 while (midend > mid) /* shove everything down */
494 *--bigend = *--midend;
495 (void)bcopy(little,big+offset,littlelen);
496 bigstr->str_cur += i;
500 (void)bcopy(little,bigstr->str_ptr+offset,len);
504 big = bigstr->str_ptr;
507 bigend = big + bigstr->str_cur;
510 fatal("panic: str_insert");
512 if (mid - big > bigend - midend) { /* faster to shorten from end */
514 (void)bcopy(little, mid, littlelen);
519 (void)bcopy(midend, mid, i);
523 bigstr->str_cur = mid - big;
525 else if (i = mid - big) { /* faster from front */
528 str_chop(bigstr,midend-i);
533 (void)bcopy(little, mid, littlelen);
535 else if (littlelen) {
537 str_chop(bigstr,midend);
538 (void)bcopy(little,midend,littlelen);
541 str_chop(bigstr,midend);
546 /* make str point to what nstr pointed to: str takes over nstr's buffer, lengths, flags and value */
549 str_replace(str,nstr)
553 if (str->str_state == SS_INCR)
554 Str_Grow(str,0); /* just force copy down */
555 if (nstr->str_state == SS_INCR)
558 Safefree(str->str_ptr);
559 str->str_ptr = nstr->str_ptr;
560 str->str_len = nstr->str_len;
561 str->str_cur = nstr->str_cur;
562 str->str_pok = nstr->str_pok;
563 str->str_nok = nstr->str_nok;
565 str->str_u = nstr->str_u;
567 str->str_u.str_nval = nstr->str_u.str_nval;
570 str->str_tainted = nstr->str_tainted;
581 if (str->str_state) {
582 if (str->str_state == SS_FREE) /* already freed */
584 if (str->str_state == SS_INCR && !(str->str_pok & 2)) {
585 str->str_ptr -= str->str_u.str_useful;
586 str->str_len += str->str_u.str_useful;
590 str_free(str->str_magic);
593 Safefree(str->str_ptr);
594 if ((str->str_pok & SP_INTRP) && str->str_u.str_args)
595 arg_free(str->str_u.str_args);
599 if (str->str_len > 127) { /* next user not likely to want more */
600 Safefree(str->str_ptr); /* so give it back to malloc */
601 str->str_ptr = Nullch;
605 str->str_ptr[0] = '\0';
607 if ((str->str_pok & SP_INTRP) && str->str_u.str_args)
608 arg_free(str->str_u.str_args);
612 str->str_state = SS_FREE;
614 str->str_tainted = 0;
616 str->str_magic = freestrroot;
618 #endif /* LEAKTEST */
640 return str2 == Nullstr;
645 (void)str_2ptr(str1);
647 (void)str_2ptr(str2);
649 if (str1->str_cur != str2->str_cur)
652 return !bcmp(str1->str_ptr, str2->str_ptr, str1->str_cur);
662 return str2 == Nullstr;
667 (void)str_2ptr(str1);
669 (void)str_2ptr(str2);
671 if (str1->str_cur < str2->str_cur) {
672 if (retval = memcmp(str1->str_ptr, str2->str_ptr, str1->str_cur))
677 else if (retval = memcmp(str1->str_ptr, str2->str_ptr, str2->str_cur))
679 else if (str1->str_cur == str2->str_cur)
686 str_gets(str,fp,append)
691 register char *bp; /* we're going to steal some values */
692 register int cnt; /* from the stdio struct and put EVERYTHING */
693 register STDCHAR *ptr; /* in the innermost loop into registers */
694 register int newline = record_separator;/* (assuming >= 6 registers) */
698 register int get_paragraph;
699 register char *oldbp;
701 if (get_paragraph = !rslen) { /* yes, that's an assignment */
703 oldbp = Nullch; /* remember last \n position (none) */
705 #ifdef STDSTDIO /* Here is some breathtakingly efficient cheating */
707 cnt = fp->_cnt; /* get count into register */
708 str->str_nok = 0; /* invalidate number */
709 str->str_pok = 1; /* validate pointer */
710 if (str->str_len <= cnt + 1) /* make sure we have the room */
711 STR_GROW(str, append+cnt+2); /* (remembering cnt can be -1) */
712 bp = str->str_ptr + append; /* move these two too to registers */
716 while (--cnt >= 0) { /* this */ /* eat */
717 if ((*bp++ = *ptr++) == newline) /* really */ /* dust */
718 goto thats_all_folks; /* screams */ /* sed :-) */
721 fp->_cnt = cnt; /* deregisterize cnt and ptr */
723 i = _filbuf(fp); /* get more characters */
725 ptr = fp->_ptr; /* reregisterize cnt and ptr */
727 bpx = bp - str->str_ptr; /* prepare for possible relocation */
728 if (get_paragraph && oldbp)
729 obpx = oldbp - str->str_ptr;
731 STR_GROW(str, bpx + cnt + 2);
732 bp = str->str_ptr + bpx; /* reconstitute our pointer */
733 if (get_paragraph && oldbp)
734 oldbp = str->str_ptr + obpx;
736 if (i == newline) { /* all done for now? */
738 goto thats_all_folks;
740 else if (i == EOF) /* all done for ever? */
741 goto thats_really_all_folks;
742 *bp++ = i; /* now go back to screaming loop */
746 if (get_paragraph && bp - 1 != oldbp) {
747 oldbp = bp; /* remember where this newline was */
748 goto screamer; /* and go back to the fray */
750 thats_really_all_folks:
751 fp->_cnt = cnt; /* put these back or we're in trouble */
754 str->str_cur = bp - str->str_ptr; /* set length */
756 #else /* !STDSTDIO */ /* The big, slow, and stupid way */
759 static char buf[8192];
760 char * bpe = buf + sizeof(buf) - 3;
765 while ((i = getc(fp)) != EOF && (*bp++ = i) != newline && bp < bpe);
766 if (i == newline && get_paragraph &&
767 (i = getc(fp)) != EOF && (*bp++ = i) != newline && bp < bpe)
775 if (i != newline && i != EOF) {
781 #endif /* STDSTDIO */
783 return str->str_cur - append ? str->str_ptr : Nullch;
792 CMD *oldcurcmd = curcmd;
795 str_sset(linestr,str);
797 oldoldbufptr = oldbufptr = bufptr = str_get(linestr);
798 bufend = bufptr + linestr->str_cur;
799 if (++loop_ptr >= loop_max) {
801 Renew(loop_stack, loop_max, struct loop);
803 loop_stack[loop_ptr].loop_label = "_EVAL_";
804 loop_stack[loop_ptr].loop_sp = 0;
807 deb("(Pushing label #%d _EVAL_)\n", loop_ptr);
810 if (setjmp(loop_stack[loop_ptr].loop_env)) {
813 fatal("%s\n",stab_val(stabent("@",TRUE))->str_ptr);
817 char *tmps = loop_stack[loop_ptr].loop_label;
818 deb("(Popping label #%d %s)\n",loop_ptr,
825 curcmd->c_line = oldcurcmd->c_line;
829 if (retval || error_count)
830 fatal("Invalid component in string or format");
833 if (cmd->c_type != C_EXPR || cmd->c_next || arg->arg_type != O_LIST)
834 fatal("panic: error in parselist %d %x %d", cmd->c_type,
835 cmd->c_next, arg ? arg->arg_type : -1);
844 register char *s = str_get(src);
845 register char *send = s + src->str_cur;
850 register int brackets;
855 toparse = Str_new(76,0);
859 str_nset(toparse,"",0);
862 if (*s == '\\' && s[1] && index("$@[{\\]}",s[1])) {
863 str_ncat(str, t, s - t);
865 if (*nointrp && s+1 < send)
866 if (*s != '@' && (*s != '$' || index(nointrp,s[1])))
868 str_ncat(str, "$b", 2);
873 else if ((*s == '@' || (*s == '$' && !index(nointrp,s[1]))) &&
877 if (*s == '$' && s[1] == '#' && (isalpha(s[2]) || s[2] == '_'))
879 s = scanreg(s,send,tokenbuf);
881 (!(stab = stabent(tokenbuf,FALSE)) ||
882 (*s == '{' ? !stab_xhash(stab) : !stab_xarray(stab)) )) {
885 continue; /* grandfather @ from old scripts */
887 str_ncat(str,"$a",2);
888 str_ncat(toparse,",",1);
889 if (t[1] != '{' && (*s == '[' || *s == '{' /* }} */ ) &&
890 (stab = stabent(tokenbuf,FALSE)) &&
891 ((*s == '[') ? (stab_xarray(stab) != 0) : (stab_xhash(stab) != 0)) ) {
913 s = cpytill(tokenbuf,s+1,send,*s,&len);
915 fatal("Unterminated string");
920 } while (brackets > 0 && s < send);
922 fatal("Unmatched brackets in string");
923 if (*nointrp) { /* we're in a regular expression */
925 if (*d == '{' && s[-1] == '}') { /* maybe {n,m} */
927 if (isdigit(*d)) { /* matches /^{\d,?\d*}$/ */
933 s = checkpoint; /* Is {n,m}! Backoff! */
936 else if (*d == '[' && s[-1] == ']') { /* char class? */
937 int weight = 2; /* let's weigh the evidence */
939 unsigned char un_char = 0, last_un_char;
945 else if (d[1] == '$')
949 if (isdigit(d[2]) && !d[3])
955 for (d++; d < s; d++) {
956 last_un_char = un_char;
957 un_char = (unsigned char)*d;
961 weight -= seen[un_char] * 10;
962 if (isalpha(d[1]) || isdigit(d[1]) ||
964 d = scanreg(d,s,tokenbuf);
965 if (stabent(tokenbuf,FALSE))
970 else if (*d == '$' && d[1] &&
971 index("[#!%*<>()-=",d[1])) {
972 if (!d[2] || /*{*/ index("])} =",d[2]))
981 if (index("wds",d[1]))
983 else if (seen['\''] || seen['"'])
985 else if (index("rnftb",d[1]))
987 else if (isdigit(d[1])) {
989 while (d[1] && isdigit(d[1]))
997 if (last_un_char < d[1] || d[1] == '\\') {
998 if (index("aA01! ",last_un_char))
1000 if (index("zZ79~",d[1]))
1006 if (isalpha(*d) && d[1] && isalpha(d[1])) {
1008 if (yylex() != WORD)
1012 if (un_char == last_un_char + 1)
1014 weight -= seen[un_char];
1021 fprintf(stderr,"[%s] weight %d\n",
1022 checkpoint+1,weight);
1025 if (weight >= 0) /* probably a character class */
1031 str_ncat(toparse, "join($\",", 8);
1032 if (t[1] == '{' && s[-1] == '}') {
1033 str_ncat(toparse, t, 1);
1034 str_ncat(toparse, t+2, s - t - 3);
1037 str_ncat(toparse, t, s - t);
1039 str_ncat(toparse, ")", 1);
1045 str_ncat(str,t,s-t);
1046 if (toparse->str_ptr && *toparse->str_ptr == ',') {
1047 *toparse->str_ptr = '(';
1048 str_ncat(toparse,",$$);",5);
1049 str->str_u.str_args = parselist(toparse);
1050 str->str_u.str_args->arg_len--; /* ignore $$ reference */
1053 str->str_u.str_args = Nullarg;
1055 str->str_pok |= SP_INTRP;
1057 str_replace(src,str);
1068 register char *send;
1069 register STR **elem;
1071 if (!(src->str_pok & SP_INTRP)) {
1072 int oldsave = savestack->ary_fill;
1074 (void)savehptr(&curstash);
1075 curstash = src->str_u.str_hash; /* so stabent knows right package */
1077 restorelist(oldsave);
1079 s = src->str_ptr; /* assumed valid since str_pok set */
1081 send = s + src->str_cur;
1083 if (src->str_u.str_args) {
1084 (void)eval(src->str_u.str_args,G_ARRAY,sp);
1085 /* Assuming we have correct # of args */
1086 elem = stack->ary_array + sp;
1091 if (*s == '$' && s+1 < send) {
1092 str_ncat(str,t,s-t);
1095 str_scat(str,*++elem);
1098 str_ncat(str,++s,1);
1106 str_ncat(str,t,s-t);
1119 str->str_u.str_nval += 1.0;
1123 if (!str->str_pok || !*str->str_ptr) {
1124 str->str_u.str_nval = 1.0;
1130 while (isalpha(*d)) d++;
1131 while (isdigit(*d)) d++;
1133 str_numset(str,atof(str->str_ptr) + 1.0); /* punt */
1137 while (d >= str->str_ptr) {
1147 *(d--) -= 'z' - 'a' + 1;
1150 /* oh,oh, the number grew */
1151 STR_GROW(str, str->str_cur + 2);
1153 for (d = str->str_ptr + str->str_cur; d > str->str_ptr; d--)
1168 str->str_u.str_nval -= 1.0;
1172 if (!str->str_pok) {
1173 str->str_u.str_nval = -1.0;
1177 str_numset(str,atof(str->str_ptr) - 1.0);
1180 /* Make a string that will exist for the duration of the expression
1181 * evaluation. Actually, it may have to last longer than that, but
1182 * hopefully cmd_exec won't free it until it has been assigned to a
1183 * permanent location. */
1185 static long tmps_size = -1;
1191 register STR *str = Str_new(78,0);
1193 str_sset(str,oldstr);
1194 if (++tmps_max > tmps_size) {
1195 tmps_size = tmps_max;
1196 if (!(tmps_size & 127)) {
1198 Renew(tmps_list, tmps_size + 128, STR*);
1200 New(702,tmps_list, 128, STR*);
1203 tmps_list[tmps_max] = str;
1207 /* same thing as the routine above, but without the copying: str itself is put on tmps_list */
1213 if (++tmps_max > tmps_size) {
1214 tmps_size = tmps_max;
1215 if (!(tmps_size & 127)) {
1217 Renew(tmps_list, tmps_size + 128, STR*);
1219 New(704,tmps_list, 128, STR*);
1222 tmps_list[tmps_max] = str;
1231 register STR *str = Str_new(79,0);
1235 str_nset(str,s,len);
1243 register STR *str = Str_new(80,0);
1249 /* make an exact duplicate of old */
1255 register STR *new = Str_new(81,0);
1259 if (old->str_state == SS_FREE) {
1260 warn("semi-panic: attempt to dup freed string");
1263 if (old->str_state == SS_INCR && !(old->str_pok & 2))
1266 Safefree(new->str_ptr);
1267 Copy(old,new,1,STR);
1269 new->str_ptr = nsavestr(old->str_ptr,old->str_len);
1277 register HENT *entry;
1278 register STAB *stab;
1281 register SPAT *spat;
1284 if (!*s) { /* reset ?? searches */
1285 for (spat = stash->tbl_spatroot;
1287 spat = spat->spat_next) {
1288 spat->spat_flags &= ~SPAT_USED;
1293 /* reset variables */
1301 for ( ; i <= max; i++) {
1302 for (entry = stash->tbl_array[i];
1304 entry = entry->hent_next) {
1305 stab = (STAB*)entry->hent_val;
1306 str = stab_val(stab);
1310 str->str_tainted = tainted;
1312 if (str->str_ptr != Nullch)
1313 str->str_ptr[0] = '\0';
1314 if (stab_xarray(stab)) {
1315 aclear(stab_xarray(stab));
1317 if (stab_xhash(stab)) {
1318 hclear(stab_xhash(stab));
1319 if (stab == envstab)
1320 environ[0] = Nullch;
1333 fprintf(stderr,"%s %d %d %d\n",s,tainted,uid, euid);
1335 if (tainted && (!euid || euid != uid || egid != gid)) {
1345 register STR *envstr;
1347 envstr = hfetch(stab_hash(envstab),"PATH",4,FALSE);
1348 if (!envstr || envstr->str_tainted) {
1350 taintproper("Insecure PATH");
1352 envstr = hfetch(stab_hash(envstab),"IFS",3,FALSE);
1353 if (envstr && envstr->str_tainted) {
1355 taintproper("Insecure IFS");