From: Hugo van der Sanden Date: Thu, 11 Jan 2001 17:09:03 +0000 (+0000) Subject: Re: new feature: s?printf parameter reordering X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=211dfcf14199529e353c08dea10d7050e6a4a22a;p=p5sagit%2Fp5-mst-13.2.git Re: new feature: s?printf parameter reordering Message-Id: <200101111709.RAA23756@crypt.compulink.co.uk> - support reordering for all parameters: %, *v, *, .* - lay down that the reordering specification must immediately follow that parameter: %3$, *v3$, *3$, .*3$ - fix vectorisation of a zero-length string - factor out the code choosing the argument to format Possibly unwanted side-effects: - the special format specifiers ' +-0' must now precede any vectorisation specifier. Tests in op/sprintf and op/ver have been changed to reflect this. - sprintf.t test #214 changed its expectations because in many cases, the next parameter has already been consumed when an invalid type letter is detected. Probably wanted side-effects: - attempts to format a non-existent parameter will warn as if C - attempt to write to non-existent parameter with '%n' will complain of "attempt to modify read-only value" instead of being silent p4raw-id: //depot/perl@8481 --- diff --git a/sv.c b/sv.c index c14809b..526ed08 100644 --- a/sv.c +++ b/sv.c @@ -6704,6 +6704,21 @@ Perl_sv_vsetpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted); } +I32 +S_expect_number(char** pattern) +{ + I32 var = 0; + switch (**pattern) { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + while (isDIGIT(**pattern)) + var = var * 10 + (*(*pattern)++ - '0'); + } + return var; +} +#define EXPECT_NUMBER(pattern, var) (var = S_expect_number(&pattern)) + /* =for apidoc sv_vcatpvfn @@ -6764,6 +6779,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool alt = FALSE; bool left = FALSE; bool vectorize = FALSE; + bool vectorarg = FALSE; bool utf = FALSE; char fill = ' '; char plus = 0; @@ -6801,10 +6817,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN gap; char *dotstr = "."; STRLEN dotstrlen = 1; - I32 epix = 0; /* explicit parameter index */ + I32 efix = 0; /* explicit format parameter index */ I32 ewix = 0; /* explicit width index */ + I32 epix = 0; /* explicit precision index */ + I32 evix = 0; /* explicit vector index */ bool asterisk = FALSE; + /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { sv_catpvn(sv, p, q - p); @@ -6813,6 +6832,25 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (q++ >= patend) break; +/* + We allow format specification elements in this order: + \d+\$ explicit format parameter index + [-+ 0#]+ flags + \*?(\d+\$)?v vector with optional (optionally specified) arg + \d+|\*(\d+\$)? width using optional (optionally specified) arg + \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg + [hlqLV] size + [%bcdefginopsux_DFOUX] format (mandatory) +*/ + if (EXPECT_NUMBER(q, width)) { + if (*q == '$') { + ++q; + efix = width; + } else { + goto gotwidth; + } + } + /* FLAGS */ while (*q) { @@ -6836,64 +6874,63 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV q++; continue; - case '*': /* printf("%*vX",":",$ipv6addr) */ - if (q[1] != 'v') - break; - q++; - if (args) - vecsv = va_arg(*args, SV*); - else if (svix < svmax) - vecsv = svargs[svix++]; - else - continue; - dotstr = SvPVx(vecsv,dotstrlen); - if (DO_UTF8(vecsv)) - is_utf = TRUE; - /* FALL THROUGH */ - - case 'v': - vectorize = TRUE; - q++; - continue; - default: break; } break; } - /* WIDTH */ - - scanwidth: - + tryasterisk: if (*q == '*') { - if (asterisk) - goto unknown; + q++; + if (EXPECT_NUMBER(q, ewix)) + if (*q++ != '$') + goto unknown; asterisk = TRUE; + } + if (*q == 'v') { q++; + if (vectorize) + goto unknown; + if (vectorarg = asterisk) { + evix = ewix; + ewix = 0; + asterisk = FALSE; + } + vectorize = TRUE; + goto tryasterisk; } - switch (*q) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - width = 0; - while (isDIGIT(*q)) - width = width * 10 + (*q++ - '0'); - if (*q == '$') { - if (asterisk && ewix == 0) { - ewix = width; - width = 0; - q++; - goto scanwidth; - } else if (epix == 0) { - epix = width; - width = 0; - q++; - goto scanwidth; - } else - goto unknown; + if (!asterisk) + EXPECT_NUMBER(q, width); + + if (vectorize) { + if (vectorarg) { + if (args) + vecsv = va_arg(*args, SV*); + else + vecsv = (evix ? evix <= svmax : svix < svmax) ? + svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef; + dotstr = (U8*)SvPVx(vecsv, dotstrlen); + if (DO_UTF8(vecsv)) + is_utf = TRUE; + } + if (args) { + vecsv = va_arg(*args, SV*); + vecstr = (U8*)SvPVx(vecsv,veclen); + utf = DO_UTF8(vecsv); } + else if (efix ? efix <= svmax : svix < svmax) { + vecsv = svargs[efix ? efix-1 : svix++]; + vecstr = (U8*)SvPVx(vecsv,veclen); + utf = DO_UTF8(vecsv); + } + else { + vecstr = (U8*)""; + veclen = 0; + } + if (DO_UTF8(vecsv)) + is_utf = TRUE; } if (asterisk) { @@ -6905,19 +6942,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV left |= (i < 0); width = (i < 0) ? -i : i; } + gotwidth: /* PRECISION */ if (*q == '.') { q++; if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, epix) && *q++ != '$') + goto unknown; if (args) i = va_arg(*args, int); else i = (ewix ? ewix <= svmax : svix < svmax) ? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; precis = (i < 0) ? 0 : i; - q++; } else { precis = 0; @@ -6927,23 +6967,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV has_precis = TRUE; } - if (vectorize) { - if (args) { - vecsv = va_arg(*args, SV*); - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else if (epix ? epix <= svmax : svix < svmax) { - vecsv = svargs[epix ? epix-1 : svix++]; - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else { - vecstr = (U8*)""; - veclen = 0; - } - } - /* SIZE */ switch (*q) { @@ -6975,21 +6998,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* CONVERSION */ + if (*q == '%') { + eptr = q++; + elen = 1; + goto string; + } + + if (!args) + argsv = (efix ? efix <= svmax : svix < svmax) ? + svargs[efix ? efix-1 : svix++] : &PL_sv_undef; + switch (c = *q++) { /* STRINGS */ - case '%': - eptr = q - 1; - elen = 1; - goto string; - case 'c': - if (args) - uv = va_arg(*args, int); - else - uv = (epix ? epix <= svmax : svix < svmax) ? - SvIVx(svargs[epix ? epix-1 : svix++]) : 0; + uv = args ? va_arg(*args, int) : SvIVx(argsv); if ((uv > 255 || (uv > 127 && SvUTF8(sv))) && !IN_BYTE) { eptr = (char*)utf8buf; elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; @@ -7018,8 +7042,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV elen = sizeof nullstr - 1; } } - else if (epix ? epix <= svmax : svix < svmax) { - argsv = svargs[epix ? epix-1 : svix++]; + else { eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) { if (has_precis && precis < elen) { @@ -7043,7 +7066,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV */ if (!args) goto unknown; - argsv = va_arg(*args,SV*); + argsv = va_arg(*args, SV*); eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) is_utf = TRUE; @@ -7059,11 +7082,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'p': if (alt) goto unknown; - if (args) - uv = PTR2UV(va_arg(*args, void*)); - else - uv = (epix ? epix <= svmax : svix < svmax) ? - PTR2UV(svargs[epix ? epix-1 : svix++]) : 0; + uv = PTR2UV(args ? va_arg(*args, void*) : argsv); base = 16; goto integer; @@ -7078,10 +7097,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'i': if (vectorize) { STRLEN ulen; - if (!veclen) { - vectorize = FALSE; - break; - } + if (!veclen) + continue; if (utf) iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0); else { @@ -7103,8 +7120,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - iv = (epix ? epix <= svmax : svix < svmax) ? - SvIVx(svargs[epix ? epix-1 : svix++]) : 0; + iv = SvIVx(argsv); switch (intsize) { case 'h': iv = (short)iv; break; default: break; @@ -7161,10 +7177,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (vectorize) { STRLEN ulen; vector: - if (!veclen) { - vectorize = FALSE; - break; - } + if (!veclen) + continue; if (utf) uv = utf8_to_uv(vecstr, veclen, &ulen, 0); else { @@ -7186,8 +7200,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - uv = (epix ? epix <= svmax : svix < svmax) ? - SvUVx(svargs[epix ? epix-1 : svix++]) : 0; + uv = SvUVx(argsv); switch (intsize) { case 'h': uv = (unsigned short)uv; break; default: break; @@ -7276,11 +7289,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* This is evil, but floating point is even more evil */ vectorize = FALSE; - if (args) - nv = va_arg(*args, NV); - else - nv = (epix ? epix <= svmax : svix < svmax) ? - SvNVx(svargs[epix ? epix-1 : svix++]) : 0.0; + nv = args ? va_arg(*args, NV) : SvNVx(argsv); need = 0; if (c != 'e' && c != 'E') { @@ -7360,8 +7369,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV #endif } } - else if (epix ? epix <= svmax : svix < svmax) - sv_setuv_mg(svargs[epix ? epix-1 : svix++], (UV)i); + else + sv_setuv_mg(argsv, (UV)i); continue; /* not "break" */ /* UNKNOWN */ diff --git a/t/op/sprintf.t b/t/op/sprintf.t index 4e80999..055b0e4 100755 --- a/t/op/sprintf.t +++ b/t/op/sprintf.t @@ -21,7 +21,9 @@ print '1..', scalar @tests, "\n"; $SIG{__WARN__} = sub { if ($_[0] =~ /^Invalid conversion/) { - $w = ' INVALID' + $w = ' INVALID'; + } elsif ($_[0] =~ /^Use of uninitialized value/) { + $w = ' UNINIT'; } else { warn @_; } @@ -175,19 +177,19 @@ __END__ >%#vd< >chr(1)< >1< >%vd< >"\01\02\03"< >1.2.3< >%v.3d< >"\01\02\03"< >001.002.003< ->%v03d< >"\01\02\03"< >001.002.003< ->%v-3d< >"\01\02\03"< >1 .2 .3 < ->%v+-3d< >"\01\02\03"< >+1 .2 .3 < +>%0v3d< >"\01\02\03"< >001.002.003< +>%-v3d< >"\01\02\03"< >1 .2 .3 < +>%+-v3d< >"\01\02\03"< >+1 .2 .3 < >%v4.3d< >"\01\02\03"< > 001. 002. 003< ->%v04.3d< >"\01\02\03"< >0001.0002.0003< ->%*v02d< >['-', "\0\7\14"]< >00-07-12< ->%v.*d< >[3, "\01\02\03"]< >001.002.003< ->%v0*d< >[3, "\01\02\03"]< >001.002.003< ->%v-*d< >[3, "\01\02\03"]< >1 .2 .3 < ->%v+-*d< >[3, "\01\02\03"]< >+1 .2 .3 < ->%v*.*d< >[4, 3, "\01\02\03"]< > 001. 002. 003< ->%v0*.*d< >[4, 3, "\01\02\03"]< >0001.0002.0003< ->%*v0*d< >['-', 2, "\0\7\13"]< >00-07-11< +>%0v4.3d< >"\01\02\03"< >0001.0002.0003< +>%0*v2d< >['-', "\0\7\14"]< >00-07-12< +>%v.*d< >["\01\02\03", 3]< >001.002.003< +>%0v*d< >["\01\02\03", 3]< >001.002.003< +>%-v*d< >["\01\02\03", 3]< >1 .2 .3 < +>%+-v*d< >["\01\02\03", 3]< >+1 .2 .3 < +>%v*.*d< >["\01\02\03", 4, 3]< > 001. 002. 003< +>%0v*.*d< >["\01\02\03", 4, 3]< >0001.0002.0003< +>%0*v*d< >['-', "\0\7\13", 2]< >00-07-11< >%e< >1234.875< >1.234875e+03< >%e< >0.000012345< >1.234500e-05< >%e< >1234567E96< >1.234567e+102< @@ -314,10 +316,11 @@ __END__ >%2$d %d %d< >[12, 34]< >34 12 34< >%3$d %d %d< >[12, 34, 56]< >56 12 34< >%2$*3$d %d< >[12, 34, 3]< > 34 12< ->%*3$2$d %d< >[12, 34, 3]< > 34 12< ->%2$d< >12< >0< +>%*3$2$d %d< >[12, 34, 3]< >%*3$2$d 34 INVALID< +>%2$d< >12< >0 UNINIT< >%0$d< >12< >%0$d INVALID< >%1$$d< >12< >%1$$d INVALID< >%1$1$d< >12< >%1$1$d INVALID< >%*2$*2$d< >[12, 3]< >%*2$*2$d INVALID< >%*2*2$d< >[12, 3]< >%*2*2$d INVALID< +>%0v2.2d< >''< >< diff --git a/t/op/ver.t b/t/op/ver.t index edfebd2..b9ba589 100755 --- a/t/op/ver.t +++ b/t/op/ver.t @@ -102,10 +102,10 @@ print "not " unless sprintf("%vX", 1.22.333.4444) eq '1.16.14D.115C'; print "ok $test\n"; ++$test; if (ord("\t") == 9) { # ASCII - print "not " unless sprintf("%*v#o", ":", "Perl") eq '0120:0145:0162:0154'; + print "not " unless sprintf("%#*vo", ":", "Perl") eq '0120:0145:0162:0154'; } else { - print "not " unless sprintf("%*v#o", ":", "Perl") eq '0327:0205:0231:0223'; + print "not " unless sprintf("%#*vo", ":", "Perl") eq '0327:0205:0231:0223'; } print "ok $test\n"; ++$test; @@ -144,10 +144,10 @@ print "ok $test\n"; ++$test; print "ok $test\n"; ++$test; if (ord("\t") == 9) { # ASCII - print "not " unless sprintf("%*v#o", ":", "Perl") eq '0120:0145:0162:0154'; + print "not " unless sprintf("%#*vo", ":", "Perl") eq '0120:0145:0162:0154'; } else { - print "not " unless sprintf("%*v#o", ":", "Perl") eq '0327:0205:0231:0223'; + print "not " unless sprintf("%#*vo", ":", "Perl") eq '0327:0205:0231:0223'; } print "ok $test\n"; ++$test;