From: Yves Orton Date: Wed, 5 Jul 2006 20:40:58 +0000 (+0200) Subject: Introduce a new function, pv_escape(), to display contents of PVs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3df15adcc3686bbc809ac9706048f258fd787941;hp=461824dcfbc00b3c4e20590f06d6c9881e4a416b;p=p5sagit%2Fp5-mst-13.2.git Introduce a new function, pv_escape(), to display contents of PVs that might contain non printable chars. Subject: Re: [PATCH]: fix: [perl #39583] Pattern Match fails for specific length string Message-ID: <9b18b3110607051140n10c211a1jf17d3b7914d6f68b@mail.gmail.com> p4raw-id: //depot/perl@28490 --- diff --git a/dump.c b/dump.c index 0116e99..98405c6 100644 --- a/dump.c +++ b/dump.c @@ -119,40 +119,121 @@ Perl_dump_eval(pTHX) op_dump(PL_eval_root); } -char * -Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim) -{ - const bool nul_terminated = len > cur && pv[cur] == '\0'; - bool truncated = 0; - sv_setpvn(dsv, "\"", 1); - for (; cur--; pv++) { - if (pvlim && SvCUR(dsv) >= pvlim) { - truncated = 1; - break; - } - switch (*pv) { - case '\t': sv_catpvs(dsv, "\\t"); break; - case '\n': sv_catpvs(dsv, "\\n"); break; - case '\r': sv_catpvs(dsv, "\\r"); break; - case '\f': sv_catpvs(dsv, "\\f"); break; - case '"': sv_catpvs(dsv, "\\\""); break; - case '\\': sv_catpvs(dsv, "\\\\"); break; - default: - if (isPRINT(*pv)) - sv_catpvn(dsv, pv, 1); - else if (cur && isDIGIT(*(pv+1))) - Perl_sv_catpvf(aTHX_ dsv, "\\%03o", (U8)*pv); - else - Perl_sv_catpvf(aTHX_ dsv, "\\%o", (U8)*pv); - } +/* +=for apidoc Apd|char*|pv_escape|NN SV *dsv|NN const char *pv|const STRLEN count|const STRLEN max|const U32 flags + +Escapes at most the first "count" chars of pv and puts the results into +buf such that the size of the escaped string will not exceed "max" chars +and will not contain any incomplete escape sequences. + +If flags contains PERL_PV_ESCAPE_QUOTE then the string will have quotes +placed around it; moreover, if the number of chars converted was less than +"count" then a trailing elipses (...) will be added after the closing +quote. + +If PERL_PV_ESCAPE_QUOTE is not set, but PERL_PV_ESCAPE_PADR is, then the +returned string will be right padded with spaces such that it is max chars +long. + +Normally the SV will be cleared before the escaped string is prepared, +but when PERL_PV_ESCAPE_CAT is set this will not occur. + +Returns a pointer to the string contained by SV. + +=cut +*/ + +char * +Perl_pv_escape( pTHX_ SV *dsv, const char *pv, const STRLEN count, const STRLEN max, const U32 flags ) { + char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\'; + char octbuf[8] = "\\0123456"; + STRLEN wrote = 0; + STRLEN chsize = 0; + const char *end = pv + count; + + if (flags & PERL_PV_ESCAPE_CAT) { + if ( dq == '"' ) + sv_catpvn(dsv, "\"", 1); + } else { + if ( dq == '"' ) + sv_setpvn(dsv, "\"", 1); + else + sv_setpvn(dsv, "", 0); + } + for ( ; (pv < end && (!max || (wrote < max))) ; pv++ ) { + if ( (*pv == dq) || (*pv == '\\') || isCNTRL(*pv) ) { + chsize = 2; + switch (*pv) { + case '\\' : octbuf[1] = '\\'; break; + case '\v' : octbuf[1] = 'v'; break; + case '\t' : octbuf[1] = 't'; break; + case '\r' : octbuf[1] = 'r'; break; + case '\n' : octbuf[1] = 'n'; break; + case '\f' : octbuf[1] = 'f'; break; + case '"' : if ( dq == *pv ) { + octbuf[1] = '"'; + break; + } + default: + /* note the (U8*) casts here are important. + * if they are omitted we can produce the octal + * for a negative number which could produce a + * buffer overrun in octbuf, with it on we are + * guaranteed that the longest the string could be + * is 5, (we reserve 8 just because its the first + * power of 2 larger than 5.)*/ + if ( (pv < end) && isDIGIT(*(pv+1)) ) + chsize = sprintf( octbuf, "\\%03o", (U8)*pv); + else + chsize = sprintf( octbuf, "\\%o", (U8)*pv); + } + if ( max && (wrote + chsize > max) ) { + break; + } else { + sv_catpvn(dsv, octbuf, chsize); + wrote += chsize; + } + } else { + sv_catpvn(dsv, pv, 1); + wrote++; + } } - sv_catpvs(dsv, "\""); - if (truncated) - sv_catpvs(dsv, "..."); - if (nul_terminated) - sv_catpvs(dsv, "\\0"); + if ( dq == '"' ) { + sv_catpvn( dsv, "\"", 1 ); + if ( pv < end ) + sv_catpvn( dsv, "...", 3 ); + } else if ( max && (flags & PERL_PV_ESCAPE_PADR) ) { + for ( ; wrote < max ; wrote++ ) + sv_catpvn( dsv, " ", 1 ); + } + return SvPVX(dsv); +} + +/* +=for apidoc pv_display + char *pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len, + STRLEN pvlim, U32 flags) + +Similar to + + pv_escape(dsv,pv,cur,pvlim,PERL_PV_ESCAPE_QUOTE); + +except that an additional "\0" will be appended to the string when +len > cur and pv[cur] is "\0". + +Note that the final string may be up to 7 chars longer than pvlim. + +=cut +*/ + +char * +Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim) +{ + pv_escape( dsv, pv, cur, pvlim, PERL_PV_ESCAPE_QUOTE); + if (len > cur && pv[cur] == '\0') + sv_catpvn( dsv, "\\0", 2 ); return SvPVX(dsv); } diff --git a/embed.fnc b/embed.fnc index c965b1a..81127e0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -980,8 +980,10 @@ Apd |void |sv_setpvn_mg |NN SV *sv|NN const char *ptr|STRLEN len Apd |void |sv_setsv_mg |NN SV *dstr|NULLOK SV *sstr Apdbm |void |sv_usepvn_mg |NN SV *sv|NULLOK char *ptr|STRLEN len ApR |MGVTBL*|get_vtbl |int vtbl_id -Ap |char* |pv_display |NN SV *dsv|NN const char *pv|STRLEN cur|STRLEN len \ +Apd |char* |pv_display |NN SV *dsv|NN const char *pv|STRLEN cur|STRLEN len \ |STRLEN pvlim +Apd |char* |pv_escape |NN SV *dsv|NN const char *pv|const STRLEN count \ + |const STRLEN max|const U32 flags Afp |void |dump_indent |I32 level|NN PerlIO *file|NN const char* pat|... Ap |void |dump_vindent |I32 level|NN PerlIO *file|NN const char* pat \ |NULLOK va_list *args diff --git a/embed.h b/embed.h index bc5027e..8e26433 100644 --- a/embed.h +++ b/embed.h @@ -1000,6 +1000,7 @@ #define sv_setsv_mg Perl_sv_setsv_mg #define get_vtbl Perl_get_vtbl #define pv_display Perl_pv_display +#define pv_escape Perl_pv_escape #define dump_indent Perl_dump_indent #define dump_vindent Perl_dump_vindent #define do_gv_dump Perl_do_gv_dump @@ -3182,6 +3183,7 @@ #define sv_setsv_mg(a,b) Perl_sv_setsv_mg(aTHX_ a,b) #define get_vtbl(a) Perl_get_vtbl(aTHX_ a) #define pv_display(a,b,c,d,e) Perl_pv_display(aTHX_ a,b,c,d,e) +#define pv_escape(a,b,c,d,e) Perl_pv_escape(aTHX_ a,b,c,d,e) #define dump_vindent(a,b,c,d) Perl_dump_vindent(aTHX_ a,b,c,d) #define do_gv_dump(a,b,c,d) Perl_do_gv_dump(aTHX_ a,b,c,d) #define do_gvgv_dump(a,b,c,d) Perl_do_gvgv_dump(aTHX_ a,b,c,d) diff --git a/global.sym b/global.sym index bf8b843..c737e97 100644 --- a/global.sym +++ b/global.sym @@ -605,6 +605,7 @@ Perl_sv_setsv_mg Perl_sv_usepvn_mg Perl_get_vtbl Perl_pv_display +Perl_pv_escape Perl_dump_indent Perl_dump_vindent Perl_do_gv_dump diff --git a/perl.h b/perl.h index 3a762fa..8fd8e21 100644 --- a/perl.h +++ b/perl.h @@ -5628,5 +5628,12 @@ extern void moncontrol(int); so that Configure picks them up. */ +/* these are used by Perl_pv_escape() and are here so that they + * are available throughout the core */ + +#define PERL_PV_ESCAPE_QUOTE 1 +#define PERL_PV_ESCAPE_PADR 2 +#define PERL_PV_ESCAPE_CAT 4 + #endif /* Include guard */ diff --git a/pod/perlapi.pod b/pod/perlapi.pod index ca1491d..d832d0a 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -753,6 +753,62 @@ Found in file perl.c =back +=head1 Functions in file dump.c + + +=over 8 + +=item pv_display +X + + char *pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len, + STRLEN pvlim, U32 flags) + +Similar to + + pv_escape(dsv,pv,cur,pvlim,PERL_PV_ESCAPE_QUOTE); + +except that an additional "\0" will be appended to the string when +len > cur and pv[cur] is "\0". + +Note that the final string may be up to 7 chars longer than pvlim. + + char* pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim) + +=for hackers +Found in file dump.c + +=item pv_escape +X + +Escapes at most the first "count" chars of pv and puts the results into +buf such that the size of the escaped string will not exceed "max" chars +and will not contain any incomplete escape sequences. + +If flags contains PERL_PV_ESCAPE_QUOTE then the string will have quotes +placed around it; moreover, if the number of chars converted was less than +"count" then a trailing elipses (...) will be added after the closing +quote. + +If PERL_PV_ESCAPE_QUOTE is not set, but PERL_PV_ESCAPE_PADR is, then the +returned string will be right padded with spaces such that it is max chars +long. + +Normally the SV will be cleared before the escaped string is prepared, +but when PERL_PV_ESCAPE_CAT is set this will not occur. + +Returns a pointer to the string contained by SV. + +NOTE: the perl_ form of this function is deprecated. + + char* pv_escape(SV *dsv, const char *pv, const STRLEN count, const STRLEN max, const U32 flags) + +=for hackers +Found in file dump.c + + +=back + =head1 Functions in file mathoms.c diff --git a/proto.h b/proto.h index 4272b1d..c595c31 100644 --- a/proto.h +++ b/proto.h @@ -2674,6 +2674,10 @@ PERL_CALLCONV char* Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, S __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); +PERL_CALLCONV char* Perl_pv_escape(pTHX_ SV *dsv, const char *pv, const STRLEN count, const STRLEN max, const U32 flags) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); + PERL_CALLCONV void Perl_dump_indent(pTHX_ I32 level, PerlIO *file, const char* pat, ...) __attribute__format__(__printf__,pTHX_3,pTHX_4) __attribute__nonnull__(pTHX_2) diff --git a/reentr.c b/reentr.c index 6321eb6..4dcaf80 100644 --- a/reentr.c +++ b/reentr.c @@ -44,7 +44,7 @@ Perl_reentrant_size(pTHX) { #ifdef HAS_GETGRNAM_R # if defined(HAS_SYSCONF) && defined(_SC_GETGR_R_SIZE_MAX) && !defined(__GLIBC__) PL_reentrant_buffer->_grent_size = sysconf(_SC_GETGR_R_SIZE_MAX); - if (PL_reentrant_buffer->_grent_size == -1U) + if (PL_reentrant_buffer->_grent_size == -1) PL_reentrant_buffer->_grent_size = REENTRANTUSUALSIZE; # else # if defined(__osf__) && defined(__alpha) && defined(SIABUFSIZ) diff --git a/regexec.c b/regexec.c index 9bb8317..53b9ff7 100644 --- a/regexec.c +++ b/regexec.c @@ -2616,8 +2616,9 @@ S_push_slab(pTHX) #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1) -#ifdef DEBUGGING -STATIC void +#ifdef DEBUGGING + +STATIC void S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8) { const int docolor = *PL_colors[0]; @@ -2646,24 +2647,30 @@ S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_u if (pref0_len > pref_len) pref0_len = pref_len; { - const char * const s0 = - do_utf8 && OP(scan) != CANY ? - pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len), - pref0_len, 60, UNI_DISPLAY_REGEX) : - locinput - pref_len; - const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len; - const char * const s1 = do_utf8 && OP(scan) != CANY ? - pv_uni_display(PERL_DEBUG_PAD(1), - (U8*)(locinput - pref_len + pref0_len), - pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) : - locinput - pref_len + pref0_len; - const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len; - const char * const s2 = do_utf8 && OP(scan) != CANY ? - pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput, - PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) : - locinput; - const int len2 = do_utf8 ? (int)strlen(s2) : l; - PerlIO_printf(Perl_debug_log, + const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0; + const char * const s0 = is_uni ? + pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len), + pref0_len, 60, UNI_DISPLAY_REGEX) : + pv_escape(PERL_DEBUG_PAD(0), (locinput - pref_len), + pref0_len, 60, 0); + + const int len0 = strlen(s0); + const char * const s1 = is_uni ? + pv_uni_display(PERL_DEBUG_PAD(1), + (U8*)(locinput - pref_len + pref0_len), + pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) : + pv_escape(PERL_DEBUG_PAD(1), + (locinput - pref_len + pref0_len), + pref_len - pref0_len, 60, 0); + + const int len1 = (int)strlen(s1); + const char * const s2 = is_uni ? + pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput, + PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) : + pv_escape(PERL_DEBUG_PAD(2), locinput, + PL_regeol - locinput, 60, 0); + const int len2 = (int)strlen(s2); + PerlIO_printf(Perl_debug_log, "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|", (IV)(locinput - PL_bostr), PL_colors[4], @@ -2680,6 +2687,7 @@ S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_u ""); } } + #endif STATIC I32 /* 0 failure, 1 success */ diff --git a/t/lib/warnings/9uninit b/t/lib/warnings/9uninit index fadcd1b..82fc387 100644 --- a/t/lib/warnings/9uninit +++ b/t/lib/warnings/9uninit @@ -1017,7 +1017,7 @@ my ($v); my %h = ("\0011\002\r\n\t\f\"\\abcdefghijklmnopqrstuvwxyz", undef); $v = join '', %h; EXPECT -Use of uninitialized value $h{"\0011\2\r\n\t\f\"\\abcdefghijkl"...} in join or string at - line 6. +Use of uninitialized value $h{"\0011\2\r\n\t\f\"\\abcdefghijklm"...} in join or string at - line 6. ######## use warnings 'uninitialized'; my ($m1, $v);