From: LAUN Wolfgang Date: Mon, 17 Mar 2003 13:55:37 +0000 (+0100) Subject: pack changes and related fixes X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=497043642ba2050cd87b28b50f6a01a0f50d0e90;p=p5sagit%2Fp5-mst-13.2.git pack changes and related fixes Message-ID: <75A46BF1A9D8D311863A00508B6259A405F17EB8@ATTMSX4> p4raw-id: //depot/perl@19010 --- diff --git a/embed.fnc b/embed.fnc index d4e1f35..c288f72 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1060,14 +1060,16 @@ s |U32 |seed #endif #if defined(PERL_IN_PP_PACK_C) || defined(PERL_DECL_PROT) -s |void |doencodes |SV* sv|char* s|I32 len +s |I32 |unpack_rec |tempsym_t* symptr|char *s|char *strbeg|char *strend|char **new_s +s |SV ** |pack_rec |SV *cat|tempsym_t* symptr|SV **beglist|SV **endlist s |SV* |mul128 |SV *sv|U8 m +s |I32 |measure_struct |tempsym_t* symptr +s |char * |group_end |char *pat|char *patend|char ender +s |char * |get_num |char *ppat|I32 * +s |bool |next_symbol |tempsym_t* symptr +s |void |doencodes |SV* sv|char* s|I32 len s |SV* |is_an_int |char *s|STRLEN l s |int |div128 |SV *pnum|bool *done -s |char * |next_symbol |char *pat|char *patend -s |I32 |find_count |char **ppat|char *patend|int *star -s |char * |group_end |char *pat|char *patend|char ender -s |I32 |measure_struct |char *pat|char *patend #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) diff --git a/embed.h b/embed.h index b03d9de..8793f64 100644 --- a/embed.h +++ b/embed.h @@ -1469,28 +1469,34 @@ #endif #if defined(PERL_IN_PP_PACK_C) || defined(PERL_DECL_PROT) #ifdef PERL_CORE -#define doencodes S_doencodes +#define unpack_rec S_unpack_rec +#endif +#ifdef PERL_CORE +#define pack_rec S_pack_rec #endif #ifdef PERL_CORE #define mul128 S_mul128 #endif #ifdef PERL_CORE -#define is_an_int S_is_an_int +#define measure_struct S_measure_struct #endif #ifdef PERL_CORE -#define div128 S_div128 +#define group_end S_group_end +#endif +#ifdef PERL_CORE +#define get_num S_get_num #endif #ifdef PERL_CORE #define next_symbol S_next_symbol #endif #ifdef PERL_CORE -#define find_count S_find_count +#define doencodes S_doencodes #endif #ifdef PERL_CORE -#define group_end S_group_end +#define is_an_int S_is_an_int #endif #ifdef PERL_CORE -#define measure_struct S_measure_struct +#define div128 S_div128 #endif #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) @@ -3927,28 +3933,34 @@ #endif #if defined(PERL_IN_PP_PACK_C) || defined(PERL_DECL_PROT) #ifdef PERL_CORE -#define doencodes(a,b,c) S_doencodes(aTHX_ a,b,c) +#define unpack_rec(a,b,c,d,e) S_unpack_rec(aTHX_ a,b,c,d,e) +#endif +#ifdef PERL_CORE +#define pack_rec(a,b,c,d) S_pack_rec(aTHX_ a,b,c,d) #endif #ifdef PERL_CORE #define mul128(a,b) S_mul128(aTHX_ a,b) #endif #ifdef PERL_CORE -#define is_an_int(a,b) S_is_an_int(aTHX_ a,b) +#define measure_struct(a) S_measure_struct(aTHX_ a) #endif #ifdef PERL_CORE -#define div128(a,b) S_div128(aTHX_ a,b) +#define group_end(a,b,c) S_group_end(aTHX_ a,b,c) #endif #ifdef PERL_CORE -#define next_symbol(a,b) S_next_symbol(aTHX_ a,b) +#define get_num(a,b) S_get_num(aTHX_ a,b) #endif #ifdef PERL_CORE -#define find_count(a,b,c) S_find_count(aTHX_ a,b,c) +#define next_symbol(a) S_next_symbol(aTHX_ a) #endif #ifdef PERL_CORE -#define group_end(a,b,c) S_group_end(aTHX_ a,b,c) +#define doencodes(a,b,c) S_doencodes(aTHX_ a,b,c) +#endif +#ifdef PERL_CORE +#define is_an_int(a,b) S_is_an_int(aTHX_ a,b) #endif #ifdef PERL_CORE -#define measure_struct(a,b) S_measure_struct(aTHX_ a,b) +#define div128(a,b) S_div128(aTHX_ a,b) #endif #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) diff --git a/lib/diagnostics.pm b/lib/diagnostics.pm index 1ba70c5..466b9e9 100755 --- a/lib/diagnostics.pm +++ b/lib/diagnostics.pm @@ -53,7 +53,7 @@ escape sequences for pagers. Warnings dispatched from perl itself (or more accurately, those that match descriptions found in L) are only displayed once (no duplicate -descriptions). User code generated warnings ala warn() are unaffected, +descriptions). User code generated warnings a la warn() are unaffected, allowing duplicate user messages to be displayed. =head2 The I Program @@ -296,6 +296,7 @@ our %HTML_Escapes; *THITHER = $standalone ? *STDOUT : *STDERR; +my %transfmt = (); my $transmo = </$1/g; - if ($header =~ /%[csd]/) { - my $rhs = my $lhs = $header; - if ($lhs =~ s/(.*?)%d(?!%d)(.*)/\Q$1\E-?\\d+\Q$2\E/g) { - $lhs =~ s/\\%s/.*?/g; - } else { - # if i had lookbehind negations, - # i wouldn't have to do this \377 noise - $lhs =~ s/(.*?)%s/\Q$1\E.*?\377/g; - $lhs =~ s/\377([^\377]*)$/\Q$1\E/; - $lhs =~ s/\377//g; - $lhs =~ s/\.\*\?$/.*/; # Allow %s at the end to eat it all - } - $lhs =~ s/\\%c/./g; - $transmo .= " s{^$lhs}\n {\Q$rhs\E}s\n\t&& return 1;\n"; + my @toks = split( /(%l?[dx]|%c|%(?:\.\d+)?s)/, $header ); + if (@toks > 1) { + my $conlen = 0; + for my $i (0..$#toks){ + if( $i % 2 ){ + if( $toks[$i] eq '%c' ){ + $toks[$i] = '.'; + } elsif( $toks[$i] eq '%d' ){ + $toks[$i] = '\d+'; + } elsif( $toks[$i] eq '%s' ){ + $toks[$i] = $i == $#toks ? '.*' : '.*?'; + } elsif( $toks[$i] =~ '%.(\d+)s' ){ + $toks[$i] = ".{$1}"; + } elsif( $toks[$i] =~ '^%l*x$' ){ + $toks[$i] = '[\da-f]+'; + } + } elsif( length( $toks[$i] ) ){ + $toks[$i] =~ s/^.*$/\Q$&\E/; + $conlen += length( $toks[$i] ); + } + } + my $lhs = join( '', @toks ); + $transfmt{$header}{pat} = + " s{^$lhs}\n {\Q$header\E}s\n\t&& return 1;\n"; + $transfmt{$header}{len} = $conlen; } else { - $transmo .= " m{^\Q$header\E} && return 1;\n"; + $transfmt{$header}{pat} = + " m{^\Q$header\E} && return 1;\n"; + $transfmt{$header}{len} = length( $header ); } print STDERR "$WHOAMI: Duplicate entry: \"$header\"\n" @@ -390,6 +404,12 @@ my %msg; die "No diagnostics?" unless %msg; + # Apply patterns in order of decreasing sum of lengths of fixed parts + # Seems the best way of hitting the right one. + for my $hdr ( sort { $transfmt{$b}{len} <=> $transfmt{$a}{len} } + keys %transfmt ){ + $transmo .= $transfmt{$hdr}{pat}; + } $transmo .= " return 0;\n}\n"; print STDERR $transmo if $DEBUG; eval $transmo; @@ -505,15 +525,33 @@ sub splainthis { s/\.?\n+$//; my $orig = $_; # return unless defined; + + # get rid of the where-are-we-in-input part s/, <.*?> (?:line|chunk).*$//; - my $real = s/(.*?) at .*? (?:line|chunk) \d+.*/$1/; + + # Discard 1st " at line " and all text beyond + # but be aware of messsages containing " at this-or-that" + my $real = 0; + my @secs = split( / at / ); + $_ = $secs[0]; + for my $i ( 1..$#secs ){ + if( $secs[$i] =~ /.+? (?:line|chunk) \d+/ ){ + $real = 1; + last; + } else { + $_ .= ' at ' . $secs[$i]; + } + } + + # remove parenthesis occurring at the end of some messages s/^\((.*)\)$/$1/; + if ($exact_duplicate{$orig}++) { return &transmo; - } - else { + } else { return 0 unless &transmo; } + $orig = shorten($orig); if ($old_diag{$_}) { autodescribe(); diff --git a/perl.h b/perl.h index 547165a..89d1494 100644 --- a/perl.h +++ b/perl.h @@ -3355,6 +3355,25 @@ typedef void *Thread; #undef PERLVARI #undef PERLVARIC +/* Types used by pack/unpack */ +typedef enum { + e_no_len, /* no length */ + e_number, /* number, [] */ + e_star /* asterisk */ +} howlen_t; + +typedef struct { + char* patptr; /* current template char */ + char* patend; /* one after last char */ + char* grpbeg; /* 1st char of ()-group */ + char* grpend; /* end of ()-group */ + I32 code; /* template code (!) */ + I32 length; /* length/repeat count */ + howlen_t howlen; /* how length is given */ + int level; /* () nesting level */ + U32 flags; /* /=4, comma=2, pack=1 */ +} tempsym_t; + #include "thread.h" #include "pp.h" diff --git a/pod/perldiag.pod b/pod/perldiag.pod index daa0837..af771f1 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -64,7 +64,7 @@ L. =item '!' allowed only after types %s -(F) The '!' is allowed in pack() and unpack() only after certain types. +(F) The '!' is allowed in pack() or unpack(9 only after certain types. See L. =item Ambiguous call resolved as CORE::%s(), qualify as such or use & @@ -466,30 +466,24 @@ checking. Alternatively, if you are certain that you're calling the function correctly, you may put an ampersand before the name to avoid the warning. See L. -=item Can only compress unsigned integers +=item Can only compress unsigned integers in pack (F) An argument to pack("w",...) was not an integer. The BER compressed integer format can only be used with positive integers, and you attempted to compress something else. See L. -=item Cannot compress integer +=item Cannot compress integer in pack (F) An argument to pack("w",...) was too large to compress. The BER compressed integer format can only be used with positive integers, and you attempted to compress Infinity or a very large number (> 1e308). See L. -=item Cannot compress negative numbers +=item Cannot compress negative numbers in pack (F) An argument to pack("w",...) was negative. The BER compressed integer format can only be used with positive integers. See L. -=item / cannot take a count - -(F) You had an unpack template indicating a counted-length string, but -you have also specified an explicit size for the string. See -L. - =item Can't bless non-reference value (F) Only hard references may be blessed. This is how Perl "enforces" @@ -1127,7 +1121,7 @@ references can be weakened. with an assignment operator, which implies modifying the value itself. Perhaps you need to copy the value to a temporary, and repeat that. -=item Character in "C" format wrapped +=item Character in "C" format wrapped in pack (W pack) You said @@ -1142,7 +1136,7 @@ and so on) and not for Unicode characters, so Perl behaved as if you meant If you actually want to pack Unicode codepoints, use the C<"U"> format instead. -=item Character in "c" format wrapped +=item Character in "c" format wrapped in pack (W pack) You said @@ -1157,6 +1151,11 @@ and so on) and not for Unicode characters, so Perl behaved as if you meant If you actually want to pack Unicode codepoints, use the C<"U"> format instead. +=item Code missing after '/' + +(F) You had a (sub-)template that ends with a '/'. There must be another +template code following the slash. See L. + =item close() on unopened filehandle %s (W unopened) You tried to close a filehandle that was never opened. @@ -1264,6 +1263,12 @@ valid magic number. (P) The malloc package that comes with Perl had an internal failure. +=item Count after length/code in unpack + +(F) You had an unpack template indicating a counted-length string, but +you have also specified an explicit size for the string. See +L. + =item C<-p> destination: %s (F) An error occurred during the implicit output invoked by the C<-p> @@ -1347,6 +1352,11 @@ See Server error. (F) You said something like "use Module 42" but the Module did not define a C<$VERSION.> +=item '/' does not take a repeat count + +(F) You cannot put a repeat count of any kind right after the '/' code. +See L. + =item Don't know how to handle magic of type '%s' (P) The internal handling of magical variables has been cursed. @@ -1550,9 +1560,7 @@ some time before now. Check your control flow. flock() operates on filehandles. Are you attempting to call flock() on a dirhandle by the same name? -=item Quantifier follows nothing in regex; - -marked by <-- HERE in m/%s/ +=item Quantifier follows nothing in regex; marked by <-- HERE in m/%s/ (F) You started a regular expression with a quantifier. Backslash it if you meant it literally. The <-- HERE shows in the regular expression about @@ -1655,10 +1663,11 @@ version of Perl, and this should not happen anyway. (F) Unlike with "next" or "last", you're not allowed to goto an unspecified destination. See L. -=item %s-group starts with a count +=item ()-group starts with a count -(F) In pack/unpack a ()-group started with a count. A count is +(F) A ()-group started with a count. A count is supposed to follow something: a template character or a ()-group. + See L. =item %s had compilation errors @@ -1900,17 +1909,11 @@ elements of an attribute list. If the previous attribute had a parenthesised parameter list, perhaps that list was terminated too soon. See L. -=item Invalid type in pack: '%s' - -(F) The given character is not a valid pack type. See L. -(W pack) The given character is not a valid pack type but used to be -silently ignored. +=item Invalid type '%s' in %s -=item Invalid type in unpack: '%s' - -(F) The given character is not a valid unpack type. See -L. -(W unpack) The given character is not a valid unpack type but used to be +(F) The given character is not a valid pack or unpack type. +See L. +(W) The given character is not a valid pack or unpack type but used to be silently ignored. =item Invalid version format (multiple underscores) @@ -1977,6 +1980,12 @@ L. (F) While under the C pragma, switching the real and effective uids or gids failed. +=item length/code after end of string in unpack + +(F) While unpacking, the string buffer was alread used up when an unpack +length/code combination tried to obtain more data. This results in +an undefined value for the length. See L. + =item listen() on closed socket %s (W closed) You tried to do a listen on a closed socket. Did you forget @@ -1995,14 +2004,22 @@ instead on the filehandle.) values cannot be returned in subroutines used in lvalue context. See L. -=item Lookbehind longer than %d not implemented in regex; - -marked by <-- HERE in m/%s/ +=item Lookbehind longer than %d not implemented in regex; marked by <-- HERE in m/%s/ (F) There is currently a limit on the length of string which lookbehind can handle. This restriction may be eased in a future release. The <-- HERE shows in the regular expression about where the problem was discovered. +=item Malformed integer in [] in pack + +(F) Between the brackets enclosing a numeric repeat count only digits +are permitted. See L. + +=item Malformed integer in [] in unpack + +(F) Between the brackets enclosing a numeric repeat count only digits +are permitted. See L. + =item Malformed PERLLIB_PREFIX (F) An error peculiar to OS/2. PERLLIB_PREFIX should be of the form @@ -2037,9 +2054,7 @@ possibility is careless use of utf8::upgrade(). Perl thought it was reading UTF-16 encoded character data but while doing it Perl met a malformed Unicode surrogate. -=item %s matches null string many times in regex; - -marked by <-- HERE in m/%s/ +=item %s matches null string many times in regex; marked by <-- HERE in m/%s/ (W regexp) The pattern you've specified would be an infinite loop if the regular expression engine didn't specifically check for that. The <-- HERE @@ -2052,7 +2067,7 @@ See L. interpreter, especially if the word that is being warned about is "use" or "my". -=item % may only be used in unpack +=item % may not be used in pack (F) You can't pack a string by supplying a checksum, because the checksumming process loses information, and you can't go the other way. @@ -2187,22 +2202,17 @@ See L for details. (W syntax) Multidimensional arrays aren't written like C<$foo[1,2,3]>. They're written like C<$foo[1][2][3]>, as in C. -=item / must be followed by a*, A* or Z* +=item '/' must be followed by 'a*', 'A*' or 'Z*' (F) You had a pack template indicating a counted-length string, Currently the only things that can have their length counted are a*, A* or Z*. See L. -=item / must be followed by a, A or Z - -(F) You had an unpack template indicating a counted-length string, which -must be followed by one of the letters a, A or Z to indicate what sort -of string is to be unpacked. See L. - -=item / must follow a numeric type +=item '/' must follow a numeric type in unpack -(F) You had an unpack template that contained a '#', but this did not -follow some numeric unpack specification. See L. +(F) You had an unpack template that contained a '/', but this did not +follow some unpack specification producing a numeric value. +See L. =item "my sub" not yet implemented @@ -2222,6 +2232,11 @@ If you had a good reason for having a unique name, then just mention it again somehow to suppress the message. The C declaration is provided for this purpose. +=item Negative '/' count in unpack + +(F) The length count obtained from a length/code unpack operation was +negative. See L. + =item Negative length (F) You tried to do a read/write/send/recv operation with a buffer @@ -2307,6 +2322,11 @@ ordinary subroutine call. redirection, and found a '2>' or a '2>>' on the command line, but can't find the name of the file to which to write data destined for stderr. +=item No group ending character '%c' found in template + +(F) A pack or unpack template has an opening '(' or '[' without its +matching counterpart. See L. + =item No input file after < on command line (F) An error peculiar to VMS. Perl handles its own command line @@ -2453,12 +2473,6 @@ supplied. See L. of Perl. Check the #! line, or manually feed your script into Perl yourself. -=item %s not allowed in length fields - -(F) The count in the (un)pack template may be replaced by C<[TEMPLATE]> only if -C