From: Nick Ing-Simmons Date: Tue, 2 Oct 2001 08:40:57 +0000 (+0000) Subject: Integrate mainline + lib/open.t patch from Chromatic X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f1c09d8aea4b00e7799b7273c877df50e0fd657a;hp=bb407f0b8769c638c05e60ebfd157a1e676a6c22;p=p5sagit%2Fp5-mst-13.2.git Integrate mainline + lib/open.t patch from Chromatic p4raw-id: //depot/perlio@12301 --- diff --git a/MANIFEST b/MANIFEST index 5f45512..bd3a7a1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1155,6 +1155,7 @@ lib/Term/ANSIColor/ChangeLog Term::ANSIColor lib/Term/ANSIColor/README Term::ANSIColor lib/Term/ANSIColor/test.pl See if Term::ANSIColor works lib/Term/Cap.pm Perl module supporting termcap usage +lib/Term/Cap.t See if Term::Cap works lib/Term/Complete.pm A command completion subroutine lib/Term/Complete.t See if Term::Complete works lib/Term/ReadLine.pm Stub readline library @@ -1210,6 +1211,7 @@ lib/Text/ParseWords.t See if Text::ParseWords works lib/Text/Soundex.pm Perl module to implement Soundex lib/Text/Soundex.t See if Soundex works lib/Text/Tabs.pm Do expand and unexpand +lib/Text/TabsWrap/CHANGELOG ChangeLog for Tabs+Wrap lib/Text/TabsWrap/t/fill.t See if Text::Wrap::fill works lib/Text/TabsWrap/t/tabs.t See if Text::Tabs works lib/Text/TabsWrap/t/wrap.t See if Text::Wrap::wrap works diff --git a/embed.h b/embed.h index c19e445..29ee843 100644 --- a/embed.h +++ b/embed.h @@ -401,6 +401,7 @@ #define mess Perl_mess #define vmess Perl_vmess #define qerror Perl_qerror +#define sortsv Perl_sortsv #define mg_clear Perl_mg_clear #define mg_copy Perl_mg_copy #define mg_find Perl_mg_find @@ -639,14 +640,10 @@ #define sv_2iv Perl_sv_2iv #define sv_2mortal Perl_sv_2mortal #define sv_2nv Perl_sv_2nv -#ifdef CRIPPLED_CC #define sv_2pv Perl_sv_2pv -#endif #define sv_2pvutf8 Perl_sv_2pvutf8 #define sv_2pvbyte Perl_sv_2pvbyte -#ifdef CRIPPLED_CC #define sv_pvn_nomg Perl_sv_pvn_nomg -#endif #define sv_2uv Perl_sv_2uv #define sv_iv Perl_sv_iv #define sv_uv Perl_sv_uv @@ -661,12 +658,8 @@ #define sv_catpvf Perl_sv_catpvf #define sv_vcatpvf Perl_sv_vcatpvf #define sv_catpv Perl_sv_catpv -#ifdef CRIPPLED_CC #define sv_catpvn Perl_sv_catpvn -#endif -#ifdef CRIPPLED_CC #define sv_catsv Perl_sv_catsv -#endif #define sv_chop Perl_sv_chop #define sv_clean_all Perl_sv_clean_all #define sv_clean_objs Perl_sv_clean_objs @@ -699,9 +692,7 @@ #define sv_peek Perl_sv_peek #define sv_pos_u2b Perl_sv_pos_u2b #define sv_pos_b2u Perl_sv_pos_b2u -#ifdef CRIPPLED_CC #define sv_pvn_force Perl_sv_pvn_force -#endif #define sv_pvutf8n_force Perl_sv_pvutf8n_force #define sv_pvbyten_force Perl_sv_pvbyten_force #define sv_reftype Perl_sv_reftype @@ -721,9 +712,7 @@ #define sv_setref_pvn Perl_sv_setref_pvn #define sv_setpv Perl_sv_setpv #define sv_setpvn Perl_sv_setpvn -#ifdef CRIPPLED_CC #define sv_setsv Perl_sv_setsv -#endif #define sv_taint Perl_sv_taint #define sv_tainted Perl_sv_tainted #define sv_unmagic Perl_sv_unmagic @@ -844,9 +833,7 @@ #define sv_pv Perl_sv_pv #define sv_pvutf8 Perl_sv_pvutf8 #define sv_pvbyte Perl_sv_pvbyte -#ifdef CRIPPLED_CC #define sv_utf8_upgrade Perl_sv_utf8_upgrade -#endif #define sv_utf8_downgrade Perl_sv_utf8_downgrade #define sv_utf8_encode Perl_sv_utf8_encode #define sv_utf8_decode Perl_sv_utf8_decode @@ -1006,7 +993,6 @@ #define save_lines S_save_lines #define doeval S_doeval #define doopen_pmc S_doopen_pmc -#define qsortsv S_qsortsv #endif #if defined(PERL_IN_PP_HOT_C) || defined(PERL_DECL_PROT) #define do_maybe_phash S_do_maybe_phash @@ -1936,6 +1922,7 @@ #endif #define vmess(a,b) Perl_vmess(aTHX_ a,b) #define qerror(a) Perl_qerror(aTHX_ a) +#define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c) #define mg_clear(a) Perl_mg_clear(aTHX_ a) #define mg_copy(a,b,c,d) Perl_mg_copy(aTHX_ a,b,c,d) #define mg_find(a,b) Perl_mg_find(aTHX_ a,b) @@ -2518,7 +2505,6 @@ #define save_lines(a,b) S_save_lines(aTHX_ a,b) #define doeval(a,b) S_doeval(aTHX_ a,b) #define doopen_pmc(a,b) S_doopen_pmc(aTHX_ a,b) -#define qsortsv(a,b,c) S_qsortsv(aTHX_ a,b,c) #endif #if defined(PERL_IN_PP_HOT_C) || defined(PERL_DECL_PROT) #define do_maybe_phash(a,b,c,d,e) S_do_maybe_phash(aTHX_ a,b,c,d,e) diff --git a/embed.pl b/embed.pl index e4dae1b..73e72d2 100755 --- a/embed.pl +++ b/embed.pl @@ -344,14 +344,12 @@ walk_table { else { my ($flags,$retval,$func,@args) = @_; unless ($flags =~ /o/) { - $ret .= "#ifdef CRIPPLED_CC\n" if $flags =~ /C/; if ($flags =~ /s/) { $ret .= hide($func,"S_$func"); } elsif ($flags =~ /p/) { $ret .= hide($func,"Perl_$func"); } - $ret .= "#endif\n" if $flags =~ /C/; } } $ret; @@ -1054,7 +1052,6 @@ __END__ : : flags are single letters with following meanings: : A member of public API -: C wrap compatibility macro in #ifdef DCRIPPLED_CC : d function has documentation with its source : s static function, should have an S_ prefix in source : file @@ -1472,6 +1469,7 @@ p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen Afp |SV* |mess |const char* pat|... Ap |SV* |vmess |const char* pat|va_list* args p |void |qerror |SV* err +Apd |void |sortsv |SV ** array|size_t num_elts|SVCOMPARE_t f Apd |int |mg_clear |SV* sv Apd |int |mg_copy |SV* sv|SV* nsv|const char* key|I32 klen Apd |MAGIC* |mg_find |SV* sv|int type @@ -1723,10 +1721,10 @@ Apd |IO* |sv_2io |SV* sv Apd |IV |sv_2iv |SV* sv Apd |SV* |sv_2mortal |SV* sv Apd |NV |sv_2nv |SV* sv -ACp |char* |sv_2pv |SV* sv|STRLEN* lp +Ap |char* |sv_2pv |SV* sv|STRLEN* lp Apd |char* |sv_2pvutf8 |SV* sv|STRLEN* lp Apd |char* |sv_2pvbyte |SV* sv|STRLEN* lp -ACp |char* |sv_pvn_nomg |SV* sv|STRLEN* lp +Ap |char* |sv_pvn_nomg |SV* sv|STRLEN* lp Apd |UV |sv_2uv |SV* sv Apd |IV |sv_iv |SV* sv Apd |UV |sv_uv |SV* sv @@ -1741,8 +1739,8 @@ Apd |SV* |sv_bless |SV* sv|HV* stash Afpd |void |sv_catpvf |SV* sv|const char* pat|... Ap |void |sv_vcatpvf |SV* sv|const char* pat|va_list* args Apd |void |sv_catpv |SV* sv|const char* ptr -ACpd |void |sv_catpvn |SV* sv|const char* ptr|STRLEN len -ACpd |void |sv_catsv |SV* dsv|SV* ssv +Apd |void |sv_catpvn |SV* sv|const char* ptr|STRLEN len +Apd |void |sv_catsv |SV* dsv|SV* ssv Apd |void |sv_chop |SV* sv|char* ptr pd |I32 |sv_clean_all pd |void |sv_clean_objs @@ -1777,7 +1775,7 @@ Apd |SV* |sv_newref |SV* sv Ap |char* |sv_peek |SV* sv Apd |void |sv_pos_u2b |SV* sv|I32* offsetp|I32* lenp Apd |void |sv_pos_b2u |SV* sv|I32* offsetp -ACpd |char* |sv_pvn_force |SV* sv|STRLEN* lp +Apd |char* |sv_pvn_force |SV* sv|STRLEN* lp Apd |char* |sv_pvutf8n_force|SV* sv|STRLEN* lp Apd |char* |sv_pvbyten_force|SV* sv|STRLEN* lp Apd |char* |sv_reftype |SV* sv|int ob @@ -1798,7 +1796,7 @@ Apd |SV* |sv_setref_pvn |SV* rv|const char* classname|char* pv \ |STRLEN n Apd |void |sv_setpv |SV* sv|const char* ptr Apd |void |sv_setpvn |SV* sv|const char* ptr|STRLEN len -ACpd |void |sv_setsv |SV* dsv|SV* ssv +Apd |void |sv_setsv |SV* dsv|SV* ssv Apd |void |sv_taint |SV* sv Apd |bool |sv_tainted |SV* sv Apd |int |sv_unmagic |SV* sv|int type @@ -1930,7 +1928,7 @@ Apd |char* |sv_2pvbyte_nolen|SV* sv Apd |char* |sv_pv |SV *sv Apd |char* |sv_pvutf8 |SV *sv Apd |char* |sv_pvbyte |SV *sv -ACpd |STRLEN |sv_utf8_upgrade|SV *sv +Apd |STRLEN |sv_utf8_upgrade|SV *sv ApdM |bool |sv_utf8_downgrade|SV *sv|bool fail_ok Apd |void |sv_utf8_encode |SV *sv ApdM |bool |sv_utf8_decode |SV *sv @@ -2107,7 +2105,6 @@ s |I32 |dopoptosub_at |PERL_CONTEXT* cxstk|I32 startingblock s |void |save_lines |AV *array|SV *sv s |OP* |doeval |int gimme|OP** startop s |PerlIO *|doopen_pmc |const char *name|const char *mode -s |void |qsortsv |SV ** array|size_t num_elts|SVCOMPARE_t f #endif #if defined(PERL_IN_PP_HOT_C) || defined(PERL_DECL_PROT) diff --git a/ext/DB_File/DB_File.pm b/ext/DB_File/DB_File.pm index 1df9876..5ac1120 100644 --- a/ext/DB_File/DB_File.pm +++ b/ext/DB_File/DB_File.pm @@ -222,8 +222,9 @@ sub AUTOLOAD { "; } } - eval "sub $AUTOLOAD { $val }"; - goto &$AUTOLOAD; + no strict 'refs'; + *{$AUTOLOAD} = sub { $val }; + goto &{$AUTOLOAD}; } diff --git a/ext/Data/Dumper/Dumper.pm b/ext/Data/Dumper/Dumper.pm index 6cf7d35..b5c6b85 100644 --- a/ext/Data/Dumper/Dumper.pm +++ b/ext/Data/Dumper/Dumper.pm @@ -40,6 +40,8 @@ $Quotekeys = 1 unless defined $Quotekeys; $Bless = "bless" unless defined $Bless; #$Expdepth = 0 unless defined $Expdepth; $Maxdepth = 0 unless defined $Maxdepth; +$Useperl = 0 unless defined $Useperl; +$Sortkeys = 0 unless defined $Sortkeys; # # expects an arrayref of values to be dumped. @@ -75,6 +77,8 @@ sub new { 'bless' => $Bless, # keyword to use for "bless" # expdepth => $Expdepth, # cutoff depth for explicit dumping maxdepth => $Maxdepth, # depth beyond which we give up + useperl => $Useperl, # use the pure Perl implementation + sortkeys => $Sortkeys, # flag or filter for sorting hash keys }; if ($Indent > 0) { @@ -148,7 +152,8 @@ sub DESTROY {} sub Dump { return &Dumpxs - unless $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}); + unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) || + $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}); return &Dumpperl; } @@ -208,6 +213,8 @@ sub Dumpperl { # # twist, toil and turn; # and recurse, of course. +# sometimes sordidly; +# and curse if no recourse. # sub _dump { my($s, $val, $name) = @_; @@ -331,7 +338,23 @@ sub _dump { ($name =~ /^\\?[\%\@\*\$][^{].*[]}]$/) ? ($mname = $name) : ($mname = $name . '->'); $mname .= '->' if $mname =~ /^\*.+\{[A-Z]+\}$/; - while (($k, $v) = each %$val) { + my ($sortkeys, $keys, $key) = ("$s->{sortkeys}"); + if ($sortkeys) { + if (ref($s->{sortkeys}) eq 'CODE') { + $keys = $s->{sortkeys}($val); + unless (ref($keys) eq 'ARRAY') { + carp "Sortkeys subroutine did not return ARRAYREF"; + $keys = []; + } + } + else { + $keys = [ sort keys %$val ]; + } + } + while (($k, $v) = ! $sortkeys ? (each %$val) : + @$keys ? ($key = shift(@$keys), $val->{$key}) : + () ) + { my $nk = $s->_dump($k, ""); $nk = $1 if !$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/; $sname = $mname . '{' . $nk . '}'; @@ -537,6 +560,16 @@ sub Maxdepth { defined($v) ? (($s->{'maxdepth'} = $v), return $s) : $s->{'maxdepth'}; } +sub Useperl { + my($s, $v) = @_; + defined($v) ? (($s->{'useperl'} = $v), return $s) : $s->{'useperl'}; +} + +sub Sortkeys { + my($s, $v) = @_; + defined($v) ? (($s->{'sortkeys'} = $v), return $s) : $s->{'sortkeys'}; +} + # used by qquote below my %esc = ( @@ -848,6 +881,31 @@ C is set. (Useful in debugger when we often don't want to see more than enough). Default is 0, which means there is no maximum depth. +=item $Data::Dumper::Useperl I $I->Useperl(I<[NEWVAL]>) + +Can be set to a boolean value which controls whether the pure Perl +implementation of C is used. The C module is +a dual implementation, with almost all functionality written in both +pure Perl and also in XS ('C'). Since the XS version is much faster, it +will always be used if possible. This option lets you override the +default behavior, usually for testing purposes only. Default is 0, which +means the XS implementation will be used if possible. + +=item $Data::Dumper::Sortkeys I $I->Sortkeys(I<[NEWVAL]>) + +Can be set to a boolean value to control whether hash keys are dumped in +sorted order. A true value will cause the keys of all hashes to be +dumped in Perl's default sort order. Can also be set to a subroutine +reference which will be called for each hash that is dumped. In this +case C will call the subroutine once for each hash, +passing it the reference of the hash. The purpose of the subroutine is +to return a reference to an array of the keys that will be dumped, in +the order that they should be dumped. Using this feature, you can +control both the order of the keys, and which keys are actually used. In +other words, this subroutine acts as a filter by which you can exclude +certain keys from being dumped. Default is 0, which means that hash keys +are not sorted. + =back =head2 Exports @@ -1003,6 +1061,30 @@ distribution for more examples.) print $d->Dump; + ######## + # sorting and filtering hash keys + ######## + + $Data::Dumper::Sortkeys = \&my_filter; + my $foo = { map { (ord, "$_$_$_") } 'I'..'Q' }; + my $bar = { %$foo }; + my $baz = { reverse %$foo }; + print Dumper [ $foo, $bar, $baz ]; + + sub my_filter { + my ($hash) = @_; + # return an array ref containing the hash keys to dump + # in the order that you want them to be dumped + return [ + # Sort the keys of %$foo in reverse numeric order + $hash eq $foo ? (sort {$b <=> $a} keys %$hash) : + # Only dump the odd number keys of %$bar + $hash eq $bar ? (grep {$_ % 2} keys %$hash) : + # Sort keys in default order for all other hashes + (sort keys %$hash) + ]; + } + =head1 BUGS Due to limitations of Perl subroutine call semantics, you cannot pass an diff --git a/ext/Data/Dumper/Dumper.xs b/ext/Data/Dumper/Dumper.xs index 8fc7ac3..d0eb917 100644 --- a/ext/Data/Dumper/Dumper.xs +++ b/ext/Data/Dumper/Dumper.xs @@ -29,7 +29,7 @@ static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, SV *pad, SV *xpad, SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth); + I32 maxdepth, SV *sortkeys); /* does a string need to be protected? */ static I32 @@ -179,7 +179,7 @@ static I32 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity, - I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth) + I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys) { char tmpbuf[128]; U32 i; @@ -354,7 +354,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); sv_catpvn(retval, ")}", 2); } /* plain */ else { @@ -362,7 +362,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); } SvREFCNT_dec(namesv); } @@ -374,7 +374,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { @@ -443,7 +443,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); if (ix < ixmax) sv_catpvn(retval, ",", 1); } @@ -468,6 +468,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, char *key; I32 klen; SV *hval; + AV *keys = Nullav; iname = newSVpvn(name, namelen); if (name[0] == '%') { @@ -497,9 +498,42 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catsv(totpad, pad); sv_catsv(totpad, apad); - (void)hv_iterinit((HV*)ival); + /* If requested, get a sorted/filtered array of hash keys */ + if (sortkeys) { + if (sortkeys == &PL_sv_yes) { + keys = newAV(); + (void)hv_iterinit((HV*)ival); + while (entry = hv_iternext((HV*)ival)) { + sv = hv_iterkeysv(entry); + SvREFCNT_inc(sv); + av_push(keys, sv); + } + sortsv(AvARRAY(keys), + av_len(keys)+1, + Perl_sv_cmp_locale); + } + else { + dSP; ENTER; SAVETMPS; PUSHMARK(sp); + XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK; + i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL); + SPAGAIN; + if (i) { + sv = POPs; + if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV)) + keys = (AV*)SvREFCNT_inc(SvRV(sv)); + } + if (! keys) + warn("Sortkeys subroutine did not return ARRAYREF\n"); + PUTBACK; FREETMPS; LEAVE; + } + if (keys) + sv_2mortal((SV*)keys); + } + else + (void)hv_iterinit((HV*)ival); i = 0; - while ((entry = hv_iternext((HV*)ival))) { + while (sortkeys ? (void*)(keys && (i <= av_len(keys))) : + (void*)((entry = hv_iternext((HV*)ival))) ) { char *nkey = NULL; I32 nticks = 0; SV* keysv; @@ -508,9 +542,21 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, if (i) sv_catpvn(retval, ",", 1); + + if (sortkeys) { + char *key; + svp = av_fetch(keys, i, FALSE); + keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef); + key = SvPV(keysv, keylen); + svp = hv_fetch((HV*)ival, key, keylen, 0); + hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef); + } + else { + keysv = hv_iterkeysv(entry); + hval = hv_iterval((HV*)ival, entry); + } + i++; - keysv = hv_iterkeysv(entry); - hval = hv_iterval((HV*)ival, entry); do_utf8 = DO_UTF8(keysv); key = SvPV(keysv, keylen); @@ -571,7 +617,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv, postav, levelp, indent, pad, xpad, newapad, sep, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); SvREFCNT_dec(sname); Safefree(nkey); if (indent >= 2) @@ -713,7 +759,8 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry, seenhv, postav, &nlevel, indent, pad, xpad, newapad, sep, freezer, toaster, purity, - deepcopy, quotekeys, bless, maxdepth); + deepcopy, quotekeys, bless, maxdepth, + sortkeys); SvREFCNT_dec(e); } } @@ -776,7 +823,7 @@ Data_Dumper_Dumpxs(href, ...) I32 indent, terse, i, imax, postlen; SV **svp; SV *val, *name, *pad, *xpad, *apad, *sep, *varname; - SV *freezer, *toaster, *bless; + SV *freezer, *toaster, *bless, *sortkeys; I32 purity, deepcopy, quotekeys, maxdepth = 0; char tmpbuf[1024]; I32 gimme = GIMME; @@ -858,6 +905,17 @@ Data_Dumper_Dumpxs(href, ...) bless = *svp; if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE))) maxdepth = SvIV(*svp); + if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) { + sortkeys = *svp; + if (! SvTRUE(sortkeys)) + sortkeys = NULL; + else if (! (SvROK(sortkeys) && + SvTYPE(SvRV(sortkeys)) == SVt_PVCV) ) + { + /* flag to use qsortsv() for sorting hash keys */ + sortkeys = &PL_sv_yes; + } + } postav = newAV(); if (todumpav) @@ -923,7 +981,7 @@ Data_Dumper_Dumpxs(href, ...) DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv, postav, &level, indent, pad, xpad, newapad, sep, freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth); + bless, maxdepth, sortkeys); if (indent >= 2) SvREFCNT_dec(newapad); diff --git a/ext/Data/Dumper/t/dumper.t b/ext/Data/Dumper/t/dumper.t index bf07229..2371835 100755 --- a/ext/Data/Dumper/t/dumper.t +++ b/ext/Data/Dumper/t/dumper.t @@ -61,11 +61,11 @@ sub TEST { if (defined &Data::Dumper::Dumpxs) { print "### XS extension loaded, will run XS tests\n"; - $TMAX = 192; $XS = 1; + $TMAX = 210; $XS = 1; } else { print "### XS extensions not loaded, will NOT run XS tests\n"; - $TMAX = 96; $XS = 0; + $TMAX = 105; $XS = 0; } print "1..$TMAX\n"; @@ -821,3 +821,106 @@ EOT TEST q(Data::Dumper->Dumpxs([$a], ['a'])); } + +{ + $i = 0; + $a = { map { ("$_$_$_", ++$i) } 'I'..'Q' }; + local $Data::Dumper::Sortkeys = 1; + +############# 193 +## + $WANT = <<'EOT'; +#$VAR1 = { +# III => 1, +# JJJ => 2, +# KKK => 3, +# LLL => 4, +# MMM => 5, +# NNN => 6, +# OOO => 7, +# PPP => 8, +# QQQ => 9 +#}; +EOT + +TEST q(Data::Dumper->new([$a])->Dump;); +TEST q(Data::Dumper->new([$a])->Dumpxs;) + if $XS; +} + +{ + $i = 5; + $c = { map { (++$i, "$_$_$_") } 'I'..'Q' }; + local $Data::Dumper::Sortkeys = \&sort199; + sub sort199 { + my $hash = shift; + return [ sort { $b <=> $a } keys %$hash ]; + } + +############# 199 +## + $WANT = <<'EOT'; +#$VAR1 = { +# '14' => 'QQQ', +# '13' => 'PPP', +# '12' => 'OOO', +# '11' => 'NNN', +# '10' => 'MMM', +# '9' => 'LLL', +# '8' => 'KKK', +# '7' => 'JJJ', +# '6' => 'III' +#}; +EOT + +TEST q(Data::Dumper->new([$c])->Dump;); +TEST q(Data::Dumper->new([$c])->Dumpxs;) + if $XS; +} + +{ + $i = 5; + $c = { map { (++$i, "$_$_$_") } 'I'..'Q' }; + $d = { reverse %$c }; + local $Data::Dumper::Sortkeys = \&sort205; + sub sort205 { + my $hash = shift; + return [ + $hash eq $c ? (sort { $a <=> $b } keys %$hash) + : (reverse sort keys %$hash) + ]; + } + +############# 205 +## + $WANT = <<'EOT'; +#$VAR1 = [ +# { +# '6' => 'III', +# '7' => 'JJJ', +# '8' => 'KKK', +# '9' => 'LLL', +# '10' => 'MMM', +# '11' => 'NNN', +# '12' => 'OOO', +# '13' => 'PPP', +# '14' => 'QQQ' +# }, +# { +# QQQ => '14', +# PPP => '13', +# OOO => '12', +# NNN => '11', +# MMM => '10', +# LLL => '9', +# KKK => '8', +# JJJ => '7', +# III => '6' +# } +#]; +EOT + +TEST q(Data::Dumper->new([[$c, $d]])->Dump;); +TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;) + if $XS; +} diff --git a/ext/GDBM_File/GDBM_File.pm b/ext/GDBM_File/GDBM_File.pm index 75bc7c1..63225f0 100644 --- a/ext/GDBM_File/GDBM_File.pm +++ b/ext/GDBM_File/GDBM_File.pm @@ -67,8 +67,9 @@ sub AUTOLOAD { ($constname = $AUTOLOAD) =~ s/.*:://; my ($error, $val) = constant($constname); Carp::croak $error if $error; - eval "sub $AUTOLOAD { $val }"; - goto &$AUTOLOAD; + no strict 'refs'; + *{$AUTOLOAD} = sub { $val }; + goto &{$AUTOLOAD}; } XSLoader::load 'GDBM_File', $VERSION; diff --git a/global.sym b/global.sym index ab477a0..28d86a5 100644 --- a/global.sym +++ b/global.sym @@ -219,6 +219,7 @@ Perl_grok_oct Perl_markstack_grow Perl_mess Perl_vmess +Perl_sortsv Perl_mg_clear Perl_mg_copy Perl_mg_find diff --git a/lib/Term/Cap.t b/lib/Term/Cap.t new file mode 100644 index 0000000..ea34927 --- /dev/null +++ b/lib/Term/Cap.t @@ -0,0 +1,191 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +END { + # let VMS whack all versions + 1 while unlink('tcout'); +} + +use Test::More tests => 43; + +use_ok( 'Term::Cap' ); + +local (*TCOUT, *OUT); +my $out = tie *OUT, 'TieOut'; +my $writable = 1; + +if (open(TCOUT, ">tcout")) { + print TCOUT ; + close TCOUT; +} else { + $writable = 0; +} + +# termcap_path -- the names are hardcoded in Term::Cap +$ENV{TERMCAP} = ''; +my $path = join '', Term::Cap::termcap_path(); +my $files = join '', grep { -f $_ } ( $ENV{HOME} . '/.termcap', '/etc/termcap', + '/usr/share/misc/termcap' ); +is( $path, $files, 'termcap_path() found default files okay' ); + +SKIP: { + # this is ugly, but -f $0 really *ought* to work + skip("-f $0 fails, some tests difficult now", 2) unless -f $0; + + $ENV{TERMCAP} = $0; + ok( grep($0, Term::Cap::termcap_path()), 'found file from $ENV{TERMCAP}' ); + + $ENV{TERMCAP} = (grep { $^O eq $_ } qw( os2 MSWin32 dos )) ? 'a:/' : '/'; + $ENV{TERMPATH} = $0; + ok( grep($0, Term::Cap::termcap_path()), 'found file from $ENV{TERMPATH}' ); +} + + +# make a Term::Cap "object" +my $t = { + PADDING => 1, + _pc => 'pc', +}; +bless($t, 'Term::Cap' ); + +# see if Tpad() works +is( $t->Tpad(), undef, 'Tpad() is undef with no string' ); +is( $t->Tpad('x'), 'x', 'Tpad() returns strings with no match' ); +is( $t->Tpad( '1*a', 2 ), 'apcpc', 'Tpad() pads string fine' ); + +$t->{PADDING} = 2; +is( $t->Tpad( '1*a', 3, *OUT ), 'apcpc', 'Tpad() pad math is okay' ); +is( $out->read(), 'apcpc', 'Tpad() writes to filehandle fine' ); + +is( $t->Tputs('PADDING'), 2, 'Tputs() returns existing value file' ); +is( $t->Tputs('pc', 2), 'pc', 'Tputs() delegates to Tpad() fine' ); +$t->Tputs('pc', 1, *OUT); +is( $t->{pc}, 'pc', 'Tputs() caches fine when asked' ); +is( $out->read(), 'pc', 'Tputs() writes to filehandle fine' ); + +eval { $t->Trequire( 'pc' ) }; +is( $@, '', 'Trequire() finds existing cap fine' ); +eval { $t->Trequire( 'nonsense' ) }; +like( $@, qr/support: \(nonsense\)/, 'Trequire() croaks with unsupported cap' ); + +my $warn; +local $SIG{__WARN__} = sub { + $warn = $_[0]; +}; + +# test the first few features by forcing Tgetent() to croak (line 156) +undef $ENV{TERM}; +my $vals = {}; +eval { $t = Term::Cap->Tgetent($vals) }; +like( $@, qr/TERM not set/, 'Tgetent() croaks without TERM' ); +like( $warn, qr/OSPEED was not set/, 'Tgetent() set default OSPEED value' ); +is( $vals->{PADDING}, 10000/9600, 'Default OSPEED implies default PADDING' ); + +# check values for very slow speeds +$vals->{OSPEED} = 1; +$warn = ''; +eval { $t = Term::Cap->Tgetent($vals) }; +is( $warn, '', 'no warning when passing OSPEED to Tgetent()' ); +is( $vals->{PADDING}, 200, 'Tgetent() set slow PADDING when needed' ); + +# now see if lines 177 or 180 will fail +$ENV{TERM} = 'foo'; +$ENV{TERMPATH} = '!'; +$ENV{TERMCAP} = ''; +eval { $t = Term::Cap->Tgetent($vals) }; +isn't( $@, '', 'Tgetent() caught bad termcap file' ); + +# if there's no valid termcap file found, it should croak +$vals->{TERM} = ''; +$ENV{TERMPATH} = $0; +eval { $t = Term::Cap->Tgetent($vals) }; +like( $@, qr/failed termcap lookup/, 'Tgetent() dies with bad termcap file' ); + +SKIP: { + skip( "Can't write 'tcout' file for tests", 8 ) unless $writable; + + # it shouldn't try to read one file more than 32(!) times + # see __END__ for a really awful termcap example + + $ENV{TERMPATH} = join(' ', ('tcout') x 33); + $vals->{TERM} = 'bar'; + eval { $t = Term::Cap->Tgetent($vals) }; + like( $@, qr/failed termcap loop/, 'Tgetent() dies with much recursion' ); + + # now let it read a fake termcap file, and see if it sets properties + $ENV{TERMPATH} = 'tcout'; + $vals->{TERM} = 'baz'; + $t = Term::Cap->Tgetent($vals); + is( $t->{_f1}, 1, 'Tgetent() set a single field correctly' ); + is( $t->{_f2}, 1, 'Tgetent() set another field on the same line' ); + is( $t->{_no}, '', 'Tgetent() set a blank field correctly' ); + is( $t->{_k1}, 'v1', 'Tgetent() set a key value pair correctly' ); + like( $t->{_k2}, qr/v2\\\n2/, 'Tgetent() set and translated a pair right' ); + + # and it should have set these two fields + is( $t->{_pc}, "\0", 'set _pc field correctly' ); + is( $t->{_bc}, "\b", 'set _bc field correctly' ); +} + +# Tgoto has comments on the expected formats +$t->{_test} = "a%d"; +is( $t->Tgoto('test', '', 1, *OUT), 'a1', 'Tgoto() works with %d code' ); +is( $out->read(), 'a1', 'Tgoto() printed to filehandle fine' ); + +$t->{_test} = "a%."; +like( $t->Tgoto('test', '', 1), qr/^a\x01/, 'Tgoto() works with %.' ); +like( $t->Tgoto('test', '', 0), qr/\x61\x01\x08/, 'Tgoto() %. and magic work' ); + +$t->{_test} = 'a%+'; +like( $t->Tgoto('test', '', 1), qr/a\x01/, 'Tgoto() works with %+' ); +$t->{_test} = 'a%+a'; +is( $t->Tgoto('test', '', 1), 'ab', 'Tgoto() works with %+ and a character' ); +$t->{_test} .= 'a' x 99; +like( $t->Tgoto('test', '', 1), qr/ba{98}/, 'Tgoto() substr()s %+ if needed' ); + +$t->{_test} = '%ra%d'; +is( $t->Tgoto('test', 1, ''), 'a1', 'Tgoto() swaps params with %r set' ); + +$t->{_test} = 'a%>11bc'; +is( $t->Tgoto('test', '', 1), 'abc', 'Tgoto() unpacks with %> set' ); + +$t->{_test} = 'a%21'; +is( $t->Tgoto('test'), 'a001', 'Tgoto() formats with %2 set' ); + +$t->{_test} = 'a%31'; +is( $t->Tgoto('test'), 'a0001', 'Tgoto() also formats with %3 set' ); + +$t->{_test} = '%ia%21'; +is( $t->Tgoto('test', '', 1), 'a021', 'Tgoto() incremented args with %i set '); + +$t->{_test} = '%z'; +is( $t->Tgoto('test'), 'OOPS', 'Tgoto() handled invalid arg fine' ); + +# and this is pretty standard +package TieOut; + +sub TIEHANDLE { + bless( \(my $self), $_[0] ); +} + +sub PRINT { + my $self = shift; + $$self .= join('', @_); +} + +sub read { + my $self = shift; + substr( $$self, 0, length($$self), '' ); +} + +__END__ +bar: :tc=bar: \ +baz: \ +:f1: :f2: \ +:no@ \ +:k1#v1\ +:k2=v2\\n2 diff --git a/lib/Term/Complete.t b/lib/Term/Complete.t index 3f8cb20..63b2825 100644 --- a/lib/Term/Complete.t +++ b/lib/Term/Complete.t @@ -8,6 +8,8 @@ BEGIN { use warnings; use Test::More tests => 8; use vars qw( $Term::Complete::complete $complete ); +my $restore; + SKIP: { skip('PERL_SKIP_TTY_TEST', 8) if $ENV{PERL_SKIP_TTY_TEST}; @@ -18,7 +20,9 @@ SKIP: { if (defined $TTY) { open(TTY, $TTY) or die "open $TTY failed: $!"; skip("$TTY not a tty", 8) if defined $TTY && ! -t TTY; - } + $restore = `stty -g`; + skip("Can't reliably restore $TTY", 8) if $?; + } use_ok( 'Term::Complete' ); @@ -65,6 +69,9 @@ like( $$out, qr/prompt:frobn/, 'prompt is okay' ); # now remove the prompt and we should be okay $$out =~ s/prompt://g; is( $$out, get_expected('frobn', 'frobnitz' ), 'works with new $complete' ); +`stty $restore`; + +} # end of SKIP, end of tests # easier than matching space characters sub get_expected { @@ -110,6 +117,3 @@ sub PRINT { my $self = shift; ($$self .= join('', @_)) =~ s/\s+/./gm; } - -} # end of SKIP, end of tests - diff --git a/lib/Text/TabsWrap/CHANGELOG b/lib/Text/TabsWrap/CHANGELOG new file mode 100644 index 0000000..7f0720a --- /dev/null +++ b/lib/Text/TabsWrap/CHANGELOG @@ -0,0 +1,74 @@ += 2001/09/29 + +Philip Newton sent in a clean patch that +added support for defining words differently; that prevents +Text::Wrap from untainting strings; and that fixes a documentation +bug. + +So that fill.t can be used in the version included in the perl +distribution, fill.t no longer uses File::Slurp. + +Both Sweth Chandramouli and Drew Degentesh + both objected to the automatic unexpand +that Text::Wrap does on its results. Drew sent a patch which +has been integrated. + +Way back in '97, Joel Earl asked that +it be possible to use a line separator other than \n when +adding new lines. There is now support for that. + += 2001/01/30 + +Bugfix by Michael G Schwern : don't add extra +whitespace when working one an array of input (as opposed to a +single string). + +Performance rewrite: use m/\G/ rather than s///. + +You can now specify that words that are too long to wrap can simply +overflow the line. Feature requested by James Hoagland + and by John Porter . + +Documentation changes from Rich Bowen . + += 1998/11/29 + +Combined Fill.pm into Wrap.pm. It appears there are versions of +Wrap.pm with fill in them. + += 1998/11/28 + +Over the last couple of years, many people sent in various +rewrites of Text::Wrap. I should have done something about +updating it long ago. If someone wants to take it over from +me, discuss it in perl-porters. I'll be happy to hand it +over. + +Anyway, I have a bunch of people to thank. I didn't +use what any of them sent in, but I did take ideas from +all of them. Many sent in complete new implamentations. + + Ivan Brawley + + Jacqui Caren + + Jeff Kowalski + + Allen Smith + + Sullivan N. Beck + +The end result is a very slight change in the API. There +is now an additional package variable: $Text::Wrap::huge. +When $huge is set to 'die' then long words will cause +wrap() to die. When it is set to 'wrap', long words will +be wrapped. The default is 'wrap'. + +LONG WORDS WILL NOW BE WRAPPED BY DEFAULT. +This is a change in behavior. + +At the bottom of Text::Wrap, there was a function (fill()) +sitting there unpublished. There was a note that Tim Pierce +had a faster version, but a search on CPAN failed to turn it +up. Text::Fill is now available. + diff --git a/lib/Text/TabsWrap/t/fill.t b/lib/Text/TabsWrap/t/fill.t index 5ff3850..3d5b98f 100755 --- a/lib/Text/TabsWrap/t/fill.t +++ b/lib/Text/TabsWrap/t/fill.t @@ -75,8 +75,8 @@ while (@tests) { print "ok $tn\n"; } elsif ($rerun) { my $oi = $in; - open(F,">#o") and do { print F $back; close(F) }; - open(F,">#e") and do { print F $out; close(F) }; + write_file("#o", $back); + write_file("#e", $out); foreach ($in, $back, $out) { s/\t/^I\t/gs; s/\n/\$\n/gs; @@ -96,3 +96,15 @@ while (@tests) { } $tn++; } + +sub write_file +{ + my ($f, @data) = @_; + + local(*F); + + open(F, ">$f") || die "open >$f: $!"; + (print F @data) || die "write $f: $!"; + close(F) || die "close $f: $!"; + return 1; +} diff --git a/lib/Text/Wrap.pm b/lib/Text/Wrap.pm index 3c88508..8dd1f6c 100644 --- a/lib/Text/Wrap.pm +++ b/lib/Text/Wrap.pm @@ -6,9 +6,10 @@ require Exporter; @EXPORT = qw(wrap fill); @EXPORT_OK = qw($columns $break $huge); -$VERSION = 2001.0131; +$VERSION = 2001.0929; -use vars qw($VERSION $columns $debug $break $huge); +use vars qw($VERSION $columns $debug $break $huge $unexpand $tabstop + $separator); use strict; BEGIN { @@ -16,6 +17,9 @@ BEGIN { $debug = 0; $break = '\s'; $huge = 'wrap'; # alternatively: 'die' or 'overflow' + $unexpand = 1; + $tabstop = 8; + $separator = "\n"; } use Text::Tabs qw(expand unexpand); @@ -24,25 +28,34 @@ sub wrap { my ($ip, $xp, @t) = @_; + local($Text::Tabs::tabstop) = $tabstop; my $r = ""; my $tail = pop(@t); - my $t = expand(join("", (map { /\s+\Z/ ? ( $_ ) : ($_, ' ') } @t), $tail)); + my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail)); my $lead = $ip; my $ll = $columns - length(expand($ip)) - 1; my $nll = $columns - length(expand($xp)) - 1; my $nl = ""; my $remainder = ""; + use re 'taint'; + pos($t) = 0; while ($t !~ /\G\s*\Z/gc) { - if ($t =~ /\G([^\n]{0,$ll})($break|\Z(?!\n))/xmgc) { - $r .= unexpand($nl . $lead . $1); + if ($t =~ /\G([^\n]{0,$ll})($break|\z)/xmgc) { + $r .= $unexpand + ? unexpand($nl . $lead . $1) + : $nl . $lead . $1; $remainder = $2; } elsif ($huge eq 'wrap' && $t =~ /\G([^\n]{$ll})/gc) { - $r .= unexpand($nl . $lead . $1); - $remainder = "\n"; - } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)($break|\Z(?!\n))/xmgc) { - $r .= unexpand($nl . $lead . $1); + $r .= $unexpand + ? unexpand($nl . $lead . $1) + : $nl . $lead . $1; + $remainder = $separator; + } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)($break|\z)/xmgc) { + $r .= $unexpand + ? unexpand($nl . $lead . $1) + : $nl . $lead . $1; $remainder = $2; } elsif ($huge eq 'die') { die "couldn't wrap '$t'"; @@ -52,7 +65,7 @@ sub wrap $lead = $xp; $ll = $nll; - $nl = "\n"; + $nl = $separator; } $r .= $remainder; @@ -128,21 +141,54 @@ B =head1 DESCRIPTION -Text::Wrap::wrap() is a very simple paragraph formatter. It formats a +C is a very simple paragraph formatter. It formats a single paragraph at a time by breaking lines at word boundries. Indentation is controlled for the first line (C<$initial_tab>) and all subsquent lines (C<$subsequent_tab>) independently. Please note: C<$initial_tab> and C<$subsequent_tab> are the literal strings that will be used: it is unlikley you would want to pass in a number. +Text::Wrap::fill() is a simple multi-paragraph formatter. It formats +each paragraph separately and then joins them together when it's done. It +will destory any whitespace in the original text. It breaks text into +paragraphs by looking for whitespace after a newline. In other respects +it acts like wrap(). + +=head1 OVERRIDES + +C has a number of variables that control its behavior. +Because other modules might be using C it is suggested +that you leave these variables alone! If you can't do that, then +use C when you change the +values so that the original value is restored. This C trick +will not work if you import the variable into your own namespace. + Lines are wrapped at C<$Text::Wrap::columns> columns. C<$Text::Wrap::columns> should be set to the full width of your output device. In fact, every resulting line will have length of no more than C<$columns - 1>. +It is possible to control which characters terminate words by +modifying C<$Text::Wrap::break>. Set this to a string such as +C<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp +such as C (to break before spaces or apostrophes). The +default is simply C<'\s'>; that is, words are terminated by spaces. +(This means, among other things, that trailing punctuation such as +full stops or commas stay with the word they are "attached" to.) + Beginner note: In example 2, above C<$columns> is imported into the local namespace, and set locally. In example 3, C<$Text::Wrap::columns> is set in its own namespace without importing it. +C starts its work by expanding all the tabs in its +input into spaces. The last thing it does it to turn spaces back +into tabs. If you do not want tabs in your results, set +C<$Text::Wrap::unexapand> to a false value. Likewise if you do not +want to use 8-character tabstops, set C<$Text::Wrap::tabstop> to +the number of characters you do want for your tabstops. + +If you want to separate your lines with something other than C<\n> +then set C<$Text::Wrap::seporator> to your preference. + When words that are longer than C<$columns> are encountered, they are broken up. C adds a C<"\n"> at column C<$columns>. This behavior can be overridden by setting C<$huge> to @@ -150,17 +196,7 @@ This behavior can be overridden by setting C<$huge> to C to be called. When set to 'overflow', large words will be left intact. -Text::Wrap::fill() is a simple multi-paragraph formatter. It formats -each paragraph separately and then joins them together when it's done. It -will destory any whitespace in the original text. It breaks text into -paragraphs by looking for whitespace after a newline. In other respects -it acts like wrap(). - -When called in list context, C will return a list of lines and -C will return a list of paragraphs. - -Historical notes: Older versions of C and C always -returned strings. Also, 'die' used to be the default value of +Historical notes: 'die' used to be the default value of C<$huge>. Now, 'wrap' is the default value. =head1 EXAMPLE diff --git a/lib/open.t b/lib/open.t index 90e5e3b..88749d7 100644 --- a/lib/open.t +++ b/lib/open.t @@ -13,20 +13,23 @@ sub import { } # can't use require_ok() here, with a name like 'open' -ok( require 'open.pm', 'required okay!' ); +ok( require 'open.pm', 'requiring open' ); # this should fail eval { import() }; -like( $@, qr/needs explicit list of disciplines/, 'import fails without args' ); +like( $@, qr/needs explicit list of disciplines/, + 'import should fail without args' ); # the hint bits shouldn't be set yet -is( $^H & $open::hint_bits, 0, '$^H is okay before open import runs' ); +is( $^H & $open::hint_bits, 0, + 'hint bits should not be set in $^H before open import' ); # prevent it from loading I18N::Langinfo, so we can test encoding failures local @INC; -$ENV{LC_ALL} = ''; +$ENV{LC_ALL} = $ENV{LANG} = ''; eval { import( 'IN', 'locale' ) }; -like( $@, qr/Cannot figure out an encoding/, 'no encoding found' ); +like( $@, qr/Cannot figure out an encoding/, + 'no encoding should be found without $ENV{LANG} or $ENV{LC_ALL}' ); my $warn; local $SIG{__WARN__} = sub { @@ -35,34 +38,38 @@ local $SIG{__WARN__} = sub { # and it shouldn't be able to find this discipline eval{ import( 'IN', 'macguffin' ) }; -like( $warn, qr/Unknown discipline layer/, 'warned about unknown discipline' ); +like( $warn, qr/Unknown discipline layer/, + 'should warn about unknown discipline with bad discipline provided' ); # now load a real-looking locale $ENV{LC_ALL} = ' .utf8'; import( 'IN', 'locale' ); -is( ${^OPEN}, ':utf8\0', 'set locale layer okay!' ); +is( ${^OPEN}, ':utf8\0', + 'should set a valid locale layer' ); # and see if it sets the magic variables appropriately import( 'IN', ':crlf' ); -ok( $^H & $open::hint_bits, '$^H is set after open import runs' ); -is( $^H{'open_IN'}, 'crlf', 'set crlf layer okay!' ); +ok( $^H & $open::hint_bits, + 'hint bits should be set in $^H after open import' ); +is( $^H{'open_IN'}, 'crlf', 'should have set crlf layer' ); # it should reset them appropriately, too import( 'IN', ':raw' ); -is( $^H{'open_IN'}, 'raw', 'set raw layer okay!' ); +is( $^H{'open_IN'}, 'raw', 'should have reset to raw layer' ); # it dies if you don't set IN, OUT, or INOUT eval { import( 'sideways', ':raw' ) }; -like( $@, qr/Unknown discipline class/, 'croaked with unknown class' ); +like( $@, qr/Unknown discipline class/, 'should croak with unknown class' ); # but it handles them all so well together import( 'INOUT', ':raw :crlf' ); -is( ${^OPEN}, ':raw :crlf\0:raw :crlf', 'multi types, multi disciplines' ); -is( $^H{'open_INOUT'}, 'crlf', 'last layer set in %^H' ); +is( ${^OPEN}, ':raw :crlf\0:raw :crlf', + 'should set multi types, multi disciplines' ); +is( $^H{'open_INOUT'}, 'crlf', 'should record last layer set in %^H' ); __END__ # this one won't run as $locale_encoding is already set # perhaps qx{} it, if it's important to run $ENV{LC_ALL} = 'nonexistent.euc'; eval { open::_get_locale_encoding() }; -like( $@, qr/too ambiguous/, 'died with ambiguous locale encoding' ); +like( $@, qr/too ambiguous/, 'should die with ambiguous locale encoding' ); diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index e8cf0cc..025a70b 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -1,7 +1,6 @@ package utf8; -my $DEBUG = 0; -my $seq = "AAA0000"; +sub DEBUG () { 0 } sub DESTROY {} @@ -10,53 +9,57 @@ sub croak { require Carp; Carp::croak(@_) } sub SWASHNEW { my ($class, $type, $list, $minbits, $none) = @_; local $^D = 0 if $^D; - print STDERR "SWASHNEW @_\n" if $DEBUG; - my $extras; - my $bits; - + + print STDERR "SWASHNEW @_\n" if DEBUG; + + my $file; + if ($type and ref ${"${class}::{$type}"} eq $class) { - warn qq/Found \${"${class}::{$type}"}\n/ if $DEBUG; + warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG; return ${"${class}::{$type}"}; # Already there... } - $type ||= $seq++; - - my $caller; - my $i = 0; - while (($caller = caller($i)) eq __PACKAGE__) { $i++ } - my $encoding = $enc{$caller} || "unicore"; - (my $file = $type) =~ s!::!/!g; - if ($file =~ /^In[- ]?(.+)/i) { - my $In = $1; - defined %utf8::In || do "$encoding/In.pl"; - my $prefix = substr(lc($In), 0, 3); - if (exists $utf8::InPat{$prefix}) { - for my $k (keys %{$utf8::InPat{$prefix}}) { + if ($type) { + + defined %utf8::In || do "unicore/In.pl"; + + $type =~ s/^In(?:[-_]|\s+)?//i; + $type =~ s/\s+$//; + + my $inprefix = substr(lc($type), 0, 3); + if (exists $utf8::InPat{$inprefix}) { + my $In = $type; + for my $k (keys %{$utf8::InPat{$inprefix}}) { if ($In =~ /^$k$/i) { - $In = $utf8::InPat{$prefix}->{$k}; + $In = $utf8::InPat{$inprefix}->{$k}; if (exists $utf8::In{$In}) { - $file = "$encoding/In/$utf8::In{$In}"; + $file = "unicore/In/$utf8::In{$In}"; + print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG; last; } } } } - } else { - $file =~ s#^(Is|To)([A-Z].*)#$1/$2#; + + # This is separate from 'To' in preparation of Is.pl (a la In.pl). + if ((not defined $file) && $type =~ /^Is([A-Z][A-Za-z]*)$/) { + $file = "unicore/Is/$1"; + } + + if ((not defined $file) && $type =~ /^To([A-Z][A-Za-z]*)$/) { + $file = "unicore/To/$1"; + } } { - $list ||= - ( exists &{"${caller}::${type}"} && - eval { $caller->$type() } ) - || do "$file.pl" - || do "$encoding/$file.pl" - || do "$encoding/Is/${type}.pl" - || croak("Can't find Unicode character property \"$type\""); + $list ||= do "$file.pl" + || do "unicore/Is/$type.pl" + || croak("Can't find Unicode character property \"$type\""); } - $| = 1; - + my $extras; + my $bits; + if ($list) { my @tmp = split(/^/m, $list); my %seen; @@ -94,7 +97,7 @@ sub SWASHNEW { while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { my $char = $1; my $name = $2; - # print STDERR "$1 => $2\n" if $DEBUG; +# print STDERR "$1 => $2\n" if DEBUG; if ($char =~ /[-+!]/) { my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really my $subobj = $c->SWASHNEW($t, "", 0, 0, 0); @@ -104,7 +107,7 @@ sub SWASHNEW { } } - print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if $DEBUG; + print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; ${"${class}::{$type}"} = bless { TYPE => $type, @@ -124,7 +127,7 @@ sub SWASHGET { my $type = $self->{TYPE}; my $bits = $self->{BITS}; my $none = $self->{NONE}; - print STDERR "SWASHGET @_ [$type/$bits/$none]\n" if $DEBUG; + print STDERR "SWASHGET @_ [$type/$bits/$none]\n" if DEBUG; my $end = $start + $len; my $swatch = ""; my $key; @@ -150,7 +153,7 @@ sub SWASHGET { } for ($key = $min; $key <= $max; $key++) { last LINE if $key >= $end; -# print STDERR "$key => $val\n" if $DEBUG; +# print STDERR "$key => $val\n" if DEBUG; vec($swatch, $key - $start, $bits) = $val; ++$val if $val < $none; } @@ -162,7 +165,7 @@ sub SWASHGET { } for ($key = $min; $key <= $max; $key++, $val++) { last LINE if $key >= $end; -# print STDERR "$key => $val\n" if $DEBUG; +# print STDERR "$key => $val\n" if DEBUG; vec($swatch, $key - $start, $bits) = $val; } } @@ -179,7 +182,7 @@ sub SWASHGET { } for ($key = $min; $key <= $max; $key++) { last LINE if $key >= $end; -# print STDERR "$key => 1\n" if $DEBUG; +# print STDERR "$key => 1\n" if DEBUG; vec($swatch, $key - $start, 1) = 1; } } @@ -190,7 +193,7 @@ sub SWASHGET { while ($x =~ /^([-+!])(.*)/mg) { my $char = $1; my $name = $2; - print STDERR "INDIRECT $1 $2\n" if $DEBUG; + print STDERR "INDIRECT $1 $2\n" if DEBUG; my $otherbits = $self->{$name}->{BITS}; croak("SWASHGET size mismatch") if $bits < $otherbits; my $other = $self->{$name}->SWASHGET($start, $len); @@ -230,7 +233,7 @@ sub SWASHGET { } } } - if ($DEBUG) { + if (DEBUG) { print STDERR "CELLS "; for ($key = 0; $key < $len; $key++) { print STDERR vec($swatch, $key, $bits), " "; diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 12abd71..cf54c9a 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2021,6 +2021,18 @@ Recursively unlocks a shared sv. =for hackers Found in file sharedsv.c +=item sortsv + + +Sort an array in place. Here is an example: + + sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale); + + void sortsv(SV ** array, size_t num_elts, SVCOMPARE_t f) + +=for hackers +Found in file pp_ctl.c + =item SP Stack pointer. This is usually handled by C. See C and @@ -2385,22 +2397,22 @@ which guarantees to evaluate sv only once. =for hackers Found in file sv.h -=item SvNVx +=item SvNVX -Coerces the given SV to a double and returns it. Guarantees to evaluate -sv only once. Use the more efficent C otherwise. +Returns the raw value in the SV's NV slot, without checks or conversions. +Only use when you are sure SvNOK is true. See also C. - NV SvNVx(SV* sv) + NV SvNVX(SV* sv) =for hackers Found in file sv.h -=item SvNVX +=item SvNVx -Returns the raw value in the SV's NV slot, without checks or conversions. -Only use when you are sure SvNOK is true. See also C. +Coerces the given SV to a double and returns it. Guarantees to evaluate +sv only once. Use the more efficent C otherwise. - NV SvNVX(SV* sv) + NV SvNVx(SV* sv) =for hackers Found in file sv.h @@ -2594,21 +2606,21 @@ Like C, but converts sv to uft8 first if necessary. =for hackers Found in file sv.h -=item SvPVX +=item SvPVx -Returns a pointer to the physical string in the SV. The SV must contain a -string. +A version of C which guarantees to evaluate sv only once. - char* SvPVX(SV* sv) + char* SvPVx(SV* sv, STRLEN len) =for hackers Found in file sv.h -=item SvPVx +=item SvPVX -A version of C which guarantees to evaluate sv only once. +Returns a pointer to the physical string in the SV. The SV must contain a +string. - char* SvPVx(SV* sv, STRLEN len) + char* SvPVX(SV* sv) =for hackers Found in file sv.h @@ -2815,19 +2827,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B -in the C enum. Test these flags with the C macro. +Returns the type of the SV. See C. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B +in the C enum. Test these flags with the C macro. =for hackers Found in file sv.h diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 06434a2..9447b42 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1294,6 +1294,10 @@ Your code will be interpreted as an attempt to call a method named "elseif" for the class returned by the following block. This is unlikely to be what you want. +=item Empty %s + +(F) Empty C<\p{}> or C<\P{}>. + =item entering effective %s failed (F) While under the C pragma, switching the real and @@ -1940,6 +1944,10 @@ can vary from one line to the next. (S) This is an educated guess made in conjunction with the message "%s found where operator expected". Often the missing operator is a comma. +=item Missing right brace on %s + +(F) Missing right brace in C<\p{...}> or C<\P{...}>. + =item Missing right curly or square bracket (F) The lexer counted more opening curly or square brackets than closing diff --git a/pod/perlfaq3.pod b/pod/perlfaq3.pod index 0a1e42a..9979ab9 100644 --- a/pod/perlfaq3.pod +++ b/pod/perlfaq3.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq3 - Programming Tools ($Revision: 1.1 $, $Date: 2001/09/20 03:03:00 $) +perlfaq3 - Programming Tools ($Revision: 1.2 $, $Date: 2001/09/29 03:13:13 $) =head1 DESCRIPTION @@ -474,6 +474,56 @@ Information about malloc is in the F file in the source distribution. You can find out whether you are using perl's malloc by typing C. +Of course, the best way to save memory is to not do anything to waste +it in the first place. Good programming practices can go a long way +toward this: + +=over 4 + +=item * Don't slurp! + +Don't read an entire file into memory if you can process it line +by line. Or more concretely, use a loop like this: + + # + # Good Idea + # + while () { + # ... + } + +instead of this: + + # + # Bad Idea + # + @data = ; + foreach (@data) { + # ... + } + +When the files you're processing are small, it doesn't much matter which +way you do it, but it makes a huge difference when they start getting +larger. + +=item * Pass by reference + +Pass arrays and hashes by reference, not by value. For one thing, it's +the only way to pass multiple lists or hashes (or both) in a single +call/return. It also avoids creating a copy of all the contents. This +requires some judgment, however, because any changes will be propagated +back to the original data. If you really want to mangle (er, modify) a +copy, you'll have to sacrifice the memory needed to make one. + +=item * Tie large variables to disk. + +For "big" data stores (i.e. ones that exceed available memory) consider +using one of the DB modules to store it on disk instead of in RAM. This +will incur a penalty in access time, but that's probably better that +causing your hard disk to thrash due to massive swapping. + +=back + =head2 Is it unsafe to return a pointer to local data? No, Perl's garbage collection system takes care of this. diff --git a/pod/perlfaq4.pod b/pod/perlfaq4.pod index 4e574e9..72eb8d8 100644 --- a/pod/perlfaq4.pod +++ b/pod/perlfaq4.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq4 - Data Manipulation ($Revision: 1.1 $, $Date: 2001/09/20 03:03:00 $) +perlfaq4 - Data Manipulation ($Revision: 1.2 $, $Date: 2001/09/26 15:42:12 $) =head1 DESCRIPTION @@ -583,7 +583,7 @@ To make the first letter of each word upper case: This has the strange effect of turning "C" into "C". Sometimes you might want this. Other times you might need a -more thorough solution (Suggested by brian d. foy): +more thorough solution (Suggested by brian d foy): $string =~ s/ ( (^\w) #at the beginning of the line diff --git a/pod/perlfaq5.pod b/pod/perlfaq5.pod index 85b9f6a..bfd6d35 100644 --- a/pod/perlfaq5.pod +++ b/pod/perlfaq5.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq5 - Files and Formats ($Revision: 1.1 $, $Date: 2001/09/20 03:03:00 $) +perlfaq5 - Files and Formats ($Revision: 1.2 $, $Date: 2001/09/26 10:44:41 $) =head1 DESCRIPTION @@ -428,9 +428,9 @@ See L for an swrite() function. This one will do it for you: sub commify { - local $_ = shift; - 1 while s/^([-+]?\d+)(\d{3})/$1,$2/; - return $_; + my $number = shift; + 1 while ($number =~ s/^([-+]?\d+)(\d{3})/$1,$2/); + return $number; } $n = 23659019423.2331; diff --git a/pod/perlfaq9.pod b/pod/perlfaq9.pod index 892772e..3bf862f 100644 --- a/pod/perlfaq9.pod +++ b/pod/perlfaq9.pod @@ -1,45 +1,66 @@ =head1 NAME -perlfaq9 - Networking ($Revision: 1.1 $, $Date: 2001/09/20 03:03:00 $) +perlfaq9 - Networking ($Revision: 1.2 $, $Date: 2001/09/28 06:40:07 $) =head1 DESCRIPTION This section deals with questions related to networking, the internet, and a few on the web. -=head2 My CGI script runs from the command line but not the browser. (500 Server Error) +=head2 What is the correct form of response from a CGI script? -If you can demonstrate that you've read the following FAQs and that -your problem isn't something simple that can be easily answered, you'll -probably receive a courteous and useful reply to your question if you -post it on comp.infosystems.www.authoring.cgi (if it's something to do -with HTTP, HTML, or the CGI protocols). Questions that appear to be Perl -questions but are really CGI ones that are posted to comp.lang.perl.misc -may not be so well received. +(Alan Flavell answers...) -The useful FAQs and related documents are: +The Common Gateway Interface (CGI) specifies a software interface between +a program ("CGI script") and a web server (HTTPD). It is not specific +to Perl, and has its own FAQs and tutorials, and usenet group, +comp.infosystems.www.authoring.cgi - CGI FAQ - http://www.webthing.com/tutorials/cgifaq.html +The original CGI specification is at: http://hoohoo.ncsa.uiuc.edu/cgi/ - Web FAQ - http://www.boutell.com/faq/ +Current best-practice RFC draft at: http://CGI-Spec.Golux.Com/ - WWW Security FAQ - http://www.w3.org/Security/Faq/ +Other relevant documentation listed in: http://www.perl.org/CGI_MetaFAQ.html - HTTP Spec - http://www.w3.org/pub/WWW/Protocols/HTTP/ +These Perl FAQs very selectively cover some CGI issues. However, Perl +programmers are strongly advised to use the CGI.pm module, to take care +of the details for them. - HTML Spec - http://www.w3.org/TR/REC-html40/ - http://www.w3.org/pub/WWW/MarkUp/ +The similarity between CGI response headers (defined in the CGI +specification) and HTTP response headers (defined in the HTTP +specification, RFC2616) is intentional, but can sometimes be confusing. - CGI Spec - http://www.w3.org/CGI/ +The CGI specification defines two kinds of script: the "Parsed Header" +script, and the "Non Parsed Header" (NPH) script. Check your server +documentation to see what it supports. "Parsed Header" scripts are +simpler in various respects. The CGI specification allows any of the +usual newline representations in the CGI response (it's the server's +job to create an accurate HTTP response based on it). So "\n" written in +text mode is technically correct, and recommended. NPH scripts are more +tricky: they must put out a complete and accurate set of HTTP +transaction response headers; the HTTP specification calls for records +to be terminated with carriage-return and line-feed, i.e ASCII \015\012 +written in binary mode. + +Using CGI.pm gives excellent platform independence, including EBCDIC +systems. CGI.pm selects an appropriate newline representation +($CGI::CRLF) and sets binmode as appropriate. + +=head2 My CGI script runs from the command line but not the browser. (500 Server Error) + +If you can demonstrate that you've read the FAQs and that +your problem isn't something simple that can be easily answered, you'll +probably receive a courteous and useful reply to your question if you +post it on comp.infosystems.www.authoring.cgi (if it's something to do +with HTTP or the CGI protocols). Questions that appear to be Perl +questions but are really CGI ones that are posted to comp.lang.perl.misc +are not so well received. + +The useful FAQs, related documents, and troubleshooting guides are +listed in the CGI Meta FAQ: + + http://www.perl.org/CGI_MetaFAQ.html - CGI Security FAQ - http://www.go2net.com/people/paulp/cgi-security/safe-cgi.txt =head2 How can I get better error messages from a CGI program? @@ -233,34 +254,36 @@ regexp for breaking any arbitrary URI into components (Appendix B). =head2 How do I redirect to another page? -According to RFC 2616, "Hypertext Transfer Protocol -- HTTP/1.1", the -preferred method is to send a C header instead of a -C header: +Specify the complete URL of the destination (even if it is on the same +server). This is one of the two different kinds of CGI "Location:" +responses which are defined in the CGI specification for a Parsed Headers +script. The other kind (an absolute URLpath) is resolved internally to +the server without any HTTP redirection. The CGI specifications do not +allow relative URLs in either case. - Location: http://www.domain.com/newpage +Use of CGI.pm is strongly recommended. This example shows redirection +with a complete URL. This redirection is handled by the web browser. -Note that relative URLs in these headers can cause strange effects -because of "optimizations" that servers do. + use CGI qw/:standard/; - $url = "http://www.perl.com/CPAN/"; - print "Location: $url\n\n"; - exit; + my $url = 'http://www.perl.com/CPAN/'; + print redirect($url); -To target a particular frame in a frameset, include the "Window-target:" -in the header. - print < +This example shows a redirection with an absolute URLpath. This +redirection is handled by the local web server. - EOF + my $url = '/CPAN/index.html'; + print redirect($url); + + +But if coded directly, it could be as follows (the final "\n" is +shown separately, for clarity), using either a complete URL or +an absolute URLpath. + + print "Location: $url\n"; # CGI response header + print "\n"; # end of headers -To be correct to the spec, each of those virtual newlines should -really be physical C<"\015\012"> sequences by the time your message is -received by the client browser. Except for NPH scripts, though, that -local newline should get translated by your server into standard form, -so you shouldn't have a problem here, even if you are stuck on MacOS. -Everybody else probably won't even notice. =head2 How do I put a password on my web pages? @@ -282,16 +305,9 @@ a DBI compatible driver. HTTPD::UserAdmin supports files used by the =head2 How do I make sure users can't enter values into a form that cause my CGI script to do bad things? -Read the CGI security FAQ, at -http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html , and the -Perl/CGI FAQ at -http://www.perl.com/CPAN/doc/FAQs/cgi/perl-cgi-faq.html . +See the security references listed in the CGI Meta FAQ -In brief: use tainting (see L), which makes sure that data -from outside your script (eg, CGI parameters) are never used in -C or C calls. In addition to tainting, never use the -single-argument form of system() or exec(). Instead, supply the -command and arguments as a list, which prevents shell globbing. + http://www.perl.org/CGI_MetaFAQ.html =head2 How do I parse a mail header? diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 69e44ff..86a09ba 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3882,6 +3882,9 @@ C
.) Here is a typical code layout: # In the main program push @INC, new Foo(...); +Note that these hooks are also permitted to set the %INC entry +corresponding to the files they have loaded. See L. + For a yet-more-powerful import facility, see L and L. =item reset EXPR diff --git a/pod/perltodo.pod b/pod/perltodo.pod index fd991cb..1f57c0c 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -67,6 +67,28 @@ B<\b> assertion wants to be overloaded by a function. =item * +Allow for long form of the General Category Properties, e.g +C<\p{IsOpenPunctuation}>, not just the abbreviated form, e.g. +C<\p{IsPs}>. + +=item * + +Allow for the metaproperties C and C, and C; +C, C, C, C (note that +are large classes than the general categories C and C), +C, C, C, C, +C, , C, C, +C, C, C, C. + +There are also enumerated properties: C, +C, C, C. These +properties have multiple values: for uniqueness the property +value should be appended. For example, C<\p{IsAlphabetic}> +wouldbe the binary property, while C<\p{AlphabeticLineBreak}> +would mean the enumerated property. + +=item * + Case Mappings? http://www.unicode.org/unicode/reports/tr21/ lc(), uc(), lcfirst(), and ucfirst() work only for some of the @@ -84,7 +106,8 @@ class subtraction. =back See L for what's -there and what's missing. +there and what's missing. Almost all of Levels 2 and 3 is missing, +and as of 5.8.0 not even all of Level 1 is there. =head2 use Thread for iThreads diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index f27173c..4d6be20 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -168,13 +168,28 @@ match property) constructs. For instance, C<\p{Lu}> matches any character with the Unicode uppercase property, while C<\p{M}> matches any mark character. Single letter properties may omit the brackets, so that can be written C<\pM> also. Many predefined character classes -are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The -recommended names of the C classes are the official Unicode script -and block names but with all non-alphanumeric characters removed, for -example the block name C<"Latin-1 Supplement"> becomes -C<\p{InLatin1Supplement}>. +are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. -Here is the list as of Unicode 3.1.0 (the two-letter classes) and +The C<\p{Is...}> test for "general properties" such as "letter", +"digit", while the C<\p{In...}> test for Unicode scripts and blocks. + +The official Unicode script and block names have spaces and +dashes and separators, but for convenience you can have +dashes, spaces, and underbars at every word division, and +you need not care about correct casing. It is recommended, +however, that for consistency you use the following naming: +the official Unicode script or block name (see below for +the additional rules that apply to block names), with the whitespace +and dashes removed, and the words "uppercase-first-lowercase-otherwise". +That is, "Latin-1 Supplement" becomes "Latin1Supplement". + +You can also negate both C<\p{}> and C<\P{}> by introducing a caret +(^) between the first curly and the property name: C<\p{^InTamil}> is +equal to C<\P{InTamil}>. + +The C can be left out: C<\p{Greek}> is equal to C<\p{InGreek}>. + +Here is the list as of Unicode 3.1.1 (the two-letter classes) and as defined by Perl (the one-letter classes) (in Unicode materials what Perl calls C is often called C): diff --git a/pod/perlvar.pod b/pod/perlvar.pod index e61e8ed..6f9bd8d 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -1158,7 +1158,9 @@ already been included. If the file was loaded via a hook (e.g. a subroutine reference, see L for a description of these hooks), this hook is -inserted into %INC in place of a filename. +by default inserted into %INC in place of a filename. Note, however, +that the hook may have set the %INC entry by itself to provide some more +specific info. =item %ENV diff --git a/pp_ctl.c b/pp_ctl.c index 8b320bf..54587e9 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -1018,8 +1018,8 @@ PP(pp_sort) cx->blk_sub.oldcurpad = PL_curpad; cx->blk_sub.argarray = av; } - qsortsv((myorigmark+1), max, - is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv); + sortsv((myorigmark+1), max, + is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv); POPBLOCK(cx,PL_curpm); PL_stack_sp = newsp; @@ -1030,8 +1030,8 @@ PP(pp_sort) else { if (max > 1) { MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */ - qsortsv(ORIGMARK+1, max, - (PL_op->op_private & OPpSORT_NUMERIC) + sortsv(ORIGMARK+1, max, + (PL_op->op_private & OPpSORT_NUMERIC) ? ( (PL_op->op_private & OPpSORT_INTEGER) ? ( overloading ? amagic_i_ncmp : sv_i_ncmp) : ( overloading ? amagic_ncmp : sv_ncmp)) @@ -4036,8 +4036,18 @@ dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp) ** They make convenient temporary pointers in other places. */ -STATIC void -S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) +/* +=for apidoc sortsv + +Sort an array in place. Here is an example: + + sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale); + +=cut +*/ + +void +Perl_sortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) { int i, run; int sense; diff --git a/proto.h b/proto.h index 9c1115c..44e0a03 100644 --- a/proto.h +++ b/proto.h @@ -459,6 +459,7 @@ PERL_CALLCONV SV* Perl_mess(pTHX_ const char* pat, ...) ; PERL_CALLCONV SV* Perl_vmess(pTHX_ const char* pat, va_list* args); PERL_CALLCONV void Perl_qerror(pTHX_ SV* err); +PERL_CALLCONV void Perl_sortsv(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t f); PERL_CALLCONV int Perl_mg_clear(pTHX_ SV* sv); PERL_CALLCONV int Perl_mg_copy(pTHX_ SV* sv, SV* nsv, const char* key, I32 klen); PERL_CALLCONV MAGIC* Perl_mg_find(pTHX_ SV* sv, int type); @@ -1098,7 +1099,6 @@ STATIC I32 S_dopoptosub_at(pTHX_ PERL_CONTEXT* cxstk, I32 startingblock); STATIC void S_save_lines(pTHX_ AV *array, SV *sv); STATIC OP* S_doeval(pTHX_ int gimme, OP** startop); STATIC PerlIO * S_doopen_pmc(pTHX_ const char *name, const char *mode); -STATIC void S_qsortsv(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t f); #endif #if defined(PERL_IN_PP_HOT_C) || defined(PERL_DECL_PROT) diff --git a/regcomp.c b/regcomp.c index 4455730..96bafd3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2881,7 +2881,7 @@ tryagain: if (!RExC_end) { RExC_parse += 2; RExC_end = oldregxend; - vFAIL("Missing right brace on \\p{}"); + vFAIL2("Missing right brace on \\%c{}", UCHARAT(RExC_parse - 2)); } RExC_end++; } @@ -3085,7 +3085,7 @@ tryagain: /* FALL THROUGH */ default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(*p)) - vWARN2(p +1, "Unrecognized escape \\%c passed through", *p); + vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p)); goto normal_default; } break; @@ -3423,20 +3423,35 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (*RExC_parse == '{') { e = strchr(RExC_parse++, '}'); if (!e) - vFAIL("Missing right brace on \\p{}"); + vFAIL2("Missing right brace on \\%c{}", value); + while (isSPACE(UCHARAT(RExC_parse))) + RExC_parse++; + if (e == RExC_parse) + vFAIL2("Empty \\%c{}", value); n = e - RExC_parse; + while (isSPACE(UCHARAT(RExC_parse + n - 1))) + n--; } else { e = RExC_parse; n = 1; } if (!SIZE_ONLY) { + if (UCHARAT(RExC_parse) == '^') { + RExC_parse++; + n--; + value = value == 'p' ? 'P' : 'p'; /* toggle */ + while (isSPACE(UCHARAT(RExC_parse))) { + RExC_parse++; + n--; + } + } if (value == 'p') - Perl_sv_catpvf(aTHX_ listsv, - "+utf8::%.*s\n", (int)n, RExC_parse); + Perl_sv_catpvf(aTHX_ listsv, + "+utf8::%.*s\n", (int)n, RExC_parse); else - Perl_sv_catpvf(aTHX_ listsv, - "!utf8::%.*s\n", (int)n, RExC_parse); + Perl_sv_catpvf(aTHX_ listsv, + "!utf8::%.*s\n", (int)n, RExC_parse); } RExC_parse = e + 1; ANYOF_FLAGS(ret) |= ANYOF_UNICODE; diff --git a/sv.h b/sv.h index 7ca49a7..0b3aba2 100644 --- a/sv.h +++ b/sv.h @@ -983,11 +983,17 @@ otherwise. #define sv_utf8_upgrade_macro(sv) sv_utf8_upgrade_flags(sv, SV_GMAGIC) /* function style also available for sourcecompat */ +#undef sv_setsv #define sv_setsv(dsv, ssv) sv_setsv_macro(dsv, ssv) +#undef sv_catsv #define sv_catsv(dsv, ssv) sv_catsv_macro(dsv, ssv) +#undef sv_catpvn #define sv_catpvn(dsv, sstr, slen) sv_catpvn_macro(dsv, sstr, slen) +#undef sv_2pv #define sv_2pv(sv, lp) sv_2pv_macro(sv, lp) +#undef sv_pvn_force #define sv_pvn_force(sv, lp) sv_pvn_force_macro(sv, lp) +#undef sv_utf8_upgrade #define sv_utf8_upgrade(sv) sv_utf8_upgrade_macro(sv) #undef SvPV diff --git a/t/op/inccode.t b/t/op/inccode.t index 3ccea1a..bd66628 100644 --- a/t/op/inccode.t +++ b/t/op/inccode.t @@ -10,7 +10,7 @@ BEGIN { use File::Spec; require "test.pl"; -plan(tests => 39); +plan(tests => 43); my @tempfiles = (); @@ -134,3 +134,21 @@ is( ref $INC{'Quux2.pm'}, 'FooLoader', is( $INC{'Quux2.pm'}, $sref, ' key is correct in %INC' ); pop @INC; + +push @INC, sub { + my ($self, $filename) = @_; + if (substr($filename,0,4) eq 'Toto') { + $INC{$filename} = 'xyz'; + return get_temp_fh($filename); + } + else { + return undef; + } +}; + +ok( eval { require Toto; 1 }, 'require() magic via anonymous code ref' ); +ok( exists $INC{'Toto.pm'}, ' %INC sees it' ); +ok( ! ref $INC{'Toto.pm'}, q/ key isn't a ref in %INC/ ); +is( $INC{'Toto.pm'}, 'xyz', ' key is correct in %INC' ); + +pop @INC; diff --git a/t/op/pat.t b/t/op/pat.t index f5a2edd..50258b9 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..716\n"; +print "1..730\n"; BEGIN { chdir 't' if -d 't'; @@ -2127,7 +2127,56 @@ sub ok ($$) { print "ok 715\n"; } +print "# some Unicode properties\n"; + { + # Dashes, underbars, case. print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; print "ok 716\n"; + + # Complement, leading and trailing whitespace. + print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; + print "ok 717\n"; + + # No ^In, dashes, case. + print "not " unless "\x80" =~ /\p{latin-1-supplement}/; + print "ok 718\n"; +} + +{ + print "not " unless "a" =~ /\pL/; + print "ok 719\n"; + + print "not " unless "a" =~ /\p{IsLl}/; + print "ok 720\n"; + + print "not " if "a" =~ /\p{IsLu}/; + print "ok 721\n"; + + print "not " unless "A" =~ /\pL/; + print "ok 722\n"; + + print "not " unless "A" =~ /\p{IsLu}/; + print "ok 723\n"; + + print "not " if "A" =~ /\p{IsLl}/; + print "ok 724\n"; + + print "not " if "a" =~ /\PL/; + print "ok 725\n"; + + print "not " if "a" =~ /\P{IsLl}/; + print "ok 726\n"; + + print "not " unless "a" =~ /\P{IsLu}/; + print "ok 727\n"; + + print "not " if "A" =~ /\PL/; + print "ok 728\n"; + + print "not " if "A" =~ /\P{IsLu}/; + print "ok 729\n"; + + print "not " unless "A" =~ /\P{IsLl}/; + print "ok 730\n"; }