From: Karl Williamson Date: Mon, 14 Dec 2009 16:18:29 +0000 (-0700) Subject: more regex folding tests X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1443f10d16f26271d2df9b4bdda55dbe024319c3;p=p5sagit%2Fp5-mst-13.2.git more regex folding tests --- diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t index 2514452..bbeaedd 100644 --- a/t/re/reg_fold.t +++ b/t/re/reg_fold.t @@ -16,25 +16,55 @@ open my $fh,"<",$file or die "Failed to read '$file': $!"; while (<$fh>) { chomp; my ($line,$comment)= split/\s+#\s+/, $_; - my ($cp,$type,@fc)=split/[\s;]+/,$line||''; + my ($cp,$type,@folded)=split/[\s;]+/,$line||''; next unless $type and ($type eq 'F' or $type eq 'C'); - $_="\\x{$_}" for @fc; + my $fold_above_latin1 = grep { hex("0x$_") > 255 } @folded; + $_="\\x{$_}" for @folded; my $cpv=hex("0x$cp"); - my $chr="chr(0x$cp)"; + my $chr="\\x{$cp}"; my @str; - push @str,$chr if $cpv<128 or $cpv>256; - if ($cpv<256) { - push @str,"do{my \$c=$chr; utf8::upgrade(\$c); \$c}" - } + foreach my $swap (0, 1) { # swap lhs and rhs, or not. + foreach my $charclass (0, 1) { # Put rhs in [...], or not + my $lhs; + my $rhs; + if ($swap) { + $lhs = join "", @folded; + $rhs = $chr; + $rhs = "[$rhs]" if $charclass; + } else { + $lhs = $chr; + $rhs = ""; + foreach my $rhs_char (@folded) { + $rhs .= '[' if $charclass; + $rhs .= $rhs_char; + $rhs .= ']' if $charclass; + } + } + $lhs = "\"$lhs\""; + $rhs = "/^$rhs\$/i"; - foreach my $str ( @str ) { - my $expr="$str=~/@fc/ix"; - my $t=($cpv > 256 || $str=~/^do/) ? "unicode" : "latin"; - push @tests, - qq[ok($expr,'$chr=~/@fc/ix - $comment ($t string)')]; - $tests[-1]="TODO: { local \$::TODO='[13:41] cue *It is all Greek to me* joke.';\n$tests[-1] }" - if $cp eq '0390' or $cp eq '03B0'; - $count++; + # Try both Latin1 and Unicode for code points below 256 + foreach my $upgrade ("", 'utf8::upgrade($c); ') { + if ($upgrade) { + next if $swap && $fold_above_latin1; + next if !$swap && $cpv > 255; + } + my $eval = "my \$c = $lhs; $upgrade\$c =~ $rhs"; + #print __LINE__, ": $eval\n"; + push @tests, qq[ok(eval '$eval', '$eval - $comment')]; + if (! $swap && ($cp eq '0390' || $cp eq '03B0')) { + $tests[-1]="TODO: { local \$::TODO='[13:41] cue *It is all Greek to me* joke.';\n$tests[-1] }" + } elsif ($charclass && @folded > 1 && $swap && ! $upgrade && ! $fold_above_latin1) { + $tests[-1]="TODO: { local \$::TODO='Multi-char, non-utf8 folded inside character class [ ] doesnt work';\n$tests[-1] }" + } elsif (! $upgrade && $cpv >= 128 && $cpv <= 255 && $cpv != 0xb5) { + $tests[-1]="TODO: { local \$::TODO='Most non-utf8 latin1 doesnt work';\n$tests[-1] }" + } elsif (! $swap && $charclass && @folded > 1) { + # There are a few of these that pass; most fail. + $tests[-1]="TODO: { local \$::TODO='Some multi-char, f8 folded inside character class [ ] doesnt work';\n$tests[-1] }" + } + $count++; + } + } } } eval join ";\n","plan tests=>".($count-1),@tests,"1"