Work around MySQL storage encoding bug. Addresses #19
tla [Sun, 19 Jan 2014 19:36:34 +0000 (20:36 +0100)]
persistence/lib/Text/Tradition/Directory.pm
persistence/t/data/florilegium_graphml.xml
persistence/t/mysql_utf8.t [new file with mode: 0644]

index 1d2ff32..3afc469 100644 (file)
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 use Moose;
 use DBI;
-use Encode qw/ decode_utf8 /;
+use Encode qw/ encode decode_utf8 /;
 use KiokuDB::GC::Naive;
 use KiokuDB::TypeMap;
 use KiokuDB::TypeMap::Entry::Naive;
@@ -241,6 +241,12 @@ has +typemap => (
   },
 );
 
+has '_mysql_utf8_hack' => (
+       is => 'ro',
+       isa => 'Bool',
+       default => undef,
+);
+
 # Push some columns into the extra_args
 around BUILDARGS => sub {
        my $orig = shift;
@@ -252,10 +258,20 @@ around BUILDARGS => sub {
                $args = { @_ };
        }
        my @column_args;
-       if( $args->{'dsn'} =~ /^dbi/ ) { # We're using Backend::DBI
+       if( $args->{'dsn'} =~ /^dbi:(\w+):/ ) { # We're using Backend::DBI
+               my $dbtype = $1;
                @column_args = ( 'columns',
                        [ 'name' => { 'data_type' => 'varchar', 'is_nullable' => 1 },
                          'public' => { 'data_type' => 'bool', 'is_nullable' => 1 } ] );
+               if( $dbtype eq 'mysql' && 
+                       exists $args->{extra_args}->{dbi_attrs} &&
+                       $args->{extra_args}->{dbi_attrs}->{mysql_enable_utf8} ) {
+                       # There is a bad interaction with MySQL in utf-8 mode.
+                       # Work around it here.
+                       # TODO fix the underlying storage problem
+                       $args->{extra_args}->{dbi_attrs}->{mysql_enable_utf8} = undef;
+                       $args->{_mysql_utf8_hack} = 1;
+               }
        }
        my $ea = $args->{'extra_args'};
        if( ref( $ea ) eq 'ARRAY' ) {
@@ -358,9 +374,12 @@ sub _get_object_idlist {
                        . $objclass . '"' );
                $q->execute();
                while( my @row = $q->fetchrow_array ) {
-                       my( $id, $name ) = @row;
-                       # Horrible horrible hack
-                       $name = decode_utf8( $name ) if $dbtype eq 'mysql';
+                       # Horrible horrible hack. Re-convert the name to UTF-8.
+                       if( $self->_mysql_utf8_hack ) {
+                               # Convert the chars into a raw bytestring.
+                               my $octets = encode( 'ISO-8859-1', $row[1] );
+                               $row[1] = decode_utf8( $octets );
+                       }
                        push( @tlist, { 'id' => $row[0], 'name' => $row[1], 'public' => $row[2] } );
                }
        } else {
index f5ddc1d..baf075c 100644 (file)
@@ -6,6 +6,7 @@
   <key attr.name="linear" attr.type="string" for="graph" id="dg3"/>
   <key attr.name="ac_label" attr.type="string" for="graph" id="dg4"/>
   <key attr.name="public" attr.type="boolean" for="graph" id="dg5"/>
+  <key attr.name="name" attr.type="string" for="graph" id="dg6"/>
   <key attr.name="text" attr.type="string" for="node" id="dn0"/>
   <key attr.name="is_end" attr.type="boolean" for="node" id="dn1"/>
   <key attr.name="is_start" attr.type="boolean" for="node" id="dn2"/>
   <key attr.name="extra" attr.type="boolean" for="edge" id="de4"/>
   <key attr.name="class" attr.type="string" for="edge" id="de5"/>
   <key attr.name="colocated" attr.type="boolean" for="edge" id="de6"/>
-  <graph edgedefault="directed" id="Tradition" parse.edgeids="canonical" parse.edges="376" parse.nodeids="canonical" parse.nodes="319" parse.order="nodesfirst">
+  <graph edgedefault="directed" id="Florilegium_Coislinianum_β" parse.edgeids="canonical" parse.edges="376" parse.nodeids="canonical" parse.nodes="319" parse.order="nodesfirst">
     <data key="dg0">2.0</data>
     <data key="dg1">, </data>
     <data key="dg2">base text</data>
     <data key="dg3">1</data>
     <data key="dg4"> (a.c.)</data>
     <data key="dg5">1</data>
+    <data key="dg6">Florilegium Coislinianum β</data>
     <node id="n0">
       <data key="dn0">#END#</data>
       <data key="dn1">1</data>
diff --git a/persistence/t/mysql_utf8.t b/persistence/t/mysql_utf8.t
new file mode 100644 (file)
index 0000000..9aef7c9
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env perl
+
+use feature 'say';
+use strict;
+use warnings;
+use Test::More;
+use Test::More::UTF8;
+use Text::Tradition;
+use Text::Tradition::Directory;
+
+my $mysql_connect_info = $ENV{TT_MYSQL_TEST};
+plan skip_all => 'Please set TT_MYSQL_TEST to an appropriate db to run this test'
+       unless $mysql_connect_info;
+
+my @dbconnect = split( /;/, $mysql_connect_info );
+my $dsn = 'dbi:mysql:';
+my $user;
+my $pass;
+foreach my $item ( @dbconnect ) {
+       my( $k, $v ) = split( /=/, $item );
+       if( $k eq 'user' ) {
+               $user = $v;
+       } elsif( $k eq 'password' ) {
+               $pass = $v;
+       } else {
+               $dsn .= "$item;";
+       }
+}
+
+my $dir = Text::Tradition::Directory->new( 'dsn' => $dsn, 
+    'extra_args' => { 'user' => $user, 'password' => $pass,
+       dbi_attrs => { 'mysql_enable_utf8' => 1 } },
+    );
+
+my $scope = $dir->new_scope();
+
+my $utf8_t = Text::Tradition->new(
+       'input' => 'Self',
+       'file'  => 't/data/florilegium_graphml.xml' );
+my $uuid = $dir->save( $utf8_t );
+foreach my $tinfo( $dir->traditionlist ) {
+       next unless $tinfo->{id} eq $uuid;
+       like( $tinfo->{name}, qr/\x{3b2}/, "Tradition name encoded correctly" );
+}
+
+done_testing();