From: tla Date: Sun, 19 Jan 2014 19:36:34 +0000 (+0100) Subject: Work around MySQL storage encoding bug. Addresses #19 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6f9cd3b760e7b378fd954b0736baba18351138c8;p=scpubgit%2Fstemmatology.git Work around MySQL storage encoding bug. Addresses #19 --- diff --git a/persistence/lib/Text/Tradition/Directory.pm b/persistence/lib/Text/Tradition/Directory.pm index 1d2ff32..3afc469 100644 --- a/persistence/lib/Text/Tradition/Directory.pm +++ b/persistence/lib/Text/Tradition/Directory.pm @@ -4,7 +4,7 @@ use strict; use warnings; use Moose; use DBI; -use Encode qw/ decode_utf8 /; +use Encode qw/ encode decode_utf8 /; use KiokuDB::GC::Naive; use KiokuDB::TypeMap; use KiokuDB::TypeMap::Entry::Naive; @@ -241,6 +241,12 @@ has +typemap => ( }, ); +has '_mysql_utf8_hack' => ( + is => 'ro', + isa => 'Bool', + default => undef, +); + # Push some columns into the extra_args around BUILDARGS => sub { my $orig = shift; @@ -252,10 +258,20 @@ around BUILDARGS => sub { $args = { @_ }; } my @column_args; - if( $args->{'dsn'} =~ /^dbi/ ) { # We're using Backend::DBI + if( $args->{'dsn'} =~ /^dbi:(\w+):/ ) { # We're using Backend::DBI + my $dbtype = $1; @column_args = ( 'columns', [ 'name' => { 'data_type' => 'varchar', 'is_nullable' => 1 }, 'public' => { 'data_type' => 'bool', 'is_nullable' => 1 } ] ); + if( $dbtype eq 'mysql' && + exists $args->{extra_args}->{dbi_attrs} && + $args->{extra_args}->{dbi_attrs}->{mysql_enable_utf8} ) { + # There is a bad interaction with MySQL in utf-8 mode. + # Work around it here. + # TODO fix the underlying storage problem + $args->{extra_args}->{dbi_attrs}->{mysql_enable_utf8} = undef; + $args->{_mysql_utf8_hack} = 1; + } } my $ea = $args->{'extra_args'}; if( ref( $ea ) eq 'ARRAY' ) { @@ -358,9 +374,12 @@ sub _get_object_idlist { . $objclass . '"' ); $q->execute(); while( my @row = $q->fetchrow_array ) { - my( $id, $name ) = @row; - # Horrible horrible hack - $name = decode_utf8( $name ) if $dbtype eq 'mysql'; + # Horrible horrible hack. Re-convert the name to UTF-8. + if( $self->_mysql_utf8_hack ) { + # Convert the chars into a raw bytestring. + my $octets = encode( 'ISO-8859-1', $row[1] ); + $row[1] = decode_utf8( $octets ); + } push( @tlist, { 'id' => $row[0], 'name' => $row[1], 'public' => $row[2] } ); } } else { diff --git a/persistence/t/data/florilegium_graphml.xml b/persistence/t/data/florilegium_graphml.xml index f5ddc1d..baf075c 100644 --- a/persistence/t/data/florilegium_graphml.xml +++ b/persistence/t/data/florilegium_graphml.xml @@ -6,6 +6,7 @@ + @@ -19,13 +20,14 @@ - + 2.0 , base text 1 (a.c.) 1 + Florilegium Coislinianum β #END# 1 diff --git a/persistence/t/mysql_utf8.t b/persistence/t/mysql_utf8.t new file mode 100644 index 0000000..9aef7c9 --- /dev/null +++ b/persistence/t/mysql_utf8.t @@ -0,0 +1,46 @@ +#!/usr/bin/env perl + +use feature 'say'; +use strict; +use warnings; +use Test::More; +use Test::More::UTF8; +use Text::Tradition; +use Text::Tradition::Directory; + +my $mysql_connect_info = $ENV{TT_MYSQL_TEST}; +plan skip_all => 'Please set TT_MYSQL_TEST to an appropriate db to run this test' + unless $mysql_connect_info; + +my @dbconnect = split( /;/, $mysql_connect_info ); +my $dsn = 'dbi:mysql:'; +my $user; +my $pass; +foreach my $item ( @dbconnect ) { + my( $k, $v ) = split( /=/, $item ); + if( $k eq 'user' ) { + $user = $v; + } elsif( $k eq 'password' ) { + $pass = $v; + } else { + $dsn .= "$item;"; + } +} + +my $dir = Text::Tradition::Directory->new( 'dsn' => $dsn, + 'extra_args' => { 'user' => $user, 'password' => $pass, + dbi_attrs => { 'mysql_enable_utf8' => 1 } }, + ); + +my $scope = $dir->new_scope(); + +my $utf8_t = Text::Tradition->new( + 'input' => 'Self', + 'file' => 't/data/florilegium_graphml.xml' ); +my $uuid = $dir->save( $utf8_t ); +foreach my $tinfo( $dir->traditionlist ) { + next unless $tinfo->{id} eq $uuid; + like( $tinfo->{name}, qr/\x{3b2}/, "Tradition name encoded correctly" ); +} + +done_testing();