From: tla
Date: Thu, 27 Nov 2014 11:17:19 +0000 (+0100)
Subject: add ASCII variant of sigil to witnesses. Needed for tla/stemmaweb#46
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e46fff9387ac32049cda8fcea12ad66ce9130e0d;p=scpubgit%2Fstemmatology.git

add ASCII variant of sigil to witnesses. Needed for tla/stemmaweb#46
---

Recovery note: this patch was restored from a copy in which all line
breaks and indentation had been collapsed. Hunk line counts agree with
the @@ headers, but blank context lines and leading whitespace are
reconstructed, so apply with whitespace-insensitive matching
(GNU patch -l, or git apply --ignore-whitespace).

Review notes (not part of the change):
 * ascii_sigil is computed once, in BUILD. The attribute comment says it
   is set "when the sigil is set"; presumably a later _set_sigil call
   would leave it stale -- confirm against the rest of Witness.pm.
 * Non-ASCII characters are appended as bare hex with no delimiter, so
   distinct sigla can share one ASCII form (U+0531 alone and the ASCII
   sigil "531" both yield "_A_531", if the Sigil type admits the latter).

diff --git a/base/lib/Text/Tradition/Witness.pm b/base/lib/Text/Tradition/Witness.pm
index 37e6a66..9e80c84 100644
--- a/base/lib/Text/Tradition/Witness.pm
+++ b/base/lib/Text/Tradition/Witness.pm
@@ -122,6 +122,7 @@ readings) in the collation.
 
 =begin testing
 
+use Test::More::UTF8 qw/ -utf8 /;
 use Text::Tradition;
 my $trad = Text::Tradition->new( 'name' => 'test tradition' );
 my $c = $trad->collation;
@@ -175,6 +176,24 @@ if( $xpwit ) {
 	is( @{$xpwit->text}, 157, "Got correct text length" );
 }
 
+# Test non-ASCII sigla
+my $at = Text::Tradition->new(
+	name => 'armexample',
+	input => 'Tabular',
+	excel => 'xlsx',
+	file => 't/data/armexample.xlsx' );
+foreach my $wit ( $at->witnesses ) {
+	my $sig = $wit->sigil;
+	if( $sig =~ /^\p{ASCII}+$/ ) {
+		is( $wit->ascii_sigil, '_A_' . $sig,
+			"Correct ASCII sigil for ASCII witness $sig" );
+	} else {
+		# This is our non-ASCII example
+		is( $wit->ascii_sigil, '_A_5315622',
+			"Correct ASCII sigil for non-ASCII witness $sig" );
+	}
+}
+
 =end testing
 
 
@@ -199,6 +218,15 @@ has 'sigil' => (
 	writer => '_set_sigil',
 	);
 
+# An ASCII version of the sigil, for any applications that cannot
+# deal with Unicode. This should not be set directly, but will be
+# set automatically when the sigil is set.
+has 'ascii_sigil' => (
+	is => 'ro',
+	isa => 'Sigil',
+	writer => '_set_ascii_sigil',
+	);
+
 # Other identifying information
 has 'identifier' => (
 	is => 'rw',
@@ -317,6 +345,17 @@ sub BUILD {
 	if( $self->sourcetype eq 'collation' ) {
 		$self->is_collated( 1 );
 	}
+	# Make an ASCII sigil. Convert each non-ASCII character to its Unicode
+	# number and just string them together. 
+	my $asig = '_A_';
+	foreach my $char ( split( '', $self->sigil ) ) {
+		if( $char =~ /\p{ASCII}/ ) {
+			$asig .= $char;
+		} else {
+			$asig .= sprintf( "%x", ord( $char ) );
+		}
+	}
+	$self->_set_ascii_sigil( $asig ) ;
 	return $self;
 }
 
diff --git a/base/t/text_tradition_witness.t b/base/t/text_tradition_witness.t
index 4badcb6..8c2dea9 100644
--- a/base/t/text_tradition_witness.t
+++ b/base/t/text_tradition_witness.t
@@ -8,6 +8,7 @@ $| = 1;
 
 # =begin testing
 {
+use Test::More::UTF8 qw/ -utf8 /;
 use Text::Tradition;
 my $trad = Text::Tradition->new( 'name' => 'test tradition' );
 my $c = $trad->collation;
@@ -60,6 +61,24 @@ if( $xpwit ) {
 	ok( !$xpwit->is_layered, "Picked up no correction layer" );
 	is( @{$xpwit->text}, 157, "Got correct text length" );
 }
+
+# Test non-ASCII sigla
+my $at = Text::Tradition->new(
+	name => 'armexample',
+	input => 'Tabular',
+	excel => 'xlsx',
+	file => 't/data/armexample.xlsx' );
+foreach my $wit ( $at->witnesses ) {
+	my $sig = $wit->sigil;
+	if( $sig =~ /^\p{ASCII}+$/ ) {
+		is( $wit->ascii_sigil, '_A_' . $sig,
+			"Correct ASCII sigil for ASCII witness $sig" );
+	} else {
+		# This is our non-ASCII example
+		is( $wit->ascii_sigil, '_A_5315622',
+			"Correct ASCII sigil for non-ASCII witness $sig" );
+	}
+}
 }
 
 