X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation%2FReading%2FWordForm.pm;h=968f205fae8205692c5db68a43b9e30c1026a210;hb=896fe649a80575aaa06d3484c09579a2cb34ba8a;hp=4c819294be3d07aace72968e695e0810a38e2a41;hpb=6ad2ce785dc85432a6227c7b2efdae0364959714;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation/Reading/WordForm.pm b/lib/Text/Tradition/Collation/Reading/WordForm.pm index 4c81929..968f205 100644 --- a/lib/Text/Tradition/Collation/Reading/WordForm.pm +++ b/lib/Text/Tradition/Collation/Reading/WordForm.pm @@ -1,6 +1,10 @@ package Text::Tradition::Collation::Reading::WordForm; +use Lingua::Features::Structure; +use JSON (); use Moose; +use Text::Tradition::Error; +use TryCatch; =head1 NAME @@ -42,25 +46,54 @@ has 'language' => ( required => 1, ); -# TODO do we need this? -has 'form' => ( - is => 'ro', - isa => 'Str', - # required => 1, - ); - has 'lemma' => ( is => 'ro', isa => 'Str', required => 1, ); -has 'morphology' => ( +has 'morphstr' => ( is => 'ro', - isa => 'Lingua::Features::Structure', + isa => 'Str', required => 1, ); +around BUILDARGS => sub { + my $orig = shift; + my $class = shift; + my $args = @_ == 1 ? $_[0] : { @_ }; + if( exists $args->{'JSON'} ) { + my @data = split( / \/\/ /, $args->{'JSON'} ); + # print STDERR "Attempting to parse " . $data[2] . " into structure"; + $args = { 'language' => $data[0], 'lemma' => $data[1], + 'morphstr' => $data[2] }; + } elsif( exists $args->{'morphology'} ) { + # Backwards compat + my $mobj = delete $args->{'morphology'}; + $args->{'morphstr'} = $mobj->to_string() + if ref $mobj; + } + $class->$orig( $args ); +}; + +=head2 morphology + +Returns a Lingua::Features::Structure object that corresponds to morphstr. + +=cut + +sub morphology { + my $self = shift; + return unless $self->morphstr; + my $struct; + try { + $struct = Lingua::Features::Structure->from_string( $self->morphstr ); + } catch { + throw( "Morphology string " . $self->morphstr . " does not parse" ); + } + return $struct; +} + =head2 to_string Returns a string combination of language/lemma/morphology that can be used @@ -70,9 +103,24 @@ in equivalence testing. sub to_string { my $self = shift; - return join( '++', $self->language, $self->lemma, $self->morphology->to_string ); + return JSON->new->convert_blessed(1)->encode( $self ); +} + +# Rather than spitting it out as a JSON hash, encode it as a string so that +# the XML serialization doesn't become insane. +sub TO_JSON { + my $self = shift; + return sprintf( "%s // %s // %s", $self->language, $self->lemma, + $self->morphstr ); } +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'Wordform error', + 'message' => $_[0], + ); +} + no Moose; __PACKAGE__->meta->make_immutable;