From: Tara L Andrews Date: Tue, 29 May 2012 03:04:52 +0000 (+0200) Subject: muck about with serialization of lexeme wordforms; allow individual lexeme addressing X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2Fstemmatology.git;a=commitdiff_plain;h=da83693e61ca80c4c9a23583a0aadda94d087125 muck about with serialization of lexeme wordforms; allow individual lexeme addressing --- diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 1bd5244..5b11f85 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -167,6 +167,7 @@ has 'reading_lexemes' => ( traits => ['Array'], isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]', handles => { + lexeme => 'get', lexemes => 'elements', has_lexemes => 'count', clear_lexemes => 'clear', diff --git a/lib/Text/Tradition/Collation/Reading/Lexeme.pm b/lib/Text/Tradition/Collation/Reading/Lexeme.pm index 220d5d9..98814e0 100644 --- a/lib/Text/Tradition/Collation/Reading/Lexeme.pm +++ b/lib/Text/Tradition/Collation/Reading/Lexeme.pm @@ -125,6 +125,23 @@ sub BUILD { } } +around 'add_matching_form' => sub { + my $orig = shift; + my $self = shift; + my @realargs; + foreach my $a ( @_ ) { + if( ref( $a ) ) { + push( @realargs, $a ); + } else { + # Make the wordform from the string + my $wf = Text::Tradition::Collation::Reading::WordForm->new( + 'JSON' => $a ); + push( @realargs, $wf ); + } + } + return $self->$orig( @realargs ); +}; + =head2 disambiguate( $index ) Selects the word form at $index in the list of matching forms, and asserts @@ -141,6 +158,25 @@ sub disambiguate { $self->is_disambiguated( 1 ); } +=head2 has_form( $rep ) + +Returns the index of the matching form whose string representation is in $rep, +or else undef if none is found. + +=cut + +sub has_form { + my( $self, $rep ) = @_; + my $i = 0; + foreach my $mf ( $self->matching_forms ) { + my $struct = $mf->TO_JSON; + return $i if $struct eq $rep; + $i++; + } + return undef; +} + + sub TO_JSON { my $self = shift; my $hash = {}; diff --git a/lib/Text/Tradition/Collation/Reading/WordForm.pm b/lib/Text/Tradition/Collation/Reading/WordForm.pm index 6a6b56b..4335857 100644 --- a/lib/Text/Tradition/Collation/Reading/WordForm.pm +++ b/lib/Text/Tradition/Collation/Reading/WordForm.pm @@ -68,10 +68,12 @@ around BUILDARGS => sub { my $class = shift; my $args = @_ == 1 ? $_[0] : { @_ }; if( exists $args->{'JSON'} ) { - my $data = $args->{'JSON'}; - my $morph = Lingua::Features::Structure->from_string( $data->{'morphology'} ); - $data->{'morphology'} = $morph; - $args = $data; + $DB::single = 1; + my @data = split( / \/\/ /, $args->{'JSON'} ); + print STDERR "Attempting to parse " . $data[2] . " into structure"; + my $morph = Lingua::Features::Structure->from_string( $data[2] ); + $args = { 'language' => $data[0], 'lemma' => $data[1], + 'morphology' => $morph }; } $class->$orig( $args ); }; @@ -88,13 +90,12 @@ sub to_string { return JSON->new->convert_blessed(1)->encode( $self ); } +# Rather than spitting it out as a JSON hash, encode it as a string so that +# the XML serialization doesn't become insane. sub TO_JSON { my $self = shift; - return { - 'language' => $self->language, - 'lemma' => $self->lemma, - 'morphology' => $self->morphology->to_string - }; + return sprintf( "%s // %s // %s", $self->language, $self->lemma, + $self->morphology->to_string() ); } no Moose;