From: Tara L Andrews Date: Thu, 3 May 2012 13:24:02 +0000 (+0200) Subject: use JSON for serialization rather than rolling own X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7604424bc5cc004f536d20099d9d348aa922ce7d;p=scpubgit%2Fstemmatology.git use JSON for serialization rather than rolling own --- diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index f090b34..5a60823 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -1,6 +1,7 @@ package Text::Tradition::Collation::Reading; use Moose; +use JSON qw/ from_json /; use Module::Load; use Text::Tradition::Error; use YAML::XS; @@ -328,55 +329,30 @@ sub lemmatize { } -# For graph serialization. Return a string representation of the associated +# For graph serialization. Return a JSON representation of the associated # reading lexemes. -# TODO Push this in to the Lexeme package. sub _serialize_lexemes { my $self = shift; - my @lexstrs; - foreach my $l ( $self->lexemes ) { - my @mf; - foreach my $wf ( $l->matching_forms ) { - push( @mf, $wf->to_string ); - } - my $form = $l->form ? $l->form->to_string : ''; - push( @lexstrs, join( '|L|', $l->language, $l->string, $form, - join( '|M|', @mf ) ) ); - } - return join( '|R|', @lexstrs ); + my $json = JSON->new->allow_blessed(1)->convert_blessed(1); + return $json->encode( [ $self->lexemes ] ); } +# Given a JSON representation of the lexemes, instantiate them and add +# them to the reading. sub _deserialize_lexemes { - my( $self, $data ) = @_; - return unless $data; + my( $self, $json ) = @_; + my $data = from_json( $json ); + return unless @$data; - # Need to have the lexeme modules in order to have lexemes. - eval { - use Text::Tradition::Collation::Reading::Lexeme; - use Text::Tradition::Collation::Reading::WordForm; - }; + # Need to have the lexeme module in order to have lexemes. + eval { use Text::Tradition::Collation::Reading::Lexeme; }; throw( $@ ) if $@; # Good to go - add the lexemes. my @lexemes; - foreach my $lexdata ( split( /\|R\|/, $data ) ) { - my( $lang, $lstring, $form, $allforms ) = split( /\|L\|/, $lexdata ); - my @wfdata; - push( @wfdata, $form ) if $form; - push( @wfdata, split( /\|M\|/, $allforms ) ); - my @wforms; - foreach my $wd ( @wfdata ) { - my $wf = Text::Tradition::Collation::Reading::WordForm->new( - 'serial' => $wd ); - push( @wforms, $wf ); - } - my %largs = ( 'language' => $lang, 'string' => $lstring ); - if( $form ) { - $largs{'form'} = shift @wforms; - $largs{'is_disambiguated'} = 1; - } - $largs{'wordform_matchlist'} = \@wforms; - push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new( %largs ) ); + foreach my $lexhash ( @$data ) { + push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new( + 'JSON' => $lexhash ) ); } $self->clear_lexemes; $self->add_lexeme( @lexemes ); diff --git a/lib/Text/Tradition/Collation/Reading/Lexeme.pm b/lib/Text/Tradition/Collation/Reading/Lexeme.pm index ca4802c..220d5d9 100644 --- a/lib/Text/Tradition/Collation/Reading/Lexeme.pm +++ b/lib/Text/Tradition/Collation/Reading/Lexeme.pm @@ -1,6 +1,7 @@ package Text::Tradition::Collation::Reading::Lexeme; use Moose; +use JSON (); use Module::Load; =head1 NAME @@ -91,6 +92,30 @@ has 'form' => ( writer => '_set_form', ); +around BUILDARGS => sub { + my $orig = shift; + my $class = shift; + my $args = @_ == 1 ? $_[0] : { @_ }; + if( exists $args->{JSON} ) { + my $data = $args->{JSON}; + if( exists $data->{'form'} && $data->{'form'} ) { + my $form = Text::Tradition::Collation::Reading::WordForm->new( + 'JSON' => $data->{'form'} ); + $data->{'form'} = $form; + } + if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) { + my @ml; + foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) { + push( @ml, Text::Tradition::Collation::Reading::WordForm->new( + 'JSON' => $wfjson ) ); + } + $data->{'wordform_matchlist'} = \@ml; + } + $args = $data; + } + $class->$orig( $args ); +}; + # Do auto-disambiguation if we were created with a single wordform sub BUILD { my $self = shift; @@ -116,30 +141,15 @@ sub disambiguate { $self->is_disambiguated( 1 ); } -=head2 lookup - -Uses the module for the declared language to look up the lexeme in the -language database (if any.) Sets the returned morphological matches in -matching_forms, and returns the list as an array of WordForm objects. - -=cut - -sub lookup { +sub TO_JSON { my $self = shift; - my $lang = $self->language; - my @answers; - try { - my $langmod = "Text::Tradition::Language::$lang"; - load( $langmod ); - @answers = $langmod->can( 'word_lookup' )->( $self->string ); - } catch { - throw( "No language module for $lang, or the module has no word_lookup functionality" ); - } - $self->clear_matching_forms; - $self->add_matching_form( @answers ); - return @answers; + my $hash = {}; + # Do the scalar keys + map { $hash->{$_} = $self->$_ if defined $self->$_ } + qw/ language string is_disambiguated form /; + $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches; + return $hash; } - no Moose; __PACKAGE__->meta->make_immutable; diff --git a/lib/Text/Tradition/Collation/Reading/WordForm.pm b/lib/Text/Tradition/Collation/Reading/WordForm.pm index 46a9f80..6a6b56b 100644 --- a/lib/Text/Tradition/Collation/Reading/WordForm.pm +++ b/lib/Text/Tradition/Collation/Reading/WordForm.pm @@ -1,7 +1,8 @@ package Text::Tradition::Collation::Reading::WordForm; -use Moose; use Lingua::Features::Structure; +use JSON (); +use Moose; =head1 NAME @@ -66,11 +67,11 @@ around BUILDARGS => sub { my $orig = shift; my $class = shift; my $args = @_ == 1 ? $_[0] : { @_ }; - if( exists $args->{'serial'} ) { - my( $lang, $lemma, $morph ) = split( /\+\+/, delete $args->{'serial'} ); - $args->{'language'} = $lang; - $args->{'lemma'} = $lemma; - $args->{'morphology'} = Lingua::Features::Structure->from_string( $morph ); + if( exists $args->{'JSON'} ) { + my $data = $args->{'JSON'}; + my $morph = Lingua::Features::Structure->from_string( $data->{'morphology'} ); + $data->{'morphology'} = $morph; + $args = $data; } $class->$orig( $args ); }; @@ -84,7 +85,16 @@ in equivalence testing. sub to_string { my $self = shift; - return join( '++', $self->language, $self->lemma, $self->morphology->to_string ); + return JSON->new->convert_blessed(1)->encode( $self ); +} + +sub TO_JSON { + my $self = shift; + return { + 'language' => $self->language, + 'lemma' => $self->lemma, + 'morphology' => $self->morphology->to_string + }; } no Moose;