# For the morphology stuff
requires( 'Lingua::TagSet::Multext' );
requires( 'Lingua::TagSet::TreeTagger' );
+requires( 'Lingua::Features::Structure' );
build_requires( 'Test::Warn' );
&WriteAll;
use Moose;
use Module::Load;
+use Text::Tradition::Error;
use YAML::XS;
use overload '""' => \&_stringify, 'fallback' => 1;
$class->$orig( $args );
};
+# Look for a lexeme-string argument in the build args.
+sub BUILD {
+ my( $self, $args ) = @_;
+ if( exists $args->{'lexemes'} ) {
+ $self->_deserialize_lexemes( $args->{'lexemes'} );
+ }
+}
+
=head2 is_meta
A meta attribute (ha ha), which should be true if any of our 'special'
# For graph serialization. Return a string representation of the associated
# reading lexemes.
+# TODO Push this in to the Lexeme package.
sub _serialize_lexemes {
my $self = shift;
my @lexstrs;
}
return join( '|R|', @lexstrs );
}
-
+
+sub _deserialize_lexemes {
+ my( $self, $data ) = @_;
+ return unless $data;
+
+ # Need to have the lexeme modules in order to have lexemes.
+ eval {
+ use Text::Tradition::Collation::Reading::Lexeme;
+ use Text::Tradition::Collation::Reading::WordForm;
+ };
+ throw( $@ ) if $@;
+
+ # Good to go - add the lexemes.
+ my @lexemes;
+ foreach my $lexdata ( split( /\|R\|/, $data ) ) {
+ my( $lang, $lstring, $form, $allforms ) = split( /\|L\|/, $lexdata );
+ my @wfdata;
+ push( @wfdata, $form ) if $form;
+ push( @wfdata, split( /\|M\|/, $allforms ) );
+ my @wforms;
+ foreach my $wd ( @wfdata ) {
+ my $wf = Text::Tradition::Collation::Reading::WordForm->new(
+ 'serial' => $wd );
+ push( @wforms, $wf );
+ }
+ my %largs = ( 'language' => $lang, 'string' => $lstring );
+ if( $form ) {
+ $largs{'form'} = shift @wforms;
+ $largs{'is_disambiguated'} = 1;
+ }
+ $largs{'wordform_matchlist'} = \@wforms;
+ push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new( %largs ) );
+ }
+ $self->clear_lexemes;
+ $self->add_lexeme( @lexemes );
+}
## Utility methods
return $self->text;
}
-## TODO will need a throw() here
+sub throw {
+ Text::Tradition::Error->throw(
+ 'ident' => 'Reading error',
+ 'message' => $_[0],
+ );
+}
no Moose;
__PACKAGE__->meta->make_immutable;
package Text::Tradition::Collation::Reading::WordForm;
use Moose;
+use Lingua::Features::Structure;
=head1 NAME
required => 1,
);
+around BUILDARGS => sub {
+ my $orig = shift;
+ my $class = shift;
+ my $args = @_ == 1 ? $_[0] : { @_ };
+ if( exists $args->{'serial'} ) {
+ my( $lang, $lemma, $morph ) = split( /\+\+/, delete $args->{'serial'} );
+ $args->{'language'} = $lang;
+ $args->{'lemma'} = $lemma;
+ $args->{'morphology'} = Lingua::Features::Structure->from_string( $morph );
+ }
+ $class->$orig( $args );
+};
+
=head2 to_string
Returns a string combination of language/lemma/morphology that can be used
--- /dev/null
+use lib 'lib';
+use strict;
+use warnings;
+use Test::More;
+use Text::Tradition;
+
+eval "use Flemm";
+plan skip_all => "Flemm 3.1 required" if $@;
+
+binmode( STDOUT, ':utf8' );
+binmode( STDERR, ':utf8' );
+
+my $tf = Text::Tradition->new(
+ 'input' => 'Self',
+ 'file' => 't/data/besoin.xml',
+ 'language' => 'French' );
+
+$tf->lemmatize();
+my $graphmlstr = $tf->collation->as_graphml;
+like( $graphmlstr, qr/graphml xmlns/,
+ "Serialized tradition after lemmatization" );
+
+my $tf2 = Text::Tradition->new(
+ input => 'Self',
+ string => $graphmlstr,
+ language => 'French' );
+
+is( ref $tf2, 'Text::Tradition', "Re-parsed tradition with lemmatization" );
+is( $tf->name, $tf2->name, "Traditions have same name" );
+foreach my $r ( $tf->collation->readings ) {
+ my $r2 = $tf2->collation->reading( $r->id );
+ is( ref $r2, 'Text::Tradition::Collation::Reading',
+ "Reading $r exists in new tradition" );
+ if( $r2 ) {
+ is( scalar $r->lexemes, scalar $r2->lexemes,
+ "Same number of lexemes in new tradition for $r" );
+ }
+}
+
+done_testing();