From: Tara L Andrews Date: Thu, 3 May 2012 08:24:36 +0000 (+0200) Subject: naive serialization of lexems in GraphML X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7cd9f181280b397e3ef6c95845270d48368fc11f;p=scpubgit%2Fstemmatology.git naive serialization of lexems in GraphML --- diff --git a/Makefile.PL b/Makefile.PL index cf2a5e2..75a438a 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -30,6 +30,7 @@ requires( 'TryCatch' ); requires( 'XML::Easy::Syntax' ); requires( 'XML::LibXML' ); requires( 'XML::LibXML::XPathContext' ); +requires( 'YAML::XS' ); # For the morphology stuff requires( 'Lingua::TagSet::Multext' ); requires( 'Lingua::TagSet::TreeTagger' ); diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index cc26b0e..a77f74f 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -974,6 +974,9 @@ sub as_graphml { next unless $save_types{$attr->type_constraint->name}; $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } + # Extra custom key for the reading morphology + $reading_attributes{'lexemes'} = 'string'; + my %node_data_keys; my $ndi = 0; foreach my $datum ( sort keys %reading_attributes ) { @@ -1052,6 +1055,13 @@ sub as_graphml { $node_el->setAttribute( 'id', $node_xmlid ); foreach my $d ( keys %reading_attributes ) { my $nval = $n->$d; + # Custom serialization + if( $d eq 'lexemes' ) { + # If nval is a true value, we have lexemes so we need to + # serialize them. Otherwise set nval to undef so that the + # key is excluded from this reading. + $nval = $nval ? $n->_serialize_lexemes : undef; + } if( $rankoffset && $d eq 'rank' && $n ne $self->start ) { # Adjust the ranks within the subgraph. $nval = $n eq $self->end ? 
$end->rank - $rankoffset + 1 diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 4bdaecb..0e78cbc 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -2,6 +2,7 @@ package Text::Tradition::Collation::Reading; use Moose; use Module::Load; +use YAML::XS; use overload '""' => \&_stringify, 'fallback' => 1; =head1 NAME @@ -138,8 +139,7 @@ has 'normal_form' => ( predicate => 'has_normal_form', ); -# Holds the word form. If is_disambiguated is true, the form at index zero -# is the correct one. +# Holds the lexemes for the reading. has 'reading_lexemes' => ( traits => ['Array'], isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]', @@ -276,17 +276,34 @@ sub _stringify { =head1 MORPHOLOGY -A few methods to try to tack on morphological information. +Methods for the morphological information (if any) attached to readings. +A reading may be made up of multiple lexemes; the concatenated lexeme +strings ought to match the reading's normalized form. + +See L<Text::Tradition::Collation::Reading::Lexeme> for more information +on Lexeme objects and their attributes. + +=head2 has_lexemes + +Returns a true value if the reading has any attached lexemes. =head2 lexemes -=head2 has_lexemes +Returns the Lexeme objects (if any) attached to the reading. =head2 clear_lexemes -=head2 add_lexeme +Wipes any associated Lexeme objects out of the reading. + +=head2 add_lexeme( $lexobj ) -=head2 lemmatize +Adds the Lexeme in $lexobj to the list of lexemes. + +=head2 lemmatize + +If the language of the reading is set, this method will use the appropriate +Language model to determine the lexemes that belong to this reading. See +the Text::Tradition documentation if you wish to lemmatize an entire tradition. =cut @@ -302,6 +319,14 @@ sub lemmatize { } +# For graph serialization. Return a string representation of the associated +# reading lexemes. +sub _serialize_lexemes { + my $self = shift; + return Dump( [ $self->lexemes ] ); +} + + ## Utility methods sub TO_JSON {