From: Tara L Andrews <tla@mit.edu>
Date: Thu, 3 May 2012 08:24:36 +0000 (+0200)
Subject: naive serialization of lexemes in GraphML
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7cd9f181280b397e3ef6c95845270d48368fc11f;p=scpubgit%2Fstemmatology.git

naive serialization of lexemes in GraphML
---

diff --git a/Makefile.PL b/Makefile.PL
index cf2a5e2..75a438a 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -30,6 +30,7 @@ requires( 'TryCatch' );
 requires( 'XML::Easy::Syntax' );
 requires( 'XML::LibXML' );
 requires( 'XML::LibXML::XPathContext' );
+requires( 'YAML::XS' );
 # For the morphology stuff
 requires( 'Lingua::TagSet::Multext' );
 requires( 'Lingua::TagSet::TreeTagger' );
diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm
index cc26b0e..a77f74f 100644
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -974,6 +974,9 @@ sub as_graphml {
 		next unless $save_types{$attr->type_constraint->name};
 		$reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
 	}
+	# Extra custom key for the reading morphology
+	$reading_attributes{'lexemes'} = 'string';
+	
     my %node_data_keys;
     my $ndi = 0;
     foreach my $datum ( sort keys %reading_attributes ) {
@@ -1052,6 +1055,13 @@ sub as_graphml {
         $node_el->setAttribute( 'id', $node_xmlid );
         foreach my $d ( keys %reading_attributes ) {
         	my $nval = $n->$d;
+        	# Custom serialization
+        	if( $d eq 'lexemes' ) {
+				# If nval is a true value, we have lexemes so we need to
+				# serialize them. Otherwise set nval to undef so that the
+				# key is excluded from this reading.
+        		$nval = $nval ? $n->_serialize_lexemes : undef;
+        	}
         	if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
         		# Adjust the ranks within the subgraph.
         		$nval = $n eq $self->end ? $end->rank - $rankoffset + 1 
diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm
index 4bdaecb..0e78cbc 100644
--- a/lib/Text/Tradition/Collation/Reading.pm
+++ b/lib/Text/Tradition/Collation/Reading.pm
@@ -2,6 +2,7 @@ package Text::Tradition::Collation::Reading;
 
 use Moose;
 use Module::Load;
+use YAML::XS;
 use overload '""' => \&_stringify, 'fallback' => 1;
 
 =head1 NAME
@@ -138,8 +139,7 @@ has 'normal_form' => (
 	predicate => 'has_normal_form',
 	);
 
-# Holds the word form. If is_disambiguated is true, the form at index zero
-# is the correct one.
+# Holds the lexemes for the reading.
 has 'reading_lexemes' => (
 	traits => ['Array'],
 	isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
@@ -276,17 +276,34 @@ sub _stringify {
 
 =head1 MORPHOLOGY
 
-A few methods to try to tack on morphological information.
+Methods for the morphological information (if any) attached to readings.
+A reading may be made up of multiple lexemes; the concatenated lexeme
+strings ought to match the reading's normalized form.
+ 
+See L<Text::Tradition::Collation::Reading::Lexeme> for more information
+on Lexeme objects and their attributes.
+
+=head2 has_lexemes
+
+Returns a true value if the reading has any attached lexemes.
 
 =head2 lexemes
 
-=head2 has_lexemes
+Returns the Lexeme objects (if any) attached to the reading.
 
 =head2 clear_lexemes
 
-=head2 add_lexeme
+Wipes any associated Lexeme objects out of the reading.
+
+=head2 add_lexeme( $lexobj )
 
-=head2 lemmatize 
+Adds the Lexeme in $lexobj to the list of lexemes.
+
+=head2 lemmatize
+
+If the language of the reading is set, this method will use the appropriate
+Language model to determine the lexemes that belong to this reading.  See
+L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
 
 =cut
 
@@ -302,6 +319,14 @@ sub lemmatize {
 
 }
 
+# For graph serialization. Return a YAML string (produced by YAML::XS's
+# Dump) that represents the list of lexemes attached to this reading.
+sub _serialize_lexemes {
+	my $self = shift;
+	return Dump( [ $self->lexemes ] );
+}
+		
+
 ## Utility methods
 
 sub TO_JSON {