naive serialization of lexemes in GraphML

[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm

index 7a99102..0e78cbc 100644 (file)
--- a/lib/Text/Tradition/Collation/Reading.pm
+++ b/lib/Text/Tradition/Collation/Reading.pm
@@ -1,12 +1,15 @@
 package Text::Tradition::Collation::Reading;
 
 use Moose;
+use Module::Load;
+use YAML::XS;
 use overload '""' => \&_stringify, 'fallback' => 1;
 
 =head1 NAME
 
-Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
-    
+Text::Tradition::Collation::Reading - represents a reading (usually a word)
+in a collation.
+
 =head1 DESCRIPTION
 
 Text::Tradition is a library for representation and analysis of collated
@@ -19,12 +22,13 @@ a particular sequence
 
 =head2 new
 
-Creates a new reading in the given collation with the given attributes. 
+Creates a new reading in the given collation with the given attributes.
 Options include:
 
 =over 4
 
-=item collation - The Text::Tradition::Collation object to which this reading belongs.  Required.
+=item collation - The Text::Tradition::Collation object to which this
+reading belongs.  Required.
 
 =item id - A unique identifier for this reading. Required.
 
@@ -36,9 +40,11 @@ Options include:
 
 =item is_lacuna - The 'reading' represents a known gap in the text.
 
-=item is_ph - A temporary placeholder for apparatus parsing purposes.  Do not use unless you know what you are doing.
+=item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
+not use unless you know what you are doing.
 
-=item rank - The sequence number of the reading. This should probably not be set manually.
+=item rank - The sequence number of the reading. This should probably not
+be set manually.
 
 =back
 
@@ -82,6 +88,12 @@ has 'text' => (
        writer => 'alter_text',
        );
        
+has 'language' => (   # optional read-only string; lemmatize() appends it to "Text::Tradition::Language::" to pick a module
+       is => 'ro',
+       isa => 'Str',
+       predicate => 'has_language',   # lemmatize() checks this before trying to load a language module
+       );
+       
 has 'is_start' => (
        is => 'ro',
        isa => 'Bool',
@@ -116,7 +128,43 @@ has 'rank' => (
     is => 'rw',
     isa => 'Int',
     predicate => 'has_rank',
+    clearer => 'clear_rank',
     );
+    
+## For morphological analysis: the normal form and the lexemes of a reading
+
+has 'normal_form' => (   # read-write string; presumably the "normalized form" that the MORPHOLOGY POD says the lexeme strings should concatenate to (confirm)
+       is => 'rw',
+       isa => 'Str',
+       predicate => 'has_normal_form',   # true once a normal form has been set
+       );
+
+# Holds the Lexeme objects for the reading; no 'is' is given, so it is used through the delegated methods below.
+has 'reading_lexemes' => (
+       traits => ['Array'],
+       isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
+       handles => {
+               lexemes => 'elements',      # returns the attached Lexeme objects as a list
+               has_lexemes => 'count',     # number of lexemes, hence true when any are attached
+               clear_lexemes => 'clear',   # empties the list
+               add_lexeme => 'push',       # appends a Lexeme to the list
+               },
+       default => sub { [] },              # every reading starts with its own empty list
+       );
+       
+## For prefix/suffix readings
+
+has 'join_prior' => (   # read-only Bool, undef unless supplied; presumably marks a reading that attaches to the preceding one (confirm)
+       is => 'ro',
+       isa => 'Bool',
+       default => undef,
+       );
+       
+has 'join_next' => (    # read-only Bool, undef unless supplied; presumably marks a reading that attaches to the following one (confirm)
+       is => 'ro',
+       isa => 'Bool',
+       default => undef,
+       );
 
 
 around BUILDARGS => sub {
@@ -226,13 +274,69 @@ sub _stringify {
        return $self->id;
 }
 
+=head1 MORPHOLOGY
+
+Methods for the morphological information (if any) attached to readings.
+A reading may be made up of multiple lexemes; the concatenated lexeme
+strings ought to match the reading's normalized form.
+ 
+See L<Text::Tradition::Collation::Reading::Lexeme> for more information
+on Lexeme objects and their attributes.
+
+=head2 has_lexemes
+
+Returns a true value if the reading has any attached lexemes.
+
+=head2 lexemes
+
+Returns the Lexeme objects (if any) attached to the reading.
+
+=head2 clear_lexemes
+
+Wipes any associated Lexeme objects out of the reading.
+
+=head2 add_lexeme( $lexobj )
+
+Adds the Lexeme in $lexobj to the list of lexemes.
+
+=head2 lemmatize
+
+If the language of the reading is set, this method will use the appropriate
+Language model to determine the lexemes that belong to this reading.  See
+L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
+
+=cut
+
+sub lemmatize {   # hand this reading to the reading_lookup function of its language module
+       my $self = shift;
+       unless( $self->has_language ) {
+               warn "Please set a language to lemmatize a tradition";
+               return;   # no language set: warn and do nothing
+       }
+       my $mod = "Text::Tradition::Language::" . $self->language;   # module name is built from the language attribute
+       load( $mod );   # Module::Load; dies if the module cannot be loaded
+       $mod->can( 'reading_lookup' )->( $self );   # called as a plain function with the reading as sole argument; NOTE(review): dies with a generic error if the module lacks reading_lookup
+
+}
+
+# For graph serialization. Return the YAML::XS Dump string of an array ref
+# holding the reading's Lexeme objects (an empty array when there are none).
+sub _serialize_lexemes {
+       my $self = shift;
+       return Dump( [ $self->lexemes ] );   # Dump comes from the YAML::XS import at the top of the file
+}
+               
+
+## Utility methods
+
 sub TO_JSON {   # returns the reading's text; presumably the hook a JSON encoder calls for blessed objects (confirm)
        my $self = shift;
        return $self->text;
 }
 
+## TODO will need a throw() here
+
 no Moose;
 __PACKAGE__->meta->make_immutable;
 
 1;
-