1 package Text::Tradition::Morphology;
5 use JSON qw/ from_json /;
8 use Text::Tradition::Collation::Reading::Lexeme;
12 Text::Tradition::Morphology - add-on to associate lemma and part-of-speech
13 information with Text::Tradition::Collation::Reading objects
17 has 'grammar_invalid' => (
23 has 'is_nonsense' => (
29 has 'normal_form' => (
# Private predicate and clearer; the 'around normal_form' wrapper in this file
# uses them to test whether an explicit value is stored and to remove it.
32 predicate => '_has_normal_form',
33 clearer => '_clear_normal_form',
36 # Holds the lexemes for the reading.
37 has 'reading_lexemes' => (
# Every element must be a Lexeme object.
39 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
# Delegated list operations exposed as methods on the reading:
#   lexemes       - the stored elements, as a list
#   has_lexemes   - the element count (true when at least one lexeme is stored)
#   clear_lexemes - empties the list
42 lexemes => 'elements',
43 has_lexemes => 'count',
44 clear_lexemes => 'clear',
# A sub is used so that each reading gets its own fresh empty array.
47 default => sub { [] },
52 # Make normal_form default to text, transparently.
53 around 'normal_form' => sub {
# Setting the normal form to the reading's own text is redundant, so the
# stored value is cleared rather than kept.
# NOTE(review): $arg is tested for truth, so a false argument ('' or '0')
# is handled like "no argument" by both branches -- confirm that is intended.
57 if( $arg && $arg eq $self->text ) {
58 $self->_clear_normal_form;
# No true argument was given and no explicit normal form is stored.
60 } elsif( !$arg && !$self->_has_normal_form ) {
69 Methods for the morphological information (if any) attached to readings.
70 A reading may be made up of multiple lexemes; the concatenated lexeme
71 strings ought to match the reading's normalized form.
73 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
74 on Lexeme objects and their attributes.
78 Returns a true value if the reading has any attached lexemes.
82 Returns the Lexeme objects (if any) attached to the reading.
86 Wipes any associated Lexeme objects out of the reading.
88 =head2 add_lexeme( $lexobj )
90 Adds the Lexeme in $lexobj to the list of lexemes.
94 If the language of the reading is set, this method will use the appropriate
95 Language model to determine the lexemes that belong to this reading. See
96 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
# A language is required: it selects the Language module used below.
102 unless( $self->has_language ) {
103 warn "Please set a language to lemmatize a tradition";
# The module name is derived directly from the reading's language.
106 my $mod = "Text::Tradition::Language::" . $self->language;
# Fetch the module's reading_lookup function and call it with this reading as
# its only argument (a plain function call, not a method call on $mod).
# NOTE(review): can() returns undef when $mod is not loaded or does not define
# reading_lookup, in which case this line dies -- confirm the module is loaded
# in the lines not shown here.
108 $mod->can( 'reading_lookup' )->( $self );
112 # For graph serialization. Return a JSON representation of the associated
114 sub _serialize_lexemes {
# allow_blessed/convert_blessed configure the encoder to accept blessed
# objects; with convert_blessed, JSON calls each object's TO_JSON method
# (presumably provided by the Lexeme class -- confirm).
116 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
# lexemes() returns a list, so it is wrapped in an array ref to produce a
# single JSON array.
117 return $json->encode( [ $self->lexemes ] );
120 # Given a JSON representation of the lexemes, instantiate them and add
121 # them to the reading.
122 sub _deserialize_lexemes {
123 my( $self, $json ) = @_;
# Assumes the JSON text decodes to an array reference -- TODO confirm against
# the serializer's output.
124 my $data = from_json( $json );
# An empty list is a no-op: the reading's existing lexemes are left untouched.
125 return unless @$data;
# Build one Lexeme per decoded entry, handing the entry to the constructor
# through its 'JSON' argument.
128 foreach my $lexhash ( @$data ) {
129 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
130 'JSON' => $lexhash ) );
# Replace, rather than add to, whatever lexemes the reading already holds.
132 $self->clear_lexemes;
133 $self->add_lexeme( @lexemes );
# A reading without lexemes is never reported as disambiguated.
138 return 0 unless $self->has_lexemes;
# True only when no lexeme reports itself as not disambiguated.
139 return !grep { !$_->is_disambiguated } $self->lexemes;
144 # While we are here, get rid of any extra wordforms from a disambiguated
# Only fully disambiguated readings are touched.
146 if( $self->disambiguated ) {
147 foreach my $lex ( $self->lexemes ) {
# Drop all matching forms, then re-add the lexeme's current form, presumably
# leaving it as the only matching form -- confirm against the Lexeme class.
148 $lex->clear_matching_forms();
149 $lex->add_matching_form( $lex->form );
154 around 'is_identical' => sub {
158 # If the base class returns true, do an extra check to make sure the
159 # lexemes also match.
160 my $answer = $self->$orig( $other );
# Both readings are disambiguated: compare their lexeme forms in order, each
# rendered with to_string and joined with '//'.
162 if( $self->disambiguated && $other->disambiguated ) {
163 my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
164 my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
# A mismatch between the joined forms turns the answer into undef (false).
165 $answer = undef unless $rform eq $uform;
# Exactly one of the two readings is disambiguated.
166 } elsif( $self->disambiguated xor $other->disambiguated ) {
173 around 'is_combinable' => sub {
176 # If the reading is marked with invalid grammar or as a nonsense reading,
177 # it is no longer combinable.
# NOTE(review): 'return undef' yields a one-element list (undef) in list
# context, which is true as a list -- callers should use scalar context.
178 return undef if $self->grammar_invalid || $self->is_nonsense;
# Otherwise defer to the wrapped (original) is_combinable.
179 return $self->$orig();
182 after '_combine' => sub {
# The two normal forms are joined with $joinstr, this reading's form first.
187 join( $joinstr, $self->normal_form, $other->normal_form ) );
188 # Combine the lexemes present in the readings
# Lexemes are merged only when both readings have some; in that case the other
# reading's lexemes are added to this reading's list.
189 if( $self->has_lexemes && $other->has_lexemes ) {
190 $self->add_lexeme( $other->lexemes );