1 package Text::Tradition::Morphology;
5 use JSON qw/ from_json /;
8 use Text::Tradition::Collation::Reading::Lexeme;
10 use vars qw/ $VERSION /;
15 Text::Tradition::Morphology - add-on to associate lemma and part-of-speech
16 information with Text::Tradition::Collation::Reading objects
20 requires 'is_identical', 'is_combinable', '_combine';
22 has 'grammar_invalid' => (
28 has 'is_nonsense' => (
# The reading's normalized form. Its predicate and clearer are private; the
# 'around normal_form' wrapper uses them to test for and drop a stored value.
34 has 'normal_form' => (
37 predicate => '_has_normal_form',
38 clearer => '_clear_normal_form',
41 # Holds the lexemes for the reading.
# The value is an array reference of Lexeme objects, empty by default.
42 has 'reading_lexemes' => (
44 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
# Public method name => array operation: lexemes lists the elements,
# has_lexemes gives the element count (true when any are attached), and
# clear_lexemes empties the list.
# NOTE(review): presumably Moose Array-trait delegations -- confirm against
# the enclosing 'handles' declaration.
47 lexemes => 'elements',
48 has_lexemes => 'count',
49 clear_lexemes => 'clear',
52 default => sub { [] },
57 # Make normal_form default to text, transparently.
# When the supplied value equals the reading's text, the stored normal form
# is cleared rather than kept as a duplicate of the text.
# NOTE(review): $arg is tested for truthiness, so a value of '0' or '' takes
# the "no argument" path -- confirm that is intended.
58 around 'normal_form' => sub {
62 if( $arg && $arg eq $self->text ) {
63 $self->_clear_normal_form;
# No value supplied and nothing stored.
65 } elsif( !$arg && !$self->_has_normal_form ) {
74 Methods for the morphological information (if any) attached to readings.
75 A reading may be made up of multiple lexemes; the concatenated lexeme
76 strings ought to match the reading's normalized form.
78 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
79 on Lexeme objects and their attributes.
83 Returns a true value if the reading has any attached lexemes.
87 Returns the Lexeme objects (if any) attached to the reading.
91 Wipes any associated Lexeme objects out of the reading.
93 =head2 add_lexeme( $lexobj )
95 Adds the Lexeme in $lexobj to the list of lexemes.
99 If the language of the reading is set, this method will use the appropriate
100 Language model to determine the lexemes that belong to this reading. See
101 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
107 unless( $self->has_language ) {
108 warn "Please set a language to lemmatize a tradition";
111 my $mod = "Text::Tradition::Language::" . $self->language;
113 $mod->can( 'reading_lookup' )->( $self );
117 # For graph serialization. Return a JSON representation of the associated
119 sub _serialize_lexemes {
# convert_blessed makes the encoder call TO_JSON on each blessed object;
# allow_blessed stops it from dying on a blessed value that has no TO_JSON
# (such a value is encoded as null).
121 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
# Encode all of the reading's lexemes as a single JSON array.
122 return $json->encode( [ $self->lexemes ] );
125 # Given a JSON representation of the lexemes, instantiate them and add
126 # them to the reading.
# Lexemes already on the reading are replaced. An empty JSON list returns
# early and leaves the reading as it was.
127 sub _deserialize_lexemes {
128 my( $self, $json ) = @_;
129 my $data = from_json( $json );
# Nothing to restore.
130 return unless @$data;
# Each decoded element is passed to the Lexeme constructor under the 'JSON' key.
133 foreach my $lexhash ( @$data ) {
134 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
135 'JSON' => $lexhash ) );
# Swap the old lexemes for the newly built ones.
137 $self->clear_lexemes;
138 $self->add_lexeme( @lexemes );
143 return 0 unless $self->has_lexemes;
144 return !grep { !$_->is_disambiguated } $self->lexemes;
149 # While we are here, get rid of any extra wordforms from a disambiguated
151 if( $self->disambiguated ) {
152 foreach my $lex ( $self->lexemes ) {
153 $lex->clear_matching_forms();
154 $lex->add_matching_form( $lex->form );
159 around 'is_identical' => sub {
163 # If the base class returns true, do an extra check to make sure the
164 # lexemes also match.
165 my $answer = $self->$orig( $other );
# Both readings are disambiguated: stringify each lexeme's form, join them
# with '//' in lexeme order, and compare; any difference sets the answer to undef.
167 if( $self->disambiguated && $other->disambiguated ) {
168 my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
169 my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
170 $answer = undef unless $rform eq $uform;
# Exactly one of the two readings is disambiguated.
171 } elsif( $self->disambiguated xor $other->disambiguated ) {
178 around 'is_combinable' => sub {
181 # If the reading is marked with invalid grammar or as a nonsense reading,
182 # it is no longer combinable.
183 return undef if $self->grammar_invalid || $self->is_nonsense;
# Otherwise the wrapped is_combinable decides.
184 return $self->$orig();
187 after '_combine' => sub {
192 join( $joinstr, $self->normal_form, $other->normal_form ) );
193 # Combine the lexemes present in the readings
# This happens only when both readings have lexemes; the other reading's
# lexemes are then added to this reading's list.
194 if( $self->has_lexemes && $other->has_lexemes ) {
195 $self->add_lexeme( $other->lexemes );
203 This package is free software and is provided "as is" without express
204 or implied warranty. You can redistribute it and/or modify it under
205 the same terms as Perl itself.
209 Tara L Andrews E<lt>aurum@cpan.orgE<gt>