1 package Text::Tradition::Morphology;
5 use JSON qw/ from_json /;
8 use Text::Tradition::Collation::Reading::Lexeme;
10 use vars qw/ $VERSION /;
15 Text::Tradition::Morphology - morphology plugin for Text::Tradition
19 The Text::Tradition::Morphology package enables lemma and part-of-speech
20 information for traditions and their Reading objects. This distribution
21 includes the L<Text::Tradition::Language> role for Traditions, the
22 L<Text::Tradition::Morphology> role (this package) for Readings, and a set
23 of Language::* modules for language-specific lemmatization.
25 See L<Text::Tradition::Collation::Reading::Lexeme> for more about the
26 morphology object structure.
# Methods the consuming class must already provide; this package wraps each
# of them with a method modifier further down.
30 requires 'is_identical', 'is_combinable', '_combine';
# Flag consulted by the is_combinable wrapper: a reading with invalid grammar
# is reported as not combinable.
32 has 'grammar_invalid' => (
# Flag consulted by the is_combinable wrapper alongside grammar_invalid.
38 has 'is_nonsense' => (
# Normalized form of the reading. The predicate and clearer are used by the
# accessor wrapper to detect the "nothing stored" state and to return to it.
44 has 'normal_form' => (
47 predicate => '_has_normal_form',
48 clearer => '_clear_normal_form',
51 # Holds the lexemes for the reading.
52 has 'reading_lexemes' => (
# Every element must be a Lexeme object.
54 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
# Delegated list operations exposed on the reading itself.
# NOTE(review): these look like native Array-trait handles; the traits
# declaration is not visible in this view -- confirm.
57 lexemes => 'elements',
58 has_lexemes => 'count',
59 clear_lexemes => 'clear',
# The default is a code ref that builds a new, empty array ref when invoked.
62 default => sub { [] },
67 # Make normal_form default to text, transparently.
68 around 'normal_form' => sub {
# A value identical to the reading text is not stored: the attribute is
# cleared instead, leaving it in the "nothing stored" state.
# NOTE(review): $arg is tested for truth, not definedness, so a false value
# such as '' or '0' is treated like a call with no argument -- confirm that
# this is intended.
72 if( $arg && $arg eq $self->text ) {
73 $self->_clear_normal_form;
# No (true) argument was passed and no normal form is currently stored.
75 } elsif( !$arg && !$self->_has_normal_form ) {
82 =head1 READING METHODS
84 Methods for the morphological information (if any) attached to readings.
85 A reading may be made up of multiple lexemes; the concatenated lexeme
86 strings ought to match the reading's normalized form.
88 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
89 on Lexeme objects and their attributes.
93 Returns a true value if the reading has any attached lexemes.
97 Returns the Lexeme objects (if any) attached to the reading.
101 Wipes any associated Lexeme objects out of the reading.
103 =head2 add_lexeme( $lexobj )
105 Adds the Lexeme in $lexobj to the list of lexemes.
109 If the language of the reading is set, this method will use the appropriate
110 Language model to determine the lexemes that belong to this reading. See
111 L<Text::Tradition::Language/lemmatize> if you wish to lemmatize an entire tradition.
# A language is required before lexemes can be looked up; without one only a
# warning is issued.
117 unless( $self->has_language ) {
118 warn "Please set a language to lemmatize a tradition";
# The language name selects the implementing module.
121 my $mod = "Text::Tradition::Language::" . $self->language;
# Fetch that module's reading_lookup and call it as a plain function, with
# this reading as its only argument.
# NOTE(review): can() returns undef when the module is not loaded or does not
# define reading_lookup, in which case this call dies -- confirm the module
# is loaded before this line.
123 $mod->can( 'reading_lookup' )->( $self );
127 # For graph serialization. Return a JSON representation of the associated
129 sub _serialize_lexemes {
# convert_blessed makes the encoder call TO_JSON on objects that provide it;
# allow_blessed makes it emit null, rather than die, for blessed values it
# cannot convert.
131 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
# The lexemes are wrapped in an array ref, so the result is always a JSON
# array (an empty one when the reading has no lexemes).
132 return $json->encode( [ $self->lexemes ] );
135 # Given a JSON representation of the lexemes, instantiate them and add
136 # them to the reading.
137 sub _deserialize_lexemes {
138 my( $self, $json ) = @_;
# from_json throws on malformed input; that error is not caught here.
139 my $data = from_json( $json );
# An empty list is a no-op: lexemes already on the reading are left alone.
# NOTE(review): assumes the top-level JSON value is an array -- confirm.
140 return unless @$data;
# Each decoded entry is handed to the Lexeme constructor under the 'JSON' key.
143 foreach my $lexhash ( @$data ) {
144 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
145 'JSON' => $lexhash ) );
# Replace, rather than extend, whatever lexemes the reading had before.
147 $self->clear_lexemes;
148 $self->add_lexeme( @lexemes );
# A reading with no lexemes is reported as not disambiguated.
153 return 0 unless $self->has_lexemes;
# True only when no lexeme reports is_disambiguated as false.
154 return !grep { !$_->is_disambiguated } $self->lexemes;
159 # While we are here, get rid of any extra wordforms from a disambiguated
# For a fully disambiguated reading, each lexeme's matching forms are emptied
# and then given back a single entry: the lexeme's own form.
161 if( $self->disambiguated ) {
162 foreach my $lex ( $self->lexemes ) {
163 $lex->clear_matching_forms();
164 $lex->add_matching_form( $lex->form );
169 around 'is_identical' => sub {
173 # If the base class returns true, do an extra check to make sure the
174 # lexemes also match.
# Start from the verdict of the wrapped is_identical.
175 my $answer = $self->$orig( $other );
# Both readings are fully disambiguated: compare their lexeme forms as
# '//'-joined strings, in lexeme order, and withdraw the verdict on mismatch.
177 if( $self->disambiguated && $other->disambiguated ) {
178 my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
179 my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
180 $answer = undef unless $rform eq $uform;
# Exactly one of the two readings is disambiguated.
181 } elsif( $self->disambiguated xor $other->disambiguated ) {
188 around 'is_combinable' => sub {
191 # If the reading is marked with invalid grammar or as a nonsense reading,
192 # it is no longer combinable.
# The wrapped method is not consulted at all when either flag is set.
# NOTE(review): 'return undef' yields a one-element list in list context,
# which is true there -- confirm callers only use this in scalar context.
193 return undef if $self->grammar_invalid || $self->is_nonsense;
# Otherwise defer to the wrapped method, called without extra arguments.
194 return $self->$orig();
197 after '_combine' => sub {
# The two readings' normal forms are concatenated with $joinstr between them.
202 join( $joinstr, $self->normal_form, $other->normal_form ) );
203 # Combine the lexemes present in the readings
# Lexemes are appended only when both readings have some; if just one of them
# does, this reading's lexeme list is left unchanged.
204 if( $self->has_lexemes && $other->has_lexemes ) {
205 $self->add_lexeme( $other->lexemes );
213 This package is free software and is provided "as is" without express
214 or implied warranty. You can redistribute it and/or modify it under
215 the same terms as Perl itself.
219 Tara L Andrews E<lt>aurum@cpan.orgE<gt>