1 package Text::Tradition::Morphology;
5 use JSON qw/ from_json /;
8 use Text::Tradition::Collation::Reading::Lexeme;
10 use vars qw/ $VERSION /;
15 Text::Tradition::Morphology - morphology plugin for Text::Tradition
19 The Text::Tradition::Morphology package enables lemma and part-of-speech
20 information for traditions and their Reading objects. This distribution
21 includes the L<Text::Tradition::Language> role for Traditions, the
22 L<Text::Tradition::Morphology> role (this package) for Readings, and a set
23 of Language::* modules for language-specific lemmatization.
25 See L<Text::Tradition::Collation::Reading::Lexeme> for more about the
26 morphology object structure.
30 requires 'is_identical', 'is_combinable', '_combine';
35 predicate => 'has_language',
38 has 'grammar_invalid' => (
44 has 'is_nonsense' => (
50 has 'normal_form' => (
53 predicate => '_has_normal_form',
54 clearer => '_clear_normal_form',
57 # Holds the lexemes for the reading.
58 has 'reading_lexemes' => (
60 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
63 lexemes => 'elements',
64 has_lexemes => 'count',
65 clear_lexemes => 'clear',
68 default => sub { [] },
73 # Make normal_form default to text, transparently.
74 around 'normal_form' => sub {
78 if( $arg && $arg eq $self->text ) {
79 $self->_clear_normal_form;
81 } elsif( !$arg && !$self->_has_normal_form ) {
88 =head1 READING METHODS
90 Methods for the morphological information (if any) attached to readings.
91 A reading may be made up of multiple lexemes; the concatenated lexeme
92 strings ought to match the reading's normalized form.
94 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
95 on Lexeme objects and their attributes.
99 Returns a true value if the reading has any attached lexemes.
103 Returns the Lexeme objects (if any) attached to the reading.
107 Wipes any associated Lexeme objects out of the reading.
109 =head2 add_lexeme( $lexobj )
111 Adds the Lexeme in $lexobj to the list of lexemes.
115 If the language of the reading is set, this method will use the appropriate
116 Language model to determine the lexemes that belong to this reading. See
117 L<Text::Tradition::Language::lemmatize> if you wish to lemmatize an entire tradition.
123 unless( $self->has_language ) {
124 warn "Please set a language to lemmatize a tradition";
127 my $mod = "Text::Tradition::Language::" . $self->language;
129 $mod->can( 'reading_lookup' )->( $self );
133 # For graph serialization. Return a JSON representation of the associated
135 sub _serialize_lexemes {
137 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
138 return $json->encode( [ $self->lexemes ] );
141 # Given a JSON representation of the lexemes, instantiate them and add
142 # them to the reading.
143 sub _deserialize_lexemes {
144 my( $self, $json ) = @_;
145 my $data = from_json( $json );
146 return unless @$data;
149 foreach my $lexhash ( @$data ) {
150 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
151 'JSON' => $lexhash ) );
153 $self->clear_lexemes;
154 $self->add_lexeme( @lexemes );
159 return 0 unless $self->has_lexemes;
160 return !grep { !$_->is_disambiguated } $self->lexemes;
165 # While we are here, get rid of any extra wordforms from a disambiguated
167 if( $self->disambiguated ) {
168 foreach my $lex ( $self->lexemes ) {
169 $lex->clear_matching_forms();
170 $lex->add_matching_form( $lex->form );
177 Call the 'regularize' function of the appropriate language model on our
178 own reading text. This is a rules-based function distinct from 'normal_form',
179 which can be set to any arbitrary string.
183 # TODO Test this stuff
187 if( $self->has_language ) {
188 # If we do have a language, regularize the tokens in $answer.
189 my $mod = 'Text::Tradition::Language::' . $self->language;
190 eval { load( $mod ); };
191 # If a module doesn't exist for our language, use the base routine
192 $mod = 'Text::Tradition::Language::Base' if $@;
193 return $mod->can( 'regularize' )->( $self->text );
199 around 'is_identical' => sub {
203 # If the base class returns true, do an extra check to make sure the
204 # lexemes also match.
205 my $answer = $self->$orig( $other );
207 if( $self->disambiguated && $other->disambiguated ) {
208 my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
209 my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
210 $answer = undef unless $rform eq $uform;
211 } elsif( $self->disambiguated xor $other->disambiguated ) {
218 around 'is_combinable' => sub {
221 # If the reading is marked with invalid grammar or as a nonsense reading,
222 # it is no longer combinable.
223 return undef if $self->grammar_invalid || $self->is_nonsense;
224 return $self->$orig();
227 after '_combine' => sub {
232 join( $joinstr, $self->normal_form, $other->normal_form ) );
233 # Combine the lexemes present in the readings
234 if( $self->has_lexemes && $other->has_lexemes ) {
235 $self->add_lexeme( $other->lexemes );
243 This package is free software and is provided "as is" without express
244 or implied warranty. You can redistribute it and/or modify it under
245 the same terms as Perl itself.
249 Tara L Andrews E<lt>aurum@cpan.orgE<gt>