make no-language-module warning less irritating
[scpubgit/stemmatology.git] / morphology / lib / Text / Tradition / Morphology.pm
CommitLineData
a445ce40 1package Text::Tradition::Morphology;
2
3use strict;
4use warnings;
5use JSON qw/ from_json /;
6use Moose::Role;
7use Module::Load;
8use Text::Tradition::Collation::Reading::Lexeme;
9
483478bf 10use vars qw/ $VERSION /;
11$VERSION = "0.1";
12
a445ce40 13=head1 NAME
14
8943ff68 15Text::Tradition::Morphology - morphology plugin for Text::Tradition
16
17=head1 DESCRIPTION
18
19The Text::Tradition::Morphology package enables lemma and part-of-speech
20information for traditions and their Reading objects. This distribution
21includes the L<Text::Tradition::Language> role for Traditions, the
22L<Text::Tradition::Morphology> role (this package) for Readings, and a set
23of Language::* modules for language-specific lemmatization.
24
25See L<Text::Tradition::Collation::Reading::Lexeme> for more about the
26morphology object structure.
a445ce40 27
28=cut
29
332750fc 30requires 'is_identical', 'is_combinable', '_combine';
31
58f9c2b9 32has 'language' => (
33 is => 'ro',
34 isa => 'Str',
35 predicate => 'has_language',
36 );
37
a445ce40 38has 'grammar_invalid' => (
39 is => 'rw',
40 isa => 'Bool',
41 default => undef,
42 );
43
44has 'is_nonsense' => (
45 is => 'rw',
46 isa => 'Bool',
47 default => undef,
48 );
49
50has 'normal_form' => (
51 is => 'rw',
52 isa => 'Str',
53 predicate => '_has_normal_form',
54 clearer => '_clear_normal_form',
55 );
56
57# Holds the lexemes for the reading.
58has 'reading_lexemes' => (
59 traits => ['Array'],
60 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
61 handles => {
62 lexeme => 'get',
63 lexemes => 'elements',
64 has_lexemes => 'count',
65 clear_lexemes => 'clear',
66 add_lexeme => 'push',
67 },
68 default => sub { [] },
69 );
70
71
72
73# Make normal_form default to text, transparently.
74around 'normal_form' => sub {
75 my $orig = shift;
76 my $self = shift;
77 my( $arg ) = @_;
78 if( $arg && $arg eq $self->text ) {
79 $self->_clear_normal_form;
80 return $arg;
81 } elsif( !$arg && !$self->_has_normal_form ) {
82 return $self->text;
83 } else {
84 $self->$orig( @_ );
85 }
86};
87
8943ff68 88=head1 READING METHODS
a445ce40 89
90Methods for the morphological information (if any) attached to readings.
91A reading may be made up of multiple lexemes; the concatenated lexeme
92strings ought to match the reading's normalized form.
93
94See L<Text::Tradition::Collation::Reading::Lexeme> for more information
95on Lexeme objects and their attributes.
96
97=head2 has_lexemes
98
99Returns a true value if the reading has any attached lexemes.
100
101=head2 lexemes
102
103Returns the Lexeme objects (if any) attached to the reading.
104
105=head2 clear_lexemes
106
107Wipes any associated Lexeme objects out of the reading.
108
109=head2 add_lexeme( $lexobj )
110
111Adds the Lexeme in $lexobj to the list of lexemes.
112
113=head2 lemmatize
114
115If the language of the reading is set, this method will use the appropriate
116Language model to determine the lexemes that belong to this reading. See
8943ff68 117L<Text::Tradition::Language::lemmatize> if you wish to lemmatize an entire tradition.
a445ce40 118
119=cut
120
121sub lemmatize {
122 my $self = shift;
123 unless( $self->has_language ) {
124 warn "Please set a language to lemmatize a tradition";
125 return;
126 }
127 my $mod = "Text::Tradition::Language::" . $self->language;
128 load( $mod );
129 $mod->can( 'reading_lookup' )->( $self );
130
131}
132
133# For graph serialization. Return a JSON representation of the associated
134# reading lexemes.
135sub _serialize_lexemes {
136 my $self = shift;
137 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
138 return $json->encode( [ $self->lexemes ] );
139}
140
141# Given a JSON representation of the lexemes, instantiate them and add
142# them to the reading.
143sub _deserialize_lexemes {
144 my( $self, $json ) = @_;
145 my $data = from_json( $json );
146 return unless @$data;
147
148 my @lexemes;
149 foreach my $lexhash ( @$data ) {
150 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
151 'JSON' => $lexhash ) );
152 }
153 $self->clear_lexemes;
154 $self->add_lexeme( @lexemes );
155}
156
157sub disambiguated {
158 my $self = shift;
159 return 0 unless $self->has_lexemes;
160 return !grep { !$_->is_disambiguated } $self->lexemes;
161}
162
163sub filter_lexemes {
164 my $self = shift;
165 # While we are here, get rid of any extra wordforms from a disambiguated
166 # reading.
167 if( $self->disambiguated ) {
168 foreach my $lex ( $self->lexemes ) {
169 $lex->clear_matching_forms();
170 $lex->add_matching_form( $lex->form );
171 }
172 }
173}
174
58f9c2b9 175=head2 regularize
176
177Call the 'regularize' function of the appropriate language model on our
178own reading text. This is a rules-based function distinct from 'normal_form',
179which can be set to any arbitrary string.
180
181=cut
182
183# TODO Test this stuff
184
185sub regularize {
186 my $self = shift;
187 if( $self->has_language ) {
188 # If we do have a language, regularize the tokens in $answer.
189 my $mod = 'Text::Tradition::Language::' . $self->language;
58f9c2b9 190 eval { load( $mod ); };
191 # If a module doesn't exist for our language, use the base routine
192 $mod = 'Text::Tradition::Language::Base' if $@;
193 return $mod->can( 'regularize' )->( $self->text );
194 } else {
195 return $self->text;
196 }
197}
198
a445ce40 199around 'is_identical' => sub {
200 my $orig = shift;
201 my $self = shift;
202 my $other = shift;
203 # If the base class returns true, do an extra check to make sure the
204 # lexemes also match.
205 my $answer = $self->$orig( $other );
206 if( $answer ) {
207 if( $self->disambiguated && $other->disambiguated ) {
208 my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
209 my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
210 $answer = undef unless $rform eq $uform;
211 } elsif( $self->disambiguated xor $other->disambiguated ) {
212 $answer = undef;
213 }
214 }
215 return $answer;
216};
217
218around 'is_combinable' => sub {
219 my $orig = shift;
220 my $self = shift;
221 # If the reading is marked with invalid grammar or as a nonsense reading,
222 # it is no longer combinable.
223 return undef if $self->grammar_invalid || $self->is_nonsense;
224 return $self->$orig();
225};
226
227after '_combine' => sub {
228 my $self = shift;
229 my $other = shift;
230 my $joinstr = shift;
231 $self->normal_form(
232 join( $joinstr, $self->normal_form, $other->normal_form ) );
233 # Combine the lexemes present in the readings
234 if( $self->has_lexemes && $other->has_lexemes ) {
235 $self->add_lexeme( $other->lexemes );
236 }
237};
238
e92d4229 2391;
240
241=head1 LICENSE
242
243This package is free software and is provided "as is" without express
244or implied warranty. You can redistribute it and/or modify it under
245the same terms as Perl itself.
246
247=head1 AUTHOR
248
249Tara L Andrews E<lt>aurum@cpan.orgE<gt>