fix Lexeme to handle empty wordform list input
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
7604424b 4use JSON ();
cca4f996 5use Module::Load;
5ac9acd8 6use Text::Tradition::Collation::Reading::WordForm;
cca4f996 7
8=head1 NAME
9
10Text::Tradition::Collation::Reading::Lexeme - represents the components of
11a Reading.
12
13=head1 DESCRIPTION
14
15Text::Tradition is a library for representation and analysis of collated
16texts, particularly medieval ones. A word form is used for the analysis of
17Reading objects; it consists of a lemma, a language, and a code to
18represent its part of speech. In general the word forms for a particular
19language should be read from / written to some morphological database.
20
21=head1 METHODS
22
23=head2 new
24
25Creates a new lexeme from the passed options.
26
27=head2 language
28
29Returns the language to which this lexeme belongs.
30
31=head2 normalized
32
33Returns the canonical string version of this lexeme.
34
35=head2 matches
36
37Returns the number of possible word forms for this lexeme, as drawn from
38the appropriate database.
39
40=head2 matching_forms
41
42Returns an array of the possible word forms for this lexeme.
43
44=head2 matching_form( $index )
45
46Returns the form at $index in the list of matching forms.
47
48=head2 is_disambiguated
49
50Returns true if a single wordform has been picked as 'correct' for this
51lexeme in its context.
52
53=head2 form
54
55Returns the correct word form (if any has been selected) for the lexeme in
56its context.
57
58=cut
59
60# TODO need to be able to populate this from DB
61has 'language' => (
62 is => 'ro',
63 isa => 'Str',
64 required => 1,
65 );
66
67has 'string' => (
68 is => 'rw',
69 isa => 'Str',
70 required => 1,
71 );
72
73has 'wordform_matchlist' => (
74 isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
75 traits => ['Array'],
76 handles => {
77 'matches' => 'count',
78 'matching_forms' => 'elements',
79 'matching_form' => 'get',
80 'add_matching_form' => 'push',
d3e7842a 81 },
5ac9acd8 82 default => sub { [] },
cca4f996 83 );
84
85has 'is_disambiguated' => (
d3e7842a 86 is => 'rw',
cca4f996 87 isa => 'Bool',
88 default => undef,
cca4f996 89 );
90
91has 'form' => (
92 is => 'ro',
93 isa => 'Text::Tradition::Collation::Reading::WordForm',
94 writer => '_set_form',
95 );
96
7604424b 97around BUILDARGS => sub {
98 my $orig = shift;
99 my $class = shift;
100 my $args = @_ == 1 ? $_[0] : { @_ };
101 if( exists $args->{JSON} ) {
102 my $data = $args->{JSON};
103 if( exists $data->{'form'} && $data->{'form'} ) {
104 my $form = Text::Tradition::Collation::Reading::WordForm->new(
105 'JSON' => $data->{'form'} );
106 $data->{'form'} = $form;
107 }
108 if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) {
109 my @ml;
110 foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) {
111 push( @ml, Text::Tradition::Collation::Reading::WordForm->new(
112 'JSON' => $wfjson ) );
113 }
114 $data->{'wordform_matchlist'} = \@ml;
115 }
116 $args = $data;
117 }
118 $class->$orig( $args );
119};
120
d3e7842a 121# Do auto-disambiguation if we were created with a single wordform
122sub BUILD {
123 my $self = shift;
124
125 if( $self->matches == 1 ) {
126 $self->disambiguate( 0 );
127 }
128}
cca4f996 129
da83693e 130around 'add_matching_form' => sub {
131 my $orig = shift;
132 my $self = shift;
133 my @realargs;
134 foreach my $a ( @_ ) {
135 if( ref( $a ) ) {
136 push( @realargs, $a );
137 } else {
138 # Make the wordform from the string
139 my $wf = Text::Tradition::Collation::Reading::WordForm->new(
140 'JSON' => $a );
141 push( @realargs, $wf );
142 }
143 }
144 return $self->$orig( @realargs );
145};
146
cca4f996 147=head2 disambiguate( $index )
148
149Selects the word form at $index in the list of matching forms, and asserts
150that this is the correct form for the lexeme.
151
152=cut
153
154sub disambiguate {
155 my( $self, $idx ) = @_;
156 my $form = $self->matching_form( $idx );
157 throw( "There is no candidate wordform at index $idx" )
158 unless $form;
159 $self->_set_form( $form );
d3e7842a 160 $self->is_disambiguated( 1 );
cca4f996 161}
162
da83693e 163=head2 has_form( $rep )
164
165Returns the index of the matching form whose string representation is in $rep,
166or else undef if none is found.
167
168=cut
169
170sub has_form {
171 my( $self, $rep ) = @_;
172 my $i = 0;
173 foreach my $mf ( $self->matching_forms ) {
174 my $struct = $mf->TO_JSON;
175 return $i if $struct eq $rep;
176 $i++;
177 }
178 return undef;
179}
180
181
7604424b 182sub TO_JSON {
cca4f996 183 my $self = shift;
7604424b 184 my $hash = {};
185 # Do the scalar keys
186 map { $hash->{$_} = $self->$_ if defined $self->$_ }
187 qw/ language string is_disambiguated form /;
188 $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches;
189 return $hash;
cca4f996 190}
cca4f996 191no Moose;
192__PACKAGE__->meta->make_immutable;
193
1941;
195
196=head1 LICENSE
197
198This package is free software and is provided "as is" without express
199or implied warranty. You can redistribute it and/or modify it under
200the same terms as Perl itself.
201
202=head1 AUTHOR
203
204Tara L Andrews E<lt>aurum@cpan.orgE<gt>