allow wordforms to be cleared out
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
7604424b 4use JSON ();
cca4f996 5use Module::Load;
5ac9acd8 6use Text::Tradition::Collation::Reading::WordForm;
dd007c4d 7use Text::Tradition::Error;
cca4f996 8
9=head1 NAME
10
11Text::Tradition::Collation::Reading::Lexeme - represents the components of
12a Reading.
13
14=head1 DESCRIPTION
15
16Text::Tradition is a library for representation and analysis of collated
17texts, particularly medieval ones. A word form is used for the analysis of
18Reading objects; it consists of a lemma, a language, and a code to
19represent its part of speech. In general the word forms for a particular
20language should be read from / written to some morphological database.
21
22=head1 METHODS
23
24=head2 new
25
26Creates a new lexeme from the passed options.
27
28=head2 language
29
30Returns the language to which this lexeme belongs.
31
32=head2 normalized
33
34Returns the canonical string version of this lexeme.
35
36=head2 matches
37
38Returns the number of possible word forms for this lexeme, as drawn from
39the appropriate database.
40
41=head2 matching_forms
42
43Returns an array of the possible word forms for this lexeme.
44
45=head2 matching_form( $index )
46
47Returns the form at $index in the list of matching forms.
48
49=head2 is_disambiguated
50
51Returns true if a single wordform has been picked as 'correct' for this
52lexeme in its context.
53
54=head2 form
55
56Returns the correct word form (if any has been selected) for the lexeme in
57its context.
58
59=cut
60
61# TODO need to be able to populate this from DB
62has 'language' => (
63 is => 'ro',
64 isa => 'Str',
65 required => 1,
66 );
67
68has 'string' => (
69 is => 'rw',
70 isa => 'Str',
71 required => 1,
72 );
73
74has 'wordform_matchlist' => (
75 isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
76 traits => ['Array'],
77 handles => {
78 'matches' => 'count',
79 'matching_forms' => 'elements',
80 'matching_form' => 'get',
81 'add_matching_form' => 'push',
6be2fbcb 82 'clear_matching_forms' => 'clear',
d3e7842a 83 },
5ac9acd8 84 default => sub { [] },
cca4f996 85 );
86
87has 'is_disambiguated' => (
d3e7842a 88 is => 'rw',
cca4f996 89 isa => 'Bool',
90 default => undef,
cca4f996 91 );
92
93has 'form' => (
94 is => 'ro',
95 isa => 'Text::Tradition::Collation::Reading::WordForm',
96 writer => '_set_form',
97 );
98
7604424b 99around BUILDARGS => sub {
100 my $orig = shift;
101 my $class = shift;
102 my $args = @_ == 1 ? $_[0] : { @_ };
103 if( exists $args->{JSON} ) {
104 my $data = $args->{JSON};
105 if( exists $data->{'form'} && $data->{'form'} ) {
106 my $form = Text::Tradition::Collation::Reading::WordForm->new(
107 'JSON' => $data->{'form'} );
108 $data->{'form'} = $form;
109 }
110 if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) {
111 my @ml;
112 foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) {
113 push( @ml, Text::Tradition::Collation::Reading::WordForm->new(
114 'JSON' => $wfjson ) );
115 }
116 $data->{'wordform_matchlist'} = \@ml;
117 }
118 $args = $data;
119 }
120 $class->$orig( $args );
121};
122
d3e7842a 123# Do auto-disambiguation if we were created with a single wordform
124sub BUILD {
125 my $self = shift;
126
127 if( $self->matches == 1 ) {
128 $self->disambiguate( 0 );
129 }
130}
cca4f996 131
da83693e 132around 'add_matching_form' => sub {
133 my $orig = shift;
134 my $self = shift;
135 my @realargs;
136 foreach my $a ( @_ ) {
137 if( ref( $a ) ) {
138 push( @realargs, $a );
139 } else {
140 # Make the wordform from the string
141 my $wf = Text::Tradition::Collation::Reading::WordForm->new(
142 'JSON' => $a );
143 push( @realargs, $wf );
144 }
145 }
146 return $self->$orig( @realargs );
147};
148
cca4f996 149=head2 disambiguate( $index )
150
151Selects the word form at $index in the list of matching forms, and asserts
152that this is the correct form for the lexeme.
153
154=cut
155
156sub disambiguate {
157 my( $self, $idx ) = @_;
158 my $form = $self->matching_form( $idx );
159 throw( "There is no candidate wordform at index $idx" )
160 unless $form;
161 $self->_set_form( $form );
d3e7842a 162 $self->is_disambiguated( 1 );
cca4f996 163}
164
da83693e 165=head2 has_form( $rep )
166
167Returns the index of the matching form whose string representation is in $rep,
168or else undef if none is found.
169
170=cut
171
172sub has_form {
173 my( $self, $rep ) = @_;
174 my $i = 0;
175 foreach my $mf ( $self->matching_forms ) {
176 my $struct = $mf->TO_JSON;
177 return $i if $struct eq $rep;
178 $i++;
179 }
180 return undef;
181}
182
183
7604424b 184sub TO_JSON {
cca4f996 185 my $self = shift;
7604424b 186 my $hash = {};
187 # Do the scalar keys
188 map { $hash->{$_} = $self->$_ if defined $self->$_ }
189 qw/ language string is_disambiguated form /;
190 $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches;
191 return $hash;
cca4f996 192}
dd007c4d 193
194sub throw {
195 Text::Tradition::Error->throw(
196 'ident' => 'Lexeme error',
197 'message' => $_[0],
198 );
199}
200
cca4f996 201no Moose;
202__PACKAGE__->meta->make_immutable;
203
2041;
205
206=head1 LICENSE
207
208This package is free software and is provided "as is" without express
209or implied warranty. You can redistribute it and/or modify it under
210the same terms as Perl itself.
211
212=head1 AUTHOR
213
214Tara L Andrews E<lt>aurum@cpan.orgE<gt>