Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::Lexeme; |
2 | |
3 | use Moose; |
7604424b |
4 | use JSON (); |
cca4f996 |
5 | use Module::Load; |
5ac9acd8 |
6 | use Text::Tradition::Collation::Reading::WordForm; |
dd007c4d |
7 | use Text::Tradition::Error; |
cca4f996 |
8 | |
9 | =head1 NAME |
10 | |
11 | Text::Tradition::Collation::Reading::Lexeme - represents the components of |
12 | a Reading. |
13 | |
14 | =head1 DESCRIPTION |
15 | |
16 | Text::Tradition is a library for representation and analysis of collated |
17 | texts, particularly medieval ones. A word form is used for the analysis of |
18 | Reading objects; it consists of a lemma, a language, and a code to |
19 | represent its part of speech. In general the word forms for a particular |
20 | language should be read from / written to some morphological database. |
21 | |
22 | =head1 METHODS |
23 | |
24 | =head2 new |
25 | |
26 | Creates a new lexeme from the passed options. |
27 | |
28 | =head2 language |
29 | |
30 | Returns the language to which this lexeme belongs. |
31 | |
32 | =head2 normalized |
33 | |
34 | Returns the canonical string version of this lexeme. |
35 | |
36 | =head2 matches |
37 | |
38 | Returns the number of possible word forms for this lexeme, as drawn from |
39 | the appropriate database. |
40 | |
41 | =head2 matching_forms |
42 | |
43 | Returns an array of the possible word forms for this lexeme. |
44 | |
45 | =head2 matching_form( $index ) |
46 | |
47 | Returns the form at $index in the list of matching forms. |
48 | |
49 | =head2 is_disambiguated |
50 | |
51 | Returns true if a single wordform has been picked as 'correct' for this |
52 | lexeme in its context. |
53 | |
54 | =head2 form |
55 | |
56 | Returns the correct word form (if any has been selected) for the lexeme in |
57 | its context. |
58 | |
59 | =cut |
60 | |
61 | # TODO need to be able to populate this from DB |
62 | has 'language' => ( |
63 | is => 'ro', |
64 | isa => 'Str', |
65 | required => 1, |
66 | ); |
67 | |
68 | has 'string' => ( |
69 | is => 'rw', |
70 | isa => 'Str', |
71 | required => 1, |
72 | ); |
73 | |
74 | has 'wordform_matchlist' => ( |
75 | isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]', |
76 | traits => ['Array'], |
77 | handles => { |
78 | 'matches' => 'count', |
79 | 'matching_forms' => 'elements', |
80 | 'matching_form' => 'get', |
81 | 'add_matching_form' => 'push', |
6be2fbcb |
82 | 'clear_matching_forms' => 'clear', |
d3e7842a |
83 | }, |
5ac9acd8 |
84 | default => sub { [] }, |
cca4f996 |
85 | ); |
86 | |
87 | has 'is_disambiguated' => ( |
d3e7842a |
88 | is => 'rw', |
cca4f996 |
89 | isa => 'Bool', |
90 | default => undef, |
cca4f996 |
91 | ); |
92 | |
93 | has 'form' => ( |
94 | is => 'ro', |
95 | isa => 'Text::Tradition::Collation::Reading::WordForm', |
96 | writer => '_set_form', |
97 | ); |
98 | |
7604424b |
99 | around BUILDARGS => sub { |
100 | my $orig = shift; |
101 | my $class = shift; |
102 | my $args = @_ == 1 ? $_[0] : { @_ }; |
103 | if( exists $args->{JSON} ) { |
104 | my $data = $args->{JSON}; |
105 | if( exists $data->{'form'} && $data->{'form'} ) { |
106 | my $form = Text::Tradition::Collation::Reading::WordForm->new( |
107 | 'JSON' => $data->{'form'} ); |
108 | $data->{'form'} = $form; |
109 | } |
110 | if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) { |
111 | my @ml; |
112 | foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) { |
113 | push( @ml, Text::Tradition::Collation::Reading::WordForm->new( |
114 | 'JSON' => $wfjson ) ); |
115 | } |
116 | $data->{'wordform_matchlist'} = \@ml; |
117 | } |
118 | $args = $data; |
119 | } |
120 | $class->$orig( $args ); |
121 | }; |
122 | |
d3e7842a |
123 | # Do auto-disambiguation if we were created with a single wordform |
124 | sub BUILD { |
125 | my $self = shift; |
126 | |
127 | if( $self->matches == 1 ) { |
128 | $self->disambiguate( 0 ); |
129 | } |
130 | } |
cca4f996 |
131 | |
da83693e |
132 | around 'add_matching_form' => sub { |
133 | my $orig = shift; |
134 | my $self = shift; |
135 | my @realargs; |
136 | foreach my $a ( @_ ) { |
137 | if( ref( $a ) ) { |
138 | push( @realargs, $a ); |
139 | } else { |
140 | # Make the wordform from the string |
141 | my $wf = Text::Tradition::Collation::Reading::WordForm->new( |
142 | 'JSON' => $a ); |
143 | push( @realargs, $wf ); |
144 | } |
145 | } |
146 | return $self->$orig( @realargs ); |
147 | }; |
148 | |
cca4f996 |
149 | =head2 disambiguate( $index ) |
150 | |
151 | Selects the word form at $index in the list of matching forms, and asserts |
152 | that this is the correct form for the lexeme. |
153 | |
154 | =cut |
155 | |
156 | sub disambiguate { |
157 | my( $self, $idx ) = @_; |
158 | my $form = $self->matching_form( $idx ); |
159 | throw( "There is no candidate wordform at index $idx" ) |
160 | unless $form; |
161 | $self->_set_form( $form ); |
d3e7842a |
162 | $self->is_disambiguated( 1 ); |
cca4f996 |
163 | } |
164 | |
da83693e |
165 | =head2 has_form( $rep ) |
166 | |
167 | Returns the index of the matching form whose string representation is in $rep, |
168 | or else undef if none is found. |
169 | |
170 | =cut |
171 | |
172 | sub has_form { |
173 | my( $self, $rep ) = @_; |
174 | my $i = 0; |
175 | foreach my $mf ( $self->matching_forms ) { |
176 | my $struct = $mf->TO_JSON; |
177 | return $i if $struct eq $rep; |
178 | $i++; |
179 | } |
180 | return undef; |
181 | } |
182 | |
183 | |
7604424b |
184 | sub TO_JSON { |
cca4f996 |
185 | my $self = shift; |
7604424b |
186 | my $hash = {}; |
187 | # Do the scalar keys |
188 | map { $hash->{$_} = $self->$_ if defined $self->$_ } |
189 | qw/ language string is_disambiguated form /; |
190 | $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches; |
191 | return $hash; |
cca4f996 |
192 | } |
dd007c4d |
193 | |
194 | sub throw { |
195 | Text::Tradition::Error->throw( |
196 | 'ident' => 'Lexeme error', |
197 | 'message' => $_[0], |
198 | ); |
199 | } |
200 | |
cca4f996 |
201 | no Moose; |
202 | __PACKAGE__->meta->make_immutable; |
203 | |
204 | 1; |
205 | |
206 | =head1 LICENSE |
207 | |
208 | This package is free software and is provided "as is" without express |
209 | or implied warranty. You can redistribute it and/or modify it under |
210 | the same terms as Perl itself. |
211 | |
212 | =head1 AUTHOR |
213 | |
214 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |