Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::Lexeme; |
2 | |
3 | use Moose; |
7604424b |
4 | use JSON (); |
cca4f996 |
5 | use Module::Load; |
5ac9acd8 |
6 | use Text::Tradition::Collation::Reading::WordForm; |
cca4f996 |
7 | |
8 | =head1 NAME |
9 | |
10 | Text::Tradition::Collation::Reading::Lexeme - represents the components of |
11 | a Reading. |
12 | |
13 | =head1 DESCRIPTION |
14 | |
15 | Text::Tradition is a library for representation and analysis of collated |
16 | texts, particularly medieval ones. A word form is used for the analysis of |
17 | Reading objects; it consists of a lemma, a language, and a code to |
18 | represent its part of speech. In general the word forms for a particular |
19 | language should be read from / written to some morphological database. |
20 | |
21 | =head1 METHODS |
22 | |
23 | =head2 new |
24 | |
25 | Creates a new lexeme from the passed options. |
26 | |
27 | =head2 language |
28 | |
29 | Returns the language to which this lexeme belongs. |
30 | |
31 | =head2 normalized |
32 | |
33 | Returns the canonical string version of this lexeme. |
34 | |
35 | =head2 matches |
36 | |
37 | Returns the number of possible word forms for this lexeme, as drawn from |
38 | the appropriate database. |
39 | |
40 | =head2 matching_forms |
41 | |
42 | Returns an array of the possible word forms for this lexeme. |
43 | |
44 | =head2 matching_form( $index ) |
45 | |
46 | Returns the form at $index in the list of matching forms. |
47 | |
48 | =head2 is_disambiguated |
49 | |
50 | Returns true if a single wordform has been picked as 'correct' for this |
51 | lexeme in its context. |
52 | |
53 | =head2 form |
54 | |
55 | Returns the correct word form (if any has been selected) for the lexeme in |
56 | its context. |
57 | |
58 | =cut |
59 | |
60 | # TODO need to be able to populate this from DB |
61 | has 'language' => ( |
62 | is => 'ro', |
63 | isa => 'Str', |
64 | required => 1, |
65 | ); |
66 | |
67 | has 'string' => ( |
68 | is => 'rw', |
69 | isa => 'Str', |
70 | required => 1, |
71 | ); |
72 | |
73 | has 'wordform_matchlist' => ( |
74 | isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]', |
75 | traits => ['Array'], |
76 | handles => { |
77 | 'matches' => 'count', |
78 | 'matching_forms' => 'elements', |
79 | 'matching_form' => 'get', |
80 | 'add_matching_form' => 'push', |
d3e7842a |
81 | }, |
5ac9acd8 |
82 | default => sub { [] }, |
cca4f996 |
83 | ); |
84 | |
85 | has 'is_disambiguated' => ( |
d3e7842a |
86 | is => 'rw', |
cca4f996 |
87 | isa => 'Bool', |
88 | default => undef, |
cca4f996 |
89 | ); |
90 | |
91 | has 'form' => ( |
92 | is => 'ro', |
93 | isa => 'Text::Tradition::Collation::Reading::WordForm', |
94 | writer => '_set_form', |
95 | ); |
96 | |
7604424b |
97 | around BUILDARGS => sub { |
98 | my $orig = shift; |
99 | my $class = shift; |
100 | my $args = @_ == 1 ? $_[0] : { @_ }; |
101 | if( exists $args->{JSON} ) { |
102 | my $data = $args->{JSON}; |
103 | if( exists $data->{'form'} && $data->{'form'} ) { |
104 | my $form = Text::Tradition::Collation::Reading::WordForm->new( |
105 | 'JSON' => $data->{'form'} ); |
106 | $data->{'form'} = $form; |
107 | } |
108 | if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) { |
109 | my @ml; |
110 | foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) { |
111 | push( @ml, Text::Tradition::Collation::Reading::WordForm->new( |
112 | 'JSON' => $wfjson ) ); |
113 | } |
114 | $data->{'wordform_matchlist'} = \@ml; |
115 | } |
116 | $args = $data; |
117 | } |
118 | $class->$orig( $args ); |
119 | }; |
120 | |
d3e7842a |
121 | # Do auto-disambiguation if we were created with a single wordform |
122 | sub BUILD { |
123 | my $self = shift; |
124 | |
125 | if( $self->matches == 1 ) { |
126 | $self->disambiguate( 0 ); |
127 | } |
128 | } |
cca4f996 |
129 | |
da83693e |
130 | around 'add_matching_form' => sub { |
131 | my $orig = shift; |
132 | my $self = shift; |
133 | my @realargs; |
134 | foreach my $a ( @_ ) { |
135 | if( ref( $a ) ) { |
136 | push( @realargs, $a ); |
137 | } else { |
138 | # Make the wordform from the string |
139 | my $wf = Text::Tradition::Collation::Reading::WordForm->new( |
140 | 'JSON' => $a ); |
141 | push( @realargs, $wf ); |
142 | } |
143 | } |
144 | return $self->$orig( @realargs ); |
145 | }; |
146 | |
cca4f996 |
147 | =head2 disambiguate( $index ) |
148 | |
149 | Selects the word form at $index in the list of matching forms, and asserts |
150 | that this is the correct form for the lexeme. |
151 | |
152 | =cut |
153 | |
154 | sub disambiguate { |
155 | my( $self, $idx ) = @_; |
156 | my $form = $self->matching_form( $idx ); |
157 | throw( "There is no candidate wordform at index $idx" ) |
158 | unless $form; |
159 | $self->_set_form( $form ); |
d3e7842a |
160 | $self->is_disambiguated( 1 ); |
cca4f996 |
161 | } |
162 | |
da83693e |
163 | =head2 has_form( $rep ) |
164 | |
165 | Returns the index of the matching form whose string representation is in $rep, |
166 | or else undef if none is found. |
167 | |
168 | =cut |
169 | |
170 | sub has_form { |
171 | my( $self, $rep ) = @_; |
172 | my $i = 0; |
173 | foreach my $mf ( $self->matching_forms ) { |
174 | my $struct = $mf->TO_JSON; |
175 | return $i if $struct eq $rep; |
176 | $i++; |
177 | } |
178 | return undef; |
179 | } |
180 | |
181 | |
7604424b |
182 | sub TO_JSON { |
cca4f996 |
183 | my $self = shift; |
7604424b |
184 | my $hash = {}; |
185 | # Do the scalar keys |
186 | map { $hash->{$_} = $self->$_ if defined $self->$_ } |
187 | qw/ language string is_disambiguated form /; |
188 | $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches; |
189 | return $hash; |
cca4f996 |
190 | } |
cca4f996 |
191 | no Moose; |
192 | __PACKAGE__->meta->make_immutable; |
193 | |
194 | 1; |
195 | |
196 | =head1 LICENSE |
197 | |
198 | This package is free software and is provided "as is" without express |
199 | or implied warranty. You can redistribute it and/or modify it under |
200 | the same terms as Perl itself. |
201 | |
202 | =head1 AUTHOR |
203 | |
204 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |