add throw() sub to Lexeme.pm
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
7604424b 4use JSON ();
cca4f996 5use Module::Load;
5ac9acd8 6use Text::Tradition::Collation::Reading::WordForm;
dd007c4d 7use Text::Tradition::Error;
cca4f996 8
9=head1 NAME
10
11Text::Tradition::Collation::Reading::Lexeme - represents the components of
12a Reading.
13
14=head1 DESCRIPTION
15
16Text::Tradition is a library for representation and analysis of collated
17texts, particularly medieval ones. A word form is used for the analysis of
18Reading objects; it consists of a lemma, a language, and a code to
19represent its part of speech. In general the word forms for a particular
20language should be read from / written to some morphological database.
21
22=head1 METHODS
23
24=head2 new
25
26Creates a new lexeme from the passed options.
27
28=head2 language
29
30Returns the language to which this lexeme belongs.
31
32=head2 normalized
33
34Returns the canonical string version of this lexeme.
35
36=head2 matches
37
38Returns the number of possible word forms for this lexeme, as drawn from
39the appropriate database.
40
41=head2 matching_forms
42
43Returns an array of the possible word forms for this lexeme.
44
45=head2 matching_form( $index )
46
47Returns the form at $index in the list of matching forms.
48
49=head2 is_disambiguated
50
51Returns true if a single wordform has been picked as 'correct' for this
52lexeme in its context.
53
54=head2 form
55
56Returns the correct word form (if any has been selected) for the lexeme in
57its context.
58
59=cut
60
61# TODO need to be able to populate this from DB
62has 'language' => (
63 is => 'ro',
64 isa => 'Str',
65 required => 1,
66 );
67
68has 'string' => (
69 is => 'rw',
70 isa => 'Str',
71 required => 1,
72 );
73
74has 'wordform_matchlist' => (
75 isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
76 traits => ['Array'],
77 handles => {
78 'matches' => 'count',
79 'matching_forms' => 'elements',
80 'matching_form' => 'get',
81 'add_matching_form' => 'push',
d3e7842a 82 },
5ac9acd8 83 default => sub { [] },
cca4f996 84 );
85
86has 'is_disambiguated' => (
d3e7842a 87 is => 'rw',
cca4f996 88 isa => 'Bool',
89 default => undef,
cca4f996 90 );
91
92has 'form' => (
93 is => 'ro',
94 isa => 'Text::Tradition::Collation::Reading::WordForm',
95 writer => '_set_form',
96 );
97
7604424b 98around BUILDARGS => sub {
99 my $orig = shift;
100 my $class = shift;
101 my $args = @_ == 1 ? $_[0] : { @_ };
102 if( exists $args->{JSON} ) {
103 my $data = $args->{JSON};
104 if( exists $data->{'form'} && $data->{'form'} ) {
105 my $form = Text::Tradition::Collation::Reading::WordForm->new(
106 'JSON' => $data->{'form'} );
107 $data->{'form'} = $form;
108 }
109 if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) {
110 my @ml;
111 foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) {
112 push( @ml, Text::Tradition::Collation::Reading::WordForm->new(
113 'JSON' => $wfjson ) );
114 }
115 $data->{'wordform_matchlist'} = \@ml;
116 }
117 $args = $data;
118 }
119 $class->$orig( $args );
120};
121
d3e7842a 122# Do auto-disambiguation if we were created with a single wordform
123sub BUILD {
124 my $self = shift;
125
126 if( $self->matches == 1 ) {
127 $self->disambiguate( 0 );
128 }
129}
cca4f996 130
da83693e 131around 'add_matching_form' => sub {
132 my $orig = shift;
133 my $self = shift;
134 my @realargs;
135 foreach my $a ( @_ ) {
136 if( ref( $a ) ) {
137 push( @realargs, $a );
138 } else {
139 # Make the wordform from the string
140 my $wf = Text::Tradition::Collation::Reading::WordForm->new(
141 'JSON' => $a );
142 push( @realargs, $wf );
143 }
144 }
145 return $self->$orig( @realargs );
146};
147
cca4f996 148=head2 disambiguate( $index )
149
150Selects the word form at $index in the list of matching forms, and asserts
151that this is the correct form for the lexeme.
152
153=cut
154
155sub disambiguate {
156 my( $self, $idx ) = @_;
157 my $form = $self->matching_form( $idx );
158 throw( "There is no candidate wordform at index $idx" )
159 unless $form;
160 $self->_set_form( $form );
d3e7842a 161 $self->is_disambiguated( 1 );
cca4f996 162}
163
da83693e 164=head2 has_form( $rep )
165
166Returns the index of the matching form whose string representation is in $rep,
167or else undef if none is found.
168
169=cut
170
171sub has_form {
172 my( $self, $rep ) = @_;
173 my $i = 0;
174 foreach my $mf ( $self->matching_forms ) {
175 my $struct = $mf->TO_JSON;
176 return $i if $struct eq $rep;
177 $i++;
178 }
179 return undef;
180}
181
182
7604424b 183sub TO_JSON {
cca4f996 184 my $self = shift;
7604424b 185 my $hash = {};
186 # Do the scalar keys
187 map { $hash->{$_} = $self->$_ if defined $self->$_ }
188 qw/ language string is_disambiguated form /;
189 $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches;
190 return $hash;
cca4f996 191}
dd007c4d 192
193sub throw {
194 Text::Tradition::Error->throw(
195 'ident' => 'Lexeme error',
196 'message' => $_[0],
197 );
198}
199
cca4f996 200no Moose;
201__PACKAGE__->meta->make_immutable;
202
2031;
204
205=head1 LICENSE
206
207This package is free software and is provided "as is" without express
208or implied warranty. You can redistribute it and/or modify it under
209the same terms as Perl itself.
210
211=head1 AUTHOR
212
213Tara L Andrews E<lt>aurum@cpan.orgE<gt>