muck about with serialization of lexeme wordforms; allow individual lexeme addressing
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
7604424b 4use JSON ();
cca4f996 5use Module::Load;
6
7=head1 NAME
8
9Text::Tradition::Collation::Reading::Lexeme - represents the components of
10a Reading.
11
12=head1 DESCRIPTION
13
14Text::Tradition is a library for representation and analysis of collated
15texts, particularly medieval ones. A word form is used for the analysis of
16Reading objects; it consists of a lemma, a language, and a code to
17represent its part of speech. In general the word forms for a particular
18language should be read from / written to some morphological database.
19
20=head1 METHODS
21
22=head2 new
23
24Creates a new lexeme from the passed options.
25
26=head2 language
27
28Returns the language to which this lexeme belongs.
29
30=head2 normalized
31
32Returns the canonical string version of this lexeme.
33
34=head2 matches
35
36Returns the number of possible word forms for this lexeme, as drawn from
37the appropriate database.
38
39=head2 matching_forms
40
41Returns an array of the possible word forms for this lexeme.
42
43=head2 matching_form( $index )
44
45Returns the form at $index in the list of matching forms.
46
47=head2 is_disambiguated
48
49Returns true if a single wordform has been picked as 'correct' for this
50lexeme in its context.
51
52=head2 form
53
54Returns the correct word form (if any has been selected) for the lexeme in
55its context.
56
57=cut
58
59# TODO need to be able to populate this from DB
60has 'language' => (
61 is => 'ro',
62 isa => 'Str',
63 required => 1,
64 );
65
66has 'string' => (
67 is => 'rw',
68 isa => 'Str',
69 required => 1,
70 );
71
72has 'wordform_matchlist' => (
73 isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
74 traits => ['Array'],
75 handles => {
76 'matches' => 'count',
77 'matching_forms' => 'elements',
78 'matching_form' => 'get',
79 'add_matching_form' => 'push',
d3e7842a 80 },
cca4f996 81 );
82
83has 'is_disambiguated' => (
d3e7842a 84 is => 'rw',
cca4f996 85 isa => 'Bool',
86 default => undef,
cca4f996 87 );
88
89has 'form' => (
90 is => 'ro',
91 isa => 'Text::Tradition::Collation::Reading::WordForm',
92 writer => '_set_form',
93 );
94
7604424b 95around BUILDARGS => sub {
96 my $orig = shift;
97 my $class = shift;
98 my $args = @_ == 1 ? $_[0] : { @_ };
99 if( exists $args->{JSON} ) {
100 my $data = $args->{JSON};
101 if( exists $data->{'form'} && $data->{'form'} ) {
102 my $form = Text::Tradition::Collation::Reading::WordForm->new(
103 'JSON' => $data->{'form'} );
104 $data->{'form'} = $form;
105 }
106 if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) {
107 my @ml;
108 foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) {
109 push( @ml, Text::Tradition::Collation::Reading::WordForm->new(
110 'JSON' => $wfjson ) );
111 }
112 $data->{'wordform_matchlist'} = \@ml;
113 }
114 $args = $data;
115 }
116 $class->$orig( $args );
117};
118
d3e7842a 119# Do auto-disambiguation if we were created with a single wordform
120sub BUILD {
121 my $self = shift;
122
123 if( $self->matches == 1 ) {
124 $self->disambiguate( 0 );
125 }
126}
cca4f996 127
da83693e 128around 'add_matching_form' => sub {
129 my $orig = shift;
130 my $self = shift;
131 my @realargs;
132 foreach my $a ( @_ ) {
133 if( ref( $a ) ) {
134 push( @realargs, $a );
135 } else {
136 # Make the wordform from the string
137 my $wf = Text::Tradition::Collation::Reading::WordForm->new(
138 'JSON' => $a );
139 push( @realargs, $wf );
140 }
141 }
142 return $self->$orig( @realargs );
143};
144
cca4f996 145=head2 disambiguate( $index )
146
147Selects the word form at $index in the list of matching forms, and asserts
148that this is the correct form for the lexeme.
149
150=cut
151
152sub disambiguate {
153 my( $self, $idx ) = @_;
154 my $form = $self->matching_form( $idx );
155 throw( "There is no candidate wordform at index $idx" )
156 unless $form;
157 $self->_set_form( $form );
d3e7842a 158 $self->is_disambiguated( 1 );
cca4f996 159}
160
da83693e 161=head2 has_form( $rep )
162
163Returns the index of the matching form whose string representation is in $rep,
164or else undef if none is found.
165
166=cut
167
168sub has_form {
169 my( $self, $rep ) = @_;
170 my $i = 0;
171 foreach my $mf ( $self->matching_forms ) {
172 my $struct = $mf->TO_JSON;
173 return $i if $struct eq $rep;
174 $i++;
175 }
176 return undef;
177}
178
179
7604424b 180sub TO_JSON {
cca4f996 181 my $self = shift;
7604424b 182 my $hash = {};
183 # Do the scalar keys
184 map { $hash->{$_} = $self->$_ if defined $self->$_ }
185 qw/ language string is_disambiguated form /;
186 $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches;
187 return $hash;
cca4f996 188}
cca4f996 189no Moose;
190__PACKAGE__->meta->make_immutable;
191
1921;
193
194=head1 LICENSE
195
196This package is free software and is provided "as is" without express
197or implied warranty. You can redistribute it and/or modify it under
198the same terms as Perl itself.
199
200=head1 AUTHOR
201
202Tara L Andrews E<lt>aurum@cpan.orgE<gt>