start implementing morphology on readings
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
4use Module::Load;
5
6=head1 NAME
7
8Text::Tradition::Collation::Reading::Lexeme - represents the components of
9a Reading.
10
11=head1 DESCRIPTION
12
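Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones. A lexeme is one component of a Reading:
a string in a given language, together with the list of word forms that
might match it and, once it has been disambiguated, the single form chosen
as correct. A word form is used for the analysis of Reading objects; it
consists of a lemma, a language, and a code to represent its part of
speech. In general the word forms for a particular language should be read
from / written to some morphological database.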
18
19=head1 METHODS
20
21=head2 new
22
23Creates a new lexeme from the passed options.
24
25=head2 language
26
27Returns the language to which this lexeme belongs.
28
29=head2 normalized
30
31Returns the canonical string version of this lexeme.
32
33=head2 matches
34
35Returns the number of possible word forms for this lexeme, as drawn from
36the appropriate database.
37
38=head2 matching_forms
39
40Returns an array of the possible word forms for this lexeme.
41
42=head2 matching_form( $index )
43
44Returns the form at $index in the list of matching forms.
45
46=head2 is_disambiguated
47
48Returns true if a single wordform has been picked as 'correct' for this
49lexeme in its context.
50
51=head2 form
52
53Returns the correct word form (if any has been selected) for the lexeme in
54its context.
55
56=cut
57
58# TODO need to be able to populate this from DB
# The language this lexeme belongs to. Must be supplied to the constructor
# and cannot be changed afterwards.
has language => (
    required => 1,
    isa      => 'Str',
    is       => 'ro',
);
64
# The text of the lexeme. Must be supplied to the constructor; may be
# changed later through the read-write accessor.
# NOTE(review): the POD above documents a 'normalized' accessor, but no
# attribute of that name is declared in this file -- confirm whether
# 'string' is what was meant.
has string => (
    required => 1,
    isa      => 'Str',
    is       => 'rw',
);
70
# Candidate word forms for this lexeme. No accessor is generated for the
# list itself (there is no 'is' option); it is reached only through the
# delegated methods declared in 'handles'.
has 'wordform_matchlist' => (
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
    traits  => ['Array'],
    # Start from an empty list so that the delegated methods can be called
    # before anything has been added.
    default => sub { [] },
    handles => {
        'matches'              => 'count',
        'matching_forms'       => 'elements',
        'matching_form'        => 'get',
        'add_matching_form'    => 'push',
        # lookup() calls this to discard the previous candidates.
        'clear_matching_forms' => 'clear',
    },
);
80
# Flag recording whether a single word form has been chosen for this
# lexeme. Read-only from outside; updated through the private writer
# _set_disambiguated.
has is_disambiguated => (
    isa     => 'Bool',
    is      => 'ro',
    writer  => '_set_disambiguated',
    default => undef,
);
87
# The word form selected for this lexeme, if any. Read-only from outside;
# updated through the private writer _set_form.
has form => (
    isa    => 'Text::Tradition::Collation::Reading::WordForm',
    is     => 'ro',
    writer => '_set_form',
);
93
94
=head2 disambiguate( $index )

Selects the word form at $index in the list of matching forms, and asserts
that this is the correct form for the lexeme. Dies with an error message
if there is no candidate word form at $index.

=cut

sub disambiguate {
    my( $self, $idx ) = @_;
    my $form = $self->matching_form( $idx );
    # No 'throw' helper is defined or imported in this package, so the
    # error is raised with the core die.
    die "There is no candidate wordform at index $idx"
        unless $form;
    $self->_set_form( $form );
    $self->_set_disambiguated( 1 );
}
110
=head2 lookup

Uses the module for the declared language to look up the lexeme in the
language database (if any). Sets the returned morphological matches in
matching_forms, and returns the list as an array of WordForm objects.
Dies if the language module cannot be loaded or the lookup fails.

=cut

sub lookup {
    my $self = shift;
    my $lang = $self->language;
    my $langmod = "Text::Tradition::Language::$lang";
    my @answers;
    # Core eval is used here because no try/catch module is imported by
    # this package. The trailing 1 lets us tell success from failure
    # without depending on the value of $@.
    my $ok = eval {
        load( $langmod );
        # word_lookup is called as a plain function, not as a method; if
        # the module does not provide it, can() returns undef and the
        # call dies inside the eval.
        @answers = $langmod->can( 'word_lookup' )->( $self->string );
        1;
    };
    die "No language module for $lang, or the module has no word_lookup functionality"
        unless $ok;
    # Replace any previous candidates with the fresh results.
    $self->clear_matching_forms;
    $self->add_matching_form( @answers );
    return @answers;
}
134
# Remove the keywords exported by Moose (has, etc.) from this package now
# that the class has been declared.
no Moose;
# Make the class's metaclass immutable once the definition is complete.
__PACKAGE__->meta->make_immutable;

# A module file must finish by returning a true value.
1;
139
140=head1 LICENSE
141
142This package is free software and is provided "as is" without express
143or implied warranty. You can redistribute it and/or modify it under
144the same terms as Perl itself.
145
146=head1 AUTHOR
147
148Tara L Andrews E<lt>aurum@cpan.orgE<gt>