make French morph tagging work; dependent on Flemm and TreeTagger
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading / Lexeme.pm
CommitLineData
cca4f996 1package Text::Tradition::Collation::Reading::Lexeme;
2
3use Moose;
4use Module::Load;
5
=head1 NAME

Text::Tradition::Collation::Reading::Lexeme - represents the components of
a Reading.

=head1 DESCRIPTION

Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  A word form is used for the analysis of
Reading objects; it consists of a lemma, a language, and a code to
represent its part of speech.  In general the word forms for a particular
language should be read from / written to some morphological database.
18
=head1 METHODS

=head2 new

Creates a new lexeme from the passed options.

=head2 language

Returns the language to which this lexeme belongs.

=head2 normalized

Returns the canonical string version of this lexeme.

=head2 matches

Returns the number of possible word forms for this lexeme, as drawn from
the appropriate database.

=head2 matching_forms

Returns an array of the possible word forms for this lexeme.

=head2 matching_form( $index )

Returns the form at $index in the list of matching forms.

=head2 is_disambiguated

Returns true if a single wordform has been picked as 'correct' for this
lexeme in its context.

=head2 form

Returns the correct word form (if any has been selected) for the lexeme in
its context.

=cut
57
# Attribute declarations for a lexeme.
# TODO need to be able to populate this from DB

# Name of the language this lexeme belongs to.  lookup() appends it to
# 'Text::Tradition::Language::' to find the module that does the lookup.
has 'language' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
    );

# The text of the lexeme; this is the value lookup() hands to word_lookup.
has 'string' => (
    is       => 'rw',
    isa      => 'Str',
    required => 1,
    );

# Candidate word forms for this lexeme.  There is deliberately no direct
# accessor; the list is only reachable through the delegated methods below.
# The default of an empty list keeps those delegations (notably the
# 'matches' call made from BUILD) usable on a lexeme that was constructed
# without any candidates.
has 'wordform_matchlist' => (
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
    traits  => ['Array'],
    default => sub { [] },
    handles => {
        'matches'              => 'count',
        'matching_forms'       => 'elements',
        'matching_form'        => 'get',
        'add_matching_form'    => 'push',
        # lookup() empties the list through this before storing new matches.
        'clear_matching_forms' => 'clear',
        },
    );

# True once disambiguate() has selected a single form; false (undef) until
# then.
has 'is_disambiguated' => (
    is      => 'rw',
    isa     => 'Bool',
    default => undef,
    );

# The word form selected by disambiguate().  Read-only to callers; the
# private writer _set_form is what disambiguate() uses to store it.
has 'form' => (
    is     => 'ro',
    isa    => 'Text::Tradition::Collation::Reading::WordForm',
    writer => '_set_form',
    );
93
# Object-construction hook.  Do auto-disambiguation if we were created with
# a single wordform: a lone candidate is selected as the lexeme's form by
# calling disambiguate() on index 0.  With zero or several candidates
# nothing is selected.
sub BUILD {
    my $self = shift;

    if( $self->matches == 1 ) {
        $self->disambiguate( 0 );
    }
    return;
}
cca4f996 102
=head2 disambiguate( $index )

Selects the word form at $index in the list of matching forms, and asserts
that this is the correct form for the lexeme.

=cut
109
# Select the candidate word form at position $idx as the correct form for
# this lexeme.
#
# Parameters:
#   $idx - index into the list of matching forms.
# Returns: the value returned by the is_disambiguated( 1 ) call.
# Dies (via Carp::croak) if $idx is undefined or there is no candidate at
# that index; in that case neither the form nor the flag is modified.
sub disambiguate {
    my( $self, $idx ) = @_;

    # An undefined index can never name a candidate; skip the list access
    # so that the failure is reported with our own message.
    my $form = defined $idx ? $self->matching_form( $idx ) : undef;
    unless( $form ) {
        require Carp;
        my $shown = defined $idx ? $idx : '(undef)';
        Carp::croak( "There is no candidate wordform at index $shown" );
    }

    $self->_set_form( $form );
    return $self->is_disambiguated( 1 );
}
118
=head2 lookup

Uses the module for the declared language to look up the lexeme in the
language database (if any).  Sets the returned morphological matches in
matching_forms, and returns the list as an array of WordForm objects.

=cut
126
# Look up this lexeme's string using the module for its declared language
# ("Text::Tradition::Language::<language>"), store the results as the
# lexeme's matching forms, and return them.
#
# Returns: the list of values returned by the language module's
#   word_lookup function, called with the lexeme's string.
# Dies (via Carp::croak) if the language module cannot be loaded or has no
# word_lookup function.  An error raised inside word_lookup itself is not
# intercepted, so the real cause reaches the caller.
sub lookup {
    my $self = shift;
    my $lang = $self->language;
    my $langmod = "Text::Tradition::Language::$lang";

    # Guard only the loading of the module and the search for its lookup
    # function; eval yields undef if load() dies, and can() yields a false
    # value when the function does not exist.
    my $word_lookup = eval {
        load( $langmod );
        $langmod->can( 'word_lookup' );
    };
    unless( $word_lookup ) {
        require Carp;
        Carp::croak( "No language module for $lang, or the module has no word_lookup functionality" );
    }

    my @answers = $word_lookup->( $self->string );

    # Discard any stale matches before storing the fresh ones.
    $self->clear_matching_forms;
    $self->add_matching_form( @answers ) if @answers;
    return @answers;
}
142
# Unimport the Moose keywords (has, etc.) from this package now that the
# class is fully declared, then make its metaclass immutable.
no Moose;
__PACKAGE__->meta->make_immutable;

# A module file must end by returning a true value.
1;
147
=head1 LICENSE

This package is free software and is provided "as is" without express
or implied warranty.  You can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 AUTHOR

Tara L Andrews E<lt>aurum@cpan.orgE<gt>