Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::Lexeme; |
2 | |
use Moose;
use Carp ();
use Module::Load;
5 | |
6 | =head1 NAME |
7 | |
8 | Text::Tradition::Collation::Reading::Lexeme - represents the components of |
9 | a Reading. |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Text::Tradition is a library for representation and analysis of collated |
14 | texts, particularly medieval ones. A word form is used for the analysis of |
15 | Reading objects; it consists of a lemma, a language, and a code to |
16 | represent its part of speech. In general the word forms for a particular |
17 | language should be read from / written to some morphological database. |
18 | |
19 | =head1 METHODS |
20 | |
21 | =head2 new |
22 | |
23 | Creates a new lexeme from the passed options. |
24 | |
25 | =head2 language |
26 | |
27 | Returns the language to which this lexeme belongs. |
28 | |
=head2 string

Returns the canonical string version of this lexeme.
32 | |
33 | =head2 matches |
34 | |
35 | Returns the number of possible word forms for this lexeme, as drawn from |
36 | the appropriate database. |
37 | |
38 | =head2 matching_forms |
39 | |
40 | Returns an array of the possible word forms for this lexeme. |
41 | |
42 | =head2 matching_form( $index ) |
43 | |
44 | Returns the form at $index in the list of matching forms. |
45 | |
46 | =head2 is_disambiguated |
47 | |
48 | Returns true if a single wordform has been picked as 'correct' for this |
49 | lexeme in its context. |
50 | |
51 | =head2 form |
52 | |
53 | Returns the correct word form (if any has been selected) for the lexeme in |
54 | its context. |
55 | |
56 | =cut |
57 | |
# TODO need to be able to populate this from DB

# Language of the lexeme: a plain string that must be supplied to the
# constructor and cannot be changed afterwards.
has language => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);
64 | |
# Text of the lexeme: a plain string that must be supplied to the
# constructor; it stays writable through the accessor.
has string => (
    isa      => 'Str',
    is       => 'rw',
    required => 1,
);
70 | |
# Candidate word forms for this lexeme.  The list itself has no accessor;
# it is manipulated only through the delegated methods below.
#
#   matches              - number of candidate forms
#   matching_forms       - all candidate forms, as a list
#   matching_form( $i )  - the candidate form at index $i
#   add_matching_form    - append one or more candidate forms
#   clear_matching_forms - empty the candidate list
#
# The list defaults to an empty array so that the delegated methods work
# on a freshly constructed lexeme.
has 'wordform_matchlist' => (
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]',
    traits  => ['Array'],
    default => sub { [] },
    handles => {
        'matches'              => 'count',
        'matching_forms'       => 'elements',
        'matching_form'        => 'get',
        'add_matching_form'    => 'push',
        'clear_matching_forms' => 'clear',
    },
);
80 | |
# Flag recording whether a single word form has been chosen for this
# lexeme.  Publicly read-only; the private writer _set_disambiguated is
# used by disambiguate().  Starts out as undef, i.e. false.
has is_disambiguated => (
    isa     => 'Bool',
    is      => 'ro',
    writer  => '_set_disambiguated',
    default => undef,
);
87 | |
# The word form selected for this lexeme, if any.  Publicly read-only;
# the private writer _set_form is used by disambiguate().
has form => (
    isa    => 'Text::Tradition::Collation::Reading::WordForm',
    is     => 'ro',
    writer => '_set_form',
);
93 | |
94 | |
=head2 disambiguate( $index )

Selects the word form at $index in the list of matching forms, and asserts
that this is the correct form for the lexeme.  Throws an exception if there
is no candidate word form at $index.

=cut

sub disambiguate {
    my ( $self, $idx ) = @_;
    my $form = $self->matching_form( $idx );

    # Report the problem from the caller's point of view.
    Carp::croak( "There is no candidate wordform at index $idx" )
        unless $form;

    $self->_set_form( $form );
    return $self->_set_disambiguated( 1 );
}
110 | |
=head2 lookup

Uses the module for the declared language to look up the lexeme in the
language database (if any.) Sets the returned morphological matches in
matching_forms, and returns the list as an array of WordForm objects.

Throws an exception if the language module cannot be loaded or does not
provide a word_lookup function.  Errors raised by word_lookup itself are
passed through unchanged.

=cut

sub lookup {
    my $self = shift;
    my $lang = $self->language;

    # Resolve the lookup function of the language module.  A module that
    # fails to load and a module without word_lookup both leave $lookup
    # undefined and are reported with the same message.
    my $langmod = "Text::Tradition::Language::$lang";
    my $lookup  = eval { load( $langmod ); $langmod->can( 'word_lookup' ) };
    Carp::croak( "No language module for $lang, or the module has no word_lookup functionality" )
        unless $lookup;

    # word_lookup is called as a plain function with the lexeme string.
    # It runs outside the eval so that its own errors are not mislabelled.
    my @answers = $lookup->( $self->string );

    # Replace any previous candidates with the fresh result.
    $self->clear_matching_forms;
    $self->add_matching_form( @answers );
    return @answers;
}
134 | |
135 | no Moose; |
136 | __PACKAGE__->meta->make_immutable; |
137 | |
138 | 1; |
139 | |
140 | =head1 LICENSE |
141 | |
142 | This package is free software and is provided "as is" without express |
143 | or implied warranty. You can redistribute it and/or modify it under |
144 | the same terms as Perl itself. |
145 | |
146 | =head1 AUTHOR |
147 | |
148 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |