Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::Lexeme; |
2 | |
3 | use Moose; |
7604424b |
4 | use JSON (); |
cca4f996 |
5 | use Module::Load; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Collation::Reading::Lexeme - represents the components of |
10 | a Reading. |
11 | |
12 | =head1 DESCRIPTION |
13 | |
14 | Text::Tradition is a library for representation and analysis of collated |
15 | texts, particularly medieval ones. A word form is used for the analysis of |
16 | Reading objects; it consists of a lemma, a language, and a code to |
17 | represent its part of speech. In general the word forms for a particular |
18 | language should be read from / written to some morphological database. |
19 | |
20 | =head1 METHODS |
21 | |
22 | =head2 new |
23 | |
24 | Creates a new lexeme from the passed options. |
25 | |
26 | =head2 language |
27 | |
28 | Returns the language to which this lexeme belongs. |
29 | |
30 | =head2 normalized |
31 | |
32 | Returns the canonical string version of this lexeme. |
33 | |
34 | =head2 matches |
35 | |
36 | Returns the number of possible word forms for this lexeme, as drawn from |
37 | the appropriate database. |
38 | |
39 | =head2 matching_forms |
40 | |
41 | Returns an array of the possible word forms for this lexeme. |
42 | |
43 | =head2 matching_form( $index ) |
44 | |
45 | Returns the form at $index in the list of matching forms. |
46 | |
47 | =head2 is_disambiguated |
48 | |
49 | Returns true if a single wordform has been picked as 'correct' for this |
50 | lexeme in its context. |
51 | |
52 | =head2 form |
53 | |
54 | Returns the correct word form (if any has been selected) for the lexeme in |
55 | its context. |
56 | |
57 | =cut |
58 | |
59 | # TODO need to be able to populate this from DB |
60 | has 'language' => ( |
61 | is => 'ro', |
62 | isa => 'Str', |
63 | required => 1, |
64 | ); |
65 | |
66 | has 'string' => ( |
67 | is => 'rw', |
68 | isa => 'Str', |
69 | required => 1, |
70 | ); |
71 | |
72 | has 'wordform_matchlist' => ( |
73 | isa => 'ArrayRef[Text::Tradition::Collation::Reading::WordForm]', |
74 | traits => ['Array'], |
75 | handles => { |
76 | 'matches' => 'count', |
77 | 'matching_forms' => 'elements', |
78 | 'matching_form' => 'get', |
79 | 'add_matching_form' => 'push', |
d3e7842a |
80 | }, |
cca4f996 |
81 | ); |
82 | |
83 | has 'is_disambiguated' => ( |
d3e7842a |
84 | is => 'rw', |
cca4f996 |
85 | isa => 'Bool', |
86 | default => undef, |
cca4f996 |
87 | ); |
88 | |
89 | has 'form' => ( |
90 | is => 'ro', |
91 | isa => 'Text::Tradition::Collation::Reading::WordForm', |
92 | writer => '_set_form', |
93 | ); |
94 | |
7604424b |
95 | around BUILDARGS => sub { |
96 | my $orig = shift; |
97 | my $class = shift; |
98 | my $args = @_ == 1 ? $_[0] : { @_ }; |
99 | if( exists $args->{JSON} ) { |
100 | my $data = $args->{JSON}; |
101 | if( exists $data->{'form'} && $data->{'form'} ) { |
102 | my $form = Text::Tradition::Collation::Reading::WordForm->new( |
103 | 'JSON' => $data->{'form'} ); |
104 | $data->{'form'} = $form; |
105 | } |
106 | if( exists $data->{'wordform_matchlist'} && $data->{'wordform_matchlist'} ) { |
107 | my @ml; |
108 | foreach my $wfjson ( @{$data->{'wordform_matchlist'}} ) { |
109 | push( @ml, Text::Tradition::Collation::Reading::WordForm->new( |
110 | 'JSON' => $wfjson ) ); |
111 | } |
112 | $data->{'wordform_matchlist'} = \@ml; |
113 | } |
114 | $args = $data; |
115 | } |
116 | $class->$orig( $args ); |
117 | }; |
118 | |
d3e7842a |
119 | # Do auto-disambiguation if we were created with a single wordform |
120 | sub BUILD { |
121 | my $self = shift; |
122 | |
123 | if( $self->matches == 1 ) { |
124 | $self->disambiguate( 0 ); |
125 | } |
126 | } |
cca4f996 |
127 | |
128 | =head2 disambiguate( $index ) |
129 | |
130 | Selects the word form at $index in the list of matching forms, and asserts |
131 | that this is the correct form for the lexeme. |
132 | |
133 | =cut |
134 | |
135 | sub disambiguate { |
136 | my( $self, $idx ) = @_; |
137 | my $form = $self->matching_form( $idx ); |
138 | throw( "There is no candidate wordform at index $idx" ) |
139 | unless $form; |
140 | $self->_set_form( $form ); |
d3e7842a |
141 | $self->is_disambiguated( 1 ); |
cca4f996 |
142 | } |
143 | |
7604424b |
144 | sub TO_JSON { |
cca4f996 |
145 | my $self = shift; |
7604424b |
146 | my $hash = {}; |
147 | # Do the scalar keys |
148 | map { $hash->{$_} = $self->$_ if defined $self->$_ } |
149 | qw/ language string is_disambiguated form /; |
150 | $hash->{'wordform_matchlist'} = [ $self->matching_forms ] if $self->matches; |
151 | return $hash; |
cca4f996 |
152 | } |
cca4f996 |
153 | no Moose; |
154 | __PACKAGE__->meta->make_immutable; |
155 | |
156 | 1; |
157 | |
158 | =head1 LICENSE |
159 | |
160 | This package is free software and is provided "as is" without express |
161 | or implied warranty. You can redistribute it and/or modify it under |
162 | the same terms as Perl itself. |
163 | |
164 | =head1 AUTHOR |
165 | |
166 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |