Commit | Line | Data |
784877d9 |
1 | package Text::Tradition::Collation::Reading; |
2 | |
8e1394aa |
3 | use Moose; |
6ad2ce78 |
4 | use Module::Load; |
70745e70 |
5 | use Text::Tradition::Error; |
7cd9f181 |
6 | use YAML::XS; |
e4b0f464 |
7 | use overload '""' => \&_stringify, 'fallback' => 1; |
784877d9 |
8 | |
3a2ebbf4 |
9 | =head1 NAME |
784877d9 |
10 | |
4aea6e9b |
11 | Text::Tradition::Collation::Reading - represents a reading (usually a word) |
12 | in a collation. |
13 | |
3a2ebbf4 |
14 | =head1 DESCRIPTION |
784877d9 |
15 | |
3a2ebbf4 |
16 | Text::Tradition is a library for representation and analysis of collated |
17 | texts, particularly medieval ones. A 'reading' refers to a unit of text, |
18 | usually a word, that appears in one or more witnesses (manuscripts) of the |
19 | tradition; the text of a given witness is composed of a set of readings in |
20 | a particular sequence. |
784877d9 |
21 | |
3a2ebbf4 |
22 | =head1 METHODS |
1ca1163d |
23 | |
3a2ebbf4 |
24 | =head2 new |
8e1394aa |
25 | |
4aea6e9b |
26 | Creates a new reading in the given collation with the given attributes. |
3a2ebbf4 |
27 | Options include: |
94c00c71 |
28 | |
3a2ebbf4 |
29 | =over 4 |
784877d9 |
30 | |
4aea6e9b |
31 | =item collation - The Text::Tradition::Collation object to which this |
32 | reading belongs. Required. |
e2902068 |
33 | |
3a2ebbf4 |
34 | =item id - A unique identifier for this reading. Required. |
910a0a6d |
35 | |
3a2ebbf4 |
36 | =item text - The word or other text of the reading. |
784877d9 |
37 | |
3a2ebbf4 |
38 | =item is_start - The reading is the starting point for the collation. |
3265b0ce |
39 | |
3a2ebbf4 |
40 | =item is_end - The reading is the ending point for the collation. |
784877d9 |
41 | |
3a2ebbf4 |
42 | =item is_lacuna - The 'reading' represents a known gap in the text. |
de51424a |
43 | |
4aea6e9b |
44 | =item is_ph - A temporary placeholder for apparatus parsing purposes. Do |
45 | not use unless you know what you are doing. |
12720144 |
46 | |
4aea6e9b |
47 | =item rank - The sequence number of the reading. This should probably not |
48 | be set manually. |
d047cd52 |
49 | |
3a2ebbf4 |
50 | =back |
8e1394aa |
51 | |
3a2ebbf4 |
52 | One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required. |
8e1394aa |
53 | |
3a2ebbf4 |
54 | =head2 collation |
94c00c71 |
55 | |
3a2ebbf4 |
56 | =head2 id |
94c00c71 |
57 | |
3a2ebbf4 |
58 | =head2 text |
4cdd82f1 |
59 | |
3a2ebbf4 |
60 | =head2 is_start |
4cdd82f1 |
61 | |
3a2ebbf4 |
62 | =head2 is_end |
4a8828f0 |
63 | |
3a2ebbf4 |
64 | =head2 is_lacuna |
4a8828f0 |
65 | |
3a2ebbf4 |
66 | =head2 rank |
4a8828f0 |
67 | |
3a2ebbf4 |
68 | Accessor methods for the given attributes. |
d047cd52 |
69 | |
3a2ebbf4 |
70 | =cut |
d047cd52 |
71 | |
3a2ebbf4 |
# The collation this reading belongs to.  The reference is stored weakened
# (weak_ref => 1).  Not enforced as required, although the POD says so.
has collation => (
    is       => 'ro',
    isa      => 'Text::Tradition::Collation',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading; must be supplied to the constructor.
has id => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Text of the reading.  The reader is text(); alter_text() is the writer.
# BUILDARGS fills this in for the special (start/end/lacuna/placeholder)
# readings.
has text => (
    is       => 'ro',
    isa      => 'Str',
    writer   => 'alter_text',
    required => 1,
);

# Optional language name; has_language() reports whether one was given.
has language => (
    is        => 'ro',
    isa       => 'Str',
    predicate => 'has_language',
);
97 | |
3a2ebbf4 |
# Marker flags fixed at construction time: start node, end node, lacuna and
# apparatus placeholder.  Each one is a read-only Bool that defaults to undef.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

# Read-write Bool flag, undef until something sets it.
has is_common => (
    is      => 'rw',
    isa     => 'Bool',
    default => undef,
);

# Integer rank of the reading.  Optional: has_rank() tests whether it is
# set and clear_rank() unsets it again.
has rank => (
    is        => 'rw',
    isa       => 'Int',
    clearer   => 'clear_rank',
    predicate => 'has_rank',
);
fd602649 |
134 | |
## Attributes used for morphological analysis

# Normalized form of the reading text; has_normal_form() tests for it.
has normal_form => (
    is        => 'rw',
    isa       => 'Str',
    predicate => 'has_normal_form',
);

# The Lexeme objects attached to the reading.  No direct accessor is
# generated; the list is reached only through the delegated methods below.
has reading_lexemes => (
    traits  => [ 'Array' ],
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
    default => sub { [] },
    handles => {
        lexemes       => 'elements',
        has_lexemes   => 'count',
        clear_lexemes => 'clear',
        add_lexeme    => 'push',
    },
);

## Attributes used for prefix/suffix readings

# Read-only Bool flags, undef by default.
has [ qw( join_prior join_next ) ] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);
169 | |
3a2ebbf4 |
170 | |
# Normalize the constructor arguments before Moose validates them.
#
# Accepts either a single hashref or a flat key/value list.  When one of the
# special boolean flags is TRUE the text (and, for the start and end nodes,
# the id) is replaced by a fixed marker:
#
#   is_lacuna - text defaults to '#LACUNA#' unless a text was supplied
#   is_start  - id and text become '#START#', rank becomes 0
#   is_end    - id and text become '#END#'
#   is_ph     - text is copied from the id
#
# A flag that is present but false (e.g. is_start => 0) is treated like an
# absent flag, and the caller's hashref is never modified.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    # A lone argument that is not a plain hashref cannot be inspected here;
    # pass it on untouched so the default BUILDARGS can report the problem.
    return $class->$orig( @_ ) if @_ == 1 && ref $_[0] ne 'HASH';

    # Always work on our own shallow copy of the arguments.
    my $args = @_ == 1 ? { %{ $_[0] } } : { @_ };

    if( $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    } elsif( $args->{'is_start'} ) {
        $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    } elsif( $args->{'is_end'} ) {
        $args->{'id'} = '#END#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    } elsif( $args->{'is_ph'} ) {
        $args->{'text'} = $args->{'id'};
    }

    return $class->$orig( $args );
};
198 | |
70745e70 |
# Post-construction hook.  When the constructor arguments include a
# 'lexemes' key, its value is handed to _deserialize_lexemes.
sub BUILD {
    my $self      = shift;
    my $init_args = shift;
    $self->_deserialize_lexemes( $init_args->{'lexemes'} )
        if exists $init_args->{'lexemes'};
}
206 | |
3a2ebbf4 |
207 | =head2 is_meta |
208 | |
209 | A meta attribute (ha ha), which should be true if any of our 'special' |
210 | booleans are true. Implies that the reading does not represent a bit |
211 | of text found in a witness. |
212 | |
213 | =cut |
214 | |
sub is_meta {
    my( $reading ) = @_;
    # Walk the special flags in order and stop at the first true one; if
    # none is true the (false) value of the last flag is what comes back.
    my $flag;
    foreach my $check ( qw( is_start is_end is_lacuna is_ph ) ) {
        $flag = $reading->$check;
        last if $flag;
    }
    return $flag;
}
219 | |
027d819c |
220 | =head1 Convenience methods |
221 | |
222 | =head2 related_readings |
223 | |
224 | Calls Collation's related_readings with $self as the first argument. |
225 | |
226 | =cut |
227 | |
3a2ebbf4 |
sub related_readings {
    my( $self, @options ) = @_;
    # Delegate to the collation, putting this reading first.
    my $collation = $self->collation;
    return $collation->related_readings( $self, @options );
}
232 | |
7f52eac8 |
233 | =head2 witnesses |
234 | |
235 | Calls Collation's reading_witnesses with $self as the first argument. |
236 | |
237 | =cut |
238 | |
sub witnesses {
    my( $self, @options ) = @_;
    # Delegate to the collation, putting this reading first.
    my $collation = $self->collation;
    return $collation->reading_witnesses( $self, @options );
}
243 | |
027d819c |
244 | =head2 predecessors |
245 | |
246 | Returns a list of Reading objects that immediately precede $self in the collation. |
247 | |
248 | =cut |
249 | |
22222af9 |
sub predecessors {
    my( $self ) = @_;
    # Ask the sequence graph for the ids that precede ours, then turn each
    # id into the corresponding reading via the collation.
    my @prior_ids = $self->collation->sequence->predecessors( $self->id );
    my @prior;
    foreach my $rid ( @prior_ids ) {
        push( @prior, $self->collation->reading( $rid ) );
    }
    return @prior;
}
255 | |
027d819c |
256 | =head2 successors |
257 | |
258 | Returns a list of Reading objects that immediately follow $self in the collation. |
259 | |
260 | =cut |
261 | |
22222af9 |
sub successors {
    my( $self ) = @_;
    # Ask the sequence graph for the ids that follow ours, then turn each
    # id into the corresponding reading via the collation.
    my @next_ids = $self->collation->sequence->successors( $self->id );
    my @following;
    foreach my $rid ( @next_ids ) {
        push( @following, $self->collation->reading( $rid ) );
    }
    return @following;
}
267 | |
027d819c |
268 | =head2 set_identical( $other_reading) |
269 | |
270 | Backwards compatibility method, to add a transposition relationship |
271 | between $self and $other_reading. Don't use this. |
272 | |
273 | =cut |
274 | |
1d310495 |
sub set_identical {
    my $self  = shift;
    my $other = shift;
    # Kept for backwards compatibility: registers a 'transposition'
    # relationship between the two readings on the collation.
    my $options = { 'type' => 'transposition' };
    return $self->collation->add_relationship( $self, $other, $options );
}
280 | |
3a2ebbf4 |
# Handler for the overloaded '""' operator: a reading stringifies to its id.
sub _stringify {
    my( $reading ) = @_;
    return $reading->id;
}
d047cd52 |
285 | |
4d9593df |
286 | =head1 MORPHOLOGY |
287 | |
7cd9f181 |
288 | Methods for the morphological information (if any) attached to readings. |
289 | A reading may be made up of multiple lexemes; the concatenated lexeme |
290 | strings ought to match the reading's normalized form. |
291 | |
292 | See L<Text::Tradition::Collation::Reading::Lexeme> for more information |
293 | on Lexeme objects and their attributes. |
294 | |
295 | =head2 has_lexemes |
296 | |
297 | Returns a true value if the reading has any attached lexemes. |
4d9593df |
298 | |
6ad2ce78 |
299 | =head2 lexemes |
06e7cbc7 |
300 | |
7cd9f181 |
301 | Returns the Lexeme objects (if any) attached to the reading. |
6ad2ce78 |
302 | |
303 | =head2 clear_lexemes |
304 | |
7cd9f181 |
305 | Wipes any associated Lexeme objects out of the reading. |
306 | |
307 | =head2 add_lexeme( $lexobj ) |
6ad2ce78 |
308 | |
7cd9f181 |
309 | Adds the Lexeme in $lexobj to the list of lexemes. |
310 | |
311 | =head2 lemmatize |
312 | |
313 | If the language of the reading is set, this method will use the appropriate |
314 | Language model to determine the lexemes that belong to this reading. See |
315 | L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition. |
06e7cbc7 |
316 | |
4d9593df |
317 | =cut |
318 | |
6ad2ce78 |
# Look up the lexemes for this reading using the language model that
# matches the reading's language.
#
# Warns and returns nothing when no language is set.  Otherwise loads
# Text::Tradition::Language::<language> and calls its reading_lookup
# function with this reading, returning whatever that function returns.
# Throws a Reading error if the language module has no reading_lookup.
sub lemmatize {
    my $self = shift;
    unless( $self->has_language ) {
        warn "Please set a language to lemmatize a tradition";
        return;
    }
    my $mod = "Text::Tradition::Language::" . $self->language;
    load( $mod );

    # can() returns a false value for a module without the function; check
    # it rather than dereferencing blindly and dying with an opaque error.
    my $lookup = $mod->can( 'reading_lookup' );
    throw( "Language module $mod does not provide reading_lookup" )
        unless $lookup;
    return $lookup->( $self );
}
4d9593df |
330 | |
7cd9f181 |
# Used for graph serialization: flatten the reading's lexemes into a single
# string.  Each lexeme becomes language, string, chosen form and matching
# forms joined by '|L|'; the matching forms are themselves joined by '|M|';
# the lexemes are joined by '|R|'.  A lexeme without a chosen form gets an
# empty string in that position.
# TODO Push this in to the Lexeme package.
sub _serialize_lexemes {
    my( $self ) = @_;
    my @serialized = map {
        my $lexeme   = $_;
        my @matching = map { $_->to_string } $lexeme->matching_forms;
        my $chosen   = $lexeme->form ? $lexeme->form->to_string : '';
        join( '|L|', $lexeme->language, $lexeme->string, $chosen,
            join( '|M|', @matching ) );
    } $self->lexemes;
    return join( '|R|', @serialized );
}
70745e70 |
348 | |
# Rebuild the reading's lexemes from the string produced by
# _serialize_lexemes and replace any lexemes currently attached.
#
# $data holds one record per lexeme, separated by '|R|'.  Each record is
# language, string, chosen form and matching forms separated by '|L|', and
# the matching forms are separated by '|M|'.  Does nothing when $data is
# empty.  Throws a Reading error if the Lexeme/WordForm modules cannot be
# loaded.
sub _deserialize_lexemes {
    my( $self, $data ) = @_;
    return unless $data;

    # Need to have the lexeme modules in order to have lexemes.  They are
    # loaded with require, at run time, so that a load failure is really
    # caught by the eval; 'use' would run at compile time, outside of it.
    eval {
        require Text::Tradition::Collation::Reading::Lexeme;
        require Text::Tradition::Collation::Reading::WordForm;
        1;
    } or throw( $@ || 'Could not load the lexeme modules' );

    # Good to go - add the lexemes.
    my @lexemes;
    foreach my $lexdata ( split( /\|R\|/, $data ) ) {
        my( $lang, $lstring, $form, $allforms ) = split( /\|L\|/, $lexdata );
        my @wfdata;
        push( @wfdata, $form ) if $form;
        # split drops trailing empty fields, so a record with an empty
        # match list leaves $allforms undefined.
        push( @wfdata, split( /\|M\|/, $allforms ) ) if defined $allforms;
        my @wforms;
        foreach my $wd ( @wfdata ) {
            my $wf = Text::Tradition::Collation::Reading::WordForm->new(
                'serial' => $wd );
            push( @wforms, $wf );
        }
        my %largs = ( 'language' => $lang, 'string' => $lstring );
        if( $form ) {
            # The chosen form was put first in the list above.
            $largs{'form'} = shift @wforms;
            $largs{'is_disambiguated'} = 1;
        }
        $largs{'wordform_matchlist'} = \@wforms;
        push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new( %largs ) );
    }
    $self->clear_lexemes;
    return $self->add_lexeme( @lexemes );
}
7cd9f181 |
384 | |
4d9593df |
385 | ## Utility methods |
386 | |
2acf0892 |
# JSON serialization hook: a reading is represented by its text.
sub TO_JSON {
    my( $reading ) = @_;
    return $reading->text;
}
391 | |
70745e70 |
# Raise a Text::Tradition::Error with the ident 'Reading error', using the
# first argument as the message.
sub throw {
    my @details = (
        'ident'   => 'Reading error',
        'message' => $_[0],
    );
    Text::Tradition::Error->throw( @details );
}
4d9593df |
398 | |
399 | no Moose; |
400 | __PACKAGE__->meta->make_immutable; |
401 | |
021bdbac |
402 | 1; |