Commit | Line | Data |
784877d9 |
1 | package Text::Tradition::Collation::Reading; |
2 | |
8e1394aa |
3 | use Moose; |
e4b0f464 |
4 | use overload '""' => \&_stringify, 'fallback' => 1; |
784877d9 |
5 | |
3a2ebbf4 |
6 | =head1 NAME |
784877d9 |
7 | |
4aea6e9b |
8 | Text::Tradition::Collation::Reading - represents a reading (usually a word) |
9 | in a collation. |
10 | |
3a2ebbf4 |
11 | =head1 DESCRIPTION |
784877d9 |
12 | |
3a2ebbf4 |
13 | Text::Tradition is a library for representation and analysis of collated |
14 | texts, particularly medieval ones. A 'reading' refers to a unit of text, |
15 | usually a word, that appears in one or more witnesses (manuscripts) of the |
16 | tradition; the text of a given witness is composed of a set of readings in |
17 | a particular sequence. |
784877d9 |
18 | |
3a2ebbf4 |
19 | =head1 METHODS |
1ca1163d |
20 | |
3a2ebbf4 |
21 | =head2 new |
8e1394aa |
22 | |
4aea6e9b |
23 | Creates a new reading in the given collation with the given attributes. |
3a2ebbf4 |
24 | Options include: |
94c00c71 |
25 | |
3a2ebbf4 |
26 | =over 4 |
784877d9 |
27 | |
4aea6e9b |
28 | =item collation - The Text::Tradition::Collation object to which this |
29 | reading belongs. Required. |
e2902068 |
30 | |
3a2ebbf4 |
31 | =item id - A unique identifier for this reading. Required. |
910a0a6d |
32 | |
3a2ebbf4 |
33 | =item text - The word or other text of the reading. |
784877d9 |
34 | |
3a2ebbf4 |
35 | =item is_start - The reading is the starting point for the collation. |
3265b0ce |
36 | |
3a2ebbf4 |
37 | =item is_end - The reading is the ending point for the collation. |
784877d9 |
38 | |
3a2ebbf4 |
39 | =item is_lacuna - The 'reading' represents a known gap in the text. |
de51424a |
40 | |
4aea6e9b |
41 | =item is_ph - A temporary placeholder for apparatus parsing purposes. Do |
42 | not use unless you know what you are doing. |
12720144 |
43 | |
4aea6e9b |
44 | =item rank - The sequence number of the reading. This should probably not |
45 | be set manually. |
d047cd52 |
46 | |
3a2ebbf4 |
47 | =back |
8e1394aa |
48 | |
3a2ebbf4 |
49 | One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required. |
8e1394aa |
50 | |
3a2ebbf4 |
51 | =head2 collation |
94c00c71 |
52 | |
3a2ebbf4 |
53 | =head2 id |
94c00c71 |
54 | |
3a2ebbf4 |
55 | =head2 text |
4cdd82f1 |
56 | |
3a2ebbf4 |
57 | =head2 is_start |
4cdd82f1 |
58 | |
3a2ebbf4 |
59 | =head2 is_end |
4a8828f0 |
60 | |
3a2ebbf4 |
61 | =head2 is_lacuna |
4a8828f0 |
62 | |
3a2ebbf4 |
63 | =head2 rank |
4a8828f0 |
64 | |
3a2ebbf4 |
65 | Accessor methods for the given attributes. |
d047cd52 |
66 | |
3a2ebbf4 |
67 | =cut |
d047cd52 |
68 | |
3a2ebbf4 |
# The collation this reading belongs to. The reference is weakened; the
# 'required' flag is currently switched off.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading; must always be supplied.
has id => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# The text of the reading. Read-only through 'text'; 'alter_text' is the
# dedicated writer.
has text => (
    isa      => 'Str',
    is       => 'ro',
    writer   => 'alter_text',
    required => 1,
);

# Name of the language of the reading; 'Default' when not given.
has language => (
    isa     => 'Str',
    is      => 'ro',
    default => 'Default',
);

# Special-purpose flags. Each is a Bool with an undef default.
has is_start => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

has is_end => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

has is_lacuna => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

has is_ph => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

# Unlike the flags above, is_common is writable after construction.
has is_common => (
    isa     => 'Bool',
    is      => 'rw',
    default => undef,
);

# Integer rank of the reading; has_rank / clear_rank test and unset it.
has rank => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_rank',
    clearer   => 'clear_rank',
);
fd602649 |
131 | |
132 | ## For morphological analysis |
133 | |
# Normalized spelling of the reading text, if one has been assigned.
has 'normal_form' => (
    is => 'rw',
    isa => 'Str',
    predicate => 'has_normal_form',
);

# Lemma of the reading, if one has been assigned.
has 'lemma' => (
    is => 'rw',
    isa => 'Str',
    predicate => 'has_lemma',
);

# List of lexemes, each a single-key hash of lexeme string => arrayref of
# Morphology objects. The attribute starts out as an empty list so that
# the delegated methods (lexemes, has_morphology, _add_morph) never have to
# dereference an unset slot on a reading that has no morphology yet.
has 'morphology' => (
    traits => ['Array'],
    isa => 'ArrayRef[HashRef[ArrayRef[Text::Tradition::Collation::Reading::Morphology]]]',
    default => sub { [] },
    handles => {
        lexemes => 'elements',
        has_morphology => 'count',
        _clear_morph => 'clear',
        _add_morph => 'push',
    },
);
156 | |
629e27b0 |
157 | ## For prefix/suffix readings |
158 | |
# Bool flag (undef by default) for readings that join onto the prior one.
has join_prior => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

# Bool flag (undef by default) for readings that join onto the next one.
has join_next => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);
170 | |
3a2ebbf4 |
171 | |
# Normalize the constructor arguments before Moose processes them.
# Accepts either a single hashref or a flat key/value list. When one of the
# special booleans is set to a TRUE value, the text (and for start/end
# readings also the ID) is filled in to match:
#   is_lacuna - text defaults to '#LACUNA#' unless a text was given
#   is_start  - id and text become '#START#', rank becomes 0
#   is_end    - id and text become '#END#'
#   is_ph     - text is copied from the id
around BUILDARGS => sub {
    my $orig = shift;
    my $class = shift;
    my $args;
    if( @_ == 1 ) {
        my $given = shift;
        # Work on a shallow copy so that the caller's hash is left alone.
        $args = ref( $given ) eq 'HASH' ? { %$given } : $given;
    } else {
        $args = { @_ };
    }

    # Test the flag values, not merely their presence: an explicit
    # is_start => 0 must not turn the reading into the start node.
    if( $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    } elsif( $args->{'is_start'} ) {
        $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    } elsif( $args->{'is_end'} ) {
        $args->{'id'} = '#END#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    } elsif( $args->{'is_ph'} ) {
        $args->{'text'} = $args->{'id'};
    }

    $class->$orig( $args );
};
199 | |
200 | =head2 is_meta |
201 | |
202 | A meta attribute (ha ha), which should be true if any of our 'special' |
203 | booleans are true. Implies that the reading does not represent a bit |
204 | of text found in a witness. |
205 | |
206 | =cut |
207 | |
# True when any of the special booleans (start, end, lacuna, placeholder)
# is set on this reading. Returns the first true flag value found, or the
# value of is_ph when none of the earlier ones is true.
sub is_meta {
    my( $reading ) = @_;
    foreach my $flag ( qw/ is_start is_end is_lacuna / ) {
        my $value = $reading->$flag;
        return $value if $value;
    }
    return $reading->is_ph;
}
212 | |
027d819c |
213 | =head1 Convenience methods |
214 | |
215 | =head2 related_readings |
216 | |
217 | Calls Collation's related_readings with $self as the first argument. |
218 | |
219 | =cut |
220 | |
3a2ebbf4 |
# Delegate to the collation's related_readings, passing this reading first
# followed by any further arguments.
sub related_readings {
    my( $reading, @options ) = @_;
    my $collation = $reading->collation;
    return $collation->related_readings( $reading, @options );
}
225 | |
7f52eac8 |
226 | =head2 witnesses |
227 | |
228 | Calls Collation's reading_witnesses with $self as the first argument. |
229 | |
230 | =cut |
231 | |
# Delegate to the collation's reading_witnesses, passing this reading first
# followed by any further arguments.
sub witnesses {
    my( $reading, @options ) = @_;
    my $collation = $reading->collation;
    return $collation->reading_witnesses( $reading, @options );
}
236 | |
027d819c |
237 | =head2 predecessors |
238 | |
239 | Returns a list of Reading objects that immediately precede $self in the collation. |
240 | |
241 | =cut |
242 | |
22222af9 |
# Look up the IDs that the collation's sequence graph reports as
# predecessors of this reading's ID, and turn each one back into whatever
# the collation's reading() lookup returns for it.
sub predecessors {
    my( $reading ) = @_;
    my @found;
    foreach my $rid ( $reading->collation->sequence->predecessors( $reading->id ) ) {
        push( @found, $reading->collation->reading( $rid ) );
    }
    return @found;
}
248 | |
027d819c |
249 | =head2 successors |
250 | |
251 | Returns a list of Reading objects that immediately follow $self in the collation. |
252 | |
253 | =cut |
254 | |
22222af9 |
# Look up the IDs that the collation's sequence graph reports as successors
# of this reading's ID, and turn each one back into whatever the
# collation's reading() lookup returns for it.
sub successors {
    my( $reading ) = @_;
    my @found;
    foreach my $rid ( $reading->collation->sequence->successors( $reading->id ) ) {
        push( @found, $reading->collation->reading( $rid ) );
    }
    return @found;
}
260 | |
027d819c |
261 | =head2 set_identical( $other_reading) |
262 | |
263 | Backwards compatibility method, to add a transposition relationship |
264 | between $self and $other_reading. Don't use this. |
265 | |
266 | =cut |
267 | |
1d310495 |
# Backwards-compatibility shim: asks the collation to add a relationship of
# type 'transposition' between this reading and $other, and returns
# whatever add_relationship returns.
sub set_identical {
    my $self = shift;
    my $other = shift;
    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
273 | |
3a2ebbf4 |
# Handler for the '""' overload: a reading stringifies to its ID.
sub _stringify {
    my( $reading ) = @_;
    my $id = $reading->id;
    return $id;
}
d047cd52 |
278 | |
4d9593df |
279 | =head1 MORPHOLOGY |
280 | |
281 | A few methods to try to tack on morphological information. |
282 | |
283 | =head2 is_disambiguated |
284 | |
285 | Returns true if there is only one tag per lexeme in this reading. |
286 | |
06e7cbc7 |
287 | =head2 use_lexemes |
288 | |
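289 | Replaces this reading's lexeme list with the given lexemes, provided that they concatenate (ignoring case, whitespace and hyphens) to the reading's normal form, or to its text if it has no normal form. Warns and changes nothing otherwise. |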
290 | |
291 | =head2 add_morphological_tag |
292 | |
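293 | Takes an optional lexeme and a set of options; creates a Morphology object from the options and attaches it to that lexeme. If no lexeme is given, the reading text is used as the lexeme. |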
294 | |
295 | =head2 disambiguate |
296 | |
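297 | Takes an optional lexeme and an index; keeps only the morphological tag at that index for the lexeme and discards the others. If no lexeme is given, the reading text is used as the lexeme. |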
298 | |
4d9593df |
299 | =cut |
300 | |
# Replace the reading's lexeme list with @lexemes, each starting out with
# an empty list of tags. The lexemes are only accepted if, once joined,
# lowercased and stripped of whitespace and hyphens, they equal the
# reading's normal form (or its text when there is no normal form) treated
# the same way. On mismatch a warning is emitted and nothing is changed.
# Returns the results of the individual _add_morph calls.
sub use_lexemes {
    my $self = shift;
    my @lexemes = @_;

    my $wanted = lc( $self->has_normal_form ? $self->normal_form : $self->text );
    $wanted =~ s/[\s-]+//g;

    my $offered = lc( join( '', @lexemes ) );
    $offered =~ s/[\s-]+//g;

    if( $offered ne $wanted ) {
        warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
        return;
    }

    $self->_clear_morph;
    return map { $self->_add_morph( { $_ => [] } ) } @lexemes;
}
315 | |
# Attach a morphological tag to one lexeme of this reading.
#
# Call as add_morphological_tag( $lexeme, $opts ) or, for a reading that is
# a single lexeme, as add_morphological_tag( $opts ); in the latter case
# the reading text is used as the lexeme. $opts is passed unchanged to the
# Morphology constructor.
#
# Warns and returns nothing if the lexeme is not among the reading's
# lexemes; otherwise returns the result of the push (the new number of tags
# for that lexeme).
sub add_morphological_tag {
    my( $self, $lexeme, $opts ) = @_;
    my $struct;
    unless( $opts ) {
        # No lexeme was passed; use reading text.
        $opts = $lexeme;
        $lexeme = $self->text;
        # Only (re)initialize the lexeme list when the whole-text lexeme is
        # not present yet. Re-initializing on every call would throw away
        # the tags added by earlier calls, so that a reading could never
        # hold more than one tag for its text.
        # NOTE(review): the eval guards against lexemes() failing on a
        # morphology slot that was never initialized - confirm whether that
        # can still happen.
        my @known = do { local $@; eval { $self->lexemes } };
        $self->use_lexemes( $lexeme )
            unless grep { exists $_->{$lexeme} } @known;
    }
    # Get the correct container
    ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Now make the morph object and add it to this lexeme.
    my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
    # TODO Check for existence
    push( @{$struct->{$lexeme}}, $morph_obj );
}
336 | |
# Reduce the tags of one lexeme to the single tag at position $index.
#
# Call as disambiguate( $lexeme, $index ) or, for a reading that is a
# single lexeme, as disambiguate( $index ); in the latter case the reading
# text is used as the lexeme.
#
# Warns and returns nothing if the lexeme is not among the reading's
# lexemes or if there is no tag at $index; otherwise returns the new
# one-element tag list.
sub disambiguate {
    my( $self, $lexeme, $index ) = @_;
    my $struct;
    # Test for definedness, not truth: 0 is a perfectly valid index and
    # must not be mistaken for a missing second argument.
    unless( defined $index ) {
        # No lexeme was passed; use reading text.
        $index = $lexeme;
        $lexeme = $self->text;
    }
    # Get the correct container
    ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Keep the object at the selected index; refuse to replace the tag
    # list with a list holding nothing but undef.
    my $selected = $struct->{$lexeme}->[$index];
    unless( defined $selected ) {
        warn "No morphological tag at index $index for lexeme $lexeme";
        return;
    }
    return $struct->{$lexeme} = [ $selected ];
}
355 | |
# Returns 1 when the reading has morphology and every lexeme carries
# exactly one tag; returns undef otherwise.
sub is_disambiguated {
    my( $self ) = @_;
    return undef unless $self->has_morphology;
    my @unsettled = grep {
        my( $form ) = keys %$_;    # each lexeme hash has a single key
        @{ $_->{$form} } != 1;
    } $self->lexemes;
    return @unsettled ? undef : 1;
}
365 | |
366 | ## Utility methods |
367 | |
2acf0892 |
# JSON serialization hook: a reading is represented by its text alone.
sub TO_JSON {
    my( $reading ) = @_;
    my $text = $reading->text;
    return $text;
}
372 | |
4d9593df |
373 | ## TODO will need a throw() here |
374 | |
375 | no Moose; |
376 | __PACKAGE__->meta->make_immutable; |
377 | |
378 | ################################################### |
379 | ### Morphology objects, to be attached to readings |
380 | ################################################### |
381 | |
382 | package Text::Tradition::Collation::Reading::Morphology; |
383 | |
384 | use Moose; |
385 | |
# Lemma this morphological tag belongs to (mandatory, read-only).
has lemma => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# Morphological code string (mandatory, read-only); comparison_array
# splits it according to the language.
has code => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# Name of the language the code is written for (mandatory, read-only).
has language => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);
403 | |
404 | ## Transmute codes into comparison arrays for our various languages. |
405 | |
# Normalize the constructor arguments before Moose processes them.
# Accepts either a single reference or a flat key/value list. A 'serial'
# argument of the form "lemma!!code" (as produced by serialization) is
# expanded into separate 'lemma' and 'code' arguments.
around BUILDARGS => sub {
    my $orig = shift;
    my $class = shift;
    my $args;
    if( @_ == 1 && ref( $_[0] ) ) {
        my $given = shift;
        # Work on a shallow copy: 'serial' is deleted below, and the
        # caller may want to reuse the same option hash for another object.
        $args = ref( $given ) eq 'HASH' ? { %$given } : $given;
    } else {
        $args = { @_ };
    }
    if( exists( $args->{'serial'} ) ) {
        # Split on the first separator only, so that a code which itself
        # contains '!!' is kept whole.
        my( $lemma, $code ) = split( /!!/, delete $args->{'serial'}, 2 );
        $args->{'lemma'} = $lemma;
        $args->{'code'} = $code;
    }
    $class->$orig( $args );
};
422 | |
# String form of this tag: the lemma and the code joined by '!!'.
sub serialization {
    my( $morph ) = @_;
    my @parts = ( $morph->lemma, $morph->code );
    return join( '!!', @parts );
}
427 | |
# Turn the morphological code into a list of fields that can be compared
# position by position.
#
#  French  - the code is split on '+'. The leading parts without '=' come
#            first, padded with undef to at least two entries; then the
#            values of the key=value parts follow in the fixed order
#            Pers, Nb, Temps, Genre, Spec, Fonc (undef where absent).
#  English - not implemented yet; returns an empty list.
#  others  - the code is split into its single characters.
sub comparison_array {
    my $self = shift;
    if( $self->language eq 'French' ) {
        my @array;
        my @bits = split( /\+/, $self->code );
        # First push the non k/v parts.
        while( @bits && $bits[0] !~ /=/ ) {
            push( @array, shift @bits );
        }
        while( @array < 2 ) {
            push( @array, undef );
        }
        # Now push the k/v parts in a known order.
        my @fields = qw/ Pers Nb Temps Genre Spec Fonc /;
        my %props;
        foreach my $bit ( @bits ) {
            # Split on the first '=' only, so a value may contain '='.
            my( $k, $v ) = split( /=/, $bit, 2 );
            $props{$k} = $v;
        }
        push( @array, @props{@fields} );
        # Give the answer.
        return @array;
    } elsif( $self->language eq 'English' ) {
        # Do something as yet undetermined. Return explicitly, so that the
        # result is an empty list and not the value of the condition above.
        return;
    }
    # Latin or Greek or Armenian, just split the chars
    return split( '', $self->code );
}
456 | |
021bdbac |
457 | no Moose; |
458 | __PACKAGE__->meta->make_immutable; |
d047cd52 |
459 | |
021bdbac |
460 | 1; |
d047cd52 |
461 | |