wrap documentation strings
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
e4b0f464 4use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 5
3a2ebbf4 6=head1 NAME
784877d9 7
4aea6e9b 8Text::Tradition::Collation::Reading - represents a reading (usually a word)
9in a collation.
10
3a2ebbf4 11=head1 DESCRIPTION
784877d9 12
3a2ebbf4 13Text::Tradition is a library for representation and analysis of collated
14texts, particularly medieval ones. A 'reading' refers to a unit of text,
15usually a word, that appears in one or more witnesses (manuscripts) of the
16tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
784877d9 18
3a2ebbf4 19=head1 METHODS
1ca1163d 20
3a2ebbf4 21=head2 new
8e1394aa 22
4aea6e9b 23Creates a new reading in the given collation with the given attributes.
3a2ebbf4 24Options include:
94c00c71 25
3a2ebbf4 26=over 4
784877d9 27
4aea6e9b 28=item collation - The Text::Tradition::Collation object to which this
29reading belongs. Required.
e2902068 30
3a2ebbf4 31=item id - A unique identifier for this reading. Required.
910a0a6d 32
3a2ebbf4 33=item text - The word or other text of the reading.
784877d9 34
3a2ebbf4 35=item is_start - The reading is the starting point for the collation.
3265b0ce 36
3a2ebbf4 37=item is_end - The reading is the ending point for the collation.
784877d9 38
3a2ebbf4 39=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 40
4aea6e9b 41=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
42not use unless you know what you are doing.
12720144 43
4aea6e9b 44=item rank - The sequence number of the reading. This should probably not
45be set manually.
d047cd52 46
3a2ebbf4 47=back
8e1394aa 48
3a2ebbf4 49One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 50
3a2ebbf4 51=head2 collation
94c00c71 52
3a2ebbf4 53=head2 id
94c00c71 54
3a2ebbf4 55=head2 text
4cdd82f1 56
3a2ebbf4 57=head2 is_start
4cdd82f1 58
3a2ebbf4 59=head2 is_end
4a8828f0 60
3a2ebbf4 61=head2 is_lacuna
4a8828f0 62
3a2ebbf4 63=head2 rank
4a8828f0 64
3a2ebbf4 65Accessor methods for the given attributes.
d047cd52 66
3a2ebbf4 67=cut
d047cd52 68
# The collation this reading belongs to, held as a weak reference.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading; must be supplied.
has id => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# Text of the reading. Read-only through 'text'; 'alter_text' is the
# separate writer method.
has text => (
    isa      => 'Str',
    is       => 'ro',
    writer   => 'alter_text',
    required => 1,
);

has language => (
    isa     => 'Str',
    is      => 'ro',
    default => 'Default',
);

# Read-only marker flags; all share the same declaration.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

has is_common => (
    isa     => 'Bool',
    is      => 'rw',
    default => undef,
);

# Rank has no default; use has_rank / clear_rank to test or unset it.
has rank => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_rank',
    clearer   => 'clear_rank',
);
fd602649 131
## For morphological analysis

has 'normal_form' => (
    is        => 'rw',
    isa       => 'Str',
    predicate => 'has_normal_form',
);

has 'lemma' => (
    is        => 'rw',
    isa       => 'Str',
    predicate => 'has_lemma',
);

# List of single-key hashes, one per lexeme: { $lexeme => [ Morphology, ... ] }.
# The attribute is only reachable through the delegated methods below.
has 'morphology' => (
    traits  => ['Array'],
    isa     => 'ArrayRef[HashRef[ArrayRef[Text::Tradition::Collation::Reading::Morphology]]]',
    # Start out with an empty list so that the delegated methods
    # (has_morphology, lexemes) can be called on a reading that has not
    # been given any morphological information yet.
    default => sub { [] },
    handles => {
        lexemes        => 'elements',
        has_morphology => 'count',
        _clear_morph   => 'clear',
        _add_morph     => 'push',
    },
);
156
## For prefix/suffix readings

# Both join flags are read-only booleans that stay unset unless given to
# the constructor.
has [ qw( join_prior join_next ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);
170
3a2ebbf4 171
# Normalise the constructor arguments (a single hashref or a key/value
# list) and fill in the fixed id / text / rank values that go with the
# special marker flags.
around BUILDARGS => sub {
    my( $orig, $class, @params ) = @_;

    # Work on a shallow copy, so that a hashref handed in by the caller is
    # never modified behind its back.
    my %args = @params == 1 ? %{ $params[0] } : @params;

    # If one of our special booleans is set, we change the text and the
    # ID to match.  The flags are tested for truth rather than mere
    # existence, so an explicit false value (e.g. is_start => 0) does not
    # turn an ordinary reading into a marker.
    if( $args{'is_lacuna'} && !exists $args{'text'} ) {
        $args{'text'} = '#LACUNA#';
    } elsif( $args{'is_start'} ) {
        $args{'id'} = '#START#';  # Change the ID to ensure we have only one
        $args{'text'} = '#START#';
        $args{'rank'} = 0;
    } elsif( $args{'is_end'} ) {
        $args{'id'} = '#END#';    # Change the ID to ensure we have only one
        $args{'text'} = '#END#';
    } elsif( $args{'is_ph'} ) {
        # Placeholders simply use their ID as text.
        $args{'text'} = $args{'id'};
    }

    return $class->$orig( \%args );
};
199
200=head2 is_meta
201
202A meta attribute (ha ha), which should be true if any of our 'special'
203booleans are true. Implies that the reading does not represent a bit
204of text found in a witness.
205
206=cut
207
sub is_meta {
    my( $reading ) = @_;
    # Hand back the first true marker flag; if none of the first three is
    # set, the answer is whatever is_ph says.
    foreach my $flag ( qw( is_start is_end is_lacuna ) ) {
        my $value = $reading->$flag;
        return $value if $value;
    }
    return $reading->is_ph;
}
212
027d819c 213=head1 Convenience methods
214
215=head2 related_readings
216
217Calls Collation's related_readings with $self as the first argument.
218
219=cut
220
sub related_readings {
    my( $reading, @options ) = @_;
    # Pure delegation: the collation does the work, with this reading
    # put in front of whatever arguments were passed.
    my $collation = $reading->collation;
    return $collation->related_readings( $reading, @options );
}
225
7f52eac8 226=head2 witnesses
227
228Calls Collation's reading_witnesses with $self as the first argument.
229
230=cut
231
sub witnesses {
    my( $reading, @rest ) = @_;
    # Delegates to the collation's reading_witnesses, passing this reading
    # first and any further arguments through unchanged.
    return $reading->collation->reading_witnesses( $reading, @rest );
}
236
027d819c 237=head2 predecessors
238
239Returns a list of Reading objects that immediately precede $self in the collation.
240
241=cut
242
sub predecessors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    # The sequence graph yields IDs; look each one up in the collation.
    my @prior_ids = $collation->sequence->predecessors( $reading->id );
    my @prior;
    foreach my $rid ( @prior_ids ) {
        push( @prior, $reading->collation->reading( $rid ) );
    }
    return @prior;
}
248
027d819c 249=head2 successors
250
251Returns a list of Reading objects that immediately follow $self in the collation.
252
253=cut
254
sub successors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    # The sequence graph yields IDs; look each one up in the collation.
    my @next_ids = $collation->sequence->successors( $reading->id );
    my @following;
    foreach my $rid ( @next_ids ) {
        push( @following, $reading->collation->reading( $rid ) );
    }
    return @following;
}
260
027d819c 261=head2 set_identical( $other_reading)
262
263Backwards compatibility method, to add a transposition relationship
264between $self and $other_reading. Don't use this.
265
266=cut
267
sub set_identical {
    my( $reading, $twin ) = @_;
    # Despite the name, the relationship recorded is a transposition.
    my %rel_opts = ( 'type' => 'transposition' );
    return $reading->collation->add_relationship( $reading, $twin, \%rel_opts );
}
273
# Handler for the '""' overload: a reading stringifies to its ID.
sub _stringify {
    my( $reading ) = @_;
    return $reading->id;
}
d047cd52 278
4d9593df 279=head1 MORPHOLOGY
280
281A few methods to try to tack on morphological information.
282
283=head2 is_disambiguated
284
285Returns true if there is only one tag per lexeme in this reading.
286
06e7cbc7 287=head2 use_lexemes
288
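Takes a list of lexemes that together make up this reading. Joined
together they must equal the reading's normal form (or its text, if no
normal form is set), ignoring case, whitespace and hyphens; otherwise a
warning is issued and nothing is changed. On success any existing
morphological information is discarded and each lexeme is registered with
an empty list of tags.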
290
291=head2 add_morphological_tag
292
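Takes an optional lexeme and a hashref of options, builds a
Text::Tradition::Collation::Reading::Morphology object from the options
and adds it to the tags of that lexeme. If no lexeme is given, the text
of the reading is used as the lexeme. Warns and returns if the lexeme is
not known to this reading.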
294
295=head2 disambiguate
296
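Takes an optional lexeme and an index, and keeps only the tag at that
index for the lexeme, discarding the others. If no lexeme is given, the
text of the reading is used as the lexeme. Warns and returns if the
lexeme is not known to this reading.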
298
4d9593df 299=cut
300
# Declare the lexemes this reading consists of.
#
# Arguments: the list of lexeme strings.
# Returns:   the number of lexemes registered, or nothing (after a warning)
#            if the lexemes do not add up to the reading.
#
# Any morphological information stored before is discarded on success.
sub use_lexemes {
    my( $self, @lexemes ) = @_;
    # The lexemes need to be the same as $self->text (or the normal form,
    # when there is one), ignoring case, whitespace and hyphens.
    my $cmpstr = $self->has_normal_form ? lc( $self->normal_form ) : lc( $self->text );
    $cmpstr =~ s/[\s-]+//g;
    my $lexstr = lc( join( '', @lexemes ) );
    $lexstr =~ s/[\s-]+//g;
    unless( $lexstr eq $cmpstr ) {
        warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
        return;
    }
    $self->_clear_morph;
    # One single-key container per lexeme, each starting without tags.
    foreach my $lexeme ( @lexemes ) {
        $self->_add_morph( { $lexeme => [] } );
    }
    return scalar @lexemes;
}
315
# Attach a morphological tag to one of the lexemes of this reading.
#
# Call as add_morphological_tag( $lexeme, $opts ) or, for a reading that
# is a single lexeme, as add_morphological_tag( $opts ).  $opts is passed
# to the Morphology constructor.
#
# Returns the number of tags the lexeme holds afterwards, or nothing
# (after a warning) if the lexeme is not known to this reading.
sub add_morphological_tag {
    my( $self, $lexeme, $opts ) = @_;
    unless( $opts ) {
        # No lexeme was passed; use reading text.
        $opts = $lexeme;
        $lexeme = $self->text;
        # Register the text as the sole lexeme only when it is not there
        # yet: use_lexemes starts from scratch, so calling it every time
        # would throw away the tags added by earlier calls.  The lookup is
        # wrapped in eval because the lexeme list may not be readable
        # before any morphology has been stored.
        local $@;
        my $known = eval { grep { exists $_->{$lexeme} } $self->lexemes };
        $self->use_lexemes( $lexeme ) unless $known;
    }
    # Get the correct container
    my( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Now make the morph object and add it to this lexeme.
    my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
    # TODO Check for existence
    push( @{$struct->{$lexeme}}, $morph_obj );
    return scalar @{$struct->{$lexeme}};
}
336
# Reduce the tags of a lexeme to the single one at the given index.
#
# Call as disambiguate( $lexeme, $index ) or, for a reading that is a
# single lexeme, as disambiguate( $index ).
#
# Returns the new one-element tag list (an array reference), or nothing
# (after a warning) if the lexeme is unknown or has no tag at that index.
sub disambiguate {
    my( $self, $lexeme, $index ) = @_;
    # Test for definedness, not truth: index 0 is a perfectly good index
    # and must not be mistaken for a missing argument.
    unless( defined $index ) {
        # No lexeme was passed; use reading text.
        $index = $lexeme;
        $lexeme = $self->text;
    }
    # Get the correct container
    my( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Refuse an index that does not point at an existing tag, rather than
    # replacing the tags with a list holding a single undef.
    my $tags = $struct->{$lexeme};
    my $in_range = defined $index && $index =~ /\A-?\d+\z/
        && ( $index < 0 ? -$index <= @$tags : $index < @$tags );
    unless( $in_range ) {
        my $shown = defined $index ? $index : 'undef';
        warn "No morphological tag at index $shown for lexeme $lexeme";
        return;
    }
    # Keep the object at the selected index
    $struct->{$lexeme} = [ $tags->[$index] ];
    return $struct->{$lexeme};
}
355
sub is_disambiguated {
    my( $reading ) = @_;
    if( $reading->has_morphology ) {
        # Every lexeme container must hold exactly one tag.
        foreach my $entry ( $reading->lexemes ) {
            my( $word ) = keys %{$entry};  # each container has one key
            my $tag_count = scalar @{ $entry->{$word} };
            return undef if $tag_count != 1;
        }
        return 1;
    }
    # No morphological information at all.
    return undef;
}
365
366## Utility methods
367
# JSON serialization hook: a reading is represented by its text alone.
sub TO_JSON {
    my( $reading ) = @_;
    return $reading->text;
}
372
4d9593df 373## TODO will need a throw() here
374
375no Moose;
376__PACKAGE__->meta->make_immutable;
377
378###################################################
379### Morphology objects, to be attached to readings
380###################################################
381
382package Text::Tradition::Collation::Reading::Morphology;
383
384use Moose;
385
# A morphology object is made of three mandatory, read-only strings: the
# lemma, the morphological code and the language.
has [ qw( lemma code language ) ] => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);
403
404## Transmute codes into comparison arrays for our various languages.
405
# Normalise the constructor arguments (a single reference or a key/value
# list).  A 'serial' argument of the form "lemma!!code" is split into the
# 'lemma' and 'code' arguments.
around BUILDARGS => sub {
    my( $orig, $class, @params ) = @_;

    # Work on a shallow copy, so that removing 'serial' below does not
    # alter a hashref owned by the caller.
    my %args = ( @params == 1 && ref( $params[0] ) ) ? %{ $params[0] } : @params;

    if( exists( $args{'serial'} ) ) {
        my( $lemma, $code ) = split( /!!/, delete $args{'serial'} );
        $args{'lemma'} = $lemma;
        $args{'code'} = $code;
    }
    return $class->$orig( \%args );
};
422
# String form of the object: lemma and code separated by '!!'.
sub serialization {
    my( $morph ) = @_;
    return sprintf( '%s!!%s', $morph->lemma, $morph->code );
}
427
# Turn the morphological code into a list whose positions can be compared
# between objects of the same language.
#
# French:  the leading '+'-separated parts that are not key=value pairs
#          (padded with undef to at least two), followed by the values of
#          Pers, Nb, Temps, Genre, Spec and Fonc in that order (undef
#          where a key is absent).
# English: not implemented yet; returns an empty list.
# Others:  the individual characters of the code.
sub comparison_array {
    my $self = shift;
    my $language = $self->language;

    if( $language eq 'French' ) {
        my @array;
        my @bits = split( /\+/, $self->code );
        # First push the non k/v parts.
        push( @array, shift @bits ) while @bits && $bits[0] !~ /=/;
        push( @array, undef ) while @array < 2;
        # Now push the k/v parts in a known order.
        my %props;
        foreach my $pair ( @bits ) {
            my( $k, $v ) = split( /=/, $pair );
            $props{$k} = $v;
        }
        push( @array, @props{ qw/ Pers Nb Temps Genre Spec Fonc / } );
        # Give the answer.
        return @array;
    }

    if( $language eq 'English' ) {
        # Do something as yet undetermined.  Return explicitly, so that the
        # result of the language test above does not leak out as the
        # return value.
        return;
    }

    # Latin or Greek or Armenian, just split the chars
    return split( //, $self->code );
}
456
021bdbac 457no Moose;
458__PACKAGE__->meta->make_immutable;
d047cd52 459
021bdbac 4601;
d047cd52 461