replace collation relationships whenever we can
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
e4b0f464 4use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 5
3a2ebbf4 6=head1 NAME
784877d9 7
3a2ebbf4 8Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
eca16057 9
3a2ebbf4 10=head1 DESCRIPTION
784877d9 11
3a2ebbf4 12Text::Tradition is a library for representation and analysis of collated
13texts, particularly medieval ones. A 'reading' refers to a unit of text,
14usually a word, that appears in one or more witnesses (manuscripts) of the
15tradition; the text of a given witness is composed of a set of readings in
16a particular sequence.
784877d9 17
3a2ebbf4 18=head1 METHODS
1ca1163d 19
3a2ebbf4 20=head2 new
8e1394aa 21
3a2ebbf4 22Creates a new reading in the given collation with the given attributes.
23Options include:
94c00c71 24
3a2ebbf4 25=over 4
784877d9 26
3a2ebbf4 27=item collation - The Text::Tradition::Collation object to which this reading belongs. Intended to be required, but not currently enforced by the constructor.
e2902068 28
3a2ebbf4 29=item id - A unique identifier for this reading. Required.
910a0a6d 30
3a2ebbf4 31=item text - The word or other text of the reading.
784877d9 32
3a2ebbf4 33=item is_start - The reading is the starting point for the collation.
3265b0ce 34
3a2ebbf4 35=item is_end - The reading is the ending point for the collation.
784877d9 36
3a2ebbf4 37=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 38
12720144 39=item is_ph - A temporary placeholder for apparatus parsing purposes. Do not use unless you know what you are doing.
40
3a2ebbf4 41=item rank - The sequence number of the reading. This should probably not be set manually.
d047cd52 42
3a2ebbf4 43=back
8e1394aa 44
3a2ebbf4 45One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.
8e1394aa 46
3a2ebbf4 47=head2 collation
94c00c71 48
3a2ebbf4 49=head2 id
94c00c71 50
3a2ebbf4 51=head2 text
4cdd82f1 52
3a2ebbf4 53=head2 is_start
4cdd82f1 54
3a2ebbf4 55=head2 is_end
4a8828f0 56
3a2ebbf4 57=head2 is_lacuna
4a8828f0 58
3a2ebbf4 59=head2 rank
4a8828f0 60
3a2ebbf4 61Accessor methods for the given attributes.
d047cd52 62
3a2ebbf4 63=cut
d047cd52 64
# The collation this reading belongs to, held as a weak reference.
has collation => (
    is       => 'ro',
    isa      => 'Text::Tradition::Collation',
    # required => 1,
    weak_ref => 1,
);

# Identifier of the reading; must be supplied to the constructor.
has id => ( is => 'ro', isa => 'Str', required => 1 );

# The text has a read-only accessor 'text' and a separate writer
# method, 'alter_text'.
has text => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
    writer   => 'alter_text',
);

# Flags for the 'special' kinds of reading. All four are read-only
# booleans that default to undef.
has [ qw/ is_start is_end is_lacuna is_ph / ] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

# Unlike the flags above, this one is read-write.
has is_common => ( is => 'rw', isa => 'Bool', default => undef );

# Integer rank, with 'has_rank' as predicate and 'clear_rank' as clearer.
has rank => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_rank',
    clearer   => 'clear_rank',
);
fd602649 121
## For morphological analysis

# Optional normalised spelling of the reading; use_lexemes() compares
# against this in preference to the text when it is set.
has 'normal_form' => (
    is => 'rw',
    isa => 'Str',
    predicate => 'has_normal_form',
    );

has 'lemma' => (
    is => 'rw',
    isa => 'Str',
    predicate => 'has_lemma',
    );

# A list of hashes, each mapping a lexeme string to the list of
# Morphology objects recorded for it (the methods in this file store
# exactly one key per hash). The list starts out empty rather than
# undefined, so that the 'count' and 'elements' delegations
# (has_morphology, lexemes) can be called on a reading that has never
# had use_lexemes() applied to it.
has 'morphology' => (
    traits => ['Array'],
    isa => 'ArrayRef[HashRef[ArrayRef[Text::Tradition::Collation::Reading::Morphology]]]',
    default => sub { [] },
    handles => {
        lexemes => 'elements',
        has_morphology => 'count',
        _clear_morph => 'clear',
        _add_morph => 'push',
        },
    );
146
## For prefix/suffix readings

# Two read-only boolean flags, both defaulting to undef.
has [ qw/ join_prior join_next / ] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);
160
3a2ebbf4 161
# Normalise the constructor arguments before Moose processes them.
# Accepts either a single hash reference or a flat key/value list.
# When one of the 'special' flags is present, the text is filled in to
# match; for start and end readings the ID is overridden as well, and
# the start reading is given rank 0.
around BUILDARGS => sub {
    my $orig = shift;
    my $class = shift;
    # Always work on a private shallow copy, so that the overrides below
    # never leak back into a hash reference owned by the caller.
    my $args = ( @_ == 1 && ref( $_[0] ) ) ? { %{ $_[0] } } : { @_ };

    # If one of our special booleans is set, we change the text and the
    # ID to match. Note that these tests look at the presence of the
    # key, not at the truth of its value.
    if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    } elsif( exists $args->{'is_start'} ) {
        $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    } elsif( exists $args->{'is_end'} ) {
        $args->{'id'} = '#END#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    } elsif( exists $args->{'is_ph'} ) {
        # A placeholder simply displays its own ID.
        $args->{'text'} = $args->{'id'};
    }

    return $class->$orig( $args );
};
189
190=head2 is_meta
191
192A meta attribute (ha ha), which should be true if any of our 'special'
193booleans are true. Implies that the reading does not represent a bit
194of text found in a witness.
195
196=cut
197
# True when any of the 'special' flags (start, end, lacuna, placeholder)
# is set on this reading.
sub is_meta {
    my( $reading ) = @_;
    return $reading->is_start
        || $reading->is_end
        || $reading->is_lacuna
        || $reading->is_ph;
}
202
027d819c 203=head1 Convenience methods
204
205=head2 related_readings
206
207Calls Collation's related_readings with $self as the first argument.
208
209=cut
210
# Delegates to the collation's related_readings, passing this reading
# first and any further arguments after it.
sub related_readings {
    my $reading = shift;
    my $collation = $reading->collation;
    return $collation->related_readings( $reading, @_ );
}
215
7f52eac8 216=head2 witnesses
217
218Calls Collation's reading_witnesses with $self as the first argument.
219
220=cut
221
# Delegates to the collation's reading_witnesses, passing this reading
# first and any further arguments after it.
sub witnesses {
    my $reading = shift;
    my $collation = $reading->collation;
    return $collation->reading_witnesses( $reading, @_ );
}
226
027d819c 227=head2 predecessors
228
229Returns a list of Reading objects that immediately precede $self in the collation.
230
231=cut
232
# Asks the collation's sequence for the predecessors of this reading's
# ID, and returns whatever the collation's reading() yields for each
# of those IDs.
sub predecessors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    my @prior_ids = $collation->sequence->predecessors( $reading->id );
    my @prior;
    foreach my $rid ( @prior_ids ) {
        push( @prior, $collation->reading( $rid ) );
    }
    return @prior;
}
238
027d819c 239=head2 successors
240
241Returns a list of Reading objects that immediately follow $self in the collation.
242
243=cut
244
# Asks the collation's sequence for the successors of this reading's
# ID, and returns whatever the collation's reading() yields for each
# of those IDs.
sub successors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    my @next_ids = $collation->sequence->successors( $reading->id );
    my @following;
    foreach my $rid ( @next_ids ) {
        push( @following, $collation->reading( $rid ) );
    }
    return @following;
}
250
027d819c 251=head2 set_identical( $other_reading)
252
253Backwards compatibility method, to add a transposition relationship
254between $self and $other_reading. Don't use this.
255
256=cut
257
# Asks the collation to add a relationship of type 'transposition'
# between this reading and $other.
sub set_identical {
    my $self = shift;
    my $other = shift;
    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
263
# Handler for the overloaded "" operator: a reading stringifies to its ID.
sub _stringify {
    return $_[0]->id;
}
d047cd52 268
4d9593df 269=head1 MORPHOLOGY
270
271A few methods to try to tack on morphological information.
272
273=head2 is_disambiguated
274
275Returns true if there is only one tag per lexeme in this reading.
276
277=cut
278
# Declare the lexemes that make up this reading. The lexemes, joined
# together, must equal the reading's normal form (or its text, if no
# normal form is set) once both are lowercased and stripped of
# whitespace and hyphens; otherwise a warning is issued and nothing is
# changed. On success any existing morphology is cleared and one empty
# container is added per lexeme.
sub use_lexemes {
    my $self = shift;
    my @lexemes = @_;
    # Canonical form used on both sides of the comparison.
    my $squash = sub {
        my $str = lc( shift );
        $str =~ s/[\s-]+//g;
        return $str;
    };
    my $expected = $squash->( $self->has_normal_form ? $self->normal_form : $self->text );
    if( $squash->( join( '', @lexemes ) ) ne $expected ) {
        warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
        return;
    }
    $self->_clear_morph;
    return map { $self->_add_morph( { $_ => [] } ) } @lexemes;
}
293
# Attach a morphological tag to one lexeme of this reading.
#
# Called either as add_morphological_tag( $lexeme, $opts ) or as
# add_morphological_tag( $opts ); in the short form the whole reading
# text is used as the lexeme. $opts is handed to the Morphology
# constructor. Returns the result of the push onto the lexeme's tag
# list, or nothing (after a warning) if the lexeme has no container.
sub add_morphological_tag {
    my( $self, $lexeme, $opts ) = @_;
    unless( $opts ) {
        # No lexeme was passed; use reading text.
        $opts = $lexeme;
        $lexeme = $self->text;
        # Set the reading up as a single lexeme only if it is not one
        # already: use_lexemes() discards every existing tag, so calling
        # it unconditionally would make it impossible to add a second
        # tag through this short form. The lookup is wrapped in eval
        # because it may throw when the morphology list has never been
        # initialised (TODO confirm); that case counts as "not set up".
        my $have_container = do {
            local $@;
            eval { scalar grep { exists $_->{$lexeme} } $self->lexemes };
        };
        $self->use_lexemes( $lexeme ) unless $have_container;
    }
    # Get the correct container
    my( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Now make the morph object and add it to this lexeme.
    my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
    # TODO Check for existence
    return push( @{$struct->{$lexeme}}, $morph_obj );
}
314
# Reduce the tag list of one lexeme to the single tag at $index.
#
# Called either as disambiguate( $lexeme, $index ) or as
# disambiguate( $index ); in the short form the whole reading text is
# used as the lexeme. Returns the new one-element tag list, or nothing
# (after a warning) if the lexeme or the index does not exist.
sub disambiguate {
    my( $self, $lexeme, $index ) = @_;
    # Test for definedness, not truth: 0 is a perfectly good index and
    # must not be mistaken for "no lexeme was passed".
    unless( defined $index ) {
        # No lexeme was passed; use reading text.
        $index = $lexeme;
        $lexeme = $self->text;
    }
    # Get the correct container
    my( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
    unless( $struct ) {
        warn "No lexeme $lexeme exists in this reading";
        return;
    }
    # Refuse an index that does not address an existing tag, rather than
    # replacing the tag list with a single undef. Negative indices count
    # from the end, as usual.
    my $tags = $struct->{$lexeme};
    my $valid = defined $index
        && $index =~ /^-?\d+$/
        && $index < @$tags
        && $index >= -@$tags;
    unless( $valid ) {
        warn "No morphological tag at index "
            . ( defined $index ? $index : 'undef' )
            . " for lexeme $lexeme";
        return;
    }
    # Keep the object at the selected index
    return $struct->{$lexeme} = [ $tags->[$index] ];
}
333
# Returns 1 if the reading has morphology and every lexeme carries
# exactly one tag; returns undef otherwise.
sub is_disambiguated {
    my( $reading ) = @_;
    return undef unless $reading->has_morphology;
    for my $container ( $reading->lexemes ) {
        my( $word ) = keys %$container; # will be only one
        my $tag_count = scalar @{ $container->{$word} };
        return undef if $tag_count != 1;
    }
    return 1;
}
343
344## Utility methods
345
# Returns the reading's text; the name follows the TO_JSON hook
# convention of JSON encoders.
sub TO_JSON {
    return $_[0]->text;
}
350
4d9593df 351## TODO will need a throw() here
352
353no Moose;
354__PACKAGE__->meta->make_immutable;
355
356###################################################
357### Morphology objects, to be attached to readings
358###################################################
359
360package Text::Tradition::Collation::Reading::Morphology;
361
362use Moose;
363
# Three read-only string attributes, all of which must be supplied at
# construction (lemma and code may come from 'serial'; see BUILDARGS).
has [ qw/ lemma code language / ] => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
381
382## Transmute codes into comparison arrays for our various languages.
383
# Normalise the constructor arguments before Moose processes them.
# Accepts either a single hash reference or a flat key/value list. If a
# 'serial' argument is given (a string in the format produced by
# serialization(), i.e. "lemma!!code"), it is removed and split into
# the 'lemma' and 'code' arguments.
around BUILDARGS => sub {
    my $orig = shift;
    my $class = shift;
    # Work on a private shallow copy, so that removing 'serial' does not
    # alter a hash reference owned by the caller.
    my $args = ( @_ == 1 && ref( $_[0] ) ) ? { %{ $_[0] } } : { @_ };
    if( exists( $args->{'serial'} ) ) {
        # Split on the first separator only: this keeps a code that
        # itself contains '!!' intact, and keeps an empty code as an
        # empty string rather than dropping it.
        my( $lemma, $code ) = split( /!!/, delete $args->{'serial'}, 2 );
        $args->{'lemma'} = $lemma;
        $args->{'code'} = $code;
    }
    return $class->$orig( $args );
};
400
# Returns the lemma and the code joined by '!!'.
sub serialization {
    my( $morph ) = @_;
    my @parts = ( $morph->lemma, $morph->code );
    return join( '!!', @parts );
}
405
# Break the morphological code up into a list of values that can be
# compared position by position.
#
# French: the code is split on '+'. The leading parts that contain no
#   '=' come first, padded with undef to at least two entries; they are
#   followed by the values of the key=value parts in the fixed order
#   Pers, Nb, Temps, Genre, Spec, Fonc (undef where a key is absent).
# English: not yet implemented; returns an empty list.
# Anything else: the code is split into its individual characters.
sub comparison_array {
    my $self = shift;
    my $language = $self->language;

    if( $language eq 'French' ) {
        my @bits = split( /\+/, $self->code );
        # First push the non k/v parts.
        my @array;
        push( @array, shift @bits ) while @bits && $bits[0] !~ /=/;
        push( @array, undef ) while @array < 2;
        # Now push the k/v parts in a known order.
        my %props;
        foreach my $bit ( @bits ) {
            my( $k, $v ) = split( /=/, $bit );
            $props{$k} = $v;
        }
        push( @array, @props{ qw/ Pers Nb Temps Genre Spec Fonc / } );
        # Give the answer.
        return @array;
    }

    if( $language eq 'English' ) {
        # Do something as yet undetermined. Return explicitly, so that
        # the caller gets an empty list rather than whatever value
        # happens to be left over from falling off the end of the sub.
        return;
    }

    # Latin or Greek or Armenian, just split the chars
    return split( '', $self->code );
}
434
021bdbac 435no Moose;
436__PACKAGE__->meta->make_immutable;
d047cd52 437
021bdbac 4381;
d047cd52 439