start implementing morphology on readings
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
e4b0f464 4use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 5
3a2ebbf4 6=head1 NAME
784877d9 7
4aea6e9b 8Text::Tradition::Collation::Reading - represents a reading (usually a word)
9in a collation.
10
3a2ebbf4 11=head1 DESCRIPTION
784877d9 12
3a2ebbf4 13Text::Tradition is a library for representation and analysis of collated
14texts, particularly medieval ones. A 'reading' refers to a unit of text,
15usually a word, that appears in one or more witnesses (manuscripts) of the
16tradition; the text of a given witness is composed of a set of readings in
17a particular sequence
784877d9 18
3a2ebbf4 19=head1 METHODS
1ca1163d 20
3a2ebbf4 21=head2 new
8e1394aa 22
4aea6e9b 23Creates a new reading in the given collation with the given attributes.
3a2ebbf4 24Options include:
94c00c71 25
3a2ebbf4 26=over 4
784877d9 27
4aea6e9b 28=item collation - The Text::Tradition::Collation object to which this
29reading belongs. Required.
e2902068 30
3a2ebbf4 31=item id - A unique identifier for this reading. Required.
910a0a6d 32
3a2ebbf4 33=item text - The word or other text of the reading.
784877d9 34
3a2ebbf4 35=item is_start - The reading is the starting point for the collation.
3265b0ce 36
3a2ebbf4 37=item is_end - The reading is the ending point for the collation.
784877d9 38
3a2ebbf4 39=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 40
4aea6e9b 41=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
42not use unless you know what you are doing.
12720144 43
4aea6e9b 44=item rank - The sequence number of the reading. This should probably not
45be set manually.
d047cd52 46
3a2ebbf4 47=back
8e1394aa 48
3a2ebbf4 49One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 50
3a2ebbf4 51=head2 collation
94c00c71 52
3a2ebbf4 53=head2 id
94c00c71 54
3a2ebbf4 55=head2 text
4cdd82f1 56
3a2ebbf4 57=head2 is_start
4cdd82f1 58
3a2ebbf4 59=head2 is_end
4a8828f0 60
3a2ebbf4 61=head2 is_lacuna
4a8828f0 62
3a2ebbf4 63=head2 rank
4a8828f0 64
3a2ebbf4 65Accessor methods for the given attributes.
d047cd52 66
3a2ebbf4 67=cut
d047cd52 68
3a2ebbf4 69has 'collation' => (
70 is => 'ro',
71 isa => 'Text::Tradition::Collation',
72 # required => 1,
73 weak_ref => 1,
74 );
d047cd52 75
3a2ebbf4 76has 'id' => (
77 is => 'ro',
78 isa => 'Str',
79 required => 1,
80 );
d047cd52 81
3a2ebbf4 82has 'text' => (
83 is => 'ro',
84 isa => 'Str',
85 required => 1,
49d4f2ac 86 writer => 'alter_text',
3a2ebbf4 87 );
0e47f4f6 88
fae52efd 89has 'language' => (
90 is => 'ro',
91 isa => 'Str',
92 default => 'Default',
93 );
94
3a2ebbf4 95has 'is_start' => (
96 is => 'ro',
97 isa => 'Bool',
98 default => undef,
99 );
100
101has 'is_end' => (
102 is => 'ro',
103 isa => 'Bool',
104 default => undef,
105 );
106
107has 'is_lacuna' => (
108 is => 'ro',
109 isa => 'Bool',
110 default => undef,
111 );
12720144 112
113has 'is_ph' => (
114 is => 'ro',
115 isa => 'Bool',
116 default => undef,
117 );
d4b75f44 118
119has 'is_common' => (
120 is => 'rw',
121 isa => 'Bool',
122 default => undef,
123 );
3a2ebbf4 124
125has 'rank' => (
126 is => 'rw',
127 isa => 'Int',
128 predicate => 'has_rank',
ca6e6095 129 clearer => 'clear_rank',
3a2ebbf4 130 );
fd602649 131
132## For morphological analysis
133
134has 'normal_form' => (
135 is => 'rw',
136 isa => 'Str',
137 predicate => 'has_normal_form',
138 );
139
cca4f996 140# Holds the word form. If is_disambiguated is true, the form at index zero
141# is the correct one.
142has 'lexemes' => (
4d9593df 143 traits => ['Array'],
cca4f996 144 isa => 'ArrayRef[Text::Tradition::Collation::Lexeme]',
4d9593df 145 handles => {
146 lexemes => 'elements',
cca4f996 147 has_lexemes => 'count',
148 _clear_lexemes => 'clear',
149 _add_lexeme => 'push',
4d9593df 150 },
fd602649 151 );
152
629e27b0 153## For prefix/suffix readings
154
155has 'join_prior' => (
156 is => 'ro',
157 isa => 'Bool',
158 default => undef,
159 );
160
161has 'join_next' => (
162 is => 'ro',
163 isa => 'Bool',
164 default => undef,
165 );
166
3a2ebbf4 167
168around BUILDARGS => sub {
169 my $orig = shift;
170 my $class = shift;
171 my $args;
172 if( @_ == 1 ) {
173 $args = shift;
174 } else {
175 $args = { @_ };
176 }
b0b4421a 177
3a2ebbf4 178 # If one of our special booleans is set, we change the text and the
179 # ID to match.
1d310495 180 if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 181 $args->{'text'} = '#LACUNA#';
3a2ebbf4 182 } elsif( exists $args->{'is_start'} ) {
183 $args->{'id'} = '#START#'; # Change the ID to ensure we have only one
184 $args->{'text'} = '#START#';
185 $args->{'rank'} = 0;
186 } elsif( exists $args->{'is_end'} ) {
187 $args->{'id'} = '#END#'; # Change the ID to ensure we have only one
188 $args->{'text'} = '#END#';
12720144 189 } elsif( exists $args->{'is_ph'} ) {
190 $args->{'text'} = $args->{'id'};
3a2ebbf4 191 }
192
193 $class->$orig( $args );
194};
195
196=head2 is_meta
197
198A meta attribute (ha ha), which should be true if any of our 'special'
199booleans are true. Implies that the reading does not represent a bit
200of text found in a witness.
201
202=cut
203
204sub is_meta {
205 my $self = shift;
12720144 206 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 207}
208
027d819c 209=head1 Convenience methods
210
211=head2 related_readings
212
213Calls Collation's related_readings with $self as the first argument.
214
215=cut
216
3a2ebbf4 217sub related_readings {
218 my $self = shift;
219 return $self->collation->related_readings( $self, @_ );
220}
221
7f52eac8 222=head2 witnesses
223
224Calls Collation's reading_witnesses with $self as the first argument.
225
226=cut
227
228sub witnesses {
229 my $self = shift;
230 return $self->collation->reading_witnesses( $self, @_ );
231}
232
027d819c 233=head2 predecessors
234
235Returns a list of Reading objects that immediately precede $self in the collation.
236
237=cut
238
22222af9 239sub predecessors {
240 my $self = shift;
241 my @pred = $self->collation->sequence->predecessors( $self->id );
242 return map { $self->collation->reading( $_ ) } @pred;
243}
244
027d819c 245=head2 successors
246
247Returns a list of Reading objects that immediately follow $self in the collation.
248
249=cut
250
22222af9 251sub successors {
252 my $self = shift;
253 my @succ = $self->collation->sequence->successors( $self->id );
254 return map { $self->collation->reading( $_ ) } @succ;
255}
256
027d819c 257=head2 set_identical( $other_reading)
258
259Backwards compatibility method, to add a transposition relationship
260between $self and $other_reading. Don't use this.
261
262=cut
263
1d310495 264sub set_identical {
265 my( $self, $other ) = @_;
266 return $self->collation->add_relationship( $self, $other,
267 { 'type' => 'transposition' } );
268}
269
3a2ebbf4 270sub _stringify {
271 my $self = shift;
272 return $self->id;
273}
d047cd52 274
4d9593df 275=head1 MORPHOLOGY
276
277A few methods to try to tack on morphological information.
278
06e7cbc7 279=head2 use_lexemes
280
281TBD
282
4d9593df 283=cut
284
cca4f996 285# sub use_lexemes {
286# my( $self, @lexemes ) = @_;
287# # The lexemes need to be the same as $self->text.
288# my $cmpstr = $self->has_normal_form ? lc( $self->normal_form ) : lc( $self->text );
289# $cmpstr =~ s/[\s-]+//g;
290# my $lexstr = lc( join( '', @lexemes ) );
291# $lexstr =~ s/[\s-]+//g;
292# unless( $lexstr eq $cmpstr ) {
293# warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
294# return;
295# }
296# $self->_clear_morph;
297# map { $self->_add_morph( { $_ => [] } ) } @lexemes;
298# }
299#
300# sub add_morphological_tag {
301# my( $self, $lexeme, $opts ) = @_;
302# my $struct;
303# unless( $opts ) {
304# # No lexeme was passed; use reading text.
305# $opts = $lexeme;
306# $lexeme = $self->text;
307# $self->use_lexemes( $lexeme );
308# }
309# # Get the correct container
310# ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
311# unless( $struct ) {
312# warn "No lexeme $lexeme exists in this reading";
313# return;
314# }
315# # Now make the morph object and add it to this lexeme.
316# my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
317# # TODO Check for existence
318# push( @{$struct->{$lexeme}}, $morph_obj );
319# }
4d9593df 320
321## Utility methods
322
2acf0892 323sub TO_JSON {
324 my $self = shift;
325 return $self->text;
326}
327
4d9593df 328## TODO will need a throw() here
329
330no Moose;
331__PACKAGE__->meta->make_immutable;
332
333###################################################
334### Morphology objects, to be attached to readings
335###################################################
336
337package Text::Tradition::Collation::Reading::Morphology;
338
339use Moose;
340
341has 'lemma' => (
342 is => 'ro',
343 isa => 'Str',
344 required => 1,
345 );
346
347has 'code' => (
348 is => 'ro',
349 isa => 'Str',
350 required => 1,
351 );
352
353has 'language' => (
354 is => 'ro',
355 isa => 'Str',
356 required => 1,
357 );
358
359## Transmute codes into comparison arrays for our various languages.
360
361around BUILDARGS => sub {
362 my $orig = shift;
363 my $class = shift;
364 my $args;
365 if( @_ == 1 && ref( $_[0] ) ) {
366 $args = shift;
367 } else {
368 $args = { @_ };
369 }
370 if( exists( $args->{'serial'} ) ) {
371 my( $lemma, $code ) = split( /!!/, delete $args->{'serial'} );
372 $args->{'lemma'} = $lemma;
373 $args->{'code'} = $code;
374 }
375 $class->$orig( $args );
376};
377
378sub serialization {
379 my $self = shift;
380 return join( '!!', $self->lemma, $self->code );
381};
382
383sub comparison_array {
384 my $self = shift;
385 if( $self->language eq 'French' ) {
386 my @array;
387 my @bits = split( /\+/, $self->code );
388 # First push the non k/v parts.
389 while( @bits && $bits[0] !~ /=/ ) {
390 push( @array, shift @bits );
391 }
392 while( @array < 2 ) {
393 push( @array, undef );
394 }
395 # Now push the k/v parts in a known order.
396 my @fields = qw/ Pers Nb Temps Genre Spec Fonc /;
397 my %props;
398 map { my( $k, $v ) = split( /=/, $_ ); $props{$k} = $v; } @bits;
399 foreach my $k ( @fields ) {
400 push( @array, $props{$k} );
401 }
402 # Give the answer.
403 return @array;
404 } elsif( $self->language eq 'English' ) {
405 # Do something as yet undetermined
406 } else {
407 # Latin or Greek or Armenian, just split the chars
408 return split( '', $self->code );
409 }
410};
411
021bdbac 412no Moose;
413__PACKAGE__->meta->make_immutable;
d047cd52 414
021bdbac 4151;
d047cd52 416