Add logic for normalized form of lemma to propagate on orth/spelling links
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
a445ce40 4use Moose::Util qw/ does_role apply_all_roles /;
27e161be 5use Text::Tradition::Datatypes;
70745e70 6use Text::Tradition::Error;
10e4b1ac 7use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
37bf09f4 10# Enable plugin(s) if available
11eval { with 'Text::Tradition::Morphology'; };
12# Morphology package is not on CPAN, so don't warn of its absence
13# if( $@ ) {
14# warn "Text::Tradition::Morphology not found: $@. Disabling lexeme functionality";
15# };
16
3a2ebbf4 17=head1 NAME
784877d9 18
4aea6e9b 19Text::Tradition::Collation::Reading - represents a reading (usually a word)
20in a collation.
21
3a2ebbf4 22=head1 DESCRIPTION
784877d9 23
3a2ebbf4 24Text::Tradition is a library for representation and analysis of collated
25texts, particularly medieval ones. A 'reading' refers to a unit of text,
26usually a word, that appears in one or more witnesses (manuscripts) of the
27tradition; the text of a given witness is composed of a set of readings in
28a particular sequence
784877d9 29
3a2ebbf4 30=head1 METHODS
1ca1163d 31
3a2ebbf4 32=head2 new
8e1394aa 33
4aea6e9b 34Creates a new reading in the given collation with the given attributes.
3a2ebbf4 35Options include:
94c00c71 36
3a2ebbf4 37=over 4
784877d9 38
4aea6e9b 39=item collation - The Text::Tradition::Collation object to which this
40reading belongs. Required.
e2902068 41
3a2ebbf4 42=item id - A unique identifier for this reading. Required.
910a0a6d 43
3a2ebbf4 44=item text - The word or other text of the reading.
784877d9 45
3a2ebbf4 46=item is_start - The reading is the starting point for the collation.
3265b0ce 47
3a2ebbf4 48=item is_end - The reading is the ending point for the collation.
784877d9 49
3a2ebbf4 50=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 51
4aea6e9b 52=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
53not use unless you know what you are doing.
12720144 54
4aea6e9b 55=item rank - The sequence number of the reading. This should probably not
56be set manually.
d047cd52 57
3a2ebbf4 58=back
8e1394aa 59
3a2ebbf4 60One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 61
3a2ebbf4 62=head2 collation
94c00c71 63
3a2ebbf4 64=head2 id
94c00c71 65
3a2ebbf4 66=head2 text
4cdd82f1 67
3a2ebbf4 68=head2 is_start
4cdd82f1 69
3a2ebbf4 70=head2 is_end
4a8828f0 71
3a2ebbf4 72=head2 is_lacuna
4a8828f0 73
3a2ebbf4 74=head2 rank
4a8828f0 75
3a2ebbf4 76Accessor methods for the given attributes.
d047cd52 77
3a2ebbf4 78=cut
d047cd52 79
3a2ebbf4 80has 'collation' => (
81 is => 'ro',
82 isa => 'Text::Tradition::Collation',
83 # required => 1,
84 weak_ref => 1,
85 );
d047cd52 86
3a2ebbf4 87has 'id' => (
88 is => 'ro',
10e4b1ac 89 isa => 'ReadingID',
3a2ebbf4 90 required => 1,
91 );
d047cd52 92
3a2ebbf4 93has 'text' => (
94 is => 'ro',
95 isa => 'Str',
96 required => 1,
49d4f2ac 97 writer => 'alter_text',
3a2ebbf4 98 );
0e47f4f6 99
4a5f5143 100has 'is_lemma' => (
101 is => 'ro',
102 isa => 'Bool',
103 default => undef,
104 writer => 'make_lemma',
105 );
106
3a2ebbf4 107has 'is_start' => (
108 is => 'ro',
109 isa => 'Bool',
110 default => undef,
111 );
112
113has 'is_end' => (
114 is => 'ro',
115 isa => 'Bool',
116 default => undef,
117 );
118
119has 'is_lacuna' => (
120 is => 'ro',
121 isa => 'Bool',
122 default => undef,
123 );
12720144 124
125has 'is_ph' => (
126 is => 'ro',
127 isa => 'Bool',
128 default => undef,
129 );
d4b75f44 130
131has 'is_common' => (
132 is => 'rw',
133 isa => 'Bool',
134 default => undef,
135 );
3a2ebbf4 136
137has 'rank' => (
138 is => 'rw',
139 isa => 'Int',
140 predicate => 'has_rank',
ca6e6095 141 clearer => 'clear_rank',
3a2ebbf4 142 );
fd602649 143
629e27b0 144## For prefix/suffix readings
145
146has 'join_prior' => (
147 is => 'ro',
148 isa => 'Bool',
149 default => undef,
339786dd 150 writer => '_set_join_prior',
629e27b0 151 );
152
153has 'join_next' => (
154 is => 'ro',
155 isa => 'Bool',
156 default => undef,
339786dd 157 writer => '_set_join_next',
629e27b0 158 );
159
3a2ebbf4 160
161around BUILDARGS => sub {
162 my $orig = shift;
163 my $class = shift;
164 my $args;
165 if( @_ == 1 ) {
166 $args = shift;
167 } else {
168 $args = { @_ };
169 }
b0b4421a 170
3a2ebbf4 171 # If one of our special booleans is set, we change the text and the
172 # ID to match.
44924224 173 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 174 $args->{'text'} = '#LACUNA#';
44924224 175 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
10e4b1ac 176 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
3a2ebbf4 177 $args->{'text'} = '#START#';
178 $args->{'rank'} = 0;
44924224 179 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
10e4b1ac 180 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
3a2ebbf4 181 $args->{'text'} = '#END#';
44924224 182 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
12720144 183 $args->{'text'} = $args->{'id'};
3a2ebbf4 184 }
185
10e4b1ac 186 # Backwards compatibility for non-XMLname IDs
187 my $rid = $args->{'id'};
188 $rid =~ s/\#/__/g;
189 $rid =~ s/[\/,]/./g;
190 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
191 $rid = 'r'.$rid;
192 }
193 $args->{'id'} = $rid;
194
3a2ebbf4 195 $class->$orig( $args );
196};
197
a445ce40 198# Look for a lexeme-string argument in the build args; if there, pull in the
199# morphology role if possible.
70745e70 200sub BUILD {
201 my( $self, $args ) = @_;
202 if( exists $args->{'lexemes'} ) {
37bf09f4 203 unless( $self->can( '_deserialize_lexemes' ) ) {
204 warn "No morphology package installed; DROPPING lexemes";
205 return;
a445ce40 206 }
70745e70 207 $self->_deserialize_lexemes( $args->{'lexemes'} );
208 }
209}
210
45095bee 211around make_lemma => sub {
212 my $orig = shift;
213 my $self = shift;
214 my $val = shift;
215
216 # TODO unset the lemma from any other reading at the same rank.
217 if( $val && $self->does( 'Text::Tradition::Morphology' )) {
218 # Set the normal form on all orthographically related readings to match
219 # the normal form on this one.
220 my $filter = sub {
221 my $rl = shift;
222 my $rltype = $self->collation->relations->type( $rl->type );
223 return $rltype->bindlevel < 2
224 };
225 foreach my $r ( $self->related_readings( $filter ) ) {
226 $r->normal_form( $self->normal_form );
227 }
228 }
229 $self->$orig( $val );
230};
231
3a2ebbf4 232=head2 is_meta
233
234A meta attribute (ha ha), which should be true if any of our 'special'
235booleans are true. Implies that the reading does not represent a bit
236of text found in a witness.
237
238=cut
239
240sub is_meta {
241 my $self = shift;
12720144 242 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 243}
244
a445ce40 245=head2 is_identical( $other_reading )
246
247Returns true if the reading is identical to the other reading. The basic test
248is equality of ->text attributes, but this may be wrapped or overridden by
249extensions.
250
251=cut
252
253sub is_identical {
254 my( $self, $other ) = @_;
255 return $self->text eq $other->text;
256}
257
258=head2 is_combinable
259
260Returns true if the reading may in theory be combined into a multi-reading
261segment within the collation graph. The reading must not be a meta reading,
262and it must not have any relationships in its own right with any others.
263This test may be wrapped or overridden by extensions.
264
265=cut
266
267sub is_combinable {
268 my $self = shift;
269 return undef if $self->is_meta;
270 return !$self->related_readings();
271}
272
273# Not really meant for public consumption. Adopt the text of the other reading
274# into this reading.
275sub _combine {
276 my( $self, $other, $joinstr ) = @_;
277 $self->alter_text( join( $joinstr, $self->text, $other->text ) );
278 # Change this reading to a joining one if necessary
279 $self->_set_join_next( $other->join_next );
280}
281
027d819c 282=head1 Convenience methods
283
284=head2 related_readings
285
286Calls Collation's related_readings with $self as the first argument.
287
288=cut
289
3a2ebbf4 290sub related_readings {
291 my $self = shift;
292 return $self->collation->related_readings( $self, @_ );
293}
294
7f52eac8 295=head2 witnesses
296
297Calls Collation's reading_witnesses with $self as the first argument.
298
299=cut
300
301sub witnesses {
302 my $self = shift;
303 return $self->collation->reading_witnesses( $self, @_ );
304}
305
027d819c 306=head2 predecessors
307
308Returns a list of Reading objects that immediately precede $self in the collation.
309
310=cut
311
22222af9 312sub predecessors {
313 my $self = shift;
314 my @pred = $self->collation->sequence->predecessors( $self->id );
315 return map { $self->collation->reading( $_ ) } @pred;
316}
317
027d819c 318=head2 successors
319
320Returns a list of Reading objects that immediately follow $self in the collation.
321
322=cut
323
22222af9 324sub successors {
325 my $self = shift;
326 my @succ = $self->collation->sequence->successors( $self->id );
327 return map { $self->collation->reading( $_ ) } @succ;
328}
329
a445ce40 330## Utility methods
1d310495 331
3a2ebbf4 332sub _stringify {
333 my $self = shift;
334 return $self->id;
335}
d047cd52 336
2acf0892 337sub TO_JSON {
338 my $self = shift;
339 return $self->text;
340}
341
70745e70 342sub throw {
343 Text::Tradition::Error->throw(
344 'ident' => 'Reading error',
345 'message' => $_[0],
346 );
347}
4d9593df 348
349no Moose;
350__PACKAGE__->meta->make_immutable;
351
021bdbac 3521;