prevent merge_readings from causing witness loops
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
a445ce40 4use Moose::Util qw/ does_role apply_all_roles /;
10e4b1ac 5use Moose::Util::TypeConstraints;
70745e70 6use Text::Tradition::Error;
10e4b1ac 7use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
10e4b1ac 10subtype 'ReadingID',
11 as 'Str',
12 where { $_ =~ /\A$xml10_name_rx\z/ },
13 message { 'Reading ID must be a valid XML attribute string' };
14
15no Moose::Util::TypeConstraints;
16
37bf09f4 17# Enable plugin(s) if available
18eval { with 'Text::Tradition::Morphology'; };
19# Morphology package is not on CPAN, so don't warn of its absence
20# if( $@ ) {
21# warn "Text::Tradition::Morphology not found: $@. Disabling lexeme functionality";
22# };
23
3a2ebbf4 24=head1 NAME
784877d9 25
4aea6e9b 26Text::Tradition::Collation::Reading - represents a reading (usually a word)
27in a collation.
28
3a2ebbf4 29=head1 DESCRIPTION
784877d9 30
3a2ebbf4 31Text::Tradition is a library for representation and analysis of collated
32texts, particularly medieval ones. A 'reading' refers to a unit of text,
33usually a word, that appears in one or more witnesses (manuscripts) of the
34tradition; the text of a given witness is composed of a set of readings in
35a particular sequence
784877d9 36
3a2ebbf4 37=head1 METHODS
1ca1163d 38
3a2ebbf4 39=head2 new
8e1394aa 40
4aea6e9b 41Creates a new reading in the given collation with the given attributes.
3a2ebbf4 42Options include:
94c00c71 43
3a2ebbf4 44=over 4
784877d9 45
4aea6e9b 46=item collation - The Text::Tradition::Collation object to which this
47reading belongs. Required.
e2902068 48
3a2ebbf4 49=item id - A unique identifier for this reading. Required.
910a0a6d 50
3a2ebbf4 51=item text - The word or other text of the reading.
784877d9 52
3a2ebbf4 53=item is_start - The reading is the starting point for the collation.
3265b0ce 54
3a2ebbf4 55=item is_end - The reading is the ending point for the collation.
784877d9 56
3a2ebbf4 57=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 58
4aea6e9b 59=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
60not use unless you know what you are doing.
12720144 61
4aea6e9b 62=item rank - The sequence number of the reading. This should probably not
63be set manually.
d047cd52 64
3a2ebbf4 65=back
8e1394aa 66
3a2ebbf4 67One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 68
3a2ebbf4 69=head2 collation
94c00c71 70
3a2ebbf4 71=head2 id
94c00c71 72
3a2ebbf4 73=head2 text
4cdd82f1 74
3a2ebbf4 75=head2 is_start
4cdd82f1 76
3a2ebbf4 77=head2 is_end
4a8828f0 78
3a2ebbf4 79=head2 is_lacuna
4a8828f0 80
3a2ebbf4 81=head2 rank
4a8828f0 82
3a2ebbf4 83Accessor methods for the given attributes.
d047cd52 84
3a2ebbf4 85=cut
d047cd52 86
3a2ebbf4 87has 'collation' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation',
90 # required => 1,
91 weak_ref => 1,
92 );
d047cd52 93
3a2ebbf4 94has 'id' => (
95 is => 'ro',
10e4b1ac 96 isa => 'ReadingID',
3a2ebbf4 97 required => 1,
98 );
d047cd52 99
3a2ebbf4 100has 'text' => (
101 is => 'ro',
102 isa => 'Str',
103 required => 1,
49d4f2ac 104 writer => 'alter_text',
3a2ebbf4 105 );
0e47f4f6 106
4a5f5143 107has 'is_lemma' => (
108 is => 'ro',
109 isa => 'Bool',
110 default => undef,
111 writer => 'make_lemma',
112 );
113
3a2ebbf4 114has 'is_start' => (
115 is => 'ro',
116 isa => 'Bool',
117 default => undef,
118 );
119
120has 'is_end' => (
121 is => 'ro',
122 isa => 'Bool',
123 default => undef,
124 );
125
126has 'is_lacuna' => (
127 is => 'ro',
128 isa => 'Bool',
129 default => undef,
130 );
12720144 131
132has 'is_ph' => (
133 is => 'ro',
134 isa => 'Bool',
135 default => undef,
136 );
d4b75f44 137
138has 'is_common' => (
139 is => 'rw',
140 isa => 'Bool',
141 default => undef,
142 );
3a2ebbf4 143
144has 'rank' => (
145 is => 'rw',
146 isa => 'Int',
147 predicate => 'has_rank',
ca6e6095 148 clearer => 'clear_rank',
3a2ebbf4 149 );
fd602649 150
629e27b0 151## For prefix/suffix readings
152
153has 'join_prior' => (
154 is => 'ro',
155 isa => 'Bool',
156 default => undef,
339786dd 157 writer => '_set_join_prior',
629e27b0 158 );
159
160has 'join_next' => (
161 is => 'ro',
162 isa => 'Bool',
163 default => undef,
339786dd 164 writer => '_set_join_next',
629e27b0 165 );
166
3a2ebbf4 167
168around BUILDARGS => sub {
169 my $orig = shift;
170 my $class = shift;
171 my $args;
172 if( @_ == 1 ) {
173 $args = shift;
174 } else {
175 $args = { @_ };
176 }
b0b4421a 177
3a2ebbf4 178 # If one of our special booleans is set, we change the text and the
179 # ID to match.
44924224 180 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 181 $args->{'text'} = '#LACUNA#';
44924224 182 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
10e4b1ac 183 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
3a2ebbf4 184 $args->{'text'} = '#START#';
185 $args->{'rank'} = 0;
44924224 186 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
10e4b1ac 187 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
3a2ebbf4 188 $args->{'text'} = '#END#';
44924224 189 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
12720144 190 $args->{'text'} = $args->{'id'};
3a2ebbf4 191 }
192
10e4b1ac 193 # Backwards compatibility for non-XMLname IDs
194 my $rid = $args->{'id'};
195 $rid =~ s/\#/__/g;
196 $rid =~ s/[\/,]/./g;
197 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
198 $rid = 'r'.$rid;
199 }
200 $args->{'id'} = $rid;
201
3a2ebbf4 202 $class->$orig( $args );
203};
204
a445ce40 205# Look for a lexeme-string argument in the build args; if there, pull in the
206# morphology role if possible.
70745e70 207sub BUILD {
208 my( $self, $args ) = @_;
209 if( exists $args->{'lexemes'} ) {
37bf09f4 210 unless( $self->can( '_deserialize_lexemes' ) ) {
211 warn "No morphology package installed; DROPPING lexemes";
212 return;
a445ce40 213 }
70745e70 214 $self->_deserialize_lexemes( $args->{'lexemes'} );
215 }
216}
217
3a2ebbf4 218=head2 is_meta
219
220A meta attribute (ha ha), which should be true if any of our 'special'
221booleans are true. Implies that the reading does not represent a bit
222of text found in a witness.
223
224=cut
225
226sub is_meta {
227 my $self = shift;
12720144 228 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 229}
230
a445ce40 231=head2 is_identical( $other_reading )
232
233Returns true if the reading is identical to the other reading. The basic test
234is equality of ->text attributes, but this may be wrapped or overridden by
235extensions.
236
237=cut
238
239sub is_identical {
240 my( $self, $other ) = @_;
241 return $self->text eq $other->text;
242}
243
244=head2 is_combinable
245
246Returns true if the reading may in theory be combined into a multi-reading
247segment within the collation graph. The reading must not be a meta reading,
248and it must not have any relationships in its own right with any others.
249This test may be wrapped or overridden by extensions.
250
251=cut
252
253sub is_combinable {
254 my $self = shift;
255 return undef if $self->is_meta;
256 return !$self->related_readings();
257}
258
259# Not really meant for public consumption. Adopt the text of the other reading
260# into this reading.
261sub _combine {
262 my( $self, $other, $joinstr ) = @_;
263 $self->alter_text( join( $joinstr, $self->text, $other->text ) );
264 # Change this reading to a joining one if necessary
265 $self->_set_join_next( $other->join_next );
266}
267
027d819c 268=head1 Convenience methods
269
270=head2 related_readings
271
272Calls Collation's related_readings with $self as the first argument.
273
274=cut
275
3a2ebbf4 276sub related_readings {
277 my $self = shift;
278 return $self->collation->related_readings( $self, @_ );
279}
280
7f52eac8 281=head2 witnesses
282
283Calls Collation's reading_witnesses with $self as the first argument.
284
285=cut
286
287sub witnesses {
288 my $self = shift;
289 return $self->collation->reading_witnesses( $self, @_ );
290}
291
027d819c 292=head2 predecessors
293
294Returns a list of Reading objects that immediately precede $self in the collation.
295
296=cut
297
22222af9 298sub predecessors {
299 my $self = shift;
300 my @pred = $self->collation->sequence->predecessors( $self->id );
301 return map { $self->collation->reading( $_ ) } @pred;
302}
303
027d819c 304=head2 successors
305
306Returns a list of Reading objects that immediately follow $self in the collation.
307
308=cut
309
22222af9 310sub successors {
311 my $self = shift;
312 my @succ = $self->collation->sequence->successors( $self->id );
313 return map { $self->collation->reading( $_ ) } @succ;
314}
315
a445ce40 316## Utility methods
1d310495 317
3a2ebbf4 318sub _stringify {
319 my $self = shift;
320 return $self->id;
321}
d047cd52 322
2acf0892 323sub TO_JSON {
324 my $self = shift;
325 return $self->text;
326}
327
70745e70 328sub throw {
329 Text::Tradition::Error->throw(
330 'ident' => 'Reading error',
331 'message' => $_[0],
332 );
333}
4d9593df 334
335no Moose;
336__PACKAGE__->meta->make_immutable;
337
021bdbac 3381;