need not tack on reltype
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
a445ce40 4use Moose::Util qw/ does_role apply_all_roles /;
10e4b1ac 5use Moose::Util::TypeConstraints;
70745e70 6use Text::Tradition::Error;
10e4b1ac 7use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
10e4b1ac 10subtype 'ReadingID',
11 as 'Str',
12 where { $_ =~ /\A$xml10_name_rx\z/ },
13 message { 'Reading ID must be a valid XML attribute string' };
14
15no Moose::Util::TypeConstraints;
16
37bf09f4 17# Enable plugin(s) if available
18eval { with 'Text::Tradition::Morphology'; };
19# Morphology package is not on CPAN, so don't warn of its absence
20# if( $@ ) {
21# warn "Text::Tradition::Morphology not found: $@. Disabling lexeme functionality";
22# };
23
3a2ebbf4 24=head1 NAME
784877d9 25
4aea6e9b 26Text::Tradition::Collation::Reading - represents a reading (usually a word)
27in a collation.
28
3a2ebbf4 29=head1 DESCRIPTION
784877d9 30
3a2ebbf4 31Text::Tradition is a library for representation and analysis of collated
32texts, particularly medieval ones. A 'reading' refers to a unit of text,
33usually a word, that appears in one or more witnesses (manuscripts) of the
34tradition; the text of a given witness is composed of a set of readings in
35a particular sequence
784877d9 36
3a2ebbf4 37=head1 METHODS
1ca1163d 38
3a2ebbf4 39=head2 new
8e1394aa 40
4aea6e9b 41Creates a new reading in the given collation with the given attributes.
3a2ebbf4 42Options include:
94c00c71 43
3a2ebbf4 44=over 4
784877d9 45
4aea6e9b 46=item collation - The Text::Tradition::Collation object to which this
47reading belongs. Required.
e2902068 48
3a2ebbf4 49=item id - A unique identifier for this reading. Required.
910a0a6d 50
3a2ebbf4 51=item text - The word or other text of the reading.
784877d9 52
3a2ebbf4 53=item is_start - The reading is the starting point for the collation.
3265b0ce 54
3a2ebbf4 55=item is_end - The reading is the ending point for the collation.
784877d9 56
3a2ebbf4 57=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 58
4aea6e9b 59=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
60not use unless you know what you are doing.
12720144 61
4aea6e9b 62=item rank - The sequence number of the reading. This should probably not
63be set manually.
d047cd52 64
3a2ebbf4 65=back
8e1394aa 66
3a2ebbf4 67One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 68
3a2ebbf4 69=head2 collation
94c00c71 70
3a2ebbf4 71=head2 id
94c00c71 72
3a2ebbf4 73=head2 text
4cdd82f1 74
3a2ebbf4 75=head2 is_start
4cdd82f1 76
3a2ebbf4 77=head2 is_end
4a8828f0 78
3a2ebbf4 79=head2 is_lacuna
4a8828f0 80
3a2ebbf4 81=head2 rank
4a8828f0 82
3a2ebbf4 83Accessor methods for the given attributes.
d047cd52 84
3a2ebbf4 85=cut
d047cd52 86
3a2ebbf4 87has 'collation' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation',
90 # required => 1,
91 weak_ref => 1,
92 );
d047cd52 93
3a2ebbf4 94has 'id' => (
95 is => 'ro',
10e4b1ac 96 isa => 'ReadingID',
3a2ebbf4 97 required => 1,
98 );
d047cd52 99
3a2ebbf4 100has 'text' => (
101 is => 'ro',
102 isa => 'Str',
103 required => 1,
49d4f2ac 104 writer => 'alter_text',
3a2ebbf4 105 );
0e47f4f6 106
3a2ebbf4 107has 'is_start' => (
108 is => 'ro',
109 isa => 'Bool',
110 default => undef,
111 );
112
113has 'is_end' => (
114 is => 'ro',
115 isa => 'Bool',
116 default => undef,
117 );
118
119has 'is_lacuna' => (
120 is => 'ro',
121 isa => 'Bool',
122 default => undef,
123 );
12720144 124
125has 'is_ph' => (
126 is => 'ro',
127 isa => 'Bool',
128 default => undef,
129 );
d4b75f44 130
131has 'is_common' => (
132 is => 'rw',
133 isa => 'Bool',
134 default => undef,
135 );
3a2ebbf4 136
137has 'rank' => (
138 is => 'rw',
139 isa => 'Int',
140 predicate => 'has_rank',
ca6e6095 141 clearer => 'clear_rank',
3a2ebbf4 142 );
fd602649 143
629e27b0 144## For prefix/suffix readings
145
146has 'join_prior' => (
147 is => 'ro',
148 isa => 'Bool',
149 default => undef,
339786dd 150 writer => '_set_join_prior',
629e27b0 151 );
152
153has 'join_next' => (
154 is => 'ro',
155 isa => 'Bool',
156 default => undef,
339786dd 157 writer => '_set_join_next',
629e27b0 158 );
159
3a2ebbf4 160
161around BUILDARGS => sub {
162 my $orig = shift;
163 my $class = shift;
164 my $args;
165 if( @_ == 1 ) {
166 $args = shift;
167 } else {
168 $args = { @_ };
169 }
b0b4421a 170
3a2ebbf4 171 # If one of our special booleans is set, we change the text and the
172 # ID to match.
44924224 173 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 174 $args->{'text'} = '#LACUNA#';
44924224 175 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
10e4b1ac 176 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
3a2ebbf4 177 $args->{'text'} = '#START#';
178 $args->{'rank'} = 0;
44924224 179 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
10e4b1ac 180 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
3a2ebbf4 181 $args->{'text'} = '#END#';
44924224 182 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
12720144 183 $args->{'text'} = $args->{'id'};
3a2ebbf4 184 }
185
10e4b1ac 186 # Backwards compatibility for non-XMLname IDs
187 my $rid = $args->{'id'};
188 $rid =~ s/\#/__/g;
189 $rid =~ s/[\/,]/./g;
190 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
191 $rid = 'r'.$rid;
192 }
193 $args->{'id'} = $rid;
194
3a2ebbf4 195 $class->$orig( $args );
196};
197
a445ce40 198# Look for a lexeme-string argument in the build args; if there, pull in the
199# morphology role if possible.
70745e70 200sub BUILD {
201 my( $self, $args ) = @_;
202 if( exists $args->{'lexemes'} ) {
37bf09f4 203 unless( $self->can( '_deserialize_lexemes' ) ) {
204 warn "No morphology package installed; DROPPING lexemes";
205 return;
a445ce40 206 }
70745e70 207 $self->_deserialize_lexemes( $args->{'lexemes'} );
208 }
209}
210
3a2ebbf4 211=head2 is_meta
212
213A meta attribute (ha ha), which should be true if any of our 'special'
214booleans are true. Implies that the reading does not represent a bit
215of text found in a witness.
216
217=cut
218
219sub is_meta {
220 my $self = shift;
12720144 221 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 222}
223
a445ce40 224=head2 is_identical( $other_reading )
225
226Returns true if the reading is identical to the other reading. The basic test
227is equality of ->text attributes, but this may be wrapped or overridden by
228extensions.
229
230=cut
231
232sub is_identical {
233 my( $self, $other ) = @_;
234 return $self->text eq $other->text;
235}
236
237=head2 is_combinable
238
239Returns true if the reading may in theory be combined into a multi-reading
240segment within the collation graph. The reading must not be a meta reading,
241and it must not have any relationships in its own right with any others.
242This test may be wrapped or overridden by extensions.
243
244=cut
245
246sub is_combinable {
247 my $self = shift;
248 return undef if $self->is_meta;
249 return !$self->related_readings();
250}
251
252# Not really meant for public consumption. Adopt the text of the other reading
253# into this reading.
254sub _combine {
255 my( $self, $other, $joinstr ) = @_;
256 $self->alter_text( join( $joinstr, $self->text, $other->text ) );
257 # Change this reading to a joining one if necessary
258 $self->_set_join_next( $other->join_next );
259}
260
027d819c 261=head1 Convenience methods
262
263=head2 related_readings
264
265Calls Collation's related_readings with $self as the first argument.
266
267=cut
268
3a2ebbf4 269sub related_readings {
270 my $self = shift;
271 return $self->collation->related_readings( $self, @_ );
272}
273
7f52eac8 274=head2 witnesses
275
276Calls Collation's reading_witnesses with $self as the first argument.
277
278=cut
279
280sub witnesses {
281 my $self = shift;
282 return $self->collation->reading_witnesses( $self, @_ );
283}
284
027d819c 285=head2 predecessors
286
287Returns a list of Reading objects that immediately precede $self in the collation.
288
289=cut
290
22222af9 291sub predecessors {
292 my $self = shift;
293 my @pred = $self->collation->sequence->predecessors( $self->id );
294 return map { $self->collation->reading( $_ ) } @pred;
295}
296
027d819c 297=head2 successors
298
299Returns a list of Reading objects that immediately follow $self in the collation.
300
301=cut
302
22222af9 303sub successors {
304 my $self = shift;
305 my @succ = $self->collation->sequence->successors( $self->id );
306 return map { $self->collation->reading( $_ ) } @succ;
307}
308
a445ce40 309## Utility methods
1d310495 310
3a2ebbf4 311sub _stringify {
312 my $self = shift;
313 return $self->id;
314}
d047cd52 315
2acf0892 316sub TO_JSON {
317 my $self = shift;
318 return $self->text;
319}
320
70745e70 321sub throw {
322 Text::Tradition::Error->throw(
323 'ident' => 'Reading error',
324 'message' => $_[0],
325 );
326}
4d9593df 327
328no Moose;
329__PACKAGE__->meta->make_immutable;
330
021bdbac 3311;