load extensions statically to avoid bad object wrapping interactions
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
a445ce40 4use Moose::Util qw/ does_role apply_all_roles /;
10e4b1ac 5use Moose::Util::TypeConstraints;
70745e70 6use Text::Tradition::Error;
10e4b1ac 7use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
10e4b1ac 10subtype 'ReadingID',
11 as 'Str',
12 where { $_ =~ /\A$xml10_name_rx\z/ },
13 message { 'Reading ID must be a valid XML attribute string' };
14
15no Moose::Util::TypeConstraints;
16
37bf09f4 17# Enable plugin(s) if available
18eval { with 'Text::Tradition::Morphology'; };
19# Morphology package is not on CPAN, so don't warn of its absence
20# if( $@ ) {
21# warn "Text::Tradition::Morphology not found: $@. Disabling lexeme functionality";
22# };
23
3a2ebbf4 24=head1 NAME
784877d9 25
4aea6e9b 26Text::Tradition::Collation::Reading - represents a reading (usually a word)
27in a collation.
28
3a2ebbf4 29=head1 DESCRIPTION
784877d9 30
3a2ebbf4 31Text::Tradition is a library for representation and analysis of collated
32texts, particularly medieval ones. A 'reading' refers to a unit of text,
33usually a word, that appears in one or more witnesses (manuscripts) of the
34tradition; the text of a given witness is composed of a set of readings in
35a particular sequence
784877d9 36
3a2ebbf4 37=head1 METHODS
1ca1163d 38
3a2ebbf4 39=head2 new
8e1394aa 40
4aea6e9b 41Creates a new reading in the given collation with the given attributes.
3a2ebbf4 42Options include:
94c00c71 43
3a2ebbf4 44=over 4
784877d9 45
4aea6e9b 46=item collation - The Text::Tradition::Collation object to which this
47reading belongs. Required.
e2902068 48
3a2ebbf4 49=item id - A unique identifier for this reading. Required.
910a0a6d 50
3a2ebbf4 51=item text - The word or other text of the reading.
784877d9 52
3a2ebbf4 53=item is_start - The reading is the starting point for the collation.
3265b0ce 54
3a2ebbf4 55=item is_end - The reading is the ending point for the collation.
784877d9 56
3a2ebbf4 57=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 58
4aea6e9b 59=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
60not use unless you know what you are doing.
12720144 61
4aea6e9b 62=item rank - The sequence number of the reading. This should probably not
63be set manually.
d047cd52 64
3a2ebbf4 65=back
8e1394aa 66
3a2ebbf4 67One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 68
3a2ebbf4 69=head2 collation
94c00c71 70
3a2ebbf4 71=head2 id
94c00c71 72
3a2ebbf4 73=head2 text
4cdd82f1 74
3a2ebbf4 75=head2 is_start
4cdd82f1 76
3a2ebbf4 77=head2 is_end
4a8828f0 78
3a2ebbf4 79=head2 is_lacuna
4a8828f0 80
3a2ebbf4 81=head2 rank
4a8828f0 82
3a2ebbf4 83Accessor methods for the given attributes.
d047cd52 84
3a2ebbf4 85=cut
d047cd52 86
3a2ebbf4 87has 'collation' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation',
90 # required => 1,
91 weak_ref => 1,
92 );
d047cd52 93
3a2ebbf4 94has 'id' => (
95 is => 'ro',
10e4b1ac 96 isa => 'ReadingID',
3a2ebbf4 97 required => 1,
98 );
d047cd52 99
3a2ebbf4 100has 'text' => (
101 is => 'ro',
102 isa => 'Str',
103 required => 1,
49d4f2ac 104 writer => 'alter_text',
3a2ebbf4 105 );
0e47f4f6 106
fae52efd 107has 'language' => (
108 is => 'ro',
109 isa => 'Str',
6ad2ce78 110 predicate => 'has_language',
fae52efd 111 );
112
3a2ebbf4 113has 'is_start' => (
114 is => 'ro',
115 isa => 'Bool',
116 default => undef,
117 );
118
119has 'is_end' => (
120 is => 'ro',
121 isa => 'Bool',
122 default => undef,
123 );
124
125has 'is_lacuna' => (
126 is => 'ro',
127 isa => 'Bool',
128 default => undef,
129 );
12720144 130
131has 'is_ph' => (
132 is => 'ro',
133 isa => 'Bool',
134 default => undef,
135 );
d4b75f44 136
137has 'is_common' => (
138 is => 'rw',
139 isa => 'Bool',
140 default => undef,
141 );
3a2ebbf4 142
143has 'rank' => (
144 is => 'rw',
145 isa => 'Int',
146 predicate => 'has_rank',
ca6e6095 147 clearer => 'clear_rank',
3a2ebbf4 148 );
fd602649 149
629e27b0 150## For prefix/suffix readings
151
152has 'join_prior' => (
153 is => 'ro',
154 isa => 'Bool',
155 default => undef,
339786dd 156 writer => '_set_join_prior',
629e27b0 157 );
158
159has 'join_next' => (
160 is => 'ro',
161 isa => 'Bool',
162 default => undef,
339786dd 163 writer => '_set_join_next',
629e27b0 164 );
165
3a2ebbf4 166
167around BUILDARGS => sub {
168 my $orig = shift;
169 my $class = shift;
170 my $args;
171 if( @_ == 1 ) {
172 $args = shift;
173 } else {
174 $args = { @_ };
175 }
b0b4421a 176
3a2ebbf4 177 # If one of our special booleans is set, we change the text and the
178 # ID to match.
44924224 179 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 180 $args->{'text'} = '#LACUNA#';
44924224 181 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
10e4b1ac 182 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
3a2ebbf4 183 $args->{'text'} = '#START#';
184 $args->{'rank'} = 0;
44924224 185 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
10e4b1ac 186 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
3a2ebbf4 187 $args->{'text'} = '#END#';
44924224 188 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
12720144 189 $args->{'text'} = $args->{'id'};
3a2ebbf4 190 }
191
10e4b1ac 192 # Backwards compatibility for non-XMLname IDs
193 my $rid = $args->{'id'};
194 $rid =~ s/\#/__/g;
195 $rid =~ s/[\/,]/./g;
196 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
197 $rid = 'r'.$rid;
198 }
199 $args->{'id'} = $rid;
200
3a2ebbf4 201 $class->$orig( $args );
202};
203
a445ce40 204# Look for a lexeme-string argument in the build args; if there, pull in the
205# morphology role if possible.
70745e70 206sub BUILD {
207 my( $self, $args ) = @_;
208 if( exists $args->{'lexemes'} ) {
37bf09f4 209 unless( $self->can( '_deserialize_lexemes' ) ) {
210 warn "No morphology package installed; DROPPING lexemes";
211 return;
a445ce40 212 }
70745e70 213 $self->_deserialize_lexemes( $args->{'lexemes'} );
214 }
215}
216
3a2ebbf4 217=head2 is_meta
218
219A meta attribute (ha ha), which should be true if any of our 'special'
220booleans are true. Implies that the reading does not represent a bit
221of text found in a witness.
222
223=cut
224
225sub is_meta {
226 my $self = shift;
12720144 227 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 228}
229
a445ce40 230=head2 is_identical( $other_reading )
231
232Returns true if the reading is identical to the other reading. The basic test
233is equality of ->text attributes, but this may be wrapped or overridden by
234extensions.
235
236=cut
237
238sub is_identical {
239 my( $self, $other ) = @_;
240 return $self->text eq $other->text;
241}
242
243=head2 is_combinable
244
245Returns true if the reading may in theory be combined into a multi-reading
246segment within the collation graph. The reading must not be a meta reading,
247and it must not have any relationships in its own right with any others.
248This test may be wrapped or overridden by extensions.
249
250=cut
251
252sub is_combinable {
253 my $self = shift;
254 return undef if $self->is_meta;
255 return !$self->related_readings();
256}
257
258# Not really meant for public consumption. Adopt the text of the other reading
259# into this reading.
260sub _combine {
261 my( $self, $other, $joinstr ) = @_;
262 $self->alter_text( join( $joinstr, $self->text, $other->text ) );
263 # Change this reading to a joining one if necessary
264 $self->_set_join_next( $other->join_next );
265}
266
027d819c 267=head1 Convenience methods
268
269=head2 related_readings
270
271Calls Collation's related_readings with $self as the first argument.
272
273=cut
274
3a2ebbf4 275sub related_readings {
276 my $self = shift;
277 return $self->collation->related_readings( $self, @_ );
278}
279
7f52eac8 280=head2 witnesses
281
282Calls Collation's reading_witnesses with $self as the first argument.
283
284=cut
285
286sub witnesses {
287 my $self = shift;
288 return $self->collation->reading_witnesses( $self, @_ );
289}
290
027d819c 291=head2 predecessors
292
293Returns a list of Reading objects that immediately precede $self in the collation.
294
295=cut
296
22222af9 297sub predecessors {
298 my $self = shift;
299 my @pred = $self->collation->sequence->predecessors( $self->id );
300 return map { $self->collation->reading( $_ ) } @pred;
301}
302
027d819c 303=head2 successors
304
305Returns a list of Reading objects that immediately follow $self in the collation.
306
307=cut
308
22222af9 309sub successors {
310 my $self = shift;
311 my @succ = $self->collation->sequence->successors( $self->id );
312 return map { $self->collation->reading( $_ ) } @succ;
313}
314
a445ce40 315## Utility methods
1d310495 316
3a2ebbf4 317sub _stringify {
318 my $self = shift;
319 return $self->id;
320}
d047cd52 321
2acf0892 322sub TO_JSON {
323 my $self = shift;
324 return $self->text;
325}
326
70745e70 327sub throw {
328 Text::Tradition::Error->throw(
329 'ident' => 'Reading error',
330 'message' => $_[0],
331 );
332}
4d9593df 333
334no Moose;
335__PACKAGE__->meta->make_immutable;
336
021bdbac 3371;