split out morphology; make all tests pass apart from morphology POD
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
a445ce40 4use Moose::Util qw/ does_role apply_all_roles /;
10e4b1ac 5use Moose::Util::TypeConstraints;
70745e70 6use Text::Tradition::Error;
10e4b1ac 7use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
10e4b1ac 10subtype 'ReadingID',
11 as 'Str',
12 where { $_ =~ /\A$xml10_name_rx\z/ },
13 message { 'Reading ID must be a valid XML attribute string' };
14
15no Moose::Util::TypeConstraints;
16
3a2ebbf4 17=head1 NAME
784877d9 18
4aea6e9b 19Text::Tradition::Collation::Reading - represents a reading (usually a word)
20in a collation.
21
3a2ebbf4 22=head1 DESCRIPTION
784877d9 23
3a2ebbf4 24Text::Tradition is a library for representation and analysis of collated
25texts, particularly medieval ones. A 'reading' refers to a unit of text,
26usually a word, that appears in one or more witnesses (manuscripts) of the
27tradition; the text of a given witness is composed of a set of readings in
28a particular sequence
784877d9 29
3a2ebbf4 30=head1 METHODS
1ca1163d 31
3a2ebbf4 32=head2 new
8e1394aa 33
4aea6e9b 34Creates a new reading in the given collation with the given attributes.
3a2ebbf4 35Options include:
94c00c71 36
3a2ebbf4 37=over 4
784877d9 38
4aea6e9b 39=item collation - The Text::Tradition::Collation object to which this
40reading belongs. Required.
e2902068 41
3a2ebbf4 42=item id - A unique identifier for this reading. Required.
910a0a6d 43
3a2ebbf4 44=item text - The word or other text of the reading.
784877d9 45
3a2ebbf4 46=item is_start - The reading is the starting point for the collation.
3265b0ce 47
3a2ebbf4 48=item is_end - The reading is the ending point for the collation.
784877d9 49
3a2ebbf4 50=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 51
4aea6e9b 52=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
53not use unless you know what you are doing.
12720144 54
4aea6e9b 55=item rank - The sequence number of the reading. This should probably not
56be set manually.
d047cd52 57
3a2ebbf4 58=back
8e1394aa 59
3a2ebbf4 60One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa 61
3a2ebbf4 62=head2 collation
94c00c71 63
3a2ebbf4 64=head2 id
94c00c71 65
3a2ebbf4 66=head2 text
4cdd82f1 67
3a2ebbf4 68=head2 is_start
4cdd82f1 69
3a2ebbf4 70=head2 is_end
4a8828f0 71
3a2ebbf4 72=head2 is_lacuna
4a8828f0 73
3a2ebbf4 74=head2 rank
4a8828f0 75
3a2ebbf4 76Accessor methods for the given attributes.
d047cd52 77
3a2ebbf4 78=cut
d047cd52 79
3a2ebbf4 80has 'collation' => (
81 is => 'ro',
82 isa => 'Text::Tradition::Collation',
83 # required => 1,
84 weak_ref => 1,
85 );
d047cd52 86
3a2ebbf4 87has 'id' => (
88 is => 'ro',
10e4b1ac 89 isa => 'ReadingID',
3a2ebbf4 90 required => 1,
91 );
d047cd52 92
3a2ebbf4 93has 'text' => (
94 is => 'ro',
95 isa => 'Str',
96 required => 1,
49d4f2ac 97 writer => 'alter_text',
3a2ebbf4 98 );
0e47f4f6 99
fae52efd 100has 'language' => (
101 is => 'ro',
102 isa => 'Str',
6ad2ce78 103 predicate => 'has_language',
fae52efd 104 );
105
3a2ebbf4 106has 'is_start' => (
107 is => 'ro',
108 isa => 'Bool',
109 default => undef,
110 );
111
112has 'is_end' => (
113 is => 'ro',
114 isa => 'Bool',
115 default => undef,
116 );
117
118has 'is_lacuna' => (
119 is => 'ro',
120 isa => 'Bool',
121 default => undef,
122 );
12720144 123
124has 'is_ph' => (
125 is => 'ro',
126 isa => 'Bool',
127 default => undef,
128 );
d4b75f44 129
130has 'is_common' => (
131 is => 'rw',
132 isa => 'Bool',
133 default => undef,
134 );
3a2ebbf4 135
136has 'rank' => (
137 is => 'rw',
138 isa => 'Int',
139 predicate => 'has_rank',
ca6e6095 140 clearer => 'clear_rank',
3a2ebbf4 141 );
fd602649 142
629e27b0 143## For prefix/suffix readings
144
145has 'join_prior' => (
146 is => 'ro',
147 isa => 'Bool',
148 default => undef,
339786dd 149 writer => '_set_join_prior',
629e27b0 150 );
151
152has 'join_next' => (
153 is => 'ro',
154 isa => 'Bool',
155 default => undef,
339786dd 156 writer => '_set_join_next',
629e27b0 157 );
158
3a2ebbf4 159
160around BUILDARGS => sub {
161 my $orig = shift;
162 my $class = shift;
163 my $args;
164 if( @_ == 1 ) {
165 $args = shift;
166 } else {
167 $args = { @_ };
168 }
b0b4421a 169
3a2ebbf4 170 # If one of our special booleans is set, we change the text and the
171 # ID to match.
44924224 172 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 173 $args->{'text'} = '#LACUNA#';
44924224 174 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
10e4b1ac 175 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
3a2ebbf4 176 $args->{'text'} = '#START#';
177 $args->{'rank'} = 0;
44924224 178 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
10e4b1ac 179 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
3a2ebbf4 180 $args->{'text'} = '#END#';
44924224 181 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
12720144 182 $args->{'text'} = $args->{'id'};
3a2ebbf4 183 }
184
10e4b1ac 185 # Backwards compatibility for non-XMLname IDs
186 my $rid = $args->{'id'};
187 $rid =~ s/\#/__/g;
188 $rid =~ s/[\/,]/./g;
189 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
190 $rid = 'r'.$rid;
191 }
192 $args->{'id'} = $rid;
193
3a2ebbf4 194 $class->$orig( $args );
195};
196
a445ce40 197# Look for a lexeme-string argument in the build args; if there, pull in the
198# morphology role if possible.
70745e70 199sub BUILD {
200 my( $self, $args ) = @_;
201 if( exists $args->{'lexemes'} ) {
a445ce40 202 unless( does_role( $self, 'Text::Tradition::Morphology' ) ) {
203 eval { apply_all_roles( $self, 'Text::Tradition::Morphology' ) };
204 if( $@ ) {
205 warn "No morphology package installed; DROPPING lexemes";
206 return;
207 }
208 }
70745e70 209 $self->_deserialize_lexemes( $args->{'lexemes'} );
210 }
211}
212
3a2ebbf4 213=head2 is_meta
214
215A meta attribute (ha ha), which should be true if any of our 'special'
216booleans are true. Implies that the reading does not represent a bit
217of text found in a witness.
218
219=cut
220
221sub is_meta {
222 my $self = shift;
12720144 223 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 224}
225
a445ce40 226=head2 is_identical( $other_reading )
227
228Returns true if the reading is identical to the other reading. The basic test
229is equality of ->text attributes, but this may be wrapped or overridden by
230extensions.
231
232=cut
233
234sub is_identical {
235 my( $self, $other ) = @_;
236 return $self->text eq $other->text;
237}
238
239=head2 is_combinable
240
241Returns true if the reading may in theory be combined into a multi-reading
242segment within the collation graph. The reading must not be a meta reading,
243and it must not have any relationships in its own right with any others.
244This test may be wrapped or overridden by extensions.
245
246=cut
247
248sub is_combinable {
249 my $self = shift;
250 return undef if $self->is_meta;
251 return !$self->related_readings();
252}
253
254# Not really meant for public consumption. Adopt the text of the other reading
255# into this reading.
256sub _combine {
257 my( $self, $other, $joinstr ) = @_;
258 $self->alter_text( join( $joinstr, $self->text, $other->text ) );
259 # Change this reading to a joining one if necessary
260 $self->_set_join_next( $other->join_next );
261}
262
027d819c 263=head1 Convenience methods
264
265=head2 related_readings
266
267Calls Collation's related_readings with $self as the first argument.
268
269=cut
270
3a2ebbf4 271sub related_readings {
272 my $self = shift;
273 return $self->collation->related_readings( $self, @_ );
274}
275
7f52eac8 276=head2 witnesses
277
278Calls Collation's reading_witnesses with $self as the first argument.
279
280=cut
281
282sub witnesses {
283 my $self = shift;
284 return $self->collation->reading_witnesses( $self, @_ );
285}
286
027d819c 287=head2 predecessors
288
289Returns a list of Reading objects that immediately precede $self in the collation.
290
291=cut
292
22222af9 293sub predecessors {
294 my $self = shift;
295 my @pred = $self->collation->sequence->predecessors( $self->id );
296 return map { $self->collation->reading( $_ ) } @pred;
297}
298
027d819c 299=head2 successors
300
301Returns a list of Reading objects that immediately follow $self in the collation.
302
303=cut
304
22222af9 305sub successors {
306 my $self = shift;
307 my @succ = $self->collation->sequence->successors( $self->id );
308 return map { $self->collation->reading( $_ ) } @succ;
309}
310
a445ce40 311## Utility methods
1d310495 312
3a2ebbf4 313sub _stringify {
314 my $self = shift;
315 return $self->id;
316}
d047cd52 317
2acf0892 318sub TO_JSON {
319 my $self = shift;
320 return $self->text;
321}
322
70745e70 323sub throw {
324 Text::Tradition::Error->throw(
325 'ident' => 'Reading error',
326 'message' => $_[0],
327 );
328}
4d9593df 329
330no Moose;
331__PACKAGE__->meta->make_immutable;
332
021bdbac 3331;