make French morphology use Lingua objects; add tests
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
6ad2ce78 4use Module::Load;
e4b0f464 5use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 6
3a2ebbf4 7=head1 NAME
784877d9 8
4aea6e9b 9Text::Tradition::Collation::Reading - represents a reading (usually a word)
10in a collation.
11
3a2ebbf4 12=head1 DESCRIPTION
784877d9 13
Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones. A 'reading' refers to a unit of text,
usually a word, that appears in one or more witnesses (manuscripts) of the
tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
784877d9 19
3a2ebbf4 20=head1 METHODS
1ca1163d 21
3a2ebbf4 22=head2 new
8e1394aa 23
4aea6e9b 24Creates a new reading in the given collation with the given attributes.
3a2ebbf4 25Options include:
94c00c71 26
3a2ebbf4 27=over 4
784877d9 28
4aea6e9b 29=item collation - The Text::Tradition::Collation object to which this
30reading belongs. Required.
e2902068 31
3a2ebbf4 32=item id - A unique identifier for this reading. Required.
910a0a6d 33
3a2ebbf4 34=item text - The word or other text of the reading.
784877d9 35
3a2ebbf4 36=item is_start - The reading is the starting point for the collation.
3265b0ce 37
3a2ebbf4 38=item is_end - The reading is the ending point for the collation.
784877d9 39
3a2ebbf4 40=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 41
4aea6e9b 42=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
43not use unless you know what you are doing.
12720144 44
4aea6e9b 45=item rank - The sequence number of the reading. This should probably not
46be set manually.
d047cd52 47
3a2ebbf4 48=back
8e1394aa 49
One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.
8e1394aa 51
3a2ebbf4 52=head2 collation
94c00c71 53
3a2ebbf4 54=head2 id
94c00c71 55
3a2ebbf4 56=head2 text
4cdd82f1 57
3a2ebbf4 58=head2 is_start
4cdd82f1 59
3a2ebbf4 60=head2 is_end
4a8828f0 61
3a2ebbf4 62=head2 is_lacuna
4a8828f0 63
3a2ebbf4 64=head2 rank
4a8828f0 65
3a2ebbf4 66Accessor methods for the given attributes.
d047cd52 67
3a2ebbf4 68=cut
d047cd52 69
# The collation this reading belongs to; stored as a weak reference.
has collation => (
    is       => 'ro',
    isa      => 'Text::Tradition::Collation',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading; must be supplied at construction time.
has id => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Text of the reading. Read through text(); changed through alter_text().
has text => (
    is       => 'ro',
    isa      => 'Str',
    writer   => 'alter_text',
    required => 1,
);

# Optional language name; has_language() reports whether one was set.
has language => (
    is        => 'ro',
    isa       => 'Str',
    predicate => 'has_language',
);

# Read-only marker flags for the 'special' kinds of reading. All four share
# the same definition and are unset (undef) unless given to the constructor.
has [qw( is_start is_end is_lacuna is_ph )] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

# Read-write flag, unset (undef) by default.
has is_common => (
    is      => 'rw',
    isa     => 'Bool',
    default => undef,
);

# Sequence number of the reading; has_rank() / clear_rank() test and
# remove it.
has rank => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_rank',
    clearer   => 'clear_rank',
);
fd602649 132
## For morphological analysis

# Optional normalised spelling; has_normal_form() reports whether it is set.
has normal_form => (
    is        => 'rw',
    isa       => 'Str',
    predicate => 'has_normal_form',
);

# Holds the word form. If is_disambiguated is true, the form at index zero
# is the correct one.
# The array itself has no direct accessor; it is reached only through the
# delegated methods below. Note that has_lexemes() is the element count.
has reading_lexemes => (
    traits  => ['Array'],
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
    default => sub { return [] },
    handles => {
        add_lexeme    => 'push',
        clear_lexemes => 'clear',
        has_lexemes   => 'count',
        lexemes       => 'elements',
    },
);
154
## For prefix/suffix readings

# Two read-only Bool flags with an identical definition, both unset (undef)
# unless passed to the constructor.
has [qw( join_prior join_next )] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);
168
3a2ebbf4 169
170around BUILDARGS => sub {
171 my $orig = shift;
172 my $class = shift;
173 my $args;
174 if( @_ == 1 ) {
175 $args = shift;
176 } else {
177 $args = { @_ };
178 }
b0b4421a 179
3a2ebbf4 180 # If one of our special booleans is set, we change the text and the
181 # ID to match.
1d310495 182 if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 183 $args->{'text'} = '#LACUNA#';
3a2ebbf4 184 } elsif( exists $args->{'is_start'} ) {
185 $args->{'id'} = '#START#'; # Change the ID to ensure we have only one
186 $args->{'text'} = '#START#';
187 $args->{'rank'} = 0;
188 } elsif( exists $args->{'is_end'} ) {
189 $args->{'id'} = '#END#'; # Change the ID to ensure we have only one
190 $args->{'text'} = '#END#';
12720144 191 } elsif( exists $args->{'is_ph'} ) {
192 $args->{'text'} = $args->{'id'};
3a2ebbf4 193 }
194
195 $class->$orig( $args );
196};
197
198=head2 is_meta
199
200A meta attribute (ha ha), which should be true if any of our 'special'
201booleans are true. Implies that the reading does not represent a bit
202of text found in a witness.
203
204=cut
205
sub is_meta {
    my ($self) = @_;

    # Hand back the first 'special' flag that is true ...
    for my $flag (qw( is_start is_end is_lacuna )) {
        my $value = $self->$flag;
        return $value if $value;
    }

    # ... otherwise the answer is whatever the placeholder flag holds.
    return $self->is_ph;
}
210
027d819c 211=head1 Convenience methods
212
213=head2 related_readings
214
215Calls Collation's related_readings with $self as the first argument.
216
217=cut
218
sub related_readings {
    my ( $self, @options ) = @_;

    # Delegate to the collation, putting this reading in front of
    # whatever arguments the caller supplied.
    my $collation = $self->collation;
    return $collation->related_readings( $self, @options );
}
223
7f52eac8 224=head2 witnesses
225
226Calls Collation's reading_witnesses with $self as the first argument.
227
228=cut
229
sub witnesses {
    my ( $self, @options ) = @_;

    # Delegate to the collation, putting this reading in front of
    # whatever arguments the caller supplied.
    my $collation = $self->collation;
    return $collation->reading_witnesses( $self, @options );
}
234
027d819c 235=head2 predecessors
236
237Returns a list of Reading objects that immediately precede $self in the collation.
238
239=cut
240
sub predecessors {
    my ($self) = @_;

    my $collation = $self->collation;

    # The sequence is queried by reading ID and answers with IDs; each one
    # is turned back into an object via the collation.
    my @readings;
    for my $rid ( $collation->sequence->predecessors( $self->id ) ) {
        push @readings, $collation->reading($rid);
    }
    return @readings;
}
246
027d819c 247=head2 successors
248
249Returns a list of Reading objects that immediately follow $self in the collation.
250
251=cut
252
sub successors {
    my ($self) = @_;

    my $collation = $self->collation;

    # The sequence is queried by reading ID and answers with IDs; each one
    # is turned back into an object via the collation.
    my @readings;
    for my $rid ( $collation->sequence->successors( $self->id ) ) {
        push @readings, $collation->reading($rid);
    }
    return @readings;
}
258
027d819c 259=head2 set_identical( $other_reading)
260
261Backwards compatibility method, to add a transposition relationship
262between $self and $other_reading. Don't use this.
263
264=cut
265
sub set_identical {
    my $self  = shift;
    my $other = shift;

    # Recorded on the collation as a relationship of type 'transposition'
    # between the two readings.
    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
271
# Handler for the '""' overload: a reading stringifies to its ID.
sub _stringify {
    my ($self) = @_;
    return $self->id;
}
d047cd52 276
4d9593df 277=head1 MORPHOLOGY
278
279A few methods to try to tack on morphological information.
280
6ad2ce78 281=head2 lexemes
06e7cbc7 282
6ad2ce78 283=head2 has_lexemes
284
285=head2 clear_lexemes
286
287=head2 add_lexeme
288
289=head2 lemmatize
06e7cbc7 290
4d9593df 291=cut
292
# Run the language-specific morphological lookup for this reading.
#
# The handler is the reading_lookup function of the module
# Text::Tradition::Language::<language>, which is loaded on demand and
# called with this reading as its only argument.
#
# Returns whatever reading_lookup returns. If no language is set, a warning
# is issued and nothing is returned. Dies if the language module cannot be
# loaded or does not provide reading_lookup.
sub lemmatize {
    my ($self) = @_;

    unless ( $self->has_language ) {
        warn "Please set a language to lemmatize a tradition";
        return;
    }

    my $mod = "Text::Tradition::Language::" . $self->language;
    load($mod);

    # can() yields a false value when the module lacks the function; report
    # that by name instead of failing on an undefined code reference.
    my $lookup = $mod->can('reading_lookup')
        or die "Language module $mod does not provide reading_lookup";

    return $lookup->($self);
}
4d9593df 304
305## Utility methods
306
# A reading is represented by its text alone.
sub TO_JSON {
    my ($self) = @_;
    return $self->text;
}
311
4d9593df 312## TODO will need a throw() here
313
# Unimport the Moose keywords, then make the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value.
1;