naive serialization of lexems in GraphML
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
6ad2ce78 4use Module::Load;
7cd9f181 5use YAML::XS;
e4b0f464 6use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 7
3a2ebbf4 8=head1 NAME
784877d9 9
4aea6e9b 10Text::Tradition::Collation::Reading - represents a reading (usually a word)
11in a collation.
12
3a2ebbf4 13=head1 DESCRIPTION
784877d9 14
3a2ebbf4 15Text::Tradition is a library for representation and analysis of collated
16texts, particularly medieval ones. A 'reading' refers to a unit of text,
17usually a word, that appears in one or more witnesses (manuscripts) of the
18tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
784877d9 20
3a2ebbf4 21=head1 METHODS
1ca1163d 22
3a2ebbf4 23=head2 new
8e1394aa 24
4aea6e9b 25Creates a new reading in the given collation with the given attributes.
3a2ebbf4 26Options include:
94c00c71 27
3a2ebbf4 28=over 4
784877d9 29
=item collation - The Text::Tradition::Collation object to which this
reading belongs. The convenience methods below rely on it, although the
constructor does not currently enforce its presence.
e2902068 32
3a2ebbf4 33=item id - A unique identifier for this reading. Required.
910a0a6d 34
3a2ebbf4 35=item text - The word or other text of the reading.
784877d9 36
3a2ebbf4 37=item is_start - The reading is the starting point for the collation.
3265b0ce 38
3a2ebbf4 39=item is_end - The reading is the ending point for the collation.
784877d9 40
3a2ebbf4 41=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 42
4aea6e9b 43=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
44not use unless you know what you are doing.
12720144 45
4aea6e9b 46=item rank - The sequence number of the reading. This should probably not
47be set manually.
d047cd52 48
3a2ebbf4 49=back
8e1394aa 50
One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required;
for the special flags the text of the reading is filled in automatically.
8e1394aa 52
3a2ebbf4 53=head2 collation
94c00c71 54
3a2ebbf4 55=head2 id
94c00c71 56
3a2ebbf4 57=head2 text
4cdd82f1 58
3a2ebbf4 59=head2 is_start
4cdd82f1 60
3a2ebbf4 61=head2 is_end
4a8828f0 62
3a2ebbf4 63=head2 is_lacuna
4a8828f0 64
3a2ebbf4 65=head2 rank
4a8828f0 66
3a2ebbf4 67Accessor methods for the given attributes.
d047cd52 68
3a2ebbf4 69=cut
d047cd52 70
# The collation that owns this reading. Kept as a weak reference; the
# 'required' constraint is deliberately left switched off for now.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    # required => 1,
    weak_ref => 1,
);

# Unique identifier of the reading. Mandatory at construction time.
has id => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# The text of the reading. Read through text(); alter_text() is the
# dedicated writer.
has text => (
    isa      => 'Str',
    is       => 'ro',
    writer   => 'alter_text',
    required => 1,
);

# Optional language name; has_language() reports whether one was supplied.
has language => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_language',
);
96
# Flags that mark 'special' readings. All four share one specification:
# read-only booleans whose default is undef.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

# Read-write boolean, likewise undef by default.
has is_common => (
    isa     => 'Bool',
    is      => 'rw',
    default => undef,
);
3a2ebbf4 126
# Sequence number of the reading. has_rank() tells whether a rank is set
# and clear_rank() removes it again.
has rank => (
    isa       => 'Int',
    is        => 'rw',
    clearer   => 'clear_rank',
    predicate => 'has_rank',
);
fd602649 133
## For morphological analysis

# Normalized form of the reading; has_normal_form() reports whether it
# has been set.
has normal_form => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_normal_form',
);

# Holds the lexemes for the reading. The list itself has no accessor; it
# is reached only through the delegated methods below.
has reading_lexemes => (
    traits  => [ qw( Array ) ],
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
    default => sub { return [] },
    handles => {
        add_lexeme    => 'push',
        clear_lexemes => 'clear',
        has_lexemes   => 'count',
        lexemes       => 'elements',
    },
);
154
## For prefix/suffix readings

# Both join flags are read-only booleans whose default is undef.
has [ qw( join_prior join_next ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);
168
3a2ebbf4 169
# Normalise the constructor arguments before Moose validates them.
#
# Accepts either a single hashref or a flat key/value list. When one of the
# 'special' booleans is TRUE, the text (and for start/end also the ID) is
# forced to the matching marker value. A flag that is present but false
# (e.g. is_start => 0) is treated as not set.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    my $args;
    if ( @_ == 1 ) {
        my $given = shift;
        # Work on a shallow copy of a plain hashref so the values forced
        # below do not leak back into the caller's own hash. Anything else
        # is handed on unchanged.
        $args = ref( $given ) eq 'HASH' ? { %$given } : $given;
    }
    else {
        $args = { @_ };
    }

    # If one of our special booleans is set, we change the text and the
    # ID to match. Test for truth rather than mere existence of the key.
    if ( $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    }
    elsif ( $args->{'is_start'} ) {
        $args->{'id'}   = '#START#';   # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    }
    elsif ( $args->{'is_end'} ) {
        $args->{'id'}   = '#END#';     # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    }
    elsif ( $args->{'is_ph'} ) {
        # A placeholder simply displays its own ID.
        $args->{'text'} = $args->{'id'};
    }

    $class->$orig( $args );
};
197
198=head2 is_meta
199
200A meta attribute (ha ha), which should be true if any of our 'special'
201booleans are true. Implies that the reading does not represent a bit
202of text found in a witness.
203
204=cut
205
sub is_meta {
    my( $reading ) = @_;
    # Walk the special flags in order and stop at the first true one; if
    # none is true, the value of the last flag checked is handed back.
    my $flag;
    for my $attr ( qw( is_start is_end is_lacuna is_ph ) ) {
        $flag = $reading->$attr;
        last if $flag;
    }
    return $flag;
}
210
027d819c 211=head1 Convenience methods
212
213=head2 related_readings
214
215Calls Collation's related_readings with $self as the first argument.
216
217=cut
218
sub related_readings {
    my $reading   = shift;
    my $collation = $reading->collation;
    # Delegate to the collation, with this reading placed ahead of any
    # further arguments from the caller.
    return $collation->related_readings( $reading, @_ );
}
223
7f52eac8 224=head2 witnesses
225
226Calls Collation's reading_witnesses with $self as the first argument.
227
228=cut
229
sub witnesses {
    my $reading   = shift;
    my $collation = $reading->collation;
    # The collation does the lookup; this reading goes in front of
    # whatever else the caller passed.
    return $collation->reading_witnesses( $reading, @_ );
}
234
027d819c 235=head2 predecessors
236
237Returns a list of Reading objects that immediately precede $self in the collation.
238
239=cut
240
sub predecessors {
    my $reading   = shift;
    my $collation = $reading->collation;
    # Ask the collation's sequence object which IDs come directly before
    # ours...
    my @prior_ids = $collation->sequence->predecessors( $reading->id );
    # ...and turn each ID back into a reading via the collation.
    return map { $collation->reading( $_ ) } @prior_ids;
}
246
027d819c 247=head2 successors
248
249Returns a list of Reading objects that immediately follow $self in the collation.
250
251=cut
252
sub successors {
    my $reading   = shift;
    my $collation = $reading->collation;
    # Ask the collation's sequence object which IDs come directly after
    # ours...
    my @next_ids = $collation->sequence->successors( $reading->id );
    # ...and turn each ID back into a reading via the collation.
    return map { $collation->reading( $_ ) } @next_ids;
}
258
027d819c 259=head2 set_identical( $other_reading)
260
261Backwards compatibility method, to add a transposition relationship
262between $self and $other_reading. Don't use this.
263
264=cut
265
sub set_identical {
    my $self  = shift;
    my $other = shift;
    # Legacy shim: the 'identical' link is recorded as a transposition
    # relationship on the collation.
    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
271
# Handler for the '""' overload: a reading used as a string yields its ID.
sub _stringify {
    my( $reading ) = @_;
    return $reading->id;
}
d047cd52 276
4d9593df 277=head1 MORPHOLOGY
278
7cd9f181 279Methods for the morphological information (if any) attached to readings.
280A reading may be made up of multiple lexemes; the concatenated lexeme
281strings ought to match the reading's normalized form.
282
283See L<Text::Tradition::Collation::Reading::Lexeme> for more information
284on Lexeme objects and their attributes.
285
286=head2 has_lexemes
287
288Returns a true value if the reading has any attached lexemes.
4d9593df 289
6ad2ce78 290=head2 lexemes
06e7cbc7 291
7cd9f181 292Returns the Lexeme objects (if any) attached to the reading.
6ad2ce78 293
294=head2 clear_lexemes
295
7cd9f181 296Wipes any associated Lexeme objects out of the reading.
297
298=head2 add_lexeme( $lexobj )
6ad2ce78 299
7cd9f181 300Adds the Lexeme in $lexobj to the list of lexemes.
301
302=head2 lemmatize
303
304If the language of the reading is set, this method will use the appropriate
305Language model to determine the lexemes that belong to this reading. See
L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
06e7cbc7 307
4d9593df 308=cut
309
# Look up the lexemes for this reading using the language module that
# matches the reading's language.
#
# Warns and returns nothing when no language is set. Otherwise loads
# Text::Tradition::Language::<language> and returns whatever that module's
# reading_lookup function returns when called with this reading. Dies if
# the module cannot be loaded or does not provide reading_lookup.
sub lemmatize {
    my $self = shift;
    unless ( $self->has_language ) {
        warn "Please set a language to lemmatize a tradition";
        return;
    }

    my $mod = "Text::Tradition::Language::" . $self->language;
    load( $mod );

    # Fail with a message that names the module, instead of the opaque
    # "undefined value as a subroutine reference" error that calling the
    # result of can() blindly would produce.
    my $lookup = $mod->can( 'reading_lookup' )
        or die "Language module $mod does not provide reading_lookup";

    # reading_lookup is invoked as a plain function with the reading as
    # its only argument.
    return $lookup->( $self );
}
4d9593df 321
7cd9f181 322# For graph serialization. Return a string representation of the associated
323# reading lexemes.
sub _serialize_lexemes {
    my( $reading ) = @_;
    # Gather the attached lexemes into an array and hand a reference to
    # the Dump function for conversion into a string.
    my @lexemes = $reading->lexemes;
    return Dump( \@lexemes );
}
328
329
4d9593df 330## Utility methods
331
# The value returned here is simply the text of the reading.
sub TO_JSON {
    my( $reading ) = @_;
    return $reading->text;
}
336
4d9593df 337## TODO will need a throw() here
338
# Remove the Moose keywords from this namespace, then make the class
# immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value.
1;