use JSON for serialization rather than rolling own
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
7604424b 4use JSON qw/ from_json /;
6ad2ce78 5use Module::Load;
70745e70 6use Text::Tradition::Error;
7cd9f181 7use YAML::XS;
e4b0f464 8use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 9
3a2ebbf4 10=head1 NAME
784877d9 11
4aea6e9b 12Text::Tradition::Collation::Reading - represents a reading (usually a word)
13in a collation.
14
3a2ebbf4 15=head1 DESCRIPTION
784877d9 16
3a2ebbf4 17Text::Tradition is a library for representation and analysis of collated
18texts, particularly medieval ones. A 'reading' refers to a unit of text,
19usually a word, that appears in one or more witnesses (manuscripts) of the
20tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
784877d9 22
3a2ebbf4 23=head1 METHODS
1ca1163d 24
3a2ebbf4 25=head2 new
8e1394aa 26
4aea6e9b 27Creates a new reading in the given collation with the given attributes.
3a2ebbf4 28Options include:
94c00c71 29
3a2ebbf4 30=over 4
784877d9 31
4aea6e9b 32=item collation - The Text::Tradition::Collation object to which this
33reading belongs. Required.
e2902068 34
3a2ebbf4 35=item id - A unique identifier for this reading. Required.
910a0a6d 36
3a2ebbf4 37=item text - The word or other text of the reading.
784877d9 38
3a2ebbf4 39=item is_start - The reading is the starting point for the collation.
3265b0ce 40
3a2ebbf4 41=item is_end - The reading is the ending point for the collation.
784877d9 42
3a2ebbf4 43=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a 44
4aea6e9b 45=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
46not use unless you know what you are doing.
12720144 47
4aea6e9b 48=item rank - The sequence number of the reading. This should probably not
49be set manually.
d047cd52 50
3a2ebbf4 51=back
8e1394aa 52
One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.
8e1394aa 54
3a2ebbf4 55=head2 collation
94c00c71 56
3a2ebbf4 57=head2 id
94c00c71 58
3a2ebbf4 59=head2 text
4cdd82f1 60
3a2ebbf4 61=head2 is_start
4cdd82f1 62
3a2ebbf4 63=head2 is_end
4a8828f0 64
3a2ebbf4 65=head2 is_lacuna
4a8828f0 66
3a2ebbf4 67=head2 rank
4a8828f0 68
3a2ebbf4 69Accessor methods for the given attributes.
d047cd52 70
3a2ebbf4 71=cut
d047cd52 72
# The collation this reading belongs to. Held as a weak reference.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading; must always be supplied.
has id => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);

# The text of the reading. Read-only through 'text'; it can be replaced
# through the separate 'alter_text' writer.
has text => (
    isa      => 'Str',
    is       => 'ro',
    writer   => 'alter_text',
    required => 1,
);

# Optional language name; 'has_language' reports whether one was set.
has language => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_language',
);
98
# Read-only flags marking the 'special' readings that is_meta checks for.
# All four share the same definition, so they are declared together (in the
# same order as before).
has [ qw/ is_start is_end is_lacuna is_ph / ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

# Read-write flag; unset by default.
has is_common => (
    isa     => 'Bool',
    is      => 'rw',
    default => undef,
);

# Integer rank of the reading. 'has_rank' tells whether it is set and
# 'clear_rank' unsets it.
has rank => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_rank',
    clearer   => 'clear_rank',
);
fd602649 135
136## For morphological analysis
137
138has 'normal_form' => (
139 is => 'rw',
140 isa => 'Str',
141 predicate => 'has_normal_form',
142 );
143
7cd9f181 144# Holds the lexemes for the reading.
d3e7842a 145has 'reading_lexemes' => (
4d9593df 146 traits => ['Array'],
d3e7842a 147 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
4d9593df 148 handles => {
149 lexemes => 'elements',
cca4f996 150 has_lexemes => 'count',
d3e7842a 151 clear_lexemes => 'clear',
152 add_lexeme => 'push',
4d9593df 153 },
d3e7842a 154 default => sub { [] },
fd602649 155 );
156
629e27b0 157## For prefix/suffix readings
158
159has 'join_prior' => (
160 is => 'ro',
161 isa => 'Bool',
162 default => undef,
163 );
164
165has 'join_next' => (
166 is => 'ro',
167 isa => 'Bool',
168 default => undef,
169 );
170
3a2ebbf4 171
172around BUILDARGS => sub {
173 my $orig = shift;
174 my $class = shift;
175 my $args;
176 if( @_ == 1 ) {
177 $args = shift;
178 } else {
179 $args = { @_ };
180 }
b0b4421a 181
3a2ebbf4 182 # If one of our special booleans is set, we change the text and the
183 # ID to match.
1d310495 184 if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04 185 $args->{'text'} = '#LACUNA#';
3a2ebbf4 186 } elsif( exists $args->{'is_start'} ) {
187 $args->{'id'} = '#START#'; # Change the ID to ensure we have only one
188 $args->{'text'} = '#START#';
189 $args->{'rank'} = 0;
190 } elsif( exists $args->{'is_end'} ) {
191 $args->{'id'} = '#END#'; # Change the ID to ensure we have only one
192 $args->{'text'} = '#END#';
12720144 193 } elsif( exists $args->{'is_ph'} ) {
194 $args->{'text'} = $args->{'id'};
3a2ebbf4 195 }
196
197 $class->$orig( $args );
198};
199
# After construction, check the build args for a 'lexemes' entry and, when
# one is present, hand it to _deserialize_lexemes.
sub BUILD {
    my $self = shift;
    my $args = shift;
    return unless exists $args->{'lexemes'};
    $self->_deserialize_lexemes( $args->{'lexemes'} );
}
207
3a2ebbf4 208=head2 is_meta
209
210A meta attribute (ha ha), which should be true if any of our 'special'
211booleans are true. Implies that the reading does not represent a bit
212of text found in a witness.
213
214=cut
215
216sub is_meta {
217 my $self = shift;
12720144 218 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
3a2ebbf4 219}
220
027d819c 221=head1 Convenience methods
222
223=head2 related_readings
224
225Calls Collation's related_readings with $self as the first argument.
226
227=cut
228
sub related_readings {
    my( $self, @options ) = @_;
    # Delegate to the collation, putting this reading first.
    my $collation = $self->collation;
    return $collation->related_readings( $self, @options );
}
233
7f52eac8 234=head2 witnesses
235
236Calls Collation's reading_witnesses with $self as the first argument.
237
238=cut
239
240sub witnesses {
241 my $self = shift;
242 return $self->collation->reading_witnesses( $self, @_ );
243}
244
027d819c 245=head2 predecessors
246
247Returns a list of Reading objects that immediately precede $self in the collation.
248
249=cut
250
sub predecessors {
    my( $self ) = @_;
    my @readings;
    # Ask the sequence graph for the preceding IDs, then turn each ID into
    # whatever the collation returns for it.
    foreach my $rid ( $self->collation->sequence->predecessors( $self->id ) ) {
        push( @readings, $self->collation->reading( $rid ) );
    }
    return @readings;
}
256
027d819c 257=head2 successors
258
259Returns a list of Reading objects that immediately follow $self in the collation.
260
261=cut
262
sub successors {
    my( $self ) = @_;
    my @readings;
    # Ask the sequence graph for the following IDs, then turn each ID into
    # whatever the collation returns for it.
    foreach my $rid ( $self->collation->sequence->successors( $self->id ) ) {
        push( @readings, $self->collation->reading( $rid ) );
    }
    return @readings;
}
268
027d819c 269=head2 set_identical( $other_reading)
270
271Backwards compatibility method, to add a transposition relationship
272between $self and $other_reading. Don't use this.
273
274=cut
275
sub set_identical {
    my $self = shift;
    my $other = shift;
    # The relationship is always recorded as a transposition.
    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
281
# Handler for the '""' overload: a reading stringifies to its ID.
sub _stringify {
    my( $self ) = @_;
    return $self->id;
}
d047cd52 286
4d9593df 287=head1 MORPHOLOGY
288
7cd9f181 289Methods for the morphological information (if any) attached to readings.
290A reading may be made up of multiple lexemes; the concatenated lexeme
291strings ought to match the reading's normalized form.
292
293See L<Text::Tradition::Collation::Reading::Lexeme> for more information
294on Lexeme objects and their attributes.
295
296=head2 has_lexemes
297
298Returns a true value if the reading has any attached lexemes.
4d9593df 299
6ad2ce78 300=head2 lexemes
06e7cbc7 301
7cd9f181 302Returns the Lexeme objects (if any) attached to the reading.
6ad2ce78 303
304=head2 clear_lexemes
305
7cd9f181 306Wipes any associated Lexeme objects out of the reading.
307
308=head2 add_lexeme( $lexobj )
6ad2ce78 309
7cd9f181 310Adds the Lexeme in $lexobj to the list of lexemes.
311
312=head2 lemmatize
313
314If the language of the reading is set, this method will use the appropriate
315Language model to determine the lexemes that belong to this reading. See
L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
06e7cbc7 317
4d9593df 318=cut
319
sub lemmatize {
    my( $self ) = @_;
    # Nothing can be looked up without a language.
    if( !$self->has_language ) {
        warn "Please set a language to lemmatize a tradition";
        return;
    }
    # Load the module named after the reading's language and call its
    # reading_lookup function with this reading.
    my $language = $self->language;
    my $langmod = "Text::Tradition::Language::" . $language;
    load( $langmod );
    my $lookup = $langmod->can( 'reading_lookup' );
    return $lookup->( $self );
}
4d9593df 331
# For graph serialization. Return a JSON array representing the lexemes
# attached to this reading.
sub _serialize_lexemes {
    my( $self ) = @_;
    my @lexemes = $self->lexemes;
    # The array holds blessed objects, so blessed-object handling is
    # switched on in the encoder.
    my $encoder = JSON->new;
    $encoder->allow_blessed( 1 );
    $encoder->convert_blessed( 1 );
    return $encoder->encode( \@lexemes );
}
70745e70 339
# Given a JSON representation of the lexemes, instantiate them and add
# them to the reading, replacing any lexemes that were already attached.
#
# $json is decoded with from_json and is used as an array; each element is
# passed to the Lexeme constructor as its 'JSON' argument. Does nothing if
# the decoded array is empty. Throws a 'Reading error' if the Lexeme class
# cannot be loaded.
sub _deserialize_lexemes {
    my( $self, $json ) = @_;
    my $data = from_json( $json );
    return unless @$data;

    # Need to have the lexeme module in order to have lexemes. This must be
    # a runtime 'require': a 'use' statement is executed at compile time
    # even when written inside eval {}, so its failure could never be
    # caught here.
    my $lexclass = 'Text::Tradition::Collation::Reading::Lexeme';
    eval { require Text::Tradition::Collation::Reading::Lexeme; 1 }
        or throw( $@ || "Could not load $lexclass" );

    # Good to go - add the lexemes.
    my @lexemes = map { $lexclass->new( 'JSON' => $_ ) } @$data;
    $self->clear_lexemes;
    $self->add_lexeme( @lexemes );
}
7cd9f181 360
4d9593df 361## Utility methods
362
# A reading is represented by its text when converted for JSON output.
sub TO_JSON {
    my( $self ) = @_;
    return $self->text;
}
367
# Raise a Text::Tradition::Error identified as a 'Reading error', using the
# first argument as its message.
sub throw {
    my $message = $_[0];
    my @error_args = (
        'ident'   => 'Reading error',
        'message' => $message,
    );
    Text::Tradition::Error->throw( @error_args );
}
4d9593df 374
375no Moose;
376__PACKAGE__->meta->make_immutable;
377
021bdbac 3781;