1 package Text::Tradition::Collation::Reading;
4 use Moose::Util::TypeConstraints;
5 use JSON qw/ from_json /;
7 use Text::Tradition::Error;
8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
10 use overload '""' => \&_stringify, 'fallback' => 1;
14 where { $_ =~ /\A$xml10_name_rx\z/ },
15 message { 'Reading ID must be a valid XML attribute string' };
17 no Moose::Util::TypeConstraints;
21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
26 Text::Tradition is a library for representation and analysis of collated
27 texts, particularly medieval ones. A 'reading' refers to a unit of text,
28 usually a word, that appears in one or more witnesses (manuscripts) of the
29 tradition; the text of a given witness is composed of a set of readings in
36 Creates a new reading in the given collation with the given attributes.
41 =item collation - The Text::Tradition::Collation object to which this
42 reading belongs. Required.
44 =item id - A unique identifier for this reading. Required.
46 =item text - The word or other text of the reading.
48 =item is_start - The reading is the starting point for the collation.
50 =item is_end - The reading is the ending point for the collation.
52 =item is_lacuna - The 'reading' represents a known gap in the text.
54 =item is_ph - A temporary placeholder for apparatus parsing purposes. Do
55 not use unless you know what you are doing.
57 =item rank - The sequence number of the reading. This should probably not
62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
78 Accessor methods for the given attributes.
84 isa => 'Text::Tradition::Collation',
99 writer => 'alter_text',
105 predicate => 'has_language',
141 predicate => 'has_rank',
142 clearer => 'clear_rank',
145 ## For morphological analysis
147 has 'grammar_invalid' => (
153 has 'is_nonsense' => (
159 has 'normal_form' => (
162 predicate => '_has_normal_form',
163 clearer => '_clear_normal_form',
166 # Holds the lexemes for the reading.
167 has 'reading_lexemes' => (
169 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
172 lexemes => 'elements',
173 has_lexemes => 'count',
174 clear_lexemes => 'clear',
175 add_lexeme => 'push',
177 default => sub { [] },
180 ## For prefix/suffix readings
182 has 'join_prior' => (
195 around BUILDARGS => sub {
205 # If one of our special booleans is set, we change the text and the
207 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
208 $args->{'text'} = '#LACUNA#';
209 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
210 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
211 $args->{'text'} = '#START#';
213 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
214 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
215 $args->{'text'} = '#END#';
216 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
217 $args->{'text'} = $args->{'id'};
220 # Backwards compatibility for non-XMLname IDs
221 my $rid = $args->{'id'};
224 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
227 $args->{'id'} = $rid;
229 $class->$orig( $args );
232 # Look for a lexeme-string argument in the build args.
234 my( $self, $args ) = @_;
235 if( exists $args->{'lexemes'} ) {
236 $self->_deserialize_lexemes( $args->{'lexemes'} );
240 # Make normal_form default to text, transparently.
241 around 'normal_form' => sub {
245 if( $arg && $arg eq $self->text ) {
246 $self->_clear_normal_form;
248 } elsif( !$arg && !$self->_has_normal_form ) {
257 A meta attribute (ha ha), which should be true if any of our 'special'
258 booleans are true. Implies that the reading does not represent a bit
259 of text found in a witness.
265 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
270 Similar to is_meta, but returns false for the start and end readings.
276 return $self->is_lacuna || $self->is_ph;
279 =head1 Convenience methods
281 =head2 related_readings
283 Calls Collation's related_readings with $self as the first argument.
287 sub related_readings {
289 return $self->collation->related_readings( $self, @_ );
294 Calls Collation's reading_witnesses with $self as the first argument.
300 return $self->collation->reading_witnesses( $self, @_ );
305 Returns a list of Reading objects that immediately precede $self in the collation.
311 my @pred = $self->collation->sequence->predecessors( $self->id );
312 return map { $self->collation->reading( $_ ) } @pred;
317 Returns a list of Reading objects that immediately follow $self in the collation.
323 my @succ = $self->collation->sequence->successors( $self->id );
324 return map { $self->collation->reading( $_ ) } @succ;
327 =head2 set_identical( $other_reading )
329 Backwards compatibility method, to add a transposition relationship
330 between $self and $other_reading. Don't use this.
335 my( $self, $other ) = @_;
336 return $self->collation->add_relationship( $self, $other,
337 { 'type' => 'transposition' } );
347 Methods for the morphological information (if any) attached to readings.
348 A reading may be made up of multiple lexemes; the concatenated lexeme
349 strings ought to match the reading's normalized form.
351 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
352 on Lexeme objects and their attributes.
356 Returns a true value if the reading has any attached lexemes.
360 Returns the Lexeme objects (if any) attached to the reading.
364 Wipes any associated Lexeme objects out of the reading.
366 =head2 add_lexeme( $lexobj )
368 Adds the Lexeme in $lexobj to the list of lexemes.
372 If the language of the reading is set, this method will use the appropriate
373 Language model to determine the lexemes that belong to this reading. See
374 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
380 unless( $self->has_language ) {
381 warn "Please set a language to lemmatize a tradition";
384 my $mod = "Text::Tradition::Language::" . $self->language;
386 $mod->can( 'reading_lookup' )->( $self );
390 # For graph serialization. Return a JSON representation of the associated
392 sub _serialize_lexemes {
394 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
395 return $json->encode( [ $self->lexemes ] );
398 # Given a JSON representation of the lexemes, instantiate them and add
399 # them to the reading.
400 sub _deserialize_lexemes {
401 my( $self, $json ) = @_;
402 my $data = from_json( $json );
403 return unless @$data;
405 # Need to have the lexeme module in order to have lexemes.
406 eval { use Text::Tradition::Collation::Reading::Lexeme; };
409 # Good to go - add the lexemes.
411 foreach my $lexhash ( @$data ) {
412 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
413 'JSON' => $lexhash ) );
415 $self->clear_lexemes;
416 $self->add_lexeme( @lexemes );
421 return 0 unless $self->has_lexemes;
422 return !grep { !$_->is_disambiguated } $self->lexemes;
433 Text::Tradition::Error->throw(
434 'ident' => 'Reading error',
440 __PACKAGE__->meta->make_immutable;