1 package Text::Tradition::Collation::Reading;
4 use Moose::Util::TypeConstraints;
5 use JSON qw/ from_json /;
7 use Text::Tradition::Error;
8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
10 use overload '""' => \&_stringify, 'fallback' => 1;
14 where { $_ =~ /\A$xml10_name_rx\z/ },
15 message { 'Reading ID must be a valid XML attribute string' };
17 no Moose::Util::TypeConstraints;
21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
26 Text::Tradition is a library for representation and analysis of collated
27 texts, particularly medieval ones. A 'reading' refers to a unit of text,
28 usually a word, that appears in one or more witnesses (manuscripts) of the
29 tradition; the text of a given witness is composed of a set of readings in
36 Creates a new reading in the given collation with the given attributes.
41 =item collation - The Text::Tradition::Collation object to which this
42 reading belongs. Required.
44 =item id - A unique identifier for this reading. Required.
46 =item text - The word or other text of the reading.
48 =item is_start - The reading is the starting point for the collation.
50 =item is_end - The reading is the ending point for the collation.
52 =item is_lacuna - The 'reading' represents a known gap in the text.
54 =item is_ph - A temporary placeholder for apparatus parsing purposes. Do
55 not use unless you know what you are doing.
57 =item rank - The sequence number of the reading. This should probably not
62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
78 Accessor methods for the given attributes.
84 isa => 'Text::Tradition::Collation',
99 writer => 'alter_text',
105 predicate => 'has_language',
141 predicate => 'has_rank',
142 clearer => 'clear_rank',
145 ## For morphological analysis
147 has 'grammar_invalid' => (
153 has 'is_nonsense' => (
159 has 'normal_form' => (
162 predicate => '_has_normal_form',
163 clearer => '_clear_normal_form',
166 # Holds the lexemes for the reading.
167 has 'reading_lexemes' => (
169 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
172 lexemes => 'elements',
173 has_lexemes => 'count',
174 clear_lexemes => 'clear',
175 add_lexeme => 'push',
177 default => sub { [] },
180 ## For prefix/suffix readings
182 has 'join_prior' => (
# Wraps Moose's BUILDARGS so that the constructor arguments can be adjusted
# before the object is built. The adjusted $args hashref is what is finally
# handed to the original BUILDARGS.
#
# The flag checks form a single if/elsif chain, so only the first matching
# branch takes effect:
#   - is_lacuna: supplies '#LACUNA#' as text, but only when the caller passed
#     no 'text' key at all; a lacuna with explicit text fails this test and
#     falls through to the later checks.
#   - is_start / is_end: overwrite both 'id' and 'text' unconditionally, even
#     if the caller supplied them.
#   - is_ph: uses the reading's ID as its text.
195 around BUILDARGS => sub {
205 # If one of our special booleans is set, we change the text and the
207 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
208 $args->{'text'} = '#LACUNA#';
209 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
210 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
211 $args->{'text'} = '#START#';
213 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
214 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
215 $args->{'text'} = '#END#';
216 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
217 $args->{'text'} = $args->{'id'};
220 # Backwards compatibility for non-XMLname IDs
# The ID is read after the flag handling above, so for start/end readings it
# is already '__START__' / '__END__' at this point.
221 my $rid = $args->{'id'};
# Only IDs whose first character is not a valid XML name-start character
# enter this branch.
224 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
# Store the (possibly adjusted) ID back before delegating.
227 $args->{'id'} = $rid;
229 $class->$orig( $args );
232 # Look for a lexeme-string argument in the build args.
234 my( $self, $args ) = @_;
235 if( exists $args->{'lexemes'} ) {
236 $self->_deserialize_lexemes( $args->{'lexemes'} );
240 # Make normal_form default to text, transparently.
# When called with a value identical to the reading's text, the stored
# normal form is cleared rather than set, so no redundant copy is kept.
# NOTE(review): $arg is tested for truth, not definedness, so a false value
# such as '0' or '' is treated the same as calling the accessor with no
# argument - confirm this is intended.
241 around 'normal_form' => sub {
245 if( $arg && $arg eq $self->text ) {
246 $self->_clear_normal_form;
# Reader call (no usable argument) on a reading with no stored normal form.
248 } elsif( !$arg && !$self->_has_normal_form ) {
257 A meta attribute (ha ha), which should be true if any of our 'special'
258 booleans are true. Implies that the reading does not represent a bit
259 of text found in a witness.
265 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
268 =head1 Convenience methods
270 =head2 related_readings
272 Calls Collation's related_readings with $self as the first argument.
276 sub related_readings {
278 return $self->collation->related_readings( $self, @_ );
283 Calls Collation's reading_witnesses with $self as the first argument.
289 return $self->collation->reading_witnesses( $self, @_ );
294 Returns a list of Reading objects that immediately precede $self in the collation.
300 my @pred = $self->collation->sequence->predecessors( $self->id );
301 return map { $self->collation->reading( $_ ) } @pred;
306 Returns a list of Reading objects that immediately follow $self in the collation.
312 my @succ = $self->collation->sequence->successors( $self->id );
313 return map { $self->collation->reading( $_ ) } @succ;
316 =head2 set_identical( $other_reading )
318 Backwards compatibility method, to add a transposition relationship
319 between $self and $other_reading. Don't use this.
324 my( $self, $other ) = @_;
325 return $self->collation->add_relationship( $self, $other,
326 { 'type' => 'transposition' } );
336 Methods for the morphological information (if any) attached to readings.
337 A reading may be made up of multiple lexemes; the concatenated lexeme
338 strings ought to match the reading's normalized form.
340 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
341 on Lexeme objects and their attributes.
345 Returns a true value if the reading has any attached lexemes.
349 Returns the Lexeme objects (if any) attached to the reading.
353 Wipes any associated Lexeme objects out of the reading.
355 =head2 add_lexeme( $lexobj )
357 Adds the Lexeme in $lexobj to the list of lexemes.
361 If the language of the reading is set, this method will use the appropriate
362 Language model to determine the lexemes that belong to this reading. See
363 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
369 unless( $self->has_language ) {
370 warn "Please set a language to lemmatize a tradition";
373 my $mod = "Text::Tradition::Language::" . $self->language;
375 $mod->can( 'reading_lookup' )->( $self );
379 # For graph serialization. Return a JSON representation of the associated
# Returns a JSON string encoding an array of all lexemes currently attached
# to the reading (an empty JSON array when there are none).
381 sub _serialize_lexemes {
# convert_blessed lets the encoder serialize objects through their TO_JSON
# method; allow_blessed keeps encoding from dying on a blessed value that
# has no such method.
# NOTE(review): presumably the Lexeme class provides TO_JSON - confirm.
383 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
384 return $json->encode( [ $self->lexemes ] );
387 # Given a JSON representation of the lexemes, instantiate them and add
388 # them to the reading.
# $json is a JSON string; the decoded value is dereferenced as an array, and
# each element is passed to the Lexeme constructor as its 'JSON' argument.
# An empty array returns early and leaves any existing lexemes untouched;
# otherwise the existing lexemes are replaced by the decoded ones.
389 sub _deserialize_lexemes {
390 my( $self, $json ) = @_;
391 my $data = from_json( $json );
392 return unless @$data;
394 # Need to have the lexeme module in order to have lexemes.
# NOTE(review): 'use' is processed at compile time, so wrapping it in a block
# eval neither defers the load nor traps a failure; a missing Lexeme module
# would abort compilation of this file instead of being caught here. A
# 'require' inside the eval would be needed for a run-time check.
395 eval { use Text::Tradition::Collation::Reading::Lexeme; };
398 # Good to go - add the lexemes.
400 foreach my $lexhash ( @$data ) {
401 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
402 'JSON' => $lexhash ) );
# Replace, rather than append to, whatever lexemes were already attached.
404 $self->clear_lexemes;
405 $self->add_lexeme( @lexemes );
410 return 0 unless $self->has_lexemes;
411 return !grep { !$_->is_disambiguated } $self->lexemes;
422 Text::Tradition::Error->throw(
423 'ident' => 'Reading error',
429 __PACKAGE__->meta->make_immutable;