1 package Text::Tradition::Collation::Reading;
4 use Moose::Util::TypeConstraints;
5 use JSON qw/ from_json /;
7 use Text::Tradition::Error;
8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
10 use overload '""' => \&_stringify, 'fallback' => 1;
14 where { $_ =~ /\A$xml10_name_rx\z/ },
15 message { 'Reading ID must be a valid XML attribute string' };
17 no Moose::Util::TypeConstraints;
21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
26 Text::Tradition is a library for representation and analysis of collated
27 texts, particularly medieval ones. A 'reading' refers to a unit of text,
28 usually a word, that appears in one or more witnesses (manuscripts) of the
29 tradition; the text of a given witness is composed of a set of readings in
36 Creates a new reading in the given collation with the given attributes.
41 =item collation - The Text::Tradition::Collation object to which this
42 reading belongs. Required.
44 =item id - A unique identifier for this reading. Required.
46 =item text - The word or other text of the reading.
48 =item is_start - The reading is the starting point for the collation.
50 =item is_end - The reading is the ending point for the collation.
52 =item is_lacuna - The 'reading' represents a known gap in the text.
54 =item is_ph - A temporary placeholder for apparatus parsing purposes. Do
55 not use unless you know what you are doing.
57 =item rank - The sequence number of the reading. This should probably not
62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
78 Accessor methods for the given attributes.
84 isa => 'Text::Tradition::Collation',
99 writer => 'alter_text',
105 predicate => 'has_language',
141 predicate => 'has_rank',
142 clearer => 'clear_rank',
145 ## For morphological analysis
147 has 'grammar_invalid' => (
153 has 'is_nonsense' => (
159 has 'normal_form' => (
162 predicate => '_has_normal_form',
163 clearer => '_clear_normal_form',
166 # Holds the lexemes for the reading.
167 has 'reading_lexemes' => (
169 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
172 lexemes => 'elements',
173 has_lexemes => 'count',
174 clear_lexemes => 'clear',
175 add_lexeme => 'push',
177 default => sub { [] },
180 ## For prefix/suffix readings
182 has 'join_prior' => (
186 writer => '_set_join_prior',
193 writer => '_set_join_next',
197 around BUILDARGS => sub {
207 # If one of our special booleans is set, we change the text and the
209 if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
210 $args->{'text'} = '#LACUNA#';
211 } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
212 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
213 $args->{'text'} = '#START#';
215 } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
216 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
217 $args->{'text'} = '#END#';
218 } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
219 $args->{'text'} = $args->{'id'};
222 # Backwards compatibility for non-XMLname IDs
223 my $rid = $args->{'id'};
226 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
229 $args->{'id'} = $rid;
231 $class->$orig( $args );
234 # Look for a lexeme-string argument in the build args.
236 my( $self, $args ) = @_;
237 if( exists $args->{'lexemes'} ) {
238 $self->_deserialize_lexemes( $args->{'lexemes'} );
242 # Make normal_form default to text, transparently.
243 around 'normal_form' => sub {
247 if( $arg && $arg eq $self->text ) {
248 $self->_clear_normal_form;
250 } elsif( !$arg && !$self->_has_normal_form ) {
259 A meta attribute (ha ha), which should be true if any of our 'special'
260 booleans are true. Implies that the reading does not represent a bit
261 of text found in a witness.
267 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
270 =head1 Convenience methods
272 =head2 related_readings
274 Calls Collation's related_readings with $self as the first argument.
278 sub related_readings {
280 return $self->collation->related_readings( $self, @_ );
285 Calls Collation's reading_witnesses with $self as the first argument.
291 return $self->collation->reading_witnesses( $self, @_ );
296 Returns a list of Reading objects that immediately precede $self in the collation.
302 my @pred = $self->collation->sequence->predecessors( $self->id );
303 return map { $self->collation->reading( $_ ) } @pred;
308 Returns a list of Reading objects that immediately follow $self in the collation.
314 my @succ = $self->collation->sequence->successors( $self->id );
315 return map { $self->collation->reading( $_ ) } @succ;
318 =head2 set_identical( $other_reading )
320 Backwards compatibility method, to add a transposition relationship
321 between $self and $other_reading. Don't use this.
326 my( $self, $other ) = @_;
327 return $self->collation->add_relationship( $self, $other,
328 { 'type' => 'transposition' } );
338 Methods for the morphological information (if any) attached to readings.
339 A reading may be made up of multiple lexemes; the concatenated lexeme
340 strings ought to match the reading's normalized form.
342 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
343 on Lexeme objects and their attributes.
347 Returns a true value if the reading has any attached lexemes.
351 Returns the Lexeme objects (if any) attached to the reading.
355 Wipes any associated Lexeme objects out of the reading.
357 =head2 add_lexeme( $lexobj )
359 Adds the Lexeme in $lexobj to the list of lexemes.
363 If the language of the reading is set, this method will use the appropriate
364 Language model to determine the lexemes that belong to this reading. See
365 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
371 unless( $self->has_language ) {
372 warn "Please set a language to lemmatize a tradition";
375 my $mod = "Text::Tradition::Language::" . $self->language;
377 $mod->can( 'reading_lookup' )->( $self );
381 # For graph serialization. Return a JSON representation of the associated
383 sub _serialize_lexemes {
385 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
386 return $json->encode( [ $self->lexemes ] );
389 # Given a JSON representation of the lexemes, instantiate them and add
390 # them to the reading.
391 sub _deserialize_lexemes {
392 my( $self, $json ) = @_;
393 my $data = from_json( $json );
394 return unless @$data;
396 # Need to have the lexeme module in order to have lexemes. Load it with 'require' (run time) rather than 'use' (compile time) so that the eval can actually trap a load failure and leave the error in $@.
397 eval { require Text::Tradition::Collation::Reading::Lexeme; };
400 # Good to go - add the lexemes.
402 foreach my $lexhash ( @$data ) {
403 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
404 'JSON' => $lexhash ) );
406 $self->clear_lexemes;
407 $self->add_lexeme( @lexemes );
412 return 0 unless $self->has_lexemes;
413 return !grep { !$_->is_disambiguated } $self->lexemes;
424 Text::Tradition::Error->throw(
425 'ident' => 'Reading error',
431 __PACKAGE__->meta->make_immutable;