1 package Text::Tradition::Collation::Reading;
4 use Moose::Util::TypeConstraints;
5 use JSON qw/ from_json /;
7 use Text::Tradition::Error;
8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
10 use overload '""' => \&_stringify, 'fallback' => 1;
14 where { $_ =~ /\A$xml10_name_rx\z/ },
15 message { 'Reading ID must be a valid XML attribute string' };
17 no Moose::Util::TypeConstraints;
21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
26 Text::Tradition is a library for representation and analysis of collated
27 texts, particularly medieval ones. A 'reading' refers to a unit of text,
28 usually a word, that appears in one or more witnesses (manuscripts) of the
29 tradition; the text of a given witness is composed of a set of readings in
36 Creates a new reading in the given collation with the given attributes.
41 =item collation - The Text::Tradition::Collation object to which this
42 reading belongs. Required.
44 =item id - A unique identifier for this reading. Required.
46 =item text - The word or other text of the reading.
48 =item is_start - The reading is the starting point for the collation.
50 =item is_end - The reading is the ending point for the collation.
52 =item is_lacuna - The 'reading' represents a known gap in the text.
54 =item is_ph - A temporary placeholder for apparatus parsing purposes. Do
55 not use unless you know what you are doing.
57 =item rank - The sequence number of the reading. This should probably not
62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
78 Accessor methods for the given attributes.
84 isa => 'Text::Tradition::Collation',
99 writer => 'alter_text',
105 predicate => 'has_language',
141 predicate => 'has_rank',
142 clearer => 'clear_rank',
145 ## For morphological analysis
147 has 'grammar_invalid' => (
153 has 'is_nonsense' => (
159 has 'normal_form' => (
162 predicate => 'has_normal_form',
165 # Holds the lexemes for the reading.
166 has 'reading_lexemes' => (
168 isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
171 lexemes => 'elements',
172 has_lexemes => 'count',
173 clear_lexemes => 'clear',
174 add_lexeme => 'push',
176 default => sub { [] },
179 ## For prefix/suffix readings
181 has 'join_prior' => (
194 around BUILDARGS => sub {
204 # If one of our special booleans is set, we change the text and the
206 if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
207 $args->{'text'} = '#LACUNA#';
208 } elsif( exists $args->{'is_start'} ) {
209 $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
210 $args->{'text'} = '#START#';
212 } elsif( exists $args->{'is_end'} ) {
213 $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
214 $args->{'text'} = '#END#';
215 } elsif( exists $args->{'is_ph'} ) {
216 $args->{'text'} = $args->{'id'};
219 # Backwards compatibility for non-XMLname IDs
220 my $rid = $args->{'id'};
223 if( $rid !~ /^$xml10_namestartchar_rx/ ) {
226 $args->{'id'} = $rid;
228 $class->$orig( $args );
231 # Look for a lexeme-string argument in the build args.
233 my( $self, $args ) = @_;
234 if( exists $args->{'lexemes'} ) {
235 $self->_deserialize_lexemes( $args->{'lexemes'} );
241 A meta attribute (ha ha), which is true if any of our 'special'
242 booleans are true. Implies that the reading does not represent a bit
243 of text found in a witness.
249 return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
252 =head1 Convenience methods
254 =head2 related_readings
256 Calls Collation's related_readings with $self as the first argument.
260 sub related_readings {
262 return $self->collation->related_readings( $self, @_ );
267 Calls Collation's reading_witnesses with $self as the first argument.
273 return $self->collation->reading_witnesses( $self, @_ );
278 Returns a list of Reading objects that immediately precede $self in the collation.
284 my @pred = $self->collation->sequence->predecessors( $self->id );
285 return map { $self->collation->reading( $_ ) } @pred;
290 Returns a list of Reading objects that immediately follow $self in the collation.
296 my @succ = $self->collation->sequence->successors( $self->id );
297 return map { $self->collation->reading( $_ ) } @succ;
300 =head2 set_identical( $other_reading )
302 Backwards compatibility method, to add a transposition relationship
303 between $self and $other_reading. Don't use this.
308 my( $self, $other ) = @_;
309 return $self->collation->add_relationship( $self, $other,
310 { 'type' => 'transposition' } );
320 Methods for the morphological information (if any) attached to readings.
321 A reading may be made up of multiple lexemes; the concatenated lexeme
322 strings ought to match the reading's normalized form.
324 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
325 on Lexeme objects and their attributes.
329 Returns a true value if the reading has any attached lexemes.
333 Returns the Lexeme objects (if any) attached to the reading.
337 Wipes any associated Lexeme objects out of the reading.
339 =head2 add_lexeme( $lexobj )
341 Adds the Lexeme in $lexobj to the list of lexemes.
345 If the language of the reading is set, this method will use the appropriate
346 Language model to determine the lexemes that belong to this reading. See
347 L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
353 unless( $self->has_language ) {
354 warn "Please set a language to lemmatize a tradition";
357 my $mod = "Text::Tradition::Language::" . $self->language;
359 $mod->can( 'reading_lookup' )->( $self );
363 # For graph serialization. Return a JSON representation of the associated
365 sub _serialize_lexemes {
367 my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
368 return $json->encode( [ $self->lexemes ] );
371 # Given a JSON representation of the lexemes, instantiate them and add
372 # them to the reading.
373 sub _deserialize_lexemes {
374 my( $self, $json ) = @_;
375 my $data = from_json( $json );
376 return unless @$data;
378 # Need to have the lexeme module in order to have lexemes.
379 eval { use Text::Tradition::Collation::Reading::Lexeme; };
382 # Good to go - add the lexemes.
384 foreach my $lexhash ( @$data ) {
385 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
386 'JSON' => $lexhash ) );
388 $self->clear_lexemes;
389 $self->add_lexeme( @lexemes );
400 Text::Tradition::Error->throw(
401 'ident' => 'Reading error',
407 __PACKAGE__->meta->make_immutable;