lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use Module::Load;
   5 use Text::Tradition::Error;
   6 use YAML::XS;
   7 use overload '""' => \&_stringify, 'fallback' => 1;
   8
   9 =head1 NAME
  10
  11 Text::Tradition::Collation::Reading - represents a reading (usually a word)
  12 in a collation.
  13
  14 =head1 DESCRIPTION
  15
  16 Text::Tradition is a library for representation and analysis of collated
  17 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  18 usually a word, that appears in one or more witnesses (manuscripts) of the
  19 tradition; the text of a given witness is composed of a set of readings in
  20 a particular sequence.
  21
  22 =head1 METHODS
  23
  24 =head2 new
  25
  26 Creates a new reading in the given collation with the given attributes.
  27 Options include:
  28
  29 =over 4
  30
  31 =item collation - The Text::Tradition::Collation object to which this
  32 reading belongs.  Required.
  33
  34 =item id - A unique identifier for this reading. Required.
  35
  36 =item text - The word or other text of the reading.
  37
  38 =item is_start - The reading is the starting point for the collation.
  39
  40 =item is_end - The reading is the ending point for the collation.
  41
  42 =item is_lacuna - The 'reading' represents a known gap in the text.
  43
  44 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  45 not use unless you know what you are doing.
  46
  47 =item rank - The sequence number of the reading. This should probably not
  48 be set manually.
  49
  50 =back
  51
  52 One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.
  53
  54 =head2 collation
  55
  56 =head2 id
  57
  58 =head2 text
  59
  60 =head2 is_start
  61
  62 =head2 is_end
  63
  64 =head2 is_lacuna
  65
  66 =head2 rank
  67
  68 Accessor methods for the given attributes.
  69
  70 =cut
  71
  72 has 'collation' => (
  73         is => 'ro',
  74         isa => 'Text::Tradition::Collation',
  75         # required => 1,
  76         weak_ref => 1,
  77         );
  78
  79 has 'id' => (
  80         is => 'ro',
  81         isa => 'Str',
  82         required => 1,
  83         );
  84
  85 has 'text' => (
  86         is => 'ro',
  87         isa => 'Str',
  88         required => 1,
  89         writer => 'alter_text',
  90         );
  91
  92 has 'language' => (
  93         is => 'ro',
  94         isa => 'Str',
  95         predicate => 'has_language',
  96         );
  97
  98 has 'is_start' => (
  99         is => 'ro',
 100         isa => 'Bool',
 101         default => undef,
 102         );
 103
 104 has 'is_end' => (
 105         is => 'ro',
 106         isa => 'Bool',
 107         default => undef,
 108         );
 109
 110 has 'is_lacuna' => (
 111     is => 'ro',
 112     isa => 'Bool',
 113         default => undef,
 114     );
 115
 116 has 'is_ph' => (
 117         is => 'ro',
 118         isa => 'Bool',
 119         default => undef,
 120         );
 121
 122 has 'is_common' => (
 123         is => 'rw',
 124         isa => 'Bool',
 125         default => undef,
 126         );
 127
 128 has 'rank' => (
 129     is => 'rw',
 130     isa => 'Int',
 131     predicate => 'has_rank',
 132     clearer => 'clear_rank',
 133     );
 134
 135 ## For morphological analysis
 136
 137 has 'normal_form' => (
 138         is => 'rw',
 139         isa => 'Str',
 140         predicate => 'has_normal_form',
 141         );
 142
 143 # Holds the lexemes for the reading.
 144 has 'reading_lexemes' => (
 145         traits => ['Array'],
 146         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 147         handles => {
 148                 lexemes => 'elements',
 149                 has_lexemes => 'count',
 150                 clear_lexemes => 'clear',
 151                 add_lexeme => 'push',
 152                 },
 153         default => sub { [] },
 154         );
 155
 156 ## For prefix/suffix readings
 157
 158 has 'join_prior' => (
 159         is => 'ro',
 160         isa => 'Bool',
 161         default => undef,
 162         );
 163
 164 has 'join_next' => (
 165         is => 'ro',
 166         isa => 'Bool',
 167         default => undef,
 168         );
 169
 170
 171 around BUILDARGS => sub {
 172         my $orig = shift;
 173         my $class = shift;
 174         my $args;
 175         if( @_ == 1 ) {
 176                 $args = shift;
 177         } else {
 178                 $args = { @_ };
 179         }
 180
 181         # If one of our special booleans is set, we change the text and the
 182         # ID to match.
 183         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 184                 $args->{'text'} = '#LACUNA#';
 185         } elsif( exists $args->{'is_start'} ) {
 186                 $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
 187                 $args->{'text'} = '#START#';
 188                 $args->{'rank'} = 0;
 189         } elsif( exists $args->{'is_end'} ) {
 190                 $args->{'id'} = '#END#';        # Change the ID to ensure we have only one
 191                 $args->{'text'} = '#END#';
 192         } elsif( exists $args->{'is_ph'} ) {
 193                 $args->{'text'} = $args->{'id'};
 194         }
 195
 196         $class->$orig( $args );
 197 };
 198
 199 # Look for a lexeme-string argument in the build args.
 200 sub BUILD {
 201         my( $self, $args ) = @_;
 202         if( exists $args->{'lexemes'} ) {
 203                 $self->_deserialize_lexemes( $args->{'lexemes'} );
 204         }
 205 }
 206
 207 =head2 is_meta
 208
 209 A meta attribute (ha ha), which should be true if any of our 'special'
 210 booleans are true.  Implies that the reading does not represent a bit
 211 of text found in a witness.
 212
 213 =cut
 214
 215 sub is_meta {
 216         my $self = shift;
 217         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 218 }
 219
 220 =head1 Convenience methods
 221
 222 =head2 related_readings
 223
 224 Calls Collation's related_readings with $self as the first argument.
 225
 226 =cut
 227
 228 sub related_readings {
 229         my $self = shift;
 230         return $self->collation->related_readings( $self, @_ );
 231 }
 232
 233 =head2 witnesses
 234
 235 Calls Collation's reading_witnesses with $self as the first argument.
 236
 237 =cut
 238
 239 sub witnesses {
 240         my $self = shift;
 241         return $self->collation->reading_witnesses( $self, @_ );
 242 }
 243
 244 =head2 predecessors
 245
 246 Returns a list of Reading objects that immediately precede $self in the collation.
 247
 248 =cut
 249
 250 sub predecessors {
 251         my $self = shift;
 252         my @pred = $self->collation->sequence->predecessors( $self->id );
 253         return map { $self->collation->reading( $_ ) } @pred;
 254 }
 255
 256 =head2 successors
 257
 258 Returns a list of Reading objects that immediately follow $self in the collation.
 259
 260 =cut
 261
 262 sub successors {
 263         my $self = shift;
 264         my @succ = $self->collation->sequence->successors( $self->id );
 265         return map { $self->collation->reading( $_ ) } @succ;
 266 }
 267
 268 =head2 set_identical( $other_reading)
 269
 270 Backwards compatibility method, to add a transposition relationship
 271 between $self and $other_reading.  Don't use this.
 272
 273 =cut
 274
 275 sub set_identical {
 276         my( $self, $other ) = @_;
 277         return $self->collation->add_relationship( $self, $other,
 278                 { 'type' => 'transposition' } );
 279 }
 280
 281 sub _stringify {
 282         my $self = shift;
 283         return $self->id;
 284 }
 285
 286 =head1 MORPHOLOGY
 287
 288 Methods for the morphological information (if any) attached to readings.
 289 A reading may be made up of multiple lexemes; the concatenated lexeme
 290 strings ought to match the reading's normalized form.
 291
 292 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
 293 on Lexeme objects and their attributes.
 294
 295 =head2 has_lexemes
 296
 297 Returns a true value if the reading has any attached lexemes.
 298
 299 =head2 lexemes
 300
 301 Returns the Lexeme objects (if any) attached to the reading.
 302
 303 =head2 clear_lexemes
 304
 305 Wipes any associated Lexeme objects out of the reading.
 306
 307 =head2 add_lexeme( $lexobj )
 308
 309 Adds the Lexeme in $lexobj to the list of lexemes.
 310
 311 =head2 lemmatize
 312
 313 If the language of the reading is set, this method will use the appropriate
 314 Language model to determine the lexemes that belong to this reading.  See
 315 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
 316
 317 =cut
 318
 319 sub lemmatize {
 320         my $self = shift;
 321         unless( $self->has_language ) {
 322                 warn "Please set a language to lemmatize a tradition";
 323                 return;
 324         }
 325         my $mod = "Text::Tradition::Language::" . $self->language;
 326         load( $mod );
 327         $mod->can( 'reading_lookup' )->( $self );
 328
 329 }
 330
 331 # For graph serialization. Return a string representation of the associated
 332 # reading lexemes.
 333 # TODO Push this in to the Lexeme package.
 334 sub _serialize_lexemes {
 335         my $self = shift;
 336         my @lexstrs;
 337         foreach my $l ( $self->lexemes ) {
 338                 my @mf;
 339                 foreach my $wf ( $l->matching_forms ) {
 340                         push( @mf, $wf->to_string );
 341                 }
 342                 my $form = $l->form ? $l->form->to_string : '';
 343                 push( @lexstrs, join( '|L|', $l->language, $l->string, $form,
 344                         join( '|M|', @mf ) ) );
 345         }
 346         return join( '|R|', @lexstrs );
 347 }
 348
 349 sub _deserialize_lexemes {
 350         my( $self, $data ) = @_;
 351         return unless $data;
 352
 353         # Need to have the lexeme modules in order to have lexemes.
 354         eval {
 355                 use Text::Tradition::Collation::Reading::Lexeme;
 356                 use Text::Tradition::Collation::Reading::WordForm;
 357         };
 358         throw( $@ ) if $@;
 359
 360         # Good to go - add the lexemes.
 361         my @lexemes;
 362         foreach my $lexdata ( split( /\|R\|/, $data ) ) {
 363                 my( $lang, $lstring, $form, $allforms ) = split( /\|L\|/, $lexdata );
 364                 my @wfdata;
 365                 push( @wfdata, $form ) if $form;
 366                 push( @wfdata, split( /\|M\|/, $allforms ) );
 367                 my @wforms;
 368                 foreach my $wd ( @wfdata ) {
 369                         my $wf = Text::Tradition::Collation::Reading::WordForm->new(
 370                                 'serial' => $wd );
 371                         push( @wforms, $wf );
 372                 }
 373                 my %largs = ( 'language' => $lang, 'string' => $lstring );
 374                 if( $form ) {
 375                         $largs{'form'} = shift @wforms;
 376                         $largs{'is_disambiguated'} = 1;
 377                 }
 378                 $largs{'wordform_matchlist'} = \@wforms;
 379                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new( %largs ) );
 380         }
 381         $self->clear_lexemes;
 382         $self->add_lexeme( @lexemes );
 383 }
 384
 385 ## Utility methods
 386
 387 sub TO_JSON {
 388         my $self = shift;
 389         return $self->text;
 390 }
 391
 392 sub throw {
 393         Text::Tradition::Error->throw(
 394                 'ident' => 'Reading error',
 395                 'message' => $_[0],
 396                 );
 397 }
 398
 399 no Moose;
 400 __PACKAGE__->meta->make_immutable;
 401
 402 1;