lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use overload '""' => \&_stringify, 'fallback' => 1;
   5
   6 =head1 NAME
   7
   8 Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
   9
  10 =head1 DESCRIPTION
  11
  12 Text::Tradition is a library for representation and analysis of collated
  13 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  14 usually a word, that appears in one or more witnesses (manuscripts) of the
  15 tradition; the text of a given witness is composed of a set of readings in
  16 a particular sequence.
  17
  18 =head1 METHODS
  19
  20 =head2 new
  21
  22 Creates a new reading in the given collation with the given attributes.
  23 Options include:
  24
  25 =over 4
  26
  27 =item collation - The Text::Tradition::Collation object to which this reading belongs.  Required.
  28
  29 =item id - A unique identifier for this reading. Required.
  30
  31 =item text - The word or other text of the reading.
  32
  33 =item is_start - The reading is the starting point for the collation.
  34
  35 =item is_end - The reading is the ending point for the collation.
  36
  37 =item is_lacuna - The 'reading' represents a known gap in the text.
  38
  39 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do not use unless you know what you are doing.
  40
  41 =item rank - The sequence number of the reading. This should probably not be set manually.
  42
  43 =back
  44
  45 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
  46
  47 =head2 collation
  48
  49 =head2 id
  50
  51 =head2 text
  52
  53 =head2 is_start
  54
  55 =head2 is_end
  56
  57 =head2 is_lacuna
  58
  59 =head2 rank
  60
  61 Accessor methods for the given attributes.
  62
  63 =cut
  64
  65 has 'collation' => (
  66         is => 'ro',
  67         isa => 'Text::Tradition::Collation',
  68         # required => 1,
  69         weak_ref => 1,
  70         );
  71
  72 has 'id' => (
  73         is => 'ro',
  74         isa => 'Str',
  75         required => 1,
  76         );
  77
  78 has 'text' => (
  79         is => 'ro',
  80         isa => 'Str',
  81         required => 1,
  82         writer => 'alter_text',
  83         );
  84
  85 has 'language' => (
  86         is => 'ro',
  87         isa => 'Str',
  88         default => 'Default',
  89         );
  90
  91 has 'is_start' => (
  92         is => 'ro',
  93         isa => 'Bool',
  94         default => undef,
  95         );
  96
  97 has 'is_end' => (
  98         is => 'ro',
  99         isa => 'Bool',
 100         default => undef,
 101         );
 102
 103 has 'is_lacuna' => (
 104     is => 'ro',
 105     isa => 'Bool',
 106         default => undef,
 107     );
 108
 109 has 'is_ph' => (
 110         is => 'ro',
 111         isa => 'Bool',
 112         default => undef,
 113         );
 114
 115 has 'is_common' => (
 116         is => 'rw',
 117         isa => 'Bool',
 118         default => undef,
 119         );
 120
 121 has 'rank' => (
 122     is => 'rw',
 123     isa => 'Int',
 124     predicate => 'has_rank',
 125     clearer => 'clear_rank',
 126     );
 127
 128 ## For morphological analysis
 129
 130 has 'normal_form' => (
 131         is => 'rw',
 132         isa => 'Str',
 133         predicate => 'has_normal_form',
 134         );
 135
 136 has 'lemma' => (
 137         is => 'rw',
 138         isa => 'Str',
 139         predicate => 'has_lemma',
 140         );
 141
 142 has 'morphology' => (
 143         traits => ['Array'],
 144         isa => 'ArrayRef[HashRef[ArrayRef[Text::Tradition::Collation::Reading::Morphology]]]',
 145         handles => {
 146                 lexemes => 'elements',
 147                 has_morphology => 'count',
 148                 _clear_morph => 'clear',
 149                 _add_morph => 'push',
 150                 },
 151         );
 152
 153 ## For prefix/suffix readings
 154
 155 has 'join_prior' => (
 156         is => 'ro',
 157         isa => 'Bool',
 158         default => undef,
 159         );
 160
 161 has 'join_next' => (
 162         is => 'ro',
 163         isa => 'Bool',
 164         default => undef,
 165         );
 166
 167
 168 around BUILDARGS => sub {
 169         my $orig = shift;
 170         my $class = shift;
 171         my $args;
 172         if( @_ == 1 ) {
 173                 $args = shift;
 174         } else {
 175                 $args = { @_ };
 176         }
 177
 178         # If one of our special booleans is set, we change the text and the
 179         # ID to match.
 180         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 181                 $args->{'text'} = '#LACUNA#';
 182         } elsif( exists $args->{'is_start'} ) {
 183                 $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
 184                 $args->{'text'} = '#START#';
 185                 $args->{'rank'} = 0;
 186         } elsif( exists $args->{'is_end'} ) {
 187                 $args->{'id'} = '#END#';        # Change the ID to ensure we have only one
 188                 $args->{'text'} = '#END#';
 189         } elsif( exists $args->{'is_ph'} ) {
 190                 $args->{'text'} = $args->{'id'};
 191         }
 192
 193         $class->$orig( $args );
 194 };
 195
 196 =head2 is_meta
 197
 198 A meta attribute (ha ha), which should be true if any of our 'special'
 199 booleans are true.  Implies that the reading does not represent a bit
 200 of text found in a witness.
 201
 202 =cut
 203
 204 sub is_meta {
 205         my $self = shift;
 206         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 207 }
 208
 209 =head1 Convenience methods
 210
 211 =head2 related_readings
 212
 213 Calls Collation's related_readings with $self as the first argument.
 214
 215 =cut
 216
 217 sub related_readings {
 218         my $self = shift;
 219         return $self->collation->related_readings( $self, @_ );
 220 }
 221
 222 =head2 witnesses
 223
 224 Calls Collation's reading_witnesses with $self as the first argument.
 225
 226 =cut
 227
 228 sub witnesses {
 229         my $self = shift;
 230         return $self->collation->reading_witnesses( $self, @_ );
 231 }
 232
 233 =head2 predecessors
 234
 235 Returns a list of Reading objects that immediately precede $self in the collation.
 236
 237 =cut
 238
 239 sub predecessors {
 240         my $self = shift;
 241         my @pred = $self->collation->sequence->predecessors( $self->id );
 242         return map { $self->collation->reading( $_ ) } @pred;
 243 }
 244
 245 =head2 successors
 246
 247 Returns a list of Reading objects that immediately follow $self in the collation.
 248
 249 =cut
 250
 251 sub successors {
 252         my $self = shift;
 253         my @succ = $self->collation->sequence->successors( $self->id );
 254         return map { $self->collation->reading( $_ ) } @succ;
 255 }
 256
 257 =head2 set_identical( $other_reading)
 258
 259 Backwards compatibility method, to add a transposition relationship
 260 between $self and $other_reading.  Don't use this.
 261
 262 =cut
 263
 264 sub set_identical {
 265         my( $self, $other ) = @_;
 266         return $self->collation->add_relationship( $self, $other,
 267                 { 'type' => 'transposition' } );
 268 }
 269
 270 sub _stringify {
 271         my $self = shift;
 272         return $self->id;
 273 }
 274
 275 =head1 MORPHOLOGY
 276
 277 A few methods to try to tack on morphological information.
 278
 279 =head2 is_disambiguated
 280
 281 Returns true if there is only one tag per lexeme in this reading.
 282
 283 =head2 use_lexemes
 284
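 285 Declares the given list of lexemes as the components of this reading, discarding any morphological tags previously recorded. Warns and leaves the reading untouched if the lexemes, joined together, do not spell the reading's normal form (or its text, if no normal form is set), ignoring case, whitespace and hyphens.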
 286
 287 =head2 add_morphological_tag
 288
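 289 Takes an optional lexeme and a set of options for a Morphology object, and attaches the resulting morphological tag to that lexeme. If no lexeme is given, the tag is attached to the reading text as a whole. Warns if the lexeme is not part of this reading.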
 290
 291 =head2 disambiguate
 292
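 293 Takes an optional lexeme and an index, and keeps only the morphological tag at that index for the lexeme (or for the reading text as a whole, if no lexeme is given), discarding the alternatives. Warns if the lexeme is not part of this reading.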
 294
 295 =cut
 296
 297 sub use_lexemes {
 298         my( $self, @lexemes ) = @_;
 299         # The lexemes need to be the same as $self->text.
 300         my $cmpstr = $self->has_normal_form ? lc( $self->normal_form ) : lc( $self->text );
 301         $cmpstr =~ s/[\s-]+//g;
 302         my $lexstr = lc( join( '', @lexemes ) );
 303         $lexstr =~ s/[\s-]+//g;
 304         unless( $lexstr eq $cmpstr ) {
 305                 warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
 306                 return;
 307         }
 308         $self->_clear_morph;
 309         map { $self->_add_morph( { $_ => [] } ) } @lexemes;
 310 }
 311
 312 sub add_morphological_tag {
 313         my( $self, $lexeme, $opts ) = @_;
 314         my $struct;
 315         unless( $opts ) {
 316                 # No lexeme was passed; use reading text.
 317                 $opts = $lexeme;
 318                 $lexeme = $self->text;
 319                 $self->use_lexemes( $lexeme );
 320         }
 321         # Get the correct container
 322         ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
 323         unless( $struct ) {
 324                 warn "No lexeme $lexeme exists in this reading";
 325                 return;
 326         }
 327         # Now make the morph object and add it to this lexeme.
 328         my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
 329         # TODO Check for existence
 330         push( @{$struct->{$lexeme}}, $morph_obj );
 331 }
 332
 333 sub disambiguate {
 334         my( $self, $lexeme, $index ) = @_;
 335         my $struct;
 336         unless( $index ) {
 337                 # No lexeme was passed; use reading text.
 338                 $index = $lexeme;
 339                 $lexeme = $self->text;
 340         }
 341         # Get the correct container
 342         ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
 343         unless( $struct ) {
 344                 warn "No lexeme $lexeme exists in this reading";
 345                 return;
 346         }
 347         # Keep the object at the selected index
 348         my $selected = $struct->{$lexeme}->[$index];
 349         $struct->{$lexeme} = [ $selected ];
 350 }
 351
 352 sub is_disambiguated {
 353         my $self = shift;
 354         return undef unless $self->has_morphology;
 355         foreach my $lexeme ( $self->lexemes ) {
 356                 my( $key ) = keys %$lexeme; # will be only one
 357                 return undef unless @{$lexeme->{$key}} == 1;
 358         }
 359         return 1;
 360 }
 361
 362 ## Utility methods
 363
 364 sub TO_JSON {
 365         my $self = shift;
 366         return $self->text;
 367 }
 368
 369 ## TODO will need a throw() here
 370
 371 no Moose;
 372 __PACKAGE__->meta->make_immutable;
 373
 374 ###################################################
 375 ### Morphology objects, to be attached to readings
 376 ###################################################
 377
 378 package Text::Tradition::Collation::Reading::Morphology;
 379
 380 use Moose;
 381
 382 has 'lemma' => (
 383         is => 'ro',
 384         isa => 'Str',
 385         required => 1,
 386         );
 387
 388 has 'code' => (
 389         is => 'ro',
 390         isa => 'Str',
 391         required => 1,
 392         );
 393
 394 has 'language' => (
 395         is => 'ro',
 396         isa => 'Str',
 397         required => 1,
 398         );
 399
 400 ## Transmute codes into comparison arrays for our various languages.
 401
 402 around BUILDARGS => sub {
 403         my $orig = shift;
 404         my $class = shift;
 405         my $args;
 406         if( @_ == 1 && ref( $_[0] ) ) {
 407                 $args = shift;
 408         } else {
 409                 $args = { @_ };
 410         }
 411         if( exists( $args->{'serial'} ) ) {
 412                 my( $lemma, $code ) = split( /!!/, delete $args->{'serial'} );
 413                 $args->{'lemma'} = $lemma;
 414                 $args->{'code'} = $code;
 415         }
 416         $class->$orig( $args );
 417 };
 418
 419 sub serialization {
 420         my $self = shift;
 421         return join( '!!', $self->lemma, $self->code );
 422 };
 423
 424 sub comparison_array {
 425         my $self = shift;
 426         if( $self->language eq 'French' ) {
 427                 my @array;
 428                 my @bits = split( /\+/, $self->code );
 429                 # First push the non k/v parts.
 430                 while( @bits && $bits[0] !~ /=/ ) {
 431                         push( @array, shift @bits );
 432                 }
 433                 while( @array < 2 ) {
 434                         push( @array, undef );
 435                 }
 436                 # Now push the k/v parts in a known order.
 437                 my @fields = qw/ Pers Nb Temps Genre Spec Fonc /;
 438                 my %props;
 439                 map { my( $k, $v ) = split( /=/, $_ ); $props{$k} = $v; } @bits;
 440                 foreach my $k ( @fields ) {
 441                         push( @array, $props{$k} );
 442                 }
 443                 # Give the answer.
 444                 return @array;
 445         } elsif( $self->language eq 'English' ) {
 446                 # Do something as yet undetermined
 447         } else {
 448                 # Latin or Greek or Armenian, just split the chars
 449                 return split( '', $self->code );
 450         }
 451 };
 452
 453 no Moose;
 454 __PACKAGE__->meta->make_immutable;
 455
 456 1;
 457