lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use overload '""' => \&_stringify, 'fallback' => 1;
   5
   6 =head1 NAME
   7
   8 Text::Tradition::Collation::Reading - represents a reading (usually a word)
   9 in a collation.
  10
  11 =head1 DESCRIPTION
  12
  13 Text::Tradition is a library for representation and analysis of collated
  14 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  15 usually a word, that appears in one or more witnesses (manuscripts) of the
  16 tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
  18
  19 =head1 METHODS
  20
  21 =head2 new
  22
  23 Creates a new reading in the given collation with the given attributes.
  24 Options include:
  25
  26 =over 4
  27
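=item collation - The Text::Tradition::Collation object to which this
reading belongs.  Needed by the convenience methods documented below,
although the constructor does not currently enforce its presence.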
  30
  31 =item id - A unique identifier for this reading. Required.
  32
  33 =item text - The word or other text of the reading.
  34
  35 =item is_start - The reading is the starting point for the collation.
  36
  37 =item is_end - The reading is the ending point for the collation.
  38
  39 =item is_lacuna - The 'reading' represents a known gap in the text.
  40
  41 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  42 not use unless you know what you are doing.
  43
  44 =item rank - The sequence number of the reading. This should probably not
  45 be set manually.
  46
  47 =back
  48
One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required;
the special flags supply a placeholder text automatically.
  50
  51 =head2 collation
  52
  53 =head2 id
  54
  55 =head2 text
  56
  57 =head2 is_start
  58
  59 =head2 is_end
  60
  61 =head2 is_lacuna
  62
  63 =head2 rank
  64
  65 Accessor methods for the given attributes.
  66
  67 =cut
  68
  69 has 'collation' => (
  70         is => 'ro',
  71         isa => 'Text::Tradition::Collation',
  72         # required => 1,
  73         weak_ref => 1,
  74         );
  75
  76 has 'id' => (
  77         is => 'ro',
  78         isa => 'Str',
  79         required => 1,
  80         );
  81
  82 has 'text' => (
  83         is => 'ro',
  84         isa => 'Str',
  85         required => 1,
  86         writer => 'alter_text',
  87         );
  88
  89 has 'language' => (
  90         is => 'ro',
  91         isa => 'Str',
  92         default => 'Default',
  93         );
  94
  95 has 'is_start' => (
  96         is => 'ro',
  97         isa => 'Bool',
  98         default => undef,
  99         );
 100
 101 has 'is_end' => (
 102         is => 'ro',
 103         isa => 'Bool',
 104         default => undef,
 105         );
 106
 107 has 'is_lacuna' => (
 108     is => 'ro',
 109     isa => 'Bool',
 110         default => undef,
 111     );
 112
 113 has 'is_ph' => (
 114         is => 'ro',
 115         isa => 'Bool',
 116         default => undef,
 117         );
 118
 119 has 'is_common' => (
 120         is => 'rw',
 121         isa => 'Bool',
 122         default => undef,
 123         );
 124
 125 has 'rank' => (
 126     is => 'rw',
 127     isa => 'Int',
 128     predicate => 'has_rank',
 129     clearer => 'clear_rank',
 130     );
 131
 132 ## For morphological analysis
 133
 134 has 'normal_form' => (
 135         is => 'rw',
 136         isa => 'Str',
 137         predicate => 'has_normal_form',
 138         );
 139
 140 # Holds the word form. If is_disambiguated is true, the form at index zero
 141 # is the correct one.
 142 has 'reading_lexemes' => (
 143         traits => ['Array'],
 144         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 145         handles => {
 146                 lexemes => 'elements',
 147                 has_lexemes => 'count',
 148                 clear_lexemes => 'clear',
 149                 add_lexeme => 'push',
 150                 },
 151         default => sub { [] },
 152         );
 153
 154 ## For prefix/suffix readings
 155
 156 has 'join_prior' => (
 157         is => 'ro',
 158         isa => 'Bool',
 159         default => undef,
 160         );
 161
 162 has 'join_next' => (
 163         is => 'ro',
 164         isa => 'Bool',
 165         default => undef,
 166         );
 167
 168
 169 around BUILDARGS => sub {
 170         my $orig = shift;
 171         my $class = shift;
 172         my $args;
 173         if( @_ == 1 ) {
 174                 $args = shift;
 175         } else {
 176                 $args = { @_ };
 177         }
 178
 179         # If one of our special booleans is set, we change the text and the
 180         # ID to match.
 181         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 182                 $args->{'text'} = '#LACUNA#';
 183         } elsif( exists $args->{'is_start'} ) {
 184                 $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
 185                 $args->{'text'} = '#START#';
 186                 $args->{'rank'} = 0;
 187         } elsif( exists $args->{'is_end'} ) {
 188                 $args->{'id'} = '#END#';        # Change the ID to ensure we have only one
 189                 $args->{'text'} = '#END#';
 190         } elsif( exists $args->{'is_ph'} ) {
 191                 $args->{'text'} = $args->{'id'};
 192         }
 193
 194         $class->$orig( $args );
 195 };
 196
 197 =head2 is_meta
 198
 199 A meta attribute (ha ha), which should be true if any of our 'special'
 200 booleans are true.  Implies that the reading does not represent a bit
 201 of text found in a witness.
 202
 203 =cut
 204
 205 sub is_meta {
 206         my $self = shift;
 207         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 208 }
 209
 210 =head1 Convenience methods
 211
 212 =head2 related_readings
 213
 214 Calls Collation's related_readings with $self as the first argument.
 215
 216 =cut
 217
 218 sub related_readings {
 219         my $self = shift;
 220         return $self->collation->related_readings( $self, @_ );
 221 }
 222
 223 =head2 witnesses
 224
 225 Calls Collation's reading_witnesses with $self as the first argument.
 226
 227 =cut
 228
 229 sub witnesses {
 230         my $self = shift;
 231         return $self->collation->reading_witnesses( $self, @_ );
 232 }
 233
 234 =head2 predecessors
 235
 236 Returns a list of Reading objects that immediately precede $self in the collation.
 237
 238 =cut
 239
 240 sub predecessors {
 241         my $self = shift;
 242         my @pred = $self->collation->sequence->predecessors( $self->id );
 243         return map { $self->collation->reading( $_ ) } @pred;
 244 }
 245
 246 =head2 successors
 247
 248 Returns a list of Reading objects that immediately follow $self in the collation.
 249
 250 =cut
 251
 252 sub successors {
 253         my $self = shift;
 254         my @succ = $self->collation->sequence->successors( $self->id );
 255         return map { $self->collation->reading( $_ ) } @succ;
 256 }
 257
 258 =head2 set_identical( $other_reading)
 259
 260 Backwards compatibility method, to add a transposition relationship
 261 between $self and $other_reading.  Don't use this.
 262
 263 =cut
 264
 265 sub set_identical {
 266         my( $self, $other ) = @_;
 267         return $self->collation->add_relationship( $self, $other,
 268                 { 'type' => 'transposition' } );
 269 }
 270
 271 sub _stringify {
 272         my $self = shift;
 273         return $self->id;
 274 }
 275
 276 =head1 MORPHOLOGY
 277
 278 A few methods to try to tack on morphological information.
 279
 280 =head2 use_lexemes
 281
Not yet implemented; the draft implementation below is commented out.
 283
 284 =cut
 285
 286 # sub use_lexemes {
 287 #       my( $self, @lexemes ) = @_;
 288 #       # The lexemes need to be the same as $self->text.
 289 #       my $cmpstr = $self->has_normal_form ? lc( $self->normal_form ) : lc( $self->text );
 290 #       $cmpstr =~ s/[\s-]+//g;
 291 #       my $lexstr = lc( join( '', @lexemes ) );
 292 #       $lexstr =~ s/[\s-]+//g;
 293 #       unless( $lexstr eq $cmpstr ) {
 294 #               warn "Cannot split " . $self->text . " into " . join( '.', @lexemes );
 295 #               return;
 296 #       }
 297 #       $self->_clear_morph;
 298 #       map { $self->_add_morph( { $_ => [] } ) } @lexemes;
 299 # }
 300 #
 301 # sub add_morphological_tag {
 302 #       my( $self, $lexeme, $opts ) = @_;
 303 #       my $struct;
 304 #       unless( $opts ) {
 305 #               # No lexeme was passed; use reading text.
 306 #               $opts = $lexeme;
 307 #               $lexeme = $self->text;
 308 #               $self->use_lexemes( $lexeme );
 309 #       }
 310 #       # Get the correct container
 311 #       ( $struct ) = grep { exists $_->{$lexeme} } $self->lexemes;
 312 #       unless( $struct ) {
 313 #               warn "No lexeme $lexeme exists in this reading";
 314 #               return;
 315 #       }
 316 #       # Now make the morph object and add it to this lexeme.
 317 #       my $morph_obj = Text::Tradition::Collation::Reading::Morphology->new( $opts );
 318 #       # TODO Check for existence
 319 #       push( @{$struct->{$lexeme}}, $morph_obj );
 320 # }
 321
 322 ## Utility methods
 323
 324 sub TO_JSON {
 325         my $self = shift;
 326         return $self->text;
 327 }
 328
 329 ## TODO will need a throw() here
 330
 331 no Moose;
 332 __PACKAGE__->meta->make_immutable;
 333
 334 ###################################################
 335 ### Morphology objects, to be attached to readings
 336 ###################################################
 337
 338 package Text::Tradition::Collation::Reading::Morphology;
 339
 340 use Moose;
 341
 342 has 'lemma' => (
 343         is => 'ro',
 344         isa => 'Str',
 345         required => 1,
 346         );
 347
 348 has 'code' => (
 349         is => 'ro',
 350         isa => 'Str',
 351         required => 1,
 352         );
 353
 354 has 'language' => (
 355         is => 'ro',
 356         isa => 'Str',
 357         required => 1,
 358         );
 359
 360 ## Transmute codes into comparison arrays for our various languages.
 361
 362 around BUILDARGS => sub {
 363         my $orig = shift;
 364         my $class = shift;
 365         my $args;
 366         if( @_ == 1 && ref( $_[0] ) ) {
 367                 $args = shift;
 368         } else {
 369                 $args = { @_ };
 370         }
 371         if( exists( $args->{'serial'} ) ) {
 372                 my( $lemma, $code ) = split( /!!/, delete $args->{'serial'} );
 373                 $args->{'lemma'} = $lemma;
 374                 $args->{'code'} = $code;
 375         }
 376         $class->$orig( $args );
 377 };
 378
 379 sub serialization {
 380         my $self = shift;
 381         return join( '!!', $self->lemma, $self->code );
 382 };
 383
 384 sub comparison_array {
 385         my $self = shift;
 386         if( $self->language eq 'French' ) {
 387                 my @array;
 388                 my @bits = split( /\+/, $self->code );
 389                 # First push the non k/v parts.
 390                 while( @bits && $bits[0] !~ /=/ ) {
 391                         push( @array, shift @bits );
 392                 }
 393                 while( @array < 2 ) {
 394                         push( @array, undef );
 395                 }
 396                 # Now push the k/v parts in a known order.
 397                 my @fields = qw/ Pers Nb Temps Genre Spec Fonc /;
 398                 my %props;
 399                 map { my( $k, $v ) = split( /=/, $_ ); $props{$k} = $v; } @bits;
 400                 foreach my $k ( @fields ) {
 401                         push( @array, $props{$k} );
 402                 }
 403                 # Give the answer.
 404                 return @array;
 405         } elsif( $self->language eq 'English' ) {
 406                 # Do something as yet undetermined
 407         } else {
 408                 # Latin or Greek or Armenian, just split the chars
 409                 return split( '', $self->code );
 410         }
 411 };
 412
 413 no Moose;
 414 __PACKAGE__->meta->make_immutable;
 415
 416 1;
 417