lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use Moose::Util::TypeConstraints;
   5 use JSON qw/ from_json /;
   6 use Module::Load;
   7 use Text::Tradition::Error;
   8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
   9 use YAML::XS;
  10 use overload '""' => \&_stringify, 'fallback' => 1;
  11
  12 subtype 'ReadingID',
  13         as 'Str',
  14         where { $_ =~ /\A$xml10_name_rx\z/ },
  15         message { 'Reading ID must be a valid XML attribute string' };
  16
  17 no Moose::Util::TypeConstraints;
  18
  19 =head1 NAME
  20
  21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
  22 in a collation.
  23
  24 =head1 DESCRIPTION
  25
  26 Text::Tradition is a library for representation and analysis of collated
  27 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  28 usually a word, that appears in one or more witnesses (manuscripts) of the
  29 tradition; the text of a given witness is composed of a set of readings in
  30 a particular sequence
  31
  32 =head1 METHODS
  33
  34 =head2 new
  35
  36 Creates a new reading in the given collation with the given attributes.
  37 Options include:
  38
  39 =over 4
  40
  41 =item collation - The Text::Tradition::Collation object to which this
  42 reading belongs.  Required.
  43
  44 =item id - A unique identifier for this reading. Required.
  45
  46 =item text - The word or other text of the reading.
  47
  48 =item is_start - The reading is the starting point for the collation.
  49
  50 =item is_end - The reading is the ending point for the collation.
  51
  52 =item is_lacuna - The 'reading' represents a known gap in the text.
  53
  54 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  55 not use unless you know what you are doing.
  56
  57 =item rank - The sequence number of the reading. This should probably not
  58 be set manually.
  59
  60 =back
  61
  62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
  63
  64 =head2 collation
  65
  66 =head2 id
  67
  68 =head2 text
  69
  70 =head2 is_start
  71
  72 =head2 is_end
  73
  74 =head2 is_lacuna
  75
  76 =head2 rank
  77
  78 Accessor methods for the given attributes.
  79
  80 =cut
  81
  82 has 'collation' => (
  83         is => 'ro',
  84         isa => 'Text::Tradition::Collation',
  85         # required => 1,
  86         weak_ref => 1,
  87         );
  88
  89 has 'id' => (
  90         is => 'ro',
  91         isa => 'ReadingID',
  92         required => 1,
  93         );
  94
  95 has 'text' => (
  96         is => 'ro',
  97         isa => 'Str',
  98         required => 1,
  99         writer => 'alter_text',
 100         );
 101
 102 has 'language' => (
 103         is => 'ro',
 104         isa => 'Str',
 105         predicate => 'has_language',
 106         );
 107
 108 has 'is_start' => (
 109         is => 'ro',
 110         isa => 'Bool',
 111         default => undef,
 112         );
 113
 114 has 'is_end' => (
 115         is => 'ro',
 116         isa => 'Bool',
 117         default => undef,
 118         );
 119
 120 has 'is_lacuna' => (
 121     is => 'ro',
 122     isa => 'Bool',
 123         default => undef,
 124     );
 125
 126 has 'is_ph' => (
 127         is => 'ro',
 128         isa => 'Bool',
 129         default => undef,
 130         );
 131
 132 has 'is_common' => (
 133         is => 'rw',
 134         isa => 'Bool',
 135         default => undef,
 136         );
 137
 138 has 'rank' => (
 139     is => 'rw',
 140     isa => 'Int',
 141     predicate => 'has_rank',
 142     clearer => 'clear_rank',
 143     );
 144
 145 ## For morphological analysis
 146
 147 has 'grammar_invalid' => (
 148         is => 'rw',
 149         isa => 'Bool',
 150         default => undef,
 151         );
 152
 153 has 'is_nonsense' => (
 154         is => 'rw',
 155         isa => 'Bool',
 156         default => undef,
 157         );
 158
 159 has 'normal_form' => (
 160         is => 'rw',
 161         isa => 'Str',
 162         predicate => '_has_normal_form',
 163         clearer => '_clear_normal_form',
 164         );
 165
 166 # Holds the lexemes for the reading.
 167 has 'reading_lexemes' => (
 168         traits => ['Array'],
 169         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 170         handles => {
 171                 lexeme => 'get',
 172                 lexemes => 'elements',
 173                 has_lexemes => 'count',
 174                 clear_lexemes => 'clear',
 175                 add_lexeme => 'push',
 176                 },
 177         default => sub { [] },
 178         );
 179
 180 ## For prefix/suffix readings
 181
 182 has 'join_prior' => (
 183         is => 'ro',
 184         isa => 'Bool',
 185         default => undef,
 186         );
 187
 188 has 'join_next' => (
 189         is => 'ro',
 190         isa => 'Bool',
 191         default => undef,
 192         );
 193
 194
 195 around BUILDARGS => sub {
 196         my $orig = shift;
 197         my $class = shift;
 198         my $args;
 199         if( @_ == 1 ) {
 200                 $args = shift;
 201         } else {
 202                 $args = { @_ };
 203         }
 204
 205         # If one of our special booleans is set, we change the text and the
 206         # ID to match.
 207         if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 208                 $args->{'text'} = '#LACUNA#';
 209         } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) {
 210                 $args->{'id'} = '__START__';  # Change the ID to ensure we have only one
 211                 $args->{'text'} = '#START#';
 212                 $args->{'rank'} = 0;
 213         } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) {
 214                 $args->{'id'} = '__END__';      # Change the ID to ensure we have only one
 215                 $args->{'text'} = '#END#';
 216         } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) {
 217                 $args->{'text'} = $args->{'id'};
 218         }
 219
 220         # Backwards compatibility for non-XMLname IDs
 221         my $rid = $args->{'id'};
 222         $rid =~ s/\#/__/g;
 223         $rid =~ s/[\/,]/./g;
 224     if( $rid !~ /^$xml10_namestartchar_rx/ ) {
 225         $rid = 'r'.$rid;
 226     }
 227         $args->{'id'} = $rid;
 228
 229         $class->$orig( $args );
 230 };
 231
 232 # Look for a lexeme-string argument in the build args.
 233 sub BUILD {
 234         my( $self, $args ) = @_;
 235         if( exists $args->{'lexemes'} ) {
 236                 $self->_deserialize_lexemes( $args->{'lexemes'} );
 237         }
 238 }
 239
 240 # Make normal_form default to text, transparently.
 241 around 'normal_form' => sub {
 242         my $orig = shift;
 243         my $self = shift;
 244         my( $arg ) = @_;
 245         if( $arg && $arg eq $self->text ) {
 246                 $self->_clear_normal_form;
 247                 return $arg;
 248         } elsif( !$arg && !$self->_has_normal_form ) {
 249                 return $self->text;
 250         } else {
 251                 $self->$orig( @_ );
 252         }
 253 };
 254
 255 =head2 is_meta
 256
 257 A meta attribute (ha ha), which should be true if any of our 'special'
 258 booleans are true.  Implies that the reading does not represent a bit
 259 of text found in a witness.
 260
 261 =cut
 262
 263 sub is_meta {
 264         my $self = shift;
 265         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 266 }
 267
 268 =head2 is_nonrel
 269
 270 Similar to is_meta, but returns false for the start and end readings.
 271
 272 =cut
 273
 274 sub is_nonrel {
 275         my $self = shift;
 276         return $self->is_lacuna || $self->is_ph;
 277 }
 278
 279 =head1 Convenience methods
 280
 281 =head2 related_readings
 282
 283 Calls Collation's related_readings with $self as the first argument.
 284
 285 =cut
 286
 287 sub related_readings {
 288         my $self = shift;
 289         return $self->collation->related_readings( $self, @_ );
 290 }
 291
 292 =head2 witnesses
 293
 294 Calls Collation's reading_witnesses with $self as the first argument.
 295
 296 =cut
 297
 298 sub witnesses {
 299         my $self = shift;
 300         return $self->collation->reading_witnesses( $self, @_ );
 301 }
 302
 303 =head2 predecessors
 304
 305 Returns a list of Reading objects that immediately precede $self in the collation.
 306
 307 =cut
 308
 309 sub predecessors {
 310         my $self = shift;
 311         my @pred = $self->collation->sequence->predecessors( $self->id );
 312         return map { $self->collation->reading( $_ ) } @pred;
 313 }
 314
 315 =head2 successors
 316
 317 Returns a list of Reading objects that immediately follow $self in the collation.
 318
 319 =cut
 320
 321 sub successors {
 322         my $self = shift;
 323         my @succ = $self->collation->sequence->successors( $self->id );
 324         return map { $self->collation->reading( $_ ) } @succ;
 325 }
 326
 327 =head2 set_identical( $other_reading)
 328
 329 Backwards compatibility method, to add a transposition relationship
 330 between $self and $other_reading.  Don't use this.
 331
 332 =cut
 333
 334 sub set_identical {
 335         my( $self, $other ) = @_;
 336         return $self->collation->add_relationship( $self, $other,
 337                 { 'type' => 'transposition' } );
 338 }
 339
 340 sub _stringify {
 341         my $self = shift;
 342         return $self->id;
 343 }
 344
 345 =head1 MORPHOLOGY
 346
 347 Methods for the morphological information (if any) attached to readings.
 348 A reading may be made up of multiple lexemes; the concatenated lexeme
 349 strings ought to match the reading's normalized form.
 350
 351 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
 352 on Lexeme objects and their attributes.
 353
 354 =head2 has_lexemes
 355
 356 Returns a true value if the reading has any attached lexemes.
 357
 358 =head2 lexemes
 359
 360 Returns the Lexeme objects (if any) attached to the reading.
 361
 362 =head2 clear_lexemes
 363
 364 Wipes any associated Lexeme objects out of the reading.
 365
 366 =head2 add_lexeme( $lexobj )
 367
 368 Adds the Lexeme in $lexobj to the list of lexemes.
 369
 370 =head2 lemmatize
 371
 372 If the language of the reading is set, this method will use the appropriate
 373 Language model to determine the lexemes that belong to this reading.  See
 374 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
 375
 376 =cut
 377
 378 sub lemmatize {
 379         my $self = shift;
 380         unless( $self->has_language ) {
 381                 warn "Please set a language to lemmatize a tradition";
 382                 return;
 383         }
 384         my $mod = "Text::Tradition::Language::" . $self->language;
 385         load( $mod );
 386         $mod->can( 'reading_lookup' )->( $self );
 387
 388 }
 389
 390 # For graph serialization. Return a JSON representation of the associated
 391 # reading lexemes.
 392 sub _serialize_lexemes {
 393         my $self = shift;
 394         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 395         return $json->encode( [ $self->lexemes ] );
 396 }
 397
 398 # Given a JSON representation of the lexemes, instantiate them and add
 399 # them to the reading.
 400 sub _deserialize_lexemes {
 401         my( $self, $json ) = @_;
 402         my $data = from_json( $json );
 403         return unless @$data;
 404
 405         # Need to have the lexeme module in order to have lexemes.
 406         eval { use Text::Tradition::Collation::Reading::Lexeme; };
 407         throw( $@ ) if $@;
 408
 409         # Good to go - add the lexemes.
 410         my @lexemes;
 411         foreach my $lexhash ( @$data ) {
 412                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 413                         'JSON' => $lexhash ) );
 414         }
 415         $self->clear_lexemes;
 416         $self->add_lexeme( @lexemes );
 417 }
 418
 419 sub disambiguated {
 420         my $self = shift;
 421         return 0 unless $self->has_lexemes;
 422         return !grep { !$_->is_disambiguated } $self->lexemes;
 423 }
 424
 425 ## Utility methods
 426
 427 sub TO_JSON {
 428         my $self = shift;
 429         return $self->text;
 430 }
 431
 432 sub throw {
 433         Text::Tradition::Error->throw(
 434                 'ident' => 'Reading error',
 435                 'message' => $_[0],
 436                 );
 437 }
 438
 439 no Moose;
 440 __PACKAGE__->meta->make_immutable;
 441
 442 1;