lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use Moose::Util::TypeConstraints;
   5 use JSON qw/ from_json /;
   6 use Module::Load;
   7 use Text::Tradition::Error;
   8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
   9 use YAML::XS;
  10 use overload '""' => \&_stringify, 'fallback' => 1;
  11
  12 subtype 'ReadingID',
  13         as 'Str',
  14         where { $_ =~ /\A$xml10_name_rx\z/ },
  15         message { 'Reading ID must be a valid XML attribute string' };
  16
  17 no Moose::Util::TypeConstraints;
  18
  19 =head1 NAME
  20
  21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
  22 in a collation.
  23
  24 =head1 DESCRIPTION
  25
  26 Text::Tradition is a library for representation and analysis of collated
  27 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  28 usually a word, that appears in one or more witnesses (manuscripts) of the
  29 tradition; the text of a given witness is composed of a set of readings in
  30 a particular sequence
  31
  32 =head1 METHODS
  33
  34 =head2 new
  35
  36 Creates a new reading in the given collation with the given attributes.
  37 Options include:
  38
  39 =over 4
  40
  41 =item collation - The Text::Tradition::Collation object to which this
  42 reading belongs.  Required.
  43
  44 =item id - A unique identifier for this reading. Required.
  45
  46 =item text - The word or other text of the reading.
  47
  48 =item is_start - The reading is the starting point for the collation.
  49
  50 =item is_end - The reading is the ending point for the collation.
  51
  52 =item is_lacuna - The 'reading' represents a known gap in the text.
  53
  54 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  55 not use unless you know what you are doing.
  56
  57 =item rank - The sequence number of the reading. This should probably not
  58 be set manually.
  59
  60 =back
  61
  62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
  63
  64 =head2 collation
  65
  66 =head2 id
  67
  68 =head2 text
  69
  70 =head2 is_start
  71
  72 =head2 is_end
  73
  74 =head2 is_lacuna
  75
  76 =head2 rank
  77
  78 Accessor methods for the given attributes.
  79
  80 =cut
  81
  82 has 'collation' => (
  83         is => 'ro',
  84         isa => 'Text::Tradition::Collation',
  85         # required => 1,
  86         weak_ref => 1,
  87         );
  88
  89 has 'id' => (
  90         is => 'ro',
  91         isa => 'ReadingID',
  92         required => 1,
  93         );
  94
  95 has 'text' => (
  96         is => 'ro',
  97         isa => 'Str',
  98         required => 1,
  99         writer => 'alter_text',
 100         );
 101
 102 has 'language' => (
 103         is => 'ro',
 104         isa => 'Str',
 105         predicate => 'has_language',
 106         );
 107
 108 has 'is_start' => (
 109         is => 'ro',
 110         isa => 'Bool',
 111         default => undef,
 112         );
 113
 114 has 'is_end' => (
 115         is => 'ro',
 116         isa => 'Bool',
 117         default => undef,
 118         );
 119
 120 has 'is_lacuna' => (
 121     is => 'ro',
 122     isa => 'Bool',
 123         default => undef,
 124     );
 125
 126 has 'is_ph' => (
 127         is => 'ro',
 128         isa => 'Bool',
 129         default => undef,
 130         );
 131
 132 has 'is_common' => (
 133         is => 'rw',
 134         isa => 'Bool',
 135         default => undef,
 136         );
 137
 138 has 'rank' => (
 139     is => 'rw',
 140     isa => 'Int',
 141     predicate => 'has_rank',
 142     clearer => 'clear_rank',
 143     );
 144
 145 ## For morphological analysis
 146
 147 has 'grammar_invalid' => (
 148         is => 'rw',
 149         isa => 'Bool',
 150         default => undef,
 151         );
 152
 153 has 'is_nonsense' => (
 154         is => 'rw',
 155         isa => 'Bool',
 156         default => undef,
 157         );
 158
 159 has 'normal_form' => (
 160         is => 'rw',
 161         isa => 'Str',
 162         predicate => 'has_normal_form',
 163         );
 164
 165 # Holds the lexemes for the reading.
 166 has 'reading_lexemes' => (
 167         traits => ['Array'],
 168         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 169         handles => {
 170                 lexeme => 'get',
 171                 lexemes => 'elements',
 172                 has_lexemes => 'count',
 173                 clear_lexemes => 'clear',
 174                 add_lexeme => 'push',
 175                 },
 176         default => sub { [] },
 177         );
 178
 179 ## For prefix/suffix readings
 180
 181 has 'join_prior' => (
 182         is => 'ro',
 183         isa => 'Bool',
 184         default => undef,
 185         );
 186
 187 has 'join_next' => (
 188         is => 'ro',
 189         isa => 'Bool',
 190         default => undef,
 191         );
 192
 193
 194 around BUILDARGS => sub {
 195         my $orig = shift;
 196         my $class = shift;
 197         my $args;
 198         if( @_ == 1 ) {
 199                 $args = shift;
 200         } else {
 201                 $args = { @_ };
 202         }
 203
 204         # If one of our special booleans is set, we change the text and the
 205         # ID to match.
 206         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 207                 $args->{'text'} = '#LACUNA#';
 208         } elsif( exists $args->{'is_start'} ) {
 209                 $args->{'id'} = '__START__';  # Change the ID to ensure we have only one
 210                 $args->{'text'} = '#START#';
 211                 $args->{'rank'} = 0;
 212         } elsif( exists $args->{'is_end'} ) {
 213                 $args->{'id'} = '__END__';      # Change the ID to ensure we have only one
 214                 $args->{'text'} = '#END#';
 215         } elsif( exists $args->{'is_ph'} ) {
 216                 $args->{'text'} = $args->{'id'};
 217         }
 218
 219         # Backwards compatibility for non-XMLname IDs
 220         my $rid = $args->{'id'};
 221         $rid =~ s/\#/__/g;
 222         $rid =~ s/[\/,]/./g;
 223     if( $rid !~ /^$xml10_namestartchar_rx/ ) {
 224         $rid = 'r'.$rid;
 225     }
 226         $args->{'id'} = $rid;
 227
 228         $class->$orig( $args );
 229 };
 230
 231 # Look for a lexeme-string argument in the build args.
 232 sub BUILD {
 233         my( $self, $args ) = @_;
 234         if( exists $args->{'lexemes'} ) {
 235                 $self->_deserialize_lexemes( $args->{'lexemes'} );
 236         }
 237 }
 238
 239 =head2 is_meta
 240
 241 A meta attribute (ha ha), which should be true if any of our 'special'
 242 booleans are true.  Implies that the reading does not represent a bit
 243 of text found in a witness.
 244
 245 =cut
 246
 247 sub is_meta {
 248         my $self = shift;
 249         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 250 }
 251
 252 =head1 Convenience methods
 253
 254 =head2 related_readings
 255
 256 Calls Collation's related_readings with $self as the first argument.
 257
 258 =cut
 259
 260 sub related_readings {
 261         my $self = shift;
 262         return $self->collation->related_readings( $self, @_ );
 263 }
 264
 265 =head2 witnesses
 266
 267 Calls Collation's reading_witnesses with $self as the first argument.
 268
 269 =cut
 270
 271 sub witnesses {
 272         my $self = shift;
 273         return $self->collation->reading_witnesses( $self, @_ );
 274 }
 275
 276 =head2 predecessors
 277
 278 Returns a list of Reading objects that immediately precede $self in the collation.
 279
 280 =cut
 281
 282 sub predecessors {
 283         my $self = shift;
 284         my @pred = $self->collation->sequence->predecessors( $self->id );
 285         return map { $self->collation->reading( $_ ) } @pred;
 286 }
 287
 288 =head2 successors
 289
 290 Returns a list of Reading objects that immediately follow $self in the collation.
 291
 292 =cut
 293
 294 sub successors {
 295         my $self = shift;
 296         my @succ = $self->collation->sequence->successors( $self->id );
 297         return map { $self->collation->reading( $_ ) } @succ;
 298 }
 299
 300 =head2 set_identical( $other_reading)
 301
 302 Backwards compatibility method, to add a transposition relationship
 303 between $self and $other_reading.  Don't use this.
 304
 305 =cut
 306
 307 sub set_identical {
 308         my( $self, $other ) = @_;
 309         return $self->collation->add_relationship( $self, $other,
 310                 { 'type' => 'transposition' } );
 311 }
 312
 313 sub _stringify {
 314         my $self = shift;
 315         return $self->id;
 316 }
 317
 318 =head1 MORPHOLOGY
 319
 320 Methods for the morphological information (if any) attached to readings.
 321 A reading may be made up of multiple lexemes; the concatenated lexeme
 322 strings ought to match the reading's normalized form.
 323
 324 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
 325 on Lexeme objects and their attributes.
 326
 327 =head2 has_lexemes
 328
 329 Returns a true value if the reading has any attached lexemes.
 330
 331 =head2 lexemes
 332
 333 Returns the Lexeme objects (if any) attached to the reading.
 334
 335 =head2 clear_lexemes
 336
 337 Wipes any associated Lexeme objects out of the reading.
 338
 339 =head2 add_lexeme( $lexobj )
 340
 341 Adds the Lexeme in $lexobj to the list of lexemes.
 342
 343 =head2 lemmatize
 344
 345 If the language of the reading is set, this method will use the appropriate
 346 Language model to determine the lexemes that belong to this reading.  See
 347 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
 348
 349 =cut
 350
 351 sub lemmatize {
 352         my $self = shift;
 353         unless( $self->has_language ) {
 354                 warn "Please set a language to lemmatize a tradition";
 355                 return;
 356         }
 357         my $mod = "Text::Tradition::Language::" . $self->language;
 358         load( $mod );
 359         $mod->can( 'reading_lookup' )->( $self );
 360
 361 }
 362
 363 # For graph serialization. Return a JSON representation of the associated
 364 # reading lexemes.
 365 sub _serialize_lexemes {
 366         my $self = shift;
 367         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 368         return $json->encode( [ $self->lexemes ] );
 369 }
 370
 371 # Given a JSON representation of the lexemes, instantiate them and add
 372 # them to the reading.
 373 sub _deserialize_lexemes {
 374         my( $self, $json ) = @_;
 375         my $data = from_json( $json );
 376         return unless @$data;
 377
 378         # Need to have the lexeme module in order to have lexemes.
 379         eval { use Text::Tradition::Collation::Reading::Lexeme; };
 380         throw( $@ ) if $@;
 381
 382         # Good to go - add the lexemes.
 383         my @lexemes;
 384         foreach my $lexhash ( @$data ) {
 385                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 386                         'JSON' => $lexhash ) );
 387         }
 388         $self->clear_lexemes;
 389         $self->add_lexeme( @lexemes );
 390 }
 391
 392 ## Utility methods
 393
 394 sub TO_JSON {
 395         my $self = shift;
 396         return $self->text;
 397 }
 398
 399 sub throw {
 400         Text::Tradition::Error->throw(
 401                 'ident' => 'Reading error',
 402                 'message' => $_[0],
 403                 );
 404 }
 405
 406 no Moose;
 407 __PACKAGE__->meta->make_immutable;
 408
 409 1;