lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use Moose::Util::TypeConstraints;
   5 use JSON qw/ from_json /;
   6 use Module::Load;
   7 use Text::Tradition::Error;
   8 use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
   9 use YAML::XS;
  10 use overload '""' => \&_stringify, 'fallback' => 1;
  11
  12 subtype 'ReadingID',
  13         as 'Str',
  14         where { $_ =~ /\A$xml10_name_rx\z/ },
  15         message { 'Reading ID must be a valid XML attribute string' };
  16
  17 no Moose::Util::TypeConstraints;
  18
  19 =head1 NAME
  20
  21 Text::Tradition::Collation::Reading - represents a reading (usually a word)
  22 in a collation.
  23
  24 =head1 DESCRIPTION
  25
  26 Text::Tradition is a library for representation and analysis of collated
  27 texts, particularly medieval ones.  A 'reading' refers to a unit of text,
  28 usually a word, that appears in one or more witnesses (manuscripts) of the
  29 tradition; the text of a given witness is composed of a set of readings in
  30 a particular sequence
  31
  32 =head1 METHODS
  33
  34 =head2 new
  35
  36 Creates a new reading in the given collation with the given attributes.
  37 Options include:
  38
  39 =over 4
  40
  41 =item collation - The Text::Tradition::Collation object to which this
  42 reading belongs.  Required.
  43
  44 =item id - A unique identifier for this reading. Required.
  45
  46 =item text - The word or other text of the reading.
  47
  48 =item is_start - The reading is the starting point for the collation.
  49
  50 =item is_end - The reading is the ending point for the collation.
  51
  52 =item is_lacuna - The 'reading' represents a known gap in the text.
  53
  54 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  55 not use unless you know what you are doing.
  56
  57 =item rank - The sequence number of the reading. This should probably not
  58 be set manually.
  59
  60 =back
  61
  62 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
  63
  64 =head2 collation
  65
  66 =head2 id
  67
  68 =head2 text
  69
  70 =head2 is_start
  71
  72 =head2 is_end
  73
  74 =head2 is_lacuna
  75
  76 =head2 rank
  77
  78 Accessor methods for the given attributes.
  79
  80 =cut
  81
  82 has 'collation' => (
  83         is => 'ro',
  84         isa => 'Text::Tradition::Collation',
  85         # required => 1,
  86         weak_ref => 1,
  87         );
  88
  89 has 'id' => (
  90         is => 'ro',
  91         isa => 'ReadingID',
  92         required => 1,
  93         );
  94
  95 has 'text' => (
  96         is => 'ro',
  97         isa => 'Str',
  98         required => 1,
  99         writer => 'alter_text',
 100         );
 101
 102 has 'language' => (
 103         is => 'ro',
 104         isa => 'Str',
 105         predicate => 'has_language',
 106         );
 107
 108 has 'is_start' => (
 109         is => 'ro',
 110         isa => 'Bool',
 111         default => undef,
 112         );
 113
 114 has 'is_end' => (
 115         is => 'ro',
 116         isa => 'Bool',
 117         default => undef,
 118         );
 119
 120 has 'is_lacuna' => (
 121     is => 'ro',
 122     isa => 'Bool',
 123         default => undef,
 124     );
 125
 126 has 'is_ph' => (
 127         is => 'ro',
 128         isa => 'Bool',
 129         default => undef,
 130         );
 131
 132 has 'is_common' => (
 133         is => 'rw',
 134         isa => 'Bool',
 135         default => undef,
 136         );
 137
 138 has 'rank' => (
 139     is => 'rw',
 140     isa => 'Int',
 141     predicate => 'has_rank',
 142     clearer => 'clear_rank',
 143     );
 144
 145 ## For morphological analysis
 146
 147 has 'normal_form' => (
 148         is => 'rw',
 149         isa => 'Str',
 150         predicate => 'has_normal_form',
 151         );
 152
 153 # Holds the lexemes for the reading.
 154 has 'reading_lexemes' => (
 155         traits => ['Array'],
 156         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 157         handles => {
 158                 lexemes => 'elements',
 159                 has_lexemes => 'count',
 160                 clear_lexemes => 'clear',
 161                 add_lexeme => 'push',
 162                 },
 163         default => sub { [] },
 164         );
 165
 166 ## For prefix/suffix readings
 167
 168 has 'join_prior' => (
 169         is => 'ro',
 170         isa => 'Bool',
 171         default => undef,
 172         );
 173
 174 has 'join_next' => (
 175         is => 'ro',
 176         isa => 'Bool',
 177         default => undef,
 178         );
 179
 180
 181 around BUILDARGS => sub {
 182         my $orig = shift;
 183         my $class = shift;
 184         my $args;
 185         if( @_ == 1 ) {
 186                 $args = shift;
 187         } else {
 188                 $args = { @_ };
 189         }
 190
 191         # If one of our special booleans is set, we change the text and the
 192         # ID to match.
 193         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 194                 $args->{'text'} = '#LACUNA#';
 195         } elsif( exists $args->{'is_start'} ) {
 196                 $args->{'id'} = '__START__';  # Change the ID to ensure we have only one
 197                 $args->{'text'} = '#START#';
 198                 $args->{'rank'} = 0;
 199         } elsif( exists $args->{'is_end'} ) {
 200                 $args->{'id'} = '__END__';      # Change the ID to ensure we have only one
 201                 $args->{'text'} = '#END#';
 202         } elsif( exists $args->{'is_ph'} ) {
 203                 $args->{'text'} = $args->{'id'};
 204         }
 205
 206         # Backwards compatibility for non-XMLname IDs
 207         my $rid = $args->{'id'};
 208         $rid =~ s/\#/__/g;
 209         $rid =~ s/[\/,]/./g;
 210     if( $rid !~ /^$xml10_namestartchar_rx/ ) {
 211         $rid = 'r'.$rid;
 212     }
 213         $args->{'id'} = $rid;
 214
 215         $class->$orig( $args );
 216 };
 217
 218 # Look for a lexeme-string argument in the build args.
 219 sub BUILD {
 220         my( $self, $args ) = @_;
 221         if( exists $args->{'lexemes'} ) {
 222                 $self->_deserialize_lexemes( $args->{'lexemes'} );
 223         }
 224 }
 225
 226 =head2 is_meta
 227
 228 A meta attribute (ha ha), which should be true if any of our 'special'
 229 booleans are true.  Implies that the reading does not represent a bit
 230 of text found in a witness.
 231
 232 =cut
 233
 234 sub is_meta {
 235         my $self = shift;
 236         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 237 }
 238
 239 =head1 Convenience methods
 240
 241 =head2 related_readings
 242
 243 Calls Collation's related_readings with $self as the first argument.
 244
 245 =cut
 246
 247 sub related_readings {
 248         my $self = shift;
 249         return $self->collation->related_readings( $self, @_ );
 250 }
 251
 252 =head2 witnesses
 253
 254 Calls Collation's reading_witnesses with $self as the first argument.
 255
 256 =cut
 257
 258 sub witnesses {
 259         my $self = shift;
 260         return $self->collation->reading_witnesses( $self, @_ );
 261 }
 262
 263 =head2 predecessors
 264
 265 Returns a list of Reading objects that immediately precede $self in the collation.
 266
 267 =cut
 268
 269 sub predecessors {
 270         my $self = shift;
 271         my @pred = $self->collation->sequence->predecessors( $self->id );
 272         return map { $self->collation->reading( $_ ) } @pred;
 273 }
 274
 275 =head2 successors
 276
 277 Returns a list of Reading objects that immediately follow $self in the collation.
 278
 279 =cut
 280
 281 sub successors {
 282         my $self = shift;
 283         my @succ = $self->collation->sequence->successors( $self->id );
 284         return map { $self->collation->reading( $_ ) } @succ;
 285 }
 286
 287 =head2 set_identical( $other_reading)
 288
 289 Backwards compatibility method, to add a transposition relationship
 290 between $self and $other_reading.  Don't use this.
 291
 292 =cut
 293
 294 sub set_identical {
 295         my( $self, $other ) = @_;
 296         return $self->collation->add_relationship( $self, $other,
 297                 { 'type' => 'transposition' } );
 298 }
 299
 300 sub _stringify {
 301         my $self = shift;
 302         return $self->id;
 303 }
 304
 305 =head1 MORPHOLOGY
 306
 307 Methods for the morphological information (if any) attached to readings.
 308 A reading may be made up of multiple lexemes; the concatenated lexeme
 309 strings ought to match the reading's normalized form.
 310
 311 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
 312 on Lexeme objects and their attributes.
 313
 314 =head2 has_lexemes
 315
 316 Returns a true value if the reading has any attached lexemes.
 317
 318 =head2 lexemes
 319
 320 Returns the Lexeme objects (if any) attached to the reading.
 321
 322 =head2 clear_lexemes
 323
 324 Wipes any associated Lexeme objects out of the reading.
 325
 326 =head2 add_lexeme( $lexobj )
 327
 328 Adds the Lexeme in $lexobj to the list of lexemes.
 329
 330 =head2 lemmatize
 331
 332 If the language of the reading is set, this method will use the appropriate
 333 Language model to determine the lexemes that belong to this reading.  See
 334 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
 335
 336 =cut
 337
 338 sub lemmatize {
 339         my $self = shift;
 340         unless( $self->has_language ) {
 341                 warn "Please set a language to lemmatize a tradition";
 342                 return;
 343         }
 344         my $mod = "Text::Tradition::Language::" . $self->language;
 345         load( $mod );
 346         $mod->can( 'reading_lookup' )->( $self );
 347
 348 }
 349
 350 # For graph serialization. Return a JSON representation of the associated
 351 # reading lexemes.
 352 sub _serialize_lexemes {
 353         my $self = shift;
 354         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 355         return $json->encode( [ $self->lexemes ] );
 356 }
 357
 358 # Given a JSON representation of the lexemes, instantiate them and add
 359 # them to the reading.
 360 sub _deserialize_lexemes {
 361         my( $self, $json ) = @_;
 362         my $data = from_json( $json );
 363         return unless @$data;
 364
 365         # Need to have the lexeme module in order to have lexemes.
 366         eval { use Text::Tradition::Collation::Reading::Lexeme; };
 367         throw( $@ ) if $@;
 368
 369         # Good to go - add the lexemes.
 370         my @lexemes;
 371         foreach my $lexhash ( @$data ) {
 372                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 373                         'JSON' => $lexhash ) );
 374         }
 375         $self->clear_lexemes;
 376         $self->add_lexeme( @lexemes );
 377 }
 378
 379 ## Utility methods
 380
 381 sub TO_JSON {
 382         my $self = shift;
 383         return $self->text;
 384 }
 385
 386 sub throw {
 387         Text::Tradition::Error->throw(
 388                 'ident' => 'Reading error',
 389                 'message' => $_[0],
 390                 );
 391 }
 392
 393 no Moose;
 394 __PACKAGE__->meta->make_immutable;
 395
 396 1;