lib/Text/Tradition/Collation/Reading.pm

   1 package Text::Tradition::Collation::Reading;
   2
   3 use Moose;
   4 use Module::Load;
   5 use YAML::XS;
   6 use overload '""' => \&_stringify, 'fallback' => 1;
   7
   8 =head1 NAME
   9
  10 Text::Tradition::Collation::Reading - represents a reading (usually a word)
  11 in a collation.
  12
  13 =head1 DESCRIPTION
  14
Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  A 'reading' refers to a unit of text,
usually a word, that appears in one or more witnesses (manuscripts) of the
tradition; the text of a given witness is composed of a set of readings in
a particular sequence.
  20
  21 =head1 METHODS
  22
  23 =head2 new
  24
  25 Creates a new reading in the given collation with the given attributes.
  26 Options include:
  27
  28 =over 4
  29
=item collation - The Text::Tradition::Collation object to which this
reading belongs.  Intended to be required, but not currently enforced by
the constructor.
  32
  33 =item id - A unique identifier for this reading. Required.
  34
  35 =item text - The word or other text of the reading.
  36
  37 =item is_start - The reading is the starting point for the collation.
  38
  39 =item is_end - The reading is the ending point for the collation.
  40
  41 =item is_lacuna - The 'reading' represents a known gap in the text.
  42
  43 =item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
  44 not use unless you know what you are doing.
  45
  46 =item rank - The sequence number of the reading. This should probably not
  47 be set manually.
  48
  49 =back
  50
  51 One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
  52
  53 =head2 collation
  54
  55 =head2 id
  56
  57 =head2 text
  58
  59 =head2 is_start
  60
  61 =head2 is_end
  62
  63 =head2 is_lacuna
  64
  65 =head2 rank
  66
  67 Accessor methods for the given attributes.
  68
  69 =cut
  70
  71 has 'collation' => (
  72         is => 'ro',
  73         isa => 'Text::Tradition::Collation',
  74         # required => 1,
  75         weak_ref => 1,
  76         );
  77
  78 has 'id' => (
  79         is => 'ro',
  80         isa => 'Str',
  81         required => 1,
  82         );
  83
  84 has 'text' => (
  85         is => 'ro',
  86         isa => 'Str',
  87         required => 1,
  88         writer => 'alter_text',
  89         );
  90
  91 has 'language' => (
  92         is => 'ro',
  93         isa => 'Str',
  94         predicate => 'has_language',
  95         );
  96
  97 has 'is_start' => (
  98         is => 'ro',
  99         isa => 'Bool',
 100         default => undef,
 101         );
 102
 103 has 'is_end' => (
 104         is => 'ro',
 105         isa => 'Bool',
 106         default => undef,
 107         );
 108
 109 has 'is_lacuna' => (
 110     is => 'ro',
 111     isa => 'Bool',
 112         default => undef,
 113     );
 114
 115 has 'is_ph' => (
 116         is => 'ro',
 117         isa => 'Bool',
 118         default => undef,
 119         );
 120
 121 has 'is_common' => (
 122         is => 'rw',
 123         isa => 'Bool',
 124         default => undef,
 125         );
 126
 127 has 'rank' => (
 128     is => 'rw',
 129     isa => 'Int',
 130     predicate => 'has_rank',
 131     clearer => 'clear_rank',
 132     );
 133
 134 ## For morphological analysis
 135
 136 has 'normal_form' => (
 137         is => 'rw',
 138         isa => 'Str',
 139         predicate => 'has_normal_form',
 140         );
 141
 142 # Holds the lexemes for the reading.
 143 has 'reading_lexemes' => (
 144         traits => ['Array'],
 145         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
 146         handles => {
 147                 lexemes => 'elements',
 148                 has_lexemes => 'count',
 149                 clear_lexemes => 'clear',
 150                 add_lexeme => 'push',
 151                 },
 152         default => sub { [] },
 153         );
 154
 155 ## For prefix/suffix readings
 156
 157 has 'join_prior' => (
 158         is => 'ro',
 159         isa => 'Bool',
 160         default => undef,
 161         );
 162
 163 has 'join_next' => (
 164         is => 'ro',
 165         isa => 'Bool',
 166         default => undef,
 167         );
 168
 169
 170 around BUILDARGS => sub {
 171         my $orig = shift;
 172         my $class = shift;
 173         my $args;
 174         if( @_ == 1 ) {
 175                 $args = shift;
 176         } else {
 177                 $args = { @_ };
 178         }
 179
 180         # If one of our special booleans is set, we change the text and the
 181         # ID to match.
 182         if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
 183                 $args->{'text'} = '#LACUNA#';
 184         } elsif( exists $args->{'is_start'} ) {
 185                 $args->{'id'} = '#START#';  # Change the ID to ensure we have only one
 186                 $args->{'text'} = '#START#';
 187                 $args->{'rank'} = 0;
 188         } elsif( exists $args->{'is_end'} ) {
 189                 $args->{'id'} = '#END#';        # Change the ID to ensure we have only one
 190                 $args->{'text'} = '#END#';
 191         } elsif( exists $args->{'is_ph'} ) {
 192                 $args->{'text'} = $args->{'id'};
 193         }
 194
 195         $class->$orig( $args );
 196 };
 197
 198 =head2 is_meta
 199
 200 A meta attribute (ha ha), which should be true if any of our 'special'
 201 booleans are true.  Implies that the reading does not represent a bit
 202 of text found in a witness.
 203
 204 =cut
 205
 206 sub is_meta {
 207         my $self = shift;
 208         return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph;
 209 }
 210
 211 =head1 Convenience methods
 212
 213 =head2 related_readings
 214
 215 Calls Collation's related_readings with $self as the first argument.
 216
 217 =cut
 218
 219 sub related_readings {
 220         my $self = shift;
 221         return $self->collation->related_readings( $self, @_ );
 222 }
 223
 224 =head2 witnesses
 225
 226 Calls Collation's reading_witnesses with $self as the first argument.
 227
 228 =cut
 229
 230 sub witnesses {
 231         my $self = shift;
 232         return $self->collation->reading_witnesses( $self, @_ );
 233 }
 234
 235 =head2 predecessors
 236
 237 Returns a list of Reading objects that immediately precede $self in the collation.
 238
 239 =cut
 240
 241 sub predecessors {
 242         my $self = shift;
 243         my @pred = $self->collation->sequence->predecessors( $self->id );
 244         return map { $self->collation->reading( $_ ) } @pred;
 245 }
 246
 247 =head2 successors
 248
 249 Returns a list of Reading objects that immediately follow $self in the collation.
 250
 251 =cut
 252
 253 sub successors {
 254         my $self = shift;
 255         my @succ = $self->collation->sequence->successors( $self->id );
 256         return map { $self->collation->reading( $_ ) } @succ;
 257 }
 258
 259 =head2 set_identical( $other_reading)
 260
 261 Backwards compatibility method, to add a transposition relationship
 262 between $self and $other_reading.  Don't use this.
 263
 264 =cut
 265
 266 sub set_identical {
 267         my( $self, $other ) = @_;
 268         return $self->collation->add_relationship( $self, $other,
 269                 { 'type' => 'transposition' } );
 270 }
 271
 272 sub _stringify {
 273         my $self = shift;
 274         return $self->id;
 275 }
 276
 277 =head1 MORPHOLOGY
 278
 279 Methods for the morphological information (if any) attached to readings.
 280 A reading may be made up of multiple lexemes; the concatenated lexeme
 281 strings ought to match the reading's normalized form.
 282
 283 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
 284 on Lexeme objects and their attributes.
 285
 286 =head2 has_lexemes
 287
 288 Returns a true value if the reading has any attached lexemes.
 289
 290 =head2 lexemes
 291
 292 Returns the Lexeme objects (if any) attached to the reading.
 293
 294 =head2 clear_lexemes
 295
 296 Wipes any associated Lexeme objects out of the reading.
 297
 298 =head2 add_lexeme( $lexobj )
 299
 300 Adds the Lexeme in $lexobj to the list of lexemes.
 301
 302 =head2 lemmatize
 303
If the language of the reading is set, this method will use the appropriate
Language model to determine the lexemes that belong to this reading.  See
L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.
 307
 308 =cut
 309
 310 sub lemmatize {
 311         my $self = shift;
 312         unless( $self->has_language ) {
 313                 warn "Please set a language to lemmatize a tradition";
 314                 return;
 315         }
 316         my $mod = "Text::Tradition::Language::" . $self->language;
 317         load( $mod );
 318         $mod->can( 'reading_lookup' )->( $self );
 319
 320 }
 321
 322 # For graph serialization. Return a string representation of the associated
 323 # reading lexemes.
 324 sub _serialize_lexemes {
 325         my $self = shift;
 326         return Dump( [ $self->lexemes ] );
 327 }
 328
 329
 330 ## Utility methods
 331
 332 sub TO_JSON {
 333         my $self = shift;
 334         return $self->text;
 335 }
 336
 337 ## TODO will need a throw() here
 338
 339 no Moose;
 340 __PACKAGE__->meta->make_immutable;
 341
 342 1;