X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FWitness.pm;h=551d3ea38ea9f903ac1c0b08944c9c45b89b1d6c;hb=566f45958928958a29cec59caf402715a1d4b7cb;hp=145eba4c92e5ba074d25df96dc7599e110307824;hpb=6a222840d60d8ef64c47e5ce0c4f033db1f72e2b;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Witness.pm b/lib/Text/Tradition/Witness.pm index 145eba4..551d3ea 100644 --- a/lib/Text/Tradition/Witness.pm +++ b/lib/Text/Tradition/Witness.pm @@ -2,98 +2,197 @@ package Text::Tradition::Witness; use Moose; use Moose::Util::TypeConstraints; +=head1 NAME + +Text::Tradition::Witness - a manuscript witness to a text tradition + +=head1 SYNOPSIS + + use Text::Tradition::Witness; + my $w = Text::Tradition::Witness->new( + 'sigil' => 'A', + 'identifier' => 'Oxford MS Ex.1932', + ); + +=head1 DESCRIPTION + +Text::Tradition::Witness is an object representation of a manuscript +witness to a text tradition. A manuscript has a sigil (a short code that +represents it in the wider tradition), an identifier (e.g. the library ID), +and probably a text. + +=head1 METHODS + +=head2 new + +Create a new witness. Options include: + +=over + +=item * sigil - A short code to represent the manuscript. Required. + +=item * text - An array of strings (words) that contains the text of the +manuscript. + +=item * source - A reference to the text, such as a filename, if it is not +given in the 'text' option. + +=item * identifier - The recognized name of the manuscript, e.g. a library +identifier. + +=item * other_info - A freeform string for any other description of the +manuscript. + +=back + +=head2 sigil + +Accessor method for the witness sigil. + +=head2 text + +Accessor method to get and set the text array. + +=head2 source + +Accessor method to get and set the text source. + +=head2 identifier + +Accessor method for the witness identifier. + +=head2 other_info + +Accessor method for the general witness description. + +=head2 is_layered + +Boolean method to note whether the witness has layers (e.g. pre-correction +readings) in the collation. + +=begin testing + +use_ok( 'Text::Tradition::Witness', "can use module" ); + +my @text = qw( This is a line of text ); +my $wit = Text::Tradition::Witness->new( + 'sigil' => 'A', + 'text' => \@text, + ); +is( ref( $wit ), 'Text::Tradition::Witness', 'Created a witness' ); +if( $wit ) { + is( $wit->sigil, 'A', "Witness has correct sigil" ); + is( join( ' ', @{$wit->text} ), join( ' ', @text ), "Witness has correct text" ); +} + +=end testing + +=cut + # Sigil. Required identifier for a witness. has 'sigil' => ( - is => 'ro', - isa => 'Str', - required => 1, - ); + is => 'ro', + isa => 'Str', + required => 1, + ); -# Text. This is an array of strings (i.e. word tokens). +# Text. This is an array of strings (i.e. word tokens). # TODO Think about how to handle this for the case of pre-prepared # collations, where the tokens are in the graph already. has 'text' => ( - is => 'rw', - isa => 'ArrayRef[Str]', - predicate => 'has_text', - ); + is => 'rw', + isa => 'ArrayRef[Str]', + predicate => 'has_text', + ); # Source. This is where we read in the witness, if not from a # pre-prepared collation. It is probably a filename. has 'source' => ( - is => 'ro', - isa => 'Str', - predicate => 'has_source', - ); + is => 'ro', + isa => 'Str', + predicate => 'has_source', + ); +# Path. This is an array of Reading nodes that can be saved during +# initialization, but should be cleared before saving in a DB. has 'path' => ( - is => 'rw', - isa => 'ArrayRef[Text::Tradition::Collation::Reading]', - predicate => 'has_path', - ); - -subtype 'Correction', - as 'ArrayRef', - where { @{$_} == 3 && - find_type_constraint('Int')->check( $_->[0] ) && - find_type_constraint('Int')->check( $_->[1] ) && - find_type_constraint('ArrayRef[Text::Tradition::Collation::Reading]')->check( $_->[2] ); - }, - message { 'Correction must be a tuple of [offset, length, list]' }; - -has 'ante_corr' => ( - is => 'rw', - isa => 'ArrayRef[Correction]', - predicate => 'has_ante_corr', - ); - + is => 'rw', + isa => 'ArrayRef[Text::Tradition::Collation::Reading]', + predicate => 'has_path', + clearer => 'clear_path', + ); -sub BUILD { - my $self = shift; - if( $self->has_source ) { - # Read the file and initialize the text. - open( WITNESS, $self->source ) or die "Could not open " - . $self->file . "for reading"; - # TODO support TEI as well as plaintext, sometime - my @words; - while() { - chomp; - push( @words, split( /\s+/, $_ ) ); - } - close WITNESS; - $self->text( \@words ); - } -} +has 'uncorrected_path' => ( + is => 'rw', + isa => 'ArrayRef[Text::Tradition::Collation::Reading]', + clearer => 'clear_uncorrected_path', + ); + +has 'is_layered' => ( + is => 'rw', + isa => 'Bool', + ); -# If the text is not present, and the path is, and this is a 'get' -# request, generate text from path. -around text => sub { - my $orig = shift; - my $self = shift; +# Manuscript name or similar +has 'identifier' => ( + is => 'ro', + isa => 'Str', + ); - if( $self->has_path && !$self->has_text && !@_ ) { - my @words = map { $_->label } @{$self->path}; - $self->$orig( \@words ); - } - - $self->$orig( @_ ); +# Any other info we have +has 'other_info' => ( + is => 'ro', + isa => 'Str', + ); + +# If we set an uncorrected path, ever, remember that we did so. +around 'uncorrected_path' => sub { + my $orig = shift; + my $self = shift; + + $self->is_layered( 1 ); + $self->$orig( @_ ); }; -sub uncorrected_path { - my $self = shift; - - my @new_path; - push( @new_path, @{$self->path} ); - my $drift = 0; - foreach my $change ( @{$self->ante_corr} ) { - my( $offset, $length, $items ) = @$change; - my $realoffset = $offset + $drift; - splice( @new_path, $realoffset, $length, @$items ); - $drift += @$items - $length; - } - return \@new_path; +sub BUILD { + my $self = shift; + if( $self->has_source ) { + # Read the file and initialize the text. + my $rc; + eval { no warnings; $rc = open( WITNESS, $self->source ); }; + # If we didn't open a file, assume it is a string. + if( $rc ) { + my @words; + while() { + chomp; + push( @words, split( /\s+/, $_ ) ); + } + close WITNESS; + $self->text( \@words ); + } # else the text is in the source string, probably + # XML, and we are doing nothing with it. + } } - no Moose; __PACKAGE__->meta->make_immutable; + +=head1 BUGS / TODO + +=over + +=item * Get rid of either text or path, as they are redundant. + +=item * Re-think the mechanism for pre-correction readings etc. + +=back + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE