BROKEN - making lemmatization work
[scpubgit/stemmatology.git] / lib / Text / Tradition / Witness.pm
CommitLineData
package Text::Tradition::Witness;
use Moose;

# Represents one witness: an object identified by a sigil whose text is
# held as a list of word tokens.  When a 'source' is supplied to the
# constructor, the tokens are read from that file during BUILD.

# Sigil. Required identifier for a witness.
has 'sigil' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
    );

# Text. This is an array of strings (i.e. word tokens).
# TODO Think about how to handle this for the case of pre-prepared
# collations, where the tokens are in the graph already.
has 'text' => (
    is  => 'rw',
    isa => 'ArrayRef[Str]',
    );

# Source. This is where we read in the witness, if not from a
# pre-prepared collation. It is probably a filename.
# The 'has_source' predicate reports whether a source was supplied.
has 'source' => (
    is        => 'ro',
    isa       => 'Str',
    predicate => 'has_source',
    );

# Path. A list of Text::Tradition::Collation::Reading objects.
# NOTE(review): presumably the sequence of readings this witness follows
# through the collation -- confirm against the Collation code.
has 'path' => (
    is  => 'rw',
    isa => 'ArrayRef[Text::Tradition::Collation::Reading]',
    );

# Moose construction hook.  If a source was given, read it line by line,
# split each line into whitespace-separated tokens and store the result
# in 'text'.  Without a source, 'text' is left untouched.
# Dies if the source cannot be opened for reading.
sub BUILD {
    my $self = shift;
    return unless $self->has_source;

    # Read the file and initialize the text.
    # Three-argument open with a lexical handle: the source string is
    # always treated as a plain path (never as a mode or pipe spec), and
    # no package-global filehandle is shared between instances.
    # NOTE(review): no encoding layer is applied, as in the original;
    # confirm whether witness files should be decoded (e.g. as UTF-8).
    open( my $witness_fh, '<', $self->source )
        or die "Could not open " . $self->source . " for reading: $!";

    # TODO support TEI as well as plaintext, sometime
    my @words;
    while ( my $line = <$witness_fh> ) {
        chomp $line;
        # split ' ' (awk mode) skips leading whitespace, so an indented
        # line does not contribute an empty-string token.
        push( @words, split( ' ', $line ) );
    }
    close $witness_fh;

    $self->text( \@words );
    return;
}

no Moose;
__PACKAGE__->meta->make_immutable;