Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition::Witness; |
2 | use Moose; |
7854e12e |
3 | use Moose::Util::TypeConstraints; |
dd3b58b0 |
4 | |
784877d9 |
5 | # Sigil. Required identifier for a witness. |
dd3b58b0 |
6 | has 'sigil' => ( |
d047cd52 |
7 | is => 'ro', |
8 | isa => 'Str', |
9 | required => 1, |
10 | ); |
dd3b58b0 |
11 | |
d047cd52 |
12 | # Text. This is an array of strings (i.e. word tokens). |
13 | # TODO Think about how to handle this for the case of pre-prepared |
14 | # collations, where the tokens are in the graph already. |
dd3b58b0 |
15 | has 'text' => ( |
d047cd52 |
16 | is => 'rw', |
17 | isa => 'ArrayRef[Str]', |
de51424a |
18 | predicate => 'has_text', |
d047cd52 |
19 | ); |
dd3b58b0 |
20 | |
d047cd52 |
21 | # Source. This is where we read in the witness, if not from a |
22 | # pre-prepared collation. It is probably a filename. |
23 | has 'source' => ( |
24 | is => 'ro', |
25 | isa => 'Str', |
8e1394aa |
26 | predicate => 'has_source', |
d047cd52 |
27 | ); |
784877d9 |
28 | |
4a8828f0 |
29 | has 'path' => ( |
30 | is => 'rw', |
31 | isa => 'ArrayRef[Text::Tradition::Collation::Reading]', |
de51424a |
32 | predicate => 'has_path', |
4a8828f0 |
33 | ); |
34 | |
e2902068 |
35 | has 'post_correctione' => ( |
36 | is => 'rw', |
37 | isa => 'Str', |
38 | predicate => 'has_post_correctione', |
39 | ); |
7854e12e |
40 | |
41 | subtype 'Correction', |
42 | as 'ArrayRef', |
43 | where { @{$_} == 3 && |
44 | $_->[0]->isa( 'Int' ) && |
45 | $_->[1]->isa( 'Int' ) && |
46 | $_->[2]->isa( 'ArrayRef[Text::Tradition::Collation::Reading]' ); |
47 | }, |
48 | message { 'Correction must be a tuple of [offset, length, list]' }; |
49 | |
50 | has 'corrections' => ( |
51 | is => 'ro', |
52 | isa => 'ArrayRef[Correction]', |
53 | default => sub { [] }, |
54 | ); |
e2902068 |
55 | |
56 | |
784877d9 |
57 | sub BUILD { |
58 | my $self = shift; |
d047cd52 |
59 | if( $self->has_source ) { |
784877d9 |
60 | # Read the file and initialize the text. |
d047cd52 |
61 | open( WITNESS, $self->source ) or die "Could not open " |
784877d9 |
62 | . $self->file . "for reading"; |
63 | # TODO support TEI as well as plaintext, sometime |
64 | my @words; |
65 | while(<WITNESS>) { |
66 | chomp; |
67 | push( @words, split( /\s+/, $_ ) ); |
68 | } |
69 | close WITNESS; |
d047cd52 |
70 | $self->text( \@words ); |
784877d9 |
71 | } |
72 | } |
73 | |
de51424a |
74 | # If the text is not present, and the path is, and this is a 'get' |
75 | # request, generate text from path. |
76 | around text => sub { |
77 | my $orig = shift; |
78 | my $self = shift; |
79 | |
80 | if( $self->has_path && !$self->has_text && !@_ ) { |
81 | my @words = map { $_->label } @{$self->path}; |
82 | $self->$orig( \@words ); |
83 | } |
84 | |
85 | $self->$orig( @_ ); |
86 | }; |
87 | |
7854e12e |
88 | sub add_correction { |
930ff666 |
89 | my( $self, $offset, $length, @replacement ) = @_; |
90 | # Rely on Moose for type checking of the arguments |
91 | push( @{$self->corrections}, [ $offset, $length, \@replacement ] ); |
92 | } |
93 | |
94 | sub corrected_path { |
7854e12e |
95 | my $self = shift; |
930ff666 |
96 | |
97 | my @new_path; |
98 | push( @new_path, @{$self->path} ); |
99 | my $drift = 0; |
100 | foreach my $correction ( @{$self->corrections} ) { |
101 | my( $offset, $length, $items ) = @$correction; |
102 | my $realoffset = $offset + $drift; |
103 | splice( @new_path, $realoffset, $length, @$items ); |
104 | $drift += @$items - $length; |
105 | } |
106 | return \@new_path; |
7854e12e |
107 | } |
930ff666 |
108 | |
7854e12e |
109 | |
dd3b58b0 |
110 | no Moose; |
111 | __PACKAGE__->meta->make_immutable; |