only use applicable a.c. witnesses in stemma for analysis
[scpubgit/stemmatology.git] / lib / Text / Tradition / Witness.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Witness;
2use Moose;
7854e12e 3use Moose::Util::TypeConstraints;
dd3b58b0 4
7158714d 5=head1 NAME
6
7Text::Tradition::Witness - a manuscript witness to a text tradition
8
9=head1 SYNOPSIS
10
11 use Text::Tradition::Witness;
12 my $w = Text::Tradition::Witness->new(
13 'sigil' => 'A',
14 'identifier' => 'Oxford MS Ex.1932',
15 );
16
17=head1 DESCRIPTION
18
19Text::Tradition::Witness is an object representation of a manuscript
20witness to a text tradition. A manuscript has a sigil (a short code that
21represents it in the wider tradition), an identifier (e.g. the library ID),
22and probably a text.
23
24=head1 METHODS
25
26=head2 new
27
28Create a new witness. Options include:
29
30=over
31
32=item * sigil - A short code to represent the manuscript. Required.
33
34=item * text - An array of strings (words) that contains the text of the
861c3e27 35manuscript. This should not change after the witness has been instantiated,
36and the path through the collation should always match it.
37
38=item * layertext - An array of strings (words) that contains the layered text,
39if any, of the manuscript. This should not change after the witness has been
40instantiated, and the path through the collation should always match it.
7158714d 41
42=item * source - A reference to the text, such as a filename, if it is not
43given in the 'text' option.
44
45=item * identifier - The recognized name of the manuscript, e.g. a library
46identifier.
47
48=item * other_info - A freeform string for any other description of the
49manuscript.
50
51=back
52
53=head2 sigil
54
55Accessor method for the witness sigil.
56
57=head2 text
58
59Accessor method to get and set the text array.
60
61=head2 source
62
Accessor method to get the text source. The source is read-only; it can
only be supplied to the constructor.
64
65=head2 identifier
66
67Accessor method for the witness identifier.
68
69=head2 other_info
70
71Accessor method for the general witness description.
72
1f7aa795 73=head2 is_layered
7158714d 74
1f7aa795 75Boolean method to note whether the witness has layers (e.g. pre-correction
76readings) in the collation.
7158714d 77
78=begin testing
79
80use_ok( 'Text::Tradition::Witness', "can use module" );
81
82my @text = qw( This is a line of text );
83my $wit = Text::Tradition::Witness->new(
84 'sigil' => 'A',
85 'text' => \@text,
86 );
87is( ref( $wit ), 'Text::Tradition::Witness', 'Created a witness' );
88if( $wit ) {
89 is( $wit->sigil, 'A', "Witness has correct sigil" );
90 is( join( ' ', @{$wit->text} ), join( ' ', @text ), "Witness has correct text" );
91}
92
93=end testing
94
95=cut
96
# The witness sigil: the short code that identifies this witness within
# the tradition. Mandatory, and read-only once the object exists.
has sigil => (
    isa      => 'Str',
    is       => 'ro',
    required => 1,
);
dd3b58b0 103
# The witness text, held as a list of strings (one word token each).
# has_text reports whether a text has been supplied.
# TODO: Work out how this should behave for pre-prepared collations,
# where the tokens already live in the graph.
has text => (
    isa       => 'ArrayRef[Str]',
    is        => 'rw',
    predicate => 'has_text',
);
b0b4421a 112
# The layered text of the witness, if it has one, as a list of strings
# (one word token each). has_layertext reports whether it was supplied.
has layertext => (
    isa       => 'ArrayRef[Str]',
    is        => 'rw',
    predicate => 'has_layertext',
);
dd3b58b0 118
# Where the witness is read from when it does not come out of a
# pre-prepared collation. Most likely a filename. Read-only;
# has_source reports whether one was given.
has source => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_source',
);
784877d9 126
# The witness path: a list of Reading nodes. It may be stored while the
# tradition is being initialized, but should be dropped (clear_path)
# before the object is saved in a DB.
has path => (
    isa       => 'ArrayRef[Text::Tradition::Collation::Reading]',
    is        => 'rw',
    predicate => 'has_path',
    clearer   => 'clear_path',
);
4a8828f0 135
# The list of Reading nodes for the uncorrected layer of the witness.
# Like 'path' it can be dropped again, via clear_uncorrected_path.
has uncorrected_path => (
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading]',
    is      => 'rw',
    clearer => 'clear_uncorrected_path',
);
141
# Boolean flag: does this witness have layers (e.g. pre-correction
# readings) in the collation? No default is declared, so it reads as
# undef (false) until something sets it.
has is_layered => (
    isa => 'Bool',
    is  => 'rw',
);
f6066bac 146
# The recognized name of the manuscript, e.g. a library identifier.
# Optional and read-only.
has identifier => (
    isa => 'Str',
    is  => 'ro',
);
f6066bac 152
# Freeform string for any further description of the manuscript.
# Optional and read-only.
has other_info => (
    isa => 'Str',
    is  => 'ro',
);
158
# If we set an uncorrected path, ever, remember that we did so.
#
# Wraps the 'uncorrected_path' accessor. The witness is flagged as layered
# only when the accessor is called with arguments (i.e. as a setter);
# merely reading the path leaves is_layered alone. The flag is raised after
# the wrapped accessor returns, so a value it refuses does not mark the
# witness as layered.
#
# Returns whatever the wrapped accessor returns.
around 'uncorrected_path' => sub {
    my $orig = shift;
    my $self = shift;

    # Anything left in @_ is a value being assigned.
    my $is_write = @_ ? 1 : 0;

    my $path = $self->$orig( @_ );
    $self->is_layered( 1 ) if $is_write;
    return $path;
};
e2902068 167
# Object construction hook. When a 'source' was given, try to read it as a
# file and store its whitespace-separated words as the witness 'text'.
#
# A source that cannot be opened is assumed to be the text itself (probably
# XML held in a string); nothing is done with it here, and the failed open
# is deliberately not treated as an error.
sub BUILD {
    my $self = shift;
    return unless $self->has_source;

    # Three-argument open with an explicit read mode: the source is always
    # taken as a plain filename, never as a mode or pipe specification, so
    # a source such as ">file" or "cmd |" cannot truncate a file or run a
    # command. The lexical handle avoids touching a global WITNESS handle.
    # Warnings are silenced because the source may legitimately be a long
    # non-filename string.
    my $fh;
    my $opened = eval {
        no warnings;
        open( $fh, '<', $self->source );
    };

    # If we didn't open a file, assume the source is a string and leave it.
    return unless $opened;

    my @words;
    while ( my $line = <$fh> ) {
        # split ' ' discards leading whitespace (and the trailing newline),
        # so indented lines do not contribute an empty first token.
        push @words, split( ' ', $line );
    }
    close $fh;

    $self->text( \@words );
    return;
}
187
f025e303 188=head2 export_as_json
189
190Exports the witness as a JSON structure, with the following keys:
191
192=over 4
193
194=item * id - The witness sigil
195
196=item * name - The witness identifier
197
198=item * tokens - An array of hashes of the form { "t":"WORD" }
199
200=back
201
202=begin testing
203
204use Text::Tradition;
205
206my @text = qw( This is a line of text );
207my $wit = Text::Tradition::Witness->new(
208 'sigil' => 'A',
209 'text' => \@text,
210 'identifier' => 'test witness',
211 );
212my $jsonstruct = $wit->export_as_json;
213is( $jsonstruct->{'id'}, 'A', "got the right witness sigil" );
214is( $jsonstruct->{'name'}, 'test witness', "got the right identifier" );
215is( scalar @{$jsonstruct->{'tokens'}}, 6, "got six text tokens" );
216foreach my $idx ( 0 .. $#text ) {
217 is( $jsonstruct->{'tokens'}->[$idx]->{'t'}, $text[$idx], "tokens look OK" );
218}
219
220my @ctext = qw( when april with his showers sweet with fruit the drought of march
221 has pierced unto the root );
222my $trad = Text::Tradition->new(
223 'input' => 'CollateX',
224 'file' => 't/data/Collatex-16.xml' );
225
226$jsonstruct = $trad->witness('A')->export_as_json;
227is( $jsonstruct->{'id'}, 'A', "got the right witness sigil" );
228is( $jsonstruct->{'name'}, undef, "got undef for missing identifier" );
229is( scalar @{$jsonstruct->{'tokens'}}, 17, "got all text tokens" );
230foreach my $idx ( 0 .. $#ctext ) {
231 is( $jsonstruct->{'tokens'}->[$idx]->{'t'}, $ctext[$idx], "tokens look OK" );
232}
233
234=end testing
235
236=cut
237
# Build the JSON-ready structure for this witness.
#
# Returns a hash reference with the keys:
#   id     - the witness sigil
#   name   - the witness identifier (undef when none was given)
#   tokens - an array reference of hashes of the form { t => WORD }
#
# A witness with no text yields an empty token list rather than dying.
sub export_as_json {
    my $self = shift;

    # 'text' may never have been set; treat that as "no tokens".
    my $words = $self->text || [];

    # Only an undefined token is replaced by the empty string. A plain
    # truth test would also blank out the perfectly valid word "0".
    my @wordlist = map { +{ 't' => ( defined $_ ? $_ : '' ) } } @{$words};

    return {
        'id'     => scalar( $self->sigil ),
        'tokens' => \@wordlist,
        'name'   => scalar( $self->identifier ),
    };
}
247
# Drop the Moose keywords from this package's namespace, then make the
# class immutable.
no Moose;
__PACKAGE__->meta()->make_immutable();
7158714d 250
251=head1 BUGS / TODO
252
253=over
254
255=item * Get rid of either text or path, as they are redundant.
256
257=item * Re-think the mechanism for pre-correction readings etc.
258
259=back
260
261=head1 LICENSE
262
263This package is free software and is provided "as is" without express
264or implied warranty. You can redistribute it and/or modify it under
265the same terms as Perl itself.
266
267=head1 AUTHOR
268
269Tara L Andrews E<lt>aurum@cpan.orgE<gt>