Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition::Witness; |
2 | use Moose; |
7854e12e |
3 | use Moose::Util::TypeConstraints; |
dd3b58b0 |
4 | |
7158714d |
5 | =head1 NAME |
6 | |
7 | Text::Tradition::Witness - a manuscript witness to a text tradition |
8 | |
9 | =head1 SYNOPSIS |
10 | |
11 | use Text::Tradition::Witness; |
12 | my $w = Text::Tradition::Witness->new( |
13 | 'sigil' => 'A', |
14 | 'identifier' => 'Oxford MS Ex.1932', |
15 | ); |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | Text::Tradition::Witness is an object representation of a manuscript |
20 | witness to a text tradition. A manuscript has a sigil (a short code that |
21 | represents it in the wider tradition), an identifier (e.g. the library ID), |
22 | and probably a text. |
23 | |
24 | =head1 METHODS |
25 | |
26 | =head2 new |
27 | |
28 | Create a new witness. Options include: |
29 | |
30 | =over |
31 | |
32 | =item * sigil - A short code to represent the manuscript. Required. |
33 | |
34 | =item * text - An array of strings (words) that contains the text of the |
861c3e27 |
35 | manuscript. This should not change after the witness has been instantiated, |
36 | and the path through the collation should always match it. |
37 | |
38 | =item * layertext - An array of strings (words) that contains the layered text, |
39 | if any, of the manuscript. This should not change after the witness has been |
40 | instantiated, and the path through the collation should always match it. |
7158714d |
41 | |
42 | =item * source - A reference to the text, such as a filename, if it is not |
43 | given in the 'text' option. |
44 | |
45 | =item * identifier - The recognized name of the manuscript, e.g. a library |
46 | identifier. |
47 | |
48 | =item * other_info - A freeform string for any other description of the |
49 | manuscript. |
50 | |
51 | =back |
52 | |
53 | =head2 sigil |
54 | |
55 | Accessor method for the witness sigil. |
56 | |
57 | =head2 text |
58 | |
59 | Accessor method to get and set the text array. |
60 | |
61 | =head2 source |
62 | |
Accessor method to get the text source. The source is read-only; it can only be supplied to the constructor.
64 | |
65 | =head2 identifier |
66 | |
67 | Accessor method for the witness identifier. |
68 | |
69 | =head2 other_info |
70 | |
71 | Accessor method for the general witness description. |
72 | |
1f7aa795 |
73 | =head2 is_layered |
7158714d |
74 | |
1f7aa795 |
75 | Boolean method to note whether the witness has layers (e.g. pre-correction |
76 | readings) in the collation. |
7158714d |
77 | |
78 | =begin testing |
79 | |
80 | use_ok( 'Text::Tradition::Witness', "can use module" ); |
81 | |
82 | my @text = qw( This is a line of text ); |
83 | my $wit = Text::Tradition::Witness->new( |
84 | 'sigil' => 'A', |
85 | 'text' => \@text, |
86 | ); |
87 | is( ref( $wit ), 'Text::Tradition::Witness', 'Created a witness' ); |
88 | if( $wit ) { |
89 | is( $wit->sigil, 'A', "Witness has correct sigil" ); |
90 | is( join( ' ', @{$wit->text} ), join( ' ', @text ), "Witness has correct text" ); |
91 | } |
92 | |
93 | =end testing |
94 | |
95 | =cut |
96 | |
784877d9 |
# The sigil is the short code identifying this witness. It must be given
# to the constructor and is read-only afterwards.
has sigil => (
    required => 1,
    isa      => 'Str',
    is       => 'ro',
);
dd3b58b0 |
103 | |
7158714d |
# The witness text, stored as a reference to an array of strings
# (one string per word token). 'has_text' reports whether it was set.
# TODO: Work out how this should behave for pre-prepared collations,
# where the tokens are already present in the graph.
has text => (
    predicate => 'has_text',
    isa       => 'ArrayRef[Str]',
    is        => 'rw',
);
b0b4421a |
112 | |
# The layered text of the witness, if it has one: an array reference of
# strings, with 'has_layertext' reporting whether it was set.
has layertext => (
    predicate => 'has_layertext',
    isa       => 'ArrayRef[Str]',
    is        => 'rw',
);
dd3b58b0 |
118 | |
d047cd52 |
# Where the witness is read from when it does not come out of a
# pre-prepared collation; most likely a filename. Read-only, with
# 'has_source' reporting whether one was supplied.
has source => (
    predicate => 'has_source',
    isa       => 'Str',
    is        => 'ro',
);
784877d9 |
126 | |
1f7aa795 |
# The witness path: an array reference of Reading nodes. It may be stored
# during initialization, but should be removed again (via 'clear_path')
# before the witness is saved in a DB.
has path => (
    clearer   => 'clear_path',
    predicate => 'has_path',
    isa       => 'ArrayRef[Text::Tradition::Collation::Reading]',
    is        => 'rw',
);
4a8828f0 |
135 | |
b15511bf |
# A second array reference of Reading nodes, with the same element type as
# 'path' -- presumably the path through the uncorrected layer of the
# witness (TODO confirm). Can be dropped with 'clear_uncorrected_path'.
has uncorrected_path => (
    clearer => 'clear_uncorrected_path',
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading]',
    is      => 'rw',
);
141 | |
# Boolean flag noting whether the witness has layers in the collation.
# No default is declared, so it stays unset until something assigns it.
has is_layered => (
    isa => 'Bool',
    is  => 'rw',
);
f6066bac |
146 | |
# The name of the manuscript (or a similar identifier); optional and
# read-only.
has identifier => (
    isa => 'Str',
    is  => 'ro',
);
f6066bac |
152 | |
# Free-form string for whatever else is known about the witness; optional
# and read-only.
has other_info => (
    isa => 'Str',
    is  => 'ro',
);
158 | |
1f7aa795 |
# If we set an uncorrected path, ever, remember that we did so.
#
# The wrapper is invoked for reads as well as writes of the accessor, so
# the witness is flagged as layered only when a value is actually being
# passed in; merely reading the path must not change the flag.
# Returns whatever the wrapped accessor returns.
around 'uncorrected_path' => sub {
    my $orig = shift;
    my $self = shift;

    $self->is_layered( 1 ) if @_;
    return $self->$orig( @_ );
};
e2902068 |
167 | |
784877d9 |
# Moose post-construction hook. When a 'source' was supplied, try to open
# it as a file and fill the 'text' attribute with its whitespace-separated
# words. If the source cannot be opened as a file it is assumed to be the
# text itself (probably XML) and nothing is done with it here.
sub BUILD {
    my $self = shift;
    return unless $self->has_source;

    # Three-argument open with an explicit read mode: the source may be an
    # arbitrary string, and two-argument open would interpret a leading
    # '<' or '>' or a trailing '|' as a mode (truncating files or running
    # commands). A lexical handle avoids clobbering a global filehandle.
    my $fh;
    my $opened = do {
        # A failed open on a non-filename string is expected; keep it quiet.
        no warnings;
        open( $fh, '<', $self->source );
    };

    # If we didn't open a file, assume the source is a string and leave
    # the text untouched.
    return unless $opened;

    my @words;
    while ( my $line = <$fh> ) {
        chomp $line;
        # split ' ' skips leading whitespace, so indented lines do not
        # contribute an empty-string token.
        push @words, split( ' ', $line );
    }
    close $fh;

    $self->text( \@words );
    return;
}
187 | |
f025e303 |
188 | =head2 export_as_json |
189 | |
190 | Exports the witness as a JSON structure, with the following keys: |
191 | |
192 | =over 4 |
193 | |
194 | =item * id - The witness sigil |
195 | |
196 | =item * name - The witness identifier |
197 | |
198 | =item * tokens - An array of hashes of the form { "t":"WORD" } |
199 | |
200 | =back |
201 | |
202 | =begin testing |
203 | |
204 | use Text::Tradition; |
205 | |
206 | my @text = qw( This is a line of text ); |
207 | my $wit = Text::Tradition::Witness->new( |
208 | 'sigil' => 'A', |
209 | 'text' => \@text, |
210 | 'identifier' => 'test witness', |
211 | ); |
212 | my $jsonstruct = $wit->export_as_json; |
213 | is( $jsonstruct->{'id'}, 'A', "got the right witness sigil" ); |
214 | is( $jsonstruct->{'name'}, 'test witness', "got the right identifier" ); |
215 | is( scalar @{$jsonstruct->{'tokens'}}, 6, "got six text tokens" ); |
216 | foreach my $idx ( 0 .. $#text ) { |
217 | is( $jsonstruct->{'tokens'}->[$idx]->{'t'}, $text[$idx], "tokens look OK" ); |
218 | } |
219 | |
220 | my @ctext = qw( when april with his showers sweet with fruit the drought of march |
221 | has pierced unto the root ); |
222 | my $trad = Text::Tradition->new( |
223 | 'input' => 'CollateX', |
224 | 'file' => 't/data/Collatex-16.xml' ); |
225 | |
226 | $jsonstruct = $trad->witness('A')->export_as_json; |
227 | is( $jsonstruct->{'id'}, 'A', "got the right witness sigil" ); |
228 | is( $jsonstruct->{'name'}, undef, "got undef for missing identifier" ); |
229 | is( scalar @{$jsonstruct->{'tokens'}}, 17, "got all text tokens" ); |
230 | foreach my $idx ( 0 .. $#ctext ) { |
231 | is( $jsonstruct->{'tokens'}->[$idx]->{'t'}, $ctext[$idx], "tokens look OK" ); |
232 | } |
233 | |
234 | =end testing |
235 | |
236 | =cut |
237 | |
# Build the JSON-ready structure for this witness.
#
# Returns a hash reference with the keys:
#   id     - the witness sigil
#   name   - the witness identifier (undef if none was given)
#   tokens - an array reference of hashes of the form { t => WORD },
#            one per entry in the witness text
#
# A witness whose text has not been set yields an empty token list.
sub export_as_json {
    my $self = shift;

    my $words = $self->text || [];

    # Test definedness rather than truth, so that a token such as "0"
    # is exported as-is instead of being replaced by an empty string.
    my @wordlist = map { +{ 't' => ( defined $_ ? $_ : '' ) } } @{$words};

    return {
        'id'     => $self->sigil,
        'tokens' => \@wordlist,
        'name'   => $self->identifier,
    };
}
247 | |
dd3b58b0 |
# Unimport the Moose keywords and then make the class immutable.
no Moose;
__PACKAGE__->meta()->make_immutable();
7158714d |
250 | |
251 | =head1 BUGS / TODO |
252 | |
253 | =over |
254 | |
255 | =item * Get rid of either text or path, as they are redundant. |
256 | |
257 | =item * Re-think the mechanism for pre-correction readings etc. |
258 | |
259 | =back |
260 | |
261 | =head1 LICENSE |
262 | |
263 | This package is free software and is provided "as is" without express |
264 | or implied warranty. You can redistribute it and/or modify it under |
265 | the same terms as Perl itself. |
266 | |
267 | =head1 AUTHOR |
268 | |
269 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |