Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition; |
2 | |
65ed66b9 |
3 | use JSON qw / from_json /; |
4a8828f0 |
4 | use Module::Load; |
dd3b58b0 |
5 | use Moose; |
951ddfe8 |
6 | use Moose::Util qw/ does_role apply_all_roles /; |
4889be4f |
7 | use Safe::Isa; |
8e1394aa |
8 | use Text::Tradition::Collation; |
951ddfe8 |
9 | use Text::Tradition::Error; |
8e1394aa |
10 | use Text::Tradition::Witness; |
951ddfe8 |
11 | use TryCatch; |
dd3b58b0 |
12 | |
331c2dbf |
# Package version. Declared with 'our' instead of the obsolete 'use vars'
# pragma; the package variable $Text::Tradition::VERSION is unchanged.
our $VERSION = "1.2";
37bf09f4 |
15 | |
# Enable plugin role(s) if available. Each role is composed inside its own
# eval, so a role that fails to compose (for instance because it is not
# installed) is skipped and the remaining ones are still attempted.
# Failures are deliberately silent: no warning is issued when, e.g., the
# stemma analysis role is missing.
foreach my $plugin_role ( qw/
    Text::Tradition::HasStemma
    Text::Tradition::Language
    Text::Tradition::Ownership
/ ) {
    eval { with $plugin_role; };
}
331c2dbf |
24 | |
dd3b58b0 |
# The collation object belonging to this tradition. Read-only from the
# outside; BUILD and clear_collation install it via the private writer.
has 'collation' => (
    isa    => 'Text::Tradition::Collation',
    is     => 'ro',
    writer => '_save_collation',
);

# Witness objects keyed by sigil. The hash has no accessor of its own;
# it is reached only through the delegated methods listed in 'handles'
# (several of which are wrapped by 'around' modifiers in this file).
has 'witness_hash' => (
    isa     => 'HashRef[Text::Tradition::Witness]',
    traits  => ['Hash'],
    default => sub { return {}; },
    handles => {
        has_witness => 'exists',
        witness     => 'get',
        witnesses   => 'values',
        add_witness => 'set',
        del_witness => 'delete',
    },
);

# Human-readable name of the text.
has 'name' => (
    isa     => 'Str',
    is      => 'rw',
    default => 'Tradition',
);

# Flag set to true (through _init_done) once BUILD has run to completion.
has '_initialized' => (
    isa     => 'Bool',
    is      => 'ro',
    writer  => '_init_done',
    default => undef,
);
56 | |
4889be4f |
# Wrap the delegated hash 'set' so that callers may hand over either a
# ready-made Text::Tradition::Witness object or the constructor options
# for one (as a single hashref or as a flat key/value list). In the latter
# case the witness is created here with this tradition as its 'tradition'.
# The witness is stored under its own sigil and returned.
around 'add_witness' => sub {
    my( $orig, $self, @spec ) = @_;

    my $witness;
    if( @spec == 1 && $spec[0]->$_isa( 'Text::Tradition::Witness' ) ) {
        # Already a witness object; store it as it is.
        ( $witness ) = @spec;
    } else {
        # Build the witness from the given options.
        my %args = @spec == 1 ? %{$spec[0]} : @spec;
        $args{'tradition'} = $self;
        $witness = Text::Tradition::Witness->new( %args );
    }

    $self->$orig( $witness->sigil => $witness );
    return $witness;
};
331c2dbf |
72 | |
3b853983 |
# Allow deletion of witness by object as well as by sigil. Every argument
# that is a Text::Tradition::Witness (or an instance of a subclass) is
# replaced by its sigil; anything else is passed through unchanged as a
# hash key. Returns whatever the delegated hash 'delete' returns.
around 'del_witness' => sub {
    my( $orig, $self, @targets ) = @_;
    my @sigils;
    foreach my $target ( @targets ) {
        # $_isa (Safe::Isa) is safe to call on plain strings and also
        # recognises subclasses, matching the check used by add_witness.
        push( @sigils,
            $target->$_isa( 'Text::Tradition::Witness' )
                ? $target->sigil
                : $target );
    }
    return $self->$orig( @sigils );
};
84 | |
85 | # Don't allow an empty hash value |
# Consult the delegated hash 'get' only when the sigil is actually present;
# for an unknown sigil return nothing (undef in scalar context, the empty
# list in list context).
around 'witness' => sub {
    my $orig  = shift;
    my $self  = shift;
    my $sigil = shift;
    if( $self->has_witness( $sigil ) ) {
        return $self->$orig( $sigil );
    }
    return;
};
91 | |
4889be4f |
# Cope with witness sigil changes: re-register the witness currently known
# as $sig under the sigil $newsig.
#
# Throws a Text::Tradition::Error if there is no witness $sig, if the
# witness has already been collated, if another witness is already
# registered under $newsig, or if the witness refuses the new sigil (in
# which case it stays registered under its old one).
# Returns the renamed witness object.
sub rename_witness {
    my( $self, $sig, $newsig ) = @_;
    my $wit = $self->witness( $sig );
    $self->throw( "No such witness $sig" ) unless $wit;
    $self->throw( "Cannot rename witness that has already been collated" )
        if $wit->is_collated;
    # add_witness stores by sigil and would silently replace any witness
    # already held under $newsig, so refuse before anything is removed.
    $self->throw( "Witness $newsig already exists" )
        if defined $newsig && $newsig ne $sig && $self->has_witness( $newsig );

    $wit = $self->del_witness( $sig );
    try {
        $wit->_set_sigil( $newsig );
    } catch ( $e ) {
        # Don't lose the witness if the rename failed
        $self->add_witness( $wit );
        $self->throw( $e );
    }
    return $self->add_witness( $wit );
}
109 | |
331c2dbf |
110 | =head1 NAME |
111 | |
112 | Text::Tradition - a software model for a set of collated texts |
113 | |
114 | =head1 SYNOPSIS |
115 | |
116 | use Text::Tradition; |
117 | my $t = Text::Tradition->new( |
118 | 'name' => 'this is a text', |
119 | 'input' => 'TEI', |
120 | 'file' => '/path/to/tei_parallel_seg_file.xml' ); |
121 | |
122 | my @text_wits = $t->witnesses(); |
123 | my $manuscript_a = $t->witness( 'A' ); |
82fa4d57 |
124 | |
125 | $t = Text::Tradition->new(); |
126 | $t->add_witness( 'sourcetype' => 'xmldesc', |
127 | 'file' => '/path/to/teitranscription.xml' ); |
128 | $t->add_witness( 'sourcetype' => 'plaintext', 'sigil' => 'Q', |
129 | 'string' => 'The quick brown fox jumped over the lazy dogs' ); |
130 | ## TODO |
131 | $t->collate_texts; |
331c2dbf |
132 | |
133 | my $text_path_svg = $t->collation->as_svg(); |
134 | ## See Text::Tradition::Collation for more on text collation itself |
135 | |
136 | =head1 DESCRIPTION |
137 | |
138 | Text::Tradition is a library for representation and analysis of collated |
139 | texts, particularly medieval ones. A 'tradition' refers to the aggregation |
140 | of surviving versions of a text, generally preserved in multiple |
141 | manuscripts (or 'witnesses'). A Tradition object thus has one or more |
142 | Witnesses, as well as a Collation that represents the unity of all versions |
143 | of the text. |
144 | |
145 | =head1 METHODS |
146 | |
147 | =head2 new |
148 | |
149 | Creates and returns a new text tradition object. The following options are |
150 | accepted. |
151 | |
152 | General options: |
153 | |
154 | =over 4 |
155 | |
156 | =item B<name> - The name of the text. |
157 | |
158 | =back |
159 | |
160 | Initialization based on a collation file: |
161 | |
162 | =over 4 |
163 | |
164 | =item B<input> - The input format of the collation file. Can be one of the |
165 | following: |
166 | |
167 | =over 4 |
168 | |
169 | =item * Self - a GraphML format produced by this module |
170 | |
171 | =item * CollateX - a GraphML format produced by CollateX |
172 | |
173 | =item * CTE - a TEI XML format produced by Classical Text Editor |
174 | |
a445ce40 |
175 | =item * JSON - an alignment table in JSON format, as produced by CollateX and |
176 | other tools |
331c2dbf |
177 | |
178 | =item * TEI - a TEI parallel segmentation format file |
179 | |
a445ce40 |
180 | =item * Tabular - a spreadsheet collation. See the documentation for |
181 | L<Text::Tradition::Parser::Tabular> for an explanation of additional options. |
331c2dbf |
182 | |
183 | =back |
184 | |
185 | =item B<file> - The name of the file that contains the data. One of 'file' |
186 | or 'string' should be specified. |
187 | |
188 | =item B<string> - A text string that contains the data. One of 'file' or |
189 | 'string' should be specified. |
190 | |
331c2dbf |
191 | =back |
192 | |
193 | Initialization based on a list of witnesses [NOT YET IMPLEMENTED]: |
194 | |
195 | =over 4 |
196 | |
197 | =item B<witnesses> - A reference to an array of Text::Tradition::Witness |
198 | objects that carry the text to be collated. |
199 | |
200 | =item B<collator> - A reference to a collation program that will accept |
201 | Witness objects. |
202 | |
203 | =back |
204 | |
205 | =head2 B<witnesses> |
206 | |
207 | Return the Text::Tradition::Witness objects associated with this tradition, |
208 | as an array. |
209 | |
044d1e45 |
210 | =head2 B<witness>( $sigil ) |
211 | |
212 | Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef |
213 | if there is no such object within the tradition. |
214 | |
331c2dbf |
215 | =head2 B<add_witness>( %opts ) |
216 | |
217 | Instantiate a new witness with the given options (see documentation for |
218 | Text::Tradition::Witness) and add it to the tradition. |
219 | |
044d1e45 |
220 | =head2 B<del_witness>( $sigil ) |
221 | |
222 | Delete the witness with the given sigil from the tradition. Returns the |
223 | witness object for the deleted witness. |
224 | |
331c2dbf |
225 | =begin testing |
226 | |
4889be4f |
use TryCatch;
# use_ok takes an import list, not a test description, after the module name.
use_ok( 'Text::Tradition' );

# An empty tradition
my $t = Text::Tradition->new( 'name' => 'empty' );
is( ref( $t ), 'Text::Tradition', "initialized an empty Tradition object" );
is( $t->name, 'empty', "object has the right name" );
is( scalar $t->witnesses, 0, "object has no witnesses" );

# A tradition parsed from a tabular collation file
my $simple = 't/data/simple.txt';
my $s = Text::Tradition->new(
    'name'  => 'inline',
    'input' => 'Tabular',
    'file'  => $simple,
    );
is( ref( $s ), 'Text::Tradition', "initialized a Tradition object" );
is( $s->name, 'inline', "object has the right name" );
is( scalar $s->witnesses, 3, "object has three witnesses" );

# Witness lookup, for both an existing and a missing sigil
my $wit_a = $s->witness('A');
is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" );
if( $wit_a ) {
    is( $wit_a->sigil, 'A', "Witness A has the right sigil" );
}
is( $s->witness('X'), undef, "There is no witness X" );
# Ask the tradition itself: the witnesses live in the 'witness_hash'
# attribute, so poking at $s->{'witnesses'} could never fail.
ok( !$s->has_witness('X'), "Witness key X not created" );

# Adding a witness from constructor options
my $wit_d = $s->add_witness( 'sigil' => 'D', 'sourcetype' => 'plaintext',
    'string' => 'je suis depourvu de foi' );
is( ref( $wit_d ), 'Text::Tradition::Witness', "new witness created" );
is( $wit_d->sigil, 'D', "witness has correct sigil" );
is( scalar $s->witnesses, 4, "object now has four witnesses" );

# A failed rename must not lose the witness
try {
    $s->rename_witness( 'D', 'Invalid Sigil' );
    ok( 0, "Renamed witness with bad sigil" );
} catch ( Text::Tradition::Error $e ) {
    is( $s->witness('D'), $wit_d, "Held onto witness during bad rename" );
}

# A successful rename moves the witness to the new sigil
try {
    $s->rename_witness( 'D', 'Q' );
    ok( 1, "Rename of witness succeeded" );
    is( $s->witness('Q'), $wit_d, "Witness available under new sigil" );
    ok( !$s->has_witness('D'), "Witness no longer available under old sigil" );
} catch ( Text::Tradition::Error $e ) {
    ok( 0, "Failed to rename witness: " . $e->message );
}

# Deletion returns the deleted witness
my $del = $s->del_witness( 'Q' );
is( $del, $wit_d, "Deleted correct witness" );
is( scalar $s->witnesses, 3, "object has three witnesses again" );

# Collated witnesses may not be renamed
try {
    $s->rename_witness( 'A', 'WitA' );
    ok( 0, "Successfully renamed an already collated witness" );
} catch ( Text::Tradition::Error $e ) {
    is( $e->message, 'Cannot rename witness that has already been collated',
        "Refused to rename an already-collated witness" );
}
331c2dbf |
286 | |
287 | =end testing |
288 | |
289 | =cut |
910a0a6d |
290 | |
df6d9812 |
291 | |
8e1394aa |
# Moose post-construction hook. Creates the collation object for the new
# tradition and, when an 'input' format was given, hands the constructor
# arguments to the matching Text::Tradition::Parser::* module.
#
# An unrecognized format, or a base-text format without a 'base' argument,
# produces a warning and an early return, in which case the tradition is
# NOT marked as initialized.
sub BUILD {
    my( $self, $init_args ) = @_;

    # First, make a collation object. This will use only those arguments in
    # init_args that apply to the collation.
    my $collation = Text::Tradition::Collation->new( %$init_args,
        'tradition' => $self );
    $self->_save_collation( $collation );

    if( exists $init_args->{'input'} ) {
        # Formats that can be parsed on their own, and formats that need
        # a base text to be supplied alongside.
        my %standalone = map { $_ => 1 }
            qw/ Self CollateText CollateX CTE JSON TEI Tabular /;
        my %needs_base = map { $_ => 1 } qw/ KUL /;
        my $format = $init_args->{'input'};

        # An empty or false 'input' value simply means "nothing to parse".
        if( $format ) {
            my $use_base = $needs_base{$format};
            unless( $standalone{$format} || $use_base ) {
                warn "Unrecognized input format $format; not parsing";
                return;
            }
            if( $use_base && !exists $init_args->{'base'} ) {
                warn "Cannot make a collation from $format without a base text";
                return;
            }

            # Base-text formats are all handled by the BaseText parser; the
            # original 'input' value stays available to it in $init_args.
            my $parser = 'Text::Tradition::Parser::'
                . ( $use_base ? 'BaseText' : $format );
            load( $parser );
            $parser->can('parse')->( $self, $init_args );
        }
    }
    $self->_init_done( 1 );
    return $self;
}
334 | |
d1a7f940 |
335 | =head2 clear_collation |
336 | |
337 | Blow away the existing collation object and mark all witnesses as uncollated. |
338 | Not to be used lightly. |
339 | |
340 | =cut |
341 | |
# Replace the collation with a brand-new Text::Tradition::Collation for
# this tradition and call is_collated( 0 ) on every witness.
# Returns nothing.
sub clear_collation {
    my $self = shift;
    $self->_save_collation( Text::Tradition::Collation->new( tradition => $self ) );
    # Plain loop rather than map in void context: this is done purely for
    # its side effect on each witness.
    foreach my $wit ( $self->witnesses ) {
        $wit->is_collated( 0 );
    }
    return;
}
347 | |
fae52efd |
348 | =head2 add_json_witnesses( $jsonstring, $options ) |
349 | |
350 | Adds a set of witnesses from a JSON specification: an object whose |
351 | C<witnesses> key holds an array of witness objects. This is a wrapper to parse |
352 | the JSON and call add_witness (with the specified $options) for each element. |
353 | |
354 | =cut |
355 | |
# Parse $jsonstr and add one witness per element of its 'witnesses' array.
#
#   $jsonstr   - JSON text for an object with a 'witnesses' array
#   $extraopts - optional hashref of further options passed to add_witness
#                for every witness; it is copied, never modified
#
# Each witness is added with 'sourcetype' => 'json' and 'object' set to the
# decoded array element; these two keys override any same-named entries in
# $extraopts. Returns nothing.
sub add_json_witnesses {
    my( $self, $jsonstr, $extraopts ) = @_;
    my $witarray = from_json( $jsonstr );
    foreach my $witspec ( @{$witarray->{witnesses}} ) {
        # Build a fresh option set per witness so that neither the caller's
        # hash nor the previous iteration's values are touched.
        my %opts = (
            %{ $extraopts || {} },
            'sourcetype' => 'json',
            'object'     => $witspec,
        );
        $self->add_witness( \%opts );
    }
    return;
}
c5104dc0 |
366 | |
951ddfe8 |
# Raise a Text::Tradition::Error with ident 'Tradition error', using the
# first argument after the invocant as its message.
sub throw {
    my( $self, $message ) = @_;
    Text::Tradition::Error->throw(
        'ident'   => 'Tradition error',
        'message' => $message,
        );
}
374 | |
dd3b58b0 |
# Remove the Moose keywords from this namespace and finalize the class.
no Moose;
__PACKAGE__->meta->make_immutable;

# Explicit true value, so that loading the module does not depend on what
# make_immutable happens to return.
1;
331c2dbf |
377 | |
378 | |
379 | =head1 BUGS / TODO |
380 | |
381 | =over |
382 | |
383 | =item * Allow tradition to be initialized via passing to a collator. |
384 | |
385 | =back |
386 | |
387 | =head1 LICENSE |
388 | |
389 | This package is free software and is provided "as is" without express |
390 | or implied warranty. You can redistribute it and/or modify it under |
391 | the same terms as Perl itself. |
392 | |
393 | =head1 AUTHOR |
394 | |
395 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |