Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition; |
2 | |
65ed66b9 |
3 | use JSON qw / from_json /; |
4a8828f0 |
4 | use Module::Load; |
dd3b58b0 |
5 | use Moose; |
8e1394aa |
6 | use Text::Tradition::Collation; |
56cf65bd |
7 | use Text::Tradition::Stemma; |
8e1394aa |
8 | use Text::Tradition::Witness; |
cf7e4e7b |
9 | use Text::Tradition::User; |
dd3b58b0 |
10 | |
331c2dbf |
11 | use vars qw( $VERSION ); |
f242e74d |
12 | $VERSION = "0.5"; |
331c2dbf |
13 | |
dd3b58b0 |
# The collation belonging to this tradition. Callers may only read it;
# BUILD installs the object through the private writer.
has collation => (
    isa    => 'Text::Tradition::Collation',
    is     => 'ro',
    writer => '_save_collation',
);

# The witnesses of the tradition, stored in a hash keyed on sigil. There is
# no direct accessor; everything goes through the delegated hash methods.
has witness_hash => (
    isa     => 'HashRef[Text::Tradition::Witness]',
    traits  => [ 'Hash' ],
    default => sub { +{} },
    handles => {
        witnesses   => 'values',
        witness     => 'get',
        has_witness => 'exists',
        add_witness => 'set',
        del_witness => 'delete',
    },
);

# Human-readable name of the text.
has name => (
    isa     => 'Str',
    is      => 'rw',
    default => 'Tradition',
);

# Language of the text; optional, so has_language reports whether it was set.
has language => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_language',
);

# The stemmata associated with the tradition, in the order they were added.
has stemmata => (
    isa     => 'ArrayRef[Text::Tradition::Stemma]',
    traits  => [ 'Array' ],
    default => sub { [] },
    handles => {
        stemmata       => 'elements',
        stemma         => 'get',
        stemma_count   => 'count',
        _add_stemma    => 'push',
        clear_stemmata => 'clear',
    },
);

# Flag that BUILD sets (through _init_done) when it runs to completion.
has _initialized => (
    isa     => 'Bool',
    is      => 'ro',
    writer  => '_init_done',
    default => undef,
);

# Optional owner of the tradition. Held as a weak reference.
has user => (
    isa       => 'Text::Tradition::User',
    is        => 'rw',
    required  => 0,
    weak_ref  => 1,
    predicate => 'has_user',
    clearer   => 'clear_user',
);

# Boolean 'public' flag; false unless set.
has public => (
    isa      => 'Bool',
    is       => 'rw',
    required => 0,
    default  => 0,
);
80 | |
3b853983 |
# add_witness takes witness options, not a ready-made object: build the
# Witness here, then file it in the hash under its own sigil.
# Accepts either a single hashref of options or a flat key/value list.
# Returns the newly created witness.
around 'add_witness' => sub {
    my( $orig, $self, @spec ) = @_;
    # TODO allow add of a Witness object?
    my %args;
    if( @spec == 1 ) {
        %args = %{ $spec[0] };
    } else {
        %args = @spec;
    }
    $args{'tradition'} = $self;
    # The witness inherits the tradition's language unless it names its own.
    unless( exists $args{'language'} ) {
        my $inherited = $self->language;
        $args{'language'} = $inherited if $inherited;
    }
    my $new_wit = Text::Tradition::Witness->new( %args );
    $self->$orig( $new_wit->sigil => $new_wit );
    return $new_wit;
};
331c2dbf |
94 | |
3b853983 |
# Allow deletion of witness by object as well as by sigil.
# Each argument may be a sigil string or a Text::Tradition::Witness object
# (including instances of subclasses); objects are translated to their sigil
# before the underlying hash 'delete' is called. Returns whatever that
# delegated delete returns.
around 'del_witness' => sub {
    my( $orig, $self, @targets ) = @_;
    my @key_args;
    foreach my $target ( @targets ) {
        # blessed() is exported by Moose. Testing with isa rather than
        # comparing ref() to a class name means subclassed or reblessed
        # witnesses are still recognised instead of being used as hash keys.
        if( blessed( $target ) && $target->isa( 'Text::Tradition::Witness' ) ) {
            push( @key_args, $target->sigil );
        } else {
            push( @key_args, $target );
        }
    }
    return $self->$orig( @key_args );
};
106 | |
# Look a witness up by sigil, but only consult the hash when the key is
# known to exist; an unknown sigil yields an empty return.
around 'witness' => sub {
    my $orig  = shift;
    my $self  = shift;
    my $sigil = shift;
    if( $self->has_witness( $sigil ) ) {
        return $self->$orig( $sigil );
    }
    return;
};
113 | |
331c2dbf |
114 | =head1 NAME |
115 | |
116 | Text::Tradition - a software model for a set of collated texts |
117 | |
118 | =head1 SYNOPSIS |
119 | |
120 | use Text::Tradition; |
121 | my $t = Text::Tradition->new( |
122 | 'name' => 'this is a text', |
123 | 'input' => 'TEI', |
124 | 'file' => '/path/to/tei_parallel_seg_file.xml' ); |
125 | |
126 | my @text_wits = $t->witnesses(); |
127 | my $manuscript_a = $t->witness( 'A' ); |
82fa4d57 |
128 | |
129 | $t = Text::Tradition->new(); |
130 | $t->add_witness( 'sourcetype' => 'xmldesc', |
131 | 'file' => '/path/to/teitranscription.xml' ); |
132 | $t->add_witness( 'sourcetype' => 'plaintext', 'sigil' => 'Q', |
133 | 'string' => 'The quick brown fox jumped over the lazy dogs' ); |
134 | ## TODO |
135 | $t->collate_texts; |
331c2dbf |
136 | |
137 | my $text_path_svg = $t->collation->as_svg(); |
138 | ## See Text::Tradition::Collation for more on text collation itself |
139 | |
140 | =head1 DESCRIPTION |
141 | |
142 | Text::Tradition is a library for representation and analysis of collated |
143 | texts, particularly medieval ones. A 'tradition' refers to the aggregation |
144 | of surviving versions of a text, generally preserved in multiple |
145 | manuscripts (or 'witnesses'). A Tradition object thus has one or more |
146 | Witnesses, as well as a Collation that represents the unity of all versions |
147 | of the text. |
148 | |
149 | =head1 METHODS |
150 | |
151 | =head2 new |
152 | |
153 | Creates and returns a new text tradition object. The following options are |
154 | accepted. |
155 | |
156 | General options: |
157 | |
158 | =over 4 |
159 | |
160 | =item B<name> - The name of the text. |
161 | |
162 | =back |
163 | |
164 | Initialization based on a collation file: |
165 | |
166 | =over 4 |
167 | |
168 | =item B<input> - The input format of the collation file. Can be one of the |
169 | following: |
170 | |
171 | =over 4 |
172 | |
173 | =item * Self - a GraphML format produced by this module |
174 | |
175 | =item * CollateX - a GraphML format produced by CollateX |
176 | |
177 | =item * CTE - a TEI XML format produced by Classical Text Editor |
178 | |
a731e73a |
179 | =item * JSON - an alignment table in JSON format, as produced by CollateX and other tools |
180 | |
331c2dbf |
181 | =item * KUL - a specific CSV format for variants, not documented here |
182 | |
183 | =item * TEI - a TEI parallel segmentation format file |
184 | |
185 | =item * Tabular - a comma- or tab-separated collation. Takes an additional |
186 | option, 'sep_char', which defaults to the tab character. |
187 | |
188 | =back |
189 | |
190 | =item B<file> - The name of the file that contains the data. One of 'file' |
191 | or 'string' should be specified. |
192 | |
193 | =item B<string> - A text string that contains the data. One of 'file' or |
194 | 'string' should be specified. |
195 | |
196 | =item B<base> - The name of a text file that contains the base text, to be |
197 | used with input formats that require it (currently only KUL). |
198 | |
199 | =back |
200 | |
201 | Initialization based on a list of witnesses [NOT YET IMPLEMENTED]: |
202 | |
203 | =over 4 |
204 | |
205 | =item B<witnesses> - A reference to an array of Text::Tradition::Witness |
206 | objects that carry the text to be collated. |
207 | |
208 | =item B<collator> - A reference to a collation program that will accept |
209 | Witness objects. |
210 | |
211 | =back |
212 | |
213 | =head2 B<witnesses> |
214 | |
215 | Return the Text::Tradition::Witness objects associated with this tradition, |
216 | as an array. |
217 | |
044d1e45 |
218 | =head2 B<witness>( $sigil ) |
219 | |
220 | Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef |
221 | if there is no such object within the tradition. |
222 | |
331c2dbf |
223 | =head2 B<add_witness>( %opts ) |
224 | |
225 | Instantiate a new witness with the given options (see documentation for |
226 | Text::Tradition::Witness) and add it to the tradition. |
227 | |
044d1e45 |
228 | =head2 B<del_witness>( $sigil ) |
229 | |
230 | Delete the witness with the given sigil from the tradition. Returns the |
231 | witness object for the deleted witness. |
232 | |
331c2dbf |
233 | =begin testing |
234 | |
235 | use_ok( 'Text::Tradition', "can use module" ); |
236 | |
237 | my $t = Text::Tradition->new( 'name' => 'empty' ); |
238 | is( ref( $t ), 'Text::Tradition', "initialized an empty Tradition object" ); |
239 | is( $t->name, 'empty', "object has the right name" ); |
240 | is( scalar $t->witnesses, 0, "object has no witnesses" ); |
241 | |
242 | my $simple = 't/data/simple.txt'; |
243 | my $s = Text::Tradition->new( |
244 | 'name' => 'inline', |
245 | 'input' => 'Tabular', |
246 | 'file' => $simple, |
247 | ); |
248 | is( ref( $s ), 'Text::Tradition', "initialized a Tradition object" ); |
249 | is( $s->name, 'inline', "object has the right name" ); |
250 | is( scalar $s->witnesses, 3, "object has three witnesses" ); |
251 | |
044d1e45 |
252 | my $wit_a = $s->witness('A'); |
253 | is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" ); |
254 | if( $wit_a ) { |
255 | is( $wit_a->sigil, 'A', "Witness A has the right sigil" ); |
256 | } |
257 | is( $s->witness('X'), undef, "There is no witness X" ); |
258 | ok( !exists $s->{'witness_hash'}->{'X'}, "Witness key X not created" ); |
259 | |
82fa4d57 |
260 | my $wit_d = $s->add_witness( 'sigil' => 'D', 'sourcetype' => 'collation' ); |
044d1e45 |
261 | is( ref( $wit_d ), 'Text::Tradition::Witness', "new witness created" ); |
262 | is( $wit_d->sigil, 'D', "witness has correct sigil" ); |
331c2dbf |
263 | is( scalar $s->witnesses, 4, "object now has four witnesses" ); |
264 | |
3b853983 |
265 | my $del = $s->del_witness( 'D' ); |
044d1e45 |
266 | is( $del, $wit_d, "Deleted correct witness" ); |
3b853983 |
267 | is( scalar $s->witnesses, 3, "object has three witnesses again" ); |
268 | |
331c2dbf |
269 | # TODO test initialization by witness list when we have it |
270 | |
271 | =end testing |
272 | |
273 | =cut |
910a0a6d |
274 | |
df6d9812 |
275 | |
8e1394aa |
# Moose construction hook.
#
# Always creates the Collation for this tradition. If an 'input' format was
# passed to the constructor, the matching Text::Tradition::Parser::* module is
# loaded and its parse() function is called with the tradition and the
# constructor arguments. An unrecognized format, or a base-text format given
# without a 'base' argument, produces a warning and an early return, in which
# case the _initialized flag is left unset.
sub BUILD {
    my( $self, $init_args ) = @_;

    # First, make a collation object. This will use only those arguments in
    # init_args that apply to the collation.
    $self->_save_collation(
        Text::Tradition::Collation->new( %$init_args, 'tradition' => $self ) );

    my $format = exists $init_args->{'input'} ? $init_args->{'input'} : undef;
    if( $format ) {
        # Formats that can be parsed on their own, and formats that also
        # need a base text.
        my %standalone = map { $_ => 1 }
            qw/ Self CollateText CollateX CTE JSON TEI Tabular /;
        my %needs_base = map { $_ => 1 } qw/ KUL /;

        unless( $standalone{$format} || $needs_base{$format} ) {
            warn "Unrecognized input format $format; not parsing";
            return;
        }

        my $parser = $format;
        if( $needs_base{$format} ) {
            unless( exists $init_args->{'base'} ) {
                warn "Cannot make a collation from $format without a base text";
                return;
            }
            # Use the BaseText module for parsing; the original input
            # argument is still available to it in $init_args.
            $parser = 'BaseText';
        }

        # Now do the parsing.
        my $mod = "Text::Tradition::Parser::$parser";
        load( $mod );
        $mod->can('parse')->( $self, $init_args );
    }

    $self->_init_done( 1 );
    return $self;
}
318 | |
319 | =head2 add_json_witnesses( $jsonstring, $options ) |
320 | |
321 | Adds a set of witnesses from a JSON array specification. This is a wrapper |
322 | to parse the JSON and call add_witness (with the specified $options) for |
323 | each element therein. |
324 | |
325 | =cut |
326 | |
# Parse $jsonstr and call add_witness once for each element of its
# 'witnesses' array.
#
#   $jsonstr   - JSON text decoding to a hash with a 'witnesses' array
#   $extraopts - optional hashref of further options handed to add_witness
#
# Every witness gets its own option hash, so the caller's $extraopts is left
# untouched and no hash is shared between successive add_witness calls.
sub add_json_witnesses {
    my( $self, $jsonstr, $extraopts ) = @_;
    my $witarray = from_json( $jsonstr );
    foreach my $witspec ( @{$witarray->{witnesses}} ) {
        # Copy the caller's options first; sourcetype and object always win.
        my %opts = (
            %{ $extraopts || {} },
            'sourcetype' => 'json',
            'object'     => $witspec,
        );
        $self->add_witness( \%opts );
    }
    return;
}
c5104dc0 |
337 | |
56cf65bd |
338 | =head2 add_stemma( dotfile => $path ) or add_stemma( dot => $string ) |
339 | |
340 | Initializes a Text::Tradition::Stemma object from the named dot file (or from |
341 | the given dot-format string), and associates it with the tradition. |
342 | |
343 | =begin testing |
344 | |
345 | use Text::Tradition; |
346 | |
347 | my $t = Text::Tradition->new( |
348 | 'name' => 'simple test', |
349 | 'input' => 'Tabular', |
350 | 'file' => 't/data/simple.txt', |
351 | ); |
352 | |
c3c79612 |
353 | is( $t->stemma_count, 0, "No stemmas added yet" ); |
56cf65bd |
354 | my $s; |
9ba651b9 |
355 | ok( $s = $t->add_stemma( dotfile => 't/data/simple.dot' ), "Added a simple stemma" ); |
56cf65bd |
356 | is( ref( $s ), 'Text::Tradition::Stemma', "Got a stemma object returned" ); |
e0d617e6 |
357 | is( $t->stemma_count, 1, "Tradition claims to have a stemma" ); |
358 | is( $t->stemma(0), $s, "Tradition hands back the right stemma" ); |
56cf65bd |
359 | |
360 | =end testing |
361 | |
362 | =cut |
363 | |
# Create a Text::Tradition::Stemma from dot data and attach it to the
# tradition.
#
# Options (named):
#   dotfile - path of a dot file to read
#   dot     - a string containing the dot data
#
# Returns the new stemma object. If the input cannot be opened, or neither
# option was given, a warning is issued and nothing is returned; no stemma is
# created in that case.
sub add_stemma {
    my $self = shift;
    my %opts = @_;
    my $stemma_fh;
    if( $opts{'dotfile'} ) {
        unless( open( $stemma_fh, '<', $opts{'dotfile'} ) ) {
            warn "Could not open file " . $opts{'dotfile'} . ": $!";
            return;
        }
    } elsif( $opts{'dot'} ) {
        # Read the string through an in-memory filehandle so that both kinds
        # of input reach the Stemma constructor the same way.
        my $str = $opts{'dot'};
        unless( open( $stemma_fh, '<', \$str ) ) {
            warn "Could not read dot string: $!";
            return;
        }
    } else {
        warn "Need either a 'dotfile' or a 'dot' option to add a stemma";
        return;
    }
    # Assume utf-8
    binmode $stemma_fh, ':utf8';
    my $stemma = Text::Tradition::Stemma->new( 'dot' => $stemma_fh );
    $self->_add_stemma( $stemma ) if $stemma;
    return $stemma;
}
382 | |
d3e7842a |
# Hand the tradition to the lemmatize() function of the language module
# Text::Tradition::Language::<language>, loading that module first.
# Warns and returns nothing if no language has been set.
sub lemmatize {
    my( $self ) = @_;
    if( $self->has_language ) {
        my $mod = join( '::', 'Text::Tradition::Language', $self->language );
        load( $mod );
        my $lemmatizer = $mod->can( 'lemmatize' );
        return $lemmatizer->( $self );
    }
    warn "Please set a language to lemmatize a tradition";
    return;
}
393 | |
dd3b58b0 |
# Remove the Moose keywords from the namespace and finalize the class.
no Moose;
__PACKAGE__->meta->make_immutable;

# Explicit true value, so that loading the module does not depend on what
# make_immutable happens to return.
1;
331c2dbf |
396 | |
397 | |
398 | =head1 BUGS / TODO |
399 | |
400 | =over |
401 | |
402 | =item * Allow tradition to be initialized via passing to a collator. |
403 | |
404 | =back |
405 | |
406 | =head1 LICENSE |
407 | |
408 | This package is free software and is provided "as is" without express |
409 | or implied warranty. You can redistribute it and/or modify it under |
410 | the same terms as Perl itself. |
411 | |
412 | =head1 AUTHOR |
413 | |
414 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |