Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition; |
2 | |
4a8828f0 |
3 | use Module::Load; |
dd3b58b0 |
4 | use Moose; |
8e1394aa |
5 | use Text::Tradition::Collation; |
6 | use Text::Tradition::Witness; |
dd3b58b0 |
7 | |
331c2dbf |
8 | use vars qw( $VERSION ); |
9 | $VERSION = "0.1"; |
10 | |
dd3b58b0 |
11 | has 'collation' => ( |
8e1394aa |
12 | is => 'ro', |
13 | isa => 'Text::Tradition::Collation', |
14 | writer => '_save_collation', |
15 | ); |
dd3b58b0 |
16 | |
17 | has 'witnesses' => ( |
8e1394aa |
18 | traits => ['Array'], |
8e1394aa |
19 | isa => 'ArrayRef[Text::Tradition::Witness]', |
20 | handles => { |
910a0a6d |
21 | witnesses => 'elements', |
22 | add_witness => 'push', |
8e1394aa |
23 | }, |
4a8828f0 |
24 | default => sub { [] }, |
8e1394aa |
25 | ); |
c5104dc0 |
26 | |
df6d9812 |
27 | has 'name' => ( |
28 | is => 'rw', |
29 | isa => 'Str', |
30 | default => 'Tradition', |
31 | ); |
910a0a6d |
32 | |
33 | around 'add_witness' => sub { |
34 | my $orig = shift; |
35 | my $self = shift; |
331c2dbf |
36 | # TODO allow add of a Witness object? |
910a0a6d |
37 | my $new_wit = Text::Tradition::Witness->new( @_ ); |
38 | $self->$orig( $new_wit ); |
39 | return $new_wit; |
40 | }; |
331c2dbf |
41 | |
42 | =head1 NAME |
43 | |
44 | Text::Tradition - a software model for a set of collated texts |
45 | |
46 | =head1 SYNOPSIS |
47 | |
48 | use Text::Tradition; |
49 | my $t = Text::Tradition->new( |
50 | 'name' => 'this is a text', |
51 | 'input' => 'TEI', |
52 | 'file' => '/path/to/tei_parallel_seg_file.xml' ); |
53 | |
54 | my @text_wits = $t->witnesses(); |
55 | my $manuscript_a = $t->witness( 'A' ); |
56 | my $new_ms = $t->add_witness( 'sigil' => 'B' ); |
57 | |
58 | my $text_path_svg = $t->collation->as_svg(); |
59 | ## See Text::Tradition::Collation for more on text collation itself |
60 | |
61 | =head1 DESCRIPTION |
62 | |
63 | Text::Tradition is a library for representation and analysis of collated |
64 | texts, particularly medieval ones. A 'tradition' refers to the aggregation |
65 | of surviving versions of a text, generally preserved in multiple |
66 | manuscripts (or 'witnesses'). A Tradition object thus has one more more |
67 | Witnesses, as well as a Collation that represents the unity of all versions |
68 | of the text. |
69 | |
70 | =head1 METHODS |
71 | |
72 | =head2 new |
73 | |
74 | Creates and returns a new text tradition object. The following options are |
75 | accepted. |
76 | |
77 | General options: |
78 | |
79 | =over 4 |
80 | |
81 | =item B<name> - The name of the text. |
82 | |
83 | =back |
84 | |
85 | Initialization based on a collation file: |
86 | |
87 | =over 4 |
88 | |
89 | =item B<input> - The input format of the collation file. Can be one of the |
90 | following: |
91 | |
92 | =over 4 |
93 | |
94 | =item * Self - a GraphML format produced by this module |
95 | |
96 | =item * CollateX - a GraphML format produced by CollateX |
97 | |
98 | =item * CTE - a TEI XML format produced by Classical Text Editor |
99 | |
100 | =item * KUL - a specific CSV format for variants, not documented here |
101 | |
102 | =item * TEI - a TEI parallel segmentation format file |
103 | |
104 | =item * Tabular - a comma- or tab-separated collation. Takes an additional |
105 | option, 'sep_char', which defaults to the tab character. |
106 | |
107 | =back |
108 | |
109 | =item B<file> - The name of the file that contains the data. One of 'file' |
110 | or 'string' should be specified. |
111 | |
112 | =item B<string> - A text string that contains the data. One of 'file' or |
113 | 'string' should be specified. |
114 | |
115 | =item B<base> - The name of a text file that contains the base text, to be |
116 | used with input formats that require it (currently only KUL). |
117 | |
118 | =back |
119 | |
120 | Initialization based on a list of witnesses [NOT YET IMPLEMENTED]: |
121 | |
122 | =over 4 |
123 | |
124 | =item B<witnesses> - A reference to an array of Text::Tradition::Witness |
125 | objects that carry the text to be collated. |
126 | |
127 | =item B<collator> - A reference to a collation program that will accept |
128 | Witness objects. |
129 | |
130 | =back |
131 | |
132 | =head2 B<witnesses> |
133 | |
134 | Return the Text::Tradition::Witness objects associated with this tradition, |
135 | as an array. |
136 | |
137 | =head2 B<add_witness>( %opts ) |
138 | |
139 | Instantiate a new witness with the given options (see documentation for |
140 | Text::Tradition::Witness) and add it to the tradition. |
141 | |
142 | =begin testing |
143 | |
144 | use_ok( 'Text::Tradition', "can use module" ); |
145 | |
146 | my $t = Text::Tradition->new( 'name' => 'empty' ); |
147 | is( ref( $t ), 'Text::Tradition', "initialized an empty Tradition object" ); |
148 | is( $t->name, 'empty', "object has the right name" ); |
149 | is( scalar $t->witnesses, 0, "object has no witnesses" ); |
150 | |
151 | my $simple = 't/data/simple.txt'; |
152 | my $s = Text::Tradition->new( |
153 | 'name' => 'inline', |
154 | 'input' => 'Tabular', |
155 | 'file' => $simple, |
156 | ); |
157 | is( ref( $s ), 'Text::Tradition', "initialized a Tradition object" ); |
158 | is( $s->name, 'inline', "object has the right name" ); |
159 | is( scalar $s->witnesses, 3, "object has three witnesses" ); |
160 | |
161 | my $w = $s->add_witness( 'sigil' => 'D' ); |
162 | is( ref( $w ), 'Text::Tradition::Witness', "new witness created" ); |
163 | is( $w->sigil, 'D', "witness has correct sigil" ); |
164 | is( scalar $s->witnesses, 4, "object now has four witnesses" ); |
165 | |
166 | # TODO test initialization by witness list when we have it |
167 | |
168 | =end testing |
169 | |
170 | =cut |
910a0a6d |
171 | |
df6d9812 |
172 | |
8e1394aa |
173 | sub BUILD { |
174 | my( $self, $init_args ) = @_; |
c5104dc0 |
175 | |
8e1394aa |
176 | if( exists $init_args->{'witnesses'} ) { |
910a0a6d |
177 | # We got passed an uncollated list of witnesses. Make a |
178 | # witness object for each witness, and then send them to the |
179 | # collator. |
180 | my $autosigil = 0; |
181 | foreach my $wit ( %{$init_args->{'witnesses'}} ) { |
182 | # Each item in the list is either a string or an arrayref. |
183 | # If it's a string, it is a filename; if it's an arrayref, |
184 | # it is a tuple of 'sigil, file'. Handle either case. |
185 | my $args; |
186 | if( ref( $wit ) eq 'ARRAY' ) { |
187 | $args = { 'sigil' => $wit->[0], |
188 | 'file' => $wit->[1] }; |
189 | } else { |
190 | $args = { 'sigil' => chr( $autosigil+65 ), |
191 | 'file' => $wit }; |
192 | $autosigil++; |
193 | } |
194 | $self->witnesses->add_witness( $args ); |
195 | # TODO Now how to collate these? |
196 | } |
c5104dc0 |
197 | } else { |
910a0a6d |
198 | # Else we need to parse some collation data. Make a Collation object |
199 | my $collation = Text::Tradition::Collation->new( %$init_args, |
200 | 'tradition' => $self ); |
201 | $self->_save_collation( $collation ); |
4a8828f0 |
202 | |
910a0a6d |
203 | # Call the appropriate parser on the given data |
331c2dbf |
204 | my @format_standalone = qw/ Self CollateX CTE TEI Tabular /; |
dfc37e38 |
205 | my @format_basetext = qw/ KUL /; |
206 | my $use_base; |
207 | my $format = $init_args->{'input'}; |
dfc37e38 |
208 | if( $format && !( grep { $_ eq $format } @format_standalone ) |
209 | && !( grep { $_ eq $format } @format_basetext ) ) { |
210 | warn "Unrecognized input format $format; not parsing"; |
910a0a6d |
211 | return; |
212 | } |
dfc37e38 |
213 | if( $format && grep { $_ eq $format } @format_basetext ) { |
214 | $use_base = 1; |
215 | if( !exists $init_args->{'base'} ) { |
216 | warn "Cannot make a collation from $format without a base text"; |
217 | return; |
218 | } |
219 | } |
4a8828f0 |
220 | |
910a0a6d |
221 | # Now do the parsing. |
910a0a6d |
222 | if( $format ) { |
dfc37e38 |
223 | if( $use_base ) { |
224 | $format = 'BaseText'; # Use the BaseText module for parsing, |
225 | # but retain the original input arg. |
910a0a6d |
226 | } |
227 | my $mod = "Text::Tradition::Parser::$format"; |
228 | load( $mod ); |
dfc37e38 |
229 | $mod->can('parse')->( $self, $init_args ); |
910a0a6d |
230 | } |
c5104dc0 |
231 | } |
8e1394aa |
232 | } |
c5104dc0 |
233 | |
331c2dbf |
234 | =head2 B<witness>( $sigil ) |
235 | |
236 | Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef |
237 | if there is no such object within the tradition. |
238 | |
239 | =begin testing |
240 | |
241 | use Text::Tradition; |
242 | |
243 | my $simple = 't/data/simple.txt'; |
244 | my $s = Text::Tradition->new( |
245 | 'name' => 'inline', |
246 | 'input' => 'Tabular', |
247 | 'file' => $simple, |
248 | ); |
249 | my $wit_a = $s->witness('A'); |
250 | is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" ); |
251 | if( $wit_a ) { |
252 | is( $wit_a->sigil, 'A', "Witness A has the right sigil" ); |
253 | } |
254 | is( $s->witness('X'), undef, "There is no witness X" ); |
255 | |
256 | =end testing |
257 | |
258 | =cut |
259 | |
de51424a |
260 | sub witness { |
261 | my( $self, $sigil ) = @_; |
262 | my $requested_wit; |
910a0a6d |
263 | foreach my $wit ( $self->witnesses ) { |
ecff899f |
264 | if( $wit->sigil eq $sigil ) { |
265 | $requested_wit = $wit; |
266 | last; |
267 | } |
de51424a |
268 | } |
e2902068 |
269 | # We depend on an undef return value for no such witness. |
270 | # warn "No such witness $sigil" unless $requested_wit; |
de51424a |
271 | return $requested_wit; |
272 | } |
ecff899f |
273 | |
dd3b58b0 |
274 | no Moose; |
275 | __PACKAGE__->meta->make_immutable; |
331c2dbf |
276 | |
277 | |
278 | =head1 BUGS / TODO |
279 | |
280 | =over |
281 | |
282 | =item * Allow tradition to be initialized via passing to a collator. |
283 | |
284 | =back |
285 | |
286 | =head1 LICENSE |
287 | |
288 | This package is free software and is provided "as is" without express |
289 | or implied warranty. You can redistribute it and/or modify it under |
290 | the same terms as Perl itself. |
291 | |
292 | =head1 AUTHOR |
293 | |
294 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |