Commit | Line | Data |
7e17346f |
1 | package Text::Tradition::Analysis::Result; |
2 | |
3 | use Moose; |
a42a164c |
4 | use Digest::MD5 qw/ md5_hex /; |
5 | use Encode qw/ encode_utf8 /; |
7e17346f |
6 | use JSON qw/ to_json /; |
7 | use Set::Scalar; |
8 | use Text::Tradition::Error; |
9 | |
10 | =head1 NAME |
11 | |
12 | Text::Tradition::Analysis::Result - object to express an IDP calculation result |
13 | for a particular graph problem. |
14 | |
15 | =head1 DESCRIPTION |
16 | |
17 | Given a graph (expressing a stemma hypothesis) and a set of witness groupings |
18 | (expressing variation in reading between witnesses related according to the |
19 | stemma hypothesis), it is possible to calculate certain properties of how the |
20 | readings might be related to each other. This calculation depends on a custom |
21 | program run under the IDP system [TODO URL]. As the problem is NP-hard, the |
22 | calculation can take a long time. The purpose of this object is to allow storage |
23 | of calculated results in a database. |
24 | |
25 | For each graph problem, the following features can be calculated: |
26 | |
27 | =over 4 |
28 | |
29 | =item * Whether the reading groups form a genealogical pattern on the stemma. |
30 | |
31 | =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma. |
32 | |
33 | =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's. |
34 | |
35 | =back |
36 | |
37 | =head1 CONSTRUCTOR |
38 | |
39 | =head2 new |
40 | |
41 | Creates a new graph problem. Requires two properties: |
42 | |
43 | =over 4 |
44 | |
45 | =item * setlist - An array of arrays expressing the witness sets. The inner |
46 | arrays will be converted to Set::Scalar objects, and must have distinct members. |
47 | |
48 | =item * graph - A dot description of a graph (e.g. the output of a call to |
49 | Text::Tradition::Stemma::editable) against which the sets will be analyzed. |
50 | |
51 | =back |
52 | |
0a17afe9 |
53 | All other properties should be calculated by IDP rather than set manually. |
54 | These include: |
55 | |
56 | =over 4 |
57 | |
58 | =item * is_genealogical - Boolean, indicating whether the witness sets form |
59 | genealogical groupings on the graph. |
60 | |
61 | =item * status - String to indicate whether a solution has been calculated |
62 | for this analysis problem. Recognized values are "OK" (calculated) and |
63 | "running" (being calculated now). All other values, or no value, imply that |
64 | the calculation has yet to take place. |
65 | |
66 | =item * groupings - These are extended (if necessary) versions of the witness |
67 | sets, which include the hypothetical witnesses necessary to minimize coincidence |
68 | of variation. |
69 | |
70 | =item * classes - These are key/value pairs, keyed by witness, indicating for |
71 | each witness whether it is the source of a reading variant, whether it represents |
72 | a reversion to an ancestor (but not parent) reading, or whether its reading |
73 | follows that of a parent on the graph. |
74 | |
75 | =back |
76 | |
77 | =begin testing |
78 | |
85a74a8d |
79 | use Set::Scalar; |
0a17afe9 |
80 | use Test::More::UTF8; |
81 | use Text::Tradition; |
82 | use TryCatch; |
83 | use_ok( 'Text::Tradition::Analysis::Result' ); |
84 | |
85 | # Make a problem with a graph and a set of groupings |
86 | |
87 | my $datafile = 't/data/florilegium_tei_ps.xml'; |
88 | my $tradition = Text::Tradition->new( 'input' => 'TEI', |
89 | 'name' => 'flortest', |
90 | 'file' => $datafile ); |
951ddfe8 |
91 | $tradition->enable_stemmata; |
0a17afe9 |
92 | my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); |
93 | |
94 | my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ]; |
95 | my $extant = {}; |
96 | foreach my $set ( @$sets ) { |
97 | map { $extant->{$_} = 1 } @$set; |
98 | } |
99 | my $sitgraph = $s->editable( { extant => $extant } ); |
100 | my $result = Text::Tradition::Analysis::Result->new( |
101 | graph => $sitgraph, |
102 | setlist => $sets ); |
103 | is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" ); |
104 | is( $result->graph, $sitgraph, "Got identical graph string back" ); |
105 | is( $result->status, "new", "Calculation status of result set correctly" ); |
106 | my @rsets = $result->sets; |
107 | is( $rsets[0], '(A B C P S T)', "First set is biggest set" ); |
108 | is( $rsets[1], '(D Q)', "Second set is by alphabetical order" ); |
109 | is( $rsets[2], '(F H)', "Second set is by alphabetical order" ); |
110 | |
111 | # Add some calculation values |
112 | $result->is_genealogical( 1 ); |
113 | $result->record_grouping( [ qw/ 4 5 D Q / ] ); |
114 | try { |
115 | $result->record_grouping( [ qw/ 3 4 D H / ] ); |
116 | ok( 0, "Recorded a grouping that does not match the input sets" ); |
117 | } catch ( Text::Tradition::Error $e ) { |
118 | like( $e->message, qr/Failed to find witness set that is a subset of/, |
119 | "Correct error thrown on bad record_grouping attempt" ); |
120 | } |
85a74a8d |
121 | # Test manually setting an out-of-range group |
122 | try { |
123 | $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) ); |
124 | ok( 0, "Set a grouping at an invalid index" ); |
125 | } catch ( Text::Tradition::Error $e ) { |
126 | is( $e->message, 'Set / group index 3 out of range for set_grouping', |
127 | "Caught attempt to set grouping at invalid index" ); |
128 | } |
0a17afe9 |
129 | $result->record_grouping( [ qw/ 3 F H / ] ); |
130 | my $gp1 = $result->grouping(1); |
131 | is( $result->minimum_grouping_for( $rsets[1] ), $gp1, |
132 | "Found a minimum grouping for D Q" ); |
133 | is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" ); |
134 | is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0], |
135 | "Default minimum grouping found for biggest group" ); |
136 | $result->record_grouping( [ qw/ 1 α δ A B C P S T / ] ); |
137 | my %classes = ( |
138 | α => 'source', |
139 | 3 => 'source', |
140 | 4 => 'source' ); |
141 | foreach my $gp ( $result->groupings ) { |
142 | map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp; |
143 | } |
144 | foreach my $gp ( $result->groupings ) { |
145 | foreach my $wit ( @$gp ) { |
146 | my $expected = $classes{$wit} || 'copy'; |
147 | is( $result->class( $wit ), $expected, "Got expected witness class for $wit" ); |
148 | } |
149 | } |
150 | |
151 | # Now write it out to JSON |
152 | my $struct = $result->TO_JSON; |
153 | my $newresult = Text::Tradition::Analysis::Result->new( $struct ); |
154 | is( $result->object_key, $newresult->object_key, |
155 | "Object key stayed constant on export/import" ); |
156 | my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets ); |
157 | is( $problem->object_key, $result->object_key, |
158 | "Object key stayed constant for newly created problem" ); |
159 | |
160 | |
161 | =end testing |
162 | |
163 | =head1 METHODS |
164 | |
165 | =head2 $self->has_class( $witness ) |
166 | =head2 $self->class( $witness ) |
167 | |
168 | If a class has been calculated for the given witness, has_class returns true |
169 | and class returns the calculated answer. |
170 | |
7e17346f |
171 | =cut |
172 | |
173 | has 'setlist' => ( |
174 | traits => ['Array'], |
175 | isa => 'ArrayRef[Set::Scalar]', |
176 | handles => { |
177 | sets => 'elements', |
178 | set_index => 'first_index', |
179 | }, |
180 | required => 1 |
181 | ); |
182 | |
183 | has 'graph' => ( |
184 | is => 'ro', |
185 | isa => 'Str', |
186 | required => 1 |
187 | ); |
188 | |
74038ae5 |
189 | has 'status' => ( |
190 | is => 'rw', |
191 | isa => 'Str' |
192 | ); |
193 | |
7e17346f |
194 | has 'is_genealogical' => ( |
195 | is => 'rw', |
196 | isa => 'Bool', |
197 | predicate => 'has_genealogical_result' |
198 | ); |
199 | |
200 | has 'groupinglist' => ( |
201 | traits => ['Array'], |
202 | isa => 'ArrayRef[Set::Scalar]', |
203 | handles => { |
204 | groupings => 'elements', |
7e17346f |
205 | _set_grouping => 'set', |
206 | grouping => 'get', |
207 | }, |
208 | default => sub { [] } |
209 | ); |
210 | |
211 | has 'classlist' => ( |
212 | traits => ['Hash'], |
213 | isa => 'HashRef[Str]', |
214 | handles => { |
215 | class => 'get', |
216 | has_class => 'exists', |
217 | set_class => 'set', |
218 | classes => 'elements', |
219 | assigned_wits => 'keys', |
220 | }, |
221 | ); |
222 | |
223 | around BUILDARGS => sub { |
224 | my $orig = shift; |
225 | my $class = shift; |
226 | my $args = @_ == 1 ? $_[0] : { @_ }; |
227 | |
228 | # Convert the set list into a list of Set::Scalars, ordered first by size and |
229 | # then alphabetically by first-sorted. |
70406673 |
230 | throw( "Must specify a set list to Analysis::Result->new()" ) |
7e17346f |
231 | unless ref( $args->{'setlist'} ) eq 'ARRAY'; |
70406673 |
232 | throw( "Empty set list specified to Analysis::Result->new()" ) |
03c0a7d7 |
233 | unless @{$args->{'setlist'}}; |
a42a164c |
234 | # Order the sets and make sure they are all distinct Set::Scalars. |
b42d7113 |
235 | $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) } |
236 | _check_set_args( $args->{'setlist'} ) ]; |
0a17afe9 |
237 | if( exists $args->{'groupinglist'} ) { |
238 | $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ]; |
239 | } |
7e17346f |
240 | |
c90ef1a3 |
241 | # If we have been passed a Text::Tradition::Stemma or a Graph, save only |
242 | # its string. |
243 | if( ref( $args->{'graph'} ) ) { |
7e17346f |
244 | my $st = delete $args->{'graph'}; |
c90ef1a3 |
245 | my $type = ref( $st ); |
246 | my $gopt = { linesep => ' ' }; |
247 | if( $type eq 'Text::Tradition::Stemma' ) { |
248 | $args->{'graph'} = $st->editable( $gopt ); |
249 | } elsif( $type eq 'Graph' ) { |
250 | $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt ); |
251 | } else { |
70406673 |
252 | throw( "Passed argument to graph that is neither Stemma nor Graph" ); |
c90ef1a3 |
253 | } |
254 | } |
74038ae5 |
255 | |
256 | # If our only args are graph and setlist, then status should be 'new' |
257 | if( scalar keys %$args == 2 ) { |
258 | $args->{'status'} = 'new'; |
259 | } |
c90ef1a3 |
260 | |
7e17346f |
261 | return $class->$orig( $args ); |
262 | }; |
263 | |
a42a164c |
264 | sub _check_set_args { |
265 | my $setlist = shift; |
266 | my @sets; |
267 | foreach my $set ( @{$setlist} ) { |
268 | my $s = $set; |
269 | # Check uniqueness of the current set |
270 | if( ref( $set ) ne 'Set::Scalar' ) { |
271 | $s = Set::Scalar->new( @$set ); |
70406673 |
272 | throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" ) |
a42a164c |
273 | unless @$set == $s->elements; |
274 | } |
275 | # Check distinctness of the set from all other sets given so far |
276 | foreach my $ps ( @sets ) { |
70406673 |
277 | throw( "Two sets $s / $ps are not disjoint" ) |
a42a164c |
278 | unless $s->is_disjoint( $ps ); |
279 | } |
280 | # Save the set. |
281 | push( @sets, $s ); |
282 | } |
b42d7113 |
283 | return @sets; |
a42a164c |
284 | } |
285 | |
7e17346f |
286 | sub BUILD { |
287 | my $self = shift; |
288 | |
289 | # Initialize the groupings array |
85a74a8d |
290 | my @sets = $self->sets; |
291 | foreach my $idx( 0 .. $#sets ) { |
292 | unless( $self->grouping( $idx ) ) { |
6724a135 |
293 | my $g = $sets[$idx]->clone(); |
294 | $self->_set_grouping( $idx, $g ); |
85a74a8d |
295 | } |
296 | } |
7e17346f |
297 | } |
298 | |
85a74a8d |
299 | before '_set_grouping' => sub { |
300 | my $self = shift; |
301 | my $idx = $_[0]; |
302 | my $max = scalar $self->sets; |
303 | if( $idx >= $max ) { |
304 | throw( "Set / group index $idx out of range for set_grouping" ); |
305 | } |
306 | }; |
307 | |
0a17afe9 |
308 | =head2 $self->object_key |
7e17346f |
309 | |
0a17afe9 |
310 | Returns a unique key that can be used to look up this graph/set combination in |
311 | a database. Currently an MD5 hash of the request_string. |
312 | |
313 | =cut |
1e4b66f4 |
314 | |
a42a164c |
315 | sub object_key { |
316 | my $self = shift; |
317 | return md5_hex( encode_utf8( $self->request_string ) ); |
318 | } |
319 | |
0a17afe9 |
320 | =head2 $self->request_string |
321 | |
322 | A request string is the graph followed by the groups, which should form a unique |
323 | key for the result. |
324 | |
325 | =cut |
326 | |
7e17346f |
327 | sub request_string { |
328 | my $self = shift; |
03c0a7d7 |
329 | return $self->graph . '//' . join( ',', $self->sets ); |
7e17346f |
330 | } |
331 | |
0a17afe9 |
332 | =head2 by_size_and_alpha |
333 | |
334 | A useful utility function to sort Set::Scalar objects first in descending |
335 | order by size, then in ascending alphabetical order by first element (i.e. |
336 | by stringification.) |
337 | |
338 | =cut |
339 | |
7e17346f |
340 | sub by_size_and_alpha { |
341 | my( $a, $b ) = @_; |
342 | my $size = $b->members <=> $a->members; |
343 | return $size if $size; |
344 | # Then sort by alphabetical order of set elements. |
345 | return "$a" cmp "$b"; |
346 | } |
347 | |
0a17afe9 |
348 | =head2 $self->sources |
349 | |
350 | Return all 'source' class witnesses in these sets for this graph. |
351 | |
352 | =cut |
353 | |
7e17346f |
354 | sub sources { |
355 | my $self = shift; |
356 | my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits; |
357 | return @sources; |
358 | } |
359 | |
0a17afe9 |
360 | =head2 $self->minimum_grouping_for( $set ) |
361 | |
362 | Return the minimum grouping (including necessary hypothetical witnesses) for |
363 | the witness set specified. Will return undef if $set does not match one of |
364 | the defined witness sets in $self->sets. |
365 | |
366 | =cut |
367 | |
7e17346f |
368 | # Look for a matching set in our setlist, and return its corresponding group |
369 | sub minimum_grouping_for { |
370 | my( $self, $set ) = @_; |
371 | my $midx = $self->set_index( sub { "$set" eq "$_" } ); |
372 | return undef unless defined $midx; |
373 | return $self->grouping( $midx ); |
374 | } |
375 | |
0a17afe9 |
376 | =head1 CALCULATION STORAGE METHODS |
377 | |
378 | =head2 $self->is_genealogical( $bool ) |
379 | |
380 | Record that the sets are genealogical for this graph. |
381 | |
382 | =head2 $self->set_class( $witness, $class ) |
383 | |
384 | Record that the witness in question is of the given class. |
385 | |
386 | =head2 $self->record_grouping( $group ) |
387 | |
388 | Record that the group in question (either an arrayref or a Set::Scalar) forms |
389 | a minimum grouping on the graph. Will throw an error unless the group is a |
390 | (non-proper) superset of an existing witness set. |
391 | |
392 | =cut |
393 | |
394 | sub record_grouping { |
395 | my( $self, $group ) = @_; |
396 | unless( ref( $group ) eq 'Set::Scalar' ) { |
397 | my $s = Set::Scalar->new( @$group ); |
398 | $group = $s; |
399 | } |
400 | # Find the set that is a subset of this group, and record it in the |
401 | # correct spot in our groupinglist. |
402 | my $idx = 0; |
403 | foreach my $set ( $self->sets ) { |
404 | if( _is_subset( $set, $group ) ) { |
405 | $self->_set_grouping( $idx, $group ); |
406 | last; |
407 | } |
408 | $idx++; |
409 | } |
410 | if( $idx == scalar( $self->sets ) ) { |
411 | throw( "Failed to find witness set that is a subset of $group" ); |
412 | } |
413 | } |
414 | |
415 | sub _is_subset { |
416 | # A replacement for the stupid Set::Scalar::is_subset |
417 | my( $set1, $set2 ) = @_; |
418 | my %all; |
419 | map { $all{$_} = 1 } $set2->members; |
420 | foreach my $m ( $set1->members ) { |
421 | return 0 unless $all{$m}; |
422 | } |
423 | return 1; |
424 | } |
425 | |
a42a164c |
426 | sub TO_JSON { |
427 | my $self = shift; |
0a17afe9 |
428 | # Required values: graph and setlist |
a42a164c |
429 | my $data = { |
430 | graph => $self->graph, |
431 | setlist => [], |
a42a164c |
432 | }; |
a42a164c |
433 | foreach my $set ( $self->sets ) { |
434 | push( @{$data->{setlist}}, [ $set->members ] ); |
435 | } |
0a17afe9 |
436 | # Scalar values, if they are set |
437 | $data->{is_genealogical} = 1 if $self->is_genealogical; |
438 | $data->{status} = $self->status if $self->status; |
439 | |
440 | # Set values, if they exist |
441 | $data->{groupinglist} = [] if $self->groupings; |
a42a164c |
442 | foreach my $group ( $self->groupings ) { |
443 | push( @{$data->{groupinglist}}, [ $group->members ] ); |
444 | } |
0a17afe9 |
445 | $data->{classlist} = {} if $self->assigned_wits; |
a42a164c |
446 | foreach my $wit ( $self->assigned_wits ) { |
447 | $data->{classlist}->{$wit} = $self->class( $wit ); |
448 | } |
449 | return $data; |
450 | } |
451 | |
7e17346f |
452 | sub throw { |
453 | Text::Tradition::Error->throw( |
454 | 'ident' => 'Analysis::Result error', |
455 | 'message' => $_[0], |
456 | ); |
457 | } |
458 | |
459 | no Moose; |
460 | __PACKAGE__->meta->make_immutable; |
461 | |
462 | 1; |