Commit | Line | Data |
7e17346f |
1 | package Text::Tradition::Analysis::Result; |
2 | |
3 | use Moose; |
a42a164c |
4 | use Digest::MD5 qw/ md5_hex /; |
5 | use Encode qw/ encode_utf8 /; |
7e17346f |
6 | use JSON qw/ to_json /; |
7 | use Set::Scalar; |
8 | use Text::Tradition::Error; |
9 | |
10 | =head1 NAME |
11 | |
12 | Text::Tradition::Analysis::Result - object to express an IDP calculation result |
13 | for a particular graph problem. |
14 | |
15 | =head1 DESCRIPTION |
16 | |
17 | Given a graph (expressing a stemma hypothesis) and a set of witness groupings |
18 | (expressing variation in reading between witnesses related according to the |
19 | stemma hypothesis), it is possible to calculate certain properties of how the |
20 | readings might be related to each other. This calculation depends on a custom |
21 | program run under the IDP system [TODO URL]. As the problem is NP-hard, the |
22 | calculation can take a long time. The purpose of this object is to allow storage |
23 | of calculated results in a database. |
24 | |
25 | For each graph problem, the following features can be calculated: |
26 | |
27 | =over 4 |
28 | |
29 | =item * Whether the reading groups form a genealogical pattern on the stemma. |
30 | |
31 | =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma. |
32 | |
33 | =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's. |
34 | |
35 | =back |
36 | |
37 | =head1 CONSTRUCTOR |
38 | |
39 | =head2 new |
40 | |
41 | Creates a new graph problem. Requires two properties: |
42 | |
43 | =over 4 |
44 | |
45 | =item * setlist - An array of arrays expressing the witness sets. The inner |
46 | arrays will be converted to Set::Scalar objects, and must have distinct members. |
47 | |
48 | =item * graph - A dot description of a graph (e.g. the output of a call to |
49 | Text::Tradition::Stemma::editable) against which the sets will be analyzed. |
50 | |
51 | =back |
52 | |
0a17afe9 |
53 | All other properties should be calculated by IDP rather than set manually. |
54 | These include: |
55 | |
56 | =over 4 |
57 | |
58 | =item * is_genealogical - Boolean, indicating whether the witness sets form |
59 | genealogical groupings on the graph. |
60 | |
61 | =item * status - String to indicate whether a solution has been calculated |
62 | for this analysis problem. Recognized values are "OK" (calculated) and |
63 | "running" (being calculated now). All other values, or no value, imply that |
64 | the calculation has yet to take place. |
65 | |
66 | =item * groupings - These are extended (if necessary) versions of the witness |
67 | sets, which include the hypothetical witnesses necessary to minimize coincidence |
68 | of variation. |
69 | |
70 | =item * classes - These are key/value pairs, keyed by witness, indicating for |
71 | each witness whether it is the source of a reading variant, whether it represents |
72 | a reversion to an ancestor (but not parent) reading, or whether its reading |
73 | follows that of a parent on the graph. |
74 | |
75 | =back |
76 | |
77 | =begin testing |
78 | |
85a74a8d |
79 | use Set::Scalar; |
0a17afe9 |
80 | use Test::More::UTF8; |
81 | use Text::Tradition; |
82 | use TryCatch; |
83 | use_ok( 'Text::Tradition::Analysis::Result' ); |
84 | |
85 | # Make a problem with a graph and a set of groupings |
86 | |
87 | my $datafile = 't/data/florilegium_tei_ps.xml'; |
88 | my $tradition = Text::Tradition->new( 'input' => 'TEI', |
89 | 'name' => 'flortest', |
90 | 'file' => $datafile ); |
91 | my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); |
92 | |
93 | my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ]; |
94 | my $extant = {}; |
95 | foreach my $set ( @$sets ) { |
96 | map { $extant->{$_} = 1 } @$set; |
97 | } |
98 | my $sitgraph = $s->editable( { extant => $extant } ); |
99 | my $result = Text::Tradition::Analysis::Result->new( |
100 | graph => $sitgraph, |
101 | setlist => $sets ); |
102 | is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" ); |
103 | is( $result->graph, $sitgraph, "Got identical graph string back" ); |
104 | is( $result->status, "new", "Calculation status of result set correctly" ); |
105 | my @rsets = $result->sets; |
106 | is( $rsets[0], '(A B C P S T)', "First set is biggest set" ); |
107 | is( $rsets[1], '(D Q)', "Second set is by alphabetical order" ); |
108 | is( $rsets[2], '(F H)', "Second set is by alphabetical order" ); |
109 | |
110 | # Add some calculation values |
111 | $result->is_genealogical( 1 ); |
112 | $result->record_grouping( [ qw/ 4 5 D Q / ] ); |
113 | try { |
114 | $result->record_grouping( [ qw/ 3 4 D H / ] ); |
115 | ok( 0, "Recorded a grouping that does not match the input sets" ); |
116 | } catch ( Text::Tradition::Error $e ) { |
117 | like( $e->message, qr/Failed to find witness set that is a subset of/, |
118 | "Correct error thrown on bad record_grouping attempt" ); |
119 | } |
85a74a8d |
120 | # Test manually setting an out-of-range group |
121 | try { |
122 | $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) ); |
123 | ok( 0, "Set a grouping at an invalid index" ); |
124 | } catch ( Text::Tradition::Error $e ) { |
125 | is( $e->message, 'Set / group index 3 out of range for set_grouping', |
126 | "Caught attempt to set grouping at invalid index" ); |
127 | } |
0a17afe9 |
128 | $result->record_grouping( [ qw/ 3 F H / ] ); |
129 | my $gp1 = $result->grouping(1); |
130 | is( $result->minimum_grouping_for( $rsets[1] ), $gp1, |
131 | "Found a minimum grouping for D Q" ); |
132 | is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" ); |
133 | is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0], |
134 | "Default minimum grouping found for biggest group" ); |
135 | $result->record_grouping( [ qw/ 1 α δ A B C P S T / ] ); |
136 | my %classes = ( |
137 | α => 'source', |
138 | 3 => 'source', |
139 | 4 => 'source' ); |
140 | foreach my $gp ( $result->groupings ) { |
141 | map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp; |
142 | } |
143 | foreach my $gp ( $result->groupings ) { |
144 | foreach my $wit ( @$gp ) { |
145 | my $expected = $classes{$wit} || 'copy'; |
146 | is( $result->class( $wit ), $expected, "Got expected witness class for $wit" ); |
147 | } |
148 | } |
149 | |
150 | # Now write it out to JSON |
151 | my $struct = $result->TO_JSON; |
152 | my $newresult = Text::Tradition::Analysis::Result->new( $struct ); |
153 | is( $result->object_key, $newresult->object_key, |
154 | "Object key stayed constant on export/import" ); |
155 | my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets ); |
156 | is( $problem->object_key, $result->object_key, |
157 | "Object key stayed constant for newly created problem" ); |
158 | |
159 | |
160 | =end testing |
161 | |
162 | =head1 METHODS |
163 | |
164 | =head2 $self->has_class( $witness ) |
165 | =head2 $self->class( $witness ) |
166 | |
167 | If a class has been calculated for the given witness, has_class returns true |
168 | and class returns the calculated answer. |
169 | |
7e17346f |
170 | =cut |
171 | |
172 | has 'setlist' => ( |
173 | traits => ['Array'], |
174 | isa => 'ArrayRef[Set::Scalar]', |
175 | handles => { |
176 | sets => 'elements', |
177 | set_index => 'first_index', |
178 | }, |
179 | required => 1 |
180 | ); |
181 | |
182 | has 'graph' => ( |
183 | is => 'ro', |
184 | isa => 'Str', |
185 | required => 1 |
186 | ); |
187 | |
74038ae5 |
188 | has 'status' => ( |
189 | is => 'rw', |
190 | isa => 'Str' |
191 | ); |
192 | |
7e17346f |
193 | has 'is_genealogical' => ( |
194 | is => 'rw', |
195 | isa => 'Bool', |
196 | predicate => 'has_genealogical_result' |
197 | ); |
198 | |
199 | has 'groupinglist' => ( |
200 | traits => ['Array'], |
201 | isa => 'ArrayRef[Set::Scalar]', |
202 | handles => { |
203 | groupings => 'elements', |
7e17346f |
204 | _set_grouping => 'set', |
205 | grouping => 'get', |
206 | }, |
207 | default => sub { [] } |
208 | ); |
209 | |
210 | has 'classlist' => ( |
211 | traits => ['Hash'], |
212 | isa => 'HashRef[Str]', |
213 | handles => { |
214 | class => 'get', |
215 | has_class => 'exists', |
216 | set_class => 'set', |
217 | classes => 'elements', |
218 | assigned_wits => 'keys', |
219 | }, |
220 | ); |
221 | |
222 | around BUILDARGS => sub { |
223 | my $orig = shift; |
224 | my $class = shift; |
225 | my $args = @_ == 1 ? $_[0] : { @_ }; |
226 | |
227 | # Convert the set list into a list of Set::Scalars, ordered first by size and |
228 | # then alphabetically by first-sorted. |
70406673 |
229 | throw( "Must specify a set list to Analysis::Result->new()" ) |
7e17346f |
230 | unless ref( $args->{'setlist'} ) eq 'ARRAY'; |
70406673 |
231 | throw( "Empty set list specified to Analysis::Result->new()" ) |
03c0a7d7 |
232 | unless @{$args->{'setlist'}}; |
a42a164c |
233 | # Order the sets and make sure they are all distinct Set::Scalars. |
b42d7113 |
234 | $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) } |
235 | _check_set_args( $args->{'setlist'} ) ]; |
0a17afe9 |
236 | if( exists $args->{'groupinglist'} ) { |
237 | $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ]; |
238 | } |
7e17346f |
239 | |
c90ef1a3 |
240 | # If we have been passed a Text::Tradition::Stemma or a Graph, save only |
241 | # its string. |
242 | if( ref( $args->{'graph'} ) ) { |
7e17346f |
243 | my $st = delete $args->{'graph'}; |
c90ef1a3 |
244 | my $type = ref( $st ); |
245 | my $gopt = { linesep => ' ' }; |
246 | if( $type eq 'Text::Tradition::Stemma' ) { |
247 | $args->{'graph'} = $st->editable( $gopt ); |
248 | } elsif( $type eq 'Graph' ) { |
249 | $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt ); |
250 | } else { |
70406673 |
251 | throw( "Passed argument to graph that is neither Stemma nor Graph" ); |
c90ef1a3 |
252 | } |
253 | } |
74038ae5 |
254 | |
255 | # If our only args are graph and setlist, then status should be 'new' |
256 | if( scalar keys %$args == 2 ) { |
257 | $args->{'status'} = 'new'; |
258 | } |
c90ef1a3 |
259 | |
7e17346f |
260 | return $class->$orig( $args ); |
261 | }; |
262 | |
a42a164c |
263 | sub _check_set_args { |
264 | my $setlist = shift; |
265 | my @sets; |
266 | foreach my $set ( @{$setlist} ) { |
267 | my $s = $set; |
268 | # Check uniqueness of the current set |
269 | if( ref( $set ) ne 'Set::Scalar' ) { |
270 | $s = Set::Scalar->new( @$set ); |
70406673 |
271 | throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" ) |
a42a164c |
272 | unless @$set == $s->elements; |
273 | } |
274 | # Check distinctness of the set from all other sets given so far |
275 | foreach my $ps ( @sets ) { |
70406673 |
276 | throw( "Two sets $s / $ps are not disjoint" ) |
a42a164c |
277 | unless $s->is_disjoint( $ps ); |
278 | } |
279 | # Save the set. |
280 | push( @sets, $s ); |
281 | } |
b42d7113 |
282 | return @sets; |
a42a164c |
283 | } |
284 | |
7e17346f |
285 | sub BUILD { |
286 | my $self = shift; |
287 | |
288 | # Initialize the groupings array |
85a74a8d |
289 | my @sets = $self->sets; |
290 | foreach my $idx( 0 .. $#sets ) { |
291 | unless( $self->grouping( $idx ) ) { |
292 | $self->_set_grouping( $idx, $sets[$idx] ); |
293 | } |
294 | } |
7e17346f |
295 | } |
296 | |
85a74a8d |
297 | before '_set_grouping' => sub { |
298 | my $self = shift; |
299 | my $idx = $_[0]; |
300 | my $max = scalar $self->sets; |
301 | if( $idx >= $max ) { |
302 | throw( "Set / group index $idx out of range for set_grouping" ); |
303 | } |
304 | }; |
305 | |
0a17afe9 |
306 | =head2 $self->object_key |
7e17346f |
307 | |
0a17afe9 |
308 | Returns a unique key that can be used to look up this graph/set combination in |
309 | a database. Currently an MD5 hash of the request_string. |
310 | |
311 | =cut |
1e4b66f4 |
312 | |
a42a164c |
313 | sub object_key { |
314 | my $self = shift; |
315 | return md5_hex( encode_utf8( $self->request_string ) ); |
316 | } |
317 | |
0a17afe9 |
318 | =head2 $self->request_string |
319 | |
320 | A request string is the graph followed by the groups, which should form a unique |
321 | key for the result. |
322 | |
323 | =cut |
324 | |
7e17346f |
325 | sub request_string { |
326 | my $self = shift; |
03c0a7d7 |
327 | return $self->graph . '//' . join( ',', $self->sets ); |
7e17346f |
328 | } |
329 | |
0a17afe9 |
330 | =head2 by_size_and_alpha |
331 | |
332 | A useful utility function to sort Set::Scalar objects first in descending |
333 | order by size, then in ascending alphabetical order by first element (i.e. |
334 | by stringification.) |
335 | |
336 | =cut |
337 | |
7e17346f |
338 | sub by_size_and_alpha { |
339 | my( $a, $b ) = @_; |
340 | my $size = $b->members <=> $a->members; |
341 | return $size if $size; |
342 | # Then sort by alphabetical order of set elements. |
343 | return "$a" cmp "$b"; |
344 | } |
345 | |
0a17afe9 |
346 | =head2 $self->sources |
347 | |
348 | Return all 'source' class witnesses in these sets for this graph. |
349 | |
350 | =cut |
351 | |
7e17346f |
352 | sub sources { |
353 | my $self = shift; |
354 | my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits; |
355 | return @sources; |
356 | } |
357 | |
0a17afe9 |
358 | =head2 $self->minimum_grouping_for( $set ) |
359 | |
360 | Return the minimum grouping (including necessary hypothetical witnesses) for |
361 | the witness set specified. Will return undef if $set does not match one of |
362 | the defined witness sets in $self->sets. |
363 | |
364 | =cut |
365 | |
7e17346f |
366 | # Look for a matching set in our setlist, and return its corresponding group |
367 | sub minimum_grouping_for { |
368 | my( $self, $set ) = @_; |
369 | my $midx = $self->set_index( sub { "$set" eq "$_" } ); |
370 | return undef unless defined $midx; |
371 | return $self->grouping( $midx ); |
372 | } |
373 | |
0a17afe9 |
374 | =head1 CALCULATION STORAGE METHODS |
375 | |
376 | =head2 $self->is_genealogical( $bool ) |
377 | |
378 | Record that the sets are genealogical for this graph. |
379 | |
380 | =head2 $self->set_class( $witness, $class ) |
381 | |
382 | Record that the witness in question is of the given class. |
383 | |
384 | =head2 $self->record_grouping( $group ) |
385 | |
386 | Record that the group in question (either an arrayref or a Set::Scalar) forms |
387 | a minimum grouping on the graph. Will throw an error unless the group is a |
388 | (non-proper) superset of an existing witness set. |
389 | |
390 | =cut |
391 | |
392 | sub record_grouping { |
393 | my( $self, $group ) = @_; |
394 | unless( ref( $group ) eq 'Set::Scalar' ) { |
395 | my $s = Set::Scalar->new( @$group ); |
396 | $group = $s; |
397 | } |
398 | # Find the set that is a subset of this group, and record it in the |
399 | # correct spot in our groupinglist. |
400 | my $idx = 0; |
401 | foreach my $set ( $self->sets ) { |
402 | if( _is_subset( $set, $group ) ) { |
403 | $self->_set_grouping( $idx, $group ); |
404 | last; |
405 | } |
406 | $idx++; |
407 | } |
408 | if( $idx == scalar( $self->sets ) ) { |
409 | throw( "Failed to find witness set that is a subset of $group" ); |
410 | } |
411 | } |
412 | |
413 | sub _is_subset { |
414 | # A replacement for the stupid Set::Scalar::is_subset |
415 | my( $set1, $set2 ) = @_; |
416 | my %all; |
417 | map { $all{$_} = 1 } $set2->members; |
418 | foreach my $m ( $set1->members ) { |
419 | return 0 unless $all{$m}; |
420 | } |
421 | return 1; |
422 | } |
423 | |
a42a164c |
424 | sub TO_JSON { |
425 | my $self = shift; |
0a17afe9 |
426 | # Required values: graph and setlist |
a42a164c |
427 | my $data = { |
428 | graph => $self->graph, |
429 | setlist => [], |
a42a164c |
430 | }; |
a42a164c |
431 | foreach my $set ( $self->sets ) { |
432 | push( @{$data->{setlist}}, [ $set->members ] ); |
433 | } |
0a17afe9 |
434 | # Scalar values, if they are set |
435 | $data->{is_genealogical} = 1 if $self->is_genealogical; |
436 | $data->{status} = $self->status if $self->status; |
437 | |
438 | # Set values, if they exist |
439 | $data->{groupinglist} = [] if $self->groupings; |
a42a164c |
440 | foreach my $group ( $self->groupings ) { |
441 | push( @{$data->{groupinglist}}, [ $group->members ] ); |
442 | } |
0a17afe9 |
443 | $data->{classlist} = {} if $self->assigned_wits; |
a42a164c |
444 | foreach my $wit ( $self->assigned_wits ) { |
445 | $data->{classlist}->{$wit} = $self->class( $wit ); |
446 | } |
447 | return $data; |
448 | } |
449 | |
7e17346f |
450 | sub throw { |
451 | Text::Tradition::Error->throw( |
452 | 'ident' => 'Analysis::Result error', |
453 | 'message' => $_[0], |
454 | ); |
455 | } |
456 | |
457 | no Moose; |
458 | __PACKAGE__->meta->make_immutable; |
459 | |
460 | 1; |