1 package Text::Tradition::Analysis::Result;
4 use Digest::MD5 qw/ md5_hex /;
5 use Encode qw/ encode_utf8 /;
6 use JSON qw/ to_json /;
8 use Text::Tradition::Error;
12 Text::Tradition::Analysis::Result - object to express an IDP calculation result
13 for a particular graph problem.
17 Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18 (expressing variation in reading between witnesses related according to the
19 stemma hypothesis), it is possible to calculate certain properties of how the
20 readings might be related to each other. This calculation depends on a custom
21 program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22 calculation can take a long time. The purpose of this object is to allow storage
23 of calculated results in a database.
25 For each graph problem, the following features can be calculated:
29 =item * Whether the reading groups form a genealogical pattern on the stemma.
31 =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
33 =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
41 Creates a new graph problem. Requires two properties:
45 =item * setlist - An array of arrays expressing the witness sets. The inner
46 arrays will be converted to Set::Scalar objects, and must have distinct members.
48 =item * graph - A dot description of a graph (e.g. the output of a call to
49 Text::Tradition::Stemma::editable) against which the sets will be analyzed.
53 All other properties should be calculated by IDP rather than set manually.
58 =item * is_genealogical - Boolean, indicating whether the witness sets form
59 genealogical groupings on the graph.
61 =item * status - String to indicate whether a solution has been calculated
62 for this analysis problem. Recognized values are "OK" (calculated) and
63 "running" (being calculated now). All other values, or no value, imply that
64 the calculation has yet to take place.
66 =item * groupings - These are extended (if necessary) versions of the witness
67 sets, which include the hypothetical witnesses necessary to minimize coincidence
70 =item * classes - These are key/value pairs, keyed by witness, indicating for
71 each witness whether it is the source of a reading variant, whether it represents
72 a reversion to an ancestor (but not parent) reading, or whether its reading
73 follows that of a parent on the graph.
83 use_ok( 'Text::Tradition::Analysis::Result' );
85 # Make a problem with a graph and a set of groupings
87 my $datafile = 't/data/florilegium_tei_ps.xml';
88 my $tradition = Text::Tradition->new( 'input' => 'TEI',
90 'file' => $datafile );
91 $tradition->enable_stemmata;
92 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
94 my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
96 foreach my $set ( @$sets ) {
97 map { $extant->{$_} = 1 } @$set;
99 my $sitgraph = $s->editable( { extant => $extant } );
100 my $result = Text::Tradition::Analysis::Result->new(
103 is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
104 is( $result->graph, $sitgraph, "Got identical graph string back" );
105 is( $result->status, "new", "Calculation status of result set correctly" );
106 my @rsets = $result->sets;
107 is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
108 is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
109 is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
111 # Add some calculation values
112 $result->is_genealogical( 1 );
113 $result->record_grouping( [ qw/ 4 5 D Q / ] );
115 $result->record_grouping( [ qw/ 3 4 D H / ] );
116 ok( 0, "Recorded a grouping that does not match the input sets" );
117 } catch ( Text::Tradition::Error $e ) {
118 like( $e->message, qr/Failed to find witness set that is a subset of/,
119 "Correct error thrown on bad record_grouping attempt" );
121 # Test manually setting an out-of-range group
123 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
124 ok( 0, "Set a grouping at an invalid index" );
125 } catch ( Text::Tradition::Error $e ) {
126 is( $e->message, 'Set / group index 3 out of range for set_grouping',
127 "Caught attempt to set grouping at invalid index" );
129 $result->record_grouping( [ qw/ 3 F H / ] );
130 my $gp1 = $result->grouping(1);
131 is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
132 "Found a minimum grouping for D Q" );
133 is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
134 is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
135 "Default minimum grouping found for biggest group" );
136 $result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
141 foreach my $gp ( $result->groupings ) {
142 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
144 foreach my $gp ( $result->groupings ) {
145 foreach my $wit ( @$gp ) {
146 my $expected = $classes{$wit} || 'copy';
147 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
151 # Now write it out to JSON
152 my $struct = $result->TO_JSON;
153 my $newresult = Text::Tradition::Analysis::Result->new( $struct );
154 is( $result->object_key, $newresult->object_key,
155 "Object key stayed constant on export/import" );
156 my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
157 is( $problem->object_key, $result->object_key,
158 "Object key stayed constant for newly created problem" );
165 =head2 $self->has_class( $witness )
166 =head2 $self->class( $witness )
168 If a class has been calculated for the given witness, has_class returns true
169 and class returns the calculated answer.
175 isa => 'ArrayRef[Set::Scalar]',
178 set_index => 'first_index',
194 has 'is_genealogical' => (
197 predicate => 'has_genealogical_result'
200 has 'groupinglist' => (
202 isa => 'ArrayRef[Set::Scalar]',
204 groupings => 'elements',
205 _set_grouping => 'set',
208 default => sub { [] }
213 isa => 'HashRef[Str]',
216 has_class => 'exists',
218 classes => 'elements',
219 assigned_wits => 'keys',
223 around BUILDARGS => sub {
226 my $args = @_ == 1 ? $_[0] : { @_ };
228 # Convert the set list into a list of Set::Scalars, ordered first by size and
229 # then alphabetically by first-sorted.
230 throw( "Must specify a set list to Analysis::Result->new()" )
231 unless ref( $args->{'setlist'} ) eq 'ARRAY';
232 throw( "Empty set list specified to Analysis::Result->new()" )
233 unless @{$args->{'setlist'}};
234 # Order the sets and make sure they are all distinct Set::Scalars.
235 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
236 _check_set_args( $args->{'setlist'} ) ];
237 if( exists $args->{'groupinglist'} ) {
238 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
241 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
243 if( ref( $args->{'graph'} ) ) {
244 my $st = delete $args->{'graph'};
245 my $type = ref( $st );
246 my $gopt = { linesep => ' ' };
247 if( $type eq 'Text::Tradition::Stemma' ) {
248 $args->{'graph'} = $st->editable( $gopt );
249 } elsif( $type eq 'Graph' ) {
250 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
252 throw( "Passed argument to graph that is neither Stemma nor Graph" );
256 # If our only args are graph and setlist, then status should be 'new'
257 if( scalar keys %$args == 2 ) {
258 $args->{'status'} = 'new';
261 return $class->$orig( $args );
264 sub _check_set_args {
267 foreach my $set ( @{$setlist} ) {
269 # Check uniqueness of the current set
270 if( ref( $set ) ne 'Set::Scalar' ) {
271 $s = Set::Scalar->new( @$set );
272 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
273 unless @$set == $s->elements;
275 # Check distinctness of the set from all other sets given so far
276 foreach my $ps ( @sets ) {
277 throw( "Two sets $s / $ps are not disjoint" )
278 unless $s->is_disjoint( $ps );
289 # Initialize the groupings array
290 my @sets = $self->sets;
291 foreach my $idx( 0 .. $#sets ) {
292 unless( $self->grouping( $idx ) ) {
293 my $g = $sets[$idx]->clone();
294 $self->_set_grouping( $idx, $g );
299 before '_set_grouping' => sub {
302 my $max = scalar $self->sets;
304 throw( "Set / group index $idx out of range for set_grouping" );
308 =head2 $self->object_key
310 Returns a unique key that can be used to look up this graph/set combination in
311 a database. Currently an MD5 hash of the request_string.
317 return md5_hex( encode_utf8( $self->request_string ) );
320 =head2 $self->request_string
322 A request string is the graph followed by the groups, which should form a unique
329 return $self->graph . '//' . join( ',', $self->sets );
332 =head2 by_size_and_alpha
334 A useful utility function to sort Set::Scalar objects first in descending
335 order by size, then in ascending alphabetical order by first element (i.e.
340 sub by_size_and_alpha {
342 my $size = $b->members <=> $a->members;
343 return $size if $size;
344 # Then sort by alphabetical order of set elements.
345 return "$a" cmp "$b";
348 =head2 $self->sources
350 Return all 'source' class witnesses in these sets for this graph.
356 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
360 =head2 $self->minimum_grouping_for( $set )
362 Return the minimum grouping (including necessary hypothetical witnesses) for
363 the witness set specified. Will return undef if $set does not match one of
364 the defined witness sets in $self->sets.
368 # Look for a matching set in our setlist, and return its corresponding group
369 sub minimum_grouping_for {
370 my( $self, $set ) = @_;
371 my $midx = $self->set_index( sub { "$set" eq "$_" } );
372 return undef unless defined $midx;
373 return $self->grouping( $midx );
376 =head1 CALCULATION STORAGE METHODS
378 =head2 $self->is_genealogical( $bool )
380 Record that the sets are genealogical for this graph.
382 =head2 $self->set_class( $witness, $class )
384 Record that the witness in question is of the given class.
386 =head2 $self->record_grouping( $group )
388 Record that the group in question (either an arrayref or a Set::Scalar) forms
389 a minimum grouping on the graph. Will throw an error unless the group is a
390 (non-proper) superset of an existing witness set.
394 sub record_grouping {
395 my( $self, $group ) = @_;
396 unless( ref( $group ) eq 'Set::Scalar' ) {
397 my $s = Set::Scalar->new( @$group );
400 # Find the set that is a subset of this group, and record it in the
401 # correct spot in our groupinglist.
403 foreach my $set ( $self->sets ) {
404 if( _is_subset( $set, $group ) ) {
405 $self->_set_grouping( $idx, $group );
410 if( $idx == scalar( $self->sets ) ) {
411 throw( "Failed to find witness set that is a subset of $group" );
416 # A replacement for the stupid Set::Scalar::is_subset
417 my( $set1, $set2 ) = @_;
419 map { $all{$_} = 1 } $set2->members;
420 foreach my $m ( $set1->members ) {
421 return 0 unless $all{$m};
428 # Required values: graph and setlist
430 graph => $self->graph,
433 foreach my $set ( $self->sets ) {
434 push( @{$data->{setlist}}, [ $set->members ] );
436 # Scalar values, if they are set
437 $data->{is_genealogical} = 1 if $self->is_genealogical;
438 $data->{status} = $self->status if $self->status;
440 # Set values, if they exist
441 $data->{groupinglist} = [] if $self->groupings;
442 foreach my $group ( $self->groupings ) {
443 push( @{$data->{groupinglist}}, [ $group->members ] );
445 $data->{classlist} = {} if $self->assigned_wits;
446 foreach my $wit ( $self->assigned_wits ) {
447 $data->{classlist}->{$wit} = $self->class( $wit );
453 Text::Tradition::Error->throw(
454 'ident' => 'Analysis::Result error',
460 __PACKAGE__->meta->make_immutable;