1 package Text::Tradition::Analysis::Result;
4 use Digest::MD5 qw/ md5_hex /;
5 use Encode qw/ encode_utf8 /;
6 use JSON qw/ to_json /;
8 use Text::Tradition::Error;
12 Text::Tradition::Analysis::Result - object to express an IDP calculation result
13 for a particular graph problem.
17 Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18 (expressing variation in reading between witnesses related according to the
19 stemma hypothesis), it is possible to calculate certain properties of how the
20 readings might be related to each other. This calculation depends on a custom
21 program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22 calculation can take a long time. The purpose of this object is to allow storage
23 of calculated results in a database.
25 For each graph problem, the following features can be calculated:
29 =item * Whether the reading groups form a genealogical pattern on the stemma.
31 =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
33 =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
41 Creates a new graph problem. Requires two properties:
45 =item * setlist - An array of arrays expressing the witness sets. The inner
46 arrays will be converted to Set::Scalar objects, and must have distinct members.
48 =item * graph - A dot description of a graph (e.g. the output of a call to
49 Text::Tradition::Stemma::editable) against which the sets will be analyzed.
53 All other properties should be calculated by IDP rather than set manually.
58 =item * is_genealogical - Boolean, indicating whether the witness sets form
59 genealogical groupings on the graph.
61 =item * status - String to indicate whether a solution has been calculated
62 for this analysis problem. Recognized values are "OK" (calculated) and
63 "running" (being calculated now). All other values, or no value, imply that
64 the calculation has yet to take place.
66 =item * groupings - These are extended (if necessary) versions of the witness
67 sets, which include the hypothetical witnesses necessary to minimize coincidence
70 =item * classes - These are key/value pairs, keyed by witness, indicating for
71 each witness whether it is the source of a reading variant, whether it represents
72 a reversion to an ancestor (but not parent) reading, or whether its reading
73 follows that of a parent on the graph.
83 use_ok( 'Text::Tradition::Analysis::Result' );
85 # Make a problem with a graph and a set of groupings
87 my $datafile = 't/data/florilegium_tei_ps.xml';
88 my $tradition = Text::Tradition->new( 'input' => 'TEI',
90 'file' => $datafile );
91 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
93 my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
95 foreach my $set ( @$sets ) {
96 map { $extant->{$_} = 1 } @$set;
98 my $sitgraph = $s->editable( { extant => $extant } );
99 my $result = Text::Tradition::Analysis::Result->new(
102 is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
103 is( $result->graph, $sitgraph, "Got identical graph string back" );
104 is( $result->status, "new", "Calculation status of result set correctly" );
105 my @rsets = $result->sets;
106 is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
107 is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
108 is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
110 # Add some calculation values
111 $result->is_genealogical( 1 );
112 $result->record_grouping( [ qw/ 4 5 D Q / ] );
114 $result->record_grouping( [ qw/ 3 4 D H / ] );
115 ok( 0, "Recorded a grouping that does not match the input sets" );
116 } catch ( Text::Tradition::Error $e ) {
117 like( $e->message, qr/Failed to find witness set that is a subset of/,
118 "Correct error thrown on bad record_grouping attempt" );
120 # Test manually setting an out-of-range group
122 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
123 ok( 0, "Set a grouping at an invalid index" );
124 } catch ( Text::Tradition::Error $e ) {
125 is( $e->message, 'Set / group index 3 out of range for set_grouping',
126 "Caught attempt to set grouping at invalid index" );
128 $result->record_grouping( [ qw/ 3 F H / ] );
129 my $gp1 = $result->grouping(1);
130 is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
131 "Found a minimum grouping for D Q" );
132 is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
133 is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
134 "Default minimum grouping found for biggest group" );
135 $result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
140 foreach my $gp ( $result->groupings ) {
141 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
143 foreach my $gp ( $result->groupings ) {
144 foreach my $wit ( @$gp ) {
145 my $expected = $classes{$wit} || 'copy';
146 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
150 # Now write it out to JSON
151 my $struct = $result->TO_JSON;
152 my $newresult = Text::Tradition::Analysis::Result->new( $struct );
153 is( $result->object_key, $newresult->object_key,
154 "Object key stayed constant on export/import" );
155 my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
156 is( $problem->object_key, $result->object_key,
157 "Object key stayed constant for newly created problem" );
164 =head2 $self->has_class( $witness )
165 =head2 $self->class( $witness )
167 If a class has been calculated for the given witness, has_class returns true
168 and class returns the calculated answer.
174 isa => 'ArrayRef[Set::Scalar]',
177 set_index => 'first_index',
193 has 'is_genealogical' => (
196 predicate => 'has_genealogical_result'
199 has 'groupinglist' => (
201 isa => 'ArrayRef[Set::Scalar]',
203 groupings => 'elements',
204 _set_grouping => 'set',
207 default => sub { [] }
212 isa => 'HashRef[Str]',
215 has_class => 'exists',
217 classes => 'elements',
218 assigned_wits => 'keys',
222 around BUILDARGS => sub {
225 my $args = @_ == 1 ? $_[0] : { @_ };
227 # Convert the set list into a list of Set::Scalars, ordered first by size and
228 # then alphabetically by first-sorted.
229 throw( "Must specify a set list to Analysis::Result->new()" )
230 unless ref( $args->{'setlist'} ) eq 'ARRAY';
231 throw( "Empty set list specified to Analysis::Result->new()" )
232 unless @{$args->{'setlist'}};
233 # Order the sets and make sure they are all distinct Set::Scalars.
234 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
235 _check_set_args( $args->{'setlist'} ) ];
236 if( exists $args->{'groupinglist'} ) {
237 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
240 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
242 if( ref( $args->{'graph'} ) ) {
243 my $st = delete $args->{'graph'};
244 my $type = ref( $st );
245 my $gopt = { linesep => ' ' };
246 if( $type eq 'Text::Tradition::Stemma' ) {
247 $args->{'graph'} = $st->editable( $gopt );
248 } elsif( $type eq 'Graph' ) {
249 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
251 throw( "Passed argument to graph that is neither Stemma nor Graph" );
255 # If our only args are graph and setlist, then status should be 'new'
256 if( scalar keys %$args == 2 ) {
257 $args->{'status'} = 'new';
260 return $class->$orig( $args );
263 sub _check_set_args {
266 foreach my $set ( @{$setlist} ) {
268 # Check uniqueness of the current set
269 if( ref( $set ) ne 'Set::Scalar' ) {
270 $s = Set::Scalar->new( @$set );
271 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
272 unless @$set == $s->elements;
274 # Check distinctness of the set from all other sets given so far
275 foreach my $ps ( @sets ) {
276 throw( "Two sets $s / $ps are not disjoint" )
277 unless $s->is_disjoint( $ps );
288 # Initialize the groupings array
289 my @sets = $self->sets;
290 foreach my $idx( 0 .. $#sets ) {
291 unless( $self->grouping( $idx ) ) {
292 $self->_set_grouping( $idx, $sets[$idx] );
297 before '_set_grouping' => sub {
300 my $max = scalar $self->sets;
302 throw( "Set / group index $idx out of range for set_grouping" );
306 =head2 $self->object_key
308 Returns a unique key that can be used to look up this graph/set combination in
309 a database. Currently an MD5 hash of the request_string.
315 return md5_hex( encode_utf8( $self->request_string ) );
318 =head2 $self->request_string
320 A request string is the graph followed by the groups, which should form a unique
327 return $self->graph . '//' . join( ',', $self->sets );
330 =head2 by_size_and_alpha
332 A useful utility function to sort Set::Scalar objects first in descending
333 order by size, then in ascending alphabetical order by first element (i.e.
338 sub by_size_and_alpha {
340 my $size = $b->members <=> $a->members;
341 return $size if $size;
342 # Then sort by alphabetical order of set elements.
343 return "$a" cmp "$b";
346 =head2 $self->sources
348 Return all 'source' class witnesses in these sets for this graph.
354 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
358 =head2 $self->minimum_grouping_for( $set )
360 Return the minimum grouping (including necessary hypothetical witnesses) for
361 the witness set specified. Will return undef if $set does not match one of
362 the defined witness sets in $self->sets.
366 # Look for a matching set in our setlist, and return its corresponding group
367 sub minimum_grouping_for {
368 my( $self, $set ) = @_;
369 my $midx = $self->set_index( sub { "$set" eq "$_" } );
370 return undef unless defined $midx;
371 return $self->grouping( $midx );
374 =head1 CALCULATION STORAGE METHODS
376 =head2 $self->is_genealogical( $bool )
378 Record that the sets are genealogical for this graph.
380 =head2 $self->set_class( $witness, $class )
382 Record that the witness in question is of the given class.
384 =head2 $self->record_grouping( $group )
386 Record that the group in question (either an arrayref or a Set::Scalar) forms
387 a minimum grouping on the graph. Will throw an error unless the group is a
388 (non-proper) superset of an existing witness set.
392 sub record_grouping {
393 my( $self, $group ) = @_;
394 unless( ref( $group ) eq 'Set::Scalar' ) {
395 my $s = Set::Scalar->new( @$group );
398 # Find the set that is a subset of this group, and record it in the
399 # correct spot in our groupinglist.
401 foreach my $set ( $self->sets ) {
402 if( _is_subset( $set, $group ) ) {
403 $self->_set_grouping( $idx, $group );
408 if( $idx == scalar( $self->sets ) ) {
409 throw( "Failed to find witness set that is a subset of $group" );
414 # A replacement for the stupid Set::Scalar::is_subset
415 my( $set1, $set2 ) = @_;
417 map { $all{$_} = 1 } $set2->members;
418 foreach my $m ( $set1->members ) {
419 return 0 unless $all{$m};
426 # Required values: graph and setlist
428 graph => $self->graph,
431 foreach my $set ( $self->sets ) {
432 push( @{$data->{setlist}}, [ $set->members ] );
434 # Scalar values, if they are set
435 $data->{is_genealogical} = 1 if $self->is_genealogical;
436 $data->{status} = $self->status if $self->status;
438 # Set values, if they exist
439 $data->{groupinglist} = [] if $self->groupings;
440 foreach my $group ( $self->groupings ) {
441 push( @{$data->{groupinglist}}, [ $group->members ] );
443 $data->{classlist} = {} if $self->assigned_wits;
444 foreach my $wit ( $self->assigned_wits ) {
445 $data->{classlist}->{$wit} = $self->class( $wit );
451 Text::Tradition::Error->throw(
452 'ident' => 'Analysis::Result error',
458 __PACKAGE__->meta->make_immutable;