1 package Text::Tradition::Analysis::Result;
4 use Digest::MD5 qw/ md5_hex /;
5 use Encode qw/ encode_utf8 /;
6 use JSON qw/ to_json /;
8 use Text::Tradition::Error;
12 Text::Tradition::Analysis::Result - object to express an IDP calculation result
13 for a particular graph problem.
17 Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18 (expressing variation in reading between witnesses related according to the
19 stemma hypothesis), it is possible to calculate certain properties of how the
20 readings might be related to each other. This calculation depends on a custom
21 program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22 calculation can take a long time. The purpose of this object is to allow storage
23 of calculated results in a database.
25 For each graph problem, the following features can be calculated:
29 =item * Whether the reading groups form a genealogical pattern on the stemma.
31 =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
33 =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
41 Creates a new graph problem. Requires two properties:
45 =item * setlist - An array of arrays expressing the witness sets. The inner
46 arrays will be converted to Set::Scalar objects, and must have distinct members.
48 =item * graph - A dot description of a graph (e.g. the output of a call to
49 Text::Tradition::Stemma::editable) against which the sets will be analyzed.
53 All other properties should be calculated by IDP rather than set manually.
58 =item * is_genealogical - Boolean, indicating whether the witness sets form
59 genealogical groupings on the graph.
61 =item * status - String to indicate whether a solution has been calculated
62 for this analysis problem. Recognized values are "OK" (calculated) and
63 "running" (being calculated now). All other values, or no value, imply that
64 the calculation has yet to take place.
66 =item * groupings - These are extended (if necessary) versions of the witness
67 sets, which include the hypothetical witnesses necessary to minimize coincidence
70 =item * classes - These are key/value pairs, keyed by witness, indicating for
71 each witness whether it is the source of a reading variant, whether it represents
72 a reversion to an ancestor (but not parent) reading, or whether its reading
73 follows that of a parent on the graph.
83 use_ok( 'Text::Tradition::Analysis::Result' );
85 # Make a problem with a graph and a set of groupings
87 my $datafile = 't/data/florilegium_tei_ps.xml';
88 my $tradition = Text::Tradition->new( 'input' => 'TEI',
90 'file' => $datafile );
91 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
93 my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
95 foreach my $set ( @$sets ) {
96 map { $extant->{$_} = 1 } @$set;
98 my $sitgraph = $s->editable( { extant => $extant } );
99 my $result = Text::Tradition::Analysis::Result->new(
102 is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
103 is( $result->graph, $sitgraph, "Got identical graph string back" );
104 is( $result->status, "new", "Calculation status of result set correctly" );
105 my @rsets = $result->sets;
106 is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
107 is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
108 is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
110 # Add some calculation values
111 $result->is_genealogical( 1 );
112 $result->record_grouping( [ qw/ 4 5 D Q / ] );
114 $result->record_grouping( [ qw/ 3 4 D H / ] );
115 ok( 0, "Recorded a grouping that does not match the input sets" );
116 } catch ( Text::Tradition::Error $e ) {
117 like( $e->message, qr/Failed to find witness set that is a subset of/,
118 "Correct error thrown on bad record_grouping attempt" );
120 # Test manually setting an out-of-range group
122 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
123 ok( 0, "Set a grouping at an invalid index" );
124 } catch ( Text::Tradition::Error $e ) {
125 is( $e->message, 'Set / group index 3 out of range for set_grouping',
126 "Caught attempt to set grouping at invalid index" );
128 $result->record_grouping( [ qw/ 3 F H / ] );
129 my $gp1 = $result->grouping(1);
130 is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
131 "Found a minimum grouping for D Q" );
132 is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
133 is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
134 "Default minimum grouping found for biggest group" );
135 $result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
140 foreach my $gp ( $result->groupings ) {
141 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
143 foreach my $gp ( $result->groupings ) {
144 foreach my $wit ( @$gp ) {
145 my $expected = $classes{$wit} || 'copy';
146 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
150 # Now write it out to JSON
151 my $struct = $result->TO_JSON;
152 my $newresult = Text::Tradition::Analysis::Result->new( $struct );
153 is( $result->object_key, $newresult->object_key,
154 "Object key stayed constant on export/import" );
155 my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
156 is( $problem->object_key, $result->object_key,
157 "Object key stayed constant for newly created problem" );
164 =head2 $self->has_class( $witness )
165 =head2 $self->class( $witness )
167 If a class has been calculated for the given witness, has_class returns true
168 and class returns the calculated answer.
174 isa => 'ArrayRef[Set::Scalar]',
177 set_index => 'first_index',
193 has 'is_genealogical' => (
196 predicate => 'has_genealogical_result'
199 has 'groupinglist' => (
201 isa => 'ArrayRef[Set::Scalar]',
203 groupings => 'elements',
204 _set_grouping => 'set',
207 default => sub { [] }
212 isa => 'HashRef[Str]',
215 has_class => 'exists',
217 classes => 'elements',
218 assigned_wits => 'keys',
222 around BUILDARGS => sub {
225 my $args = @_ == 1 ? $_[0] : { @_ };
227 # Convert the set list into a list of Set::Scalars, ordered first by size and
228 # then alphabetically by first-sorted.
229 throw( "Must specify a set list to Analysis::Result->new()" )
230 unless ref( $args->{'setlist'} ) eq 'ARRAY';
231 throw( "Empty set list specified to Analysis::Result->new()" )
232 unless @{$args->{'setlist'}};
233 # Order the sets and make sure they are all distinct Set::Scalars.
234 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
235 _check_set_args( $args->{'setlist'} ) ];
236 if( exists $args->{'groupinglist'} ) {
237 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
240 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
242 if( ref( $args->{'graph'} ) ) {
243 my $st = delete $args->{'graph'};
244 my $type = ref( $st );
245 my $gopt = { linesep => ' ' };
246 if( $type eq 'Text::Tradition::Stemma' ) {
247 $args->{'graph'} = $st->editable( $gopt );
248 } elsif( $type eq 'Graph' ) {
249 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
251 throw( "Passed argument to graph that is neither Stemma nor Graph" );
255 # If our only args are graph and setlist, then status should be 'new'
256 if( scalar keys %$args == 2 ) {
257 $args->{'status'} = 'new';
260 return $class->$orig( $args );
263 sub _check_set_args {
266 foreach my $set ( @{$setlist} ) {
268 # Check uniqueness of the current set
269 if( ref( $set ) ne 'Set::Scalar' ) {
270 $s = Set::Scalar->new( @$set );
271 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
272 unless @$set == $s->elements;
274 # Check distinctness of the set from all other sets given so far
275 foreach my $ps ( @sets ) {
276 throw( "Two sets $s / $ps are not disjoint" )
277 unless $s->is_disjoint( $ps );
288 # Initialize the groupings array
289 my @sets = $self->sets;
290 foreach my $idx( 0 .. $#sets ) {
291 unless( $self->grouping( $idx ) ) {
292 my $g = $sets[$idx]->clone();
293 $self->_set_grouping( $idx, $g );
298 before '_set_grouping' => sub {
301 my $max = scalar $self->sets;
303 throw( "Set / group index $idx out of range for set_grouping" );
307 =head2 $self->object_key
309 Returns a unique key that can be used to look up this graph/set combination in
310 a database. Currently an MD5 hash of the request_string.
316 return md5_hex( encode_utf8( $self->request_string ) );
319 =head2 $self->request_string
321 A request string is the graph followed by the groups, which should form a unique
328 return $self->graph . '//' . join( ',', $self->sets );
331 =head2 by_size_and_alpha
333 A useful utility function to sort Set::Scalar objects first in descending
334 order by size, then in ascending alphabetical order by first element (i.e.
339 sub by_size_and_alpha {
341 my $size = $b->members <=> $a->members;
342 return $size if $size;
343 # Then sort by alphabetical order of set elements.
344 return "$a" cmp "$b";
347 =head2 $self->sources
349 Return all 'source' class witnesses in these sets for this graph.
355 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
359 =head2 $self->minimum_grouping_for( $set )
361 Return the minimum grouping (including necessary hypothetical witnesses) for
362 the witness set specified. Will return undef if $set does not match one of
363 the defined witness sets in $self->sets.
367 # Look for a matching set in our setlist, and return its corresponding group
368 sub minimum_grouping_for {
369 my( $self, $set ) = @_;
370 my $midx = $self->set_index( sub { "$set" eq "$_" } );
371 return undef unless defined $midx;
372 return $self->grouping( $midx );
375 =head1 CALCULATION STORAGE METHODS
377 =head2 $self->is_genealogical( $bool )
379 Record that the sets are genealogical for this graph.
381 =head2 $self->set_class( $witness, $class )
383 Record that the witness in question is of the given class.
385 =head2 $self->record_grouping( $group )
387 Record that the group in question (either an arrayref or a Set::Scalar) forms
388 a minimum grouping on the graph. Will throw an error unless the group is a
389 (non-proper) superset of an existing witness set.
393 sub record_grouping {
394 my( $self, $group ) = @_;
395 unless( ref( $group ) eq 'Set::Scalar' ) {
396 my $s = Set::Scalar->new( @$group );
399 # Find the set that is a subset of this group, and record it in the
400 # correct spot in our groupinglist.
402 foreach my $set ( $self->sets ) {
403 if( _is_subset( $set, $group ) ) {
404 $self->_set_grouping( $idx, $group );
409 if( $idx == scalar( $self->sets ) ) {
410 throw( "Failed to find witness set that is a subset of $group" );
415 # A replacement for the stupid Set::Scalar::is_subset
416 my( $set1, $set2 ) = @_;
418 map { $all{$_} = 1 } $set2->members;
419 foreach my $m ( $set1->members ) {
420 return 0 unless $all{$m};
427 # Required values: graph and setlist
429 graph => $self->graph,
432 foreach my $set ( $self->sets ) {
433 push( @{$data->{setlist}}, [ $set->members ] );
435 # Scalar values, if they are set
436 $data->{is_genealogical} = 1 if $self->is_genealogical;
437 $data->{status} = $self->status if $self->status;
439 # Set values, if they exist
440 $data->{groupinglist} = [] if $self->groupings;
441 foreach my $group ( $self->groupings ) {
442 push( @{$data->{groupinglist}}, [ $group->members ] );
444 $data->{classlist} = {} if $self->assigned_wits;
445 foreach my $wit ( $self->assigned_wits ) {
446 $data->{classlist}->{$wit} = $self->class( $wit );
452 Text::Tradition::Error->throw(
453 'ident' => 'Analysis::Result error',
459 __PACKAGE__->meta->make_immutable;