split off stemma analysis modules from base Tradition layer
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Analysis / Result.pm
CommitLineData
7e17346f 1package Text::Tradition::Analysis::Result;
2
3use Moose;
a42a164c 4use Digest::MD5 qw/ md5_hex /;
5use Encode qw/ encode_utf8 /;
7e17346f 6use JSON qw/ to_json /;
7use Set::Scalar;
8use Text::Tradition::Error;
9
10=head1 NAME
11
12Text::Tradition::Analysis::Result - object to express an IDP calculation result
13for a particular graph problem.
14
15=head1 DESCRIPTION
16
17Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18(expressing variation in reading between witnesses related according to the
19stemma hypothesis), it is possible to calculate certain properties of how the
20readings might be related to each other. This calculation depends on a custom
21program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22calculation can take a long time. The purpose of this object is to allow storage
23of calculated results in a database.
24
25For each graph problem, the following features can be calculated:
26
27=over 4
28
29=item * Whether the reading groups form a genealogical pattern on the stemma.
30
31=item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
32
33=item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
34
35=back
36
37=head1 CONSTRUCTOR
38
39=head2 new
40
41Creates a new graph problem. Requires two properties:
42
43=over 4
44
45=item * setlist - An array of arrays expressing the witness sets. The inner
46arrays will be converted to Set::Scalar objects, and must have distinct members.
47
48=item * graph - A dot description of a graph (e.g. the output of a call to
49Text::Tradition::Stemma::editable) against which the sets will be analyzed.
50
51=back
52
0a17afe9 53All other properties should be calculated by IDP rather than set manually.
54These include:
55
56=over 4
57
58=item * is_genealogical - Boolean, indicating whether the witness sets form
59genealogical groupings on the graph.
60
61=item * status - String to indicate whether a solution has been calculated
62for this analysis problem. Recognized values are "OK" (calculated) and
63"running" (being calculated now). All other values, or no value, imply that
64the calculation has yet to take place.
65
66=item * groupings - These are extended (if necessary) versions of the witness
67sets, which include the hypothetical witnesses necessary to minimize coincidence
68of variation.
69
70=item * classes - These are key/value pairs, keyed by witness, indicating for
71each witness whether it is the source of a reading variant, whether it represents
72a reversion to an ancestor (but not parent) reading, or whether its reading
73follows that of a parent on the graph.
74
75=back
76
77=begin testing
78
85a74a8d 79use Set::Scalar;
0a17afe9 80use Test::More::UTF8;
81use Text::Tradition;
82use TryCatch;
83use_ok( 'Text::Tradition::Analysis::Result' );
84
85# Make a problem with a graph and a set of groupings
86
87my $datafile = 't/data/florilegium_tei_ps.xml';
88my $tradition = Text::Tradition->new( 'input' => 'TEI',
89 'name' => 'flortest',
90 'file' => $datafile );
951ddfe8 91$tradition->enable_stemmata;
0a17afe9 92my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
93
94my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
95my $extant = {};
96foreach my $set ( @$sets ) {
97 map { $extant->{$_} = 1 } @$set;
98}
99my $sitgraph = $s->editable( { extant => $extant } );
100my $result = Text::Tradition::Analysis::Result->new(
101 graph => $sitgraph,
102 setlist => $sets );
103is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
104is( $result->graph, $sitgraph, "Got identical graph string back" );
105is( $result->status, "new", "Calculation status of result set correctly" );
106my @rsets = $result->sets;
107is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
108is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
109is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
110
111# Add some calculation values
112$result->is_genealogical( 1 );
113$result->record_grouping( [ qw/ 4 5 D Q / ] );
114try {
115 $result->record_grouping( [ qw/ 3 4 D H / ] );
116 ok( 0, "Recorded a grouping that does not match the input sets" );
117} catch ( Text::Tradition::Error $e ) {
118 like( $e->message, qr/Failed to find witness set that is a subset of/,
119 "Correct error thrown on bad record_grouping attempt" );
120}
85a74a8d 121# Test manually setting an out-of-range group
122try {
123 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
124 ok( 0, "Set a grouping at an invalid index" );
125} catch ( Text::Tradition::Error $e ) {
126 is( $e->message, 'Set / group index 3 out of range for set_grouping',
127 "Caught attempt to set grouping at invalid index" );
128}
0a17afe9 129$result->record_grouping( [ qw/ 3 F H / ] );
130my $gp1 = $result->grouping(1);
131is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
132 "Found a minimum grouping for D Q" );
133is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
134is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
135 "Default minimum grouping found for biggest group" );
136$result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
137my %classes = (
138 α => 'source',
139 3 => 'source',
140 4 => 'source' );
141foreach my $gp ( $result->groupings ) {
142 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
143}
144foreach my $gp ( $result->groupings ) {
145 foreach my $wit ( @$gp ) {
146 my $expected = $classes{$wit} || 'copy';
147 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
148 }
149}
150
151# Now write it out to JSON
152my $struct = $result->TO_JSON;
153my $newresult = Text::Tradition::Analysis::Result->new( $struct );
154is( $result->object_key, $newresult->object_key,
155 "Object key stayed constant on export/import" );
156my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
157is( $problem->object_key, $result->object_key,
158 "Object key stayed constant for newly created problem" );
159
160
161=end testing
162
163=head1 METHODS
164
165=head2 $self->has_class( $witness )
166=head2 $self->class( $witness )
167
168If a class has been calculated for the given witness, has_class returns true
169and class returns the calculated answer.
170
7e17346f 171=cut
172
173has 'setlist' => (
174 traits => ['Array'],
175 isa => 'ArrayRef[Set::Scalar]',
176 handles => {
177 sets => 'elements',
178 set_index => 'first_index',
179 },
180 required => 1
181);
182
183has 'graph' => (
184 is => 'ro',
185 isa => 'Str',
186 required => 1
187);
188
74038ae5 189has 'status' => (
190 is => 'rw',
191 isa => 'Str'
192);
193
7e17346f 194has 'is_genealogical' => (
195 is => 'rw',
196 isa => 'Bool',
197 predicate => 'has_genealogical_result'
198);
199
200has 'groupinglist' => (
201 traits => ['Array'],
202 isa => 'ArrayRef[Set::Scalar]',
203 handles => {
204 groupings => 'elements',
7e17346f 205 _set_grouping => 'set',
206 grouping => 'get',
207 },
208 default => sub { [] }
209);
210
211has 'classlist' => (
212 traits => ['Hash'],
213 isa => 'HashRef[Str]',
214 handles => {
215 class => 'get',
216 has_class => 'exists',
217 set_class => 'set',
218 classes => 'elements',
219 assigned_wits => 'keys',
220 },
221);
222
223around BUILDARGS => sub {
224 my $orig = shift;
225 my $class = shift;
226 my $args = @_ == 1 ? $_[0] : { @_ };
227
228 # Convert the set list into a list of Set::Scalars, ordered first by size and
229 # then alphabetically by first-sorted.
70406673 230 throw( "Must specify a set list to Analysis::Result->new()" )
7e17346f 231 unless ref( $args->{'setlist'} ) eq 'ARRAY';
70406673 232 throw( "Empty set list specified to Analysis::Result->new()" )
03c0a7d7 233 unless @{$args->{'setlist'}};
a42a164c 234 # Order the sets and make sure they are all distinct Set::Scalars.
b42d7113 235 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
236 _check_set_args( $args->{'setlist'} ) ];
0a17afe9 237 if( exists $args->{'groupinglist'} ) {
238 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
239 }
7e17346f 240
c90ef1a3 241 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
242 # its string.
243 if( ref( $args->{'graph'} ) ) {
7e17346f 244 my $st = delete $args->{'graph'};
c90ef1a3 245 my $type = ref( $st );
246 my $gopt = { linesep => ' ' };
247 if( $type eq 'Text::Tradition::Stemma' ) {
248 $args->{'graph'} = $st->editable( $gopt );
249 } elsif( $type eq 'Graph' ) {
250 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
251 } else {
70406673 252 throw( "Passed argument to graph that is neither Stemma nor Graph" );
c90ef1a3 253 }
254 }
74038ae5 255
256 # If our only args are graph and setlist, then status should be 'new'
257 if( scalar keys %$args == 2 ) {
258 $args->{'status'} = 'new';
259 }
c90ef1a3 260
7e17346f 261 return $class->$orig( $args );
262};
263
a42a164c 264sub _check_set_args {
265 my $setlist = shift;
266 my @sets;
267 foreach my $set ( @{$setlist} ) {
268 my $s = $set;
269 # Check uniqueness of the current set
270 if( ref( $set ) ne 'Set::Scalar' ) {
271 $s = Set::Scalar->new( @$set );
70406673 272 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
a42a164c 273 unless @$set == $s->elements;
274 }
275 # Check distinctness of the set from all other sets given so far
276 foreach my $ps ( @sets ) {
70406673 277 throw( "Two sets $s / $ps are not disjoint" )
a42a164c 278 unless $s->is_disjoint( $ps );
279 }
280 # Save the set.
281 push( @sets, $s );
282 }
b42d7113 283 return @sets;
a42a164c 284}
285
7e17346f 286sub BUILD {
287 my $self = shift;
288
289 # Initialize the groupings array
85a74a8d 290 my @sets = $self->sets;
291 foreach my $idx( 0 .. $#sets ) {
292 unless( $self->grouping( $idx ) ) {
6724a135 293 my $g = $sets[$idx]->clone();
294 $self->_set_grouping( $idx, $g );
85a74a8d 295 }
296 }
7e17346f 297}
298
85a74a8d 299before '_set_grouping' => sub {
300 my $self = shift;
301 my $idx = $_[0];
302 my $max = scalar $self->sets;
303 if( $idx >= $max ) {
304 throw( "Set / group index $idx out of range for set_grouping" );
305 }
306};
307
0a17afe9 308=head2 $self->object_key
7e17346f 309
0a17afe9 310Returns a unique key that can be used to look up this graph/set combination in
311a database. Currently an MD5 hash of the request_string.
312
313=cut
1e4b66f4 314
a42a164c 315sub object_key {
316 my $self = shift;
317 return md5_hex( encode_utf8( $self->request_string ) );
318}
319
0a17afe9 320=head2 $self->request_string
321
322A request string is the graph followed by the groups, which should form a unique
323key for the result.
324
325=cut
326
7e17346f 327sub request_string {
328 my $self = shift;
03c0a7d7 329 return $self->graph . '//' . join( ',', $self->sets );
7e17346f 330}
331
0a17afe9 332=head2 by_size_and_alpha
333
334A useful utility function to sort Set::Scalar objects first in descending
335order by size, then in ascending alphabetical order by first element (i.e.
336by stringification.)
337
338=cut
339
7e17346f 340sub by_size_and_alpha {
341 my( $a, $b ) = @_;
342 my $size = $b->members <=> $a->members;
343 return $size if $size;
344 # Then sort by alphabetical order of set elements.
345 return "$a" cmp "$b";
346}
347
0a17afe9 348=head2 $self->sources
349
350Return all 'source' class witnesses in these sets for this graph.
351
352=cut
353
7e17346f 354sub sources {
355 my $self = shift;
356 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
357 return @sources;
358}
359
0a17afe9 360=head2 $self->minimum_grouping_for( $set )
361
362Return the minimum grouping (including necessary hypothetical witnesses) for
363the witness set specified. Will return undef if $set does not match one of
364the defined witness sets in $self->sets.
365
366=cut
367
7e17346f 368# Look for a matching set in our setlist, and return its corresponding group
369sub minimum_grouping_for {
370 my( $self, $set ) = @_;
371 my $midx = $self->set_index( sub { "$set" eq "$_" } );
372 return undef unless defined $midx;
373 return $self->grouping( $midx );
374}
375
0a17afe9 376=head1 CALCULATION STORAGE METHODS
377
378=head2 $self->is_genealogical( $bool )
379
380Record that the sets are genealogical for this graph.
381
382=head2 $self->set_class( $witness, $class )
383
384Record that the witness in question is of the given class.
385
386=head2 $self->record_grouping( $group )
387
388Record that the group in question (either an arrayref or a Set::Scalar) forms
389a minimum grouping on the graph. Will throw an error unless the group is a
390(non-proper) superset of an existing witness set.
391
392=cut
393
394sub record_grouping {
395 my( $self, $group ) = @_;
396 unless( ref( $group ) eq 'Set::Scalar' ) {
397 my $s = Set::Scalar->new( @$group );
398 $group = $s;
399 }
400 # Find the set that is a subset of this group, and record it in the
401 # correct spot in our groupinglist.
402 my $idx = 0;
403 foreach my $set ( $self->sets ) {
404 if( _is_subset( $set, $group ) ) {
405 $self->_set_grouping( $idx, $group );
406 last;
407 }
408 $idx++;
409 }
410 if( $idx == scalar( $self->sets ) ) {
411 throw( "Failed to find witness set that is a subset of $group" );
412 }
413}
414
415sub _is_subset {
416 # A replacement for the stupid Set::Scalar::is_subset
417 my( $set1, $set2 ) = @_;
418 my %all;
419 map { $all{$_} = 1 } $set2->members;
420 foreach my $m ( $set1->members ) {
421 return 0 unless $all{$m};
422 }
423 return 1;
424}
425
a42a164c 426sub TO_JSON {
427 my $self = shift;
0a17afe9 428 # Required values: graph and setlist
a42a164c 429 my $data = {
430 graph => $self->graph,
431 setlist => [],
a42a164c 432 };
a42a164c 433 foreach my $set ( $self->sets ) {
434 push( @{$data->{setlist}}, [ $set->members ] );
435 }
0a17afe9 436 # Scalar values, if they are set
437 $data->{is_genealogical} = 1 if $self->is_genealogical;
438 $data->{status} = $self->status if $self->status;
439
440 # Set values, if they exist
441 $data->{groupinglist} = [] if $self->groupings;
a42a164c 442 foreach my $group ( $self->groupings ) {
443 push( @{$data->{groupinglist}}, [ $group->members ] );
444 }
0a17afe9 445 $data->{classlist} = {} if $self->assigned_wits;
a42a164c 446 foreach my $wit ( $self->assigned_wits ) {
447 $data->{classlist}->{$wit} = $self->class( $wit );
448 }
449 return $data;
450}
451
7e17346f 452sub throw {
453 Text::Tradition::Error->throw(
454 'ident' => 'Analysis::Result error',
455 'message' => $_[0],
456 );
457}
458
459no Moose;
460__PACKAGE__->meta->make_immutable;
461
4621;