Merge branch 'master' of github.com:tla/stemmatology
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis / Result.pm
CommitLineData
7e17346f 1package Text::Tradition::Analysis::Result;
2
3use Moose;
a42a164c 4use Digest::MD5 qw/ md5_hex /;
5use Encode qw/ encode_utf8 /;
7e17346f 6use JSON qw/ to_json /;
7use Set::Scalar;
8use Text::Tradition::Error;
9
10=head1 NAME
11
12Text::Tradition::Analysis::Result - object to express an IDP calculation result
13for a particular graph problem.
14
15=head1 DESCRIPTION
16
17Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18(expressing variation in reading between witnesses related according to the
19stemma hypothesis), it is possible to calculate certain properties of how the
20readings might be related to each other. This calculation depends on a custom
21program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22calculation can take a long time. The purpose of this object is to allow storage
23of calculated results in a database.
24
25For each graph problem, the following features can be calculated:
26
27=over 4
28
29=item * Whether the reading groups form a genealogical pattern on the stemma.
30
31=item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
32
33=item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
34
35=back
36
37=head1 CONSTRUCTOR
38
39=head2 new
40
41Creates a new graph problem. Requires two properties:
42
43=over 4
44
45=item * setlist - An array of arrays expressing the witness sets. The inner
46arrays will be converted to Set::Scalar objects, and must have distinct members.
47
48=item * graph - A dot description of a graph (e.g. the output of a call to
49Text::Tradition::Stemma::editable) against which the sets will be analyzed.
50
51=back
52
0a17afe9 53All other properties should be calculated by IDP rather than set manually.
54These include:
55
56=over 4
57
58=item * is_genealogical - Boolean, indicating whether the witness sets form
59genealogical groupings on the graph.
60
61=item * status - String to indicate whether a solution has been calculated
62for this analysis problem. Recognized values are "OK" (calculated) and
63"running" (being calculated now). All other values, or no value, imply that
64the calculation has yet to take place.
65
66=item * groupings - These are extended (if necessary) versions of the witness
67sets, which include the hypothetical witnesses necessary to minimize coincidence
68of variation.
69
70=item * classes - These are key/value pairs, keyed by witness, indicating for
71each witness whether it is the source of a reading variant, whether it represents
72a reversion to an ancestor (but not parent) reading, or whether its reading
73follows that of a parent on the graph.
74
75=back
76
77=begin testing
78
85a74a8d 79use Set::Scalar;
0a17afe9 80use Test::More::UTF8;
81use Text::Tradition;
82use TryCatch;
83use_ok( 'Text::Tradition::Analysis::Result' );
84
85# Make a problem with a graph and a set of groupings
86
87my $datafile = 't/data/florilegium_tei_ps.xml';
88my $tradition = Text::Tradition->new( 'input' => 'TEI',
89 'name' => 'flortest',
90 'file' => $datafile );
91my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
92
93my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
94my $extant = {};
95foreach my $set ( @$sets ) {
96 map { $extant->{$_} = 1 } @$set;
97}
98my $sitgraph = $s->editable( { extant => $extant } );
99my $result = Text::Tradition::Analysis::Result->new(
100 graph => $sitgraph,
101 setlist => $sets );
102is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
103is( $result->graph, $sitgraph, "Got identical graph string back" );
104is( $result->status, "new", "Calculation status of result set correctly" );
105my @rsets = $result->sets;
106is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
107is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
108is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
109
110# Add some calculation values
111$result->is_genealogical( 1 );
112$result->record_grouping( [ qw/ 4 5 D Q / ] );
113try {
114 $result->record_grouping( [ qw/ 3 4 D H / ] );
115 ok( 0, "Recorded a grouping that does not match the input sets" );
116} catch ( Text::Tradition::Error $e ) {
117 like( $e->message, qr/Failed to find witness set that is a subset of/,
118 "Correct error thrown on bad record_grouping attempt" );
119}
85a74a8d 120# Test manually setting an out-of-range group
121try {
122 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
123 ok( 0, "Set a grouping at an invalid index" );
124} catch ( Text::Tradition::Error $e ) {
125 is( $e->message, 'Set / group index 3 out of range for set_grouping',
126 "Caught attempt to set grouping at invalid index" );
127}
0a17afe9 128$result->record_grouping( [ qw/ 3 F H / ] );
129my $gp1 = $result->grouping(1);
130is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
131 "Found a minimum grouping for D Q" );
132is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
133is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
134 "Default minimum grouping found for biggest group" );
135$result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
136my %classes = (
137 α => 'source',
138 3 => 'source',
139 4 => 'source' );
140foreach my $gp ( $result->groupings ) {
141 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
142}
143foreach my $gp ( $result->groupings ) {
144 foreach my $wit ( @$gp ) {
145 my $expected = $classes{$wit} || 'copy';
146 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
147 }
148}
149
150# Now write it out to JSON
151my $struct = $result->TO_JSON;
152my $newresult = Text::Tradition::Analysis::Result->new( $struct );
153is( $result->object_key, $newresult->object_key,
154 "Object key stayed constant on export/import" );
155my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
156is( $problem->object_key, $result->object_key,
157 "Object key stayed constant for newly created problem" );
158
159
160=end testing
161
162=head1 METHODS
163
164=head2 $self->has_class( $witness )
165=head2 $self->class( $witness )
166
167If a class has been calculated for the given witness, has_class returns true
168and class returns the calculated answer.
169
7e17346f 170=cut
171
172has 'setlist' => (
173 traits => ['Array'],
174 isa => 'ArrayRef[Set::Scalar]',
175 handles => {
176 sets => 'elements',
177 set_index => 'first_index',
178 },
179 required => 1
180);
181
182has 'graph' => (
183 is => 'ro',
184 isa => 'Str',
185 required => 1
186);
187
74038ae5 188has 'status' => (
189 is => 'rw',
190 isa => 'Str'
191);
192
7e17346f 193has 'is_genealogical' => (
194 is => 'rw',
195 isa => 'Bool',
196 predicate => 'has_genealogical_result'
197);
198
199has 'groupinglist' => (
200 traits => ['Array'],
201 isa => 'ArrayRef[Set::Scalar]',
202 handles => {
203 groupings => 'elements',
7e17346f 204 _set_grouping => 'set',
205 grouping => 'get',
206 },
207 default => sub { [] }
208);
209
210has 'classlist' => (
211 traits => ['Hash'],
212 isa => 'HashRef[Str]',
213 handles => {
214 class => 'get',
215 has_class => 'exists',
216 set_class => 'set',
217 classes => 'elements',
218 assigned_wits => 'keys',
219 },
220);
221
222around BUILDARGS => sub {
223 my $orig = shift;
224 my $class = shift;
225 my $args = @_ == 1 ? $_[0] : { @_ };
226
227 # Convert the set list into a list of Set::Scalars, ordered first by size and
228 # then alphabetically by first-sorted.
70406673 229 throw( "Must specify a set list to Analysis::Result->new()" )
7e17346f 230 unless ref( $args->{'setlist'} ) eq 'ARRAY';
70406673 231 throw( "Empty set list specified to Analysis::Result->new()" )
03c0a7d7 232 unless @{$args->{'setlist'}};
a42a164c 233 # Order the sets and make sure they are all distinct Set::Scalars.
b42d7113 234 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
235 _check_set_args( $args->{'setlist'} ) ];
0a17afe9 236 if( exists $args->{'groupinglist'} ) {
237 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
238 }
7e17346f 239
c90ef1a3 240 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
241 # its string.
242 if( ref( $args->{'graph'} ) ) {
7e17346f 243 my $st = delete $args->{'graph'};
c90ef1a3 244 my $type = ref( $st );
245 my $gopt = { linesep => ' ' };
246 if( $type eq 'Text::Tradition::Stemma' ) {
247 $args->{'graph'} = $st->editable( $gopt );
248 } elsif( $type eq 'Graph' ) {
249 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
250 } else {
70406673 251 throw( "Passed argument to graph that is neither Stemma nor Graph" );
c90ef1a3 252 }
253 }
74038ae5 254
255 # If our only args are graph and setlist, then status should be 'new'
256 if( scalar keys %$args == 2 ) {
257 $args->{'status'} = 'new';
258 }
c90ef1a3 259
7e17346f 260 return $class->$orig( $args );
261};
262
a42a164c 263sub _check_set_args {
264 my $setlist = shift;
265 my @sets;
266 foreach my $set ( @{$setlist} ) {
267 my $s = $set;
268 # Check uniqueness of the current set
269 if( ref( $set ) ne 'Set::Scalar' ) {
270 $s = Set::Scalar->new( @$set );
70406673 271 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
a42a164c 272 unless @$set == $s->elements;
273 }
274 # Check distinctness of the set from all other sets given so far
275 foreach my $ps ( @sets ) {
70406673 276 throw( "Two sets $s / $ps are not disjoint" )
a42a164c 277 unless $s->is_disjoint( $ps );
278 }
279 # Save the set.
280 push( @sets, $s );
281 }
b42d7113 282 return @sets;
a42a164c 283}
284
7e17346f 285sub BUILD {
286 my $self = shift;
287
288 # Initialize the groupings array
85a74a8d 289 my @sets = $self->sets;
290 foreach my $idx( 0 .. $#sets ) {
291 unless( $self->grouping( $idx ) ) {
6724a135 292 my $g = $sets[$idx]->clone();
293 $self->_set_grouping( $idx, $g );
85a74a8d 294 }
295 }
7e17346f 296}
297
85a74a8d 298before '_set_grouping' => sub {
299 my $self = shift;
300 my $idx = $_[0];
301 my $max = scalar $self->sets;
302 if( $idx >= $max ) {
303 throw( "Set / group index $idx out of range for set_grouping" );
304 }
305};
306
0a17afe9 307=head2 $self->object_key
7e17346f 308
0a17afe9 309Returns a unique key that can be used to look up this graph/set combination in
310a database. Currently an MD5 hash of the request_string.
311
312=cut
1e4b66f4 313
a42a164c 314sub object_key {
315 my $self = shift;
316 return md5_hex( encode_utf8( $self->request_string ) );
317}
318
0a17afe9 319=head2 $self->request_string
320
321A request string is the graph followed by the groups, which should form a unique
322key for the result.
323
324=cut
325
7e17346f 326sub request_string {
327 my $self = shift;
03c0a7d7 328 return $self->graph . '//' . join( ',', $self->sets );
7e17346f 329}
330
0a17afe9 331=head2 by_size_and_alpha
332
333A useful utility function to sort Set::Scalar objects first in descending
334order by size, then in ascending alphabetical order by first element (i.e.
335by stringification.)
336
337=cut
338
7e17346f 339sub by_size_and_alpha {
340 my( $a, $b ) = @_;
341 my $size = $b->members <=> $a->members;
342 return $size if $size;
343 # Then sort by alphabetical order of set elements.
344 return "$a" cmp "$b";
345}
346
0a17afe9 347=head2 $self->sources
348
349Return all 'source' class witnesses in these sets for this graph.
350
351=cut
352
7e17346f 353sub sources {
354 my $self = shift;
355 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
356 return @sources;
357}
358
0a17afe9 359=head2 $self->minimum_grouping_for( $set )
360
361Return the minimum grouping (including necessary hypothetical witnesses) for
362the witness set specified. Will return undef if $set does not match one of
363the defined witness sets in $self->sets.
364
365=cut
366
7e17346f 367# Look for a matching set in our setlist, and return its corresponding group
368sub minimum_grouping_for {
369 my( $self, $set ) = @_;
370 my $midx = $self->set_index( sub { "$set" eq "$_" } );
371 return undef unless defined $midx;
372 return $self->grouping( $midx );
373}
374
0a17afe9 375=head1 CALCULATION STORAGE METHODS
376
377=head2 $self->is_genealogical( $bool )
378
379Record that the sets are genealogical for this graph.
380
381=head2 $self->set_class( $witness, $class )
382
383Record that the witness in question is of the given class.
384
385=head2 $self->record_grouping( $group )
386
387Record that the group in question (either an arrayref or a Set::Scalar) forms
388a minimum grouping on the graph. Will throw an error unless the group is a
389(non-proper) superset of an existing witness set.
390
391=cut
392
393sub record_grouping {
394 my( $self, $group ) = @_;
395 unless( ref( $group ) eq 'Set::Scalar' ) {
396 my $s = Set::Scalar->new( @$group );
397 $group = $s;
398 }
399 # Find the set that is a subset of this group, and record it in the
400 # correct spot in our groupinglist.
401 my $idx = 0;
402 foreach my $set ( $self->sets ) {
403 if( _is_subset( $set, $group ) ) {
404 $self->_set_grouping( $idx, $group );
405 last;
406 }
407 $idx++;
408 }
409 if( $idx == scalar( $self->sets ) ) {
410 throw( "Failed to find witness set that is a subset of $group" );
411 }
412}
413
414sub _is_subset {
415 # A replacement for the stupid Set::Scalar::is_subset
416 my( $set1, $set2 ) = @_;
417 my %all;
418 map { $all{$_} = 1 } $set2->members;
419 foreach my $m ( $set1->members ) {
420 return 0 unless $all{$m};
421 }
422 return 1;
423}
424
a42a164c 425sub TO_JSON {
426 my $self = shift;
0a17afe9 427 # Required values: graph and setlist
a42a164c 428 my $data = {
429 graph => $self->graph,
430 setlist => [],
a42a164c 431 };
a42a164c 432 foreach my $set ( $self->sets ) {
433 push( @{$data->{setlist}}, [ $set->members ] );
434 }
0a17afe9 435 # Scalar values, if they are set
436 $data->{is_genealogical} = 1 if $self->is_genealogical;
437 $data->{status} = $self->status if $self->status;
438
439 # Set values, if they exist
440 $data->{groupinglist} = [] if $self->groupings;
a42a164c 441 foreach my $group ( $self->groupings ) {
442 push( @{$data->{groupinglist}}, [ $group->members ] );
443 }
0a17afe9 444 $data->{classlist} = {} if $self->assigned_wits;
a42a164c 445 foreach my $wit ( $self->assigned_wits ) {
446 $data->{classlist}->{$wit} = $self->class( $wit );
447 }
448 return $data;
449}
450
7e17346f 451sub throw {
452 Text::Tradition::Error->throw(
453 'ident' => 'Analysis::Result error',
454 'message' => $_[0],
455 );
456}
457
458no Moose;
459__PACKAGE__->meta->make_immutable;
460
4611;