cosmetic fixes to error messages
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis / Result.pm
CommitLineData
7e17346f 1package Text::Tradition::Analysis::Result;
2
3use Moose;
a42a164c 4use Digest::MD5 qw/ md5_hex /;
5use Encode qw/ encode_utf8 /;
7e17346f 6use JSON qw/ to_json /;
7use Set::Scalar;
8use Text::Tradition::Error;
9
10=head1 NAME
11
12Text::Tradition::Analysis::Result - object to express an IDP calculation result
13for a particular graph problem.
14
15=head1 DESCRIPTION
16
17Given a graph (expressing a stemma hypothesis) and a set of witness groupings
18(expressing variation in reading between witnesses related according to the
19stemma hypothesis), it is possible to calculate certain properties of how the
20readings might be related to each other. This calculation depends on a custom
21program run under the IDP system [TODO URL]. As the problem is NP-hard, the
22calculation can take a long time. The purpose of this object is to allow storage
23of calculated results in a database.
24
25For each graph problem, the following features can be calculated:
26
27=over 4
28
29=item * Whether the reading groups form a genealogical pattern on the stemma.
30
31=item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma.
32
33=item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's.
34
35=back
36
37=head1 CONSTRUCTOR
38
39=head2 new
40
41Creates a new graph problem. Requires two properties:
42
43=over 4
44
45=item * setlist - An array of arrays expressing the witness sets. The inner
46arrays will be converted to Set::Scalar objects, and must have distinct members.
47
48=item * graph - A dot description of a graph (e.g. the output of a call to
49Text::Tradition::Stemma::editable) against which the sets will be analyzed.
50
51=back
52
0a17afe9 53All other properties should be calculated by IDP rather than set manually.
54These include:
55
56=over 4
57
58=item * is_genealogical - Boolean, indicating whether the witness sets form
59genealogical groupings on the graph.
60
61=item * status - String to indicate whether a solution has been calculated
62for this analysis problem. Recognized values are "OK" (calculated) and
63"running" (being calculated now). All other values, or no value, imply that
64the calculation has yet to take place.
65
66=item * groupings - These are extended (if necessary) versions of the witness
67sets, which include the hypothetical witnesses necessary to minimize coincidence
68of variation.
69
70=item * classes - These are key/value pairs, keyed by witness, indicating for
71each witness whether it is the source of a reading variant, whether it represents
72a reversion to an ancestor (but not parent) reading, or whether its reading
73follows that of a parent on the graph.
74
75=back
76
77=begin testing
78
85a74a8d 79use Set::Scalar;
0a17afe9 80use Test::More::UTF8;
81use Text::Tradition;
82use TryCatch;
83use_ok( 'Text::Tradition::Analysis::Result' );
84
85# Make a problem with a graph and a set of groupings
86
87my $datafile = 't/data/florilegium_tei_ps.xml';
88my $tradition = Text::Tradition->new( 'input' => 'TEI',
89 'name' => 'flortest',
90 'file' => $datafile );
91my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
92
93my $sets = [ [ qw/ D Q / ], [ qw/ F H / ], [ qw/ A B C P S T / ] ];
94my $extant = {};
95foreach my $set ( @$sets ) {
96 map { $extant->{$_} = 1 } @$set;
97}
98my $sitgraph = $s->editable( { extant => $extant } );
99my $result = Text::Tradition::Analysis::Result->new(
100 graph => $sitgraph,
101 setlist => $sets );
102is( ref( $result ), 'Text::Tradition::Analysis::Result', "Got a Result object" );
103is( $result->graph, $sitgraph, "Got identical graph string back" );
104is( $result->status, "new", "Calculation status of result set correctly" );
105my @rsets = $result->sets;
106is( $rsets[0], '(A B C P S T)', "First set is biggest set" );
107is( $rsets[1], '(D Q)', "Second set is by alphabetical order" );
108is( $rsets[2], '(F H)', "Second set is by alphabetical order" );
109
110# Add some calculation values
111$result->is_genealogical( 1 );
112$result->record_grouping( [ qw/ 4 5 D Q / ] );
113try {
114 $result->record_grouping( [ qw/ 3 4 D H / ] );
115 ok( 0, "Recorded a grouping that does not match the input sets" );
116} catch ( Text::Tradition::Error $e ) {
117 like( $e->message, qr/Failed to find witness set that is a subset of/,
118 "Correct error thrown on bad record_grouping attempt" );
119}
85a74a8d 120# Test manually setting an out-of-range group
121try {
122 $result->_set_grouping( 3, Set::Scalar->new( qw/ X Y / ) );
123 ok( 0, "Set a grouping at an invalid index" );
124} catch ( Text::Tradition::Error $e ) {
125 is( $e->message, 'Set / group index 3 out of range for set_grouping',
126 "Caught attempt to set grouping at invalid index" );
127}
0a17afe9 128$result->record_grouping( [ qw/ 3 F H / ] );
129my $gp1 = $result->grouping(1);
130is( $result->minimum_grouping_for( $rsets[1] ), $gp1,
131 "Found a minimum grouping for D Q" );
132is( "$gp1", "(4 5 D Q)", "Retrieved minimum grouping is correct" );
133is( $result->minimum_grouping_for( $rsets[0] ), $rsets[0],
134 "Default minimum grouping found for biggest group" );
135$result->record_grouping( [ qw/ 1 α δ A B C P S T / ] );
136my %classes = (
137 α => 'source',
138 3 => 'source',
139 4 => 'source' );
140foreach my $gp ( $result->groupings ) {
141 map { my $c = $classes{$_} || 'copy'; $result->set_class( $_, $c ) } @$gp;
142}
143foreach my $gp ( $result->groupings ) {
144 foreach my $wit ( @$gp ) {
145 my $expected = $classes{$wit} || 'copy';
146 is( $result->class( $wit ), $expected, "Got expected witness class for $wit" );
147 }
148}
149
150# Now write it out to JSON
151my $struct = $result->TO_JSON;
152my $newresult = Text::Tradition::Analysis::Result->new( $struct );
153is( $result->object_key, $newresult->object_key,
154 "Object key stayed constant on export/import" );
155my $problem = Text::Tradition::Analysis::Result->new( graph => $sitgraph, setlist => $sets );
156is( $problem->object_key, $result->object_key,
157 "Object key stayed constant for newly created problem" );
158
159
160=end testing
161
162=head1 METHODS
163
164=head2 $self->has_class( $witness )
165=head2 $self->class( $witness )
166
167If a class has been calculated for the given witness, has_class returns true
168and class returns the calculated answer.
169
7e17346f 170=cut
171
172has 'setlist' => (
173 traits => ['Array'],
174 isa => 'ArrayRef[Set::Scalar]',
175 handles => {
176 sets => 'elements',
177 set_index => 'first_index',
178 },
179 required => 1
180);
181
182has 'graph' => (
183 is => 'ro',
184 isa => 'Str',
185 required => 1
186);
187
74038ae5 188has 'status' => (
189 is => 'rw',
190 isa => 'Str'
191);
192
7e17346f 193has 'is_genealogical' => (
194 is => 'rw',
195 isa => 'Bool',
196 predicate => 'has_genealogical_result'
197);
198
199has 'groupinglist' => (
200 traits => ['Array'],
201 isa => 'ArrayRef[Set::Scalar]',
202 handles => {
203 groupings => 'elements',
7e17346f 204 _set_grouping => 'set',
205 grouping => 'get',
206 },
207 default => sub { [] }
208);
209
210has 'classlist' => (
211 traits => ['Hash'],
212 isa => 'HashRef[Str]',
213 handles => {
214 class => 'get',
215 has_class => 'exists',
216 set_class => 'set',
217 classes => 'elements',
218 assigned_wits => 'keys',
219 },
220);
221
222around BUILDARGS => sub {
223 my $orig = shift;
224 my $class = shift;
225 my $args = @_ == 1 ? $_[0] : { @_ };
226
227 # Convert the set list into a list of Set::Scalars, ordered first by size and
228 # then alphabetically by first-sorted.
70406673 229 throw( "Must specify a set list to Analysis::Result->new()" )
7e17346f 230 unless ref( $args->{'setlist'} ) eq 'ARRAY';
70406673 231 throw( "Empty set list specified to Analysis::Result->new()" )
03c0a7d7 232 unless @{$args->{'setlist'}};
a42a164c 233 # Order the sets and make sure they are all distinct Set::Scalars.
b42d7113 234 $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) }
235 _check_set_args( $args->{'setlist'} ) ];
0a17afe9 236 if( exists $args->{'groupinglist'} ) {
237 $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ];
238 }
7e17346f 239
c90ef1a3 240 # If we have been passed a Text::Tradition::Stemma or a Graph, save only
241 # its string.
242 if( ref( $args->{'graph'} ) ) {
7e17346f 243 my $st = delete $args->{'graph'};
c90ef1a3 244 my $type = ref( $st );
245 my $gopt = { linesep => ' ' };
246 if( $type eq 'Text::Tradition::Stemma' ) {
247 $args->{'graph'} = $st->editable( $gopt );
248 } elsif( $type eq 'Graph' ) {
249 $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt );
250 } else {
70406673 251 throw( "Passed argument to graph that is neither Stemma nor Graph" );
c90ef1a3 252 }
253 }
74038ae5 254
255 # If our only args are graph and setlist, then status should be 'new'
256 if( scalar keys %$args == 2 ) {
257 $args->{'status'} = 'new';
258 }
c90ef1a3 259
7e17346f 260 return $class->$orig( $args );
261};
262
a42a164c 263sub _check_set_args {
264 my $setlist = shift;
265 my @sets;
266 foreach my $set ( @{$setlist} ) {
267 my $s = $set;
268 # Check uniqueness of the current set
269 if( ref( $set ) ne 'Set::Scalar' ) {
270 $s = Set::Scalar->new( @$set );
70406673 271 throw( "Duplicate element(s) in set or group passed to Analysis::Result->new()" )
a42a164c 272 unless @$set == $s->elements;
273 }
274 # Check distinctness of the set from all other sets given so far
275 foreach my $ps ( @sets ) {
70406673 276 throw( "Two sets $s / $ps are not disjoint" )
a42a164c 277 unless $s->is_disjoint( $ps );
278 }
279 # Save the set.
280 push( @sets, $s );
281 }
b42d7113 282 return @sets;
a42a164c 283}
284
7e17346f 285sub BUILD {
286 my $self = shift;
287
288 # Initialize the groupings array
85a74a8d 289 my @sets = $self->sets;
290 foreach my $idx( 0 .. $#sets ) {
291 unless( $self->grouping( $idx ) ) {
292 $self->_set_grouping( $idx, $sets[$idx] );
293 }
294 }
7e17346f 295}
296
85a74a8d 297before '_set_grouping' => sub {
298 my $self = shift;
299 my $idx = $_[0];
300 my $max = scalar $self->sets;
301 if( $idx >= $max ) {
302 throw( "Set / group index $idx out of range for set_grouping" );
303 }
304};
305
0a17afe9 306=head2 $self->object_key
7e17346f 307
0a17afe9 308Returns a unique key that can be used to look up this graph/set combination in
309a database. Currently an MD5 hash of the request_string.
310
311=cut
1e4b66f4 312
a42a164c 313sub object_key {
314 my $self = shift;
315 return md5_hex( encode_utf8( $self->request_string ) );
316}
317
0a17afe9 318=head2 $self->request_string
319
320A request string is the graph followed by the groups, which should form a unique
321key for the result.
322
323=cut
324
7e17346f 325sub request_string {
326 my $self = shift;
03c0a7d7 327 return $self->graph . '//' . join( ',', $self->sets );
7e17346f 328}
329
0a17afe9 330=head2 by_size_and_alpha
331
332A useful utility function to sort Set::Scalar objects first in descending
333order by size, then in ascending alphabetical order by first element (i.e.
334by stringification.)
335
336=cut
337
7e17346f 338sub by_size_and_alpha {
339 my( $a, $b ) = @_;
340 my $size = $b->members <=> $a->members;
341 return $size if $size;
342 # Then sort by alphabetical order of set elements.
343 return "$a" cmp "$b";
344}
345
0a17afe9 346=head2 $self->sources
347
348Return all 'source' class witnesses in these sets for this graph.
349
350=cut
351
7e17346f 352sub sources {
353 my $self = shift;
354 my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits;
355 return @sources;
356}
357
0a17afe9 358=head2 $self->minimum_grouping_for( $set )
359
360Return the minimum grouping (including necessary hypothetical witnesses) for
361the witness set specified. Will return undef if $set does not match one of
362the defined witness sets in $self->sets.
363
364=cut
365
7e17346f 366# Look for a matching set in our setlist, and return its corresponding group
367sub minimum_grouping_for {
368 my( $self, $set ) = @_;
369 my $midx = $self->set_index( sub { "$set" eq "$_" } );
370 return undef unless defined $midx;
371 return $self->grouping( $midx );
372}
373
0a17afe9 374=head1 CALCULATION STORAGE METHODS
375
376=head2 $self->is_genealogical( $bool )
377
378Record that the sets are genealogical for this graph.
379
380=head2 $self->set_class( $witness, $class )
381
382Record that the witness in question is of the given class.
383
384=head2 $self->record_grouping( $group )
385
386Record that the group in question (either an arrayref or a Set::Scalar) forms
387a minimum grouping on the graph. Will throw an error unless the group is a
388(non-proper) superset of an existing witness set.
389
390=cut
391
392sub record_grouping {
393 my( $self, $group ) = @_;
394 unless( ref( $group ) eq 'Set::Scalar' ) {
395 my $s = Set::Scalar->new( @$group );
396 $group = $s;
397 }
398 # Find the set that is a subset of this group, and record it in the
399 # correct spot in our groupinglist.
400 my $idx = 0;
401 foreach my $set ( $self->sets ) {
402 if( _is_subset( $set, $group ) ) {
403 $self->_set_grouping( $idx, $group );
404 last;
405 }
406 $idx++;
407 }
408 if( $idx == scalar( $self->sets ) ) {
409 throw( "Failed to find witness set that is a subset of $group" );
410 }
411}
412
413sub _is_subset {
414 # A replacement for the stupid Set::Scalar::is_subset
415 my( $set1, $set2 ) = @_;
416 my %all;
417 map { $all{$_} = 1 } $set2->members;
418 foreach my $m ( $set1->members ) {
419 return 0 unless $all{$m};
420 }
421 return 1;
422}
423
a42a164c 424sub TO_JSON {
425 my $self = shift;
0a17afe9 426 # Required values: graph and setlist
a42a164c 427 my $data = {
428 graph => $self->graph,
429 setlist => [],
a42a164c 430 };
a42a164c 431 foreach my $set ( $self->sets ) {
432 push( @{$data->{setlist}}, [ $set->members ] );
433 }
0a17afe9 434 # Scalar values, if they are set
435 $data->{is_genealogical} = 1 if $self->is_genealogical;
436 $data->{status} = $self->status if $self->status;
437
438 # Set values, if they exist
439 $data->{groupinglist} = [] if $self->groupings;
a42a164c 440 foreach my $group ( $self->groupings ) {
441 push( @{$data->{groupinglist}}, [ $group->members ] );
442 }
0a17afe9 443 $data->{classlist} = {} if $self->assigned_wits;
a42a164c 444 foreach my $wit ( $self->assigned_wits ) {
445 $data->{classlist}->{$wit} = $self->class( $wit );
446 }
447 return $data;
448}
449
7e17346f 450sub throw {
451 Text::Tradition::Error->throw(
452 'ident' => 'Analysis::Result error',
453 'message' => $_[0],
454 );
455}
456
457no Moose;
458__PACKAGE__->meta->make_immutable;
459
4601;