Commit | Line | Data |
7e17346f |
1 | package Text::Tradition::Analysis::Result; |
2 | |
3 | use Moose; |
a42a164c |
4 | use Digest::MD5 qw/ md5_hex /; |
5 | use Encode qw/ encode_utf8 /; |
7e17346f |
6 | use JSON qw/ to_json /; |
7 | use Set::Scalar; |
8 | use Text::Tradition::Error; |
9 | |
10 | =head1 NAME |
11 | |
12 | Text::Tradition::Analysis::Result - object to express an IDP calculation result |
13 | for a particular graph problem. |
14 | |
15 | =head1 DESCRIPTION |
16 | |
17 | Given a graph (expressing a stemma hypothesis) and a set of witness groupings |
18 | (expressing variation in reading between witnesses related according to the |
19 | stemma hypothesis), it is possible to calculate certain properties of how the |
20 | readings might be related to each other. This calculation depends on a custom |
21 | program run under the IDP system [TODO URL]. As the problem is NP-hard, the |
22 | calculation can take a long time. The purpose of this object is to allow storage |
23 | of calculated results in a database. |
24 | |
25 | For each graph problem, the following features can be calculated: |
26 | |
27 | =over 4 |
28 | |
29 | =item * Whether the reading groups form a genealogical pattern on the stemma. |
30 | |
31 | =item * The groupings, including lost/hypothetical witnesses if necessary, that minimize the amount of non-genealogical variation on the stemma. |
32 | |
33 | =item * The classes, which for each witness express whether (in a minimally non-genealogical case) the witness is a source of its reading, follows a parent witness, or reverts to an ancestral reading that is not the parent's. |
34 | |
35 | =back |
36 | |
37 | =head1 CONSTRUCTOR |
38 | |
39 | =head2 new |
40 | |
41 | Creates a new graph problem. Requires two properties: |
42 | |
43 | =over 4 |
44 | |
45 | =item * setlist - An array of arrays expressing the witness sets. The inner |
46 | arrays will be converted to Set::Scalar objects, and must have distinct members. |
47 | |
48 | =item * graph - A dot description of a graph (e.g. the output of a call to |
49 | Text::Tradition::Stemma::editable) against which the sets will be analyzed. |
50 | |
51 | =back |
52 | |
53 | =cut |
54 | |
55 | has 'setlist' => ( |
56 | traits => ['Array'], |
57 | isa => 'ArrayRef[Set::Scalar]', |
58 | handles => { |
59 | sets => 'elements', |
60 | set_index => 'first_index', |
61 | }, |
62 | required => 1 |
63 | ); |
64 | |
65 | has 'graph' => ( |
66 | is => 'ro', |
67 | isa => 'Str', |
68 | required => 1 |
69 | ); |
70 | |
74038ae5 |
71 | has 'status' => ( |
72 | is => 'rw', |
73 | isa => 'Str' |
74 | ); |
75 | |
7e17346f |
76 | has 'is_genealogical' => ( |
77 | is => 'rw', |
78 | isa => 'Bool', |
79 | predicate => 'has_genealogical_result' |
80 | ); |
81 | |
82 | has 'groupinglist' => ( |
83 | traits => ['Array'], |
84 | isa => 'ArrayRef[Set::Scalar]', |
85 | handles => { |
86 | groupings => 'elements', |
87 | _add_grouping => 'push', |
88 | _set_grouping => 'set', |
89 | grouping => 'get', |
90 | }, |
91 | default => sub { [] } |
92 | ); |
93 | |
94 | has 'classlist' => ( |
95 | traits => ['Hash'], |
96 | isa => 'HashRef[Str]', |
97 | handles => { |
98 | class => 'get', |
99 | has_class => 'exists', |
100 | set_class => 'set', |
101 | classes => 'elements', |
102 | assigned_wits => 'keys', |
103 | }, |
104 | ); |
105 | |
106 | around BUILDARGS => sub { |
107 | my $orig = shift; |
108 | my $class = shift; |
109 | my $args = @_ == 1 ? $_[0] : { @_ }; |
110 | |
111 | # Convert the set list into a list of Set::Scalars, ordered first by size and |
112 | # then alphabetically by first-sorted. |
113 | die "Must specify a set list to Analysis::Result->new()" |
114 | unless ref( $args->{'setlist'} ) eq 'ARRAY'; |
a42a164c |
115 | # Order the sets and make sure they are all distinct Set::Scalars. |
b42d7113 |
116 | $args->{'setlist'} = [ sort { by_size_and_alpha( $a, $b ) } |
117 | _check_set_args( $args->{'setlist'} ) ]; |
118 | $args->{'groupinglist'} = [ _check_set_args( $args->{'groupinglist'} ) ]; |
7e17346f |
119 | |
c90ef1a3 |
120 | # If we have been passed a Text::Tradition::Stemma or a Graph, save only |
121 | # its string. |
122 | if( ref( $args->{'graph'} ) ) { |
7e17346f |
123 | my $st = delete $args->{'graph'}; |
c90ef1a3 |
124 | my $type = ref( $st ); |
125 | my $gopt = { linesep => ' ' }; |
126 | if( $type eq 'Text::Tradition::Stemma' ) { |
127 | $args->{'graph'} = $st->editable( $gopt ); |
128 | } elsif( $type eq 'Graph' ) { |
129 | $args->{'graph'} = Text::Tradition::Stemma::editable_graph( $st, $gopt ); |
130 | } else { |
131 | die "Passed argument to graph that is neither Stemma nor Graph"; |
132 | } |
133 | } |
74038ae5 |
134 | |
135 | # If our only args are graph and setlist, then status should be 'new' |
136 | if( scalar keys %$args == 2 ) { |
137 | $args->{'status'} = 'new'; |
138 | } |
c90ef1a3 |
139 | |
7e17346f |
140 | return $class->$orig( $args ); |
141 | }; |
142 | |
a42a164c |
143 | sub _check_set_args { |
144 | my $setlist = shift; |
145 | my @sets; |
146 | foreach my $set ( @{$setlist} ) { |
147 | my $s = $set; |
148 | # Check uniqueness of the current set |
149 | if( ref( $set ) ne 'Set::Scalar' ) { |
150 | $s = Set::Scalar->new( @$set ); |
151 | die "Duplicate element(s) in set or group passed to Analysis::Result->new()" |
152 | unless @$set == $s->elements; |
153 | } |
154 | # Check distinctness of the set from all other sets given so far |
155 | foreach my $ps ( @sets ) { |
156 | die "Two sets are not disjoint" |
157 | unless $s->is_disjoint( $ps ); |
158 | } |
159 | # Save the set. |
160 | push( @sets, $s ); |
161 | } |
b42d7113 |
162 | return @sets; |
a42a164c |
163 | } |
164 | |
7e17346f |
165 | sub BUILD { |
166 | my $self = shift; |
167 | |
168 | # Initialize the groupings array |
169 | map { $self->_add_grouping( $_ ) } $self->sets; |
170 | } |
171 | |
172 | sub record_grouping { |
173 | my( $self, $group ) = @_; |
174 | unless( ref( $group ) eq 'Set::Scalar' ) { |
175 | my $s = Set::Scalar->new( @$group ); |
176 | $group = $s; |
177 | } |
178 | # Find the set that is a subset of this group, and record it in the |
179 | # correct spot in our groupinglist. |
180 | my $idx = 0; |
181 | foreach my $set ( $self->sets ) { |
182 | if( $set->is_subset( $group ) ) { |
183 | $self->_set_grouping( $idx, $group ); |
184 | last; |
185 | } |
186 | $idx++; |
187 | } |
188 | if( $idx == scalar( $self->sets ) ) { |
189 | throw( "Failed to find witness set that is a subset of $group" ); |
190 | } |
191 | } |
192 | |
193 | # A request string is the graph followed by the groups, which should form a unique |
194 | # key for the result. |
a42a164c |
195 | sub object_key { |
196 | my $self = shift; |
197 | return md5_hex( encode_utf8( $self->request_string ) ); |
198 | } |
199 | |
7e17346f |
200 | sub request_string { |
201 | my $self = shift; |
202 | return string_from_graph_problem( $self->graph, [ $self->sets ] ); |
203 | } |
204 | |
205 | sub string_from_graph_problem { |
206 | my( $graph, $grouping ) = @_; |
207 | my( $graphstr, @groupsets ); |
208 | # Get the graph string |
209 | if( ref( $graph ) && ref( $graph ) eq 'Graph' ) { |
210 | $graphstr = Text::Tradition::Stemma::editable_graph( $graph, { 'linesep' => ' ' } ); |
211 | } else { |
212 | throw( "Passed non-graph object $graph to stringification" ) |
213 | if ref( $graph ); |
214 | $graphstr = $graph; |
215 | } |
216 | # Make sure all groupings are sets |
217 | foreach my $g ( @$grouping ) { |
218 | if( ref( $g ) eq 'ARRAY' ) { |
219 | push( @groupsets, Set::Scalar->new( @$g ) ); |
220 | } elsif( ref( $g ) eq 'Set::Scalar' ) { |
221 | push( @groupsets, $g ); |
222 | } else { |
223 | throw( "Tried to stringify grouping $g that is neither set nor array" ); |
224 | } |
225 | } |
226 | return $graphstr . '//' . |
227 | join( ',', sort { by_size_and_alpha( $a, $b ) } @groupsets ); |
228 | } |
229 | |
c90ef1a3 |
230 | # This should work as $self->problem_json or as problem_json( @objects ) |
7e17346f |
231 | sub problem_json { |
c90ef1a3 |
232 | my( @objects ) = @_; |
233 | # There should be a distinct problem for each unique graph. |
234 | my %distinct_problems; |
235 | foreach my $o ( @objects ) { |
236 | unless( exists $distinct_problems{$o->graph} ) { |
237 | $distinct_problems{$o->graph} = []; |
238 | } |
239 | my @groupings; |
240 | map { push( @groupings, [ $_->members ] ) } $o->sets; |
241 | push( @{$distinct_problems{$o->graph}}, \@groupings ); |
242 | } |
243 | my @pstrs = map { to_json( |
244 | { graph => $_, groupings => $distinct_problems{$_} } ) } |
245 | keys %distinct_problems; |
246 | return @pstrs; |
7e17346f |
247 | } |
248 | |
249 | sub by_size_and_alpha { |
250 | my( $a, $b ) = @_; |
251 | my $size = $b->members <=> $a->members; |
252 | return $size if $size; |
253 | # Then sort by alphabetical order of set elements. |
254 | return "$a" cmp "$b"; |
255 | } |
256 | |
257 | sub sources { |
258 | my $self = shift; |
259 | my @sources = grep { $self->class( $_ ) eq 'source' } $self->assigned_wits; |
260 | return @sources; |
261 | } |
262 | |
263 | # Look for a matching set in our setlist, and return its corresponding group |
264 | sub minimum_grouping_for { |
265 | my( $self, $set ) = @_; |
266 | my $midx = $self->set_index( sub { "$set" eq "$_" } ); |
267 | return undef unless defined $midx; |
268 | return $self->grouping( $midx ); |
269 | } |
270 | |
a42a164c |
271 | sub TO_JSON { |
272 | my $self = shift; |
273 | # Graph and setlist |
274 | my $data = { |
275 | graph => $self->graph, |
276 | setlist => [], |
277 | groupinglist => [], |
278 | classlist => {} |
279 | }; |
280 | $data->{is_genealogical} = 1 if $self->is_genealogical; |
281 | foreach my $set ( $self->sets ) { |
282 | push( @{$data->{setlist}}, [ $set->members ] ); |
283 | } |
284 | # groupinglist |
285 | foreach my $group ( $self->groupings ) { |
286 | push( @{$data->{groupinglist}}, [ $group->members ] ); |
287 | } |
288 | # classlist |
289 | foreach my $wit ( $self->assigned_wits ) { |
290 | $data->{classlist}->{$wit} = $self->class( $wit ); |
291 | } |
292 | return $data; |
293 | } |
294 | |
7e17346f |
295 | sub throw { |
296 | Text::Tradition::Error->throw( |
297 | 'ident' => 'Analysis::Result error', |
298 | 'message' => $_[0], |
299 | ); |
300 | } |
301 | |
302 | no Moose; |
303 | __PACKAGE__->meta->make_immutable; |
304 | |
305 | 1; |