make Analysis work with new async setup
[scpubgit/stemmatology.git] / t / analysis.t
CommitLineData
f00cefe8 1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5use lib 'lib';
6use feature 'say';
7use Test::More;
8use Text::Tradition;
9use Text::Tradition::Analysis qw/ run_analysis /;
10
11binmode STDOUT, ':utf8';
12binmode STDERR, ':utf8';
13eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
14
15my $tradition = Text::Tradition->new(
16 'input' => 'Self',
17 'file' => 't/data/besoin.xml' );
18$tradition->add_stemma( 'dotfile' => 't/data/besoin.dot' );
19
20# Run the analysis of the tradition
7e17346f 21## TODO Make proper test db
22my $calcdsn = 'dbi:SQLite:dbname=t/data/analysis.db';
f00cefe8 23
24my %expected = (
e6823346 25 2 => 'type1',
26 3 => 'genealogical',
27 28 => 'genealogical',
28 39 => 'genealogical',
29 62 => 'type1',
30 63 => 'type1',
31 73 => 'reverted',
32 76 => 'genealogical',
33 91 => 'conflict',
34 93 => 'genealogical',
35 94 => 'genealogical',
36 99 => 'reverted',
37 110 => 'type1',
38 117 => 'type1',
39 136 => 'reverted',
40 142 => 'conflict',
41 155 => 'genealogical',
42 170 => 'genealogical',
43 182 => 'type1',
44 205 => 'genealogical',
45 219 => 'genealogical',
46 239 => 'genealogical',
47 244 => 'genealogical',
48 245 => 'type1',
49 251 => 'genealogical',
50 252 => 'genealogical',
51 293 => 'genealogical',
52 295 => 'genealogical',
53 309 => 'genealogical',
54 310 => 'type1',
55 314 => 'type1',
56 315 => 'type1',
57 317 => 'reverted',
58 318 => 'genealogical',
59 319 => 'genealogical',
60 324 => 'type1',
61 328 => 'reverted',
62 334 => 'type1',
63 335 => 'genealogical',
64 350 => 'reverted',
65 361 => 'reverted',
66 367 => 'type1',
67 370 => 'type1',
68 382 => 'reverted',
69 385 => 'reverted',
70 406 => 'genealogical',
71 413 => 'genealogical',
72 417 => 'type1',
73 418 => 'reverted',
74 459 => 'type1',
75 493 => 'genealogical',
76 497 => 'reverted',
77 499 => 'type1',
78 500 => 'reverted',
79 515 => 'reverted',
80 556 => 'type1',
81 558 => 'conflict',
82 597 => 'type1',
83 615 => 'type1',
84 617 => 'type1',
85 632 => 'genealogical',
86 634 => 'genealogical',
87 636 => 'genealogical',
88 685 => 'genealogical',
89 737 => 'genealogical',
90 742 => 'reverted',
91 743 => 'reverted',
92 744 => 'reverted',
93 745 => 'type1',
94 746 => 'type1',
95 747 => 'type1',
96 757 => 'type1',
97 762 => 'type1',
98 763 => 'type1',
99 777 => 'reverted',
100 780 => 'genealogical',
101 802 => 'type1',
102 803 => 'type1',
103 815 => 'type1',
104 837 => 'genealogical',
105 854 => 'type1',
106 855 => 'type1',
107 856 => 'type1',
108 857 => 'type1',
109 858 => 'type1',
110 859 => 'type1',
111 860 => 'type1',
112 861 => 'type1',
113 862 => 'type1',
114 863 => 'type1',
115 864 => 'type1',
116 865 => 'type1',
117 866 => 'type1',
118 867 => 'type1',
119 868 => 'type1',
120 869 => 'type1',
121 870 => 'type1',
122 871 => 'type1',
123 872 => 'type1',
124 873 => 'type1',
125 874 => 'type1',
126 875 => 'type1',
127 876 => 'type1',
128 877 => 'type1',
129 878 => 'type1',
130 879 => 'type1',
131 880 => 'type1',
132 881 => 'type1',
133 882 => 'type1',
134 883 => 'type1',
135 884 => 'type1',
136 885 => 'type1',
137 886 => 'type1',
138 887 => 'type1',
139 888 => 'type1',
140 889 => 'type1',
141 890 => 'type1',
142 891 => 'type1',
143 892 => 'type1',
144 893 => 'type1',
145 894 => 'type1',
146 895 => 'type1',
147 896 => 'type1',
148 897 => 'conflict',
149 898 => 'conflict',
150 899 => 'type1',
151 900 => 'type1',
152 901 => 'type1',
153 902 => 'type1',
154 903 => 'type1',
155 904 => 'type1',
156 905 => 'type1',
157 906 => 'type1',
158 907 => 'type1',
159 915 => 'type1',
160 916 => 'type1',
161 925 => 'genealogical',
162 927 => 'type1',
163 952 => 'genealogical',
164 954 => 'genealogical',
165 969 => 'genealogical',
166 972 => 'genealogical',
167 973 => 'genealogical',
168 974 => 'type1',
169 1003 => 'genealogical',
170 1004 => 'genealogical' # check for transp
f00cefe8 171);
172
e6823346 173my %num_readings;
174
175my @all_variant_ranks = sort { $a <=> $b } keys( %expected );
f00cefe8 176# Look through the results
f00cefe8 177my $c = $tradition->collation;
a745c3d9 178my %analysis_opts;
e6823346 179my $results = run_analysis( $tradition, %analysis_opts );
180my @analyzed;
f00cefe8 181foreach my $row ( @{$results->{'variants'}} ) {
e6823346 182 push( @analyzed, $row->{id} );
183 $num_readings{$row->{id}} = scalar @{$row->{'readings'}};
184 my $type = 'genealogical';
185 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
186 $type = 'conflict';
187 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
188 $type = 'reverted';
189 }
190 my $expected = $expected{$row->{'id'}};
191 $expected = 'genealogical' if $expected eq 'type1';
192 is( $type, $expected, "Got expected genealogical result for rank " . $row->{'id'} );
193 # If the row is genealogical, there should be one reading with no parents,
194 # every reading should independently occur exactly once, and the total
195 # number of changes + maybe-changes should equal the total number of
196 # readings who have that one as a parent.
a745c3d9 197 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
198 next if exists $row->{'unsolved'};
e6823346 199 if( $row->{'genealogical'} ) {
200 # Make the mapping of parent -> child readings
201 my %is_parent;
202 my @has_no_parent;
f00cefe8 203 foreach my $rdg ( @{$row->{'readings'}} ) {
e6823346 204 my $parents = $rdg->{'source_parents'} || {};
205 foreach my $p ( keys %$parents ) {
206 push( @{$is_parent{$p}}, $rdg->{'readingid'} );
f00cefe8 207 }
e6823346 208 push( @has_no_parent, $rdg->{'readingid'} ) unless keys %$parents;
f00cefe8 209 }
e6823346 210 # Test some stuff
211 foreach my $rdg ( @{$row->{'readings'}} ) {
212 is( @{$rdg->{'independent_occurrence'}}, 1,
213 "Genealogical reading originates exactly once" );
f00cefe8 214 }
e6823346 215 is( @has_no_parent, 1, "Only one genealogical reading lacks a parent" );
f00cefe8 216 }
217}
e6823346 218# Check that run_analysis ran an analysis on all our known variant ranks
219is_deeply( \@all_variant_ranks, \@analyzed, "Ran analysis for all expected rows" );
220
221# Now run it again, excluding type 1 variants.
222map { delete $expected{$_} if $expected{$_} eq 'type1' } keys %expected;
223my @useful_variant_ranks = sort { $a <=> $b } keys( %expected );
224$analysis_opts{'exclude_type1'} = 1;
225@analyzed = ();
226$results = run_analysis( $tradition, %analysis_opts );
227foreach my $row ( @{$results->{'variants'}} ) {
a745c3d9 228 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
229 next if exists $row->{'unsolved'};
e6823346 230 push( @analyzed, $row->{id} );
231 my $type = 'genealogical';
232 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
233 $type = 'conflict';
234 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
235 $type = 'reverted';
236 }
237 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on exclude_type1 run for rank " . $row->{'id'} );
238}
239is_deeply( \@analyzed, \@useful_variant_ranks, "Ran analysis for all useful rows" );
240
241# Now run it again, excluding orthography / spelling.
242my @merged_exclude = qw/ 76 136 142 155 293 317 319 335 350 361 413 500 515 636 685
243 737 954 1003 /;
244# 205 now conflicts; it and 493 should also have one fewer reading
245$expected{205} = 'conflict';
246$num_readings{205}--;
247$num_readings{493}--;
248map { delete $expected{$_} } @merged_exclude;
249my @merged_remaining = sort { $a <=> $b } keys( %expected );
250$analysis_opts{'merge_types'} = [ qw/ orthographic spelling / ];
251@analyzed = ();
252$results = run_analysis( $tradition, %analysis_opts );
253foreach my $row ( @{$results->{'variants'}} ) {
254 push( @analyzed, $row->{id} );
a745c3d9 255 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
256 next if exists $row->{'unsolved'};
e6823346 257 my $type = 'genealogical';
258 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
259 $type = 'conflict';
260 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
261 $type = 'reverted';
262 }
263 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on merge_types run for rank " . $row->{'id'} );
264 is( scalar @{$row->{'readings'}}, $num_readings{$row->{id}}, "Got expected number of readings during merge" );
265}
266is_deeply( \@analyzed, \@merged_remaining, "Ran analysis for all useful unmerged rows" );
267
268done_testing();