split off stemma analysis modules from base Tradition layer
[scpubgit/stemmatology.git] / analysis / t / analysis.t
CommitLineData
f00cefe8 1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5use lib 'lib';
6use feature 'say';
7use Test::More;
8use Text::Tradition;
9use Text::Tradition::Analysis qw/ run_analysis /;
10
11binmode STDOUT, ':utf8';
12binmode STDERR, ':utf8';
13eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
14
15my $tradition = Text::Tradition->new(
16 'input' => 'Self',
17 'file' => 't/data/besoin.xml' );
951ddfe8 18$tradition->enable_stemmata;
f00cefe8 19$tradition->add_stemma( 'dotfile' => 't/data/besoin.dot' );
20
21# Run the analysis of the tradition
7e17346f 22## TODO Make proper test db
23my $calcdsn = 'dbi:SQLite:dbname=t/data/analysis.db';
f00cefe8 24
25my %expected = (
e6823346 26 2 => 'type1',
27 3 => 'genealogical',
28 28 => 'genealogical',
29 39 => 'genealogical',
30 62 => 'type1',
31 63 => 'type1',
32 73 => 'reverted',
33 76 => 'genealogical',
34 91 => 'conflict',
35 93 => 'genealogical',
36 94 => 'genealogical',
37 99 => 'reverted',
38 110 => 'type1',
39 117 => 'type1',
40 136 => 'reverted',
41 142 => 'conflict',
42 155 => 'genealogical',
43 170 => 'genealogical',
44 182 => 'type1',
45 205 => 'genealogical',
46 219 => 'genealogical',
47 239 => 'genealogical',
48 244 => 'genealogical',
49 245 => 'type1',
50 251 => 'genealogical',
51 252 => 'genealogical',
52 293 => 'genealogical',
53 295 => 'genealogical',
54 309 => 'genealogical',
55 310 => 'type1',
56 314 => 'type1',
57 315 => 'type1',
58 317 => 'reverted',
59 318 => 'genealogical',
60 319 => 'genealogical',
61 324 => 'type1',
62 328 => 'reverted',
63 334 => 'type1',
64 335 => 'genealogical',
65 350 => 'reverted',
66 361 => 'reverted',
67 367 => 'type1',
68 370 => 'type1',
69 382 => 'reverted',
70 385 => 'reverted',
71 406 => 'genealogical',
72 413 => 'genealogical',
73 417 => 'type1',
74 418 => 'reverted',
75 459 => 'type1',
76 493 => 'genealogical',
77 497 => 'reverted',
78 499 => 'type1',
79 500 => 'reverted',
80 515 => 'reverted',
81 556 => 'type1',
82 558 => 'conflict',
83 597 => 'type1',
84 615 => 'type1',
85 617 => 'type1',
86 632 => 'genealogical',
87 634 => 'genealogical',
88 636 => 'genealogical',
89 685 => 'genealogical',
90 737 => 'genealogical',
91 742 => 'reverted',
92 743 => 'reverted',
93 744 => 'reverted',
94 745 => 'type1',
95 746 => 'type1',
96 747 => 'type1',
97 757 => 'type1',
98 762 => 'type1',
99 763 => 'type1',
100 777 => 'reverted',
101 780 => 'genealogical',
102 802 => 'type1',
103 803 => 'type1',
104 815 => 'type1',
105 837 => 'genealogical',
106 854 => 'type1',
107 855 => 'type1',
108 856 => 'type1',
109 857 => 'type1',
110 858 => 'type1',
111 859 => 'type1',
112 860 => 'type1',
113 861 => 'type1',
114 862 => 'type1',
115 863 => 'type1',
116 864 => 'type1',
117 865 => 'type1',
118 866 => 'type1',
119 867 => 'type1',
120 868 => 'type1',
121 869 => 'type1',
122 870 => 'type1',
123 871 => 'type1',
124 872 => 'type1',
125 873 => 'type1',
126 874 => 'type1',
127 875 => 'type1',
128 876 => 'type1',
129 877 => 'type1',
130 878 => 'type1',
131 879 => 'type1',
132 880 => 'type1',
133 881 => 'type1',
134 882 => 'type1',
135 883 => 'type1',
136 884 => 'type1',
137 885 => 'type1',
138 886 => 'type1',
139 887 => 'type1',
140 888 => 'type1',
141 889 => 'type1',
142 890 => 'type1',
143 891 => 'type1',
144 892 => 'type1',
145 893 => 'type1',
146 894 => 'type1',
147 895 => 'type1',
148 896 => 'type1',
149 897 => 'conflict',
150 898 => 'conflict',
151 899 => 'type1',
152 900 => 'type1',
153 901 => 'type1',
154 902 => 'type1',
155 903 => 'type1',
156 904 => 'type1',
157 905 => 'type1',
158 906 => 'type1',
159 907 => 'type1',
160 915 => 'type1',
161 916 => 'type1',
162 925 => 'genealogical',
163 927 => 'type1',
164 952 => 'genealogical',
165 954 => 'genealogical',
166 969 => 'genealogical',
167 972 => 'genealogical',
168 973 => 'genealogical',
169 974 => 'type1',
170 1003 => 'genealogical',
171 1004 => 'genealogical' # check for transp
f00cefe8 172);
173
e6823346 174my %num_readings;
175
176my @all_variant_ranks = sort { $a <=> $b } keys( %expected );
f00cefe8 177# Look through the results
f00cefe8 178my $c = $tradition->collation;
a745c3d9 179my %analysis_opts;
e6823346 180my $results = run_analysis( $tradition, %analysis_opts );
181my @analyzed;
f00cefe8 182foreach my $row ( @{$results->{'variants'}} ) {
e6823346 183 push( @analyzed, $row->{id} );
184 $num_readings{$row->{id}} = scalar @{$row->{'readings'}};
185 my $type = 'genealogical';
186 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
187 $type = 'conflict';
188 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
189 $type = 'reverted';
190 }
191 my $expected = $expected{$row->{'id'}};
192 $expected = 'genealogical' if $expected eq 'type1';
193 is( $type, $expected, "Got expected genealogical result for rank " . $row->{'id'} );
194 # If the row is genealogical, there should be one reading with no parents,
195 # every reading should independently occur exactly once, and the total
196 # number of changes + maybe-changes should equal the total number of
197 # readings who have that one as a parent.
a745c3d9 198 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
199 next if exists $row->{'unsolved'};
e6823346 200 if( $row->{'genealogical'} ) {
201 # Make the mapping of parent -> child readings
202 my %is_parent;
203 my @has_no_parent;
f00cefe8 204 foreach my $rdg ( @{$row->{'readings'}} ) {
e6823346 205 my $parents = $rdg->{'source_parents'} || {};
206 foreach my $p ( keys %$parents ) {
207 push( @{$is_parent{$p}}, $rdg->{'readingid'} );
f00cefe8 208 }
e6823346 209 push( @has_no_parent, $rdg->{'readingid'} ) unless keys %$parents;
f00cefe8 210 }
e6823346 211 # Test some stuff
212 foreach my $rdg ( @{$row->{'readings'}} ) {
213 is( @{$rdg->{'independent_occurrence'}}, 1,
214 "Genealogical reading originates exactly once" );
f00cefe8 215 }
e6823346 216 is( @has_no_parent, 1, "Only one genealogical reading lacks a parent" );
f00cefe8 217 }
218}
e6823346 219# Check that run_analysis ran an analysis on all our known variant ranks
220is_deeply( \@all_variant_ranks, \@analyzed, "Ran analysis for all expected rows" );
221
222# Now run it again, excluding type 1 variants.
223map { delete $expected{$_} if $expected{$_} eq 'type1' } keys %expected;
224my @useful_variant_ranks = sort { $a <=> $b } keys( %expected );
225$analysis_opts{'exclude_type1'} = 1;
226@analyzed = ();
227$results = run_analysis( $tradition, %analysis_opts );
228foreach my $row ( @{$results->{'variants'}} ) {
a745c3d9 229 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
230 next if exists $row->{'unsolved'};
e6823346 231 push( @analyzed, $row->{id} );
232 my $type = 'genealogical';
233 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
234 $type = 'conflict';
235 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
236 $type = 'reverted';
237 }
238 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on exclude_type1 run for rank " . $row->{'id'} );
239}
240is_deeply( \@analyzed, \@useful_variant_ranks, "Ran analysis for all useful rows" );
241
242# Now run it again, excluding orthography / spelling.
243my @merged_exclude = qw/ 76 136 142 155 293 317 319 335 350 361 413 500 515 636 685
244 737 954 1003 /;
245# 205 now conflicts; it and 493 should also have one fewer reading
246$expected{205} = 'conflict';
247$num_readings{205}--;
248$num_readings{493}--;
249map { delete $expected{$_} } @merged_exclude;
250my @merged_remaining = sort { $a <=> $b } keys( %expected );
251$analysis_opts{'merge_types'} = [ qw/ orthographic spelling / ];
252@analyzed = ();
253$results = run_analysis( $tradition, %analysis_opts );
254foreach my $row ( @{$results->{'variants'}} ) {
255 push( @analyzed, $row->{id} );
a745c3d9 256 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
257 next if exists $row->{'unsolved'};
e6823346 258 my $type = 'genealogical';
259 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
260 $type = 'conflict';
261 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
262 $type = 'reverted';
263 }
264 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on merge_types run for rank " . $row->{'id'} );
265 is( scalar @{$row->{'readings'}}, $num_readings{$row->{id}}, "Got expected number of readings during merge" );
266}
267is_deeply( \@analyzed, \@merged_remaining, "Ran analysis for all useful unmerged rows" );
268
269done_testing();