Rework Stemma / StemmaUtil so that utility functions are all in the latter. Fixes #14
[scpubgit/stemmatology.git] / analysis / t / analysis.t
CommitLineData
f00cefe8 1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5use lib 'lib';
6use feature 'say';
7use Test::More;
8use Text::Tradition;
9use Text::Tradition::Analysis qw/ run_analysis /;
10
11binmode STDOUT, ':utf8';
12binmode STDERR, ':utf8';
13eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
14
15my $tradition = Text::Tradition->new(
16 'input' => 'Self',
17 'file' => 't/data/besoin.xml' );
18$tradition->add_stemma( 'dotfile' => 't/data/besoin.dot' );
19
20# Run the analysis of the tradition
f00cefe8 21
22my %expected = (
e6823346 23 2 => 'type1',
24 3 => 'genealogical',
25 28 => 'genealogical',
26 39 => 'genealogical',
27 62 => 'type1',
28 63 => 'type1',
29 73 => 'reverted',
30 76 => 'genealogical',
31 91 => 'conflict',
32 93 => 'genealogical',
33 94 => 'genealogical',
34 99 => 'reverted',
35 110 => 'type1',
36 117 => 'type1',
37 136 => 'reverted',
38 142 => 'conflict',
39 155 => 'genealogical',
40 170 => 'genealogical',
41 182 => 'type1',
42 205 => 'genealogical',
43 219 => 'genealogical',
44 239 => 'genealogical',
45 244 => 'genealogical',
46 245 => 'type1',
47 251 => 'genealogical',
48 252 => 'genealogical',
49 293 => 'genealogical',
50 295 => 'genealogical',
51 309 => 'genealogical',
52 310 => 'type1',
53 314 => 'type1',
54 315 => 'type1',
55 317 => 'reverted',
56 318 => 'genealogical',
57 319 => 'genealogical',
58 324 => 'type1',
59 328 => 'reverted',
60 334 => 'type1',
61 335 => 'genealogical',
62 350 => 'reverted',
63 361 => 'reverted',
64 367 => 'type1',
65 370 => 'type1',
66 382 => 'reverted',
67 385 => 'reverted',
68 406 => 'genealogical',
69 413 => 'genealogical',
70 417 => 'type1',
71 418 => 'reverted',
72 459 => 'type1',
73 493 => 'genealogical',
74 497 => 'reverted',
75 499 => 'type1',
76 500 => 'reverted',
77 515 => 'reverted',
78 556 => 'type1',
79 558 => 'conflict',
80 597 => 'type1',
81 615 => 'type1',
82 617 => 'type1',
83 632 => 'genealogical',
84 634 => 'genealogical',
85 636 => 'genealogical',
86 685 => 'genealogical',
87 737 => 'genealogical',
88 742 => 'reverted',
89 743 => 'reverted',
90 744 => 'reverted',
91 745 => 'type1',
92 746 => 'type1',
93 747 => 'type1',
94 757 => 'type1',
95 762 => 'type1',
96 763 => 'type1',
97 777 => 'reverted',
98 780 => 'genealogical',
99 802 => 'type1',
100 803 => 'type1',
101 815 => 'type1',
102 837 => 'genealogical',
103 854 => 'type1',
104 855 => 'type1',
105 856 => 'type1',
106 857 => 'type1',
107 858 => 'type1',
108 859 => 'type1',
109 860 => 'type1',
110 861 => 'type1',
111 862 => 'type1',
112 863 => 'type1',
113 864 => 'type1',
114 865 => 'type1',
115 866 => 'type1',
116 867 => 'type1',
117 868 => 'type1',
118 869 => 'type1',
119 870 => 'type1',
120 871 => 'type1',
121 872 => 'type1',
122 873 => 'type1',
123 874 => 'type1',
124 875 => 'type1',
125 876 => 'type1',
126 877 => 'type1',
127 878 => 'type1',
128 879 => 'type1',
129 880 => 'type1',
130 881 => 'type1',
131 882 => 'type1',
132 883 => 'type1',
133 884 => 'type1',
134 885 => 'type1',
135 886 => 'type1',
136 887 => 'type1',
137 888 => 'type1',
138 889 => 'type1',
139 890 => 'type1',
140 891 => 'type1',
141 892 => 'type1',
142 893 => 'type1',
143 894 => 'type1',
144 895 => 'type1',
145 896 => 'type1',
146 897 => 'conflict',
147 898 => 'conflict',
148 899 => 'type1',
149 900 => 'type1',
150 901 => 'type1',
151 902 => 'type1',
152 903 => 'type1',
153 904 => 'type1',
154 905 => 'type1',
155 906 => 'type1',
156 907 => 'type1',
157 915 => 'type1',
158 916 => 'type1',
159 925 => 'genealogical',
160 927 => 'type1',
161 952 => 'genealogical',
162 954 => 'genealogical',
163 969 => 'genealogical',
164 972 => 'genealogical',
165 973 => 'genealogical',
166 974 => 'type1',
167 1003 => 'genealogical',
168 1004 => 'genealogical' # check for transp
f00cefe8 169);
170
e6823346 171my %num_readings;
172
173my @all_variant_ranks = sort { $a <=> $b } keys( %expected );
f00cefe8 174# Look through the results
f00cefe8 175my $c = $tradition->collation;
a745c3d9 176my %analysis_opts;
e6823346 177my $results = run_analysis( $tradition, %analysis_opts );
178my @analyzed;
f00cefe8 179foreach my $row ( @{$results->{'variants'}} ) {
e6823346 180 push( @analyzed, $row->{id} );
181 $num_readings{$row->{id}} = scalar @{$row->{'readings'}};
182 my $type = 'genealogical';
183 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
184 $type = 'conflict';
185 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
186 $type = 'reverted';
187 }
188 my $expected = $expected{$row->{'id'}};
189 $expected = 'genealogical' if $expected eq 'type1';
190 is( $type, $expected, "Got expected genealogical result for rank " . $row->{'id'} );
191 # If the row is genealogical, there should be one reading with no parents,
192 # every reading should independently occur exactly once, and the total
193 # number of changes + maybe-changes should equal the total number of
194 # readings who have that one as a parent.
a745c3d9 195 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
196 next if exists $row->{'unsolved'};
e6823346 197 if( $row->{'genealogical'} ) {
198 # Make the mapping of parent -> child readings
199 my %is_parent;
200 my @has_no_parent;
f00cefe8 201 foreach my $rdg ( @{$row->{'readings'}} ) {
e6823346 202 my $parents = $rdg->{'source_parents'} || {};
203 foreach my $p ( keys %$parents ) {
204 push( @{$is_parent{$p}}, $rdg->{'readingid'} );
f00cefe8 205 }
e6823346 206 push( @has_no_parent, $rdg->{'readingid'} ) unless keys %$parents;
f00cefe8 207 }
e6823346 208 # Test some stuff
209 foreach my $rdg ( @{$row->{'readings'}} ) {
210 is( @{$rdg->{'independent_occurrence'}}, 1,
211 "Genealogical reading originates exactly once" );
f00cefe8 212 }
e6823346 213 is( @has_no_parent, 1, "Only one genealogical reading lacks a parent" );
f00cefe8 214 }
215}
e6823346 216# Check that run_analysis ran an analysis on all our known variant ranks
217is_deeply( \@all_variant_ranks, \@analyzed, "Ran analysis for all expected rows" );
218
219# Now run it again, excluding type 1 variants.
220map { delete $expected{$_} if $expected{$_} eq 'type1' } keys %expected;
221my @useful_variant_ranks = sort { $a <=> $b } keys( %expected );
222$analysis_opts{'exclude_type1'} = 1;
223@analyzed = ();
224$results = run_analysis( $tradition, %analysis_opts );
225foreach my $row ( @{$results->{'variants'}} ) {
a745c3d9 226 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
227 next if exists $row->{'unsolved'};
e6823346 228 push( @analyzed, $row->{id} );
229 my $type = 'genealogical';
230 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
231 $type = 'conflict';
232 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
233 $type = 'reverted';
234 }
235 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on exclude_type1 run for rank " . $row->{'id'} );
236}
237is_deeply( \@analyzed, \@useful_variant_ranks, "Ran analysis for all useful rows" );
238
239# Now run it again, excluding orthography / spelling.
240my @merged_exclude = qw/ 76 136 142 155 293 317 319 335 350 361 413 500 515 636 685
241 737 954 1003 /;
242# 205 now conflicts; it and 493 should also have one fewer reading
243$expected{205} = 'conflict';
244$num_readings{205}--;
245$num_readings{493}--;
246map { delete $expected{$_} } @merged_exclude;
247my @merged_remaining = sort { $a <=> $b } keys( %expected );
248$analysis_opts{'merge_types'} = [ qw/ orthographic spelling / ];
249@analyzed = ();
250$results = run_analysis( $tradition, %analysis_opts );
251foreach my $row ( @{$results->{'variants'}} ) {
252 push( @analyzed, $row->{id} );
a745c3d9 253 ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" );
254 next if exists $row->{'unsolved'};
e6823346 255 my $type = 'genealogical';
256 if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) {
257 $type = 'conflict';
258 } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) {
259 $type = 'reverted';
260 }
261 is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on merge_types run for rank " . $row->{'id'} );
262 is( scalar @{$row->{'readings'}}, $num_readings{$row->{id}}, "Got expected number of readings during merge" );
263}
264is_deeply( \@analyzed, \@merged_remaining, "Ran analysis for all useful unmerged rows" );
265
266done_testing();