Commit | Line | Data |
f00cefe8 |
1 | #!/usr/bin/env perl |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use lib 'lib'; |
6 | use feature 'say'; |
7 | use Test::More; |
8 | use Text::Tradition; |
9 | use Text::Tradition::Analysis qw/ run_analysis /; |
10 | |
11 | binmode STDOUT, ':utf8'; |
12 | binmode STDERR, ':utf8'; |
13 | eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 }; |
14 | |
15 | my $tradition = Text::Tradition->new( |
16 | 'input' => 'Self', |
17 | 'file' => 't/data/besoin.xml' ); |
18 | $tradition->add_stemma( 'dotfile' => 't/data/besoin.dot' ); |
19 | |
20 | # Run the analysis of the tradition |
7e17346f |
21 | ## TODO Make proper test db |
22 | my $calcdsn = 'dbi:SQLite:dbname=t/data/analysis.db'; |
f00cefe8 |
23 | |
24 | my %expected = ( |
e6823346 |
25 | 2 => 'type1', |
26 | 3 => 'genealogical', |
27 | 28 => 'genealogical', |
28 | 39 => 'genealogical', |
29 | 62 => 'type1', |
30 | 63 => 'type1', |
31 | 73 => 'reverted', |
32 | 76 => 'genealogical', |
33 | 91 => 'conflict', |
34 | 93 => 'genealogical', |
35 | 94 => 'genealogical', |
36 | 99 => 'reverted', |
37 | 110 => 'type1', |
38 | 117 => 'type1', |
39 | 136 => 'reverted', |
40 | 142 => 'conflict', |
41 | 155 => 'genealogical', |
42 | 170 => 'genealogical', |
43 | 182 => 'type1', |
44 | 205 => 'genealogical', |
45 | 219 => 'genealogical', |
46 | 239 => 'genealogical', |
47 | 244 => 'genealogical', |
48 | 245 => 'type1', |
49 | 251 => 'genealogical', |
50 | 252 => 'genealogical', |
51 | 293 => 'genealogical', |
52 | 295 => 'genealogical', |
53 | 309 => 'genealogical', |
54 | 310 => 'type1', |
55 | 314 => 'type1', |
56 | 315 => 'type1', |
57 | 317 => 'reverted', |
58 | 318 => 'genealogical', |
59 | 319 => 'genealogical', |
60 | 324 => 'type1', |
61 | 328 => 'reverted', |
62 | 334 => 'type1', |
63 | 335 => 'genealogical', |
64 | 350 => 'reverted', |
65 | 361 => 'reverted', |
66 | 367 => 'type1', |
67 | 370 => 'type1', |
68 | 382 => 'reverted', |
69 | 385 => 'reverted', |
70 | 406 => 'genealogical', |
71 | 413 => 'genealogical', |
72 | 417 => 'type1', |
73 | 418 => 'reverted', |
74 | 459 => 'type1', |
75 | 493 => 'genealogical', |
76 | 497 => 'reverted', |
77 | 499 => 'type1', |
78 | 500 => 'reverted', |
79 | 515 => 'reverted', |
80 | 556 => 'type1', |
81 | 558 => 'conflict', |
82 | 597 => 'type1', |
83 | 615 => 'type1', |
84 | 617 => 'type1', |
85 | 632 => 'genealogical', |
86 | 634 => 'genealogical', |
87 | 636 => 'genealogical', |
88 | 685 => 'genealogical', |
89 | 737 => 'genealogical', |
90 | 742 => 'reverted', |
91 | 743 => 'reverted', |
92 | 744 => 'reverted', |
93 | 745 => 'type1', |
94 | 746 => 'type1', |
95 | 747 => 'type1', |
96 | 757 => 'type1', |
97 | 762 => 'type1', |
98 | 763 => 'type1', |
99 | 777 => 'reverted', |
100 | 780 => 'genealogical', |
101 | 802 => 'type1', |
102 | 803 => 'type1', |
103 | 815 => 'type1', |
104 | 837 => 'genealogical', |
105 | 854 => 'type1', |
106 | 855 => 'type1', |
107 | 856 => 'type1', |
108 | 857 => 'type1', |
109 | 858 => 'type1', |
110 | 859 => 'type1', |
111 | 860 => 'type1', |
112 | 861 => 'type1', |
113 | 862 => 'type1', |
114 | 863 => 'type1', |
115 | 864 => 'type1', |
116 | 865 => 'type1', |
117 | 866 => 'type1', |
118 | 867 => 'type1', |
119 | 868 => 'type1', |
120 | 869 => 'type1', |
121 | 870 => 'type1', |
122 | 871 => 'type1', |
123 | 872 => 'type1', |
124 | 873 => 'type1', |
125 | 874 => 'type1', |
126 | 875 => 'type1', |
127 | 876 => 'type1', |
128 | 877 => 'type1', |
129 | 878 => 'type1', |
130 | 879 => 'type1', |
131 | 880 => 'type1', |
132 | 881 => 'type1', |
133 | 882 => 'type1', |
134 | 883 => 'type1', |
135 | 884 => 'type1', |
136 | 885 => 'type1', |
137 | 886 => 'type1', |
138 | 887 => 'type1', |
139 | 888 => 'type1', |
140 | 889 => 'type1', |
141 | 890 => 'type1', |
142 | 891 => 'type1', |
143 | 892 => 'type1', |
144 | 893 => 'type1', |
145 | 894 => 'type1', |
146 | 895 => 'type1', |
147 | 896 => 'type1', |
148 | 897 => 'conflict', |
149 | 898 => 'conflict', |
150 | 899 => 'type1', |
151 | 900 => 'type1', |
152 | 901 => 'type1', |
153 | 902 => 'type1', |
154 | 903 => 'type1', |
155 | 904 => 'type1', |
156 | 905 => 'type1', |
157 | 906 => 'type1', |
158 | 907 => 'type1', |
159 | 915 => 'type1', |
160 | 916 => 'type1', |
161 | 925 => 'genealogical', |
162 | 927 => 'type1', |
163 | 952 => 'genealogical', |
164 | 954 => 'genealogical', |
165 | 969 => 'genealogical', |
166 | 972 => 'genealogical', |
167 | 973 => 'genealogical', |
168 | 974 => 'type1', |
169 | 1003 => 'genealogical', |
170 | 1004 => 'genealogical' # check for transp |
f00cefe8 |
171 | ); |
172 | |
e6823346 |
173 | my %num_readings; |
174 | |
175 | my @all_variant_ranks = sort { $a <=> $b } keys( %expected ); |
f00cefe8 |
176 | # Look through the results |
f00cefe8 |
177 | my $c = $tradition->collation; |
a745c3d9 |
178 | my %analysis_opts; |
e6823346 |
179 | my $results = run_analysis( $tradition, %analysis_opts ); |
180 | my @analyzed; |
f00cefe8 |
181 | foreach my $row ( @{$results->{'variants'}} ) { |
e6823346 |
182 | push( @analyzed, $row->{id} ); |
183 | $num_readings{$row->{id}} = scalar @{$row->{'readings'}}; |
184 | my $type = 'genealogical'; |
185 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
186 | $type = 'conflict'; |
187 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
188 | $type = 'reverted'; |
189 | } |
190 | my $expected = $expected{$row->{'id'}}; |
191 | $expected = 'genealogical' if $expected eq 'type1'; |
192 | is( $type, $expected, "Got expected genealogical result for rank " . $row->{'id'} ); |
193 | # If the row is genealogical, there should be one reading with no parents, |
194 | # every reading should independently occur exactly once, and the total |
195 | # number of changes + maybe-changes should equal the total number of |
196 | # readings who have that one as a parent. |
a745c3d9 |
197 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
198 | next if exists $row->{'unsolved'}; |
e6823346 |
199 | if( $row->{'genealogical'} ) { |
200 | # Make the mapping of parent -> child readings |
201 | my %is_parent; |
202 | my @has_no_parent; |
f00cefe8 |
203 | foreach my $rdg ( @{$row->{'readings'}} ) { |
e6823346 |
204 | my $parents = $rdg->{'source_parents'} || {}; |
205 | foreach my $p ( keys %$parents ) { |
206 | push( @{$is_parent{$p}}, $rdg->{'readingid'} ); |
f00cefe8 |
207 | } |
e6823346 |
208 | push( @has_no_parent, $rdg->{'readingid'} ) unless keys %$parents; |
f00cefe8 |
209 | } |
e6823346 |
210 | # Test some stuff |
211 | foreach my $rdg ( @{$row->{'readings'}} ) { |
212 | is( @{$rdg->{'independent_occurrence'}}, 1, |
213 | "Genealogical reading originates exactly once" ); |
f00cefe8 |
214 | } |
e6823346 |
215 | is( @has_no_parent, 1, "Only one genealogical reading lacks a parent" ); |
f00cefe8 |
216 | } |
217 | } |
e6823346 |
218 | # Check that run_analysis ran an analysis on all our known variant ranks |
219 | is_deeply( \@all_variant_ranks, \@analyzed, "Ran analysis for all expected rows" ); |
220 | |
221 | # Now run it again, excluding type 1 variants. |
222 | map { delete $expected{$_} if $expected{$_} eq 'type1' } keys %expected; |
223 | my @useful_variant_ranks = sort { $a <=> $b } keys( %expected ); |
224 | $analysis_opts{'exclude_type1'} = 1; |
225 | @analyzed = (); |
226 | $results = run_analysis( $tradition, %analysis_opts ); |
227 | foreach my $row ( @{$results->{'variants'}} ) { |
a745c3d9 |
228 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
229 | next if exists $row->{'unsolved'}; |
e6823346 |
230 | push( @analyzed, $row->{id} ); |
231 | my $type = 'genealogical'; |
232 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
233 | $type = 'conflict'; |
234 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
235 | $type = 'reverted'; |
236 | } |
237 | is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on exclude_type1 run for rank " . $row->{'id'} ); |
238 | } |
239 | is_deeply( \@analyzed, \@useful_variant_ranks, "Ran analysis for all useful rows" ); |
240 | |
241 | # Now run it again, excluding orthography / spelling. |
242 | my @merged_exclude = qw/ 76 136 142 155 293 317 319 335 350 361 413 500 515 636 685 |
243 | 737 954 1003 /; |
244 | # 205 now conflicts; it and 493 should also have one fewer reading |
245 | $expected{205} = 'conflict'; |
246 | $num_readings{205}--; |
247 | $num_readings{493}--; |
248 | map { delete $expected{$_} } @merged_exclude; |
249 | my @merged_remaining = sort { $a <=> $b } keys( %expected ); |
250 | $analysis_opts{'merge_types'} = [ qw/ orthographic spelling / ]; |
251 | @analyzed = (); |
252 | $results = run_analysis( $tradition, %analysis_opts ); |
253 | foreach my $row ( @{$results->{'variants'}} ) { |
254 | push( @analyzed, $row->{id} ); |
a745c3d9 |
255 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
256 | next if exists $row->{'unsolved'}; |
e6823346 |
257 | my $type = 'genealogical'; |
258 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
259 | $type = 'conflict'; |
260 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
261 | $type = 'reverted'; |
262 | } |
263 | is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on merge_types run for rank " . $row->{'id'} ); |
264 | is( scalar @{$row->{'readings'}}, $num_readings{$row->{id}}, "Got expected number of readings during merge" ); |
265 | } |
266 | is_deeply( \@analyzed, \@merged_remaining, "Ran analysis for all useful unmerged rows" ); |
267 | |
268 | done_testing(); |