Commit | Line | Data |
f00cefe8 |
1 | #!/usr/bin/env perl |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use lib 'lib'; |
6 | use feature 'say'; |
7 | use Test::More; |
8 | use Text::Tradition; |
9 | use Text::Tradition::Analysis qw/ run_analysis /; |
10 | |
11 | binmode STDOUT, ':utf8'; |
12 | binmode STDERR, ':utf8'; |
13 | eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 }; |
14 | |
15 | my $tradition = Text::Tradition->new( |
16 | 'input' => 'Self', |
17 | 'file' => 't/data/besoin.xml' ); |
18 | $tradition->add_stemma( 'dotfile' => 't/data/besoin.dot' ); |
19 | |
20 | # Run the analysis of the tradition |
f00cefe8 |
21 | |
22 | my %expected = ( |
e6823346 |
23 | 2 => 'type1', |
24 | 3 => 'genealogical', |
25 | 28 => 'genealogical', |
26 | 39 => 'genealogical', |
27 | 62 => 'type1', |
28 | 63 => 'type1', |
29 | 73 => 'reverted', |
30 | 76 => 'genealogical', |
31 | 91 => 'conflict', |
32 | 93 => 'genealogical', |
33 | 94 => 'genealogical', |
34 | 99 => 'reverted', |
35 | 110 => 'type1', |
36 | 117 => 'type1', |
37 | 136 => 'reverted', |
38 | 142 => 'conflict', |
39 | 155 => 'genealogical', |
40 | 170 => 'genealogical', |
41 | 182 => 'type1', |
42 | 205 => 'genealogical', |
43 | 219 => 'genealogical', |
44 | 239 => 'genealogical', |
45 | 244 => 'genealogical', |
46 | 245 => 'type1', |
47 | 251 => 'genealogical', |
48 | 252 => 'genealogical', |
49 | 293 => 'genealogical', |
50 | 295 => 'genealogical', |
51 | 309 => 'genealogical', |
52 | 310 => 'type1', |
53 | 314 => 'type1', |
54 | 315 => 'type1', |
55 | 317 => 'reverted', |
56 | 318 => 'genealogical', |
57 | 319 => 'genealogical', |
58 | 324 => 'type1', |
59 | 328 => 'reverted', |
60 | 334 => 'type1', |
61 | 335 => 'genealogical', |
62 | 350 => 'reverted', |
63 | 361 => 'reverted', |
64 | 367 => 'type1', |
65 | 370 => 'type1', |
66 | 382 => 'reverted', |
67 | 385 => 'reverted', |
68 | 406 => 'genealogical', |
69 | 413 => 'genealogical', |
70 | 417 => 'type1', |
71 | 418 => 'reverted', |
72 | 459 => 'type1', |
73 | 493 => 'genealogical', |
74 | 497 => 'reverted', |
75 | 499 => 'type1', |
76 | 500 => 'reverted', |
77 | 515 => 'reverted', |
78 | 556 => 'type1', |
79 | 558 => 'conflict', |
80 | 597 => 'type1', |
81 | 615 => 'type1', |
82 | 617 => 'type1', |
83 | 632 => 'genealogical', |
84 | 634 => 'genealogical', |
85 | 636 => 'genealogical', |
86 | 685 => 'genealogical', |
87 | 737 => 'genealogical', |
88 | 742 => 'reverted', |
89 | 743 => 'reverted', |
90 | 744 => 'reverted', |
91 | 745 => 'type1', |
92 | 746 => 'type1', |
93 | 747 => 'type1', |
94 | 757 => 'type1', |
95 | 762 => 'type1', |
96 | 763 => 'type1', |
97 | 777 => 'reverted', |
98 | 780 => 'genealogical', |
99 | 802 => 'type1', |
100 | 803 => 'type1', |
101 | 815 => 'type1', |
102 | 837 => 'genealogical', |
103 | 854 => 'type1', |
104 | 855 => 'type1', |
105 | 856 => 'type1', |
106 | 857 => 'type1', |
107 | 858 => 'type1', |
108 | 859 => 'type1', |
109 | 860 => 'type1', |
110 | 861 => 'type1', |
111 | 862 => 'type1', |
112 | 863 => 'type1', |
113 | 864 => 'type1', |
114 | 865 => 'type1', |
115 | 866 => 'type1', |
116 | 867 => 'type1', |
117 | 868 => 'type1', |
118 | 869 => 'type1', |
119 | 870 => 'type1', |
120 | 871 => 'type1', |
121 | 872 => 'type1', |
122 | 873 => 'type1', |
123 | 874 => 'type1', |
124 | 875 => 'type1', |
125 | 876 => 'type1', |
126 | 877 => 'type1', |
127 | 878 => 'type1', |
128 | 879 => 'type1', |
129 | 880 => 'type1', |
130 | 881 => 'type1', |
131 | 882 => 'type1', |
132 | 883 => 'type1', |
133 | 884 => 'type1', |
134 | 885 => 'type1', |
135 | 886 => 'type1', |
136 | 887 => 'type1', |
137 | 888 => 'type1', |
138 | 889 => 'type1', |
139 | 890 => 'type1', |
140 | 891 => 'type1', |
141 | 892 => 'type1', |
142 | 893 => 'type1', |
143 | 894 => 'type1', |
144 | 895 => 'type1', |
145 | 896 => 'type1', |
146 | 897 => 'conflict', |
147 | 898 => 'conflict', |
148 | 899 => 'type1', |
149 | 900 => 'type1', |
150 | 901 => 'type1', |
151 | 902 => 'type1', |
152 | 903 => 'type1', |
153 | 904 => 'type1', |
154 | 905 => 'type1', |
155 | 906 => 'type1', |
156 | 907 => 'type1', |
157 | 915 => 'type1', |
158 | 916 => 'type1', |
159 | 925 => 'genealogical', |
160 | 927 => 'type1', |
161 | 952 => 'genealogical', |
162 | 954 => 'genealogical', |
163 | 969 => 'genealogical', |
164 | 972 => 'genealogical', |
165 | 973 => 'genealogical', |
166 | 974 => 'type1', |
167 | 1003 => 'genealogical', |
168 | 1004 => 'genealogical' # check for transp |
f00cefe8 |
169 | ); |
170 | |
e6823346 |
171 | my %num_readings; |
172 | |
173 | my @all_variant_ranks = sort { $a <=> $b } keys( %expected ); |
f00cefe8 |
174 | # Look through the results |
f00cefe8 |
175 | my $c = $tradition->collation; |
a745c3d9 |
176 | my %analysis_opts; |
e6823346 |
177 | my $results = run_analysis( $tradition, %analysis_opts ); |
178 | my @analyzed; |
f00cefe8 |
179 | foreach my $row ( @{$results->{'variants'}} ) { |
e6823346 |
180 | push( @analyzed, $row->{id} ); |
181 | $num_readings{$row->{id}} = scalar @{$row->{'readings'}}; |
182 | my $type = 'genealogical'; |
183 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
184 | $type = 'conflict'; |
185 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
186 | $type = 'reverted'; |
187 | } |
188 | my $expected = $expected{$row->{'id'}}; |
189 | $expected = 'genealogical' if $expected eq 'type1'; |
190 | is( $type, $expected, "Got expected genealogical result for rank " . $row->{'id'} ); |
191 | # If the row is genealogical, there should be one reading with no parents, |
192 | # every reading should independently occur exactly once, and the total |
193 | # number of changes + maybe-changes should equal the total number of |
194 | # readings who have that one as a parent. |
a745c3d9 |
195 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
196 | next if exists $row->{'unsolved'}; |
e6823346 |
197 | if( $row->{'genealogical'} ) { |
198 | # Make the mapping of parent -> child readings |
199 | my %is_parent; |
200 | my @has_no_parent; |
f00cefe8 |
201 | foreach my $rdg ( @{$row->{'readings'}} ) { |
e6823346 |
202 | my $parents = $rdg->{'source_parents'} || {}; |
203 | foreach my $p ( keys %$parents ) { |
204 | push( @{$is_parent{$p}}, $rdg->{'readingid'} ); |
f00cefe8 |
205 | } |
e6823346 |
206 | push( @has_no_parent, $rdg->{'readingid'} ) unless keys %$parents; |
f00cefe8 |
207 | } |
e6823346 |
208 | # Test some stuff |
209 | foreach my $rdg ( @{$row->{'readings'}} ) { |
210 | is( @{$rdg->{'independent_occurrence'}}, 1, |
211 | "Genealogical reading originates exactly once" ); |
f00cefe8 |
212 | } |
e6823346 |
213 | is( @has_no_parent, 1, "Only one genealogical reading lacks a parent" ); |
f00cefe8 |
214 | } |
215 | } |
e6823346 |
216 | # Check that run_analysis ran an analysis on all our known variant ranks |
217 | is_deeply( \@all_variant_ranks, \@analyzed, "Ran analysis for all expected rows" ); |
218 | |
219 | # Now run it again, excluding type 1 variants. |
220 | map { delete $expected{$_} if $expected{$_} eq 'type1' } keys %expected; |
221 | my @useful_variant_ranks = sort { $a <=> $b } keys( %expected ); |
222 | $analysis_opts{'exclude_type1'} = 1; |
223 | @analyzed = (); |
224 | $results = run_analysis( $tradition, %analysis_opts ); |
225 | foreach my $row ( @{$results->{'variants'}} ) { |
a745c3d9 |
226 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
227 | next if exists $row->{'unsolved'}; |
e6823346 |
228 | push( @analyzed, $row->{id} ); |
229 | my $type = 'genealogical'; |
230 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
231 | $type = 'conflict'; |
232 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
233 | $type = 'reverted'; |
234 | } |
235 | is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on exclude_type1 run for rank " . $row->{'id'} ); |
236 | } |
237 | is_deeply( \@analyzed, \@useful_variant_ranks, "Ran analysis for all useful rows" ); |
238 | |
239 | # Now run it again, excluding orthography / spelling. |
240 | my @merged_exclude = qw/ 76 136 142 155 293 317 319 335 350 361 413 500 515 636 685 |
241 | 737 954 1003 /; |
242 | # 205 now conflicts; it and 493 should also have one fewer reading |
243 | $expected{205} = 'conflict'; |
244 | $num_readings{205}--; |
245 | $num_readings{493}--; |
246 | map { delete $expected{$_} } @merged_exclude; |
247 | my @merged_remaining = sort { $a <=> $b } keys( %expected ); |
248 | $analysis_opts{'merge_types'} = [ qw/ orthographic spelling / ]; |
249 | @analyzed = (); |
250 | $results = run_analysis( $tradition, %analysis_opts ); |
251 | foreach my $row ( @{$results->{'variants'}} ) { |
252 | push( @analyzed, $row->{id} ); |
a745c3d9 |
253 | ok( !exists $row->{'unsolved'}, "Got a solution for the stated problem" ); |
254 | next if exists $row->{'unsolved'}; |
e6823346 |
255 | my $type = 'genealogical'; |
256 | if( grep { $_->{'is_conflict'} } @{$row->{'readings'}} ) { |
257 | $type = 'conflict'; |
258 | } elsif( grep { $_->{'is_reverted'} } @{$row->{'readings'}} ) { |
259 | $type = 'reverted'; |
260 | } |
261 | is( $type, $expected{$row->{'id'}}, "Got expected genealogical result on merge_types run for rank " . $row->{'id'} ); |
262 | is( scalar @{$row->{'readings'}}, $num_readings{$row->{id}}, "Got expected number of readings during merge" ); |
263 | } |
264 | is_deeply( \@analyzed, \@merged_remaining, "Ran analysis for all useful unmerged rows" ); |
265 | |
266 | done_testing(); |