fix typo in Collation.pm test
[scpubgit/stemmatology.git] / base / t / text_tradition_collation.t
Commit | Line | Data
0e47f4f6 1#!/usr/bin/perl -w
2
3use strict;
4use Test::More 'no_plan';
5$| = 1;
6
7
8
# =begin testing
{
    # Reading surgery on the CollateX sample: split one reading in two, merge
    # readings with and without concatenation, and confirm that a merge the
    # collation refuses leaves the graph in a state where ranks can still be
    # calculated.
    use Text::Tradition;
    use TryCatch;

    my $collatex_path = 't/data/Collatex-16.xml';
    my $tradition     = Text::Tradition->new(
        name  => 'inline',
        input => 'CollateX',
        file  => $collatex_path,
    );
    my $collation = $tradition->collation;

    # Baseline reading count, taken before any surgery.
    my $initial_count = scalar $collation->readings;

    # Split n21 ('unto') for testing purposes: n21p0 takes 'un', n21 keeps
    # 'to', and the 'A' path is rerouted n20 -> n21p0 -> n21.
    my $first_half = $collation->add_reading(
        { id => 'n21p0', text => 'un', join_next => 1 } );
    my $second_half = $collation->reading('n21');
    $second_half->alter_text('to');
    $collation->del_path( 'n20', 'n21', 'A' );
    $collation->add_path( 'n20', 'n21p0', 'A' );
    $collation->add_path( 'n21p0', 'n21', 'A' );
    $collation->add_relationship( 'n21', 'n22',
        { type => 'collated', scope => 'local' } );
    $collation->flatten_ranks();
    ok( $collation->reading('n21p0'), "New reading exists" );
    is( scalar $collation->readings, $initial_count, "Reading add offset by flatten_ranks" );

    # Combine n3 and n4 ( with his ); the surviving reading is expected to
    # carry both words.
    $collation->merge_readings( 'n3', 'n4', 1 );
    ok( !$collation->reading('n4'), "Reading n4 is gone" );
    is( $collation->reading('n3')->text, 'with his', "Reading n3 has both words" );

    # Collapse n9 and n10 ( rood / root ); the surviving text is expected to
    # be unchanged.
    $collation->merge_readings( 'n9', 'n10' );
    ok( !$collation->reading('n10'), "Reading n10 is gone" );
    is( $collation->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );

    # Try to combine n21p0 with whichever of n21 / n22 still exists.
    # This should break.
    my $remaining = $collation->reading('n21') || $collation->reading('n22');
    try {
        $collation->merge_readings( 'n21p0', $remaining, 1 );
        fail( "Bad reading merge changed the graph" );
    } catch( Text::Tradition::Error $err ) {
        like( $err->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
    } catch {
        fail( "Unexpected error on bad reading merge: $@" );
    }

    # The rejected merge must not have damaged the graph.
    try {
        $collation->calculate_ranks();
        pass( "Graph is still evidently whole" );
    } catch( Text::Tradition::Error $err ) {
        fail( "Caught a rank exception: " . $err->message );
    }
}
64
65
66
# =begin testing
{
    # merge_related() on the legend fragment: collapsing orthographic
    # variants is expected to keep the graph linear and rankable, while
    # collapsing a transposition is expected to make it non-linear.
    use Test::Warn;
    use Text::Tradition;
    use TryCatch;

    my $tradition;
    warnings_exist {
        $tradition = Text::Tradition->new( input => 'Self', file => 't/data/legendfrag.xml' );
    } [qr/Cannot set relationship on a meta reading/],
        "Got expected relationship drop warning on parse";

    my $collation = $tradition->collation;
    # Force the transitive propagation of all existing relationships.
    $collation->relations->propagate_all_relationships();

    # Remember every reading present before the merge. A true value means
    # the reading is expected to survive; undef means it should be removed.
    my %expect_kept;
    $expect_kept{$_} = 1 for $collation->readings;
    $collation->merge_related('orthographic');
    is( scalar( $collation->readings ), keys(%expect_kept) - 9,
        "Successfully collapsed orthographic variation" );
    $expect_kept{$_} = undef
        for qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
    foreach my $rid ( keys %expect_kept ) {
        my $kept = $expect_kept{$rid};
        my $fate = $kept ? "retained" : "removed";
        is( !$collation->reading($rid), !$kept, "Reading $rid correctly " . $fate );
    }
    ok( $collation->linear, "Graph is still linear" );
    try {
        $collation->calculate_ranks;    # This should succeed
        pass( "Can still calculate ranks on the new graph" );
    } catch {
        fail( "Rank calculation on merged graph failed: $@" );
    }

    # Now add some transpositions
    $collation->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
    $collation->merge_related('transposition');
    is( scalar( $collation->readings ), keys(%expect_kept) - 10,
        "Transposed relationship is merged away" );
    ok( !$collation->reading('r8.4'), "Correct transposed reading removed" );
    ok( !$collation->linear, "Graph is no longer linear" );
    try {
        $collation->calculate_ranks;    # This should fail
        fail( "Rank calculation happened on nonlinear graph?!" );
    } catch ( Text::Tradition::Error $err ) {
        is( $err->message, 'Cannot calculate ranks on a non-linear graph',
            "Rank calculation on merged graph threw an error" );
    }
}
117
118
119
# =begin testing
{
    # compress_readings() on the CollateX sample: once on the graph as
    # parsed, and once on a second copy in which 'unto' has been split into
    # two readings linked with join_next.
    use Text::Tradition;

    my $plain_trad = Text::Tradition->new(
        input => 'CollateX',
        file  => 't/data/Collatex-16.xml',
    );
    my $plain_coll = $plain_trad->collation;
    my $baseline   = scalar $plain_coll->readings;
    $plain_coll->compress_readings();
    is( scalar $plain_coll->readings, $baseline - 6, "Compressing readings seems to work" );

    # Now put in a join-word and make sure the thing still works.
    my $split_trad = Text::Tradition->new(
        input => 'CollateX',
        file  => 't/data/Collatex-16.xml',
    );
    my $split_coll = $split_trad->collation;

    # Split n21 ('unto') for testing purposes
    my $un_rdg = $split_coll->add_reading(
        { id => 'n21p0', text => 'un', join_next => 1 } );
    my $to_rdg = $split_coll->reading('n21');
    $to_rdg->alter_text('to');
    $split_coll->del_path( 'n20', 'n21', 'A' );
    $split_coll->add_path( 'n20', 'n21p0', 'A' );
    $split_coll->add_path( 'n21p0', 'n21', 'A' );
    $split_coll->calculate_ranks();
    is( scalar $split_coll->readings, $baseline + 1, "We have our extra test reading" );

    # After compression the split word should be back in one reading, with
    # no space inserted at the join.
    $split_coll->compress_readings();
    is( scalar $split_coll->readings, $baseline - 6, "Compressing readings also works with join_next" );
    is( $split_coll->reading('n21p0')->text, 'unto', "The joined word has no space" );
}
146
147
148
# =begin testing
{
    # duplicate_reading() used to repair a bad collation: detach witness
    # Ba96 from reading n131, check the effect on ranks and relationships,
    # re-merge the detached copy where it belongs, and finally check the
    # error cases of duplicate_reading() itself.
    use Test::More::UTF8;
    use Text::Tradition;
    use TryCatch;

    my $tradition = Text::Tradition->new(
        input => 'Self',
        file  => 't/data/collatecorr.xml',
    );
    is( ref($tradition), 'Text::Tradition', "Got a tradition from test file" );
    ok( $tradition->has_witness('Ba96'), "Tradition has the affected witness" );

    my $collation      = $tradition->collation;
    my $expected_count = 17;
    ok( $collation->reading('n131'), "Tradition has the affected reading" );
    is( scalar( $collation->readings ), $expected_count, "There are $expected_count readings in the graph" );
    is( $collation->end->rank, 14, "There are fourteen ranks in the graph" );

    # Detach the erroneously collated reading
    my ( $dup_rdg, @dropped ) = $collation->duplicate_reading( 'n131', 'Ba96' );
    ok( $dup_rdg, "New reading was created" );
    ok( $collation->reading('n131_0'), "Detached the bad collation with a new reading" );
    is( scalar( $collation->readings ), $expected_count + 1, "A reading was added to the graph" );
    is( $collation->end->rank, 10, "There are now only ten ranks in the graph" );
    my $shared_succ = $collation->common_successor( 'n131', 'n131_0' );
    is( $shared_succ->id, 'n136', "Found correct common successor to duped reading" );

    # Check that the bad transposition is gone
    is( scalar @dropped, 1, "Deleted reading was returned by API call" );
    is( $collation->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );

    # The collation should not be fixed yet
    my @identical = $collation->identical_readings();
    is( scalar @identical, 0, "Not re-collated yet" );

    # Fix the collation
    ok( $collation->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
    @identical = $collation->identical_readings(
        start => 'n124',
        end   => $shared_succ->id,
    );
    is( scalar @identical, 3, "Found three more identical readings" );
    is( $collation->end->rank, 11, "The ranks shifted appropriately" );
    $collation->flatten_ranks();
    is( scalar( $collation->readings ), $expected_count - 3, "Now we are collated correctly" );

    # Check that we can't "duplicate" a reading with no wits or with all wits
    try {
        my ( $bad_rdg, @bad_dropped ) = $collation->duplicate_reading('n124');
        fail( "Reading duplication without witnesses throws an error" );
    } catch( Text::Tradition::Error $err ) {
        like( $err->message, qr/Must specify one or more witnesses/,
            "Reading duplication without witnesses throws the expected error" );
    } catch {
        fail( "Reading duplication without witnesses threw the wrong error" );
    }

    try {
        my ( $bad_rdg, @bad_dropped )
            = $collation->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
        fail( "Reading duplication with all witnesses throws an error" );
    } catch( Text::Tradition::Error $err ) {
        like( $err->message, qr/Cannot join all witnesses/,
            "Reading duplication with all witnesses throws the expected error" );
    } catch {
        fail( "Reading duplication with all witnesses threw the wrong error" );
    }

    # The failed duplications must not have damaged the graph.
    try {
        $collation->calculate_ranks();
        pass( "Graph is still evidently whole" );
    } catch( Text::Tradition::Error $err ) {
        fail( "Caught a rank exception: " . $err->message );
    }
}
217
218
219
# =begin testing
{
    # as_adjacency_list() on the florilegium GraphML sample: the JSON output
    # is decoded and checked for node/edge counts, for a preserved minlen on
    # one edge, and (for a rank-limited slice) for internal consistency.
    use JSON qw/ from_json /;
    use Text::Tradition;

    my $tradition = Text::Tradition->new(
        input => 'Self',
        file  => 't/data/florilegium_graphml.xml',
    );
    my $collation = $tradition->collation;

    # Make a connection so we can test rank preservation
    $collation->add_relationship( 'w91', 'w92', { type => 'grammatical' } );

    # Create an adjacency list of the whole thing; test the output.
    my $whole = from_json( $collation->as_adjacency_list() );
    is( scalar @$whole, scalar $collation->readings(),
        "Same number of nodes in graph and adjacency list" );
    # Flatten every node's 'adjacent' array into one list of edges.
    my @whole_edges = map { @{ $_->{adjacent} } } @$whole;
    is( scalar @whole_edges, scalar $collation->sequence->edges,
        "Same number of edges in graph and adjacency list" );

    # Find the reading whose rank should be preserved
    my ($w89_node)   = grep { $_->{id} eq 'w89' } @$whole;
    my ($edge_to_92) = grep { $_->{id} eq 'w92' } @{ $w89_node->{adjacent} };
    is( $edge_to_92->{minlen}, 2, "Rank of test reading is preserved" );

    # Now create an adjacency list of just a portion. w76 to w122
    my $partial = from_json(
        $collation->as_adjacency_list(
            {   from => $collation->reading('w76')->rank,
                to   => $collation->reading('w122')->rank
            }
        )
    );
    is( scalar @$partial, 48, "Correct number of nodes in partial graph" );
    my @partial_edges = map { @{ $_->{adjacent} } } @$partial;
    is( scalar @partial_edges, 58,
        "Same number of edges in partial graph and adjacency list" );

    # Check for consistency: every edge target must also be listed as a node.
    my %is_partial_node = map { $_->{id} => 1 } @$partial;
    foreach my $edge (@partial_edges) {
        my $target_id = $edge->{id};
        ok( $is_partial_node{$target_id}, "ID $target_id referenced in edge is given as node" );
    }
}
263
264
265
# =begin testing
{
    # GraphML round trip on the florilegium TEI sample: parse, add a few
    # relationships, serialise with as_graphml, reparse, and compare counts.
    # If the tradition supports add_stemma, also check that a stemma ends up
    # in the GraphML output.
    use Text::Tradition;
    use TryCatch;

    my $EXPECTED_READINGS = 311;
    my $EXPECTED_PATHS    = 361;

    my $tei_path  = 't/data/florilegium_tei_ps.xml';
    my $tradition = Text::Tradition->new(
        input  => 'TEI',
        name   => 'test0',
        file   => $tei_path,
        linear => 1,
    );

    ok( $tradition, "Got a tradition object" );
    is( scalar $tradition->witnesses, 13, "Found all witnesses" );
    ok( $tradition->collation, "Tradition has a collation" );

    my $collation = $tradition->collation;
    is( scalar $collation->readings, $EXPECTED_READINGS, "Collation has all readings" );
    is( scalar $collation->paths, $EXPECTED_PATHS, "Collation has all paths" );
    is( scalar $collation->relationships, 0, "Collation has all relationships" );

    # Add a few relationships
    $collation->add_relationship( 'w123', 'w125', { type => 'collated' } );
    $collation->add_relationship( 'w193', 'w196', { type => 'collated' } );
    $collation->add_relationship( 'w257', 'w262',
        { type => 'transposition', is_significant => 'yes' } );

    # Now write it to GraphML and parse it again.
    my $graphml  = $collation->as_graphml;
    my $reparsed = Text::Tradition->new( input => 'Self', string => $graphml );
    is( scalar $reparsed->collation->readings, $EXPECTED_READINGS, "Reparsed collation has all readings" );
    is( scalar $reparsed->collation->paths, $EXPECTED_PATHS, "Reparsed collation has all paths" );
    is( scalar $reparsed->collation->relationships, 3, "Reparsed collation has new relationships" );
    my $significant_rel = $reparsed->collation->get_relationship( 'w257', 'w262' );
    is( $significant_rel->is_significant, 'yes', "Ternary attribute value was restored" );

    # Now add a stemma, write to GraphML, and look at the output.
    # The three tests below are skipped when add_stemma is unavailable.
  SKIP: {
        skip( "Analysis module not present", 3 )
            unless $tradition->can('add_stemma');
        my $stemma = $tradition->add_stemma( dotfile => 't/data/florilegium.dot' );
        is( ref($stemma), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
        is( $tradition->stemmata, 1, "Tradition now has the stemma" );
        $graphml = $collation->as_graphml;
        like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
    }
}
315
316
317
# =begin testing
{
    # CSV / TSV export of the florilegium TEI sample: column counts for the
    # default, noac and safe_ac variants, round trips through the Tabular
    # parser, merging of related readings on export, and ASCII-safe sigla.
    use Text::Tradition;
    use Text::CSV;

    my $EXPECTED_READINGS = 311;
    my $EXPECTED_PATHS    = 361;
    my $WITNESS_COLUMNS   = 13;
    # Extra columns expected when layered witnesses are exported
    # (presumably the a.c. layers -- confirm against the test data).
    my $LAYER_COLUMNS = 4;

    my $tei_path  = 't/data/florilegium_tei_ps.xml';
    my $tradition = Text::Tradition->new(
        input  => 'TEI',
        name   => 'test0',
        file   => $tei_path,
        linear => 1,
    );

    my $collation = $tradition->collation;

    # Export the thing to CSV
    my $csv_text = $collation->as_csv();

    # Count the columns
    my $parser    = Text::CSV->new( { sep_char => ',', binary => 1 } );
    my @csv_lines = split( /\n/, $csv_text );
    ok( $parser->parse( $csv_lines[0] ), "Successfully parsed first line of CSV" );
    is( scalar( $parser->fields ), $WITNESS_COLUMNS + $LAYER_COLUMNS, "CSV has correct number of witness columns" );
    my @layered = grep { $_ eq 'Q' . $collation->ac_label } $parser->fields;
    ok( @layered, "Found a layered witness" );

    my $from_csv = Text::Tradition->new(
        input    => 'Tabular',
        name     => 'test2',
        string   => $csv_text,
        sep_char => ',',
    );
    is( scalar $from_csv->collation->readings, $EXPECTED_READINGS, "Reparsed CSV collation has all readings" );
    is( scalar $from_csv->collation->paths, $EXPECTED_PATHS, "Reparsed CSV collation has all paths" );

    # Now do it with TSV
    my $tsv_text = $collation->as_tsv();
    my $from_tsv = Text::Tradition->new(
        input    => 'Tabular',
        name     => 'test3',
        string   => $tsv_text,
        sep_char => "\t",
    );
    is( scalar $from_tsv->collation->readings, $EXPECTED_READINGS, "Reparsed TSV collation has all readings" );
    is( scalar $from_tsv->collation->paths, $EXPECTED_PATHS, "Reparsed TSV collation has all paths" );

    # Export without layered witnesses; the alignment table itself must be
    # left untouched by the export.
    my $table_before = $collation->alignment_table;
    my $noac_text    = $collation->as_csv( { noac => 1 } );
    my @noac_lines   = split( /\n/, $noac_text );
    ok( $parser->parse( $noac_lines[0] ), "Successfully parsed first line of no-ac CSV" );
    is( scalar( $parser->fields ), $WITNESS_COLUMNS, "CSV has correct number of witness columns" );
    is( $collation->alignment_table, $table_before, "Request for CSV did not alter the alignment table" );

    # Export with sanitized layer labels.
    my $safe_text  = $collation->as_csv( { safe_ac => 1 } );
    my @safe_lines = split( /\n/, $safe_text );
    ok( $parser->parse( $safe_lines[0] ), "Successfully parsed first line of safe CSV" );
    is( scalar( $parser->fields ), $WITNESS_COLUMNS + $LAYER_COLUMNS, "CSV has correct number of witness columns" );
    @layered = grep { $_ eq 'Q__L' } $parser->fields;
    ok( @layered, "Found a sanitized layered witness" );
    is( $collation->alignment_table, $table_before, "Request for CSV did not alter the alignment table" );

    # Test relationship collapse
    $collation->add_relationship( $collation->readings_at_rank(37), { type => 'spelling' } );
    $collation->add_relationship( $collation->readings_at_rank(60), { type => 'spelling' } );

    my $merged_tsv = $collation->as_tsv(
        { mergetypes => [ 'spelling', 'orthographic' ] } );
    my $from_merged = Text::Tradition->new(
        input    => 'Tabular',
        name     => 'test4',
        string   => $merged_tsv,
        sep_char => "\t",
    );
    is( scalar $from_merged->collation->readings, $EXPECTED_READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
    is( scalar $from_merged->collation->paths, $EXPECTED_PATHS - 4, "Reparsed TSV merge collation has fewer paths" );

    # Test non-ASCII sigla
    my $nonascii = Text::Tradition->new(
        input => 'Tabular',
        name  => 'nonascii',
        file  => 't/data/armexample.xlsx',
        excel => 'xlsx',
    );
    my $ascii_tsv   = $nonascii->collation->as_tsv( { noac => 1, ascii => 1 } );
    my @ascii_lines = split( /\n/, $ascii_tsv );
    like( $ascii_lines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
}
397
398
399
# =begin testing
{
    # Caching of the alignment table on the CollateX sample: the same table
    # should come back until a relationship of a type the test treats as
    # colocated ('spelling') is added.
    use Text::Tradition;

    my $collatex_path = 't/data/Collatex-16.xml';
    my $tradition     = Text::Tradition->new(
        name  => 'inline',
        input => 'CollateX',
        file  => $collatex_path,
    );
    my $collation = $tradition->collation;

    # Build the table once and keep a handle on it for comparison.
    my $first_table = $collation->alignment_table;
    ok( $collation->has_cached_table, "Alignment table was cached" );
    is( $collation->alignment_table, $first_table, "Cached table returned upon second call" );

    $collation->calculate_ranks;
    is( $collation->alignment_table, $first_table, "Cached table retained with no rank change" );

    $collation->add_relationship( 'n13', 'n23', { type => 'repetition' } );
    is( $collation->alignment_table, $first_table, "Alignment table unchanged after non-colo relationship add" );

    $collation->add_relationship( 'n24', 'n23', { type => 'spelling' } );
    isnt( $collation->alignment_table, $first_table, "Alignment table changed after colo relationship add" );
}
423
424
425
# =begin testing
{
    # Common readings on the CollateX sample: calculate_common_readings()
    # should return eight readings, and common_readings() should afterwards
    # report the same eight, matching the expected ID list.
    use Text::Tradition;

    my $cxfile = 't/data/Collatex-16.xml';
    my $t = Text::Tradition->new(
        'name'  => 'inline',
        'input' => 'CollateX',
        'file'  => $cxfile,
    );
    my $c = $t->collation;

    my @common = $c->calculate_common_readings();
    is( scalar @common, 8, "Found correct number of common readings" );
    my @marked = sort $c->common_readings();
    # Count @marked (not @common) so this assertion really checks what
    # common_readings() reports rather than repeating the check above.
    is( scalar @marked, 8, "All common readings got marked as such" );
    my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
    is_deeply( \@marked, \@expected, "Found correct list of common readings" );
}
445
446
447
# =begin testing
{
    # common_predecessor() / common_successor() on the CollateX sample, for
    # readings on different paths and for readings on the same path.
    use Text::Tradition;

    my $collatex_path = 't/data/Collatex-16.xml';
    my $tradition     = Text::Tradition->new(
        name  => 'inline',
        input => 'CollateX',
        file  => $collatex_path,
    );
    my $collation = $tradition->collation;

    # n24 / n23
    my $pred = $collation->common_predecessor( 'n24', 'n23' );
    is( $pred->id, 'n20', "Found correct common predecessor" );
    my $succ = $collation->common_successor( 'n24', 'n23' );
    is( $succ->id, '__END__', "Found correct common successor" );

    # Readings that lie on the same path.
    my $same_path_pred = $collation->common_predecessor( 'n19', 'n17' );
    is( $same_path_pred->id, 'n16', "Found correct common predecessor for readings on same path" );
    my $same_path_succ = $collation->common_successor( 'n21', 'n10' );
    is( $same_path_succ->id, '__END__', "Found correct common successor for readings on same path" );
}
470
471
472
473
4741;