Commit | Line | Data |
a5d53377 |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Text::Tradition; |
7 | use Text::Tradition::Stemma; |
8 | |
9 | binmode STDERR, ":utf8"; |
10 | binmode STDOUT, ":utf8"; |
11 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
12 | |
13 | my $informat = 'TEI'; |
14 | my $inbase; |
15 | my $linear = 1; |
16 | |
17 | # Parse the tradition data |
18 | |
19 | my $input = $ARGV[0]; |
20 | my @lines; |
21 | open( INFILE, "$input" ) or die "Could not read $input"; |
22 | binmode INFILE, ':utf8'; |
23 | @lines = <INFILE>; |
24 | close INFILE; |
25 | $input = join( '', @lines ); |
26 | |
27 | my %args = ( $informat => $input, |
28 | 'linear' => $linear ); |
29 | $args{'base'} = $inbase if $inbase; |
e79c23c7 |
30 | my $tradition = Text::Tradition->new( %args ); |
a5d53377 |
31 | |
32 | # Parse the stemma hypothesis |
33 | my $stemma = Text::Tradition::Stemma->new( |
34 | 'collation' => $tradition->collation, |
35 | 'dot' => $ARGV[1], |
36 | ); |
37 | |
38 | # We have the collation, so get the alignment table with witnesses in rows. |
39 | |
40 | my $all_wits_table = $tradition->collation->make_alignment_table( 1 ); |
41 | |
42 | # For each column in the alignment table, we want to see if the existing |
43 | # groupings of witnesses match our stemma hypothesis. First let's just go |
44 | # through the groupings. |
45 | |
46 | # Strip the list of sigla and save it for correlation to the readings. |
47 | my $col_wits = shift @$all_wits_table; |
48 | |
49 | # For each column in the table, group the readings by witness. |
50 | |
40f19742 |
51 | my $used_vars = 0; |
a5d53377 |
52 | foreach my $i ( 0 .. $#$all_wits_table ) { |
53 | my $rdg_wits = {}; |
54 | my $col_rdgs = shift @$all_wits_table; |
55 | foreach my $j ( 0 .. $#{$col_rdgs} ) { |
56 | my $rdg = $col_rdgs->[$j]; |
57 | $rdg = '' unless $rdg; # We care about empty readings |
58 | $rdg = undef if $rdg eq '#LACUNA#'; # ... unless they're lacunas |
59 | if( $rdg ) { |
60 | $rdg_wits->{$rdg} = [] unless $rdg_wits->{$rdg}; |
61 | add_variant_wit( $rdg_wits->{$rdg}, $col_wits->[$j] ); |
62 | } |
63 | } |
64 | |
65 | my( $groups, $readings ) = useful_variant( $rdg_wits ); |
66 | next unless $groups && $readings; |
67 | |
40f19742 |
68 | # We can look up witnesses for a reading; we also want to look up readings |
69 | # for a given witness. |
70 | my $group_readings = {}; |
71 | foreach my $x ( 0 .. $#$groups ) { |
72 | $group_readings->{wit_stringify( $groups->[$x] )} = $readings->[$x]; |
73 | } |
74 | |
75 | # For all the groups with more than one member, collect the list of all |
76 | # contiguous vertices needed to connect them. |
77 | # TODO: deal with a.c. reading logic |
78 | my $conflict = analyze_variant_location( $group_readings, $groups, $stemma->apsp ); |
79 | print wit_stringify( $groups ) . ' - ' . join( " / ", @$readings ) . "\n"; |
80 | foreach my $rdg ( keys %$conflict ) { |
81 | my $var = $conflict->{$rdg}; |
82 | print "\tReadings '$rdg' and '$var' are not genealogical\n"; |
83 | } |
84 | |
85 | # Now run the same analysis given a distance tree. |
86 | my $distance_apsp = $stemma->distance_trees->[0]->APSP_Floyd_Warshall(); |
87 | $conflict = analyze_variant_location( $group_readings, $groups, $distance_apsp ); |
88 | foreach my $rdg ( keys %$conflict ) { |
89 | my $var = $conflict->{$rdg}; |
90 | print "\tReadings '$rdg' and '$var' disregarded by parsimony\n"; |
91 | } |
92 | |
93 | # Record that we used this variant in an analysis |
94 | $used_vars++; |
95 | |
96 | } |
97 | print "Found $used_vars useful variants in this analysis\n"; |
e79c23c7 |
98 | # Save the stemma picture |
99 | open( STEMMA, ">stemma_graph.svg" ) or die "Could not open stemma graph to write"; |
100 | binmode STEMMA, ":utf8"; |
101 | print STEMMA $stemma->as_svg; |
102 | close STEMMA; |
40f19742 |
103 | |
104 | sub analyze_variant_location { |
105 | my( $group_readings, $groups, $apsp ) = @_; |
a5d53377 |
106 | my %contig; |
40f19742 |
107 | my $conflict = {}; |
108 | foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) { |
109 | my @members = @$g; |
110 | my $gst = wit_stringify( $g ); |
111 | map { $contig{$_} = $gst } @members; # The witnesses need themselves to be |
112 | # in their collection. |
a5d53377 |
113 | next unless @members > 1; |
a5d53377 |
114 | my $curr = pop @members; |
115 | foreach my $m ( @members ) { |
40f19742 |
116 | foreach my $v ( $apsp->path_vertices( $curr, $m ) ) { |
117 | $contig{$v} = $gst unless exists $contig{$v}; |
118 | next if $contig{$v} eq $gst; |
119 | # print STDERR "Conflict at $v between group $gst and group " |
a5d53377 |
120 | # . $contig{$v} . "\n"; |
40f19742 |
121 | # Record what is conflicting. |
122 | $conflict->{$group_readings->{$gst}} = $group_readings->{$contig{$v}}; |
a5d53377 |
123 | } |
124 | } |
125 | } |
40f19742 |
126 | return $conflict; |
a5d53377 |
127 | } |
a5d53377 |
128 | |
129 | # Add the variant, subject to a.c. representation logic. |
130 | # This assumes that we will see the 'main' version before the a.c. version. |
131 | sub add_variant_wit { |
132 | my( $arr, $wit ) = @_; |
133 | my $acstr = $tradition->collation->ac_label; |
134 | my $skip; |
135 | if( $wit =~ /^(.*)\Q$acstr\E$/ ) { |
136 | my $real = $1; |
137 | $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr; |
138 | } |
139 | push( @$arr, $wit ) unless $skip; |
140 | } |
141 | |
142 | # Return an answer if the variant is useful, i.e. if there are at least 2 variants |
143 | # with at least 2 witnesses each. |
144 | sub useful_variant { |
145 | my( $readings ) = @_; |
146 | my $total = keys %$readings; |
147 | foreach my $var ( keys %$readings ) { |
148 | $total-- if @{$readings->{$var}} == 1; |
149 | } |
150 | return( undef, undef ) if $total <= 1; |
151 | my( $groups, $text ); |
152 | foreach my $var ( keys %$readings ) { |
40f19742 |
153 | push( @$groups, $readings->{$var} ); |
a5d53377 |
154 | push( @$text, $var ); |
155 | } |
156 | return( $groups, $text ); |
157 | } |
40f19742 |
158 | |
159 | # Take an array of witness groupings and produce a string like |
160 | # A,B / C,D,E / F |
161 | |
162 | sub wit_stringify { |
163 | my $groups = shift; |
164 | my @gst; |
165 | # If we were passed an array of witnesses instead of an array of |
166 | # groupings, then "group" the witnesses first. |
167 | unless( ref( $groups->[0] ) ) { |
168 | my $mkgrp = [ $groups ]; |
169 | $groups = $mkgrp; |
170 | } |
171 | foreach my $g ( @$groups ) { |
172 | push( @gst, join( ',', @$g ) ); |
173 | } |
174 | return join( ' / ', @gst ); |
175 | } |
176 | |