Commit | Line | Data |
a5d53377 |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Text::Tradition; |
7 | use Text::Tradition::Stemma; |
8 | |
9 | binmode STDERR, ":utf8"; |
10 | binmode STDOUT, ":utf8"; |
11 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
12 | |
a5d53377 |
13 | # Parse the tradition data |
7fd4b80c |
14 | my $informat = 'Self'; |
a5d53377 |
15 | |
7fd4b80c |
16 | my %args = ( 'input' => $informat, |
17 | 'file' => $ARGV[0] ); |
e79c23c7 |
18 | my $tradition = Text::Tradition->new( %args ); |
a5d53377 |
19 | |
20 | # Parse the stemma hypothesis |
21 | my $stemma = Text::Tradition::Stemma->new( |
22 | 'collation' => $tradition->collation, |
23 | 'dot' => $ARGV[1], |
24 | ); |
25 | |
26 | # We have the collation, so get the alignment table with witnesses in rows. |
0e476982 |
27 | # Also return the reading objects in the table, rather than just the words. |
a5d53377 |
28 | |
0e476982 |
29 | my $all_wits_table = $tradition->collation->make_alignment_table( 'refs' ); |
a5d53377 |
30 | |
31 | # For each column in the alignment table, we want to see if the existing |
0e476982 |
32 | # groupings of witnesses match our stemma hypothesis. We also want, at the |
33 | # end, to produce an HTML table with all the variants. |
34 | my $html_columns = 0; |
35 | my $html_data = []; |
36 | my $total = 0; # Keep track of the total number of variant locations |
a5d53377 |
37 | |
38 | # Strip the list of sigla and save it for correlation to the readings. |
39 | my $col_wits = shift @$all_wits_table; |
40 | |
a5d53377 |
41 | foreach my $i ( 0 .. $#$all_wits_table ) { |
0e476982 |
42 | # For each column in the table, group the readings by witness. |
a5d53377 |
43 | my $rdg_wits = {}; |
44 | my $col_rdgs = shift @$all_wits_table; |
0e476982 |
45 | my $rank; |
a5d53377 |
46 | foreach my $j ( 0 .. $#{$col_rdgs} ) { |
47 | my $rdg = $col_rdgs->[$j]; |
0e476982 |
48 | $rank = $rdg->rank if $rdg; # Save the rank for later display |
49 | my $rdg_text = '(omitted)'; # Initialize in case of empty reading |
a5d53377 |
50 | if( $rdg ) { |
0e476982 |
51 | $rdg_text = $rdg->is_lacuna ? undef : $rdg->text; # Don't count lacunae |
52 | } |
53 | if( defined $rdg_text ) { |
54 | # Initialize the witness array if we haven't got one yet |
55 | $rdg_wits->{$rdg_text} = [] unless $rdg_wits->{$rdg_text}; |
56 | # Add the relevant witness, subject to a.c. logic |
57 | add_variant_wit( $rdg_wits->{$rdg_text}, $col_wits->[$j] ); |
a5d53377 |
58 | } |
59 | } |
60 | |
0e476982 |
61 | # See if this column has any potentially genealogical variants. |
62 | # If not, skip to the next. |
63 | $total++ unless scalar keys %$rdg_wits == 1; |
a5d53377 |
64 | my( $groups, $readings ) = useful_variant( $rdg_wits ); |
0e476982 |
65 | next unless $groups && $readings; |
66 | $html_columns = scalar @$groups if scalar @$groups > $html_columns; |
a5d53377 |
67 | |
0e476982 |
68 | # We can already look up witnesses for a reading; we also want to look |
69 | # up readings for a given witness. |
40f19742 |
70 | my $group_readings = {}; |
71 | foreach my $x ( 0 .. $#$groups ) { |
72 | $group_readings->{wit_stringify( $groups->[$x] )} = $readings->[$x]; |
73 | } |
74 | |
75 | # For all the groups with more than one member, collect the list of all |
76 | # contiguous vertices needed to connect them. |
77 | # TODO: deal with a.c. reading logic |
0e476982 |
78 | my $sc = analyze_variant_location( $group_readings, $groups, $stemma->apsp ); |
40f19742 |
79 | print wit_stringify( $groups ) . ' - ' . join( " / ", @$readings ) . "\n"; |
0e476982 |
80 | foreach my $rdg ( keys %$sc ) { |
81 | my $var = $sc->{$rdg}; |
40f19742 |
82 | print "\tReadings '$rdg' and '$var' are not genealogical\n"; |
83 | } |
84 | |
0e476982 |
85 | # Now run the same analysis given the calculated distance tree(s). |
86 | foreach my $tree ( 0 .. $#{$stemma->distance_trees} ) { |
87 | my $dc = analyze_variant_location( $group_readings, $groups, |
88 | $stemma->distance_apsps->[$tree] ); |
89 | foreach my $rdg ( keys %$dc ) { |
90 | my $var = $dc->{$rdg}; |
91 | print "\tReadings '$rdg' and '$var' disregarded by parsimony on tree $tree\n"; |
92 | } |
40f19742 |
93 | } |
94 | |
95 | # Record that we used this variant in an analysis |
0e476982 |
96 | push( @$html_data, [ $rank, $readings, $sc ] ); |
40f19742 |
97 | } |
0e476982 |
98 | |
e79c23c7 |
99 | # Save the stemma picture |
100 | open( STEMMA, ">stemma_graph.svg" ) or die "Could not open stemma graph to write"; |
101 | binmode STEMMA, ":utf8"; |
102 | print STEMMA $stemma->as_svg; |
103 | close STEMMA; |
40f19742 |
104 | |
0e476982 |
105 | printf( "Ran analysis on %d / %d variant locations\n", scalar @$html_data, $total ); |
106 | |
40f19742 |
107 | sub analyze_variant_location { |
108 | my( $group_readings, $groups, $apsp ) = @_; |
a5d53377 |
109 | my %contig; |
40f19742 |
110 | my $conflict = {}; |
111 | foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) { |
112 | my @members = @$g; |
113 | my $gst = wit_stringify( $g ); |
114 | map { $contig{$_} = $gst } @members; # The witnesses need themselves to be |
115 | # in their collection. |
a5d53377 |
116 | next unless @members > 1; |
a5d53377 |
117 | my $curr = pop @members; |
118 | foreach my $m ( @members ) { |
40f19742 |
119 | foreach my $v ( $apsp->path_vertices( $curr, $m ) ) { |
120 | $contig{$v} = $gst unless exists $contig{$v}; |
121 | next if $contig{$v} eq $gst; |
122 | # print STDERR "Conflict at $v between group $gst and group " |
a5d53377 |
123 | # . $contig{$v} . "\n"; |
40f19742 |
124 | # Record what is conflicting. |
125 | $conflict->{$group_readings->{$gst}} = $group_readings->{$contig{$v}}; |
a5d53377 |
126 | } |
127 | } |
128 | } |
40f19742 |
129 | return $conflict; |
a5d53377 |
130 | } |
a5d53377 |
131 | |
132 | # Add the variant, subject to a.c. representation logic. |
133 | # This assumes that we will see the 'main' version before the a.c. version. |
134 | sub add_variant_wit { |
135 | my( $arr, $wit ) = @_; |
136 | my $acstr = $tradition->collation->ac_label; |
137 | my $skip; |
138 | if( $wit =~ /^(.*)\Q$acstr\E$/ ) { |
139 | my $real = $1; |
140 | $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr; |
141 | } |
142 | push( @$arr, $wit ) unless $skip; |
143 | } |
144 | |
145 | # Return an answer if the variant is useful, i.e. if there are at least 2 variants |
146 | # with at least 2 witnesses each. |
147 | sub useful_variant { |
148 | my( $readings ) = @_; |
149 | my $total = keys %$readings; |
150 | foreach my $var ( keys %$readings ) { |
151 | $total-- if @{$readings->{$var}} == 1; |
152 | } |
153 | return( undef, undef ) if $total <= 1; |
154 | my( $groups, $text ); |
155 | foreach my $var ( keys %$readings ) { |
40f19742 |
156 | push( @$groups, $readings->{$var} ); |
a5d53377 |
157 | push( @$text, $var ); |
158 | } |
159 | return( $groups, $text ); |
160 | } |
40f19742 |
161 | |
162 | # Take an array of witness groupings and produce a string like |
163 | # A,B / C,D,E / F |
164 | |
165 | sub wit_stringify { |
166 | my $groups = shift; |
167 | my @gst; |
168 | # If we were passed an array of witnesses instead of an array of |
169 | # groupings, then "group" the witnesses first. |
170 | unless( ref( $groups->[0] ) ) { |
171 | my $mkgrp = [ $groups ]; |
172 | $groups = $mkgrp; |
173 | } |
174 | foreach my $g ( @$groups ) { |
175 | push( @gst, join( ',', @$g ) ); |
176 | } |
177 | return join( ' / ', @gst ); |
178 | } |
179 | |