Commit | Line | Data |
a5d53377 |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Text::Tradition; |
7 | use Text::Tradition::Stemma; |
8 | |
9 | binmode STDERR, ":utf8"; |
10 | binmode STDOUT, ":utf8"; |
11 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
12 | |
13 | my $informat = 'TEI'; |
14 | my $inbase; |
15 | my $linear = 1; |
16 | |
17 | # Parse the tradition data |
18 | |
19 | my $input = $ARGV[0]; |
20 | my @lines; |
21 | open( INFILE, "$input" ) or die "Could not read $input"; |
22 | binmode INFILE, ':utf8'; |
23 | @lines = <INFILE>; |
24 | close INFILE; |
25 | $input = join( '', @lines ); |
26 | |
27 | my %args = ( $informat => $input, |
28 | 'linear' => $linear ); |
29 | $args{'base'} = $inbase if $inbase; |
30 | my $tradition = Text::Tradition->new( %args ); |
31 | |
32 | # Parse the stemma hypothesis |
33 | my $stemma = Text::Tradition::Stemma->new( |
34 | 'collation' => $tradition->collation, |
35 | 'dot' => $ARGV[1], |
36 | ); |
37 | |
38 | # We have the collation, so get the alignment table with witnesses in rows. |
39 | |
40 | my $all_wits_table = $tradition->collation->make_alignment_table( 1 ); |
41 | |
42 | # For each column in the alignment table, we want to see if the existing |
43 | # groupings of witnesses match our stemma hypothesis. First let's just go |
44 | # through the groupings. |
45 | |
46 | # Strip the list of sigla and save it for correlation to the readings. |
47 | my $col_wits = shift @$all_wits_table; |
48 | |
49 | # For each column in the table, group the readings by witness. |
50 | |
40f19742 |
51 | my $used_vars = 0; |
a5d53377 |
52 | foreach my $i ( 0 .. $#$all_wits_table ) { |
53 | my $rdg_wits = {}; |
54 | my $col_rdgs = shift @$all_wits_table; |
55 | foreach my $j ( 0 .. $#{$col_rdgs} ) { |
56 | my $rdg = $col_rdgs->[$j]; |
57 | $rdg = '' unless $rdg; # We care about empty readings |
58 | $rdg = undef if $rdg eq '#LACUNA#'; # ... unless they're lacunas |
59 | if( $rdg ) { |
60 | $rdg_wits->{$rdg} = [] unless $rdg_wits->{$rdg}; |
61 | add_variant_wit( $rdg_wits->{$rdg}, $col_wits->[$j] ); |
62 | } |
63 | } |
64 | |
65 | my( $groups, $readings ) = useful_variant( $rdg_wits ); |
66 | next unless $groups && $readings; |
67 | |
40f19742 |
68 | # We can look up witnesses for a reading; we also want to look up readings |
69 | # for a given witness. |
70 | my $group_readings = {}; |
71 | foreach my $x ( 0 .. $#$groups ) { |
72 | $group_readings->{wit_stringify( $groups->[$x] )} = $readings->[$x]; |
73 | } |
74 | |
75 | # For all the groups with more than one member, collect the list of all |
76 | # contiguous vertices needed to connect them. |
77 | # TODO: deal with a.c. reading logic |
78 | my $conflict = analyze_variant_location( $group_readings, $groups, $stemma->apsp ); |
79 | print wit_stringify( $groups ) . ' - ' . join( " / ", @$readings ) . "\n"; |
80 | foreach my $rdg ( keys %$conflict ) { |
81 | my $var = $conflict->{$rdg}; |
82 | print "\tReadings '$rdg' and '$var' are not genealogical\n"; |
83 | } |
84 | |
85 | # Now run the same analysis given a distance tree. |
86 | my $distance_apsp = $stemma->distance_trees->[0]->APSP_Floyd_Warshall(); |
87 | $conflict = analyze_variant_location( $group_readings, $groups, $distance_apsp ); |
88 | foreach my $rdg ( keys %$conflict ) { |
89 | my $var = $conflict->{$rdg}; |
90 | print "\tReadings '$rdg' and '$var' disregarded by parsimony\n"; |
91 | } |
92 | |
93 | # Record that we used this variant in an analysis |
94 | $used_vars++; |
95 | |
96 | } |
97 | print "Found $used_vars useful variants in this analysis\n"; |
98 | |
99 | sub analyze_variant_location { |
100 | my( $group_readings, $groups, $apsp ) = @_; |
a5d53377 |
101 | my %contig; |
40f19742 |
102 | my $conflict = {}; |
103 | foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) { |
104 | my @members = @$g; |
105 | my $gst = wit_stringify( $g ); |
106 | map { $contig{$_} = $gst } @members; # The witnesses need themselves to be |
107 | # in their collection. |
a5d53377 |
108 | next unless @members > 1; |
a5d53377 |
109 | my $curr = pop @members; |
110 | foreach my $m ( @members ) { |
40f19742 |
111 | foreach my $v ( $apsp->path_vertices( $curr, $m ) ) { |
112 | $contig{$v} = $gst unless exists $contig{$v}; |
113 | next if $contig{$v} eq $gst; |
114 | # print STDERR "Conflict at $v between group $gst and group " |
a5d53377 |
115 | # . $contig{$v} . "\n"; |
40f19742 |
116 | # Record what is conflicting. |
117 | $conflict->{$group_readings->{$gst}} = $group_readings->{$contig{$v}}; |
a5d53377 |
118 | } |
119 | } |
120 | } |
40f19742 |
121 | return $conflict; |
a5d53377 |
122 | } |
a5d53377 |
123 | |
124 | # Add the variant, subject to a.c. representation logic. |
125 | # This assumes that we will see the 'main' version before the a.c. version. |
126 | sub add_variant_wit { |
127 | my( $arr, $wit ) = @_; |
128 | my $acstr = $tradition->collation->ac_label; |
129 | my $skip; |
130 | if( $wit =~ /^(.*)\Q$acstr\E$/ ) { |
131 | my $real = $1; |
132 | $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr; |
133 | } |
134 | push( @$arr, $wit ) unless $skip; |
135 | } |
136 | |
137 | # Return an answer if the variant is useful, i.e. if there are at least 2 variants |
138 | # with at least 2 witnesses each. |
139 | sub useful_variant { |
140 | my( $readings ) = @_; |
141 | my $total = keys %$readings; |
142 | foreach my $var ( keys %$readings ) { |
143 | $total-- if @{$readings->{$var}} == 1; |
144 | } |
145 | return( undef, undef ) if $total <= 1; |
146 | my( $groups, $text ); |
147 | foreach my $var ( keys %$readings ) { |
40f19742 |
148 | push( @$groups, $readings->{$var} ); |
a5d53377 |
149 | push( @$text, $var ); |
150 | } |
151 | return( $groups, $text ); |
152 | } |
40f19742 |
153 | |
154 | # Take an array of witness groupings and produce a string like |
155 | # A,B / C,D,E / F |
156 | |
157 | sub wit_stringify { |
158 | my $groups = shift; |
159 | my @gst; |
160 | # If we were passed an array of witnesses instead of an array of |
161 | # groupings, then "group" the witnesses first. |
162 | unless( ref( $groups->[0] ) ) { |
163 | my $mkgrp = [ $groups ]; |
164 | $groups = $mkgrp; |
165 | } |
166 | foreach my $g ( @$groups ) { |
167 | push( @gst, join( ',', @$g ) ); |
168 | } |
169 | return join( ' / ', @gst ); |
170 | } |
171 | |