Commit | Line | Data |
a5d53377 |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Text::Tradition; |
7 | use Text::Tradition::Stemma; |
8 | |
9 | binmode STDERR, ":utf8"; |
10 | binmode STDOUT, ":utf8"; |
11 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
12 | |
13 | my $informat = 'TEI'; |
14 | my $inbase; |
15 | my $linear = 1; |
16 | |
17 | # Parse the tradition data |
18 | |
19 | my $input = $ARGV[0]; |
20 | my @lines; |
21 | open( INFILE, "$input" ) or die "Could not read $input"; |
22 | binmode INFILE, ':utf8'; |
23 | @lines = <INFILE>; |
24 | close INFILE; |
25 | $input = join( '', @lines ); |
26 | |
27 | my %args = ( $informat => $input, |
28 | 'linear' => $linear ); |
29 | $args{'base'} = $inbase if $inbase; |
30 | my $tradition = Text::Tradition->new( %args ); |
31 | |
32 | # Parse the stemma hypothesis |
33 | my $stemma = Text::Tradition::Stemma->new( |
34 | 'collation' => $tradition->collation, |
35 | 'dot' => $ARGV[1], |
36 | ); |
37 | |
38 | # We have the collation, so get the alignment table with witnesses in rows. |
39 | |
40 | my $all_wits_table = $tradition->collation->make_alignment_table( 1 ); |
41 | |
42 | # For each column in the alignment table, we want to see if the existing |
43 | # groupings of witnesses match our stemma hypothesis. First let's just go |
44 | # through the groupings. |
45 | |
46 | # Strip the list of sigla and save it for correlation to the readings. |
47 | my $col_wits = shift @$all_wits_table; |
48 | |
49 | # For each column in the table, group the readings by witness. |
50 | |
51 | my $useful_vars = 0; |
52 | foreach my $i ( 0 .. $#$all_wits_table ) { |
53 | my $rdg_wits = {}; |
54 | my $col_rdgs = shift @$all_wits_table; |
55 | foreach my $j ( 0 .. $#{$col_rdgs} ) { |
56 | my $rdg = $col_rdgs->[$j]; |
57 | $rdg = '' unless $rdg; # We care about empty readings |
58 | $rdg = undef if $rdg eq '#LACUNA#'; # ... unless they're lacunas |
59 | if( $rdg ) { |
60 | $rdg_wits->{$rdg} = [] unless $rdg_wits->{$rdg}; |
61 | add_variant_wit( $rdg_wits->{$rdg}, $col_wits->[$j] ); |
62 | } |
63 | } |
64 | |
65 | my( $groups, $readings ) = useful_variant( $rdg_wits ); |
66 | next unless $groups && $readings; |
67 | |
68 | # For all the groups with more than one member, make a group that contains |
69 | # all contiguous vertices to connect them. |
70 | # TODO Need to do pairwise comparison of groups - a variant location can |
71 | # have both coincidental and genealogical variants! |
72 | my %contig; |
73 | my $conflict; |
74 | foreach my $g ( @$groups ) { |
75 | my @members = split( /,/, $g ); |
76 | next unless @members > 1; |
77 | map { $contig{$_} = $g } @members; |
78 | my $curr = pop @members; |
79 | foreach my $m ( @members ) { |
80 | foreach my $v ( $stemma->apsp->path_vertices( $curr, $m ) ) { |
81 | $contig{$v} = $g unless exists $contig{$v}; |
82 | next if $contig{$v} eq $g; |
83 | # print STDERR "Conflict at $v between group $g and group " |
84 | # . $contig{$v} . "\n"; |
85 | $conflict = 1; |
86 | } |
87 | } |
88 | } |
89 | print join( " / ", @$groups ) . ' - ' . join( " / ", @$readings ) . ' - '; |
90 | print $conflict ? "coincidental" : "genealogical"; |
91 | print "\n"; |
92 | $useful_vars++; |
93 | |
94 | } |
95 | print "Found $useful_vars useful variants\n"; |
96 | |
97 | # Add the variant, subject to a.c. representation logic. |
98 | # This assumes that we will see the 'main' version before the a.c. version. |
99 | sub add_variant_wit { |
100 | my( $arr, $wit ) = @_; |
101 | my $acstr = $tradition->collation->ac_label; |
102 | my $skip; |
103 | if( $wit =~ /^(.*)\Q$acstr\E$/ ) { |
104 | my $real = $1; |
105 | $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr; |
106 | } |
107 | push( @$arr, $wit ) unless $skip; |
108 | } |
109 | |
110 | # Return an answer if the variant is useful, i.e. if there are at least 2 variants |
111 | # with at least 2 witnesses each. |
112 | sub useful_variant { |
113 | my( $readings ) = @_; |
114 | my $total = keys %$readings; |
115 | foreach my $var ( keys %$readings ) { |
116 | $total-- if @{$readings->{$var}} == 1; |
117 | } |
118 | return( undef, undef ) if $total <= 1; |
119 | my( $groups, $text ); |
120 | foreach my $var ( keys %$readings ) { |
121 | push( @$groups, join( ',', @{$readings->{$var}} ) ); |
122 | push( @$text, $var ); |
123 | } |
124 | return( $groups, $text ); |
125 | } |