slightly looser a.c. / p.c. parsing
[scpubgit/stemmatology.git] / base / script / rejoin_split.pl
CommitLineData
874e62ac 1#!/usr/bin/env perl
2
3use lib 'lib';
4use feature 'say';
5use strict;
6use warnings;
7use Text::Tradition;
8use Text::Tradition::Directory;
9use TryCatch;
10
11binmode STDOUT, ':utf8';
12binmode STDERR, ':utf8';
13
14my $dir = Text::Tradition::Directory->new(
15 'dsn' => 'dbi:SQLite:dbname=db/traditions.db',
16 );
17
18my $scope = $dir->new_scope();
19my $main = Text::Tradition->new( name => 'Heinrichi combined', language => 'Finnish' );
20$main->_init_done( 0 ); # so that we can add disconnected readings
21my $mc = $main->collation;
22
23my $first = $ARGV[0];
24my $last = $ARGV[-1];
25my $prior;
26my @endpaths;
27foreach my $id ( @ARGV ) {
28 my $tradition = $dir->lookup( $id );
29 say STDERR "Applying readings from " . $tradition->name;
30 my $c = $tradition->collation;
31
32 ## Duplicate the witnesses
33 foreach my $wit ( $tradition->witnesses ) {
34 unless( $main->has_witness( $wit ) ) {
35 my %witopts = (
36 sigil => $wit->sigil,
37 sourcetype => $wit->sourcetype,
38 is_layered => $wit->is_layered,
39 is_collated => $wit->is_collated
40 );
41 $main->add_witness( %witopts );
42 }
43 }
44
45 ## Duplicate the readings
46 foreach my $rdg ( $c->readings ) {
47 my %rdg_opts;
48 my $skip;
49
50 ## If the reading exists already, just check consistency.
51 if( $mc->reading( $rdg->id ) ) {
52 # Check that it matches
53 die "Reading mismatch at $rdg" unless $rdg->text eq $mc->reading( $rdg->id )->text;
54 } else {
55 # Create the new reading
56 %rdg_opts = (
57 id => $rdg->id,
58 text => $rdg->text,
59 is_lacuna => $rdg->is_lacuna,
60 is_common => $rdg->is_common,
61 rank => $rdg->rank,
62 join_prior => $rdg->join_prior,
63 join_next => $rdg->join_next )
64 unless keys( %rdg_opts );
65 $mc->add_reading( \%rdg_opts );
66 }
67 }
68}
69
70## Duplicate the edges
71my %need_end;
72foreach my $id ( @ARGV ) {
73 my $tradition = $dir->lookup( $id );
74 say STDERR "Applying edges from " . $tradition->name;
75 my $c = $tradition->collation;
76 foreach my $p ( sort { _by_source_rank( $a, $b, $c ) } $c->paths ) {
77 foreach my $wit ( $c->path_witnesses( $p ) ) {
78 my @vector = @$p;
79 # Don't connect to intermediate start/end nodes
80 if( $id ne $first && $c->reading( $p->[0] )->is_start ) {
81 if( exists $need_end{$wit} ) {
82 # Connect them
83 my $start = delete $need_end{$wit};
84 next if $start eq $p->[1];
85 @vector = ( $start, $p->[1] );
86 } else {
87 warn "Unconnected second half of path for $wit at " . $p->[1];
88 # Try connecting it to the start node
89 @vector = ( $mc->start, $p->[1] );
90 }
91 }
92 if ( $id ne $last && $c->reading( $p->[1] )->is_end ) {
93 $need_end{$wit} = $p->[0];
94 next;
95 }
96 unless( $mc->has_path( @vector, $wit ) ) {
97 $mc->add_path( @vector, $wit );
98 }
99 }
100 }
101 if( $id eq $last ) {
102 # Connect whatever is left in $need_end
103 foreach my $wit ( keys %need_end ) {
104 my $start = delete $need_end{$wit};
105 $mc->add_path( $start, $mc->end, $wit );
106 }
107 }
108}
109
110## Make the witness text and indicate that the paths are in place
111$mc->text_from_paths();
112$main->_init_done( 1 );
113$mc->calculate_ranks();
114
115## Apply the relationships
116foreach my $id ( @ARGV ) {
117 my $tradition = $dir->lookup( $id );
118 say STDERR "Applying relationships from " . $tradition->name;
119 my $c = $tradition->collation;
120 ## Duplicate the relationships
121 foreach my $reledge ( sort {
122 _apply_relationship_order( $a, $b, $c ) } $c->relationships ) {
123 my $rel = $c->get_relationship( $reledge );
124 # Apply it
125 try {
126 apply_relationship( $rel, $reledge );
127 } catch( Text::Tradition::Error $err ) {
128 say STDERR "Failed to apply " . $rel->type . " rel at @$reledge: "
129 . $err->message;
130 }
131 }
132}
133
134$mc->calculate_ranks();
135$mc->flatten_ranks();
136say $mc->as_graphml();
137
138sub apply_relationship {
139 my( $rel, $reledge ) = @_;
140 my $opts = {
141 type => $rel->type,
142 scope => $rel->scope };
143 $opts->{annotation} = $rel->annotation if $rel->has_annotation;
144 $opts->{thispaironly} = 1 if $rel->nonlocal;
145 unless( $mc->get_relationship( $reledge ) ) {
146 $mc->add_relationship( @$reledge, $opts );
147 }
148}
149
150# Helper function for applying the path edges in the expected order
151sub _by_source_rank {
152 my( $a, $b, $c ) = @_;
153 return $c->reading( $a->[0] )->rank <=> $c->reading( $b->[0] )->rank;
154}
155
156# Helper sort function for applying the saved relationships in a
157# sensible order.
158sub _apply_relationship_order {
159 my( $a, $b, $c ) = @_;
160 my $rg = $mc->relations;
161 my $aobj = $c->get_relationship( $a ); my $bobj = $c->get_relationship( $b );
162 my $at = $rg->type( $aobj->type ); my $bt = $rg->type( $bobj->type );
163 # Apply strong relationships before weak
164 return -1 if $bt->is_weak && !$at->is_weak;
165 return 1 if $at->is_weak && !$bt->is_weak;
166 # Apply local before global
167 return -1 if !$aobj->nonlocal && $bobj->nonlocal;
168 return 1 if !$bobj->nonlocal && $aobj->nonlocal;
169 # Apply more tightly bound relationships first
170 return $at->bindlevel <=> $bt->bindlevel;
171}