Move some active node logic into the positions library
[scpubgit/stemmatology.git] / lib / Traditions / Graph / Position.pm
CommitLineData
a25d4374 1package Traditions::Graph::Position;
2
3use strict;
4use warnings;
5
6=head1 NAME
7
8Traditions::Graph::Position
9
10=head1 SUMMARY
11
12An object to go with a text graph that keeps track of relative
13positions of the nodes.
14
15=head1 METHODS
16
17=over 4
18
19=item B<new>
20
21Takes two arguments: a list of names of common nodes in the graph, and
22a list of witness paths. Calculates position identifiers for each
23node based on this.
24
25=cut
26
27sub new {
28 my $proto = shift;
29 my( $common_nodes, $witness_paths ) = @_;
30
31 my $self = {};
32
33 # We have to calculate the position identifiers for each word,
34 # keyed on the common nodes. This will be 'fun'. The end result
35 # is a hash per witness, whose key is the word node and whose
36 # value is its position in the text. Common nodes are always N,1
37 # so have identical positions in each text.
38
39 my $node_pos = {};
40 foreach my $wit ( keys %$witness_paths ) {
41 # First we walk each path, making a matrix for each witness that
42 # corresponds to its eventual position identifier. Common nodes
43 # always start a new row, and are thus always in the first column.
44
45 my $wit_matrix = [];
46 my $cn = 0; # We should hit the common nodes in order.
47 my $row = [];
48 foreach my $wn ( @{$witness_paths->{$wit}} ) { # $wn is a node name
49 if( $wn eq $common_nodes->[$cn] ) {
50 # Set up to look for the next common node, and
51 # start a new row of words.
52 $cn++;
53 push( @$wit_matrix, $row ) if scalar( @$row );
54 $row = [];
55 }
56 push( @$row, $wn );
57 }
58 push( @$wit_matrix, $row ); # Push the last row onto the matrix
59
60 # Now we have a matrix per witness, so that each row in the
61 # matrix begins with a common node, and continues with all the
62 # variant words that appear in the witness. We turn this into
63 # real positions in row,cell format. But we need some
64 # trickery in order to make sure that each node gets assigned
65 # to only one position.
66
67 foreach my $li ( 1..scalar(@$wit_matrix) ) {
68 foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
69 my $node = $wit_matrix->[$li-1]->[$di-1];
70 my $position = "$li,$di";
71 # If we have seen this node before, we need to compare
72 # its position with what went before.
73 unless( exists $node_pos->{ $node } &&
74 _cmp_position( $position, $node_pos->{ $node }) < 1 ) {
75 # The new position ID replaces the old one.
76 $node_pos->{$node} = $position;
77 } # otherwise, the old position needs to stay.
78 }
79 }
80 }
58a3c424 81
a25d4374 82 # Now we have a hash of node positions keyed on node.
83 $self->{'node_positions'} = $node_pos;
58a3c424 84 # We should also save our witness paths, as long as we have them.
85 # Right now each path is a list of nodes; we may want to make it
86 # a list of position refs.
a25d4374 87 $self->{'witness_paths'} = $witness_paths;
88
58a3c424 89 # We are also going to want to keep track of whether a position has
90 # been explicitly emptied, for our lemmatization.
91 my $position_state = {};
92 map { $position_state->{ $_ } = undef } values %$node_pos;
93 $self->{'position_state'} = $position_state;
94
95
a25d4374 96 bless( $self, $proto );
97 return $self;
98}
99
100sub node_position {
101 my( $self, $node ) = @_;
102 $node = _name( $node );
103
104 unless( exists( $self->{'node_positions'}->{ $node } ) ) {
105 warn "No node with name $node known to the graph";
106 return;
107 }
108
109 return $self->{'node_positions'}->{ $node };
110}
111
112sub nodes_at_position {
113 my( $self, $pos ) = @_;
114
115 my $positions = $self->calc_positions();
116 unless( exists $positions->{ $pos } ) {
117 warn "No position $pos in the graph";
118 return;
119 }
120 return @{ $positions->{ $pos }};
121}
122
123sub colocated_nodes {
124 my( $self, $node ) = @_;
125 $node = _name( $node );
126 my $pos = $self->node_position( $node );
127 my @loc_nodes = $self->nodes_at_position( $pos );
128
129 my @cn = grep { $_ !~ /^$node$/ } @loc_nodes;
130 return @cn;
131}
132
58a3c424 133# Returns an ordered list of positions in this graph
a25d4374 134sub all {
135 my( $self ) = @_;
136 my $pos = $self->calc_positions;
137 return sort by_position keys( %$pos );
138}
139
58a3c424 140# Returns undef if no decision has been taken on this position, the
141# node name if there is a lemma for it, and 0 if there is no lemma for
142# it.
143sub state {
144 my( $self, $pos ) = @_;
145 return $self->{'position_state'}->{ $pos };
146}
147
148sub set_state {
149 my( $self, $pos, $state ) = @_;
150 $self->{'position_state'}->{ $pos } = $state;
151}
152
153sub init_lemmatizer {
154 my( $self, @nodes ) = @_;
155 foreach my $n ( @nodes ) {
156 $self->set_state( $self->node_position( $n ), $n );
157 }
158}
159
a25d4374 160sub witness_path {
161 my( $self, $wit ) = @_;
162 return @{$self->{'witness_paths'}->{ $wit }};
163}
164
165# At some point I may find myself using scalar references for the node
166# positions, in order to keep them easily in sync. Just in case, I will
167# calculate this every time I need it.
168sub calc_positions {
169 my $self = shift;
170 return _invert_hash( $self->{'node_positions'} )
171}
172
173# Helper for dealing with node refs
174sub _name {
175 my( $node ) = @_;
176 # We work with node names in this library
177 if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
178 $node = $node->name();
179 }
180 return $node;
181}
182
183### Comparison functions
184
185# Compares two nodes according to their positions in the witness
186# index hash.
187sub by_position {
188 my $self = shift;
189 return _cmp_position( $a, $b );
190}
191
192# Takes two position strings (X,Y) and sorts them.
193sub _cmp_position {
194 my( $a, $b ) = @_;
195 my @pos_a = split(/,/, $a );
196 my @pos_b = split(/,/, $b );
197
198 my $big_cmp = $pos_a[0] <=> $pos_b[0];
199 return $big_cmp if $big_cmp;
200 # else
201 return $pos_a[1] <=> $pos_b[1];
202}
203
204# Useful helper. Will be especially useful if I find myself using
205# scalar references for the positions after all - it can dereference
206# them here.
207sub _invert_hash {
208 my ( $hash, $plaintext_keys ) = @_;
209 my %new_hash;
210 foreach my $key ( keys %$hash ) {
211 my $val = $hash->{$key};
212 my $valkey = $val;
213 if( $plaintext_keys
214 && ref( $val ) ) {
215 $valkey = $plaintext_keys->{ scalar( $val ) };
216 warn( "No plaintext value given for $val" ) unless $valkey;
217 }
218 if( exists ( $new_hash{$valkey} ) ) {
219 push( @{$new_hash{$valkey}}, $key );
220 } else {
221 $new_hash{$valkey} = [ $key ];
222 }
223 }
224 return \%new_hash;
225}
226
2271;