1 package Traditions::Graph::Position;
8 Traditions::Graph::Position
12 An object to go with a text graph that keeps track of relative
13 positions of the nodes.
21 Takes two arguments: a list of names of common nodes in the graph, and
22 a list of witness paths. Calculates position identifiers for each
29 my( $common_nodes, $witness_paths ) = @_;
33 # We have to calculate the position identifiers for each word,
34 # keyed on the common nodes. This will be 'fun'. The end result
35 # is a hash per witness, whose key is the word node and whose
36 # value is its position in the text. Common nodes are always N,1
37 # so have identical positions in each text.
40 foreach my $wit ( keys %$witness_paths ) {
41 # First we walk each path, making a matrix for each witness that
42 # corresponds to its eventual position identifier. Common nodes
43 # always start a new row, and are thus always in the first column.
46 my $cn = 0; # We should hit the common nodes in order.
48 foreach my $wn ( @{$witness_paths->{$wit}} ) { # $wn is a node name
49 if( $wn eq $common_nodes->[$cn] ) {
50 # Set up to look for the next common node, and
51 # start a new row of words.
53 push( @$wit_matrix, $row ) if scalar( @$row );
58 push( @$wit_matrix, $row ); # Push the last row onto the matrix
60 # Now we have a matrix per witness, so that each row in the
61 # matrix begins with a common node, and continues with all the
62 # variant words that appear in the witness. We turn this into
63 # real positions in row,cell format. But we need some
64 # trickery in order to make sure that each node gets assigned
65 # to only one position.
67 foreach my $li ( 1..scalar(@$wit_matrix) ) {
68 foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
69 my $node = $wit_matrix->[$li-1]->[$di-1];
70 my $position = "$li,$di";
71 # If we have seen this node before, we need to compare
72 # its position with what went before.
73 unless( exists $node_pos->{ $node } &&
74 _cmp_position( $position, $node_pos->{ $node }) < 1 ) {
75 # The new position ID replaces the old one.
76 $node_pos->{$node} = $position;
77 } # otherwise, the old position needs to stay.
82 # Now we have a hash of node positions keyed on node.
83 $self->{'node_positions'} = $node_pos;
84 # We should also save our witness paths, as long as we have them.
85 # Right now each path is a list of nodes; we may want to make it
86 # a list of position refs.
87 $self->{'witness_paths'} = $witness_paths;
89 # We are also going to want to keep track of whether a position has
90 # been explicitly emptied, for our lemmatization.
91 my $position_state = {};
92 map { $position_state->{ $_ } = undef } values %$node_pos;
93 $self->{'position_state'} = $position_state;
96 bless( $self, $proto );
101 my( $self, $node ) = @_;
102 $node = _name( $node );
104 unless( exists( $self->{'node_positions'}->{ $node } ) ) {
105 warn "No node with name $node known to the graph";
109 return $self->{'node_positions'}->{ $node };
112 sub nodes_at_position {
113 my( $self, $pos ) = @_;
115 my $positions = $self->calc_positions();
116 unless( exists $positions->{ $pos } ) {
117 warn "No position $pos in the graph";
120 return @{ $positions->{ $pos }};
123 sub colocated_nodes {
124 my( $self, $node ) = @_;
125 $node = _name( $node );
126 my $pos = $self->node_position( $node );
127 my @loc_nodes = $self->nodes_at_position( $pos );
129 my @cn = grep { $_ !~ /^$node$/ } @loc_nodes;
133 # Returns an ordered list of positions in this graph
136 my $pos = $self->calc_positions;
137 return sort by_position keys( %$pos );
140 # Returns undef if no decision has been taken on this position, the
141 # node name if there is a lemma for it, and 0 if there is no lemma for
144 my( $self, $pos ) = @_;
145 return $self->{'position_state'}->{ $pos };
149 my( $self, $pos, $state ) = @_;
150 $self->{'position_state'}->{ $pos } = $state;
153 sub init_lemmatizer {
154 my( $self, @nodes ) = @_;
155 foreach my $n ( @nodes ) {
156 $self->set_state( $self->node_position( $n ), $n );
161 my( $self, $wit ) = @_;
162 return @{$self->{'witness_paths'}->{ $wit }};
165 # At some point I may find myself using scalar references for the node
166 # positions, in order to keep them easily in sync. Just in case, I will
167 # calculate this every time I need it.
170 return _invert_hash( $self->{'node_positions'} )
173 # Helper for dealing with node refs
176 # We work with node names in this library
177 if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
178 $node = $node->name();
183 ### Comparison functions
185 # Compares two nodes according to their positions in the witness
189 return _cmp_position( $a, $b );
192 # Takes two position strings (X,Y) and sorts them.
195 my @pos_a = split(/,/, $a );
196 my @pos_b = split(/,/, $b );
198 my $big_cmp = $pos_a[0] <=> $pos_b[0];
199 return $big_cmp if $big_cmp;
201 return $pos_a[1] <=> $pos_b[1];
204 # Useful helper. Will be especially useful if I find myself using
205 # scalar references for the positions after all - it can dereference
208 my ( $hash, $plaintext_keys ) = @_;
210 foreach my $key ( keys %$hash ) {
211 my $val = $hash->{$key};
215 $valkey = $plaintext_keys->{ scalar( $val ) };
216 warn( "No plaintext value given for $val" ) unless $valkey;
218 if( exists ( $new_hash{$valkey} ) ) {
219 push( @{$new_hash{$valkey}}, $key );
221 $new_hash{$valkey} = [ $key ];