1 package Text::Tradition::Graph::Position;
8 Text::Tradition::Graph::Position
12 An object to go with a text graph that keeps track of relative
13 positions of the nodes on that graph. This is useful for keeping
14 track of which readings are variants of each other, which is expensive
15 to calculate every time from the graph itself.
23 Takes two arguments: a list of names of common nodes in the graph, and
24 a list of witness paths. Calculates position identifiers for each
29 # TODO Why not just hand over the graph and calculate the common nodes
30 # and witness paths here?
33 my( $common_nodes, $witness_paths ) = @_;
37 # We have to calculate the position identifiers for each word,
38 # keyed on the common nodes. This will be 'fun'. The end result
39 # is a hash per witness, whose key is the word node and whose
40 # value is its position in the text. Common nodes are always N,1
41 # so have identical positions in each text.
44 foreach my $wit ( keys %$witness_paths ) {
45 # First we walk each path, making a matrix for each witness that
46 # corresponds to its eventual position identifier. Common nodes
47 # always start a new row, and are thus always in the first column.
50 my $cn = 0; # We should hit the common nodes in order.
52 foreach my $wn ( @{$witness_paths->{$wit}} ) { # $wn is a node name
53 if( $wn eq $common_nodes->[$cn] ) {
54 # Set up to look for the next common node, and
55 # start a new row of words.
57 push( @$wit_matrix, $row ) if scalar( @$row );
62 push( @$wit_matrix, $row ); # Push the last row onto the matrix
64 # Now we have a matrix per witness, so that each row in the
65 # matrix begins with a common node, and continues with all the
66 # variant words that appear in the witness. We turn this into
67 # real positions in row,cell format. But we need some
68 # trickery in order to make sure that each node gets assigned
69 # to only one position.
71 foreach my $li ( 1..scalar(@$wit_matrix) ) {
72 foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
73 my $node = $wit_matrix->[$li-1]->[$di-1];
74 my $position = "$li,$di";
75 # If we have seen this node before, we need to compare
76 # its position with what went before.
77 unless( exists $node_pos->{ $node } &&
78 _cmp_position( $position, $node_pos->{ $node }) < 1 ) {
79 # The new position ID replaces the old one.
80 $node_pos->{$node} = $position;
81 } # otherwise, the old position needs to stay.
86 # Now we have a hash of node positions keyed on node.
87 $self->{'node_positions'} = $node_pos;
88 # We should also save our witness paths, as long as we have them.
89 # Right now each path is a list of nodes; we may want to make it
90 # a list of position refs.
91 $self->{'witness_paths'} = $witness_paths;
93 # We are also going to want to keep track of whether a position has
94 # been explicitly emptied, for our lemmatization.
95 my $position_state = {};
96 map { $position_state->{ $_ } = undef } values %$node_pos;
97 $self->{'position_state'} = $position_state;
100 bless( $self, $proto );
104 =item B<node_position>
106 my $pos = $positions->node_position( $node );
108 Returns the position identifier for a given node in the graph.
113 my( $self, $node ) = @_;
114 $node = _name( $node );
116 unless( exists( $self->{'node_positions'}->{ $node } ) ) {
117 warn "No node with name $node known to the graph";
121 return $self->{'node_positions'}->{ $node };
124 =item B<nodes_at_position>
126 my @nodes = $positions->nodes_at_position( $pos );
128 Returns the names of all the nodes in the graph at a given position.
132 sub nodes_at_position {
133 my( $self, $pos ) = @_;
135 my $positions = $self->_calc_positions();
136 unless( exists $positions->{ $pos } ) {
137 warn "No position $pos in the graph";
140 return @{ $positions->{ $pos }};
143 =item B<colocated_nodes>
145 my @nodes = $positions->colocated_nodes( $node );
147 Returns the names of all the nodes in the graph at the same position
148 as the node given, apart from that node itself.
152 sub colocated_nodes {
153 my( $self, $node ) = @_;
154 $node = _name( $node );
155 my $pos = $self->node_position( $node );
156 my @loc_nodes = $self->nodes_at_position( $pos );
158 my @cn = grep { $_ !~ /^$node$/ } @loc_nodes;
164 my @position_list = $positions->all()
166 Returns an ordered list of positions in the graph.
172 my $pos = $self->_calc_positions;
173 return sort by_position keys( %$pos );
177 my( $self, $wit ) = @_;
178 return @{$self->{'witness_paths'}->{ $wit }};
183 =head2 Lemmatization functions
185 For some traditions, each position will have at least one node that is
186 the 'lemma text', that is, the text that an editor has chosen to stand
187 as authoritative for the tradition. The following methods keep
188 track of what lemma, if any, should stand at each position.
192 =item B<init_lemmatizer>
194 $positions->init_lemmatizer( @nodelist )
196 Sets up the necessary logic for keeping track of lemmas. It should be
197 called once, with the initial list of lemmas.
201 # TODO We can initialize this without the argument, based on the passed
202 # list of common nodes.
203 sub init_lemmatizer {
204 my( $self, @nodes ) = @_;
205 foreach my $n ( @nodes ) {
206 $self->set_state( $self->node_position( $n ), $n );
212 my $answer = $positions->state( $position_id )
214 For the given position ID, returns the node (if any) that stands at
215 the lemma. If no node should stand as lemma at this position, returns
216 0; if no decision has been made for this position, returns undef.
221 my( $self, $pos ) = @_;
222 return $self->{'position_state'}->{ $pos };
227 $positions->set_state( $position_id, $state )
229 For the given position ID, sets the lemma (if any). State can be the
230 name of a node, 0 (for cases when no lemma should stand), or undef
231 (for cases when no decision has been made).
236 my( $self, $pos, $state ) = @_;
237 $self->{'position_state'}->{ $pos } = $state;
242 =head2 Comparison function
248 my @nodelist = sort $positions->by_position @nodelist;
250 For use in the 'sort' function. Returns a comparison value based on
251 the position of the given nodes.
255 # Compares two nodes according to their positions in the witness
259 return _cmp_position( $a, $b );
262 # Takes two position strings (X,Y) and sorts them.
265 my @pos_a = split(/,/, $a );
266 my @pos_b = split(/,/, $b );
268 my $big_cmp = $pos_a[0] <=> $pos_b[0];
269 return $big_cmp if $big_cmp;
271 return $pos_a[1] <=> $pos_b[1];
275 #### HELPER FUNCTIONS ####
277 # At some point I may find myself using scalar references for the node
278 # positions, in order to keep them easily in sync. Just in case, I will
279 # calculate this every time I need it.
280 sub _calc_positions {
282 return _invert_hash( $self->{'node_positions'} )
285 # Helper for dealing with node refs
288 # We work with node names in this library
289 if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
290 $node = $node->name();
295 # Useful helper. Will be especially useful if I find myself using
296 # scalar references for the positions after all - it can dereference
299 my ( $hash, $plaintext_keys ) = @_;
301 foreach my $key ( keys %$hash ) {
302 my $val = $hash->{$key};
306 $valkey = $plaintext_keys->{ scalar( $val ) };
307 warn( "No plaintext value given for $val" ) unless $valkey;
309 if( exists ( $new_hash{$valkey} ) ) {
310 push( @{$new_hash{$valkey}}, $key );
312 $new_hash{$valkey} = [ $key ];
322 This package is free software and is provided "as is" without express
323 or implied warranty. You can redistribute it and/or modify it under
324 the same terms as Perl itself.
328 Tara L Andrews, aurum@cpan.org