lib/Text/Tradition/Graph/Position.pm

   1 package Text::Tradition::Graph::Position;
   2
   3 use strict;
   4 use warnings;
   5
   6 =head1 NAME
   7
   8 Text::Tradition::Graph::Position
   9
  10 =head1 SUMMARY
  11
  12 An object to go with a text graph that keeps track of relative
  13 positions of the nodes on that graph.  This is useful for keeping
  14 track of which readings are variants of each other, which is expensive
  15 to calculate every time from the graph itself.
  16
  17 =head1 METHODS
  18
  19 =over 4
  20
  21 =item B<new>
  22
  23 Takes two arguments: a list of names of common nodes in the graph, and
  24 a list of witness paths.  Calculates position identifiers for each
  25 node based on this.
  26
  27 =cut
  28
  29 # TODO Why not just hand over the graph and calculate the common nodes
  30 # and witness paths here?
  31 sub new {
  32     my $proto = shift;
  33     my( $common_nodes, $witness_paths ) = @_;
  34
  35     my $self = {};
  36
  37     # We have to calculate the position identifiers for each word,
  38     # keyed on the common nodes.  This will be 'fun'.  The end result
  39     # is a hash per witness, whose key is the word node and whose
  40     # value is its position in the text.  Common nodes are always N,1
  41     # so have identical positions in each text.
  42
  43     my $node_pos = {};
  44     foreach my $wit ( keys %$witness_paths ) {
  45         # First we walk each path, making a matrix for each witness that
  46         # corresponds to its eventual position identifier.  Common nodes
  47         # always start a new row, and are thus always in the first column.
  48
  49         my $wit_matrix = [];
  50         my $cn = 0;  # We should hit the common nodes in order.
  51         my $row = [];
  52         foreach my $wn ( @{$witness_paths->{$wit}} ) { # $wn is a node name
  53             if( $wn eq $common_nodes->[$cn] ) {
  54                 # Set up to look for the next common node, and
  55                 # start a new row of words.
  56                 $cn++;
  57                 push( @$wit_matrix, $row ) if scalar( @$row );
  58                 $row = [];
  59             }
  60             push( @$row, $wn );
  61         }
  62         push( @$wit_matrix, $row );  # Push the last row onto the matrix
  63
  64         # Now we have a matrix per witness, so that each row in the
  65         # matrix begins with a common node, and continues with all the
  66         # variant words that appear in the witness.  We turn this into
  67         # real positions in row,cell format.  But we need some
  68         # trickery in order to make sure that each node gets assigned
  69         # to only one position.
  70
  71         foreach my $li ( 1..scalar(@$wit_matrix) ) {
  72             foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
  73                 my $node = $wit_matrix->[$li-1]->[$di-1];
  74                 my $position = "$li,$di";
  75                 # If we have seen this node before, we need to compare
  76                 # its position with what went before.
  77                 unless( exists $node_pos->{ $node } &&
  78                         _cmp_position( $position, $node_pos->{ $node }) < 1 ) {
  79                     # The new position ID replaces the old one.
  80                     $node_pos->{$node} = $position;
  81                 } # otherwise, the old position needs to stay.
  82             }
  83         }
  84     }
  85
  86     # Now we have a hash of node positions keyed on node.
  87     $self->{'node_positions'} = $node_pos;
  88     # We should also save our witness paths, as long as we have them.
  89     # Right now each path is a list of nodes; we may want to make it
  90     # a list of position refs.
  91     $self->{'witness_paths'} = $witness_paths;
  92
  93     # We are also going to want to keep track of whether a position has
  94     # been explicitly emptied, for our lemmatization.
  95     my $position_state = {};
  96     map { $position_state->{ $_ } = undef } values %$node_pos;
  97     $self->{'position_state'} = $position_state;
  98
  99
 100     bless( $self, $proto );
 101     return $self;
 102 }
 103
 104 =item B<node_position>
 105
 106 my $pos = $positions->node_position( $node );
 107
 108 Returns the position identifier for a given node in the graph.
 109
 110 =cut
 111
 112 sub node_position {
 113     my( $self, $node ) = @_;
 114     $node = _name( $node );
 115
 116     unless( exists( $self->{'node_positions'}->{ $node } ) ) {
 117         warn "No node with name $node known to the graph";
 118         return;
 119     }
 120
 121     return $self->{'node_positions'}->{ $node };
 122 }
 123
 124 =item B<nodes_at_position>
 125
 126 my @nodes = $positions->nodes_at_position( $pos );
 127
 128 Returns the names of all the nodes in the graph at a given position.
 129
 130 =cut
 131
 132 sub nodes_at_position {
 133     my( $self, $pos ) = @_;
 134
 135     my $positions = $self->_calc_positions();
 136     unless( exists $positions->{ $pos } ) {
 137         warn "No position $pos in the graph";
 138         return;
 139     }
 140     return @{ $positions->{ $pos }};
 141 }
 142
 143 =item B<colocated_nodes>
 144
 145 my @nodes = $positions->colocated_nodes( $node );
 146
 147 Returns the names of all the nodes in the graph at the same position
 148 as the node given, apart from that node itself.
 149
 150 =cut
 151
 152 sub colocated_nodes {
 153     my( $self, $node ) = @_;
 154     $node = _name( $node );
 155     my $pos = $self->node_position( $node );
 156     my @loc_nodes = $self->nodes_at_position( $pos );
 157
 158     my @cn = grep { $_ !~ /^$node$/ } @loc_nodes;
 159     return @cn;
 160 }
 161
 162 =item B<all>
 163
 164 my @position_list = $positions->all()
 165
 166 Returns an ordered list of positions in the graph.
 167
 168 =cut
 169
 170 sub all {
 171     my( $self ) = @_;
 172     my $pos = $self->_calc_positions;
 173     return sort by_position keys( %$pos );
 174 }
 175
 176 sub witness_path {
 177     my( $self, $wit ) = @_;
 178     return @{$self->{'witness_paths'}->{ $wit }};
 179 }
 180
 181 =back
 182
 183 =head2 Lemmatization functions
 184
 185 For some traditions, each position will have at least one node that is
 186 the 'lemma text', that is, the text that an editor has chosen to stand
 187 as authoritative for the tradition.  The following methods keep
 188 track of what lemma, if any, should stand at each position.
 189
 190 =over
 191
 192 =item B<init_lemmatizer>
 193
 194 $positions->init_lemmatizer( @nodelist )
 195
 196 Sets up the necessary logic for keeping track of lemmas.  It should be
 197 called once, with the initial list of lemmas.
 198
 199 =cut
 200
 201 # TODO We can initialize this without the argument, based on the passed
 202 # list of common nodes.
 203 sub init_lemmatizer {
 204     my( $self, @nodes ) = @_;
 205     foreach my $n ( @nodes ) {
 206         $self->set_state( $self->node_position( $n ), $n );
 207     }
 208 }
 209
 210 =item B<state>
 211
 212 my $answer = $positions->state( $position_id )
 213
 214 For the given position ID, returns the node (if any) that stands at
 215 the lemma.  If no node should stand as lemma at this position, returns
 216 0; if no decision has been made for this position, returns undef.
 217
 218 =cut
 219
 220 sub state {
 221     my( $self, $pos ) = @_;
 222     return $self->{'position_state'}->{ $pos };
 223 }
 224
 225 =item B<set_state>
 226
 227 $positions->set_state( $position_id, $state )
 228
 229 For the given position ID, sets the lemma (if any).  State can be the
 230 name of a node, 0 (for cases when no lemma should stand), or undef
 231 (for cases when no decision has been made).
 232
 233 =cut
 234
 235 sub set_state {
 236     my( $self, $pos, $state ) = @_;
 237     $self->{'position_state'}->{ $pos } = $state;
 238 }
 239
 240 =back
 241
 242 =head2 Comparison function
 243
 244 =over
 245
 246 =item B<by_position>
 247
 248 my @nodelist = sort $positions->by_position @nodelist;
 249
 250 For use in the 'sort' function.  Returns a comparison value based on
 251 the position of the given nodes.
 252
 253 =cut
 254
 255 # Compares two nodes according to their positions in the witness
 256 # index hash.
 257 sub by_position {
 258     my $self = shift;
 259     return _cmp_position( $a, $b );
 260 }
 261
 262 # Takes two position strings (X,Y) and sorts them.
 263 sub _cmp_position {
 264     my( $a, $b ) = @_;
 265     my @pos_a = split(/,/, $a );
 266     my @pos_b = split(/,/, $b );
 267
 268     my $big_cmp = $pos_a[0] <=> $pos_b[0];
 269     return $big_cmp if $big_cmp;
 270     # else
 271     return $pos_a[1] <=> $pos_b[1];
 272 }
 273
 274
 275 #### HELPER FUNCTIONS ####
 276
 277 # At some point I may find myself using scalar references for the node
 278 # positions, in order to keep them easily in sync.  Just in case, I will
 279 # calculate this every time I need it.
 280 sub _calc_positions {
 281     my $self = shift;
 282     return _invert_hash( $self->{'node_positions'} )
 283 }
 284
 285 # Helper for dealing with node refs
 286 sub _name {
 287     my( $node ) = @_;
 288     # We work with node names in this library
 289     if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
 290         $node = $node->name();
 291     }
 292     return $node;
 293 }
 294
 295 # Useful helper.  Will be especially useful if I find myself using
 296 # scalar references for the positions after all - it can dereference
 297 # them here.
 298 sub _invert_hash {
 299     my ( $hash, $plaintext_keys ) = @_;
 300     my %new_hash;
 301     foreach my $key ( keys %$hash ) {
 302         my $val = $hash->{$key};
 303         my $valkey = $val;
 304         if( $plaintext_keys
 305             && ref( $val ) ) {
 306             $valkey = $plaintext_keys->{ scalar( $val ) };
 307             warn( "No plaintext value given for $val" ) unless $valkey;
 308         }
 309         if( exists ( $new_hash{$valkey} ) ) {
 310             push( @{$new_hash{$valkey}}, $key );
 311         } else {
 312             $new_hash{$valkey} = [ $key ];
 313         }
 314     }
 315     return \%new_hash;
 316 }
 317
 318 =back
 319
 320 =head1 LICENSE
 321
 322 This package is free software and is provided "as is" without express
 323 or implied warranty.  You can redistribute it and/or modify it under
 324 the same terms as Perl itself.
 325
 326 =head1 AUTHOR
 327
 328 Tara L Andrews, aurum@cpan.org
 329
 330 =cut
 331
 332 1;