sub next_word {
# Return the successor via the corresponding edge.
- my( $self, $node, $edge ) = @_;
- $edge = "base text" unless $edge;
- my @next_edges = $node->outgoing();
- return undef unless scalar( @next_edges );
-
- foreach my $e ( @next_edges ) {
- next unless $e->label() eq $edge;
- return $e->to();
- }
-
- warn "Could not find node connected to edge $edge";
- return undef;
+ my $self = shift;
+ return $self->_find_linked_word( 'next', @_ );
}
=item B<prior_word>
sub prior_word {
# Return the predecessor via the corresponding edge.
- my( $self, $node, $edge ) = @_;
- $edge = "base text" unless $edge;
- my @prior_edges = $node->incoming();
- return undef unless scalar( @prior_edges );
+ my $self = shift;
+ return $self->_find_linked_word( 'prior', @_ );
+}
+
+sub _find_linked_word {
+ my( $self, $direction, $node, $edge ) = @_;
+ $edge = 'base text' unless $edge;
+ my @linked_edges = $direction eq 'next'
+ ? $node->outgoing() : $node->incoming();
+ return undef unless scalar( @linked_edges );
- foreach my $e ( @prior_edges ) {
- next unless $e->label() eq $edge;
- return $e->from();
+ # We have to find the linked edge that contains all of the
+ # witnesses supplied in $edge.
+ my @edge_wits = split( /, /, $edge );
+ foreach my $le ( @linked_edges ) {
+ my @le_wits = split( /, /, $le->name() );
+ if( _is_within( \@edge_wits, \@le_wits ) ) {
+ # This is the right edge.
+ return $direction eq 'next' ? $le->to() : $le->from();
+ }
}
-
- warn "Could not find node connected from edge $edge";
+ warn "Could not find $direction node from " . $node->label
+ . " along edge $edge";
return undef;
}
+# Some set logic.
+sub _is_within {
+ my( $set1, $set2 ) = @_;
+ my $ret = 1;
+ foreach my $el ( @$set1 ) {
+ $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
+ }
+ return $ret;
+}
+
=item B<node_sequence>
my @nodes = $graph->node_sequence( $first, $last, $path );
}
-# Takes a list of nodes that have just been turned off, and returns a
-# set of tuples of the form ['node', 'state'] that indicates what
-# changes need to be made to the graph.
-# A state of 1 means 'turn on this node'
-# A state of 0 means 'turn off this node'
-# A state of undef means 'an ellipsis belongs in the text here because
-# no decision has been made'
+=item B<toggle_node>
+
+my @nodes_turned_off = $graph->toggle_node( $node );
+
+Takes a node name, and either lemmatizes or de-lemmatizes it. Returns
+a list of all nodes that are de-lemmatized as a result of the toggle.
+
+=cut
+
+sub toggle_node {
+ my( $self, $node ) = @_;
+
+ # In case this is being called for the first time.
+ $self->init_lemmatizer();
+
+ if( $self->is_common( $node ) ) {
+ # Do nothing, it's a common node.
+ return;
+ }
+
+ my $pos = $self->{'positions'}->node_position( $node );
+ my $old_state = $self->{'positions'}->state( $pos );
+ my @nodes_off;
+ if( $old_state && $old_state eq $node ) {
+ # Turn off the node. We turn on no others by default.
+ push( @nodes_off, $node );
+ } else {
+ # Turn on the node.
+ $self->{'positions'}->set_state( $pos, $node );
+ # Any other 'on' nodes in the same position should be off.
+ push( @nodes_off, $self->colocated_nodes( $node ) );
+ # Any node that is an identical transposed one should be off.
+ push( @nodes_off, $self->identical_nodes( $node ) )
+ if $self->identical_nodes( $node );
+ }
+ @nodes_off = unique_list( @nodes_off );
+
+ # Turn off the nodes that need to be turned off.
+ my @nodes_turned_off;
+ foreach my $n ( @nodes_off ) {
+ my $npos = $self->{'positions'}->node_position( $n );
+ my $state = $self->{'positions'}->state( $npos );
+ if( $state && $state eq $n ) {
+ # this node is still on
+ push( @nodes_turned_off, $n );
+ my $new_state = undef;
+ if( $n eq $node ) {
+ # This is the node that was clicked, so if there are no
+ # other nodes there, turn off the position. In all other
+ # cases, restore the ellipsis.
+ my @all_n = $self->{'positions'}->nodes_at_position( $pos );
+ $new_state = 0 if scalar( @all_n ) == 1;
+ }
+ $self->{'positions'}->set_state( $npos, $new_state );
+ } elsif( $old_state && $old_state eq $n ) {
+ # another node has already been turned on here
+ push( @nodes_turned_off, $n );
+ } # else some other node was on anyway, so pass.
+ }
+ return @nodes_turned_off;
+}
+
+=item B<active_nodes>
+
+my @state = $graph->active_nodes( @nodes_turned_off );
+
+Takes a list of nodes that have just been turned off, and returns a
+set of tuples of the form ['node', 'state'] that indicates what
+changes need to be made to the graph.
+
+=over
+
+=item *
+
+A state of 1 means 'turn on this node'
+
+=item *
+
+A state of 0 means 'turn off this node'
+
+=item *
+
+A state of undef means 'an ellipsis belongs in the text here because
+no decision has been made'
+
+=back
+
+=cut
+
sub active_nodes {
my( $self, @toggled_off_nodes ) = @_;
return $node;
}
-# toggle_node takes a node name, and either lemmatizes or de-lemmatizes it.
-# Returns a list of nodes that are de-lemmatized as a result of the toggle.
-
-sub toggle_node {
- my( $self, $node ) = @_;
-
- # In case this is being called for the first time.
- $self->init_lemmatizer();
-
- if( $self->is_common( $node ) ) {
- # Do nothing, it's a common node.
- return;
- }
-
- my $pos = $self->{'positions'}->node_position( $node );
- my $old_state = $self->{'positions'}->state( $pos );
- my @nodes_off;
- if( $old_state && $old_state eq $node ) {
- # Turn off the node. We turn on no others by default.
- push( @nodes_off, $node );
- } else {
- # Turn on the node.
- $self->{'positions'}->set_state( $pos, $node );
- # Any other 'on' nodes in the same position should be off.
- push( @nodes_off, $self->colocated_nodes( $node ) );
- # Any node that is an identical transposed one should be off.
- push( @nodes_off, $self->identical_nodes( $node ) )
- if $self->identical_nodes( $node );
- }
- @nodes_off = unique_list( @nodes_off );
-
- # Turn off the nodes that need to be turned off.
- my @nodes_turned_off;
- foreach my $n ( @nodes_off ) {
- my $npos = $self->{'positions'}->node_position( $n );
- my $state = $self->{'positions'}->state( $npos );
- if( $state && $state eq $n ) {
- # this node is still on
- push( @nodes_turned_off, $n );
- my $new_state = undef;
- if( $n eq $node ) {
- # This is the node that was clicked, so if there are no
- # other nodes there, turn off the position. In all other
- # cases, restore the ellipsis.
- my @all_n = $self->{'positions'}->nodes_at_position( $pos );
- $new_state = 0 if scalar( @all_n ) == 1;
- }
- $self->{'positions'}->set_state( $npos, $new_state );
- } elsif( $old_state && $old_state eq $n ) {
- # another node has already been turned on here
- push( @nodes_turned_off, $n );
- } # else some other node was on anyway, so pass.
- }
- return @nodes_turned_off;
-}
-
sub colocated_nodes {
my $self = shift;
return $self->{'positions'}->colocated_nodes( @_ );
# DEBUG with a short graph
# last if $line > 2;
# DEBUG for problematic entries
- # my $scrutinize = "21.8";
+ my $scrutinize = "";
my $first_line_node = $base_line_starts[ $line ];
my $too_far = $base_line_starts[ $line+1 ];
# Now we have our lemma nodes; we add the variant nodes to the graph.
+ # Keep track of the start and end point of each reading for later
+ # node collapse.
+ my @readings = ( $lemma_start, $lemma_end );
+
# For each reading that is not rdg_0, we make a chain of nodes
# and connect them to the anchor. Edges are named after the mss
# that are relevant.
$graph->add_edge( $last_node, $graph->next_word( $lemma_end ),
$edge_name );
- # Now collate and collapse the identical nodes within the graph.
- collate_variant( $graph, $lemma_start, $lemma_end,
- $var_start, $last_node );
-
+ if( $var_start ) { # if it wasn't an empty reading
+ push( @readings, $var_start, $last_node );
+ }
}
+
+ # Now collate and collapse the identical nodes within the graph.
+ collate_variants( $graph, @readings );
}
## Now in theory I have a graph. I want to make it a little easier to
$started = 1;
}
if( $last_node ) {
- $graph->add_edge( $last_node, $node, "base text" );
+ my $edge = $graph->add_edge( $last_node, $node, "base text" );
+ $edge->set_attribute( 'class', 'basetext' );
$last_node = $node;
} # TODO there should be no else here...
}
return( @$lineref_array );
}
-=item B<collate_variant>
+=item B<collate_variants>
-collate_variant( $graph, $lemma_start, $lemma_end, $var_start, $var_end );
+collate_variants( $graph, @readings )
-Given a lemma and a variant as start- and endpoints on the graph,
+Given a set of readings in the form
+( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
walks through each to identify those nodes that are identical. The
-graph is a Text::Tradition::Graph object; the other arguments are
+graph is a Text::Tradition::Graph object; the elements of @readings are
Graph::Easy::Node objects that appear on the graph.
TODO: Handle collapsed and non-collapsed transpositions.
=cut
-sub collate_variant {
- my( $graph, $lemma_start, $lemma_end, $var_start, $var_end ) = @_;
- # If var_start is undef, then the variant is an omission and
- # there's nothing to collate. Return.
- return unless $var_start;
-
- # I want to look at the nodes in the variant and lemma, and
- # collapse nodes that are the same word. This is mini-collation.
- my %collapsed = ();
- # There will only be one outgoing edge at first, so this is safe.
- my @out = $var_start->outgoing();
- my $var_label = $out[0]->label();
+sub collate_variants {
+ my( $graph, @readings ) = @_;
+ my $lemma_start = shift @readings;
+ my $lemma_end = shift @readings;
+ my $detranspose = 1;
- my @lemma_nodes;
+ # Start the list of distinct nodes with those nodes in the lemma.
+ my @distinct_nodes;
while( $lemma_start ne $lemma_end ) {
- push( @lemma_nodes, $lemma_start );
+ push( @distinct_nodes, [ $lemma_start, 'base text' ] );
$lemma_start = $graph->next_word( $lemma_start );
}
- push( @lemma_nodes, $lemma_end );
+ push( @distinct_nodes, [ $lemma_end, 'base text' ] );
- my @variant_nodes;
- while( $var_start ne $var_end ) {
- push( @variant_nodes, $var_start );
- $var_start = $graph->next_word( $var_start, $var_label );
- }
- push( @variant_nodes, $var_end );
-
- # Go through the variant nodes, and if we find a lemma node that
- # hasn't yet been collapsed with a node, equate them.
-
- foreach my $w ( @variant_nodes ) {
- my $word = $w->label();
- foreach my $l ( @lemma_nodes ) {
- if( $word eq cmp_str( $l ) ) {
- next if exists( $collapsed{ $l->label } )
- && $collapsed{ $l->label } eq $l;
- # Collapse the nodes.
- printf STDERR "Merging nodes %s/%s and %s/%s\n",
- $l->name, $l->label, $w->name, $w->label;
- $graph->merge_nodes( $l, $w );
- $collapsed{ $l->label } = $l;
- # Now collapse any multiple edges to and from the node.
- # Rely on the presence of the 'base text' edge.
- remove_duplicate_edges( $graph, $graph->prior_word( $l ), $l );
- remove_duplicate_edges( $graph, $l, $graph->next_word( $l ) );
+
+ while( scalar @readings ) {
+ my( $var_start, $var_end ) = splice( @readings, 0, 2 );
+
+ # I want to look at the nodes in the variant and lemma, and
+ # collapse nodes that are the same word. This is mini-collation.
+ # Each word in the 'main' list can only be collapsed once with a
+ # word from the current reading.
+ my %collapsed = ();
+
+ # Get the label. There will only be one outgoing edge to start
+ # with, so this is safe.
+ my @out = $var_start->outgoing();
+ my $var_label = $out[0]->label();
+
+ my @variant_nodes;
+ while( $var_start ne $var_end ) {
+ push( @variant_nodes, $var_start );
+ $var_start = $graph->next_word( $var_start, $var_label );
+ }
+ push( @variant_nodes, $var_end );
+
+ # Go through the variant nodes, and if we find a lemma node that
+ # hasn't yet been collapsed with a node, equate them. If we do
+ # not, keep them to push onto the end of all_nodes.
+ my @remaining_nodes;
+ my $last_index = 0;
+ foreach my $w ( @variant_nodes ) {
+ my $word = $w->label();
+ my $matched = 0;
+ foreach my $idx ( $last_index .. $#distinct_nodes ) {
+ my( $l, $edgelabel ) = @{$distinct_nodes[$idx]};
+ if( $word eq cmp_str( $l ) ) {
+ next if exists( $collapsed{ $l->label } )
+ && $collapsed{ $l->label } eq $l;
+ $matched = 1;
+ $last_index = $idx if $detranspose;
+ # Collapse the nodes.
+ printf STDERR "Merging nodes %s/%s and %s/%s\n",
+ $l->name, $l->label, $w->name, $w->label;
+ $graph->merge_nodes( $l, $w );
+ $collapsed{ $l->label } = $l;
+ # Now collapse any multiple edges to and from the node.
+ remove_duplicate_edges( $graph,
+ $graph->prior_word( $l, $edgelabel ), $l );
+ remove_duplicate_edges( $graph, $l,
+ $graph->next_word( $l, $edgelabel ) );
+ }
}
+ push( @remaining_nodes, [ $w, $var_label ] ) unless $matched;
}
+ push( @distinct_nodes, @remaining_nodes) if scalar( @remaining_nodes );
}
}