package Text::Tradition::Collation;
+use feature 'say';
use Encode qw( decode_utf8 );
use File::Temp;
use File::Which;
use Text::Tradition::Collation::Reading;
use Text::Tradition::Collation::RelationshipStore;
use Text::Tradition::Error;
+use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
use XML::LibXML;
use XML::LibXML::XPathContext;
use Moose;
related_readings => 'related_readings',
get_relationship => 'get_relationship',
del_relationship => 'del_relationship',
+ equivalence => 'equivalence',
+ equivalence_graph => 'equivalence_graph',
},
writer => '_set_relations',
);
sub BUILD {
my $self = shift;
$self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
- $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
- $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
+ $self->_set_start( $self->add_reading(
+ { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
+ $self->_set_end( $self->add_reading(
+ { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
}
### Reading construct/destruct functions
my( $self, $reading ) = @_;
unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
my %args = %$reading;
+ if( $args{'init'} ) {
+ # If we are initializing an empty collation, don't assume that we
+ # have set a tradition.
+ delete $args{'init'};
+ } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
+ $args{'language'} = $self->tradition->language;
+ }
$reading = Text::Tradition::Collation::Reading->new(
'collation' => $self,
%args );
sub merge_readings {
my $self = shift;
+ # Sanity check
+ my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
+ my $mergemeta = $kept_obj->is_meta;
+ throw( "Cannot merge meta and non-meta reading" )
+ unless ( $mergemeta && $del_obj->is_meta )
+ || ( !$mergemeta && !$del_obj->is_meta );
+ if( $mergemeta ) {
+ throw( "Cannot merge with start or end node" )
+ if( $kept_obj eq $self->start || $kept_obj eq $self->end
+ || $del_obj eq $self->start || $del_obj eq $self->end );
+ }
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
+ my $kept = $kept_obj->id;
+ my $deleted = $del_obj->id;
$self->_graphcalc_done(0);
-
+
# The kept reading should inherit the paths and the relationships
# of the deleted reading.
foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
@wits{keys %$fwits} = values %$fwits;
$self->sequence->set_edge_attributes( @vector, \%wits );
}
- $self->relations->merge_readings( $kept, $deleted, $combine_char );
+ $self->relations->merge_readings( $kept, $deleted, $combine );
# Do the deletion deed.
if( $combine ) {
- my $kept_obj = $self->reading( $kept );
- my $del_obj = $self->reading( $deleted );
+ # Combine the text of the readings
my $joinstr = $combine_char;
unless( defined $joinstr ) {
$joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
$joinstr = $self->wordsep unless defined $joinstr;
}
$kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
+ $kept_obj->normal_form(
+ join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
+ # Combine the lexemes present in the readings
+ if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
+ $kept_obj->add_lexeme( $del_obj->lexemes );
+ }
}
$self->del_reading( $deleted );
}
+=head2 compress_readings
+
+Where possible in the graph, compresses plain sequences of readings into a
+single reading. The sequences must consist of readings with no
+relationships to other readings, with only a single witness path between
+them and no other witness paths from either that would skip the other. The
+readings must also not be marked as nonsense or bad grammar.
+
+Once the merging is done, the path text of every witness (and of the
+ante-correction layer, for layered witnesses) is compared against the text
+stored on the witness, and an error is thrown if they differ. The
+relationship equivalence data and the reading ranks are then recalculated.
+
+Takes no arguments. A progress line is written to STDERR for each pair of
+readings that is joined.
+
+WARNING: This operation cannot be undone.
+
+=cut
+
+sub compress_readings {
+    my $self = shift;
+    # Anywhere in the graph that there is a reading that joins only to a single
+    # successor, and neither of these have any relationships, just join the two
+    # readings.
+    my %gobbled;    # IDs of readings already merged into a predecessor
+    foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
+        next if $rdg->is_meta;
+        # The reading list was taken before any merging began, so it still
+        # contains readings that have since been merged away; skip those
+        # rather than querying the graph about a deleted reading.
+        next if $gobbled{$rdg->id};
+        next if $rdg->grammar_invalid || $rdg->is_nonsense;
+        next if $rdg->related_readings();
+        my %seen;
+        # Keep swallowing the sole successor for as long as there is exactly
+        # one and it is eligible to be joined.
+        while( $self->sequence->successors( $rdg ) == 1 ) {
+            my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
+            my $next_id = $next->id;
+            throw( "Infinite loop" ) if $seen{$next_id};
+            $seen{$next_id} = 1;
+            # Another path joins at $next, so it cannot be folded into $rdg.
+            last if $self->sequence->predecessors( $next ) > 1;
+            last if $next->is_meta;
+            last if $next->grammar_invalid || $next->is_nonsense;
+            last if $next->related_readings();
+            # Diagnostic output only; keep it off STDOUT.
+            say STDERR "Joining readings $rdg and $next";
+            $self->merge_readings( $rdg, $next, 1 );
+            # Remember the merged reading so the outer loop passes over it.
+            $gobbled{$next_id} = 1;
+        }
+    }
+    # Make sure we haven't screwed anything up
+    foreach my $wit ( $self->tradition->witnesses ) {
+        my $pathtext = $self->path_text( $wit->sigil );
+        my $origtext = join( ' ', @{$wit->text} );
+        throw( "Text differs for witness " . $wit->sigil )
+            unless $pathtext eq $origtext;
+        if( $wit->is_layered ) {
+            # Check the ante-correction layer in the same way.
+            $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
+            $origtext = join( ' ', @{$wit->layertext} );
+            throw( "Ante-corr text differs for witness " . $wit->sigil )
+                unless $pathtext eq $origtext;
+        }
+    }
+
+    # The graph has changed shape, so the derived data must be refreshed.
+    $self->relations->rebuild_equivalence();
+    $self->calculate_ranks();
+}
# Helper function for manipulating the graph.
sub _stringify_args {
$self->_graphcalc_done(0);
# Connect the readings
- $self->sequence->add_edge( $source, $target );
+ unless( $self->sequence->has_edge( $source, $target ) ) {
+ $self->sequence->add_edge( $source, $target );
+ $self->relations->add_equivalence_edge( $source, $target );
+ }
# Note the witness in question
$self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
-};
+}
sub del_path {
my $self = shift;
}
unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
$self->sequence->delete_edge( $source, $target );
+ $self->relations->delete_equivalence_edge( $source, $target );
}
}
throw( "Need GraphViz installed to output SVG" )
unless File::Which::which( 'dot' );
my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
- $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
+ $self->calculate_ranks()
+ unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
my @cmd = qw/dot -Tsvg/;
my( $svg, $err );
# Output substitute start/end readings if necessary
if( $startrank ) {
- $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
+ $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
}
if( $endrank ) {
- $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
+ $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
}
if( $STRAIGHTENHACK ) {
## HACK part 1
- my $startlabel = $startrank ? 'SUBSTART' : 'START';
- $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
+ my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
+ $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
$dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
}
my %used; # Keep track of the readings that actually appear in the graph
$label = "-$label" if $reading->join_prior;
$label =~ s/\"/\\\"/g;
$rattrs->{'label'} = $label;
+ $rattrs->{'id'} = $reading->id;
$rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
$dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
}
$dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
$edge->[0], $edge->[1], $varopts );
} elsif( $used{$edge->[0]} ) {
- $subend{$edge->[0]} = 1;
+ $subend{$edge->[0]} = $edge->[1];
} elsif( $used{$edge->[1]} ) {
- $substart{$edge->[1]} = 1;
+ $substart{$edge->[1]} = $edge->[0];
}
}
# Add substitute start and end edges if necessary
foreach my $node ( keys %substart ) {
- my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
+ my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
my $variables = { %edge_attrs, 'label' => $witstr };
+ my $nrdg = $self->reading( $node );
+ if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
+ # Substart is actually one lower than $startrank
+ $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
+ }
my $varopts = _dot_attr_string( $variables );
- $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
+ $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
}
foreach my $node ( keys %subend ) {
- my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
+ my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $varopts = _dot_attr_string( $variables );
- $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
+ $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
}
# HACK part 2
if( $STRAIGHTENHACK ) {
- my $endlabel = $endrank ? 'SUBEND' : 'END';
- $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
+ my $endlabel = $endrank ? '__SUBEND__' : '__END__';
+ $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
}
$dot .= "}\n";
# See if we are in a majority situation.
my $maj = scalar( $self->tradition->witnesses ) * 0.6;
+ $maj = $maj > 5 ? $maj : 5;
if( scalar keys %wits > $maj ) {
unshift( @disp_ac, 'majority' );
return join( ', ', @disp_ac );
is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
+# Now add a stemma, write to GraphML, and parse again.
+my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
+is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
+is( $tradition->stemmata, 1, "Tradition now has the stemma" );
+$graphml = $c->as_graphml;
+like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
+
=end testing
=cut
'Str' => 'string',
'Int' => 'int',
'Bool' => 'boolean',
+ 'ReadingID' => 'string',
'RelationshipType' => 'string',
'RelationshipScope' => 'string',
);
next unless $save_types{$attr->type_constraint->name};
$graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
}
+ # Extra custom key for the tradition stemma(ta)
+ $graph_attributes{'stemmata'} = 'string';
foreach my $datum ( sort keys %graph_attributes ) {
$graph_data_keys{$datum} = 'dg'.$gdi++;
next unless $save_types{$attr->type_constraint->name};
$reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
}
+ # Extra custom key for the reading morphology
+ $reading_attributes{'lexemes'} = 'string';
+
my %node_data_keys;
my $ndi = 0;
foreach my $datum ( sort keys %reading_attributes ) {
$key->setAttribute( 'id', $edge_data_keys{$datum} );
}
- # Add the collation graph itself
+ # Add the collation graph itself. First, sanitize the name to a valid XML ID.
+ my $xmlidname = $self->tradition->name;
+ $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
+ if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
+ $xmlidname = '_'.$xmlidname;
+ }
my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
$sgraph->setAttribute( 'edgedefault', 'directed' );
- $sgraph->setAttribute( 'id', $self->tradition->name );
+ $sgraph->setAttribute( 'id', $xmlidname );
$sgraph->setAttribute( 'parse.edgeids', 'canonical' );
- $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
+ $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
$sgraph->setAttribute( 'parse.nodeids', 'canonical' );
- $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
+ $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
$sgraph->setAttribute( 'parse.order', 'nodesfirst' );
- # Collation attribute data
+ # Tradition/collation attribute data
foreach my $datum ( keys %graph_attributes ) {
my $value;
if( $datum eq 'version' ) {
- $value = '3.1';
+ $value = '3.2';
+ } elsif( $datum eq 'stemmata' ) {
+ my @stemstrs;
+ map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
+ $self->tradition->stemmata;
+ $value = join( "\n", @stemstrs );
} elsif( $gattr_from{$datum} eq 'Tradition' ) {
$value = $self->tradition->$datum;
} else {
$node_el->setAttribute( 'id', $node_xmlid );
foreach my $d ( keys %reading_attributes ) {
my $nval = $n->$d;
- if( $rankoffset && $d eq 'rank' ) {
+ # Custom serialization
+ if( $d eq 'lexemes' ) {
+ # If nval is a true value, we have lexemes so we need to
+ # serialize them. Otherwise set nval to undef so that the
+ # key is excluded from this reading.
+ $nval = $nval ? $n->_serialize_lexemes : undef;
+ } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
+ $nval = undef;
+ }
+ if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
# Adjust the ranks within the subgraph.
- $nval = $n eq $self->end ? $end->rank + 1 : $nval - $rankoffset;
+ $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
+ : $nval - $rankoffset;
}
_add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
if defined $nval;
# We add an edge in the graphml for every witness in $e.
next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
my @edge_wits = sort $self->path_witnesses( $e );
- $e->[0] = $self->start unless $use_readings{$e->[0]};
- $e->[1] = $self->end unless $use_readings{$e->[1]};
+ $e->[0] = $self->start->id unless $use_readings{$e->[0]};
+ $e->[1] = $self->end->id unless $use_readings{$e->[1]};
+ # Skip any path from start to end; that witness is not in the subgraph.
+ next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
foreach my $wit ( @edge_wits ) {
my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
$node_hash{ $e->[0] },
}
}
+ # Report the actual number of nodes and edges that went in
+ $sgraph->setAttribute( 'parse.edges', $edge_ctr );
+ $sgraph->setAttribute( 'parse.nodes', $node_ctr );
+
# Add the relationship graph to the XML
map { delete $edge_data_keys{$_} } @path_attributes;
$self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
my @all_pos = ( 1 .. $self->end->rank - 1 );
foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
- # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
+ # say STDERR "Making witness row(s) for " . $wit->sigil;
my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
my @row = _make_witness_row( \@wit_path, \@all_pos );
push( @{$table->{'alignment'}},
foreach my $rdg ( @$path ) {
my $rtext = $rdg->text;
$rtext = '#LACUNA#' if $rdg->is_lacuna;
- print STDERR "rank " . $rdg->rank . "\n" if $debug;
- # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
+ say STDERR "rank " . $rdg->rank if $debug;
+ # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
$char_hash{$rdg->rank} = { 't' => $rdg };
}
my @row = map { $char_hash{$_} } @$positions;
my $pathtext = '';
my $last;
foreach my $r ( @path ) {
- if( $r->join_prior || !$last || $last->join_next ) {
- $pathtext .= $r->text;
- } else {
- $pathtext .= ' ' . $r->text;
- }
+ unless ( $r->join_prior || !$last || $last->join_next ) {
+ $pathtext .= ' ';
+ }
+ $pathtext .= $r->text;
$last = $r;
}
return $pathtext;
sub make_witness_paths {
my( $self ) = @_;
foreach my $wit ( $self->tradition->witnesses ) {
- # print STDERR "Making path for " . $wit->sigil . "\n";
+ # say STDERR "Making path for " . $wit->sigil;
$self->make_witness_path( $wit );
}
}
$wit->clear_uncorrected_path;
}
-=head2 equivalence_graph( \%readingmap, $startrank, $endrank, @testvector )
-
-Returns an equivalence graph of the collation, in which all readings
-related via a 'colocated' relationship are transformed into a single
-vertex. Can be used to determine the validity of a new relationship. The
-mapping between equivalence vertices and reading IDs will be stored in the
-hash whose reference is passed as readingmap. For a subset of the graph,
-pass in a start and/or an ending rank (this only works if L<calculate_ranks>
-has been called at least once.)
-
-It is also possible to pass in a test relationship in @testvector, and get
-the resulting equivalence graph before the relationship has been made.
-
-=cut
-
-sub equivalence_graph {
- my( $self, $map, $start, $end, @newvector ) = @_;
- $start = undef unless $self->end->has_rank;
- $end = undef unless $self->end->has_rank;
- my $eqgraph = Graph->new();
- my $rel_ctr = 0;
- # Add the nodes
- foreach my $r ( $self->readings ) {
- unless( $r eq $self->start || $r eq $self->end ) {
- next if $start && $r->rank < $start;
- next if $end && $r->rank > $end;
- }
- next if exists $map->{$r->id};
- my @rels = $self->related_readings( $r->id, 'colocated' );
- push( @rels, $r->id );
- # Make an equivalence vertex
- my $rn = 'equivalence_' . $rel_ctr++;
- $eqgraph->add_vertex( $rn );
- # Note which readings belong to this vertex.
- push( @rels, $r->id );
- foreach( @rels ) {
- $map->{$_} = $rn;
- }
- }
-
- # Add the edges.
- foreach my $p ( $self->paths ) {
- my $efrom = exists $map->{$p->[0]} ? $map->{$p->[0]}
- : $map->{$self->start->id};
- my $eto = exists $map->{$p->[1]} ? $map->{$p->[1]}
- : $map->{$self->end->id};
- $eqgraph->add_edge( $efrom, $eto );
- }
-
- # Collapse the vertices in @newvector if applicable.
- if( @newvector ) {
- my( $eqs, $eqt ) = map { $map->{$_} } @newvector;
- $DB::single = 1 unless $eqs && $eqt;
- unless( $eqs eq $eqt ) {
- # Combine the vertices.
- map { $eqgraph->add_edge( $eqs, $_ ) } $eqgraph->successors( $eqt );
- map { $eqgraph->add_edge( $_, $eqs ) } $eqgraph->predecessors( $eqt );
- $eqgraph->delete_vertex( $eqt );
- }
- }
- return $eqgraph;
-}
-
=head2 calculate_ranks
Calculate the reading ranks (that is, their aligned positions relative
# Save the existing ranks, in case we need to invalidate the cached SVG.
my %existing_ranks;
map { $existing_ranks{$_} = $_->rank } $self->readings;
- # Walk a version of the graph where every node linked by a relationship
- # edge is fundamentally the same node, and do a topological ranking on
- # the nodes in this graph.
- my %rel_containers;
- my $topo_graph = $self->equivalence_graph( \%rel_containers );
-
- # Now do the rankings, starting with the start node.
- my $topo_start = $rel_containers{$self->start->id};
- my $node_ranks = { $topo_start => 0 };
- my @curr_origin = ( $topo_start );
- # A little iterative function.
- while( @curr_origin ) {
- @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
- }
+
+ # Do the rankings based on the relationship equivalence graph, starting
+ # with the start node.
+ my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
+
# Transfer our rankings from the topological graph to the real one.
foreach my $r ( $self->readings ) {
- if( defined $node_ranks->{$rel_containers{$r->id}} ) {
- $r->rank( $node_ranks->{$rel_containers{$r->id}} );
+ if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
+ $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
} else {
# Die. Find the last rank we calculated.
- my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
- <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
+ my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
+ <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
$self->readings;
my $last = pop @all_defined;
throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
$self->_graphcalc_done(1);
}
-sub _assign_rank {
- my( $graph, $node_ranks, @current_nodes ) = @_;
- # Look at each of the children of @current_nodes. If all the child's
- # parents have a rank, assign it the highest rank + 1 and add it to
- # @next_nodes. Otherwise skip it; we will return when the highest-ranked
- # parent gets a rank.
- my @next_nodes;
- foreach my $c ( @current_nodes ) {
- warn "Current reading $c has no rank!"
- unless exists $node_ranks->{$c};
- # print STDERR "Looking at child of node $c, rank "
- # . $node_ranks->{$c} . "\n";
- foreach my $child ( $graph->successors( $c ) ) {
- next if exists $node_ranks->{$child};
- my $highest_rank = -1;
- my $skip = 0;
- foreach my $parent ( $graph->predecessors( $child ) ) {
- if( exists $node_ranks->{$parent} ) {
- $highest_rank = $node_ranks->{$parent}
- if $highest_rank <= $node_ranks->{$parent};
- } else {
- $skip = 1;
- last;
- }
- }
- next if $skip;
- my $c_rank = $highest_rank + 1;
- # print STDERR "Assigning rank $c_rank to node $child \n";
- $node_ranks->{$child} = $c_rank;
- push( @next_nodes, $child );
- }
- }
- return @next_nodes;
-}
-
sub _clear_cache {
my $self = shift;
$self->wipe_svg if $self->has_cached_svg;
next unless $rdg->has_rank;
my $key = $rdg->rank . "||" . $rdg->text;
if( exists $unique_rank_rdg{$key} ) {
+ # Make sure they don't have different grammatical forms
+ my $ur = $unique_rank_rdg{$key};
+ if( $rdg->disambiguated && $ur->disambiguated ) {
+ my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
+ my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
+ next unless $rform eq $uform;
+ } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
+ next;
+ }
# Combine!
- # print STDERR "Combining readings at same rank: $key\n";
+ #say STDERR "Combining readings at same rank: $key";
$changed = 1;
$self->merge_readings( $unique_rank_rdg{$key}, $rdg );
# TODO see if this now makes a common point.
is( $c->common_predecessor( 'n24', 'n23' )->id,
'n20', "Found correct common predecessor" );
is( $c->common_successor( 'n24', 'n23' )->id,
- '#END#', "Found correct common successor" );
+ '__END__', "Found correct common successor" );
is( $c->common_predecessor( 'n19', 'n17' )->id,
'n16', "Found correct common predecessor for readings on same path" );
is( $c->common_successor( 'n21', 'n10' )->id,
- '#END#', "Found correct common successor for readings on same path" );
+ '__END__', "Found correct common successor for readings on same path" );
=end testing
my @last_r2 = ( $r2 );
# my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
my %all_seen;
- # print STDERR "Finding common $dir for $r1, $r2\n";
+ # say STDERR "Finding common $dir for $r1, $r2";
while( !@candidates ) {
last unless $iter--; # Avoid looping infinitely
# Iterate separately down the graph from r1 and r2
foreach my $lc ( @last_r1 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r1';
foreach my $lc ( @last_r2 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r2';