lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use Text::CSV;
  10 use Text::Tradition::Collation::Reading;
  11 use Text::Tradition::Collation::RelationshipStore;
  12 use Text::Tradition::Error;
  13 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  14 use Moose;
  15
  16 has 'sequence' => (
  17     is => 'ro',
  18     isa => 'Graph',
  19     default => sub { Graph->new() },
  20     handles => {
  21         paths => 'edges',
  22     },
  23     );
  24
  25 has 'relations' => (
  26         is => 'ro',
  27         isa => 'Text::Tradition::Collation::RelationshipStore',
  28         handles => {
  29                 relationships => 'relationships',
  30                 related_readings => 'related_readings',
  31                 get_relationship => 'get_relationship',
  32                 del_relationship => 'del_relationship',
  33                 equivalence => 'equivalence',
  34                 equivalence_graph => 'equivalence_graph',
  35         },
  36         writer => '_set_relations',
  37         );
  38
  39 has 'tradition' => (
  40     is => 'ro',
  41     isa => 'Text::Tradition',
  42     writer => '_set_tradition',
  43     weak_ref => 1,
  44     );
  45
  46 has 'readings' => (
  47         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  48         traits => ['Hash'],
  49     handles => {
  50         reading     => 'get',
  51         _add_reading => 'set',
  52         del_reading => 'delete',
  53         has_reading => 'exists',
  54         readings   => 'values',
  55     },
  56     default => sub { {} },
  57         );
  58
  59 has 'wit_list_separator' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => ', ',
  63     );
  64
  65 has 'baselabel' => (
  66     is => 'rw',
  67     isa => 'Str',
  68     default => 'base text',
  69     );
  70
  71 has 'linear' => (
  72     is => 'rw',
  73     isa => 'Bool',
  74     default => 1,
  75     );
  76
  77 has 'ac_label' => (
  78     is => 'rw',
  79     isa => 'Str',
  80     default => ' (a.c.)',
  81     );
  82
  83 has 'wordsep' => (
  84         is => 'rw',
  85         isa => 'Str',
  86         default => ' ',
  87         );
  88
  89 has 'start' => (
  90         is => 'ro',
  91         isa => 'Text::Tradition::Collation::Reading',
  92         writer => '_set_start',
  93         weak_ref => 1,
  94         );
  95
  96 has 'end' => (
  97         is => 'ro',
  98         isa => 'Text::Tradition::Collation::Reading',
  99         writer => '_set_end',
 100         weak_ref => 1,
 101         );
 102
 103 has 'cached_table' => (
 104         is => 'rw',
 105         isa => 'HashRef',
 106         predicate => 'has_cached_table',
 107         clearer => 'wipe_table',
 108         );
 109
 110 has '_graphcalc_done' => (
 111         is => 'rw',
 112         isa => 'Bool',
 113         default => undef,
 114         );
 115
 116 =head1 NAME
 117
 118 Text::Tradition::Collation - a software model for a text collation
 119
 120 =head1 SYNOPSIS
 121
 122   use Text::Tradition;
 123   my $t = Text::Tradition->new(
 124     'name' => 'this is a text',
 125     'input' => 'TEI',
 126     'file' => '/path/to/tei_parallel_seg_file.xml' );
 127
 128   my $c = $t->collation;
 129   my @readings = $c->readings;
 130   my @paths = $c->paths;
 131   my @relationships = $c->relationships;
 132
 133   my $svg_variant_graph = $t->collation->as_svg();
 134
 135 =head1 DESCRIPTION
 136
 137 Text::Tradition is a library for representation and analysis of collated
 138 texts, particularly medieval ones.  The Collation is the central feature of
 139 a Tradition, where the text, its sequence of readings, and its relationships
 140 between readings are actually kept.
 141
 142 =head1 CONSTRUCTOR
 143
 144 =head2 new
 145
 146 The constructor.  Takes a hash or hashref of the following arguments:
 147
 148 =over
 149
 150 =item * tradition - The Text::Tradition object to which the collation
 151 belongs. Required.
 152
 153 =item * linear - Whether the collation should be linear; that is, whether
 154 transposed readings should be treated as two linked readings rather than one,
 155 and therefore whether the collation graph is acyclic.  Defaults to true.
 156
 157 =item * baselabel - The default label for the path taken by a base text
 158 (if any). Defaults to 'base text'.
 159
 160 =item * wit_list_separator - The string to join a list of witnesses for
 161 purposes of making labels in display graphs.  Defaults to ', '.
 162
 163 =item * ac_label - The extra label to tack onto a witness sigil when
 164 representing another layer of path for the given witness - that is, when
 165 a text has more than one possible reading due to scribal corrections or
 166 the like.  Defaults to ' (a.c.)'.
 167
 168 =item * wordsep - The string used to separate words in the original text.
 169 Defaults to ' '.
 170
 171 =back
 172
 173 =head1 ACCESSORS
 174
 175 =head2 tradition
 176
 177 =head2 linear
 178
 179 =head2 wit_list_separator
 180
 181 =head2 baselabel
 182
 183 =head2 ac_label
 184
 185 =head2 wordsep
 186
 187 Simple accessors for collation attributes.
 188
 189 =head2 start
 190
 191 The meta-reading at the start of every witness path.
 192
 193 =head2 end
 194
 195 The meta-reading at the end of every witness path.
 196
 197 =head2 readings
 198
 199 Returns all Reading objects in the graph.
 200
 201 =head2 reading( $id )
 202
 203 Returns the Reading object corresponding to the given ID.
 204
 205 =head2 add_reading( $reading_args )
 206
 207 Adds a new reading object to the collation.
 208 See L<Text::Tradition::Collation::Reading> for the available arguments.
 209
 210 =head2 del_reading( $object_or_id )
 211
 212 Removes the given reading from the collation, implicitly removing its
 213 paths and relationships.
 214
 215 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 216
 217 Merges the $second reading into the $main one. If $concatenate is true, then
 218 the merged node will carry the text of both readings, concatenated with either
 219 $with_str (if specified) or a sensible default (the empty string if the
 220 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 221
 222 The first two arguments may be either readings or reading IDs.
 223
 224 =head2 has_reading( $id )
 225
 226 Predicate to see whether a given reading ID is in the graph.
 227
 228 =head2 reading_witnesses( $object_or_id )
 229
 230 Returns a list of sigils whose witnesses contain the reading.
 231
 232 =head2 paths
 233
 234 Returns all reading paths within the document - that is, all edges in the
 235 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 236
 237 =head2 add_path( $source, $target, $sigil )
 238
 239 Links the given readings in the collation in sequence, under the given witness
 240 sigil.  The readings may be specified by object or ID.
 241
 242 =head2 del_path( $source, $target, $sigil )
 243
Removes the given witness sigil from the path between the given readings,
and removes the path itself once no witnesses remain on it.  The readings
may be specified by object or ID.
 246
 247 =head2 has_path( $source, $target );
 248
 249 Returns true if the two readings are linked in sequence in any witness.
 250 The readings may be specified by object or ID.
 251
 252 =head2 relationships
 253
 254 Returns all Relationship objects in the collation.
 255
 256 =head2 add_relationship( $reading, $other_reading, $options )
 257
 258 Adds a new relationship of the type given in $options between the two readings,
 259 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 260 where $status is true on success, and @vectors is a list of relationship edges
 261 that were ultimately added.
 262 See L<Text::Tradition::Collation::Relationship> for the available options.
 263
 264 =cut
 265
 266 sub BUILD {
 267     my $self = shift;
 268     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 269     $self->_set_start( $self->add_reading(
 270         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 271     $self->_set_end( $self->add_reading(
 272         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 273 }
 274
 275 ### Reading construct/destruct functions
 276
 277 sub add_reading {
 278         my( $self, $reading ) = @_;
 279         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 280                 my %args = %$reading;
 281                 if( $args{'init'} ) {
 282                         # If we are initializing an empty collation, don't assume that we
 283                         # have set a tradition.
 284                         delete $args{'init'};
 285                 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
 286                         $args{'language'} = $self->tradition->language;
 287                 }
 288                 $reading = Text::Tradition::Collation::Reading->new(
 289                         'collation' => $self,
 290                         %args );
 291         }
 292         # First check to see if a reading with this ID exists.
 293         if( $self->reading( $reading->id ) ) {
 294                 throw( "Collation already has a reading with id " . $reading->id );
 295         }
 296         $self->_graphcalc_done(0);
 297         $self->_add_reading( $reading->id => $reading );
 298         # Once the reading has been added, put it in both graphs.
 299         $self->sequence->add_vertex( $reading->id );
 300         $self->relations->add_reading( $reading->id );
 301         return $reading;
 302 };
 303
 304 around del_reading => sub {
 305         my $orig = shift;
 306         my $self = shift;
 307         my $arg = shift;
 308
 309         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 310                 $arg = $arg->id;
 311         }
 312         # Remove the reading from the graphs.
 313         $self->_graphcalc_done(0);
 314         $self->_clear_cache; # Explicitly clear caches to GC the reading
 315         $self->sequence->delete_vertex( $arg );
 316         $self->relations->delete_reading( $arg );
 317
 318         # Carry on.
 319         $self->$orig( $arg );
 320 };
 321
 322 =begin testing
 323
 324 use Text::Tradition;
 325
 326 my $cxfile = 't/data/Collatex-16.xml';
 327 my $t = Text::Tradition->new(
 328     'name'  => 'inline',
 329     'input' => 'CollateX',
 330     'file'  => $cxfile,
 331     );
 332 my $c = $t->collation;
 333
 334 my $rno = scalar $c->readings;
 335 # Split n21 for testing purposes
 336 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 337 my $old_r = $c->reading( 'n21' );
 338 $old_r->alter_text( 'to' );
 339 $c->del_path( 'n20', 'n21', 'A' );
 340 $c->add_path( 'n20', 'n21p0', 'A' );
 341 $c->add_path( 'n21p0', 'n21', 'A' );
 342 $c->flatten_ranks();
 343 ok( $c->reading( 'n21p0' ), "New reading exists" );
 344 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 345
 346 # Combine n3 and n4 ( with his )
 347 $c->merge_readings( 'n3', 'n4', 1 );
 348 ok( !$c->reading('n4'), "Reading n4 is gone" );
 349 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 350
 351 # Collapse n9 and n10 ( rood / root )
 352 $c->merge_readings( 'n9', 'n10' );
 353 ok( !$c->reading('n10'), "Reading n10 is gone" );
 354 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 355
 356 # Combine n21 and n21p0
 357 my $remaining = $c->reading('n21');
 358 $remaining ||= $c->reading('n22');  # one of these should still exist
 359 $c->merge_readings( 'n21p0', $remaining, 1 );
 360 ok( !$c->reading('n21'), "Reading $remaining is gone" );
 361 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
 362
 363 =end testing
 364
 365 =cut
 366
 367 sub merge_readings {
 368         my $self = shift;
 369
 370         # Sanity check
 371         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 372         my $mergemeta = $kept_obj->is_meta;
 373         throw( "Cannot merge meta and non-meta reading" )
 374                 unless ( $mergemeta && $del_obj->is_meta )
 375                         || ( !$mergemeta && !$del_obj->is_meta );
 376         if( $mergemeta ) {
 377                 throw( "Cannot merge with start or end node" )
 378                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 379                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 380         }
 381         # We only need the IDs for adding paths to the graph, not the reading
 382         # objects themselves.
 383         my $kept = $kept_obj->id;
 384         my $deleted = $del_obj->id;
 385         $self->_graphcalc_done(0);
 386
 387     # The kept reading should inherit the paths and the relationships
 388     # of the deleted reading.
 389         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 390                 my @vector = ( $kept );
 391                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 392                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 393                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 394                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 395                 $self->sequence->add_edge( @vector );
 396                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 397                 @wits{keys %$fwits} = values %$fwits;
 398                 $self->sequence->set_edge_attributes( @vector, \%wits );
 399         }
 400         $self->relations->merge_readings( $kept, $deleted, $combine );
 401
 402         # Do the deletion deed.
 403         if( $combine ) {
 404                 # Combine the text of the readings
 405                 my $joinstr = $combine_char;
 406                 unless( defined $joinstr ) {
 407                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 408                         $joinstr = $self->wordsep unless defined $joinstr;
 409                 }
 410                 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
 411                 # Change this reading to a joining one if necessary
 412                 $kept_obj->_set_join_next( $del_obj->join_next );
 413                 $kept_obj->normal_form(
 414                         join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
 415                 # Combine the lexemes present in the readings
 416                 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
 417                         $kept_obj->add_lexeme( $del_obj->lexemes );
 418                 }
 419         }
 420         $self->del_reading( $deleted );
 421 }
 422
 423 =head2 compress_readings
 424
 425 Where possible in the graph, compresses plain sequences of readings into a
 426 single reading. The sequences must consist of readings with no
 427 relationships to other readings, with only a single witness path between
 428 them and no other witness paths from either that would skip the other. The
 429 readings must also not be marked as nonsense or bad grammar.
 430
 431 WARNING: This operation cannot be undone.
 432
 433 =cut
 434
 435 sub compress_readings {
 436         my $self = shift;
 437         # Anywhere in the graph that there is a reading that joins only to a single
 438         # successor, and neither of these have any relationships, just join the two
 439         # readings.
 440         my %gobbled;
 441         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 442                 # While we are here, get rid of any extra wordforms from a disambiguated
 443                 # reading.
 444                 if( $rdg->disambiguated ) {
 445                         foreach my $lex ( $rdg->lexemes ) {
 446                                 $lex->clear_matching_forms();
 447                                 $lex->add_matching_form( $lex->form );
 448                         }
 449                 }
 450                 # Now look for readings that can be joined to their successors.
 451                 next if $rdg->is_meta;
 452                 next if $gobbled{$rdg->id};
 453                 next if $rdg->grammar_invalid || $rdg->is_nonsense;
 454                 next if $rdg->related_readings();
 455                 my %seen;
 456                 while( $self->sequence->successors( $rdg ) == 1 ) {
 457                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 458                         throw( "Infinite loop" ) if $seen{$next->id};
 459                         $seen{$next->id} = 1;
 460                         last if $self->sequence->predecessors( $next ) > 1;
 461                         last if $next->is_meta;
 462                         last if $next->grammar_invalid || $next->is_nonsense;
 463                         last if $next->related_readings();
 464                         say "Joining readings $rdg and $next";
 465                         $self->merge_readings( $rdg, $next, 1 );
 466                 }
 467         }
 468         # Make sure we haven't screwed anything up
 469         foreach my $wit ( $self->tradition->witnesses ) {
 470                 my $pathtext = $self->path_text( $wit->sigil );
 471                 my $origtext = join( ' ', @{$wit->text} );
 472                 throw( "Text differs for witness " . $wit->sigil )
 473                         unless $pathtext eq $origtext;
 474                 if( $wit->is_layered ) {
 475                         $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
 476                         $origtext = join( ' ', @{$wit->layertext} );
 477                         throw( "Ante-corr text differs for witness " . $wit->sigil )
 478                                 unless $pathtext eq $origtext;
 479                 }
 480         }
 481
 482         $self->relations->rebuild_equivalence();
 483         $self->calculate_ranks();
 484 }
 485
 486 # Helper function for manipulating the graph.
 487 sub _stringify_args {
 488         my( $self, $first, $second, @args ) = @_;
 489     $first = $first->id
 490         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 491     $second = $second->id
 492         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 493     return( $first, $second, @args );
 494 }
 495
 496 # Helper function for manipulating the graph.
 497 sub _objectify_args {
 498         my( $self, $first, $second, $arg ) = @_;
 499     $first = $self->reading( $first )
 500         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 501     $second = $self->reading( $second )
 502         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 503     return( $first, $second, $arg );
 504 }
 505 ### Path logic
 506
 507 sub add_path {
 508         my $self = shift;
 509
 510         # We only need the IDs for adding paths to the graph, not the reading
 511         # objects themselves.
 512     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 513
 514         $self->_graphcalc_done(0);
 515         # Connect the readings
 516         unless( $self->sequence->has_edge( $source, $target ) ) {
 517             $self->sequence->add_edge( $source, $target );
 518             $self->relations->add_equivalence_edge( $source, $target );
 519         }
 520     # Note the witness in question
 521     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 522 }
 523
 524 sub del_path {
 525         my $self = shift;
 526         my @args;
 527         if( ref( $_[0] ) eq 'ARRAY' ) {
 528                 my $e = shift @_;
 529                 @args = ( @$e, @_ );
 530         } else {
 531                 @args = @_;
 532         }
 533
 534         # We only need the IDs for adding paths to the graph, not the reading
 535         # objects themselves.
 536     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 537
 538         $self->_graphcalc_done(0);
 539         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 540                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 541         }
 542         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 543                 $self->sequence->delete_edge( $source, $target );
 544                 $self->relations->delete_equivalence_edge( $source, $target );
 545         }
 546 }
 547
 548
 549 # Extra graph-alike utility
 550 sub has_path {
 551         my $self = shift;
 552     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 553         return undef unless $self->sequence->has_edge( $source, $target );
 554         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 555 }
 556
 557 =head2 clear_witness( @sigil_list )
 558
 559 Clear the given witnesses out of the collation entirely, removing references
 560 to them in paths, and removing readings that belong only to them.  Should only
 561 be called via $tradition->del_witness.
 562
 563 =cut
 564
 565 sub clear_witness {
 566         my( $self, @sigils ) = @_;
 567
 568         $self->_graphcalc_done(0);
 569         # Clear the witness(es) out of the paths
 570         foreach my $e ( $self->paths ) {
 571                 foreach my $sig ( @sigils ) {
 572                         $self->del_path( $e, $sig );
 573                 }
 574         }
 575
 576         # Clear out the newly unused readings
 577         foreach my $r ( $self->readings ) {
 578                 unless( $self->reading_witnesses( $r ) ) {
 579                         $self->del_reading( $r );
 580                 }
 581         }
 582 }
 583
 584 sub add_relationship {
 585         my $self = shift;
 586     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 587     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 588         $self->_graphcalc_done(0);
 589     return @vectors;
 590 }
 591
 592 around qw/ get_relationship del_relationship / => sub {
 593         my $orig = shift;
 594         my $self = shift;
 595         my @args = @_;
 596         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 597                 @args = @{$_[0]};
 598         }
 599         my( $source, $target ) = $self->_stringify_args( @args );
 600         $self->$orig( $source, $target );
 601 };
 602
 603 =head2 reading_witnesses( $reading )
 604
 605 Return a list of sigils corresponding to the witnesses in which the reading appears.
 606
 607 =cut
 608
 609 sub reading_witnesses {
 610         my( $self, $reading ) = @_;
 611         # We need only check either the incoming or the outgoing edges; I have
 612         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 613         if( $reading eq $self->start ) {
 614                 return map { $_->sigil } $self->tradition->witnesses;
 615         }
 616         my %all_witnesses;
 617         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 618                 my $wits = $self->sequence->get_edge_attributes( @$e );
 619                 @all_witnesses{ keys %$wits } = 1;
 620         }
 621         my $acstr = $self->ac_label;
 622         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 623                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 624         }
 625         return keys %all_witnesses;
 626 }
 627
 628 =head1 OUTPUT METHODS
 629
 630 =head2 as_svg( \%options )
 631
 632 Returns an SVG string that represents the graph, via as_dot and graphviz.
 633 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 634
 635 =cut
 636
 637 sub as_svg {
 638     my( $self, $opts ) = @_;
 639     throw( "Need GraphViz installed to output SVG" )
 640         unless File::Which::which( 'dot' );
 641     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 642     $self->calculate_ranks()
 643         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 644         my @cmd = qw/dot -Tsvg/;
 645         my( $svg, $err );
 646         my $dotfile = File::Temp->new();
 647         ## USE FOR DEBUGGING
 648         # $dotfile->unlink_on_destroy(0);
 649         binmode $dotfile, ':utf8';
 650         print $dotfile $self->as_dot( $opts );
 651         push( @cmd, $dotfile->filename );
 652         run( \@cmd, ">", binary(), \$svg );
 653         $svg = decode_utf8( $svg );
 654         return $svg;
 655 }
 656
 657
 658 =head2 as_dot( \%options )
 659
 660 Returns a string that is the collation graph expressed in dot
 661 (i.e. GraphViz) format.  Options include:
 662
 663 =over 4
 664
 665 =item * from
 666
 667 =item * to
 668
 669 =item * color_common
 670
 671 =back
 672
 673 =cut
 674
 675 sub as_dot {
 676     my( $self, $opts ) = @_;
 677     my $startrank = $opts->{'from'} if $opts;
 678     my $endrank = $opts->{'to'} if $opts;
 679     my $color_common = $opts->{'color_common'} if $opts;
 680     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 681        && $self->end->rank > 100;
 682     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 683
 684     # Check the arguments
 685     if( $startrank ) {
 686         return if $endrank && $startrank > $endrank;
 687         return if $startrank > $self->end->rank;
 688         }
 689         if( defined $endrank ) {
 690                 return if $endrank < 0;
 691                 $endrank = undef if $endrank == $self->end->rank;
 692         }
 693
 694     my $graph_name = $self->tradition->name;
 695     $graph_name =~ s/[^\w\s]//g;
 696     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 697
 698     my %graph_attrs = (
 699         'rankdir' => 'LR',
 700         'bgcolor' => 'none',
 701         );
 702     my %node_attrs = (
 703         'fontsize' => 14,
 704         'fillcolor' => 'white',
 705         'style' => 'filled',
 706         'shape' => 'ellipse'
 707         );
 708     my %edge_attrs = (
 709         'arrowhead' => 'open',
 710         'color' => '#000000',
 711         'fontcolor' => '#000000',
 712         );
 713
 714     my $dot = sprintf( "digraph %s {\n", $graph_name );
 715     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 716     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 717
 718         # Output substitute start/end readings if necessary
 719         if( $startrank ) {
 720                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
 721         }
 722         if( $endrank ) {
 723                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
 724         }
 725         if( $STRAIGHTENHACK ) {
 726                 ## HACK part 1
 727                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
 728                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
 729                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 730         }
 731         my %used;  # Keep track of the readings that actually appear in the graph
 732         # Sort the readings by rank if we have ranks; this speeds layout.
 733         my @all_readings = $self->end->has_rank
 734                 ? sort { $a->rank <=> $b->rank } $self->readings
 735                 : $self->readings;
 736         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 737     foreach my $reading ( @all_readings ) {
 738         # Only output readings within our rank range.
 739         next if $startrank && $reading->rank < $startrank;
 740         next if $endrank && $reading->rank > $endrank;
 741         $used{$reading->id} = 1;
 742         # Need not output nodes without separate labels
 743         next if $reading->id eq $reading->text;
 744         my $rattrs;
 745         my $label = $reading->text;
 746         $label .= '-' if $reading->join_next;
 747         $label = "-$label" if $reading->join_prior;
 748         $label =~ s/\"/\\\"/g;
 749                 $rattrs->{'label'} = $label;
 750                 $rattrs->{'id'} = $reading->id;
 751                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 752         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 753     }
 754
 755         # Add the real edges. Need to weight one edge per rank jump, in a
 756         # continuous line.
 757         # my $weighted = $self->_add_edge_weights;
 758     my @edges = $self->paths;
 759         my( %substart, %subend );
 760     foreach my $edge ( @edges ) {
 761         # Do we need to output this edge?
 762         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 763                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 764                         my $variables = { %edge_attrs, 'label' => $label };
 765
 766                         # Account for the rank gap if necessary
 767                         my $rank0 = $self->reading( $edge->[0] )->rank
 768                                 if $self->reading( $edge->[0] )->has_rank;
 769                         my $rank1 = $self->reading( $edge->[1] )->rank
 770                                 if $self->reading( $edge->[1] )->has_rank;
 771                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 772                                 $variables->{'minlen'} = $rank1 - $rank0;
 773                         }
 774
 775                         # Add the calculated edge weights
 776                         # if( exists $weighted->{$edge->[0]}
 777                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 778                         #       # $variables->{'color'} = 'red';
 779                         #       $variables->{'weight'} = 3.0;
 780                         # }
 781
 782                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 783                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 784                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 785
 786                         my $varopts = _dot_attr_string( $variables );
 787                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 788                                 $edge->[0], $edge->[1], $varopts );
 789         } elsif( $used{$edge->[0]} ) {
 790                 $subend{$edge->[0]} = $edge->[1];
 791         } elsif( $used{$edge->[1]} ) {
 792                 $substart{$edge->[1]} = $edge->[0];
 793         }
 794     }
 795     # Add substitute start and end edges if necessary
 796     foreach my $node ( keys %substart ) {
 797         my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
 798         my $variables = { %edge_attrs, 'label' => $witstr };
 799         my $nrdg = $self->reading( $node );
 800         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
 801                 # Substart is actually one lower than $startrank
 802                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
 803         }
 804         my $varopts = _dot_attr_string( $variables );
 805         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
 806         }
 807     foreach my $node ( keys %subend ) {
 808         my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
 809         my $variables = { %edge_attrs, 'label' => $witstr };
 810         my $varopts = _dot_attr_string( $variables );
 811         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
 812         }
 813         # HACK part 2
 814         if( $STRAIGHTENHACK ) {
 815                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
 816                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 817         }
 818
 819     $dot .= "}\n";
 820     return $dot;
 821 }
 822
 # Serialize a hashref of Graphviz attributes into a DOT attribute list, so
 # that { color => 'red', label => 'A' } becomes '[ color="red", label="A" ]'.
 # Keys are written in sorted order, which keeps the output deterministic.
 # A double quote inside a value is backslash-escaped unless it is already
 # preceded by a backslash, so that a value cannot close its own quoted string
 # and a caller that has escaped its label beforehand is not escaped twice.
 # An undefined value is written as the empty string.
 sub _dot_attr_string {
     my( $hash ) = @_;
     my @attrs;
     foreach my $k ( sort keys %$hash ) {
         # Work on a copy; the caller's hash is left untouched.
         my $v = defined $hash->{$k} ? $hash->{$k} : '';
         $v =~ s/(?<!\\)"/\\"/g;
         push( @attrs, $k . '="' . $v . '"' );
     }
     return( '[ ' . join( ', ', @attrs ) . ' ]' );
 }
 832
 # Walk the sequence graph from START to END, each time following the successor
 # that is reached by the largest number of witnesses, and return a hashref
 # that maps every reading ID on that walk to the ID chosen to follow it.
 # A candidate that is a lacuna, or (when the graph is ranked) that lies more
 # than one rank ahead, is passed over for the next-best successor for as long
 # as there are alternatives; the last remaining successor is taken as it is.
 # Throws if a reading other than END has no successor at all.
 # NOTE(review): a cycle in the sequence graph would keep this walk going
 # forever - confirm that callers only use it on acyclic graphs.
 sub _add_edge_weights {
     my $self = shift;
     my $weighted = {};
     my $endid = $self->end->id;
     my $ranked = $self->end->has_rank;
     my $curr = $self->start->id;
     while( $curr ne $endid ) {
         my $rank = $ranked ? $self->reading( $curr )->rank : 0;
         # Count the witnesses once per successor, not once per comparison.
         my %nwits;
         foreach my $succ ( $self->sequence->successors( $curr ) ) {
             $nwits{$succ} = scalar( $self->path_witnesses( $curr, $succ ) );
         }
         # Least-witnessed first, so that pop() hands out the best candidate.
         my @succ = sort { $nwits{$a} <=> $nwits{$b} } keys %nwits;
         throw( "No path leads onward from reading $curr" ) unless @succ;
         my $next = pop @succ;
         # Try to avoid lacunae in the weighted path.
         while( @succ ) {
             my $rdg = $self->reading( $next );
             # The gap has to be measured for the candidate at hand, not for
             # the first one that was looked at.
             my $gap = $ranked ? $rdg->rank - $rank : 0;
             last unless $rdg->is_lacuna || $gap > 1;
             $next = pop @succ;
         }
         $weighted->{$curr} = $next;
         $curr = $next;
     }
     return $weighted;
 }
 858
 859 =head2 path_witnesses( $edge )
 860
 861 Returns the list of sigils whose witnesses are associated with the given edge.
 862 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 863
 864 =cut
 865
 # Return the witness sigils recorded as attributes on one sequence edge.
 # The edge is given either as the list ( $source, $target ) or as a single
 # arrayref [ $source, $target ]. An edge for which the graph has no
 # attributes to offer yields an empty list instead of a dereferencing error.
 sub path_witnesses {
     my( $self, @edge ) = @_;
     # If edge is an arrayref, cope.
     if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
         @edge = @{ $edge[0] };
     }
     # The attribute hash is keyed by sigil; it can be missing altogether.
     my $attrs = $self->sequence->get_edge_attributes( @edge );
     my @wits = $attrs ? keys %$attrs : ();
     return @wits;
 }
 876
 # Helper function. Build the label that is shown on a path for the witness
 # sigils passed in. A layered (a.c.) sigil is dropped when its main witness
 # is in the list as well and is kept otherwise. If more sigils remain than
 # 60% of the witnesses in the tradition (and more than five in any case), the
 # label reads 'majority' followed by the a.c. sigils that were kept; if not,
 # it is the sorted list of the remaining sigils.
 sub _path_display_label {
     my( $self, @sigla ) = @_;
     my %present = map { ( $_ => 1 ) } @sigla;

     # Weed out each a.c. sigil whose main witness is listed too, and remember
     # the others for explicit display.
     my $aclabel = $self->ac_label;
     my @lone_ac;
     for my $sigil ( sort keys %present ) {
         next unless $sigil =~ /^(.*)\Q$aclabel\E$/;
         if( exists $present{$1} ) {
             delete $present{$sigil};
         }
         else {
             push( @lone_ac, $sigil );
         }
     }

     # The threshold for a majority never drops below five.
     my $threshold = scalar( $self->tradition->witnesses ) * 0.6;
     $threshold = 5 unless $threshold > 5;
     return join( ', ', sort keys %present )
         unless scalar( keys %present ) > $threshold;
     return join( ', ', 'majority', @lone_ac );
 }
 908
 909 =head2 readings_at_rank( $rank )
 910
 911 Returns a list of readings at a given rank, taken from the alignment table.
 912
 913 =cut
 914
 # Collect the distinct readings that the alignment table holds at one rank,
 # keyed by reading ID so that a reading shared by several witnesses is
 # returned once. The table has no column for a rank below 1, so such a rank
 # (or an undefined one) gives an empty list; so does a rank beyond the table.
 sub readings_at_rank {
     my( $self, $rank ) = @_;
     my $table = $self->alignment_table;
     my %readings;
     # Table rank is real rank - 1. Rank 0 must not be looked up: index -1
     # would silently fetch the last token of every row.
     if( defined $rank && $rank >= 1 ) {
         foreach my $row ( @{$table->{'alignment'}} ) {
             my $e = $row->{'tokens'}->[$rank-1];
             next unless ref( $e ) eq 'HASH';
             next unless exists $e->{'t'};
             $readings{$e->{'t'}->id} = $e->{'t'};
         }
     }
     return values %readings;
 }
 928
 929 =head2 as_graphml
 930
 931 Returns a GraphML representation of the collation.  The GraphML will contain
 932 two graphs. The first expresses the attributes of the readings and the witness
 933 paths that link them; the second expresses the relationships that link the
 934 readings.  This is the native transfer format for a tradition.
 935
 936 =begin testing
 937
 938 use Text::Tradition;
 939
 940 my $READINGS = 311;
 941 my $PATHS = 361;
 942
 943 my $datafile = 't/data/florilegium_tei_ps.xml';
 944 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 945                                       'name' => 'test0',
 946                                       'file' => $datafile,
 947                                       'linear' => 1 );
 948
 949 ok( $tradition, "Got a tradition object" );
 950 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 951 ok( $tradition->collation, "Tradition has a collation" );
 952
 953 my $c = $tradition->collation;
 954 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 955 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 956 is( scalar $c->relationships, 0, "Collation has all relationships" );
 957
 958 # Add a few relationships
 959 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 960 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 961 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 962
 963 # Now write it to GraphML and parse it again.
 964
 965 my $graphml = $c->as_graphml;
 966 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 967 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 968 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 969 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 970
 971 # Now add a stemma, write to GraphML, and look at the output.
 972 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
 973 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
 974 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
 975 $graphml = $c->as_graphml;
 976 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
 977
 978 # Now add a user, write to GraphML, and look at the output.
 979 unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
 980 my $testuser = Text::Tradition::User->new(
 981         id => 'testuser', password => 'testpass' );
 982 is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
 983 $testuser->add_tradition( $tradition );
 984 is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
 985 $graphml = $c->as_graphml;
 986 like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
 987
 988 =end testing
 989
 990 =cut
 991
 992 sub as_graphml {
         # Serialize the collation as a GraphML document and return it as a
         # string. $options is an optional hashref: its 'from' and 'to' keys
         # name the readings (by ID) that bound the part of the graph to be
         # written, and default to the start and end of the collation.
         # Throws if the 'to' reading is ranked before the 'from' reading.
 993     my( $self, $options ) = @_;
 994         $self->calculate_ranks unless $self->_graphcalc_done;
 995
 996         my $start = $options->{'from'}
 997                 ? $self->reading( $options->{'from'} ) : $self->start;
 998         my $end = $options->{'to'}
 999                 ? $self->reading( $options->{'to'} ) : $self->end;
1000         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1001                 throw( 'Start node must be before end node' );
1002         }
1003         # The readings need to be ranked for this to work.
             # A boundary without a rank is given up in favour of the start or
             # end of the whole collation.
1004         $start = $self->start unless $start->has_rank;
1005         $end = $self->end unless $end->has_rank;
             # For a subgraph the ranks are shifted down, so that the 'from'
             # reading comes out at rank 1.
1006         my $rankoffset = 0;
1007         unless( $start eq $self->start ) {
1008                 $rankoffset = $start->rank - 1;
1009         }
             # IDs of the readings that make it into the output.
1010         my %use_readings;
1011
1012     # Some namespaces
1013     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1014     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1015     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1016         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1017
1018     # Create the document and root node
1019     require XML::LibXML;
1020     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1021     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1022     $graphml->setDocumentElement( $root );
1023     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1024     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1025
1026     # List of attribute types to save on our objects and their corresponding
1027     # GraphML types
         # An attribute whose type constraint is not named here is not saved.
1028     my %save_types = (
1029         'Str' => 'string',
1030         'Int' => 'int',
1031         'Bool' => 'boolean',
1032         'ReadingID' => 'string',
1033         'RelationshipType' => 'string',
1034         'RelationshipScope' => 'string',
1035     );
1036
1037     # Add the data keys for the graph. Include an extra key 'version' for the
1038     # GraphML output version.
1039     my %graph_data_keys;
1040     my $gdi = 0;
1041     my %graph_attributes = ( 'version' => 'string' );
1042         # Graph attributes include those of Tradition and those of Collation.
             # %gattr_from remembers which of the two objects to ask for each
             # value; Collation is recorded last and so takes any name that
             # both classes have.
1043         my %gattr_from;
1044         my $tmeta = $self->tradition->meta;
1045         my $cmeta = $self->meta;
1046         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1047         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1048         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
                     # Attributes with a leading underscore are private.
1049                 next if $attr->name =~ /^_/;
1050                 next unless $save_types{$attr->type_constraint->name};
1051                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1052         }
1053     # Extra custom keys for complex objects that should be saved in some form.
1054     # The subroutine should return a string, or undef/empty.
1055     $graph_attributes{'stemmata'} = sub {
1056         my @stemstrs;
1057                 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1058                         $self->tradition->stemmata;
1059                 join( "\n", @stemstrs );
1060         };
1061     $graph_attributes{'user'} = sub {
1062         $self->tradition->user ? $self->tradition->user->id : undef
1063     };
1064
         # Key IDs (dg0, dg1, ...) are handed out in sorted order of the
         # attribute names. A key computed by a subroutine is typed as string.
1065     foreach my $datum ( sort keys %graph_attributes ) {
1066         $graph_data_keys{$datum} = 'dg'.$gdi++;
1067         my $key = $root->addNewChild( $graphml_ns, 'key' );
1068         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1069                 : $graph_attributes{$datum};
1070         $key->setAttribute( 'attr.name', $datum );
1071         $key->setAttribute( 'attr.type', $dtype );
1072         $key->setAttribute( 'for', 'graph' );
1073         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1074     }
1075
1076     # Add the data keys for reading nodes
1077     my %reading_attributes;
1078     my $rmeta = Text::Tradition::Collation::Reading->meta;
1079     foreach my $attr( $rmeta->get_all_attributes ) {
1080                 next if $attr->name =~ /^_/;
1081                 next unless $save_types{$attr->type_constraint->name};
1082                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1083         }
1084         # Extra custom key for the reading morphology
1085         $reading_attributes{'lexemes'} = 'string';
1086
1087     my %node_data_keys;
1088     my $ndi = 0;
1089     foreach my $datum ( sort keys %reading_attributes ) {
1090         $node_data_keys{$datum} = 'dn'.$ndi++;
1091         my $key = $root->addNewChild( $graphml_ns, 'key' );
1092         $key->setAttribute( 'attr.name', $datum );
1093         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1094         $key->setAttribute( 'for', 'node' );
1095         $key->setAttribute( 'id', $node_data_keys{$datum} );
1096     }
1097
1098     # Add the data keys for edges, that is, paths and relationships. Path
1099     # data does not come from a Moose class so is here manually.
1100     my $edi = 0;
1101     my %edge_data_keys;
1102     my %edge_attributes = (
1103         witness => 'string',                    # ID/label for a path
1104         extra => 'boolean',                             # Path key
1105         );
         # The path keys are set aside so that they can be withheld from the
         # relationship graph further down.
1106     my @path_attributes = keys %edge_attributes; # track our manual additions
1107     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1108     foreach my $attr( $pmeta->get_all_attributes ) {
1109                 next if $attr->name =~ /^_/;
1110                 next unless $save_types{$attr->type_constraint->name};
1111                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1112         }
1113     foreach my $datum ( sort keys %edge_attributes ) {
1114         $edge_data_keys{$datum} = 'de'.$edi++;
1115         my $key = $root->addNewChild( $graphml_ns, 'key' );
1116         $key->setAttribute( 'attr.name', $datum );
1117         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1118         $key->setAttribute( 'for', 'edge' );
1119         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1120     }
1121
1122     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
         # Every character that is not an XML name character becomes an
         # underscore, and an underscore is put in front if the first
         # character may not begin a name.
1123     my $xmlidname = $self->tradition->name;
1124     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1125     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1126         $xmlidname = '_'.$xmlidname;
1127     }
1128     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1129     $sgraph->setAttribute( 'edgedefault', 'directed' );
1130     $sgraph->setAttribute( 'id', $xmlidname );
1131     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1132     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1133     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1134     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1135     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1136
1137     # Tradition/collation attribute data
         # The value comes from a fixed string (version), from the custom
         # subroutine, or from the accessor of the owning object.
         # NOTE(review): the keys are not sorted here, so the order of these
         # data elements can differ from one run to the next.
1138     foreach my $datum ( keys %graph_attributes ) {
1139         my $value;
1140         if( $datum eq 'version' ) {
1141                 $value = '3.2';
1142         } elsif( ref( $graph_attributes{$datum} ) ) {
1143                 my $sub = $graph_attributes{$datum};
1144                 $value = &$sub();
1145         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1146                 $value = $self->tradition->$datum;
1147         } else {
1148                 $value = $self->$datum;
1149         }
1150                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1151         }
1152
1153     my $node_ctr = 0;
         # Maps a reading ID to the XML ID (n0, n1, ...) of its node.
1154     my %node_hash;
1155     # Add our readings to the graph
1156     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
             # A ranked reading outside of the requested range is left out;
             # the collation start and end, and any reading without a rank,
             # are always written.
1157         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1158                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1159         $use_readings{$n->id} = 1;
1160         # Add to the main graph
1161         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1162         my $node_xmlid = 'n' . $node_ctr++;
1163         $node_hash{ $n->id } = $node_xmlid;
1164         $node_el->setAttribute( 'id', $node_xmlid );
1165         foreach my $d ( keys %reading_attributes ) {
1166                 my $nval = $n->$d;
1167                 # Custom serialization
1168                 if( $d eq 'lexemes' ) {
1169                                 # If nval is a true value, we have lexemes so we need to
1170                                 # serialize them. Otherwise set nval to undef so that the
1171                                 # key is excluded from this reading.
1172                         $nval = $nval ? $n->_serialize_lexemes : undef;
1173                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
                             # A normal form that is the same as the text is
                             # not written out.
1174                         $nval = undef;
1175                 }
1176                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1177                         # Adjust the ranks within the subgraph.
                             # The collation end is placed one rank after the
                             # 'to' reading.
1178                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1179                                 : $nval - $rankoffset;
1180                 }
1181                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1182                         if defined $nval;
1183         }
1184     }
1185
1186     # Add the path edges to the sequence graph
1187     my $edge_ctr = 0;
1188     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1189         # We add an edge in the graphml for every witness in $e.
             # An edge is kept if at least one of its ends is in the output.
             # The witnesses are looked up before an end that was left out is
             # replaced by the collation start or end.
1190         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1191         my @edge_wits = sort $self->path_witnesses( $e );
1192         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1193         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1194         # Skip any path from start to end; that witness is not in the subgraph.
1195         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1196         foreach my $wit ( @edge_wits ) {
1197                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1198                                                                                 $node_hash{ $e->[0] },
1199                                                                                 $node_hash{ $e->[1] } );
1200                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1201                         $edge_el->setAttribute( 'source', $from );
1202                         $edge_el->setAttribute( 'target', $to );
1203                         $edge_el->setAttribute( 'id', $id );
1204
1205                         # It's a witness path, so add the witness
1206                         my $base = $wit;
                             # NOTE(review): $key is not used again below.
1207                         my $key = $edge_data_keys{'witness'};
1208                         # Is this an ante-corr witness?
1209                         my $aclabel = $self->ac_label;
1210                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1211                                 # Keep the base witness
1212                                 $base = $1;
1213                                 # ...and record that this is an 'extra' reading path
                                     # NOTE(review): the 'extra' key is declared as
                                     # boolean above, but the value written is the
                                     # a.c. label itself.
1214                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1215                         }
1216                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1217                 }
1218         }
1219
1220         # Report the actual number of nodes and edges that went in
1221         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1222         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1223
1224         # Add the relationship graph to the XML
             # The path-only keys are taken out of the key map first, so that
             # the relationship store is given relationship keys only.
1225         map { delete $edge_data_keys{$_} } @path_attributes;
1226         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1227                 $node_data_keys{'id'}, \%edge_data_keys );
1228
1229     # Save and return the thing
         # The serialized document is run through decode_utf8 before it is
         # returned. NOTE(review): the argument 1 to toString presumably asks
         # for indented output - confirm against the XML::LibXML documentation.
1230     my $result = decode_utf8( $graphml->toString(1) );
1231     return $result;
1232 }
1233
# Append a 'data' child element to the XML element $el, in the namespace of
# $el itself, with $key as its 'key' attribute and $value as its text.
# Nothing is added when $value is undefined.
sub _add_graphml_data {
    my $el = shift;
    my( $key, $value ) = @_;
    return if not defined $value;
    my $datum = $el->addNewChild( $el->namespaceURI, 'data' );
    $datum->setAttribute( key => $key );
    $datum->appendText( $value );
}
1241
=head2 as_csv

Returns a CSV representation of the alignment table of the collation graph.
The first row holds the witness labels, one column per witness (or witness
uncorrected); each row after that holds, for one position in the table, the
text of the reading of every witness, with nothing in the field of a witness
that has no reading there. The rows are joined with newlines.

=cut

sub as_csv {
    my( $self ) = @_;
    my $table = $self->alignment_table;
    my @columns = @{$table->{'alignment'}};
    my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
    # Turn one list of fields into one decoded line of CSV.
    my $csv_line = sub {
        $csv->combine( @_ );
        return decode_utf8( $csv->string );
    };

    # The header row comes first...
    my @lines = ( $csv_line->( map { $_->{'witness'} } @columns ) );
    # ...and then one row for every position in the table.
    for my $idx ( 0 .. $table->{'length'} - 1 ) {
        my @cells;
        for my $col ( @columns ) {
            my $token = $col->{'tokens'}->[$idx];
            push( @cells, $token ? $token->{'t'}->text : $token );
        }
        push( @lines, $csv_line->( @cells ) );
    }
    return join( "\n", @lines );
}
1266
=head2 alignment_table( $use_refs, $include_witnesses )

Return a reference to an alignment table, in a slightly enhanced CollateX
format which looks like this:

 $table = { alignment => [ { witness => "SIGIL",
                             tokens => [ { t => READING }, ... ] },
                           { witness => "SIG2",
                             tokens => [ { t => READING }, ... ] },
                           ... ],
            length => TEXTLEN };

The rows are in order of witness sigil, and a layered witness is followed by
a second row for its uncorrected text. The table is cached on the collation
and the cached copy is returned for as long as there is one. Throws if the
collation is not linear.

Note that the method as it stands reads neither of the arguments named
above: the value of 't' is always the reading object (see $use_refs), and
all witnesses of the tradition are included (see $include_witnesses).

=cut

sub alignment_table {
    my( $self ) = @_;
    $self->calculate_ranks() unless $self->_graphcalc_done;
    return $self->cached_table if $self->has_cached_table;

    # Make sure we can do this
    throw( "Need a linear graph in order to make an alignment table" )
        unless $self->linear;
    $self->calculate_ranks unless $self->end->has_rank;

    # The table has one position for every rank between start and end.
    my $length = $self->end->rank - 1;
    my @all_pos = ( 1 .. $length );
    my @alignment;
    my @witnesses = sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses;
    for my $wit ( @witnesses ) {
        # One row for the witness, and one more for its a.c. layer if any.
        my @labels = ( $wit->sigil );
        push( @labels, $wit->sigil . $self->ac_label ) if $wit->is_layered;
        for my $label ( @labels ) {
            my @path = $self->reading_sequence( $self->start, $self->end, $label );
            my @row = _make_witness_row( \@path, \@all_pos );
            push( @alignment, { 'witness' => $label, 'tokens' => \@row } );
        }
    }
    my $table = { 'alignment' => \@alignment, 'length' => $length };
    $self->cached_table( $table );
    return $table;
}
1316
# Lay the readings of one witness path out over a list of rank positions.
# $path is an arrayref of reading objects and $positions an arrayref of ranks.
# The row that is returned has one element per position: { t => $reading } if
# the path has a reading of that rank, and undef if it has none. An empty spot
# that comes directly after a lacuna is filled with the element of that
# lacuna, so that a lacuna stretches up to the next reading of the witness.
# Readings whose rank is not among the positions do not appear in the row.
# With no positions at all the row is empty.
sub _make_witness_row {
    my( $path, $positions ) = @_;
    # Without this a lone undef would come back for a table of length zero.
    return unless @$positions;

    my %at_rank;
    foreach my $rdg ( @$path ) {
        $at_rank{$rdg->rank} = { 't' => $rdg };
    }

    my @row;
    my $last_el;
    foreach my $pos ( @$positions ) {
        my $el = $at_rank{$pos};
        # The lacuna node appears as many times as it has ranks to cover.
        if( !defined $el && $last_el && $last_el->{'t'}->is_lacuna ) {
            $el = $last_el;
        }
        push( @row, $el );
        $last_el = $el;
    }
    return @row;
}
1344
1345 =head1 NAVIGATION METHODS
1346
=head2 reading_sequence( $first, $last, $sigil, $backup )

Returns the ordered list of readings, starting with $first and ending
with $last, for the witness given in $sigil; if no $sigil is given, the
base text label of the collation is followed. Each step is made with
next_reading, and that is where a layered witness falls back on its main
witness, or any witness on the base text, wherever it has no path of its
own. The $backup argument named above is not read by the method as it
stands.

Throws if the path comes back to a reading that was already visited, or
if it breaks off before $last is reached.

=cut

# TODO Think about returning some lazy-eval iterator.
# TODO Get rid of backup; we should know from what witness is whether we need it.

sub reading_sequence {
    my( $self, $start, $end, $witness ) = @_;
    $witness ||= $self->baselabel;

    my @readings = ( $start );
    my %visited;
    my $current = $start;
    while( $current && $current->id ne $end->id ) {
        my $id = $current->id;
        throw( "Detected loop for $witness at " . $id ) if $visited{$id}++;
        $current = $self->next_reading( $current, $witness )
            or throw( "Did not find any path for $witness from reading " . $id );
        push( @readings, $current );
    }
    # Check that the last reading is our end reading.
    throw( "Last reading found from " . $start->text .
        " for witness $witness is not the end!" ) # TODO do we get this far?
        unless $readings[-1]->id eq $end->id;

    return @readings;
}
1388
=head2 next_reading( $reading, $sigil );

Returns the reading that follows the given reading along the given witness
path, or undef if no such reading is found.

=cut

sub next_reading {
    my( $self, @where ) = @_;
    # Look for the ID of the successor on the corresponding path, and hand
    # back the reading object that goes with it.
    my $next_id = $self->_find_linked_reading( 'next', @where );
    return $next_id ? $self->reading( $next_id ) : undef;
}
1403
=head2 prior_reading( $reading, $sigil )

Returns the reading that precedes the given reading along the given witness
path, or undef if no such reading is found (e.g. for a reading that has no
incoming path).

=cut

sub prior_reading {
    # Return the predecessor via the corresponding path.
    my $self = shift;
    my $answer = $self->_find_linked_reading( 'prior', @_ );
    # Without a predecessor there is no ID to look up. Answer undef, as
    # next_reading does, rather than hand an undefined key to the readings
    # hash.
    return undef unless $answer;
    return $self->reading( $answer );
}
1417
# Find the reading ID that is linked to $node along the witness path $path,
# looking forward if $direction is 'next' and backward if it is anything else.
# The edges at $node are tried in turn:
#  - the first edge that carries every witness named in $path wins outright;
#  - failing that, the last edge seen that carries the main witness of a
#    layered $path (the sigil without the a.c. label) is used;
#  - failing that, the last edge seen that carries the base label is used.
# Returns undef if $node has no edges in that direction, and warns and returns
# undef if none of the edges will do.
sub _find_linked_reading {
    my( $self, $direction, $node, $path ) = @_;
    my $forward = $direction eq 'next';
    # The far end of an edge is its target going forward, its source if not.
    my $far = $forward ? 1 : 0;

    # Get a backup if we are dealing with a layered witness
    my $aclabel = $self->ac_label;
    my $alt_path;
    $alt_path = $1 if $path && $path =~ /^(.*)\Q$aclabel\E$/;

    my @linked_paths = $forward
        ? $self->sequence->edges_from( $node )
        : $self->sequence->edges_to( $node );
    return undef unless scalar( @linked_paths );

    # We have to find the linked path that contains all of the
    # witnesses supplied in $path.
    my @path_wits = $path
        ? sort( $self->_witnesses_of_label( $path ) ) : ();
    my @alt_path_wits = $alt_path
        ? sort( $self->_witnesses_of_label( $alt_path ) ) : ();
    my( $base_le, $alt_le );
    for my $le ( @linked_paths ) {
        $base_le = $le
            if $self->sequence->has_edge_attribute( @$le, $self->baselabel );
        my @le_wits = sort( $self->path_witnesses( $le ) );
        # This is the right path.
        return $le->[$far] if _is_within( \@path_wits, \@le_wits );
        $alt_le = $le if _is_within( \@alt_path_wits, \@le_wits );
    }

    # Got this far? Return the alternate path if it exists, and the base
    # path if it does not.
    for my $fallback ( $alt_le, $base_le ) {
        return $fallback->[$far] if $fallback;
    }

    # Got this far? We have no appropriate path.
    warn "Could not find $direction node from " . $node->id
        . " along path $path";
    return undef;
}
1465
# Some set logic. Takes two arrayrefs and tells whether every element of the
# first is to be found in the second: the answer is the number of elements in
# the first set if so, and 0 if not. An empty first set counts as not within.
sub _is_within {
    my( $subset, $superset ) = @_;
    return 0 unless @$subset;
    for my $member ( @$subset ) {
        # One stray member settles the matter.
        return 0 unless grep { /^\Q$member\E$/ } @$superset;
    }
    return scalar @$subset;
}
1475
# Take a path label apart into the witness sigils that it names. The label is
# split on the string that the collation uses to join witnesses for display
# (wit_list_separator), which is matched literally.
sub _witnesses_of_label {
    my( $self, $label ) = @_;
    my $separator = $self->wit_list_separator;
    my @sigla = split( /\Q$separator\E/, $label );
    return @sigla;
}
1484
=head2 common_readings

Returns the list of common readings in the graph (i.e. those readings that are
shared by all non-lacunose witnesses.) A reading is included if its is_common
flag is set.

=cut

sub common_readings {
    my $self = shift;
    my @common;
    for my $rdg ( $self->readings ) {
        push( @common, $rdg ) if $rdg->is_common;
    }
    return @common;
}
1497
=head2 path_text( $sigil, [, $start, $end ] )

Returns the text of a witness (plus its backup, if we are using a layer)
as stored in the collation.  The text is returned as a string, where the
individual readings are joined with spaces and the meta-readings (e.g.
lacunae) are omitted.  No space is put between two readings if the first
is marked join_next or the second is marked join_prior.  Optional
specification of $start and $end allows the generation of a subset of the
witness text; they default to the start and end of the collation.

=cut

sub path_text {
    my( $self, $wit, $start, $end ) = @_;
    $start ||= $self->start;
    $end ||= $self->end;
    my $pathtext = '';
    my $prev;
    for my $rdg ( $self->reading_sequence( $start, $end, $wit ) ) {
        next if $rdg->is_meta;
        # A space goes in between, unless either side asks to be joined.
        $pathtext .= ' '
            if $prev && !$rdg->join_prior && !$prev->join_next;
        $pathtext .= $rdg->text;
        $prev = $rdg;
    }
    return $pathtext;
}
1524
1525 =head1 INITIALIZATION METHODS
1526
1527 These are mostly for use by parsers.
1528
1529 =head2 make_witness_path( $witness )
1530
1531 Link the array of readings contained in $witness->path (and in
1532 $witness->uncorrected_path if it exists) into collation paths.
1533 Clear out the arrays when finished.
1534
1535 =head2 make_witness_paths
1536
1537 Call make_witness_path for all witnesses in the tradition.
1538
1539 =cut
1540
1541 # For use when a collation is constructed from a base text and an apparatus.
1542 # We have the sequences of readings and just need to add path edges.
1543 # When we are done, clear out the witness path attributes, as they are no
1544 # longer needed.
1545 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1546
# Run make_witness_path for each witness of the tradition in turn.
sub make_witness_paths {
    my( $self ) = @_;
    $self->make_witness_path( $_ ) for $self->tradition->witnesses;
}
1554
sub make_witness_path {
    my( $self, $wit ) = @_;
    my $sig = $wit->sigil;
    my( $start, $end ) = ( $self->start, $self->end );

    # Copy a list of readings and make sure it is bracketed by the
    # collation's start and end readings.  An empty list becomes
    # ( start, end ); the explicit emptiness check avoids comparing an
    # undefined first element with 'eq'.
    my $bracketed = sub {
        my @chain = @{ $_[0] };
        unshift( @chain, $start ) unless @chain && $chain[0] eq $start;
        push( @chain, $end ) unless $chain[-1] eq $end;
        return @chain;
    };

    # Main text: one path edge per adjacent pair, labelled with the sigil.
    my @chain = $bracketed->( $wit->path );
    foreach my $idx ( 0 .. $#chain - 1 ) {
        $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
    }

    # Layered witness: add edges for the uncorrected text under the sigil
    # plus the a.c. label, but only where the main text does not already
    # have the same edge.
    if( $wit->is_layered ) {
        my $ac_sig = $sig . $self->ac_label;
        my @ac_chain = $bracketed->( $wit->uncorrected_path );
        foreach my $idx ( 0 .. $#ac_chain - 1 ) {
            my( $source, $target ) = @ac_chain[ $idx, $idx + 1 ];
            $self->add_path( $source, $target, $ac_sig )
                unless $self->has_path( $source, $target, $sig );
        }
    }

    # The reading arrays on the witness are no longer needed.
    $wit->clear_path;
    $wit->clear_uncorrected_path;
}
1579
1580 =head2 calculate_ranks
1581
1582 Calculate the reading ranks (that is, their aligned positions relative
1583 to each other) for the graph.  This can only be called on linear collations.
1584
1585 =begin testing
1586
1587 use Text::Tradition;
1588
1589 my $cxfile = 't/data/Collatex-16.xml';
1590 my $t = Text::Tradition->new(
1591     'name'  => 'inline',
1592     'input' => 'CollateX',
1593     'file'  => $cxfile,
1594     );
1595 my $c = $t->collation;
1596
1597 # Build the alignment table, which should get cached as a side effect
1598 my $table = $c->alignment_table;
1599 ok( $c->has_cached_table, "Alignment table was cached" );
1600 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1601 $c->calculate_ranks;
1602 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1603 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1604 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1605
1606 =end testing
1607
1608 =cut
1609
sub calculate_ranks {
    my $self = shift;
    # Remember the current ranks, so that we can tell afterwards whether
    # the cached alignment table has become stale.
    my %existing_ranks;
    foreach my $r ( $self->readings ) {
        $existing_ranks{$r} = $r->rank;
    }

    # Do the rankings based on the relationship equivalence graph, starting
    # with the start node.  Only the node => rank map is needed here.
    my( $node_ranks ) = $self->relations->equivalence_ranks();

    # Transfer our rankings from the topological graph to the real one.
    foreach my $r ( $self->readings ) {
        my $rank = $node_ranks->{ $self->equivalence( $r->id ) };
        if( defined $rank ) {
            $r->rank( $rank );
            next;
        }
        # Die. Report the highest-ranked reading we did manage to rank.
        # Defined-or keeps a legitimate rank of 0 distinct from "no rank".
        my $rank_of = sub {
            return $node_ranks->{ $self->equivalence( $_[0]->id ) } // -1;
        };
        my @by_rank = sort { $rank_of->( $a ) <=> $rank_of->( $b ) }
            $self->readings;
        my $last = pop @by_rank;
        throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
    }

    # Do we need to invalidate the cached data?
    if( $self->has_cached_table ) {
        foreach my $r ( $self->readings ) {
            next if defined( $existing_ranks{$r} )
                && $existing_ranks{$r} == $r->rank;
            # Something has changed, so clear the cache...
            $self->_clear_cache;
            # ...and recalculate the common readings.
            $self->calculate_common_readings();
            last;
        }
    }
    # The graph calculation information is now up to date.
    $self->_graphcalc_done(1);
}
1648
sub _clear_cache {
    my( $self ) = @_;
    # Only wipe the alignment table when one has actually been cached.
    if( $self->has_cached_table ) {
        $self->wipe_table;
    }
}
1653
1654
1655 =head2 flatten_ranks
1656
1657 A convenience method for parsing collation data.  Searches the graph for readings
1658 with the same text at the same rank, and merges any that are found.
1659
1660 =cut
1661
sub flatten_ranks {
    my $self = shift;
    # For each "rank||text" key, the readings kept (unmerged) so far.
    # More than one can coexist under a key when their grammatical forms
    # are incompatible; remembering all of them lets a later reading merge
    # with whichever one it actually matches.
    my %kept_for;
    my $changed;

    my $form_of = sub {
        return join( '//', map { $_->form->to_string } $_[0]->lexemes );
    };
    # Two readings may be merged when neither is disambiguated, or when
    # both are and their lexeme forms are identical.
    my $mergeable = sub {
        my( $kept, $rdg ) = @_;
        if( $rdg->disambiguated && $kept->disambiguated ) {
            return $form_of->( $rdg ) eq $form_of->( $kept );
        }
        return !( $rdg->disambiguated xor $kept->disambiguated );
    };

    foreach my $rdg ( $self->readings ) {
        next unless $rdg->has_rank;
        my $key = $rdg->rank . "||" . $rdg->text;
        my( $match ) = grep { $mergeable->( $_, $rdg ) }
            @{ $kept_for{$key} ||= [] };
        if( $match ) {
            # Combine!
            #say STDERR "Combining readings at same rank: $key";
            $changed = 1;
            $self->merge_readings( $match, $rdg );
            # TODO see if this now makes a common point.
        } else {
            push( @{ $kept_for{$key} }, $rdg );
        }
    }
    # If we merged readings, the ranks are still fine but the alignment
    # table is wrong. Wipe it.
    $self->wipe_table() if $changed;
}
1692
1693
1694 =head2 calculate_common_readings
1695
1696 Goes through the graph identifying the readings that appear in every witness
1697 (apart from those with lacunae at that spot.) Marks them as common and returns
1698 the list.
1699
1700 =begin testing
1701
use Text::Tradition;

my $cxfile = 't/data/Collatex-16.xml';
my $t = Text::Tradition->new(
    'name'  => 'inline',
    'input' => 'CollateX',
    'file'  => $cxfile,
    );
my $c = $t->collation;

# The returned list and the list of readings flagged as common must agree.
my @common = $c->calculate_common_readings();
is( scalar @common, 8, "Found correct number of common readings" );
my @marked = sort $c->common_readings();
# Check the marked list here, not the returned list a second time.
is( scalar @marked, 8, "All common readings got marked as such" );
my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1718
1719 =end testing
1720
1721 =cut
1722
sub calculate_common_readings {
    my $self = shift;
    # Start from a clean slate: nothing is common until proven otherwise.
    foreach my $rdg ( $self->readings ) {
        $rdg->is_common( 0 );
    }
    # Implicitly calls calculate_ranks
    my $table = $self->alignment_table;
    my @common;
    foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
        # Collect the distinct non-meta readings found at this position,
        # keyed by id.  A witness with nothing at this position is recorded
        # under the sentinel key 'UNDEF'.
        my %seen;
        foreach my $column ( @{$table->{'alignment'}} ) {
            my $cell = $column->{'tokens'}->[$idx];
            my $rdg = $cell ? $cell->{'t'} : '';
            if( !$rdg ) {
                $seen{'UNDEF'} = $rdg;
            } elsif( !$rdg->is_meta ) {
                $seen{$rdg->id} = $rdg;
            }
        }
        # Common means: exactly one distinct reading, and no gaps.
        next if exists $seen{'UNDEF'};
        next unless keys( %seen ) == 1;
        my( $only ) = values %seen;
        $only->is_common( 1 );
        push( @common, $only );
    }
    return @common;
}
1749
1750 =head2 text_from_paths
1751
1752 Calculate the text array for all witnesses from the path, for later consistency
1753 checking.  Only to be used if there is no non-graph-based way to know the
1754 original texts.
1755
1756 =cut
1757
sub text_from_paths {
    my $self = shift;
    # Walk the path for one sigil from start to end and collect the text
    # of every non-meta reading on it.
    my $words_for = sub {
        my( $sigil ) = @_;
        my @seq = $self->reading_sequence( $self->start, $self->end, $sigil );
        return [ map { $_->text } grep { !$_->is_meta } @seq ];
    };

    foreach my $wit ( $self->tradition->witnesses ) {
        $wit->text( $words_for->( $wit->sigil ) );
        # Layered witnesses also get the text of their a.c. path.
        next unless $wit->is_layered;
        $wit->layertext( $words_for->( $wit->sigil . $self->ac_label ) );
    }
}
1780
1781 =head1 UTILITY FUNCTIONS
1782
1783 =head2 common_predecessor( $reading_a, $reading_b )
1784
1785 Find the last reading that occurs in sequence before both the given readings.
1786 At the very least this should be $self->start.
1787
1788 =head2 common_successor( $reading_a, $reading_b )
1789
1790 Find the first reading that occurs in sequence after both the given readings.
1791 At the very least this should be $self->end.
1792
1793 =begin testing
1794
1795 use Text::Tradition;
1796
1797 my $cxfile = 't/data/Collatex-16.xml';
1798 my $t = Text::Tradition->new(
1799     'name'  => 'inline',
1800     'input' => 'CollateX',
1801     'file'  => $cxfile,
1802     );
1803 my $c = $t->collation;
1804
1805 is( $c->common_predecessor( 'n24', 'n23' )->id,
1806     'n20', "Found correct common predecessor" );
1807 is( $c->common_successor( 'n24', 'n23' )->id,
1808     '__END__', "Found correct common successor" );
1809
1810 is( $c->common_predecessor( 'n19', 'n17' )->id,
1811     'n16', "Found correct common predecessor for readings on same path" );
1812 is( $c->common_successor( 'n21', 'n10' )->id,
1813     '__END__', "Found correct common successor for readings on same path" );
1814
1815 =end testing
1816
1817 =cut
1818
# Return the closest reading that precedes both of the given readings.
# Arguments are passed through _objectify_args before the search.
sub common_predecessor {
    my( $self, @args ) = @_;
    my( $r1, $r2 ) = $self->_objectify_args( @args );
    return $self->_common_in_path( $r1, $r2, 'predecessors' );
}
1825
# Return the closest reading that follows both of the given readings.
# Arguments are passed through _objectify_args before the search.
sub common_successor {
    my( $self, @args ) = @_;
    my( $r1, $r2 ) = $self->_objectify_args( @args );
    return $self->_common_in_path( $r1, $r2, 'successors' );
}
1831
1832
# TODO think about how to do this without ranks...
# Search outward from $r1 and $r2 in lockstep, following the $dir method
# ('predecessors' or 'successors') of each reading.  A reading reached from
# one side that was already claimed by the other side is a candidate.  The
# search stops after the first round that yields candidates, or after as
# many rounds as the rank of the end reading.  Of the candidates, the
# highest-ranked is returned for predecessors and the lowest-ranked for
# successors; undef if there were none.
sub _common_in_path {
    my( $self, $r1, $r2, $dir ) = @_;
    my $rounds_left = $self->end->rank;   # Avoid looping infinitely
    my %frontier = ( 'r1' => [ $r1 ], 'r2' => [ $r2 ] );
    my %claimed_by;   # reading id => side that reached it first
    my @candidates;

    # Advance one side by one step.  Returns the readings newly claimed by
    # that side; pushes onto @candidates any reading the other side owns.
    # The starting readings themselves are never claimed, so neither of
    # them can be returned as the answer.
    my $expand = sub {
        my( $side ) = @_;
        my @reached;
        foreach my $from ( @{ $frontier{$side} } ) {
            foreach my $nb ( $from->$dir ) {
                my $owner = $claimed_by{ $nb->id };
                if( !$owner ) {
                    $claimed_by{ $nb->id } = $side;
                    push( @reached, $nb );
                } elsif( $owner ne $side ) {
                    push( @candidates, $nb );
                }
            }
        }
        return \@reached;
    };

    until( @candidates ) {
        last unless $rounds_left--;
        # Both sides advance from the frontiers of the previous round;
        # r1 always goes first.
        my %next = map { $_ => $expand->( $_ ) } qw( r1 r2 );
        %frontier = %next;
    }

    my @by_rank = sort { $a->rank <=> $b->rank } @candidates;
    return $dir eq 'predecessors' ? pop( @by_rank ) : shift( @by_rank );
}
1875
# Raise a Text::Tradition::Error identified as a collation error, carrying
# the given message.
sub throw {
    my( $message ) = @_;
    Text::Tradition::Error->throw(
        'ident'   => 'Collation error',
        'message' => $message,
    );
}
1882
1883 no Moose;
1884 __PACKAGE__->meta->make_immutable;
1885
1886 =head1 LICENSE
1887
1888 This package is free software and is provided "as is" without express
1889 or implied warranty.  You can redistribute it and/or modify it under
1890 the same terms as Perl itself.
1891
1892 =head1 AUTHOR
1893
1894 Tara L Andrews E<lt>aurum@cpan.orgE<gt>