lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use Text::CSV;
  10 use Text::Tradition::Collation::Reading;
  11 use Text::Tradition::Collation::RelationshipStore;
  12 use Text::Tradition::Error;
  13 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  14 use XML::LibXML;
  15 use XML::LibXML::XPathContext;
  16 use Moose;
  17
  18 has 'sequence' => (
  19     is => 'ro',
  20     isa => 'Graph',
  21     default => sub { Graph->new() },
  22     handles => {
  23         paths => 'edges',
  24     },
  25     );
  26
  27 has 'relations' => (
  28         is => 'ro',
  29         isa => 'Text::Tradition::Collation::RelationshipStore',
  30         handles => {
  31                 relationships => 'relationships',
  32                 related_readings => 'related_readings',
  33                 get_relationship => 'get_relationship',
  34                 del_relationship => 'del_relationship',
  35                 equivalence => 'equivalence',
  36                 equivalence_graph => 'equivalence_graph',
  37         },
  38         writer => '_set_relations',
  39         );
  40
  41 has 'tradition' => (
  42     is => 'ro',
  43     isa => 'Text::Tradition',
  44     writer => '_set_tradition',
  45     weak_ref => 1,
  46     );
  47
  48 has 'readings' => (
  49         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  50         traits => ['Hash'],
  51     handles => {
  52         reading     => 'get',
  53         _add_reading => 'set',
  54         del_reading => 'delete',
  55         has_reading => 'exists',
  56         readings   => 'values',
  57     },
  58     default => sub { {} },
  59         );
  60
  61 has 'wit_list_separator' => (
  62     is => 'rw',
  63     isa => 'Str',
  64     default => ', ',
  65     );
  66
  67 has 'baselabel' => (
  68     is => 'rw',
  69     isa => 'Str',
  70     default => 'base text',
  71     );
  72
  73 has 'linear' => (
  74     is => 'rw',
  75     isa => 'Bool',
  76     default => 1,
  77     );
  78
  79 has 'ac_label' => (
  80     is => 'rw',
  81     isa => 'Str',
  82     default => ' (a.c.)',
  83     );
  84
  85 has 'wordsep' => (
  86         is => 'rw',
  87         isa => 'Str',
  88         default => ' ',
  89         );
  90
  91 has 'start' => (
  92         is => 'ro',
  93         isa => 'Text::Tradition::Collation::Reading',
  94         writer => '_set_start',
  95         weak_ref => 1,
  96         );
  97
  98 has 'end' => (
  99         is => 'ro',
 100         isa => 'Text::Tradition::Collation::Reading',
 101         writer => '_set_end',
 102         weak_ref => 1,
 103         );
 104
 105 has 'cached_table' => (
 106         is => 'rw',
 107         isa => 'HashRef',
 108         predicate => 'has_cached_table',
 109         clearer => 'wipe_table',
 110         );
 111
 112 has '_graphcalc_done' => (
 113         is => 'rw',
 114         isa => 'Bool',
 115         default => undef,
 116         );
 117
 118 =head1 NAME
 119
 120 Text::Tradition::Collation - a software model for a text collation
 121
 122 =head1 SYNOPSIS
 123
 124   use Text::Tradition;
 125   my $t = Text::Tradition->new(
 126     'name' => 'this is a text',
 127     'input' => 'TEI',
 128     'file' => '/path/to/tei_parallel_seg_file.xml' );
 129
 130   my $c = $t->collation;
 131   my @readings = $c->readings;
 132   my @paths = $c->paths;
 133   my @relationships = $c->relationships;
 134
 135   my $svg_variant_graph = $t->collation->as_svg();
 136
 137 =head1 DESCRIPTION
 138
 139 Text::Tradition is a library for representation and analysis of collated
 140 texts, particularly medieval ones.  The Collation is the central feature of
 141 a Tradition, where the text, its sequence of readings, and its relationships
 142 between readings are actually kept.
 143
 144 =head1 CONSTRUCTOR
 145
 146 =head2 new
 147
 148 The constructor.  Takes a hash or hashref of the following arguments:
 149
 150 =over
 151
 152 =item * tradition - The Text::Tradition object to which the collation
 153 belongs. Required.
 154
 155 =item * linear - Whether the collation should be linear; that is, whether
 156 transposed readings should be treated as two linked readings rather than one,
 157 and therefore whether the collation graph is acyclic.  Defaults to true.
 158
 159 =item * baselabel - The default label for the path taken by a base text
 160 (if any). Defaults to 'base text'.
 161
 162 =item * wit_list_separator - The string to join a list of witnesses for
 163 purposes of making labels in display graphs.  Defaults to ', '.
 164
 165 =item * ac_label - The extra label to tack onto a witness sigil when
 166 representing another layer of path for the given witness - that is, when
 167 a text has more than one possible reading due to scribal corrections or
 168 the like.  Defaults to ' (a.c.)'.
 169
 170 =item * wordsep - The string used to separate words in the original text.
 171 Defaults to ' '.
 172
 173 =back
 174
 175 =head1 ACCESSORS
 176
 177 =head2 tradition
 178
 179 =head2 linear
 180
 181 =head2 wit_list_separator
 182
 183 =head2 baselabel
 184
 185 =head2 ac_label
 186
 187 =head2 wordsep
 188
 189 Simple accessors for collation attributes.
 190
 191 =head2 start
 192
 193 The meta-reading at the start of every witness path.
 194
 195 =head2 end
 196
 197 The meta-reading at the end of every witness path.
 198
 199 =head2 readings
 200
 201 Returns all Reading objects in the graph.
 202
 203 =head2 reading( $id )
 204
 205 Returns the Reading object corresponding to the given ID.
 206
 207 =head2 add_reading( $reading_args )
 208
 209 Adds a new reading object to the collation.
 210 See L<Text::Tradition::Collation::Reading> for the available arguments.
 211
 212 =head2 del_reading( $object_or_id )
 213
 214 Removes the given reading from the collation, implicitly removing its
 215 paths and relationships.
 216
 217 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 218
 219 Merges the $second reading into the $main one. If $concatenate is true, then
 220 the merged node will carry the text of both readings, concatenated with either
 221 $with_str (if specified) or a sensible default (the empty string if the
 222 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 223
 224 The first two arguments may be either readings or reading IDs.
 225
 226 =head2 has_reading( $id )
 227
 228 Predicate to see whether a given reading ID is in the graph.
 229
 230 =head2 reading_witnesses( $object_or_id )
 231
 232 Returns a list of sigils whose witnesses contain the reading.
 233
 234 =head2 paths
 235
 236 Returns all reading paths within the document - that is, all edges in the
 237 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 238
 239 =head2 add_path( $source, $target, $sigil )
 240
 241 Links the given readings in the collation in sequence, under the given witness
 242 sigil.  The readings may be specified by object or ID.
 243
 244 =head2 del_path( $source, $target, $sigil )
 245
  246 Removes the link between the given readings for the given witness sigil; the
  247 path itself is deleted once no witness uses it.  The readings may be specified by object or ID.
 248
 249 =head2 has_path( $source, $target );
 250
 251 Returns true if the two readings are linked in sequence in any witness.
 252 The readings may be specified by object or ID.
 253
 254 =head2 relationships
 255
 256 Returns all Relationship objects in the collation.
 257
 258 =head2 add_relationship( $reading, $other_reading, $options )
 259
 260 Adds a new relationship of the type given in $options between the two readings,
 261 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 262 where $status is true on success, and @vectors is a list of relationship edges
 263 that were ultimately added.
 264 See L<Text::Tradition::Collation::Relationship> for the available options.
 265
 266 =cut
 267
 268 sub BUILD {
 269     my $self = shift;
 270     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 271     $self->_set_start( $self->add_reading(
 272         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 273     $self->_set_end( $self->add_reading(
 274         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 275 }
 276
 277 ### Reading construct/destruct functions
 278
 279 sub add_reading {
 280         my( $self, $reading ) = @_;
 281         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 282                 my %args = %$reading;
 283                 if( $args{'init'} ) {
 284                         # If we are initializing an empty collation, don't assume that we
 285                         # have set a tradition.
 286                         delete $args{'init'};
 287                 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
 288                         $args{'language'} = $self->tradition->language;
 289                 }
 290                 $reading = Text::Tradition::Collation::Reading->new(
 291                         'collation' => $self,
 292                         %args );
 293         }
 294         # First check to see if a reading with this ID exists.
 295         if( $self->reading( $reading->id ) ) {
 296                 throw( "Collation already has a reading with id " . $reading->id );
 297         }
 298         $self->_graphcalc_done(0);
 299         $self->_add_reading( $reading->id => $reading );
 300         # Once the reading has been added, put it in both graphs.
 301         $self->sequence->add_vertex( $reading->id );
 302         $self->relations->add_reading( $reading->id );
 303         return $reading;
 304 };
 305
 306 around del_reading => sub {
 307         my $orig = shift;
 308         my $self = shift;
 309         my $arg = shift;
 310
 311         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 312                 $arg = $arg->id;
 313         }
 314         # Remove the reading from the graphs.
 315         $self->_graphcalc_done(0);
 316         $self->_clear_cache; # Explicitly clear caches to GC the reading
 317         $self->sequence->delete_vertex( $arg );
 318         $self->relations->delete_reading( $arg );
 319
 320         # Carry on.
 321         $self->$orig( $arg );
 322 };
 323
 324 =begin testing
 325
 326 use Text::Tradition;
 327
 328 my $cxfile = 't/data/Collatex-16.xml';
 329 my $t = Text::Tradition->new(
 330     'name'  => 'inline',
 331     'input' => 'CollateX',
 332     'file'  => $cxfile,
 333     );
 334 my $c = $t->collation;
 335
 336 my $rno = scalar $c->readings;
 337 # Split n21 for testing purposes
 338 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 339 my $old_r = $c->reading( 'n21' );
 340 $old_r->alter_text( 'to' );
 341 $c->del_path( 'n20', 'n21', 'A' );
 342 $c->add_path( 'n20', 'n21p0', 'A' );
 343 $c->add_path( 'n21p0', 'n21', 'A' );
 344 $c->flatten_ranks();
 345 ok( $c->reading( 'n21p0' ), "New reading exists" );
 346 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 347
 348 # Combine n3 and n4 ( with his )
 349 $c->merge_readings( 'n3', 'n4', 1 );
 350 ok( !$c->reading('n4'), "Reading n4 is gone" );
 351 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 352
 353 # Collapse n9 and n10 ( rood / root )
 354 $c->merge_readings( 'n9', 'n10' );
 355 ok( !$c->reading('n10'), "Reading n10 is gone" );
 356 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 357
 358 # Combine n21 and n21p0
 359 my $remaining = $c->reading('n21');
 360 $remaining ||= $c->reading('n22');  # one of these should still exist
 361 $c->merge_readings( 'n21p0', $remaining, 1 );
 362 ok( !$c->reading('n21'), "Reading $remaining is gone" );
 363 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
 364
 365 =end testing
 366
 367 =cut
 368
 369 sub merge_readings {
 370         my $self = shift;
 371
 372         # Sanity check
 373         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 374         my $mergemeta = $kept_obj->is_meta;
 375         throw( "Cannot merge meta and non-meta reading" )
 376                 unless ( $mergemeta && $del_obj->is_meta )
 377                         || ( !$mergemeta && !$del_obj->is_meta );
 378         if( $mergemeta ) {
 379                 throw( "Cannot merge with start or end node" )
 380                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 381                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 382         }
 383         # We only need the IDs for adding paths to the graph, not the reading
 384         # objects themselves.
 385         my $kept = $kept_obj->id;
 386         my $deleted = $del_obj->id;
 387         $self->_graphcalc_done(0);
 388
 389     # The kept reading should inherit the paths and the relationships
 390     # of the deleted reading.
 391         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 392                 my @vector = ( $kept );
 393                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 394                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 395                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 396                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 397                 $self->sequence->add_edge( @vector );
 398                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 399                 @wits{keys %$fwits} = values %$fwits;
 400                 $self->sequence->set_edge_attributes( @vector, \%wits );
 401         }
 402         $self->relations->merge_readings( $kept, $deleted, $combine );
 403
 404         # Do the deletion deed.
 405         if( $combine ) {
 406                 # Combine the text of the readings
 407                 my $joinstr = $combine_char;
 408                 unless( defined $joinstr ) {
 409                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 410                         $joinstr = $self->wordsep unless defined $joinstr;
 411                 }
 412                 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
 413                 # Change this reading to a joining one if necessary
 414                 $kept_obj->_set_join_next( $del_obj->join_next );
 415                 $kept_obj->normal_form(
 416                         join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
 417                 # Combine the lexemes present in the readings
 418                 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
 419                         $kept_obj->add_lexeme( $del_obj->lexemes );
 420                 }
 421         }
 422         $self->del_reading( $deleted );
 423 }
 424
 425 =head2 compress_readings
 426
 427 Where possible in the graph, compresses plain sequences of readings into a
 428 single reading. The sequences must consist of readings with no
 429 relationships to other readings, with only a single witness path between
 430 them and no other witness paths from either that would skip the other. The
 431 readings must also not be marked as nonsense or bad grammar.
 432
 433 WARNING: This operation cannot be undone.
 434
 435 =cut
 436
 437 sub compress_readings {
 438         my $self = shift;
 439         # Anywhere in the graph that there is a reading that joins only to a single
 440         # successor, and neither of these have any relationships, just join the two
 441         # readings.
 442         my %gobbled;
 443         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 444                 next if $rdg->is_meta;
 445                 next if $gobbled{$rdg->id};
 446                 next if $rdg->grammar_invalid || $rdg->is_nonsense;
 447                 next if $rdg->related_readings();
 448                 my %seen;
 449                 while( $self->sequence->successors( $rdg ) == 1 ) {
 450                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 451                         throw( "Infinite loop" ) if $seen{$next->id};
 452                         $seen{$next->id} = 1;
 453                         last if $self->sequence->predecessors( $next ) > 1;
 454                         last if $next->is_meta;
 455                         last if $next->grammar_invalid || $next->is_nonsense;
 456                         last if $next->related_readings();
 457                         say "Joining readings $rdg and $next";
 458                         $self->merge_readings( $rdg, $next, 1 );
 459                 }
 460         }
 461         # Make sure we haven't screwed anything up
 462         foreach my $wit ( $self->tradition->witnesses ) {
 463                 my $pathtext = $self->path_text( $wit->sigil );
 464                 my $origtext = join( ' ', @{$wit->text} );
 465                 throw( "Text differs for witness " . $wit->sigil )
 466                         unless $pathtext eq $origtext;
 467                 if( $wit->is_layered ) {
 468                         $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
 469                         $origtext = join( ' ', @{$wit->layertext} );
 470                         throw( "Ante-corr text differs for witness " . $wit->sigil )
 471                                 unless $pathtext eq $origtext;
 472                 }
 473         }
 474
 475         $self->relations->rebuild_equivalence();
 476         $self->calculate_ranks();
 477 }
 478
 479 # Helper function for manipulating the graph.
 480 sub _stringify_args {
 481         my( $self, $first, $second, @args ) = @_;
 482     $first = $first->id
 483         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 484     $second = $second->id
 485         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 486     return( $first, $second, @args );
 487 }
 488
 489 # Helper function for manipulating the graph.
 490 sub _objectify_args {
 491         my( $self, $first, $second, $arg ) = @_;
 492     $first = $self->reading( $first )
 493         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 494     $second = $self->reading( $second )
 495         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 496     return( $first, $second, $arg );
 497 }
 498 ### Path logic
 499
 500 sub add_path {
 501         my $self = shift;
 502
 503         # We only need the IDs for adding paths to the graph, not the reading
 504         # objects themselves.
 505     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 506
 507         $self->_graphcalc_done(0);
 508         # Connect the readings
 509         unless( $self->sequence->has_edge( $source, $target ) ) {
 510             $self->sequence->add_edge( $source, $target );
 511             $self->relations->add_equivalence_edge( $source, $target );
 512         }
 513     # Note the witness in question
 514     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 515 }
 516
 517 sub del_path {
 518         my $self = shift;
 519         my @args;
 520         if( ref( $_[0] ) eq 'ARRAY' ) {
 521                 my $e = shift @_;
 522                 @args = ( @$e, @_ );
 523         } else {
 524                 @args = @_;
 525         }
 526
 527         # We only need the IDs for adding paths to the graph, not the reading
 528         # objects themselves.
 529     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 530
 531         $self->_graphcalc_done(0);
 532         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 533                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 534         }
 535         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 536                 $self->sequence->delete_edge( $source, $target );
 537                 $self->relations->delete_equivalence_edge( $source, $target );
 538         }
 539 }
 540
 541
 542 # Extra graph-alike utility
 543 sub has_path {
 544         my $self = shift;
 545     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 546         return undef unless $self->sequence->has_edge( $source, $target );
 547         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 548 }
 549
 550 =head2 clear_witness( @sigil_list )
 551
 552 Clear the given witnesses out of the collation entirely, removing references
 553 to them in paths, and removing readings that belong only to them.  Should only
 554 be called via $tradition->del_witness.
 555
 556 =cut
 557
 558 sub clear_witness {
 559         my( $self, @sigils ) = @_;
 560
 561         $self->_graphcalc_done(0);
 562         # Clear the witness(es) out of the paths
 563         foreach my $e ( $self->paths ) {
 564                 foreach my $sig ( @sigils ) {
 565                         $self->del_path( $e, $sig );
 566                 }
 567         }
 568
 569         # Clear out the newly unused readings
 570         foreach my $r ( $self->readings ) {
 571                 unless( $self->reading_witnesses( $r ) ) {
 572                         $self->del_reading( $r );
 573                 }
 574         }
 575 }
 576
 577 sub add_relationship {
 578         my $self = shift;
 579     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 580     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 581         $self->_graphcalc_done(0);
 582     return @vectors;
 583 }
 584
 585 around qw/ get_relationship del_relationship / => sub {
 586         my $orig = shift;
 587         my $self = shift;
 588         my @args = @_;
 589         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 590                 @args = @{$_[0]};
 591         }
 592         my( $source, $target ) = $self->_stringify_args( @args );
 593         $self->$orig( $source, $target );
 594 };
 595
 596 =head2 reading_witnesses( $reading )
 597
 598 Return a list of sigils corresponding to the witnesses in which the reading appears.
 599
 600 =cut
 601
 602 sub reading_witnesses {
 603         my( $self, $reading ) = @_;
 604         # We need only check either the incoming or the outgoing edges; I have
 605         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 606         if( $reading eq $self->start ) {
 607                 return map { $_->sigil } $self->tradition->witnesses;
 608         }
 609         my %all_witnesses;
 610         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 611                 my $wits = $self->sequence->get_edge_attributes( @$e );
 612                 @all_witnesses{ keys %$wits } = 1;
 613         }
 614         my $acstr = $self->ac_label;
 615         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 616                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 617         }
 618         return keys %all_witnesses;
 619 }
 620
 621 =head1 OUTPUT METHODS
 622
 623 =head2 as_svg( \%options )
 624
 625 Returns an SVG string that represents the graph, via as_dot and graphviz.
 626 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 627
 628 =cut
 629
 630 sub as_svg {
 631     my( $self, $opts ) = @_;
 632     throw( "Need GraphViz installed to output SVG" )
 633         unless File::Which::which( 'dot' );
 634     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 635     $self->calculate_ranks()
 636         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 637         my @cmd = qw/dot -Tsvg/;
 638         my( $svg, $err );
 639         my $dotfile = File::Temp->new();
 640         ## USE FOR DEBUGGING
 641         # $dotfile->unlink_on_destroy(0);
 642         binmode $dotfile, ':utf8';
 643         print $dotfile $self->as_dot( $opts );
 644         push( @cmd, $dotfile->filename );
 645         run( \@cmd, ">", binary(), \$svg );
 646         $svg = decode_utf8( $svg );
 647         return $svg;
 648 }
 649
 650
 651 =head2 as_dot( \%options )
 652
 653 Returns a string that is the collation graph expressed in dot
 654 (i.e. GraphViz) format.  Options include:
 655
 656 =over 4
 657
 658 =item * from
 659
 660 =item * to
 661
 662 =item * color_common
 663
 664 =back
 665
 666 =cut
 667
 668 sub as_dot {
 669     my( $self, $opts ) = @_;
 670     my $startrank = $opts->{'from'} if $opts;
 671     my $endrank = $opts->{'to'} if $opts;
 672     my $color_common = $opts->{'color_common'} if $opts;
 673     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 674        && $self->end->rank > 100;
 675     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 676
 677     # Check the arguments
 678     if( $startrank ) {
 679         return if $endrank && $startrank > $endrank;
 680         return if $startrank > $self->end->rank;
 681         }
 682         if( defined $endrank ) {
 683                 return if $endrank < 0;
 684                 $endrank = undef if $endrank == $self->end->rank;
 685         }
 686
 687     my $graph_name = $self->tradition->name;
 688     $graph_name =~ s/[^\w\s]//g;
 689     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 690
 691     my %graph_attrs = (
 692         'rankdir' => 'LR',
 693         'bgcolor' => 'none',
 694         );
 695     my %node_attrs = (
 696         'fontsize' => 14,
 697         'fillcolor' => 'white',
 698         'style' => 'filled',
 699         'shape' => 'ellipse'
 700         );
 701     my %edge_attrs = (
 702         'arrowhead' => 'open',
 703         'color' => '#000000',
 704         'fontcolor' => '#000000',
 705         );
 706
 707     my $dot = sprintf( "digraph %s {\n", $graph_name );
 708     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 709     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 710
 711         # Output substitute start/end readings if necessary
 712         if( $startrank ) {
 713                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
 714         }
 715         if( $endrank ) {
 716                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
 717         }
 718         if( $STRAIGHTENHACK ) {
 719                 ## HACK part 1
 720                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
 721                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
 722                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 723         }
 724         my %used;  # Keep track of the readings that actually appear in the graph
 725         # Sort the readings by rank if we have ranks; this speeds layout.
 726         my @all_readings = $self->end->has_rank
 727                 ? sort { $a->rank <=> $b->rank } $self->readings
 728                 : $self->readings;
 729         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 730     foreach my $reading ( @all_readings ) {
 731         # Only output readings within our rank range.
 732         next if $startrank && $reading->rank < $startrank;
 733         next if $endrank && $reading->rank > $endrank;
 734         $used{$reading->id} = 1;
 735         # Need not output nodes without separate labels
 736         next if $reading->id eq $reading->text;
 737         my $rattrs;
 738         my $label = $reading->text;
 739         $label .= '-' if $reading->join_next;
 740         $label = "-$label" if $reading->join_prior;
 741         $label =~ s/\"/\\\"/g;
 742                 $rattrs->{'label'} = $label;
 743                 $rattrs->{'id'} = $reading->id;
 744                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 745         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 746     }
 747
 748         # Add the real edges. Need to weight one edge per rank jump, in a
 749         # continuous line.
 750         # my $weighted = $self->_add_edge_weights;
 751     my @edges = $self->paths;
 752         my( %substart, %subend );
 753     foreach my $edge ( @edges ) {
 754         # Do we need to output this edge?
 755         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 756                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 757                         my $variables = { %edge_attrs, 'label' => $label };
 758
 759                         # Account for the rank gap if necessary
 760                         my $rank0 = $self->reading( $edge->[0] )->rank
 761                                 if $self->reading( $edge->[0] )->has_rank;
 762                         my $rank1 = $self->reading( $edge->[1] )->rank
 763                                 if $self->reading( $edge->[1] )->has_rank;
 764                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 765                                 $variables->{'minlen'} = $rank1 - $rank0;
 766                         }
 767
 768                         # Add the calculated edge weights
 769                         # if( exists $weighted->{$edge->[0]}
 770                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 771                         #       # $variables->{'color'} = 'red';
 772                         #       $variables->{'weight'} = 3.0;
 773                         # }
 774
 775                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 776                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 777                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 778
 779                         my $varopts = _dot_attr_string( $variables );
 780                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 781                                 $edge->[0], $edge->[1], $varopts );
 782         } elsif( $used{$edge->[0]} ) {
 783                 $subend{$edge->[0]} = $edge->[1];
 784         } elsif( $used{$edge->[1]} ) {
 785                 $substart{$edge->[1]} = $edge->[0];
 786         }
 787     }
 788     # Add substitute start and end edges if necessary
 789     foreach my $node ( keys %substart ) {
 790         my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
 791         my $variables = { %edge_attrs, 'label' => $witstr };
 792         my $nrdg = $self->reading( $node );
 793         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
 794                 # Substart is actually one lower than $startrank
 795                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
 796         }
 797         my $varopts = _dot_attr_string( $variables );
 798         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
 799         }
 800     foreach my $node ( keys %subend ) {
 801         my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
 802         my $variables = { %edge_attrs, 'label' => $witstr };
 803         my $varopts = _dot_attr_string( $variables );
 804         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
 805         }
 806         # HACK part 2
 807         if( $STRAIGHTENHACK ) {
 808                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
 809                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 810         }
 811
 812     $dot .= "}\n";
 813     return $dot;
 814 }
 815
 816 sub _dot_attr_string {
 817         my( $hash ) = @_;
 818         my @attrs;
 819         foreach my $k ( sort keys %$hash ) {
 820                 my $v = $hash->{$k};
 821                 push( @attrs, $k.'="'.$v.'"' );
 822         }
 823         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 824 }
 825
 826 sub _add_edge_weights {
 827         my $self = shift;
 828         # Walk the graph from START to END, choosing the successor node with
 829         # the largest number of witness paths each time.
 830         my $weighted = {};
 831         my $curr = $self->start->id;
 832         my $ranked = $self->end->has_rank;
 833         while( $curr ne $self->end->id ) {
 834                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 835                 my @succ = sort { $self->path_witnesses( $curr, $a )
 836                                                         <=> $self->path_witnesses( $curr, $b ) }
 837                         $self->sequence->successors( $curr );
 838                 my $next = pop @succ;
 839                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 840                 # Try to avoid lacunae in the weighted path.
 841                 while( @succ &&
 842                            ( $self->reading( $next )->is_lacuna ||
 843                                  $nextrank - $rank > 1 ) ){
 844                         $next = pop @succ;
 845                 }
 846                 $weighted->{$curr} = $next;
 847                 $curr = $next;
 848         }
 849         return $weighted;
 850 }
 851
 852 =head2 path_witnesses( $edge )
 853
 854 Returns the list of sigils whose witnesses are associated with the given edge.
 855 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 856
 857 =cut
 858
 859 sub path_witnesses {
 860         my( $self, @edge ) = @_;
 861         # If edge is an arrayref, cope.
 862         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 863                 my $e = shift @edge;
 864                 @edge = @$e;
 865         }
 866         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 867         return @wits;
 868 }
 869
 870 # Helper function. Make a display label for the given witnesses, showing a.c.
 871 # witnesses only where the main witness is not also in the list.
 872 sub _path_display_label {
 873         my $self = shift;
 874         my %wits;
 875         map { $wits{$_} = 1 } @_;
 876
 877         # If an a.c. wit is listed, remove it if the main wit is also listed.
 878         # Otherwise keep it for explicit listing.
 879         my $aclabel = $self->ac_label;
 880         my @disp_ac;
 881         foreach my $w ( sort keys %wits ) {
 882                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
 883                         if( exists $wits{$1} ) {
 884                                 delete $wits{$w};
 885                         } else {
 886                                 push( @disp_ac, $w );
 887                         }
 888                 }
 889         }
 890
 891         # See if we are in a majority situation.
 892         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 893         $maj = $maj > 5 ? $maj : 5;
 894         if( scalar keys %wits > $maj ) {
 895                 unshift( @disp_ac, 'majority' );
 896                 return join( ', ', @disp_ac );
 897         } else {
 898                 return join( ', ', sort keys %wits );
 899         }
 900 }
 901
 902 =head2 readings_at_rank( $rank )
 903
 904 Returns a list of readings at a given rank, taken from the alignment table.
 905
 906 =cut
 907
 908 sub readings_at_rank {
 909         my( $self, $rank ) = @_;
 910         my $table = $self->alignment_table;
 911         # Table rank is real rank - 1.
 912         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
 913         my %readings;
 914         foreach my $e ( @elements ) {
 915                 next unless ref( $e ) eq 'HASH';
 916                 next unless exists $e->{'t'};
 917                 $readings{$e->{'t'}->id} = $e->{'t'};
 918         }
 919         return values %readings;
 920 }
 921
 922 =head2 as_graphml
 923
 924 Returns a GraphML representation of the collation.  The GraphML will contain
 925 two graphs. The first expresses the attributes of the readings and the witness
 926 paths that link them; the second expresses the relationships that link the
 927 readings.  This is the native transfer format for a tradition.
 928
 929 =begin testing
 930
 931 use Text::Tradition;
 932
 933 my $READINGS = 311;
 934 my $PATHS = 361;
 935
 936 my $datafile = 't/data/florilegium_tei_ps.xml';
 937 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 938                                       'name' => 'test0',
 939                                       'file' => $datafile,
 940                                       'linear' => 1 );
 941
 942 ok( $tradition, "Got a tradition object" );
 943 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 944 ok( $tradition->collation, "Tradition has a collation" );
 945
 946 my $c = $tradition->collation;
 947 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 948 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 949 is( scalar $c->relationships, 0, "Collation has all relationships" );
 950
 951 # Add a few relationships
 952 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 953 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 954 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 955
 956 # Now write it to GraphML and parse it again.
 957
 958 my $graphml = $c->as_graphml;
 959 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 960 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 961 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 962 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 963
 964 # Now add a stemma, write to GraphML, and look at the output.
 965 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
 966 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
 967 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
 968 $graphml = $c->as_graphml;
 969 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
 970
 971 # Now add a user, write to GraphML, and look at the output.
 972 unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
 973 my $testuser = Text::Tradition::User->new(
 974         id => 'testuser', password => 'testpass' );
 975 is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
 976 $testuser->add_tradition( $tradition );
 977 is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
 978 $graphml = $c->as_graphml;
 979 like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
 980
 981 =end testing
 982
 983 =cut
 984
 985 sub as_graphml {
 986     my( $self, $options ) = @_;
 987         $self->calculate_ranks unless $self->_graphcalc_done;
 988
 989         my $start = $options->{'from'}
 990                 ? $self->reading( $options->{'from'} ) : $self->start;
 991         my $end = $options->{'to'}
 992                 ? $self->reading( $options->{'to'} ) : $self->end;
 993         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
 994                 throw( 'Start node must be before end node' );
 995         }
 996         # The readings need to be ranked for this to work.
 997         $start = $self->start unless $start->has_rank;
 998         $end = $self->end unless $end->has_rank;
 999         my $rankoffset = 0;
1000         unless( $start eq $self->start ) {
1001                 $rankoffset = $start->rank - 1;
1002         }
1003         my %use_readings;
1004
1005     # Some namespaces
1006     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1007     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1008     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1009         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1010
1011     # Create the document and root node
1012     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1013     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1014     $graphml->setDocumentElement( $root );
1015     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1016     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1017
1018     # List of attribute types to save on our objects and their corresponding
1019     # GraphML types
1020     my %save_types = (
1021         'Str' => 'string',
1022         'Int' => 'int',
1023         'Bool' => 'boolean',
1024         'ReadingID' => 'string',
1025         'RelationshipType' => 'string',
1026         'RelationshipScope' => 'string',
1027     );
1028
1029     # Add the data keys for the graph. Include an extra key 'version' for the
1030     # GraphML output version.
1031     my %graph_data_keys;
1032     my $gdi = 0;
1033     my %graph_attributes = ( 'version' => 'string' );
1034         # Graph attributes include those of Tradition and those of Collation.
1035         my %gattr_from;
1036         my $tmeta = $self->tradition->meta;
1037         my $cmeta = $self->meta;
1038         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1039         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1040         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1041                 next if $attr->name =~ /^_/;
1042                 next if $skipsave{$attr->name};
1043                 next unless $save_types{$attr->type_constraint->name};
1044                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1045         }
1046     # Extra custom keys for complex objects that should be saved in some form.
1047     # The subroutine should return a string, or undef/empty.
1048     $graph_attributes{'stemmata'} = sub {
1049         my @stemstrs;
1050                 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1051                         $self->tradition->stemmata;
1052                 join( "\n", @stemstrs );
1053         };
1054     $graph_attributes{'user'} = sub {
1055         $self->tradition->user ? $self->tradition->user->id : undef
1056     };
1057
1058     foreach my $datum ( sort keys %graph_attributes ) {
1059         $graph_data_keys{$datum} = 'dg'.$gdi++;
1060         my $key = $root->addNewChild( $graphml_ns, 'key' );
1061         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1062                 : $graph_attributes{$datum};
1063         $key->setAttribute( 'attr.name', $datum );
1064         $key->setAttribute( 'attr.type', $dtype );
1065         $key->setAttribute( 'for', 'graph' );
1066         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1067     }
1068
1069     # Add the data keys for reading nodes
1070     my %reading_attributes;
1071     my $rmeta = Text::Tradition::Collation::Reading->meta;
1072     foreach my $attr( $rmeta->get_all_attributes ) {
1073                 next if $attr->name =~ /^_/;
1074                 next if $skipsave{$attr->name};
1075                 next unless $save_types{$attr->type_constraint->name};
1076                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1077         }
1078         # Extra custom key for the reading morphology
1079         $reading_attributes{'lexemes'} = 'string';
1080
1081     my %node_data_keys;
1082     my $ndi = 0;
1083     foreach my $datum ( sort keys %reading_attributes ) {
1084         $node_data_keys{$datum} = 'dn'.$ndi++;
1085         my $key = $root->addNewChild( $graphml_ns, 'key' );
1086         $key->setAttribute( 'attr.name', $datum );
1087         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1088         $key->setAttribute( 'for', 'node' );
1089         $key->setAttribute( 'id', $node_data_keys{$datum} );
1090     }
1091
1092     # Add the data keys for edges, that is, paths and relationships. Path
1093     # data does not come from a Moose class so is here manually.
1094     my $edi = 0;
1095     my %edge_data_keys;
1096     my %edge_attributes = (
1097         witness => 'string',                    # ID/label for a path
1098         extra => 'boolean',                             # Path key
1099         );
1100     my @path_attributes = keys %edge_attributes; # track our manual additions
1101     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1102     foreach my $attr( $pmeta->get_all_attributes ) {
1103                 next if $attr->name =~ /^_/;
1104                 next if $skipsave{$attr->name};
1105                 next unless $save_types{$attr->type_constraint->name};
1106                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1107         }
1108     foreach my $datum ( sort keys %edge_attributes ) {
1109         $edge_data_keys{$datum} = 'de'.$edi++;
1110         my $key = $root->addNewChild( $graphml_ns, 'key' );
1111         $key->setAttribute( 'attr.name', $datum );
1112         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1113         $key->setAttribute( 'for', 'edge' );
1114         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1115     }
1116
1117     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1118     my $xmlidname = $self->tradition->name;
1119     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1120     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1121         $xmlidname = '_'.$xmlidname;
1122     }
1123     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1124     $sgraph->setAttribute( 'edgedefault', 'directed' );
1125     $sgraph->setAttribute( 'id', $xmlidname );
1126     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1127     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1128     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1129     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1130     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1131
1132     # Tradition/collation attribute data
1133     foreach my $datum ( keys %graph_attributes ) {
1134         my $value;
1135         if( $datum eq 'version' ) {
1136                 $value = '3.2';
1137         } elsif( ref( $graph_attributes{$datum} ) ) {
1138                 my $sub = $graph_attributes{$datum};
1139                 $value = &$sub();
1140         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1141                 $value = $self->tradition->$datum;
1142         } else {
1143                 $value = $self->$datum;
1144         }
1145                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1146         }
1147
1148     my $node_ctr = 0;
1149     my %node_hash;
1150     # Add our readings to the graph
1151     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1152         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1153                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1154         $use_readings{$n->id} = 1;
1155         # Add to the main graph
1156         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1157         my $node_xmlid = 'n' . $node_ctr++;
1158         $node_hash{ $n->id } = $node_xmlid;
1159         $node_el->setAttribute( 'id', $node_xmlid );
1160         foreach my $d ( keys %reading_attributes ) {
1161                 my $nval = $n->$d;
1162                 # Custom serialization
1163                 if( $d eq 'lexemes' ) {
1164                                 # If nval is a true value, we have lexemes so we need to
1165                                 # serialize them. Otherwise set nval to undef so that the
1166                                 # key is excluded from this reading.
1167                         $nval = $nval ? $n->_serialize_lexemes : undef;
1168                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1169                         $nval = undef;
1170                 }
1171                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1172                         # Adjust the ranks within the subgraph.
1173                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1174                                 : $nval - $rankoffset;
1175                 }
1176                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1177                         if defined $nval;
1178         }
1179     }
1180
1181     # Add the path edges to the sequence graph
1182     my $edge_ctr = 0;
1183     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1184         # We add an edge in the graphml for every witness in $e.
1185         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1186         my @edge_wits = sort $self->path_witnesses( $e );
1187         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1188         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1189         # Skip any path from start to end; that witness is not in the subgraph.
1190         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1191         foreach my $wit ( @edge_wits ) {
1192                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1193                                                                                 $node_hash{ $e->[0] },
1194                                                                                 $node_hash{ $e->[1] } );
1195                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1196                         $edge_el->setAttribute( 'source', $from );
1197                         $edge_el->setAttribute( 'target', $to );
1198                         $edge_el->setAttribute( 'id', $id );
1199
1200                         # It's a witness path, so add the witness
1201                         my $base = $wit;
1202                         my $key = $edge_data_keys{'witness'};
1203                         # Is this an ante-corr witness?
1204                         my $aclabel = $self->ac_label;
1205                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1206                                 # Keep the base witness
1207                                 $base = $1;
1208                                 # ...and record that this is an 'extra' reading path
1209                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1210                         }
1211                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1212                 }
1213         }
1214
1215         # Report the actual number of nodes and edges that went in
1216         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1217         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1218
1219         # Add the relationship graph to the XML
1220         map { delete $edge_data_keys{$_} } @path_attributes;
1221         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1222                 $node_data_keys{'id'}, \%edge_data_keys );
1223
1224     # Save and return the thing
1225     my $result = decode_utf8( $graphml->toString(1) );
1226     return $result;
1227 }
1228
# Append a 'data' child element to $el, in $el's own namespace, carrying the
# given key as its 'key' attribute and $value as its text.  Nothing is added
# when $value is undefined.
sub _add_graphml_data {
    my $parent = shift;
    my( $key, $value ) = @_;
    defined $value or return;
    my $ns = $parent->namespaceURI;
    my $child = $parent->addNewChild( $ns, 'data' );
    $child->setAttribute( 'key', $key );
    $child->appendText( $value );
}
1236
1237 =head2 as_csv
1238
Returns a CSV alignment table representation of the collation graph, with
a header row of witness sigla and one column per witness (or witness
uncorrected.)
1241
1242 =cut
1243
sub as_csv {
    my( $self ) = @_;
    my $alignment = $self->alignment_table;
    my $witnesses = $alignment->{'alignment'};
    my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );

    # The header line lists the witness of each column.
    $csv->combine( map { $_->{'witness'} } @$witnesses );
    my @lines = ( decode_utf8( $csv->string ) );

    # Every following line holds the reading text of one table position for
    # each witness in turn; a gap is passed through to the CSV as it stands.
    foreach my $pos ( 0 .. $alignment->{'length'} - 1 ) {
        my @cells;
        foreach my $wit ( @$witnesses ) {
            my $cell = $wit->{'tokens'}->[$pos];
            push( @cells, $cell ? $cell->{'t'}->text : $cell );
        }
        $csv->combine( @cells );
        push( @lines, decode_utf8( $csv->string ) );
    }
    return join( "\n", @lines );
}
1261
1262 =head2 alignment_table( $use_refs, $include_witnesses )
1263
1264 Return a reference to an alignment table, in a slightly enhanced CollateX
1265 format which looks like this:
1266
1267  $table = { alignment => [ { witness => "SIGIL",
1268                              tokens => [ { t => "TEXT" }, ... ] },
1269                            { witness => "SIG2",
1270                              tokens => [ { t => "TEXT" }, ... ] },
1271                            ... ],
1272             length => TEXTLEN };
1273
1274 If $use_refs is set to 1, the reading object is returned in the table
1275 instead of READINGTEXT; if not, the text of the reading is returned.
1276
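If $include_witnesses is set to a hashref, only the witnesses whose sigil
keys have a true hash value will be included.

NOTE: the current implementation reads neither argument.  Every C<t> value
in the table is the reading object itself, and all witnesses of the
tradition are included.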
1279
1280 =cut
1281
sub alignment_table {
    my( $self ) = @_;
    $self->calculate_ranks() unless $self->_graphcalc_done;
    if( $self->has_cached_table ) {
        return $self->cached_table;
    }

    # Make sure we can do this
    $self->linear
        or throw( "Need a linear graph in order to make an alignment table" );
    $self->calculate_ranks unless $self->end->has_rank;

    # The table has one position for every rank before that of the end reading.
    my $last_pos = $self->end->rank - 1;
    my @all_pos = ( 1 .. $last_pos );
    my @rows;
    my @witnesses = sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses;
    foreach my $witness ( @witnesses ) {
        # A layered witness gets a second row, labelled with the a.c. suffix,
        # directly after its main row.
        my $sigil = $witness->sigil;
        my @labels = ( $sigil );
        push( @labels, $sigil . $self->ac_label ) if $witness->is_layered;
        foreach my $label ( @labels ) {
            my @path = $self->reading_sequence( $self->start, $self->end, $label );
            my @tokens = _make_witness_row( \@path, \@all_pos );
            push( @rows, { 'witness' => $label, 'tokens' => \@tokens } );
        }
    }

    my $table = { 'alignment' => \@rows, 'length' => $last_pos };
    $self->cached_table( $table );
    return $table;
}
1311
# Build one witness row of the alignment table.
#
# $path is an arrayref of the readings on the witness path; $positions is an
# arrayref of the ranks that make up the table columns.  Returns a list with
# one element per position: { t => $reading } where the witness has a reading
# of that rank, otherwise undef.  Positions that directly follow a lacuna and
# have no reading of their own repeat the lacuna's element, so that a lacuna
# extends until the next reading.  An empty $positions yields an empty list.
sub _make_witness_row {
    my( $path, $positions ) = @_;

    # Index the witness's readings by rank.
    my %at_rank;
    foreach my $rdg ( @$path ) {
        $at_rank{$rdg->rank} = { 't' => $rdg };
    }

    my @row;
    my $prev;
    foreach my $pos ( @$positions ) {
        my $cell = $at_rank{$pos};
        # Make the lacuna node appear many times in the table: an empty
        # position inherits the lacuna that precedes it.
        if( !defined $cell && $prev && $prev->{'t'}->is_lacuna ) {
            $cell = $prev;
        }
        push( @row, $cell );
        $prev = $cell;
    }
    return @row;
}
1339
1340 =head1 NAVIGATION METHODS
1341
1342 =head2 reading_sequence( $first, $last, $sigil, $backup )
1343
Returns the ordered list of readings, starting with $first and ending
with $last, for the witness given in $sigil; if no $sigil is given, the
base text label is used.  The $backup argument is no longer read: when
$sigil carries the a.c. label, the main sigil is derived from it and its
path is used wherever no $sigil path exists.  If there is a base text path,
that will be used wherever no path exists for $sigil or its backup.  An
error is thrown if the path loops or breaks off before reaching $last.
1349
1350 =cut
1351
1352 # TODO Think about returning some lazy-eval iterator.
1353 # TODO Get rid of backup; we should know from what witness is whether we need it.
1354
sub reading_sequence {
    my( $self, $start, $end, $witness ) = @_;

    # Without a sigil we follow the base text.
    $witness ||= $self->baselabel;

    my @sequence = ( $start );
    my %visited;
    my $here = $start;
    until( !$here || $here->id eq $end->id ) {
        my $here_id = $here->id;
        # Coming back to a reading we have already left means the witness
        # path is circular.
        throw( "Detected loop for $witness at " . $here_id )
            if exists( $visited{$here_id} );
        $visited{$here_id} = 1;

        my $step = $self->next_reading( $here, $witness );
        throw( "Did not find any path for $witness from reading " . $here_id )
            unless $step;
        push( @sequence, $step );
        $here = $step;
    }

    # Check that the last reading is our end reading.
    my $final = $sequence[-1];
    throw( "Last reading found from " . $start->text .
        " for witness $witness is not the end!" ) # TODO do we get this far?
        unless $final->id eq $end->id;

    return @sequence;
}
1383
=head2 next_reading( $reading, $sigil )
1385
1386 Returns the reading that follows the given reading along the given witness
1387 path.
1388
1389 =cut
1390
sub next_reading {
    # Look up the ID of the successor on the witness path and hand back the
    # reading object for it, or undef when there is no successor.
    my( $self, @args ) = @_;
    my $next_id = $self->_find_linked_reading( 'next', @args );
    return $next_id ? $self->reading( $next_id ) : undef;
}
1398
1399 =head2 prior_reading( $reading, $sigil )
1400
1401 Returns the reading that precedes the given reading along the given witness
1402 path.
1403
1404 =cut
1405
sub prior_reading {
    # Return the predecessor via the corresponding path, or undef when the
    # reading has no predecessor on that path.
    my $self = shift;
    my $answer = $self->_find_linked_reading( 'prior', @_ );
    # Same guard as in next_reading: never look up an undefined reading ID.
    return undef unless $answer;
    return $self->reading( $answer );
}
1412
# Find the vertex linked to $node along the witness path $path, going
# forward when $direction is 'next' and backward otherwise.  Returns the
# vertex at the far end of the first edge that carries every witness of
# $path; failing that, of an edge carrying the main witness of an a.c.
# label; failing that, of an edge carrying the base label.  Returns undef
# when $node has no edges in that direction, or (after a warning) when none
# of the edges qualifies.
sub _find_linked_reading {
    my( $self, $direction, $node, $path ) = @_;
    my $forward = $direction eq 'next';

    # Get a backup if we are dealing with a layered witness
    my $aclabel = $self->ac_label;
    my $alt_path;
    $alt_path = $1 if $path && $path =~ /^(.*)\Q$aclabel\E$/;

    my @candidates = $forward
        ? $self->sequence->edges_from( $node )
        : $self->sequence->edges_to( $node );
    return undef unless scalar( @candidates );

    # We have to find the linked path that contains all of the
    # witnesses supplied in $path.
    my @wanted;
    my @wanted_alt;
    @wanted = sort( $self->_witnesses_of_label( $path ) ) if $path;
    @wanted_alt = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;

    # Position, within an edge, of the vertex we are heading for.
    my $far_end = $forward ? 1 : 0;
    my( $base_edge, $alt_edge );
    foreach my $edge ( @candidates ) {
        $base_edge = $edge
            if $self->sequence->has_edge_attribute( @$edge, $self->baselabel );
        my @edge_wits = sort $self->path_witnesses( $edge );
        # This is the right path.
        return $edge->[$far_end] if _is_within( \@wanted, \@edge_wits );
        $alt_edge = $edge if _is_within( \@wanted_alt, \@edge_wits );
    }

    # Got this far? Prefer the alternate path, then the base path.
    my $fallback = $alt_edge || $base_edge;
    return $fallback->[$far_end] if $fallback;

    # Got this far? We have no appropriate path.
    warn "Could not find $direction node from " . $node->id
        . " along path $path";
    return undef;
}
1460
# Some set logic.  Takes two arrayrefs and returns a true value (the number
# of elements in $set1) when $set1 is non-empty and every element of it is
# also, as an exact string, an element of $set2.  Returns 0 otherwise.
sub _is_within {
    my( $set1, $set2 ) = @_;
    # Index $set2 once; exact hash lookup also avoids the regex '$' anchor,
    # which would accept an element followed by a trailing newline.
    my %in_set2 = map { ( $_ => 1 ) } @$set2;
    foreach my $el ( @$set1 ) {
        return 0 unless $in_set2{$el};
    }
    # will be 0, i.e. false, if set1 is empty
    return scalar @$set1;
}
1470
# Split a path label into the list of witness sigla it names, using the
# collation's wit_list_separator (taken literally) as the delimiter.
sub _witnesses_of_label {
    my $self = shift;
    my $label = shift;
    my $separator = $self->wit_list_separator;
    my @sigla = split( /\Q$separator\E/, $label );
    return @sigla;
}
1479
1480 =head2 common_readings
1481
1482 Returns the list of common readings in the graph (i.e. those readings that are
1483 shared by all non-lacunose witnesses.)
1484
1485 =cut
1486
sub common_readings {
    my( $self ) = @_;
    # Keep the readings that are flagged as common.
    my @shared;
    foreach my $rdg ( $self->readings ) {
        push( @shared, $rdg ) if $rdg->is_common;
    }
    return @shared;
}
1492
1493 =head2 path_text( $sigil, [, $start, $end ] )
1494
1495 Returns the text of a witness (plus its backup, if we are using a layer)
1496 as stored in the collation.  The text is returned as a string, where the
1497 individual readings are joined with spaces and the meta-readings (e.g.
1498 lacunae) are omitted.  Optional specification of $start and $end allows
1499 the generation of a subset of the witness text.
1500
1501 =cut
1502
sub path_text {
    my( $self, $wit, $start, $end ) = @_;
    $start ||= $self->start;
    $end ||= $self->end;

    my $text = '';
    my $prev;
    foreach my $rdg ( $self->reading_sequence( $start, $end, $wit ) ) {
        # Meta-readings contribute nothing to the text.
        next if $rdg->is_meta;
        # A space separates two readings unless this is the first one, or
        # either side asks to be joined to its neighbour.
        my $joined = $rdg->join_prior || !$prev || $prev->join_next;
        $text .= ' ' unless $joined;
        $text .= $rdg->text;
        $prev = $rdg;
    }
    return $text;
}
1519
1520 =head1 INITIALIZATION METHODS
1521
1522 These are mostly for use by parsers.
1523
1524 =head2 make_witness_path( $witness )
1525
1526 Link the array of readings contained in $witness->path (and in
1527 $witness->uncorrected_path if it exists) into collation paths.
1528 Clear out the arrays when finished.
1529
1530 =head2 make_witness_paths
1531
1532 Call make_witness_path for all witnesses in the tradition.
1533
1534 =cut
1535
1536 # For use when a collation is constructed from a base text and an apparatus.
1537 # We have the sequences of readings and just need to add path edges.
1538 # When we are done, clear out the witness path attributes, as they are no
1539 # longer needed.
1540 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1541
sub make_witness_paths {
    my $self = shift;
    # Link up the path of every witness in the tradition, one at a time.
    my @witnesses = $self->tradition->witnesses;
    foreach my $witness ( @witnesses ) {
        $self->make_witness_path( $witness );
    }
}
1549
sub make_witness_path {
    my( $self, $wit ) = @_;
    my @main = @{$wit->path};
    my $sigil = $wit->sigil;
    # Add start and end if necessary
    unshift( @main, $self->start ) unless $main[0] eq $self->start;
    push( @main, $self->end ) unless $main[-1] eq $self->end;
    # Link every reading to the one before it.
    foreach my $i ( 1 .. $#main ) {
        $self->add_path( $main[$i-1], $main[$i], $sigil );
    }

    if( $wit->is_layered ) {
        my @layer = @{$wit->uncorrected_path};
        unshift( @layer, $self->start ) unless $layer[0] eq $self->start;
        push( @layer, $self->end ) unless $layer[-1] eq $self->end;
        foreach my $i ( 1 .. $#layer ) {
            my( $from, $to ) = @layer[ $i-1, $i ];
            # The a.c. path is only recorded where it departs from the path
            # of the main witness.
            next if $self->has_path( $from, $to, $sigil );
            $self->add_path( $from, $to, $sigil . $self->ac_label );
        }
    }

    # The reading lists are no longer needed once the paths exist.
    $wit->clear_path;
    $wit->clear_uncorrected_path;
}
1574
1575 =head2 calculate_ranks
1576
1577 Calculate the reading ranks (that is, their aligned positions relative
1578 to each other) for the graph.  This can only be called on linear collations.
1579
1580 =begin testing
1581
1582 use Text::Tradition;
1583
1584 my $cxfile = 't/data/Collatex-16.xml';
1585 my $t = Text::Tradition->new(
1586     'name'  => 'inline',
1587     'input' => 'CollateX',
1588     'file'  => $cxfile,
1589     );
1590 my $c = $t->collation;
1591
1592 # Make an alignment table
1593 my $table = $c->alignment_table;
1594 ok( $c->has_cached_table, "Alignment table was cached" );
1595 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1596 $c->calculate_ranks;
1597 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1598 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1599 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1600
1601 =end testing
1602
1603 =cut
1604
# Assign a rank to every reading, taken from the rank of its node in the
# relationship equivalence graph, and invalidate cached alignment data
# when any rank ends up different from what it was before the call.
#
# Throws a collation error (via throw()) as soon as a reading is found
# whose equivalence node received no rank; the message names the
# highest-ranked reading that did get one.
# Returns the result of marking the graph calculation as done.
sub calculate_ranks {
    my $self = shift;
    # Save the existing ranks, in case we need to invalidate the cached
    # alignment table.
    my %existing_ranks;
    foreach my $r ( $self->readings ) {
        $existing_ranks{$r} = $r->rank;
    }

    # Do the rankings based on the relationship equivalence graph, starting
    # with the start node.  Only the node => rank map is needed here.
    my( $node_ranks ) = $self->relations->equivalence_ranks();

    # Calculated rank of the equivalence node a reading belongs to, or
    # undef if that node was never ranked.
    my $rank_for = sub {
        my( $rdg ) = @_;
        return $node_ranks->{$self->equivalence( $rdg->id )};
    };

    # Transfer our rankings from the topological graph to the real one.
    foreach my $r ( $self->readings ) {
        my $rank = $rank_for->( $r );
        if( defined $rank ) {
            $r->rank( $rank );
        } else {
            # Die. Find the last rank we calculated.  Defined-or keeps a
            # real rank of 0 distinct from "no rank at all".
            my @all_defined = sort { ( $rank_for->( $a ) // -1 )
                                 <=> ( $rank_for->( $b ) // -1 ) }
                $self->readings;
            my $last = pop @all_defined;
            throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
        }
    }

    # Do we need to invalidate the cached data?
    if( $self->has_cached_table ) {
        foreach my $r ( $self->readings ) {
            next if defined( $existing_ranks{$r} )
                && $existing_ranks{$r} == $r->rank;
            # Something has changed, so clear the cache
            $self->_clear_cache;
            # ...and recalculate the common readings.
            $self->calculate_common_readings();
            last;
        }
    }
    # The graph calculation information is now up to date.
    $self->_graphcalc_done(1);
}
1643
# Drop the cached alignment table, if one is currently held.
sub _clear_cache {
    my( $self ) = @_;
    if( $self->has_cached_table ) {
        $self->wipe_table;
    }
}
1648
1649
1650 =head2 flatten_ranks
1651
1652 A convenience method for parsing collation data.  Searches the graph for readings
1653 with the same text at the same rank, and merges any that are found.
1654
1655 =cut
1656
# Merge readings that share both rank and text.  The first reading seen
# for a given rank/text pair is the one the others are merged into.
# Two readings are left apart when exactly one of them is disambiguated,
# or when both are and their lexeme forms differ.  If anything was
# merged, the alignment table is wiped afterwards.
sub flatten_ranks {
    my $self = shift;
    my %first_seen;     # "rank||text" => first reading met with that key
    my $merged;

    # Comparable string of all lexeme forms of a reading.
    my $forms_of = sub {
        my( $reading ) = @_;
        return join( '//', map { $_->form->to_string } $reading->lexemes );
    };

    READING:
    foreach my $candidate ( $self->readings ) {
        next READING unless $candidate->has_rank;
        my $slot = join( '||', $candidate->rank, $candidate->text );

        if( !exists $first_seen{$slot} ) {
            $first_seen{$slot} = $candidate;
            next READING;
        }

        # Make sure they don't have different grammatical forms
        my $keeper = $first_seen{$slot};
        my $cand_known   = $candidate->disambiguated ? 1 : 0;
        my $keeper_known = $keeper->disambiguated ? 1 : 0;
        next READING if $cand_known != $keeper_known;
        next READING
            if $cand_known && $forms_of->( $candidate ) ne $forms_of->( $keeper );

        # Combine!
        $merged = 1;
        $self->merge_readings( $keeper, $candidate );
        # TODO see if this now makes a common point.
    }

    # If we merged readings, the ranks are still fine but the alignment
    # table is wrong. Wipe it.
    $self->wipe_table() if $merged;
}
1687
1688
1689 =head2 calculate_common_readings
1690
1691 Goes through the graph identifying the readings that appear in every witness
1692 (apart from those with lacunae at that spot). Marks them as common and returns
1693 the list.
1694
1695 =begin testing
1696
1697 use Text::Tradition;
1698
1699 my $cxfile = 't/data/Collatex-16.xml';
1700 my $t = Text::Tradition->new(
1701     'name'  => 'inline',
1702     'input' => 'CollateX',
1703     'file'  => $cxfile,
1704     );
1705 my $c = $t->collation;
1706
1707 my @common = $c->calculate_common_readings();
1708 is( scalar @common, 8, "Found correct number of common readings" );
1709 my @marked = sort $c->common_readings();
1710 is( scalar @common, 8, "All common readings got marked as such" );
1711 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1712 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1713
1714 =end testing
1715
1716 =cut
1717
# Flag the readings that stand alone in their alignment-table row and
# return them in row order.  Every reading is first reset to "not
# common".  A row qualifies when no witness has a gap there and all
# non-meta readings in it are one and the same (by id); meta readings
# are ignored.
sub calculate_common_readings {
    my $self = shift;
    foreach my $rdg ( $self->readings ) {
        $rdg->is_common( 0 );
    }

    # Implicitly calls calculate_ranks
    my $table = $self->alignment_table;
    my $last_row = $table->{'length'} - 1;

    my @common;
    foreach my $idx ( 0 .. $last_row ) {
        # Distinct readings in this row, keyed by id; the key 'UNDEF'
        # records that some witness has nothing at this position.
        my %in_row;
        foreach my $column ( @{$table->{'alignment'}} ) {
            my $cell = $column->{'tokens'}->[$idx]
                ? $column->{'tokens'}->[$idx]->{'t'} : '';
            if( !$cell ) {
                $in_row{'UNDEF'} = $cell;
            } elsif( !$cell->is_meta ) {
                $in_row{$cell->id} = $cell;
            }
        }
        next if exists $in_row{'UNDEF'};
        next unless keys %in_row == 1;

        my( $only ) = values %in_row;
        $only->is_common( 1 );
        push( @common, $only );
    }
    return @common;
}
1744
1745 =head2 text_from_paths
1746
1747 Calculate the text array for all witnesses from the path, for later consistency
1748 checking.  Only to be used if there is no non-graph-based way to know the
1749 original texts.
1750
1751 =cut
1752
# Derive each witness's text (and, for layered witnesses, its layer
# text) from the readings on its path through the graph, skipping meta
# readings, and store the resulting word lists on the witness.
sub text_from_paths {
    my $self = shift;

    # Texts of the non-meta readings in a sequence, as a fresh array ref.
    my $words_of = sub {
        return [ map { $_->text } grep { !$_->is_meta } @_ ];
    };

    foreach my $wit ( $self->tradition->witnesses ) {
        my @main = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
        $wit->text( $words_of->( @main ) );

        next unless $wit->is_layered;
        my @layer = $self->reading_sequence( $self->start, $self->end,
                                             $wit->sigil.$self->ac_label );
        $wit->layertext( $words_of->( @layer ) );
    }
}
1775
1776 =head1 UTILITY FUNCTIONS
1777
1778 =head2 common_predecessor( $reading_a, $reading_b )
1779
1780 Find the last reading that occurs in sequence before both the given readings.
1781 At the very least this should be $self->start.
1782
1783 =head2 common_successor( $reading_a, $reading_b )
1784
1785 Find the first reading that occurs in sequence after both the given readings.
1786 At the very least this should be $self->end.
1787
1788 =begin testing
1789
1790 use Text::Tradition;
1791
1792 my $cxfile = 't/data/Collatex-16.xml';
1793 my $t = Text::Tradition->new(
1794     'name'  => 'inline',
1795     'input' => 'CollateX',
1796     'file'  => $cxfile,
1797     );
1798 my $c = $t->collation;
1799
1800 is( $c->common_predecessor( 'n24', 'n23' )->id,
1801     'n20', "Found correct common predecessor" );
1802 is( $c->common_successor( 'n24', 'n23' )->id,
1803     '__END__', "Found correct common successor" );
1804
1805 is( $c->common_predecessor( 'n19', 'n17' )->id,
1806     'n16', "Found correct common predecessor for readings on same path" );
1807 is( $c->common_successor( 'n21', 'n10' )->id,
1808     '__END__', "Found correct common successor for readings on same path" );
1809
1810 =end testing
1811
1812 =cut
1813
1814 ## Return the closest reading that is a predecessor of both the given readings.
# Resolve the two arguments to reading objects, then search backwards
# through the graph for the nearest reading that precedes both.
sub common_predecessor {
    my( $self, @pair ) = @_;
    my( $first, $second ) = $self->_objectify_args( @pair );
    return $self->_common_in_path( $first, $second, 'predecessors' );
}
1820
# Resolve the two arguments to reading objects, then search forwards
# through the graph for the nearest reading that follows both.
sub common_successor {
    my( $self, @pair ) = @_;
    my( $first, $second ) = $self->_objectify_args( @pair );
    return $self->_common_in_path( $first, $second, 'successors' );
}
1826
1827
1828 # TODO think about how to do this without ranks...
# Breadth-first search outward from $r1 and $r2 at the same time, in the
# direction named by $dir ('predecessors' or 'successors'), until some
# reading has been reached from both sides.  Of the readings found in
# that round, the highest-ranked one is returned for predecessors and
# the lowest-ranked one for successors.  Returns nothing useful (undef)
# when the step budget runs out before the two searches meet.
sub _common_in_path {
    my( $self, $r1, $r2, $dir ) = @_;
    # Number of rounds allowed, taken from the rank of the end reading,
    # so that the search cannot loop forever.
    my $budget = $self->end->rank;
    my %frontier = ( 'r1' => [ $r1 ], 'r2' => [ $r2 ] );
    my %reached_by;     # reading id => side ('r1' / 'r2') that got there first
    my @meeting;
    # say STDERR "Finding common $dir for $r1, $r2";
    until( @meeting ) {
        last unless $budget--;  # Avoid looping infinitely
        # Advance each side by one step; r1 always moves before r2.
        my %advanced;
        foreach my $side ( 'r1', 'r2' ) {
            $advanced{$side} = [];
            foreach my $from ( @{$frontier{$side}} ) {
                foreach my $nbr ( $from->$dir ) {
                    my $owner = $reached_by{$nbr->id};
                    if( !$owner ) {
                        # First visit: claim it and keep walking from here.
                        $reached_by{$nbr->id} = $side;
                        push( @{$advanced{$side}}, $nbr );
                    } elsif( $owner ne $side ) {
                        # The other side was here already: a meeting point.
                        push( @meeting, $nbr );
                    }
                }
            }
        }
        %frontier = %advanced;
    }
    my @ranked = sort { $a->rank <=> $b->rank } @meeting;
    return $dir eq 'predecessors' ? pop( @ranked ) : shift( @ranked );
}
1870
# Raise a Text::Tradition::Error identified as a collation error,
# carrying the given message.
sub throw {
    my( $message ) = @_;
    Text::Tradition::Error->throw(
        'ident'   => 'Collation error',
        'message' => $message,
    );
}
1877
1878 no Moose;
1879 __PACKAGE__->meta->make_immutable;
1880
1881 =head1 LICENSE
1882
1883 This package is free software and is provided "as is" without express
1884 or implied warranty.  You can redistribute it and/or modify it under
1885 the same terms as Perl itself.
1886
1887 =head1 AUTHOR
1888
1889 Tara L Andrews E<lt>aurum@cpan.orgE<gt>