lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use Text::CSV;
  10 use Text::Tradition::Collation::Reading;
  11 use Text::Tradition::Collation::RelationshipStore;
  12 use Text::Tradition::Error;
  13 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  14 use Moose;
  15
  16 has 'sequence' => (
  17     is => 'ro',
  18     isa => 'Graph',
  19     default => sub { Graph->new() },
  20     handles => {
  21         paths => 'edges',
  22     },
  23     );
  24
  25 has 'relations' => (
  26         is => 'ro',
  27         isa => 'Text::Tradition::Collation::RelationshipStore',
  28         handles => {
  29                 relationships => 'relationships',
  30                 related_readings => 'related_readings',
  31                 get_relationship => 'get_relationship',
  32                 del_relationship => 'del_relationship',
  33                 equivalence => 'equivalence',
  34                 equivalence_graph => 'equivalence_graph',
  35         },
  36         writer => '_set_relations',
  37         );
  38
  39 has 'tradition' => (
  40     is => 'ro',
  41     isa => 'Text::Tradition',
  42     writer => '_set_tradition',
  43     weak_ref => 1,
  44     );
  45
  46 has 'readings' => (
  47         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  48         traits => ['Hash'],
  49     handles => {
  50         reading     => 'get',
  51         _add_reading => 'set',
  52         del_reading => 'delete',
  53         has_reading => 'exists',
  54 #        reading_keys => 'keys',
  55         readings   => 'values',
  56     },
  57     default => sub { {} },
  58         );
  59
  60 has 'wit_list_separator' => (
  61     is => 'rw',
  62     isa => 'Str',
  63     default => ', ',
  64     );
  65
  66 has 'baselabel' => (
  67     is => 'rw',
  68     isa => 'Str',
  69     default => 'base text',
  70     );
  71
  72 has 'linear' => (
  73     is => 'rw',
  74     isa => 'Bool',
  75     default => 1,
  76     );
  77
  78 has 'ac_label' => (
  79     is => 'rw',
  80     isa => 'Str',
  81     default => ' (a.c.)',
  82     );
  83
  84 has 'wordsep' => (
  85         is => 'rw',
  86         isa => 'Str',
  87         default => ' ',
  88         );
  89
  90 has 'start' => (
  91         is => 'ro',
  92         isa => 'Text::Tradition::Collation::Reading',
  93         writer => '_set_start',
  94         weak_ref => 1,
  95         );
  96
  97 has 'end' => (
  98         is => 'ro',
  99         isa => 'Text::Tradition::Collation::Reading',
 100         writer => '_set_end',
 101         weak_ref => 1,
 102         );
 103
 104 has 'cached_table' => (
 105         is => 'rw',
 106         isa => 'HashRef',
 107         predicate => 'has_cached_table',
 108         clearer => 'wipe_table',
 109         );
 110
 111 has '_graphcalc_done' => (
 112         is => 'rw',
 113         isa => 'Bool',
 114         default => undef,
 115         );
 116
 117 =head1 NAME
 118
 119 Text::Tradition::Collation - a software model for a text collation
 120
 121 =head1 SYNOPSIS
 122
 123   use Text::Tradition;
 124   my $t = Text::Tradition->new(
 125     'name' => 'this is a text',
 126     'input' => 'TEI',
 127     'file' => '/path/to/tei_parallel_seg_file.xml' );
 128
 129   my $c = $t->collation;
 130   my @readings = $c->readings;
 131   my @paths = $c->paths;
 132   my @relationships = $c->relationships;
 133
 134   my $svg_variant_graph = $t->collation->as_svg();
 135
 136 =head1 DESCRIPTION
 137
 138 Text::Tradition is a library for representation and analysis of collated
 139 texts, particularly medieval ones.  The Collation is the central feature of
 140 a Tradition, where the text, its sequence of readings, and its relationships
 141 between readings are actually kept.
 142
 143 =head1 CONSTRUCTOR
 144
 145 =head2 new
 146
 147 The constructor.  Takes a hash or hashref of the following arguments:
 148
 149 =over
 150
 151 =item * tradition - The Text::Tradition object to which the collation
 152 belongs. Required.
 153
 154 =item * linear - Whether the collation should be linear; that is, whether
 155 transposed readings should be treated as two linked readings rather than one,
 156 and therefore whether the collation graph is acyclic.  Defaults to true.
 157
 158 =item * baselabel - The default label for the path taken by a base text
 159 (if any). Defaults to 'base text'.
 160
 161 =item * wit_list_separator - The string to join a list of witnesses for
 162 purposes of making labels in display graphs.  Defaults to ', '.
 163
 164 =item * ac_label - The extra label to tack onto a witness sigil when
 165 representing another layer of path for the given witness - that is, when
 166 a text has more than one possible reading due to scribal corrections or
 167 the like.  Defaults to ' (a.c.)'.
 168
 169 =item * wordsep - The string used to separate words in the original text.
 170 Defaults to ' '.
 171
 172 =back
 173
 174 =head1 ACCESSORS
 175
 176 =head2 tradition
 177
 178 =head2 linear
 179
 180 =head2 wit_list_separator
 181
 182 =head2 baselabel
 183
 184 =head2 ac_label
 185
 186 =head2 wordsep
 187
 188 Simple accessors for collation attributes.
 189
 190 =head2 start
 191
 192 The meta-reading at the start of every witness path.
 193
 194 =head2 end
 195
 196 The meta-reading at the end of every witness path.
 197
 198 =head2 readings
 199
 200 Returns all Reading objects in the graph.
 201
 202 =head2 reading( $id )
 203
 204 Returns the Reading object corresponding to the given ID.
 205
 206 =head2 add_reading( $reading_args )
 207
 208 Adds a new reading object to the collation.
 209 See L<Text::Tradition::Collation::Reading> for the available arguments.
 210
 211 =head2 del_reading( $object_or_id )
 212
 213 Removes the given reading from the collation, implicitly removing its
 214 paths and relationships.
 215
 216 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 217
 218 Merges the $second reading into the $main one. If $concatenate is true, then
 219 the merged node will carry the text of both readings, concatenated with either
 220 $with_str (if specified) or a sensible default (the empty string if the
 221 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 222
 223 The first two arguments may be either readings or reading IDs.
 224
 225 =head2 has_reading( $id )
 226
 227 Predicate to see whether a given reading ID is in the graph.
 228
 229 =head2 reading_witnesses( $object_or_id )
 230
 231 Returns a list of sigils whose witnesses contain the reading.
 232
 233 =head2 paths
 234
 235 Returns all reading paths within the document - that is, all edges in the
 236 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 237
 238 =head2 add_path( $source, $target, $sigil )
 239
 240 Links the given readings in the collation in sequence, under the given witness
 241 sigil.  The readings may be specified by object or ID.
 242
 243 =head2 del_path( $source, $target, $sigil )
 244
 245 Links the given readings in the collation in sequence, under the given witness
 246 sigil.  The readings may be specified by object or ID.
 247
 248 =head2 has_path( $source, $target );
 249
 250 Returns true if the two readings are linked in sequence in any witness.
 251 The readings may be specified by object or ID.
 252
 253 =head2 relationships
 254
 255 Returns all Relationship objects in the collation.
 256
 257 =head2 add_relationship( $reading, $other_reading, $options )
 258
 259 Adds a new relationship of the type given in $options between the two readings,
 260 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 261 where $status is true on success, and @vectors is a list of relationship edges
 262 that were ultimately added.
 263 See L<Text::Tradition::Collation::Relationship> for the available options.
 264
 265 =cut
 266
 267 sub BUILD {
 268     my $self = shift;
 269     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 270     $self->_set_start( $self->add_reading(
 271         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 272     $self->_set_end( $self->add_reading(
 273         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 274 }
 275
 276 ### Reading construct/destruct functions
 277
 278 sub add_reading {
 279         my( $self, $reading ) = @_;
 280         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 281                 my %args = %$reading;
 282                 if( $args{'init'} ) {
 283                         # If we are initializing an empty collation, don't assume that we
 284                         # have set a tradition.
 285                         delete $args{'init'};
 286                 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
 287                         $args{'language'} = $self->tradition->language;
 288                 }
 289                 $reading = Text::Tradition::Collation::Reading->new(
 290                         'collation' => $self,
 291                         %args );
 292         }
 293         # First check to see if a reading with this ID exists.
 294         if( $self->reading( $reading->id ) ) {
 295                 throw( "Collation already has a reading with id " . $reading->id );
 296         }
 297         $self->_graphcalc_done(0);
 298         $self->_add_reading( $reading->id => $reading );
 299         # Once the reading has been added, put it in both graphs.
 300         $self->sequence->add_vertex( $reading->id );
 301         $self->relations->add_reading( $reading->id );
 302         return $reading;
 303 };
 304
 305 around del_reading => sub {
 306         my $orig = shift;
 307         my $self = shift;
 308         my $arg = shift;
 309
 310         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 311                 $arg = $arg->id;
 312         }
 313         # Remove the reading from the graphs.
 314         $self->_graphcalc_done(0);
 315         $self->_clear_cache; # Explicitly clear caches to GC the reading
 316         $self->sequence->delete_vertex( $arg );
 317         $self->relations->delete_reading( $arg );
 318
 319         # Carry on.
 320         $self->$orig( $arg );
 321 };
 322
 323 =begin testing
 324
 325 use Text::Tradition;
 326
 327 my $cxfile = 't/data/Collatex-16.xml';
 328 my $t = Text::Tradition->new(
 329     'name'  => 'inline',
 330     'input' => 'CollateX',
 331     'file'  => $cxfile,
 332     );
 333 my $c = $t->collation;
 334
 335 my $rno = scalar $c->readings;
 336 # Split n21 for testing purposes
 337 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 338 my $old_r = $c->reading( 'n21' );
 339 $old_r->alter_text( 'to' );
 340 $c->del_path( 'n20', 'n21', 'A' );
 341 $c->add_path( 'n20', 'n21p0', 'A' );
 342 $c->add_path( 'n21p0', 'n21', 'A' );
 343 $c->flatten_ranks();
 344 ok( $c->reading( 'n21p0' ), "New reading exists" );
 345 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 346
 347 # Combine n3 and n4 ( with his )
 348 $c->merge_readings( 'n3', 'n4', 1 );
 349 ok( !$c->reading('n4'), "Reading n4 is gone" );
 350 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 351
 352 # Collapse n9 and n10 ( rood / root )
 353 $c->merge_readings( 'n9', 'n10' );
 354 ok( !$c->reading('n10'), "Reading n10 is gone" );
 355 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 356
 357 # Combine n21 and n21p0
 358 my $remaining = $c->reading('n21');
 359 $remaining ||= $c->reading('n22');  # one of these should still exist
 360 $c->merge_readings( 'n21p0', $remaining, 1 );
 361 ok( !$c->reading('n21'), "Reading $remaining is gone" );
 362 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
 363
 364 =end testing
 365
 366 =cut
 367
 368 sub merge_readings {
 369         my $self = shift;
 370
 371         # Sanity check
 372         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 373         my $mergemeta = $kept_obj->is_meta;
 374         throw( "Cannot merge meta and non-meta reading" )
 375                 unless ( $mergemeta && $del_obj->is_meta )
 376                         || ( !$mergemeta && !$del_obj->is_meta );
 377         if( $mergemeta ) {
 378                 throw( "Cannot merge with start or end node" )
 379                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 380                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 381         }
 382         # We only need the IDs for adding paths to the graph, not the reading
 383         # objects themselves.
 384         my $kept = $kept_obj->id;
 385         my $deleted = $del_obj->id;
 386         $self->_graphcalc_done(0);
 387
 388     # The kept reading should inherit the paths and the relationships
 389     # of the deleted reading.
 390         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 391                 my @vector = ( $kept );
 392                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 393                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 394                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 395                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 396                 $self->sequence->add_edge( @vector );
 397                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 398                 @wits{keys %$fwits} = values %$fwits;
 399                 $self->sequence->set_edge_attributes( @vector, \%wits );
 400         }
 401         $self->relations->merge_readings( $kept, $deleted, $combine );
 402
 403         # Do the deletion deed.
 404         if( $combine ) {
 405                 # Combine the text of the readings
 406                 my $joinstr = $combine_char;
 407                 unless( defined $joinstr ) {
 408                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 409                         $joinstr = $self->wordsep unless defined $joinstr;
 410                 }
 411                 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
 412                 # Change this reading to a joining one if necessary
 413                 $kept_obj->_set_join_next( $del_obj->join_next );
 414                 $kept_obj->normal_form(
 415                         join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
 416                 # Combine the lexemes present in the readings
 417                 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
 418                         $kept_obj->add_lexeme( $del_obj->lexemes );
 419                 }
 420         }
 421         $self->del_reading( $deleted );
 422 }
 423
 424 =head2 compress_readings
 425
 426 Where possible in the graph, compresses plain sequences of readings into a
 427 single reading. The sequences must consist of readings with no
 428 relationships to other readings, with only a single witness path between
 429 them and no other witness paths from either that would skip the other. The
 430 readings must also not be marked as nonsense or bad grammar.
 431
 432 WARNING: This operation cannot be undone.
 433
 434 =cut
 435
 436 sub compress_readings {
 437         my $self = shift;
 438         # Anywhere in the graph that there is a reading that joins only to a single
 439         # successor, and neither of these have any relationships, just join the two
 440         # readings.
 441         my %gobbled;
 442         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 443                 # While we are here, get rid of any extra wordforms from a disambiguated
 444                 # reading.
 445                 if( $rdg->disambiguated ) {
 446                         foreach my $lex ( $rdg->lexemes ) {
 447                                 $lex->clear_matching_forms();
 448                                 $lex->add_matching_form( $lex->form );
 449                         }
 450                 }
 451                 # Now look for readings that can be joined to their successors.
 452                 next if $rdg->is_meta;
 453                 next if $gobbled{$rdg->id};
 454                 next if $rdg->grammar_invalid || $rdg->is_nonsense;
 455                 next if $rdg->related_readings();
 456                 my %seen;
 457                 while( $self->sequence->successors( $rdg ) == 1 ) {
 458                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 459                         throw( "Infinite loop" ) if $seen{$next->id};
 460                         $seen{$next->id} = 1;
 461                         last if $self->sequence->predecessors( $next ) > 1;
 462                         last if $next->is_meta;
 463                         last if $next->grammar_invalid || $next->is_nonsense;
 464                         last if $next->related_readings();
 465                         say "Joining readings $rdg and $next";
 466                         $self->merge_readings( $rdg, $next, 1 );
 467                 }
 468         }
 469         # Make sure we haven't screwed anything up
 470         foreach my $wit ( $self->tradition->witnesses ) {
 471                 my $pathtext = $self->path_text( $wit->sigil );
 472                 my $origtext = join( ' ', @{$wit->text} );
 473                 throw( "Text differs for witness " . $wit->sigil )
 474                         unless $pathtext eq $origtext;
 475                 if( $wit->is_layered ) {
 476                         $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
 477                         $origtext = join( ' ', @{$wit->layertext} );
 478                         throw( "Ante-corr text differs for witness " . $wit->sigil )
 479                                 unless $pathtext eq $origtext;
 480                 }
 481         }
 482
 483         $self->relations->rebuild_equivalence();
 484         $self->calculate_ranks();
 485 }
 486
 487 # Helper function for manipulating the graph.
 488 sub _stringify_args {
 489         my( $self, $first, $second, @args ) = @_;
 490     $first = $first->id
 491         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 492     $second = $second->id
 493         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 494     return( $first, $second, @args );
 495 }
 496
 497 # Helper function for manipulating the graph.
 498 sub _objectify_args {
 499         my( $self, $first, $second, $arg ) = @_;
 500     $first = $self->reading( $first )
 501         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 502     $second = $self->reading( $second )
 503         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 504     return( $first, $second, $arg );
 505 }
 506 ### Path logic
 507
 508 sub add_path {
 509         my $self = shift;
 510
 511         # We only need the IDs for adding paths to the graph, not the reading
 512         # objects themselves.
 513     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 514
 515         $self->_graphcalc_done(0);
 516         # Connect the readings
 517         unless( $self->sequence->has_edge( $source, $target ) ) {
 518             $self->sequence->add_edge( $source, $target );
 519             $self->relations->add_equivalence_edge( $source, $target );
 520         }
 521     # Note the witness in question
 522     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 523 }
 524
 525 sub del_path {
 526         my $self = shift;
 527         my @args;
 528         if( ref( $_[0] ) eq 'ARRAY' ) {
 529                 my $e = shift @_;
 530                 @args = ( @$e, @_ );
 531         } else {
 532                 @args = @_;
 533         }
 534
 535         # We only need the IDs for adding paths to the graph, not the reading
 536         # objects themselves.
 537     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 538
 539         $self->_graphcalc_done(0);
 540         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 541                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 542         }
 543         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 544                 $self->sequence->delete_edge( $source, $target );
 545                 $self->relations->delete_equivalence_edge( $source, $target );
 546         }
 547 }
 548
 549
 550 # Extra graph-alike utility
 551 sub has_path {
 552         my $self = shift;
 553     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 554         return undef unless $self->sequence->has_edge( $source, $target );
 555         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 556 }
 557
 558 =head2 clear_witness( @sigil_list )
 559
 560 Clear the given witnesses out of the collation entirely, removing references
 561 to them in paths, and removing readings that belong only to them.  Should only
 562 be called via $tradition->del_witness.
 563
 564 =cut
 565
 566 sub clear_witness {
 567         my( $self, @sigils ) = @_;
 568
 569         $self->_graphcalc_done(0);
 570         # Clear the witness(es) out of the paths
 571         foreach my $e ( $self->paths ) {
 572                 foreach my $sig ( @sigils ) {
 573                         $self->del_path( $e, $sig );
 574                 }
 575         }
 576
 577         # Clear out the newly unused readings
 578         foreach my $r ( $self->readings ) {
 579                 unless( $self->reading_witnesses( $r ) ) {
 580                         $self->del_reading( $r );
 581                 }
 582         }
 583 }
 584
 585 sub add_relationship {
 586         my $self = shift;
 587     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 588     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 589         $self->_graphcalc_done(0);
 590     return @vectors;
 591 }
 592
 593 around qw/ get_relationship del_relationship / => sub {
 594         my $orig = shift;
 595         my $self = shift;
 596         my @args = @_;
 597         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 598                 @args = @{$_[0]};
 599         }
 600         my( $source, $target ) = $self->_stringify_args( @args );
 601         $self->$orig( $source, $target );
 602 };
 603
 604 =head2 reading_witnesses( $reading )
 605
 606 Return a list of sigils corresponding to the witnesses in which the reading appears.
 607
 608 =cut
 609
 610 sub reading_witnesses {
 611         my( $self, $reading ) = @_;
 612         # We need only check either the incoming or the outgoing edges; I have
 613         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 614         if( $reading eq $self->start ) {
 615                 return map { $_->sigil } $self->tradition->witnesses;
 616         }
 617         my %all_witnesses;
 618         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 619                 my $wits = $self->sequence->get_edge_attributes( @$e );
 620                 @all_witnesses{ keys %$wits } = 1;
 621         }
 622         my $acstr = $self->ac_label;
 623         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 624                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 625         }
 626         return keys %all_witnesses;
 627 }
 628
 629 =head1 OUTPUT METHODS
 630
 631 =head2 as_svg( \%options )
 632
 633 Returns an SVG string that represents the graph, via as_dot and graphviz.
 634 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 635
 636 =cut
 637
 638 sub as_svg {
 639     my( $self, $opts ) = @_;
 640     throw( "Need GraphViz installed to output SVG" )
 641         unless File::Which::which( 'dot' );
 642     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 643     $self->calculate_ranks()
 644         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 645         my @cmd = qw/dot -Tsvg/;
 646         my( $svg, $err );
 647         my $dotfile = File::Temp->new();
 648         ## USE FOR DEBUGGING
 649         # $dotfile->unlink_on_destroy(0);
 650         binmode $dotfile, ':utf8';
 651         print $dotfile $self->as_dot( $opts );
 652         push( @cmd, $dotfile->filename );
 653         run( \@cmd, ">", binary(), \$svg );
 654         $svg = decode_utf8( $svg );
 655         return $svg;
 656 }
 657
 658
 659 =head2 as_dot( \%options )
 660
 661 Returns a string that is the collation graph expressed in dot
 662 (i.e. GraphViz) format.  Options include:
 663
 664 =over 4
 665
 666 =item * from
 667
 668 =item * to
 669
 670 =item * color_common
 671
 672 =back
 673
 674 =cut
 675
 676 sub as_dot {
 677     my( $self, $opts ) = @_;
 678     my $startrank = $opts->{'from'} if $opts;
 679     my $endrank = $opts->{'to'} if $opts;
 680     my $color_common = $opts->{'color_common'} if $opts;
 681     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 682        && $self->end->rank > 100;
 683     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 684
 685     # Check the arguments
 686     if( $startrank ) {
 687         return if $endrank && $startrank > $endrank;
 688         return if $startrank > $self->end->rank;
 689         }
 690         if( defined $endrank ) {
 691                 return if $endrank < 0;
 692                 $endrank = undef if $endrank == $self->end->rank;
 693         }
 694
 695     my $graph_name = $self->tradition->name;
 696     $graph_name =~ s/[^\w\s]//g;
 697     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 698
 699     my %graph_attrs = (
 700         'rankdir' => 'LR',
 701         'bgcolor' => 'none',
 702         );
 703     my %node_attrs = (
 704         'fontsize' => 14,
 705         'fillcolor' => 'white',
 706         'style' => 'filled',
 707         'shape' => 'ellipse'
 708         );
 709     my %edge_attrs = (
 710         'arrowhead' => 'open',
 711         'color' => '#000000',
 712         'fontcolor' => '#000000',
 713         );
 714
 715     my $dot = sprintf( "digraph %s {\n", $graph_name );
 716     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 717     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 718
 719         # Output substitute start/end readings if necessary
 720         if( $startrank ) {
 721                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
 722         }
 723         if( $endrank ) {
 724                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
 725         }
 726         if( $STRAIGHTENHACK ) {
 727                 ## HACK part 1
 728                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
 729                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
 730                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 731         }
 732         my %used;  # Keep track of the readings that actually appear in the graph
 733         # Sort the readings by rank if we have ranks; this speeds layout.
 734         my @all_readings = $self->end->has_rank
 735                 ? sort { $a->rank <=> $b->rank } $self->readings
 736                 : $self->readings;
 737         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 738     foreach my $reading ( @all_readings ) {
 739         # Only output readings within our rank range.
 740         next if $startrank && $reading->rank < $startrank;
 741         next if $endrank && $reading->rank > $endrank;
 742         $used{$reading->id} = 1;
 743         # Need not output nodes without separate labels
 744         next if $reading->id eq $reading->text;
 745         my $rattrs;
 746         my $label = $reading->text;
 747         $label .= '-' if $reading->join_next;
 748         $label = "-$label" if $reading->join_prior;
 749         $label =~ s/\"/\\\"/g;
 750                 $rattrs->{'label'} = $label;
 751                 $rattrs->{'id'} = $reading->id;
 752                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 753         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 754     }
 755
 756         # Add the real edges. Need to weight one edge per rank jump, in a
 757         # continuous line.
 758         # my $weighted = $self->_add_edge_weights;
 759     my @edges = $self->paths;
 760         my( %substart, %subend );
 761     foreach my $edge ( @edges ) {
 762         # Do we need to output this edge?
 763         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 764                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 765                         my $variables = { %edge_attrs, 'label' => $label };
 766
 767                         # Account for the rank gap if necessary
 768                         my $rank0 = $self->reading( $edge->[0] )->rank
 769                                 if $self->reading( $edge->[0] )->has_rank;
 770                         my $rank1 = $self->reading( $edge->[1] )->rank
 771                                 if $self->reading( $edge->[1] )->has_rank;
 772                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 773                                 $variables->{'minlen'} = $rank1 - $rank0;
 774                         }
 775
 776                         # Add the calculated edge weights
 777                         # if( exists $weighted->{$edge->[0]}
 778                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 779                         #       # $variables->{'color'} = 'red';
 780                         #       $variables->{'weight'} = 3.0;
 781                         # }
 782
 783                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 784                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 785                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 786
 787                         my $varopts = _dot_attr_string( $variables );
 788                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 789                                 $edge->[0], $edge->[1], $varopts );
 790         } elsif( $used{$edge->[0]} ) {
 791                 $subend{$edge->[0]} = $edge->[1];
 792         } elsif( $used{$edge->[1]} ) {
 793                 $substart{$edge->[1]} = $edge->[0];
 794         }
 795     }
 796     # Add substitute start and end edges if necessary
 797     foreach my $node ( keys %substart ) {
 798         my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
 799         my $variables = { %edge_attrs, 'label' => $witstr };
 800         my $nrdg = $self->reading( $node );
 801         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
 802                 # Substart is actually one lower than $startrank
 803                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
 804         }
 805         my $varopts = _dot_attr_string( $variables );
 806         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
 807         }
 808     foreach my $node ( keys %subend ) {
 809         my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
 810         my $variables = { %edge_attrs, 'label' => $witstr };
 811         my $varopts = _dot_attr_string( $variables );
 812         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
 813         }
 814         # HACK part 2
 815         if( $STRAIGHTENHACK ) {
 816                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
 817                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 818         }
 819
 820     $dot .= "}\n";
 821     return $dot;
 822 }
 823
 824 sub _dot_attr_string {
 825         my( $hash ) = @_;
 826         my @attrs;
 827         foreach my $k ( sort keys %$hash ) {
 828                 my $v = $hash->{$k};
 829                 push( @attrs, $k.'="'.$v.'"' );
 830         }
 831         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 832 }
 833
 834 sub _add_edge_weights {
 835         my $self = shift;
 836         # Walk the graph from START to END, choosing the successor node with
 837         # the largest number of witness paths each time.
 838         my $weighted = {};
 839         my $curr = $self->start->id;
 840         my $ranked = $self->end->has_rank;
 841         while( $curr ne $self->end->id ) {
 842                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 843                 my @succ = sort { $self->path_witnesses( $curr, $a )
 844                                                         <=> $self->path_witnesses( $curr, $b ) }
 845                         $self->sequence->successors( $curr );
 846                 my $next = pop @succ;
 847                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 848                 # Try to avoid lacunae in the weighted path.
 849                 while( @succ &&
 850                            ( $self->reading( $next )->is_lacuna ||
 851                                  $nextrank - $rank > 1 ) ){
 852                         $next = pop @succ;
 853                 }
 854                 $weighted->{$curr} = $next;
 855                 $curr = $next;
 856         }
 857         return $weighted;
 858 }
 859
 860 =head2 path_witnesses( $edge )
 861
 862 Returns the list of sigils whose witnesses are associated with the given edge.
 863 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 864
 865 =cut
 866
 867 sub path_witnesses {
 868         my( $self, @edge ) = @_;
 869         # If edge is an arrayref, cope.
 870         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 871                 my $e = shift @edge;
 872                 @edge = @$e;
 873         }
 874         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 875         return @wits;
 876 }
 877
 878 # Helper function. Make a display label for the given witnesses, showing a.c.
 879 # witnesses only where the main witness is not also in the list.
 880 sub _path_display_label {
 881         my $self = shift;
 882         my %wits;
 883         map { $wits{$_} = 1 } @_;
 884
 885         # If an a.c. wit is listed, remove it if the main wit is also listed.
 886         # Otherwise keep it for explicit listing.
 887         my $aclabel = $self->ac_label;
 888         my @disp_ac;
 889         foreach my $w ( sort keys %wits ) {
 890                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
 891                         if( exists $wits{$1} ) {
 892                                 delete $wits{$w};
 893                         } else {
 894                                 push( @disp_ac, $w );
 895                         }
 896                 }
 897         }
 898
 899         # See if we are in a majority situation.
 900         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 901         $maj = $maj > 5 ? $maj : 5;
 902         if( scalar keys %wits > $maj ) {
 903                 unshift( @disp_ac, 'majority' );
 904                 return join( ', ', @disp_ac );
 905         } else {
 906                 return join( ', ', sort keys %wits );
 907         }
 908 }
 909
 910 =head2 readings_at_rank( $rank )
 911
 912 Returns a list of readings at a given rank, taken from the alignment table.
 913
 914 =cut
 915
 916 sub readings_at_rank {
 917         my( $self, $rank ) = @_;
 918         my $table = $self->alignment_table;
 919         # Table rank is real rank - 1.
 920         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
 921         my %readings;
 922         foreach my $e ( @elements ) {
 923                 next unless ref( $e ) eq 'HASH';
 924                 next unless exists $e->{'t'};
 925                 $readings{$e->{'t'}->id} = $e->{'t'};
 926         }
 927         return values %readings;
 928 }
 929
 930 =head2 as_graphml
 931
 932 Returns a GraphML representation of the collation.  The GraphML will contain
 933 two graphs. The first expresses the attributes of the readings and the witness
 934 paths that link them; the second expresses the relationships that link the
 935 readings.  This is the native transfer format for a tradition.
 936
 937 =begin testing
 938
 939 use Text::Tradition;
 940
 941 my $READINGS = 311;
 942 my $PATHS = 361;
 943
 944 my $datafile = 't/data/florilegium_tei_ps.xml';
 945 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 946                                       'name' => 'test0',
 947                                       'file' => $datafile,
 948                                       'linear' => 1 );
 949
 950 ok( $tradition, "Got a tradition object" );
 951 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 952 ok( $tradition->collation, "Tradition has a collation" );
 953
 954 my $c = $tradition->collation;
 955 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 956 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 957 is( scalar $c->relationships, 0, "Collation has all relationships" );
 958
 959 # Add a few relationships
 960 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 961 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 962 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 963
 964 # Now write it to GraphML and parse it again.
 965
 966 my $graphml = $c->as_graphml;
 967 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 968 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 969 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 970 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 971
 972 # Now add a stemma, write to GraphML, and look at the output.
 973 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
 974 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
 975 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
 976 $graphml = $c->as_graphml;
 977 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
 978
 979 # Now add a user, write to GraphML, and look at the output.
 980 unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
 981 my $testuser = Text::Tradition::User->new(
 982         id => 'testuser', password => 'testpass' );
 983 is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
 984 $testuser->add_tradition( $tradition );
 985 is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
 986 $graphml = $c->as_graphml;
 987 like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
 988
 989 =end testing
 990
 991 =cut
 992
 993 sub as_graphml {
 994     my( $self, $options ) = @_;
 995         $self->calculate_ranks unless $self->_graphcalc_done;
 996
 997         my $start = $options->{'from'}
 998                 ? $self->reading( $options->{'from'} ) : $self->start;
 999         my $end = $options->{'to'}
1000                 ? $self->reading( $options->{'to'} ) : $self->end;
1001         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1002                 throw( 'Start node must be before end node' );
1003         }
1004         # The readings need to be ranked for this to work.
1005         $start = $self->start unless $start->has_rank;
1006         $end = $self->end unless $end->has_rank;
1007         my $rankoffset = 0;
1008         unless( $start eq $self->start ) {
1009                 $rankoffset = $start->rank - 1;
1010         }
1011         my %use_readings;
1012
1013     # Some namespaces
1014     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1015     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1016     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1017         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1018
1019     # Create the document and root node
1020     require XML::LibXML;
1021     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1022     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1023     $graphml->setDocumentElement( $root );
1024     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1025     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1026
1027     # List of attribute types to save on our objects and their corresponding
1028     # GraphML types
1029     my %save_types = (
1030         'Str' => 'string',
1031         'Int' => 'int',
1032         'Bool' => 'boolean',
1033         'ReadingID' => 'string',
1034         'RelationshipType' => 'string',
1035         'RelationshipScope' => 'string',
1036     );
1037
1038     # Add the data keys for the graph. Include an extra key 'version' for the
1039     # GraphML output version.
1040     my %graph_data_keys;
1041     my $gdi = 0;
1042     my %graph_attributes = ( 'version' => 'string' );
1043         # Graph attributes include those of Tradition and those of Collation.
1044         my %gattr_from;
1045         my $tmeta = $self->tradition->meta;
1046         my $cmeta = $self->meta;
1047         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1048         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1049         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1050                 next if $attr->name =~ /^_/;
1051                 next unless $save_types{$attr->type_constraint->name};
1052                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1053         }
1054     # Extra custom keys for complex objects that should be saved in some form.
1055     # The subroutine should return a string, or undef/empty.
1056     $graph_attributes{'stemmata'} = sub {
1057         my @stemstrs;
1058                 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1059                         $self->tradition->stemmata;
1060                 join( "\n", @stemstrs );
1061         };
1062     $graph_attributes{'user'} = sub {
1063         $self->tradition->user ? $self->tradition->user->id : undef
1064     };
1065
1066     foreach my $datum ( sort keys %graph_attributes ) {
1067         $graph_data_keys{$datum} = 'dg'.$gdi++;
1068         my $key = $root->addNewChild( $graphml_ns, 'key' );
1069         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1070                 : $graph_attributes{$datum};
1071         $key->setAttribute( 'attr.name', $datum );
1072         $key->setAttribute( 'attr.type', $dtype );
1073         $key->setAttribute( 'for', 'graph' );
1074         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1075     }
1076
1077     # Add the data keys for reading nodes
1078     my %reading_attributes;
1079     my $rmeta = Text::Tradition::Collation::Reading->meta;
1080     foreach my $attr( $rmeta->get_all_attributes ) {
1081                 next if $attr->name =~ /^_/;
1082                 next unless $save_types{$attr->type_constraint->name};
1083                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1084         }
1085         # Extra custom key for the reading morphology
1086         $reading_attributes{'lexemes'} = 'string';
1087
1088     my %node_data_keys;
1089     my $ndi = 0;
1090     foreach my $datum ( sort keys %reading_attributes ) {
1091         $node_data_keys{$datum} = 'dn'.$ndi++;
1092         my $key = $root->addNewChild( $graphml_ns, 'key' );
1093         $key->setAttribute( 'attr.name', $datum );
1094         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1095         $key->setAttribute( 'for', 'node' );
1096         $key->setAttribute( 'id', $node_data_keys{$datum} );
1097     }
1098
1099     # Add the data keys for edges, that is, paths and relationships. Path
1100     # data does not come from a Moose class so is here manually.
1101     my $edi = 0;
1102     my %edge_data_keys;
1103     my %edge_attributes = (
1104         witness => 'string',                    # ID/label for a path
1105         extra => 'boolean',                             # Path key
1106         );
1107     my @path_attributes = keys %edge_attributes; # track our manual additions
1108     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1109     foreach my $attr( $pmeta->get_all_attributes ) {
1110                 next if $attr->name =~ /^_/;
1111                 next unless $save_types{$attr->type_constraint->name};
1112                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1113         }
1114     foreach my $datum ( sort keys %edge_attributes ) {
1115         $edge_data_keys{$datum} = 'de'.$edi++;
1116         my $key = $root->addNewChild( $graphml_ns, 'key' );
1117         $key->setAttribute( 'attr.name', $datum );
1118         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1119         $key->setAttribute( 'for', 'edge' );
1120         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1121     }
1122
1123     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1124     my $xmlidname = $self->tradition->name;
1125     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1126     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1127         $xmlidname = '_'.$xmlidname;
1128     }
1129     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1130     $sgraph->setAttribute( 'edgedefault', 'directed' );
1131     $sgraph->setAttribute( 'id', $xmlidname );
1132     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1133     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1134     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1135     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1136     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1137
1138     # Tradition/collation attribute data
1139     foreach my $datum ( keys %graph_attributes ) {
1140         my $value;
1141         if( $datum eq 'version' ) {
1142                 $value = '3.2';
1143         } elsif( ref( $graph_attributes{$datum} ) ) {
1144                 my $sub = $graph_attributes{$datum};
1145                 $value = &$sub();
1146         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1147                 $value = $self->tradition->$datum;
1148         } else {
1149                 $value = $self->$datum;
1150         }
1151                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1152         }
1153
1154     my $node_ctr = 0;
1155     my %node_hash;
1156     # Add our readings to the graph
1157     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1158         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1159                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1160         $use_readings{$n->id} = 1;
1161         # Add to the main graph
1162         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1163         my $node_xmlid = 'n' . $node_ctr++;
1164         $node_hash{ $n->id } = $node_xmlid;
1165         $node_el->setAttribute( 'id', $node_xmlid );
1166         foreach my $d ( keys %reading_attributes ) {
1167                 my $nval = $n->$d;
1168                 # Custom serialization
1169                 if( $d eq 'lexemes' ) {
1170                                 # If nval is a true value, we have lexemes so we need to
1171                                 # serialize them. Otherwise set nval to undef so that the
1172                                 # key is excluded from this reading.
1173                         $nval = $nval ? $n->_serialize_lexemes : undef;
1174                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1175                         $nval = undef;
1176                 }
1177                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1178                         # Adjust the ranks within the subgraph.
1179                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1180                                 : $nval - $rankoffset;
1181                 }
1182                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1183                         if defined $nval;
1184         }
1185     }
1186
1187     # Add the path edges to the sequence graph
1188     my $edge_ctr = 0;
1189     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1190         # We add an edge in the graphml for every witness in $e.
1191         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1192         my @edge_wits = sort $self->path_witnesses( $e );
1193         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1194         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1195         # Skip any path from start to end; that witness is not in the subgraph.
1196         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1197         foreach my $wit ( @edge_wits ) {
1198                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1199                                                                                 $node_hash{ $e->[0] },
1200                                                                                 $node_hash{ $e->[1] } );
1201                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1202                         $edge_el->setAttribute( 'source', $from );
1203                         $edge_el->setAttribute( 'target', $to );
1204                         $edge_el->setAttribute( 'id', $id );
1205
1206                         # It's a witness path, so add the witness
1207                         my $base = $wit;
1208                         my $key = $edge_data_keys{'witness'};
1209                         # Is this an ante-corr witness?
1210                         my $aclabel = $self->ac_label;
1211                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1212                                 # Keep the base witness
1213                                 $base = $1;
1214                                 # ...and record that this is an 'extra' reading path
1215                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1216                         }
1217                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1218                 }
1219         }
1220
1221         # Report the actual number of nodes and edges that went in
1222         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1223         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1224
1225         # Add the relationship graph to the XML
1226         map { delete $edge_data_keys{$_} } @path_attributes;
1227         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1228                 $node_data_keys{'id'}, \%edge_data_keys );
1229
1230     # Save and return the thing
1231     my $result = decode_utf8( $graphml->toString(1) );
1232     return $result;
1233 }
1234
1235 sub _add_graphml_data {
1236     my( $el, $key, $value ) = @_;
1237     return unless defined $value;
1238     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1239     $data_el->setAttribute( 'key', $key );
1240     $data_el->appendText( $value );
1241 }
1242
1243 =head2 as_csv
1244
1245 Returns a CSV alignment table representation of the collation graph, one
1246 row per witness (or witness uncorrected.)
1247
1248 =cut
1249
1250 sub as_csv {
1251     my( $self ) = @_;
1252     my $table = $self->alignment_table;
1253     my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1254     my @result;
1255     # Make the header row
1256     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1257         push( @result, decode_utf8( $csv->string ) );
1258     # Make the rest of the rows
1259     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1260         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1261         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1262         $csv->combine( @row );
1263         push( @result, decode_utf8( $csv->string ) );
1264     }
1265     return join( "\n", @result );
1266 }
1267
1268 =head2 alignment_table( $use_refs, $include_witnesses )
1269
1270 Return a reference to an alignment table, in a slightly enhanced CollateX
1271 format which looks like this:
1272
1273  $table = { alignment => [ { witness => "SIGIL",
1274                              tokens => [ { t => "TEXT" }, ... ] },
1275                            { witness => "SIG2",
1276                              tokens => [ { t => "TEXT" }, ... ] },
1277                            ... ],
1278             length => TEXTLEN };
1279
1280 If $use_refs is set to 1, the reading object is returned in the table
1281 instead of READINGTEXT; if not, the text of the reading is returned.
1282
1283 If $include_witnesses is set to a hashref, only the witnesses whose sigil
1284 keys have a true hash value will be included.
1285
1286 =cut
1287
1288 sub alignment_table {
1289     my( $self ) = @_;
1290     $self->calculate_ranks() unless $self->_graphcalc_done;
1291     return $self->cached_table if $self->has_cached_table;
1292
1293     # Make sure we can do this
1294         throw( "Need a linear graph in order to make an alignment table" )
1295                 unless $self->linear;
1296         $self->calculate_ranks unless $self->end->has_rank;
1297
1298     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1299     my @all_pos = ( 1 .. $self->end->rank - 1 );
1300     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1301         # say STDERR "Making witness row(s) for " . $wit->sigil;
1302         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1303         my @row = _make_witness_row( \@wit_path, \@all_pos );
1304         push( @{$table->{'alignment'}},
1305                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1306         if( $wit->is_layered ) {
1307                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1308                         $wit->sigil.$self->ac_label );
1309             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1310                         push( @{$table->{'alignment'}},
1311                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
1312         }
1313     }
1314     $self->cached_table( $table );
1315     return $table;
1316 }
1317
1318 sub _make_witness_row {
1319     my( $path, $positions ) = @_;
1320     my %char_hash;
1321     map { $char_hash{$_} = undef } @$positions;
1322     my $debug = 0;
1323     foreach my $rdg ( @$path ) {
1324         my $rtext = $rdg->text;
1325         $rtext = '#LACUNA#' if $rdg->is_lacuna;
1326         say STDERR "rank " . $rdg->rank if $debug;
1327         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1328         $char_hash{$rdg->rank} = { 't' => $rdg };
1329     }
1330     my @row = map { $char_hash{$_} } @$positions;
1331     # Fill in lacuna markers for undef spots in the row
1332     my $last_el = shift @row;
1333     my @filled_row = ( $last_el );
1334     foreach my $el ( @row ) {
1335         # If we are using node reference, make the lacuna node appear many times
1336         # in the table.  If not, use the lacuna tag.
1337         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1338             $el = $last_el;
1339         }
1340         push( @filled_row, $el );
1341         $last_el = $el;
1342     }
1343     return @filled_row;
1344 }
1345
1346 =head1 NAVIGATION METHODS
1347
1348 =head2 reading_sequence( $first, $last, $sigil, $backup )
1349
1350 Returns the ordered list of readings, starting with $first and ending
1351 with $last, for the witness given in $sigil. If a $backup sigil is
1352 specified (e.g. when walking a layered witness), it will be used wherever
1353 no $sigil path exists.  If there is a base text reading, that will be
1354 used wherever no path exists for $sigil or $backup.
1355
1356 =cut
1357
1358 # TODO Think about returning some lazy-eval iterator.
1359 # TODO Get rid of backup; we should know from what witness is whether we need it.
1360
1361 sub reading_sequence {
1362     my( $self, $start, $end, $witness ) = @_;
1363
1364     $witness = $self->baselabel unless $witness;
1365     my @readings = ( $start );
1366     my %seen;
1367     my $n = $start;
1368     while( $n && $n->id ne $end->id ) {
1369         if( exists( $seen{$n->id} ) ) {
1370             throw( "Detected loop for $witness at " . $n->id );
1371         }
1372         $seen{$n->id} = 1;
1373
1374         my $next = $self->next_reading( $n, $witness );
1375         unless( $next ) {
1376             throw( "Did not find any path for $witness from reading " . $n->id );
1377         }
1378         push( @readings, $next );
1379         $n = $next;
1380     }
1381     # Check that the last reading is our end reading.
1382     my $last = $readings[$#readings];
1383     throw( "Last reading found from " . $start->text .
1384         " for witness $witness is not the end!" ) # TODO do we get this far?
1385         unless $last->id eq $end->id;
1386
1387     return @readings;
1388 }
1389
1390 =head2 next_reading( $reading, $sigil );
1391
1392 Returns the reading that follows the given reading along the given witness
1393 path.
1394
1395 =cut
1396
1397 sub next_reading {
1398     # Return the successor via the corresponding path.
1399     my $self = shift;
1400     my $answer = $self->_find_linked_reading( 'next', @_ );
1401         return undef unless $answer;
1402     return $self->reading( $answer );
1403 }
1404
1405 =head2 prior_reading( $reading, $sigil )
1406
1407 Returns the reading that precedes the given reading along the given witness
1408 path.
1409
1410 =cut
1411
1412 sub prior_reading {
1413     # Return the predecessor via the corresponding path.
1414     my $self = shift;
1415     my $answer = $self->_find_linked_reading( 'prior', @_ );
1416     return $self->reading( $answer );
1417 }
1418
1419 sub _find_linked_reading {
1420     my( $self, $direction, $node, $path ) = @_;
1421
1422     # Get a backup if we are dealing with a layered witness
1423     my $alt_path;
1424     my $aclabel = $self->ac_label;
1425     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1426         $alt_path = $1;
1427     }
1428
1429     my @linked_paths = $direction eq 'next'
1430         ? $self->sequence->edges_from( $node )
1431         : $self->sequence->edges_to( $node );
1432     return undef unless scalar( @linked_paths );
1433
1434     # We have to find the linked path that contains all of the
1435     # witnesses supplied in $path.
1436     my( @path_wits, @alt_path_wits );
1437     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1438     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1439     my $base_le;
1440     my $alt_le;
1441     foreach my $le ( @linked_paths ) {
1442         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1443             $base_le = $le;
1444         }
1445                 my @le_wits = sort $self->path_witnesses( $le );
1446                 if( _is_within( \@path_wits, \@le_wits ) ) {
1447                         # This is the right path.
1448                         return $direction eq 'next' ? $le->[1] : $le->[0];
1449                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1450                         $alt_le = $le;
1451                 }
1452     }
1453     # Got this far? Return the alternate path if it exists.
1454     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1455         if $alt_le;
1456
1457     # Got this far? Return the base path if it exists.
1458     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1459         if $base_le;
1460
1461     # Got this far? We have no appropriate path.
1462     warn "Could not find $direction node from " . $node->id
1463         . " along path $path";
1464     return undef;
1465 }
1466
1467 # Some set logic.
1468 sub _is_within {
1469     my( $set1, $set2 ) = @_;
1470     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1471     foreach my $el ( @$set1 ) {
1472         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1473     }
1474     return $ret;
1475 }
1476
1477 # Return the string that joins together a list of witnesses for
1478 # display on a single path.
1479 sub _witnesses_of_label {
1480     my( $self, $label ) = @_;
1481     my $regex = $self->wit_list_separator;
1482     my @answer = split( /\Q$regex\E/, $label );
1483     return @answer;
1484 }
1485
1486 =head2 common_readings
1487
1488 Returns the list of common readings in the graph (i.e. those readings that are
1489 shared by all non-lacunose witnesses.)
1490
1491 =cut
1492
1493 sub common_readings {
1494         my $self = shift;
1495         my @common = grep { $_->is_common } $self->readings;
1496         return @common;
1497 }
1498
1499 =head2 path_text( $sigil, [, $start, $end ] )
1500
1501 Returns the text of a witness (plus its backup, if we are using a layer)
1502 as stored in the collation.  The text is returned as a string, where the
1503 individual readings are joined with spaces and the meta-readings (e.g.
1504 lacunae) are omitted.  Optional specification of $start and $end allows
1505 the generation of a subset of the witness text.
1506
1507 =cut
1508
1509 sub path_text {
1510         my( $self, $wit, $start, $end ) = @_;
1511         $start = $self->start unless $start;
1512         $end = $self->end unless $end;
1513         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1514         my $pathtext = '';
1515         my $last;
1516         foreach my $r ( @path ) {
1517                 unless ( $r->join_prior || !$last || $last->join_next ) {
1518                         $pathtext .= ' ';
1519                 }
1520                 $pathtext .= $r->text;
1521                 $last = $r;
1522         }
1523         return $pathtext;
1524 }
1525
1526 =head1 INITIALIZATION METHODS
1527
1528 These are mostly for use by parsers.
1529
1530 =head2 make_witness_path( $witness )
1531
1532 Link the array of readings contained in $witness->path (and in
1533 $witness->uncorrected_path if it exists) into collation paths.
1534 Clear out the arrays when finished.
1535
1536 =head2 make_witness_paths
1537
1538 Call make_witness_path for all witnesses in the tradition.
1539
1540 =cut
1541
1542 # For use when a collation is constructed from a base text and an apparatus.
1543 # We have the sequences of readings and just need to add path edges.
1544 # When we are done, clear out the witness path attributes, as they are no
1545 # longer needed.
1546 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1547
1548 sub make_witness_paths {
1549     my( $self ) = @_;
1550     foreach my $wit ( $self->tradition->witnesses ) {
1551         # say STDERR "Making path for " . $wit->sigil;
1552         $self->make_witness_path( $wit );
1553     }
1554 }
1555
1556 sub make_witness_path {
1557     my( $self, $wit ) = @_;
1558     my @chain = @{$wit->path};
1559     my $sig = $wit->sigil;
1560     # Add start and end if necessary
1561     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1562     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1563     foreach my $idx ( 0 .. $#chain-1 ) {
1564         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1565     }
1566     if( $wit->is_layered ) {
1567         @chain = @{$wit->uncorrected_path};
1568                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1569                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1570         foreach my $idx( 0 .. $#chain-1 ) {
1571             my $source = $chain[$idx];
1572             my $target = $chain[$idx+1];
1573             $self->add_path( $source, $target, $sig.$self->ac_label )
1574                 unless $self->has_path( $source, $target, $sig );
1575         }
1576     }
1577     $wit->clear_path;
1578     $wit->clear_uncorrected_path;
1579 }
1580
1581 =head2 calculate_ranks
1582
1583 Calculate the reading ranks (that is, their aligned positions relative
1584 to each other) for the graph.  This can only be called on linear collations.
1585
1586 =begin testing
1587
1588 use Text::Tradition;
1589
1590 my $cxfile = 't/data/Collatex-16.xml';
1591 my $t = Text::Tradition->new(
1592     'name'  => 'inline',
1593     'input' => 'CollateX',
1594     'file'  => $cxfile,
1595     );
1596 my $c = $t->collation;
1597
1598 # Make an svg
1599 my $table = $c->alignment_table;
1600 ok( $c->has_cached_table, "Alignment table was cached" );
1601 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1602 $c->calculate_ranks;
1603 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1604 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1605 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1606
1607 =end testing
1608
1609 =cut
1610
1611 sub calculate_ranks {
1612     my $self = shift;
1613     # Save the existing ranks, in case we need to invalidate the cached SVG.
1614     my %existing_ranks;
1615     map { $existing_ranks{$_} = $_->rank } $self->readings;
1616
1617     # Do the rankings based on the relationship equivalence graph, starting
1618     # with the start node.
1619     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1620
1621     # Transfer our rankings from the topological graph to the real one.
1622     foreach my $r ( $self->readings ) {
1623         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1624             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1625         } else {
1626                 # Die. Find the last rank we calculated.
1627                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1628                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1629                         $self->readings;
1630                 my $last = pop @all_defined;
1631             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1632         }
1633     }
1634     # Do we need to invalidate the cached data?
1635     if( $self->has_cached_table ) {
1636         foreach my $r ( $self->readings ) {
1637                 next if defined( $existing_ranks{$r} )
1638                         && $existing_ranks{$r} == $r->rank;
1639                 # Something has changed, so clear the cache
1640                 $self->_clear_cache;
1641                         # ...and recalculate the common readings.
1642                         $self->calculate_common_readings();
1643                 last;
1644         }
1645     }
1646         # The graph calculation information is now up to date.
1647         $self->_graphcalc_done(1);
1648 }
1649
1650 sub _clear_cache {
1651         my $self = shift;
1652         $self->wipe_table if $self->has_cached_table;
1653 }
1654
1655
1656 =head2 flatten_ranks
1657
1658 A convenience method for parsing collation data.  Searches the graph for readings
1659 with the same text at the same rank, and merges any that are found.
1660
1661 =cut
1662
1663 sub flatten_ranks {
1664     my $self = shift;
1665     my %unique_rank_rdg;
1666     my $changed;
1667     foreach my $rdg ( $self->readings ) {
1668         next unless $rdg->has_rank;
1669         my $key = $rdg->rank . "||" . $rdg->text;
1670         if( exists $unique_rank_rdg{$key} ) {
1671                 # Make sure they don't have different grammatical forms
1672                         my $ur = $unique_rank_rdg{$key};
1673                         if( $rdg->disambiguated && $ur->disambiguated ) {
1674                                 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1675                                 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1676                                 next unless $rform eq $uform;
1677                         } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1678                                 next;
1679                         }
1680             # Combine!
1681                 #say STDERR "Combining readings at same rank: $key";
1682                 $changed = 1;
1683             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1684             # TODO see if this now makes a common point.
1685         } else {
1686             $unique_rank_rdg{$key} = $rdg;
1687         }
1688     }
1689     # If we merged readings, the ranks are still fine but the alignment
1690     # table is wrong. Wipe it.
1691     $self->wipe_table() if $changed;
1692 }
1693
1694
1695 =head2 calculate_common_readings
1696
1697 Goes through the graph identifying the readings that appear in every witness
1698 (apart from those with lacunae at that spot.) Marks them as common and returns
1699 the list.
1700
1701 =begin testing
1702
1703 use Text::Tradition;
1704
1705 my $cxfile = 't/data/Collatex-16.xml';
1706 my $t = Text::Tradition->new(
1707     'name'  => 'inline',
1708     'input' => 'CollateX',
1709     'file'  => $cxfile,
1710     );
1711 my $c = $t->collation;
1712
1713 my @common = $c->calculate_common_readings();
1714 is( scalar @common, 8, "Found correct number of common readings" );
1715 my @marked = sort $c->common_readings();
1716 is( scalar @common, 8, "All common readings got marked as such" );
1717 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1718 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1719
1720 =end testing
1721
1722 =cut
1723
1724 sub calculate_common_readings {
1725         my $self = shift;
1726         my @common;
1727         map { $_->is_common( 0 ) } $self->readings;
1728         # Implicitly calls calculate_ranks
1729         my $table = $self->alignment_table;
1730         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1731                 my @row = map { $_->{'tokens'}->[$idx]
1732                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1733                                         @{$table->{'alignment'}};
1734                 my %hash;
1735                 foreach my $r ( @row ) {
1736                         if( $r ) {
1737                                 $hash{$r->id} = $r unless $r->is_meta;
1738                         } else {
1739                                 $hash{'UNDEF'} = $r;
1740                         }
1741                 }
1742                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1743                         my( $r ) = values %hash;
1744                         $r->is_common( 1 );
1745                         push( @common, $r );
1746                 }
1747         }
1748         return @common;
1749 }
1750
1751 =head2 text_from_paths
1752
1753 Calculate the text array for all witnesses from the path, for later consistency
1754 checking.  Only to be used if there is no non-graph-based way to know the
1755 original texts.
1756
1757 =cut
1758
1759 sub text_from_paths {
1760         my $self = shift;
1761     foreach my $wit ( $self->tradition->witnesses ) {
1762         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1763         my @text;
1764         foreach my $r ( @readings ) {
1765                 next if $r->is_meta;
1766                 push( @text, $r->text );
1767         }
1768         $wit->text( \@text );
1769         if( $wit->is_layered ) {
1770                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1771                                                                                                   $wit->sigil.$self->ac_label );
1772                         my @uctext;
1773                         foreach my $r ( @ucrdgs ) {
1774                                 next if $r->is_meta;
1775                                 push( @uctext, $r->text );
1776                         }
1777                         $wit->layertext( \@uctext );
1778         }
1779     }
1780 }
1781
1782 =head1 UTILITY FUNCTIONS
1783
1784 =head2 common_predecessor( $reading_a, $reading_b )
1785
1786 Find the last reading that occurs in sequence before both the given readings.
1787 At the very least this should be $self->start.
1788
1789 =head2 common_successor( $reading_a, $reading_b )
1790
1791 Find the first reading that occurs in sequence after both the given readings.
1792 At the very least this should be $self->end.
1793
1794 =begin testing
1795
1796 use Text::Tradition;
1797
1798 my $cxfile = 't/data/Collatex-16.xml';
1799 my $t = Text::Tradition->new(
1800     'name'  => 'inline',
1801     'input' => 'CollateX',
1802     'file'  => $cxfile,
1803     );
1804 my $c = $t->collation;
1805
1806 is( $c->common_predecessor( 'n24', 'n23' )->id,
1807     'n20', "Found correct common predecessor" );
1808 is( $c->common_successor( 'n24', 'n23' )->id,
1809     '__END__', "Found correct common successor" );
1810
1811 is( $c->common_predecessor( 'n19', 'n17' )->id,
1812     'n16', "Found correct common predecessor for readings on same path" );
1813 is( $c->common_successor( 'n21', 'n10' )->id,
1814     '__END__', "Found correct common successor for readings on same path" );
1815
1816 =end testing
1817
1818 =cut
1819
1820 ## Return the closest reading that is a predecessor of both the given readings.
1821 sub common_predecessor {
1822         my $self = shift;
1823         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1824         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1825 }
1826
1827 sub common_successor {
1828         my $self = shift;
1829         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1830         return $self->_common_in_path( $r1, $r2, 'successors' );
1831 }
1832
1833
1834 # TODO think about how to do this without ranks...
1835 sub _common_in_path {
1836         my( $self, $r1, $r2, $dir ) = @_;
1837         my $iter = $self->end->rank;
1838         my @candidates;
1839         my @last_r1 = ( $r1 );
1840         my @last_r2 = ( $r2 );
1841         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1842         my %all_seen;
1843         # say STDERR "Finding common $dir for $r1, $r2";
1844         while( !@candidates ) {
1845                 last unless $iter--;  # Avoid looping infinitely
1846                 # Iterate separately down the graph from r1 and r2
1847                 my( @new_lc1, @new_lc2 );
1848                 foreach my $lc ( @last_r1 ) {
1849                         foreach my $p ( $lc->$dir ) {
1850                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1851                                         # say STDERR "Path candidate $p from $lc";
1852                                         push( @candidates, $p );
1853                                 } elsif( !$all_seen{$p->id} ) {
1854                                         $all_seen{$p->id} = 'r1';
1855                                         push( @new_lc1, $p );
1856                                 }
1857                         }
1858                 }
1859                 foreach my $lc ( @last_r2 ) {
1860                         foreach my $p ( $lc->$dir ) {
1861                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1862                                         # say STDERR "Path candidate $p from $lc";
1863                                         push( @candidates, $p );
1864                                 } elsif( !$all_seen{$p->id} ) {
1865                                         $all_seen{$p->id} = 'r2';
1866                                         push( @new_lc2, $p );
1867                                 }
1868                         }
1869                 }
1870                 @last_r1 = @new_lc1;
1871                 @last_r2 = @new_lc2;
1872         }
1873         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1874         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1875 }
1876
1877 sub throw {
1878         Text::Tradition::Error->throw(
1879                 'ident' => 'Collation error',
1880                 'message' => $_[0],
1881                 );
1882 }
1883
1884 no Moose;
1885 __PACKAGE__->meta->make_immutable;
1886
1887 =head1 LICENSE
1888
1889 This package is free software and is provided "as is" without express
1890 or implied warranty.  You can redistribute it and/or modify it under
1891 the same terms as Perl itself.
1892
1893 =head1 AUTHOR
1894
1895 Tara L Andrews E<lt>aurum@cpan.orgE<gt>