lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use Text::CSV;
  10 use Text::Tradition::Collation::Reading;
  11 use Text::Tradition::Collation::RelationshipStore;
  12 use Text::Tradition::Error;
  13 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  14 use Moose;
  15
  16 has 'sequence' => (
  17     is => 'ro',
  18     isa => 'Graph',
  19     default => sub { Graph->new() },
  20     handles => {
  21         paths => 'edges',
  22     },
  23     );
  24
  25 has 'relations' => (
  26         is => 'ro',
  27         isa => 'Text::Tradition::Collation::RelationshipStore',
  28         handles => {
  29                 relationships => 'relationships',
  30                 related_readings => 'related_readings',
  31                 get_relationship => 'get_relationship',
  32                 del_relationship => 'del_relationship',
  33                 equivalence => 'equivalence',
  34                 equivalence_graph => 'equivalence_graph',
  35         },
  36         writer => '_set_relations',
  37         );
  38
  39 has 'tradition' => (
  40     is => 'ro',
  41     isa => 'Text::Tradition',
  42     writer => '_set_tradition',
  43     weak_ref => 1,
  44     );
  45
  46 has 'readings' => (
  47         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  48         traits => ['Hash'],
  49     handles => {
  50         reading     => 'get',
  51         _add_reading => 'set',
  52         del_reading => 'delete',
  53         has_reading => 'exists',
  54         readings   => 'values',
  55     },
  56     default => sub { {} },
  57         );
  58
  59 has 'wit_list_separator' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => ', ',
  63     );
  64
  65 has 'baselabel' => (
  66     is => 'rw',
  67     isa => 'Str',
  68     default => 'base text',
  69     );
  70
  71 has 'linear' => (
  72     is => 'rw',
  73     isa => 'Bool',
  74     default => 1,
  75     );
  76
  77 has 'ac_label' => (
  78     is => 'rw',
  79     isa => 'Str',
  80     default => ' (a.c.)',
  81     );
  82
  83 has 'wordsep' => (
  84         is => 'rw',
  85         isa => 'Str',
  86         default => ' ',
  87         );
  88
  89 has 'start' => (
  90         is => 'ro',
  91         isa => 'Text::Tradition::Collation::Reading',
  92         writer => '_set_start',
  93         weak_ref => 1,
  94         );
  95
  96 has 'end' => (
  97         is => 'ro',
  98         isa => 'Text::Tradition::Collation::Reading',
  99         writer => '_set_end',
 100         weak_ref => 1,
 101         );
 102
 103 has 'cached_svg' => (
 104         is => 'rw',
 105         isa => 'Str',
 106         predicate => 'has_cached_svg',
 107         clearer => 'wipe_svg',
 108         );
 109
 110 has 'cached_table' => (
 111         is => 'rw',
 112         isa => 'HashRef',
 113         predicate => 'has_cached_table',
 114         clearer => 'wipe_table',
 115         );
 116
 117 has '_graphcalc_done' => (
 118         is => 'rw',
 119         isa => 'Bool',
 120         default => undef,
 121         );
 122
 123 =head1 NAME
 124
 125 Text::Tradition::Collation - a software model for a text collation
 126
 127 =head1 SYNOPSIS
 128
 129   use Text::Tradition;
 130   my $t = Text::Tradition->new(
 131     'name' => 'this is a text',
 132     'input' => 'TEI',
 133     'file' => '/path/to/tei_parallel_seg_file.xml' );
 134
 135   my $c = $t->collation;
 136   my @readings = $c->readings;
 137   my @paths = $c->paths;
 138   my @relationships = $c->relationships;
 139
 140   my $svg_variant_graph = $t->collation->as_svg();
 141
 142 =head1 DESCRIPTION
 143
 144 Text::Tradition is a library for representation and analysis of collated
 145 texts, particularly medieval ones.  The Collation is the central feature of
 146 a Tradition, where the text, its sequence of readings, and its relationships
 147 between readings are actually kept.
 148
 149 =head1 CONSTRUCTOR
 150
 151 =head2 new
 152
 153 The constructor.  Takes a hash or hashref of the following arguments:
 154
 155 =over
 156
 157 =item * tradition - The Text::Tradition object to which the collation
 158 belongs. Required.
 159
 160 =item * linear - Whether the collation should be linear; that is, whether
 161 transposed readings should be treated as two linked readings rather than one,
 162 and therefore whether the collation graph is acyclic.  Defaults to true.
 163
 164 =item * baselabel - The default label for the path taken by a base text
 165 (if any). Defaults to 'base text'.
 166
 167 =item * wit_list_separator - The string to join a list of witnesses for
 168 purposes of making labels in display graphs.  Defaults to ', '.
 169
 170 =item * ac_label - The extra label to tack onto a witness sigil when
 171 representing another layer of path for the given witness - that is, when
 172 a text has more than one possible reading due to scribal corrections or
 173 the like.  Defaults to ' (a.c.)'.
 174
 175 =item * wordsep - The string used to separate words in the original text.
 176 Defaults to ' '.
 177
 178 =back
 179
 180 =head1 ACCESSORS
 181
 182 =head2 tradition
 183
 184 =head2 linear
 185
 186 =head2 wit_list_separator
 187
 188 =head2 baselabel
 189
 190 =head2 ac_label
 191
 192 =head2 wordsep
 193
 194 Simple accessors for collation attributes.
 195
 196 =head2 start
 197
 198 The meta-reading at the start of every witness path.
 199
 200 =head2 end
 201
 202 The meta-reading at the end of every witness path.
 203
 204 =head2 readings
 205
 206 Returns all Reading objects in the graph.
 207
 208 =head2 reading( $id )
 209
 210 Returns the Reading object corresponding to the given ID.
 211
 212 =head2 add_reading( $reading_args )
 213
 214 Adds a new reading object to the collation.
 215 See L<Text::Tradition::Collation::Reading> for the available arguments.
 216
 217 =head2 del_reading( $object_or_id )
 218
 219 Removes the given reading from the collation, implicitly removing its
 220 paths and relationships.
 221
 222 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 223
 224 Merges the $second reading into the $main one. If $concatenate is true, then
 225 the merged node will carry the text of both readings, concatenated with either
 226 $with_str (if specified) or a sensible default (the empty string if the
 227 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 228
 229 The first two arguments may be either readings or reading IDs.
 230
 231 =head2 has_reading( $id )
 232
 233 Predicate to see whether a given reading ID is in the graph.
 234
 235 =head2 reading_witnesses( $object_or_id )
 236
 237 Returns a list of sigils whose witnesses contain the reading.
 238
 239 =head2 paths
 240
 241 Returns all reading paths within the document - that is, all edges in the
 242 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 243
 244 =head2 add_path( $source, $target, $sigil )
 245
 246 Links the given readings in the collation in sequence, under the given witness
 247 sigil.  The readings may be specified by object or ID.
 248
 249 =head2 del_path( $source, $target, $sigil )
 250
 251 Links the given readings in the collation in sequence, under the given witness
 252 sigil.  The readings may be specified by object or ID.
 253
 254 =head2 has_path( $source, $target );
 255
 256 Returns true if the two readings are linked in sequence in any witness.
 257 The readings may be specified by object or ID.
 258
 259 =head2 relationships
 260
 261 Returns all Relationship objects in the collation.
 262
 263 =head2 add_relationship( $reading, $other_reading, $options )
 264
 265 Adds a new relationship of the type given in $options between the two readings,
 266 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 267 where $status is true on success, and @vectors is a list of relationship edges
 268 that were ultimately added.
 269 See L<Text::Tradition::Collation::Relationship> for the available options.
 270
 271 =cut
 272
 273 sub BUILD {
 274     my $self = shift;
 275     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 276     $self->_set_start( $self->add_reading(
 277         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 278     $self->_set_end( $self->add_reading(
 279         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 280 }
 281
 282 ### Reading construct/destruct functions
 283
 284 sub add_reading {
 285         my( $self, $reading ) = @_;
 286         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 287                 my %args = %$reading;
 288                 if( $args{'init'} ) {
 289                         # If we are initializing an empty collation, don't assume that we
 290                         # have set a tradition.
 291                         delete $args{'init'};
 292                 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
 293                         $args{'language'} = $self->tradition->language;
 294                 }
 295                 $reading = Text::Tradition::Collation::Reading->new(
 296                         'collation' => $self,
 297                         %args );
 298         }
 299         # First check to see if a reading with this ID exists.
 300         if( $self->reading( $reading->id ) ) {
 301                 throw( "Collation already has a reading with id " . $reading->id );
 302         }
 303         $self->_graphcalc_done(0);
 304         $self->_add_reading( $reading->id => $reading );
 305         # Once the reading has been added, put it in both graphs.
 306         $self->sequence->add_vertex( $reading->id );
 307         $self->relations->add_reading( $reading->id );
 308         return $reading;
 309 };
 310
 311 around del_reading => sub {
 312         my $orig = shift;
 313         my $self = shift;
 314         my $arg = shift;
 315
 316         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 317                 $arg = $arg->id;
 318         }
 319         # Remove the reading from the graphs.
 320         $self->_graphcalc_done(0);
 321         $self->_clear_cache; # Explicitly clear caches to GC the reading
 322         $self->sequence->delete_vertex( $arg );
 323         $self->relations->delete_reading( $arg );
 324
 325         # Carry on.
 326         $self->$orig( $arg );
 327 };
 328
 329 =begin testing
 330
 331 use Text::Tradition;
 332
 333 my $cxfile = 't/data/Collatex-16.xml';
 334 my $t = Text::Tradition->new(
 335     'name'  => 'inline',
 336     'input' => 'CollateX',
 337     'file'  => $cxfile,
 338     );
 339 my $c = $t->collation;
 340
 341 my $rno = scalar $c->readings;
 342 # Split n21 for testing purposes
 343 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 344 my $old_r = $c->reading( 'n21' );
 345 $old_r->alter_text( 'to' );
 346 $c->del_path( 'n20', 'n21', 'A' );
 347 $c->add_path( 'n20', 'n21p0', 'A' );
 348 $c->add_path( 'n21p0', 'n21', 'A' );
 349 $c->flatten_ranks();
 350 ok( $c->reading( 'n21p0' ), "New reading exists" );
 351 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 352
 353 # Combine n3 and n4 ( with his )
 354 $c->merge_readings( 'n3', 'n4', 1 );
 355 ok( !$c->reading('n4'), "Reading n4 is gone" );
 356 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 357
 358 # Collapse n9 and n10 ( rood / root )
 359 $c->merge_readings( 'n9', 'n10' );
 360 ok( !$c->reading('n10'), "Reading n10 is gone" );
 361 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 362
 363 # Combine n21 and n21p0
 364 my $remaining = $c->reading('n21');
 365 $remaining ||= $c->reading('n22');  # one of these should still exist
 366 $c->merge_readings( 'n21p0', $remaining, 1 );
 367 ok( !$c->reading('n21'), "Reading $remaining is gone" );
 368 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
 369
 370 =end testing
 371
 372 =cut
 373
 374 sub merge_readings {
 375         my $self = shift;
 376
 377         # Sanity check
 378         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 379         my $mergemeta = $kept_obj->is_meta;
 380         throw( "Cannot merge meta and non-meta reading" )
 381                 unless ( $mergemeta && $del_obj->is_meta )
 382                         || ( !$mergemeta && !$del_obj->is_meta );
 383         if( $mergemeta ) {
 384                 throw( "Cannot merge with start or end node" )
 385                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 386                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 387         }
 388         # We only need the IDs for adding paths to the graph, not the reading
 389         # objects themselves.
 390         my $kept = $kept_obj->id;
 391         my $deleted = $del_obj->id;
 392         $self->_graphcalc_done(0);
 393
 394     # The kept reading should inherit the paths and the relationships
 395     # of the deleted reading.
 396         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 397                 my @vector = ( $kept );
 398                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 399                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 400                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 401                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 402                 $self->sequence->add_edge( @vector );
 403                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 404                 @wits{keys %$fwits} = values %$fwits;
 405                 $self->sequence->set_edge_attributes( @vector, \%wits );
 406         }
 407         $self->relations->merge_readings( $kept, $deleted, $combine );
 408
 409         # Do the deletion deed.
 410         if( $combine ) {
 411                 # Combine the text of the readings
 412                 my $joinstr = $combine_char;
 413                 unless( defined $joinstr ) {
 414                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 415                         $joinstr = $self->wordsep unless defined $joinstr;
 416                 }
 417                 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
 418                 # Change this reading to a joining one if necessary
 419                 $kept_obj->_set_join_next( $del_obj->join_next );
 420                 $kept_obj->normal_form(
 421                         join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
 422                 # Combine the lexemes present in the readings
 423                 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
 424                         $kept_obj->add_lexeme( $del_obj->lexemes );
 425                 }
 426         }
 427         $self->del_reading( $deleted );
 428 }
 429
 430 =head2 compress_readings
 431
 432 Where possible in the graph, compresses plain sequences of readings into a
 433 single reading. The sequences must consist of readings with no
 434 relationships to other readings, with only a single witness path between
 435 them and no other witness paths from either that would skip the other. The
 436 readings must also not be marked as nonsense or bad grammar.
 437
 438 WARNING: This operation cannot be undone.
 439
 440 =cut
 441
 442 sub compress_readings {
 443         my $self = shift;
 444         # Anywhere in the graph that there is a reading that joins only to a single
 445         # successor, and neither of these have any relationships, just join the two
 446         # readings.
 447         my %gobbled;
 448         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 449                 # While we are here, get rid of any extra wordforms from a disambiguated
 450                 # reading.
 451                 if( $rdg->disambiguated ) {
 452                         foreach my $lex ( $rdg->lexemes ) {
 453                                 $lex->clear_matching_forms();
 454                                 $lex->add_matching_form( $lex->form );
 455                         }
 456                 }
 457                 # Now look for readings that can be joined to their successors.
 458                 next if $rdg->is_meta;
 459                 next if $gobbled{$rdg->id};
 460                 next if $rdg->grammar_invalid || $rdg->is_nonsense;
 461                 next if $rdg->related_readings();
 462                 my %seen;
 463                 while( $self->sequence->successors( $rdg ) == 1 ) {
 464                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 465                         throw( "Infinite loop" ) if $seen{$next->id};
 466                         $seen{$next->id} = 1;
 467                         last if $self->sequence->predecessors( $next ) > 1;
 468                         last if $next->is_meta;
 469                         last if $next->grammar_invalid || $next->is_nonsense;
 470                         last if $next->related_readings();
 471                         say "Joining readings $rdg and $next";
 472                         $self->merge_readings( $rdg, $next, 1 );
 473                 }
 474         }
 475         # Make sure we haven't screwed anything up
 476         foreach my $wit ( $self->tradition->witnesses ) {
 477                 my $pathtext = $self->path_text( $wit->sigil );
 478                 my $origtext = join( ' ', @{$wit->text} );
 479                 throw( "Text differs for witness " . $wit->sigil )
 480                         unless $pathtext eq $origtext;
 481                 if( $wit->is_layered ) {
 482                         $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
 483                         $origtext = join( ' ', @{$wit->layertext} );
 484                         throw( "Ante-corr text differs for witness " . $wit->sigil )
 485                                 unless $pathtext eq $origtext;
 486                 }
 487         }
 488
 489         $self->relations->rebuild_equivalence();
 490         $self->calculate_ranks();
 491 }
 492
 493 # Helper function for manipulating the graph.
 494 sub _stringify_args {
 495         my( $self, $first, $second, @args ) = @_;
 496     $first = $first->id
 497         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 498     $second = $second->id
 499         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 500     return( $first, $second, @args );
 501 }
 502
 503 # Helper function for manipulating the graph.
 504 sub _objectify_args {
 505         my( $self, $first, $second, $arg ) = @_;
 506     $first = $self->reading( $first )
 507         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 508     $second = $self->reading( $second )
 509         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 510     return( $first, $second, $arg );
 511 }
 512 ### Path logic
 513
 514 sub add_path {
 515         my $self = shift;
 516
 517         # We only need the IDs for adding paths to the graph, not the reading
 518         # objects themselves.
 519     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 520
 521         $self->_graphcalc_done(0);
 522         # Connect the readings
 523         unless( $self->sequence->has_edge( $source, $target ) ) {
 524             $self->sequence->add_edge( $source, $target );
 525             $self->relations->add_equivalence_edge( $source, $target );
 526         }
 527     # Note the witness in question
 528     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 529 }
 530
 531 sub del_path {
 532         my $self = shift;
 533         my @args;
 534         if( ref( $_[0] ) eq 'ARRAY' ) {
 535                 my $e = shift @_;
 536                 @args = ( @$e, @_ );
 537         } else {
 538                 @args = @_;
 539         }
 540
 541         # We only need the IDs for adding paths to the graph, not the reading
 542         # objects themselves.
 543     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 544
 545         $self->_graphcalc_done(0);
 546         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 547                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 548         }
 549         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 550                 $self->sequence->delete_edge( $source, $target );
 551                 $self->relations->delete_equivalence_edge( $source, $target );
 552         }
 553 }
 554
 555
 556 # Extra graph-alike utility
 557 sub has_path {
 558         my $self = shift;
 559     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 560         return undef unless $self->sequence->has_edge( $source, $target );
 561         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 562 }
 563
 564 =head2 clear_witness( @sigil_list )
 565
 566 Clear the given witnesses out of the collation entirely, removing references
 567 to them in paths, and removing readings that belong only to them.  Should only
 568 be called via $tradition->del_witness.
 569
 570 =cut
 571
 572 sub clear_witness {
 573         my( $self, @sigils ) = @_;
 574
 575         $self->_graphcalc_done(0);
 576         # Clear the witness(es) out of the paths
 577         foreach my $e ( $self->paths ) {
 578                 foreach my $sig ( @sigils ) {
 579                         $self->del_path( $e, $sig );
 580                 }
 581         }
 582
 583         # Clear out the newly unused readings
 584         foreach my $r ( $self->readings ) {
 585                 unless( $self->reading_witnesses( $r ) ) {
 586                         $self->del_reading( $r );
 587                 }
 588         }
 589 }
 590
 591 sub add_relationship {
 592         my $self = shift;
 593     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 594     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 595         $self->_graphcalc_done(0);
 596     return @vectors;
 597 }
 598
 599 around qw/ get_relationship del_relationship / => sub {
 600         my $orig = shift;
 601         my $self = shift;
 602         my @args = @_;
 603         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 604                 @args = @{$_[0]};
 605         }
 606         my( $source, $target ) = $self->_stringify_args( @args );
 607         $self->$orig( $source, $target );
 608 };
 609
 610 =head2 reading_witnesses( $reading )
 611
 612 Return a list of sigils corresponding to the witnesses in which the reading appears.
 613
 614 =cut
 615
 616 sub reading_witnesses {
 617         my( $self, $reading ) = @_;
 618         # We need only check either the incoming or the outgoing edges; I have
 619         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 620         if( $reading eq $self->start ) {
 621                 return map { $_->sigil } $self->tradition->witnesses;
 622         }
 623         my %all_witnesses;
 624         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 625                 my $wits = $self->sequence->get_edge_attributes( @$e );
 626                 @all_witnesses{ keys %$wits } = 1;
 627         }
 628         my $acstr = $self->ac_label;
 629         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 630                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 631         }
 632         return keys %all_witnesses;
 633 }
 634
 635 =head1 OUTPUT METHODS
 636
 637 =head2 as_svg( \%options )
 638
 639 Returns an SVG string that represents the graph, via as_dot and graphviz.
 640 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 641
 642 =cut
 643
 644 sub as_svg {
 645     my( $self, $opts ) = @_;
 646     throw( "Need GraphViz installed to output SVG" )
 647         unless File::Which::which( 'dot' );
 648     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 649     $self->calculate_ranks()
 650         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 651     if( !$self->has_cached_svg || $opts->{'recalc'}     || $want_subgraph ) {
 652                 my @cmd = qw/dot -Tsvg/;
 653                 my( $svg, $err );
 654                 my $dotfile = File::Temp->new();
 655                 ## USE FOR DEBUGGING
 656                 # $dotfile->unlink_on_destroy(0);
 657                 binmode $dotfile, ':utf8';
 658                 print $dotfile $self->as_dot( $opts );
 659                 push( @cmd, $dotfile->filename );
 660                 run( \@cmd, ">", binary(), \$svg );
 661                 $svg = decode_utf8( $svg );
 662                 $self->cached_svg( $svg ) unless $want_subgraph;
 663                 return $svg;
 664         } else {
 665                 return $self->cached_svg;
 666         }
 667 }
 668
 669
 670 =head2 as_dot( \%options )
 671
 672 Returns a string that is the collation graph expressed in dot
 673 (i.e. GraphViz) format.  Options include:
 674
 675 =over 4
 676
 677 =item * from
 678
 679 =item * to
 680
 681 =item * color_common
 682
 683 =back
 684
 685 =cut
 686
 687 sub as_dot {
 688     my( $self, $opts ) = @_;
 689     my $startrank = $opts->{'from'} if $opts;
 690     my $endrank = $opts->{'to'} if $opts;
 691     my $color_common = $opts->{'color_common'} if $opts;
 692     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 693        && $self->end->rank > 100;
 694     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 695
 696     # Check the arguments
 697     if( $startrank ) {
 698         return if $endrank && $startrank > $endrank;
 699         return if $startrank > $self->end->rank;
 700         }
 701         if( defined $endrank ) {
 702                 return if $endrank < 0;
 703                 $endrank = undef if $endrank == $self->end->rank;
 704         }
 705
 706     my $graph_name = $self->tradition->name;
 707     $graph_name =~ s/[^\w\s]//g;
 708     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 709
 710     my %graph_attrs = (
 711         'rankdir' => 'LR',
 712         'bgcolor' => 'none',
 713         );
 714     my %node_attrs = (
 715         'fontsize' => 14,
 716         'fillcolor' => 'white',
 717         'style' => 'filled',
 718         'shape' => 'ellipse'
 719         );
 720     my %edge_attrs = (
 721         'arrowhead' => 'open',
 722         'color' => '#000000',
 723         'fontcolor' => '#000000',
 724         );
 725
 726     my $dot = sprintf( "digraph %s {\n", $graph_name );
 727     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 728     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 729
 730         # Output substitute start/end readings if necessary
 731         if( $startrank ) {
 732                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
 733         }
 734         if( $endrank ) {
 735                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
 736         }
 737         if( $STRAIGHTENHACK ) {
 738                 ## HACK part 1
 739                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
 740                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
 741                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 742         }
 743         my %used;  # Keep track of the readings that actually appear in the graph
 744         # Sort the readings by rank if we have ranks; this speeds layout.
 745         my @all_readings = $self->end->has_rank
 746                 ? sort { $a->rank <=> $b->rank } $self->readings
 747                 : $self->readings;
 748         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 749     foreach my $reading ( @all_readings ) {
 750         # Only output readings within our rank range.
 751         next if $startrank && $reading->rank < $startrank;
 752         next if $endrank && $reading->rank > $endrank;
 753         $used{$reading->id} = 1;
 754         # Need not output nodes without separate labels
 755         next if $reading->id eq $reading->text;
 756         my $rattrs;
 757         my $label = $reading->text;
 758         $label .= '-' if $reading->join_next;
 759         $label = "-$label" if $reading->join_prior;
 760         $label =~ s/\"/\\\"/g;
 761                 $rattrs->{'label'} = $label;
 762                 $rattrs->{'id'} = $reading->id;
 763                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 764         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 765     }
 766
 767         # Add the real edges. Need to weight one edge per rank jump, in a
 768         # continuous line.
 769         # my $weighted = $self->_add_edge_weights;
 770     my @edges = $self->paths;
 771         my( %substart, %subend );
 772     foreach my $edge ( @edges ) {
 773         # Do we need to output this edge?
 774         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 775                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 776                         my $variables = { %edge_attrs, 'label' => $label };
 777
 778                         # Account for the rank gap if necessary
 779                         my $rank0 = $self->reading( $edge->[0] )->rank
 780                                 if $self->reading( $edge->[0] )->has_rank;
 781                         my $rank1 = $self->reading( $edge->[1] )->rank
 782                                 if $self->reading( $edge->[1] )->has_rank;
 783                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 784                                 $variables->{'minlen'} = $rank1 - $rank0;
 785                         }
 786
 787                         # Add the calculated edge weights
 788                         # if( exists $weighted->{$edge->[0]}
 789                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 790                         #       # $variables->{'color'} = 'red';
 791                         #       $variables->{'weight'} = 3.0;
 792                         # }
 793
 794                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 795                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 796                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 797
 798                         my $varopts = _dot_attr_string( $variables );
 799                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 800                                 $edge->[0], $edge->[1], $varopts );
 801         } elsif( $used{$edge->[0]} ) {
 802                 $subend{$edge->[0]} = $edge->[1];
 803         } elsif( $used{$edge->[1]} ) {
 804                 $substart{$edge->[1]} = $edge->[0];
 805         }
 806     }
 807     # Add substitute start and end edges if necessary
 808     foreach my $node ( keys %substart ) {
 809         my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
 810         my $variables = { %edge_attrs, 'label' => $witstr };
 811         my $nrdg = $self->reading( $node );
 812         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
 813                 # Substart is actually one lower than $startrank
 814                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
 815         }
 816         my $varopts = _dot_attr_string( $variables );
 817         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
 818         }
 819     foreach my $node ( keys %subend ) {
 820         my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
 821         my $variables = { %edge_attrs, 'label' => $witstr };
 822         my $varopts = _dot_attr_string( $variables );
 823         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
 824         }
 825         # HACK part 2
 826         if( $STRAIGHTENHACK ) {
 827                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
 828                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 829         }
 830
 831     $dot .= "}\n";
 832     return $dot;
 833 }
 834
 835 sub _dot_attr_string {
 836         my( $hash ) = @_;
 837         my @attrs;
 838         foreach my $k ( sort keys %$hash ) {
 839                 my $v = $hash->{$k};
 840                 push( @attrs, $k.'="'.$v.'"' );
 841         }
 842         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 843 }
 844
 845 sub _add_edge_weights {
 846         my $self = shift;
 847         # Walk the graph from START to END, choosing the successor node with
 848         # the largest number of witness paths each time.
 849         my $weighted = {};
 850         my $curr = $self->start->id;
 851         my $ranked = $self->end->has_rank;
 852         while( $curr ne $self->end->id ) {
 853                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 854                 my @succ = sort { $self->path_witnesses( $curr, $a )
 855                                                         <=> $self->path_witnesses( $curr, $b ) }
 856                         $self->sequence->successors( $curr );
 857                 my $next = pop @succ;
 858                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 859                 # Try to avoid lacunae in the weighted path.
 860                 while( @succ &&
 861                            ( $self->reading( $next )->is_lacuna ||
 862                                  $nextrank - $rank > 1 ) ){
 863                         $next = pop @succ;
 864                 }
 865                 $weighted->{$curr} = $next;
 866                 $curr = $next;
 867         }
 868         return $weighted;
 869 }
 870
 871 =head2 path_witnesses( $edge )
 872
 873 Returns the list of sigils whose witnesses are associated with the given edge.
 874 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 875
 876 =cut
 877
 878 sub path_witnesses {
 879         my( $self, @edge ) = @_;
 880         # If edge is an arrayref, cope.
 881         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 882                 my $e = shift @edge;
 883                 @edge = @$e;
 884         }
 885         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 886         return @wits;
 887 }
 888
 889 # Helper function. Make a display label for the given witnesses, showing a.c.
 890 # witnesses only where the main witness is not also in the list.
 891 sub _path_display_label {
 892         my $self = shift;
 893         my %wits;
 894         map { $wits{$_} = 1 } @_;
 895
 896         # If an a.c. wit is listed, remove it if the main wit is also listed.
 897         # Otherwise keep it for explicit listing.
 898         my $aclabel = $self->ac_label;
 899         my @disp_ac;
 900         foreach my $w ( sort keys %wits ) {
 901                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
 902                         if( exists $wits{$1} ) {
 903                                 delete $wits{$w};
 904                         } else {
 905                                 push( @disp_ac, $w );
 906                         }
 907                 }
 908         }
 909
 910         # See if we are in a majority situation.
 911         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 912         $maj = $maj > 5 ? $maj : 5;
 913         if( scalar keys %wits > $maj ) {
 914                 unshift( @disp_ac, 'majority' );
 915                 return join( ', ', @disp_ac );
 916         } else {
 917                 return join( ', ', sort keys %wits );
 918         }
 919 }
 920
 921 =head2 readings_at_rank( $rank )
 922
 923 Returns a list of readings at a given rank, taken from the alignment table.
 924
 925 =cut
 926
 927 sub readings_at_rank {
 928         my( $self, $rank ) = @_;
 929         my $table = $self->alignment_table;
 930         # Table rank is real rank - 1.
 931         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
 932         my %readings;
 933         foreach my $e ( @elements ) {
 934                 next unless ref( $e ) eq 'HASH';
 935                 next unless exists $e->{'t'};
 936                 $readings{$e->{'t'}->id} = $e->{'t'};
 937         }
 938         return values %readings;
 939 }
 940
 941 =head2 as_graphml
 942
 943 Returns a GraphML representation of the collation.  The GraphML will contain
 944 two graphs. The first expresses the attributes of the readings and the witness
 945 paths that link them; the second expresses the relationships that link the
 946 readings.  This is the native transfer format for a tradition.
 947
 948 =begin testing
 949
 950 use Text::Tradition;
 951
 952 my $READINGS = 311;
 953 my $PATHS = 361;
 954
 955 my $datafile = 't/data/florilegium_tei_ps.xml';
 956 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 957                                       'name' => 'test0',
 958                                       'file' => $datafile,
 959                                       'linear' => 1 );
 960
 961 ok( $tradition, "Got a tradition object" );
 962 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 963 ok( $tradition->collation, "Tradition has a collation" );
 964
 965 my $c = $tradition->collation;
 966 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 967 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 968 is( scalar $c->relationships, 0, "Collation has all relationships" );
 969
 970 # Add a few relationships
 971 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 972 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 973 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 974
 975 # Now write it to GraphML and parse it again.
 976
 977 my $graphml = $c->as_graphml;
 978 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 979 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 980 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 981 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 982
 983 # Now add a stemma, write to GraphML, and look at the output.
 984 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
 985 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
 986 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
 987 $graphml = $c->as_graphml;
 988 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
 989
 990 # Now add a user, write to GraphML, and look at the output.
 991 unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
 992 my $testuser = Text::Tradition::User->new(
 993         id => 'testuser', password => 'testpass' );
 994 is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
 995 $testuser->add_tradition( $tradition );
 996 is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
 997 $graphml = $c->as_graphml;
 998 like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
 999
1000 =end testing
1001
1002 =cut
1003
1004 sub as_graphml {
1005     my( $self, $options ) = @_;
1006         $self->calculate_ranks unless $self->_graphcalc_done;
1007
1008         my $start = $options->{'from'}
1009                 ? $self->reading( $options->{'from'} ) : $self->start;
1010         my $end = $options->{'to'}
1011                 ? $self->reading( $options->{'to'} ) : $self->end;
1012         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1013                 throw( 'Start node must be before end node' );
1014         }
1015         # The readings need to be ranked for this to work.
1016         $start = $self->start unless $start->has_rank;
1017         $end = $self->end unless $end->has_rank;
1018         my $rankoffset = 0;
1019         unless( $start eq $self->start ) {
1020                 $rankoffset = $start->rank - 1;
1021         }
1022         my %use_readings;
1023
1024     # Some namespaces
1025     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1026     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1027     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1028         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1029
1030     # Create the document and root node
1031     require XML::LibXML;
1032     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1033     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1034     $graphml->setDocumentElement( $root );
1035     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1036     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1037
1038     # List of attribute types to save on our objects and their corresponding
1039     # GraphML types
1040     my %save_types = (
1041         'Str' => 'string',
1042         'Int' => 'int',
1043         'Bool' => 'boolean',
1044         'ReadingID' => 'string',
1045         'RelationshipType' => 'string',
1046         'RelationshipScope' => 'string',
1047     );
1048
1049     # List of attribute names *not* to save on our objects.
1050     # We will also not save any attribute beginning with _.
1051     my %skipsave;
1052     map { $skipsave{$_} = 1 } qw/ cached_svg /;
1053
1054     # Add the data keys for the graph. Include an extra key 'version' for the
1055     # GraphML output version.
1056     my %graph_data_keys;
1057     my $gdi = 0;
1058     my %graph_attributes = ( 'version' => 'string' );
1059         # Graph attributes include those of Tradition and those of Collation.
1060         my %gattr_from;
1061         my $tmeta = $self->tradition->meta;
1062         my $cmeta = $self->meta;
1063         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1064         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1065         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1066                 next if $attr->name =~ /^_/;
1067                 next if $skipsave{$attr->name};
1068                 next unless $save_types{$attr->type_constraint->name};
1069                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1070         }
1071     # Extra custom keys for complex objects that should be saved in some form.
1072     # The subroutine should return a string, or undef/empty.
1073     $graph_attributes{'stemmata'} = sub {
1074         my @stemstrs;
1075                 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1076                         $self->tradition->stemmata;
1077                 join( "\n", @stemstrs );
1078         };
1079     $graph_attributes{'user'} = sub {
1080         $self->tradition->user ? $self->tradition->user->id : undef
1081     };
1082
1083     foreach my $datum ( sort keys %graph_attributes ) {
1084         $graph_data_keys{$datum} = 'dg'.$gdi++;
1085         my $key = $root->addNewChild( $graphml_ns, 'key' );
1086         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1087                 : $graph_attributes{$datum};
1088         $key->setAttribute( 'attr.name', $datum );
1089         $key->setAttribute( 'attr.type', $dtype );
1090         $key->setAttribute( 'for', 'graph' );
1091         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1092     }
1093
1094     # Add the data keys for reading nodes
1095     my %reading_attributes;
1096     my $rmeta = Text::Tradition::Collation::Reading->meta;
1097     foreach my $attr( $rmeta->get_all_attributes ) {
1098                 next if $attr->name =~ /^_/;
1099                 next if $skipsave{$attr->name};
1100                 next unless $save_types{$attr->type_constraint->name};
1101                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1102         }
1103         # Extra custom key for the reading morphology
1104         $reading_attributes{'lexemes'} = 'string';
1105
1106     my %node_data_keys;
1107     my $ndi = 0;
1108     foreach my $datum ( sort keys %reading_attributes ) {
1109         $node_data_keys{$datum} = 'dn'.$ndi++;
1110         my $key = $root->addNewChild( $graphml_ns, 'key' );
1111         $key->setAttribute( 'attr.name', $datum );
1112         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1113         $key->setAttribute( 'for', 'node' );
1114         $key->setAttribute( 'id', $node_data_keys{$datum} );
1115     }
1116
1117     # Add the data keys for edges, that is, paths and relationships. Path
1118     # data does not come from a Moose class so is here manually.
1119     my $edi = 0;
1120     my %edge_data_keys;
1121     my %edge_attributes = (
1122         witness => 'string',                    # ID/label for a path
1123         extra => 'boolean',                             # Path key
1124         );
1125     my @path_attributes = keys %edge_attributes; # track our manual additions
1126     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1127     foreach my $attr( $pmeta->get_all_attributes ) {
1128                 next if $attr->name =~ /^_/;
1129                 next if $skipsave{$attr->name};
1130                 next unless $save_types{$attr->type_constraint->name};
1131                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1132         }
1133     foreach my $datum ( sort keys %edge_attributes ) {
1134         $edge_data_keys{$datum} = 'de'.$edi++;
1135         my $key = $root->addNewChild( $graphml_ns, 'key' );
1136         $key->setAttribute( 'attr.name', $datum );
1137         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1138         $key->setAttribute( 'for', 'edge' );
1139         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1140     }
1141
1142     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1143     my $xmlidname = $self->tradition->name;
1144     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1145     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1146         $xmlidname = '_'.$xmlidname;
1147     }
1148     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1149     $sgraph->setAttribute( 'edgedefault', 'directed' );
1150     $sgraph->setAttribute( 'id', $xmlidname );
1151     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1152     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1153     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1154     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1155     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1156
1157     # Tradition/collation attribute data
1158     foreach my $datum ( keys %graph_attributes ) {
1159         my $value;
1160         if( $datum eq 'version' ) {
1161                 $value = '3.2';
1162         } elsif( ref( $graph_attributes{$datum} ) ) {
1163                 my $sub = $graph_attributes{$datum};
1164                 $value = &$sub();
1165         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1166                 $value = $self->tradition->$datum;
1167         } else {
1168                 $value = $self->$datum;
1169         }
1170                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1171         }
1172
1173     my $node_ctr = 0;
1174     my %node_hash;
1175     # Add our readings to the graph
1176     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1177         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1178                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1179         $use_readings{$n->id} = 1;
1180         # Add to the main graph
1181         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1182         my $node_xmlid = 'n' . $node_ctr++;
1183         $node_hash{ $n->id } = $node_xmlid;
1184         $node_el->setAttribute( 'id', $node_xmlid );
1185         foreach my $d ( keys %reading_attributes ) {
1186                 my $nval = $n->$d;
1187                 # Custom serialization
1188                 if( $d eq 'lexemes' ) {
1189                                 # If nval is a true value, we have lexemes so we need to
1190                                 # serialize them. Otherwise set nval to undef so that the
1191                                 # key is excluded from this reading.
1192                         $nval = $nval ? $n->_serialize_lexemes : undef;
1193                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1194                         $nval = undef;
1195                 }
1196                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1197                         # Adjust the ranks within the subgraph.
1198                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1199                                 : $nval - $rankoffset;
1200                 }
1201                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1202                         if defined $nval;
1203         }
1204     }
1205
1206     # Add the path edges to the sequence graph
1207     my $edge_ctr = 0;
1208     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1209         # We add an edge in the graphml for every witness in $e.
1210         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1211         my @edge_wits = sort $self->path_witnesses( $e );
1212         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1213         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1214         # Skip any path from start to end; that witness is not in the subgraph.
1215         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1216         foreach my $wit ( @edge_wits ) {
1217                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1218                                                                                 $node_hash{ $e->[0] },
1219                                                                                 $node_hash{ $e->[1] } );
1220                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1221                         $edge_el->setAttribute( 'source', $from );
1222                         $edge_el->setAttribute( 'target', $to );
1223                         $edge_el->setAttribute( 'id', $id );
1224
1225                         # It's a witness path, so add the witness
1226                         my $base = $wit;
1227                         my $key = $edge_data_keys{'witness'};
1228                         # Is this an ante-corr witness?
1229                         my $aclabel = $self->ac_label;
1230                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1231                                 # Keep the base witness
1232                                 $base = $1;
1233                                 # ...and record that this is an 'extra' reading path
1234                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1235                         }
1236                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1237                 }
1238         }
1239
1240         # Report the actual number of nodes and edges that went in
1241         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1242         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1243
1244         # Add the relationship graph to the XML
1245         map { delete $edge_data_keys{$_} } @path_attributes;
1246         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1247                 $node_data_keys{'id'}, \%edge_data_keys );
1248
1249     # Save and return the thing
1250     my $result = decode_utf8( $graphml->toString(1) );
1251     return $result;
1252 }
1253
1254 sub _add_graphml_data {
1255     my( $el, $key, $value ) = @_;
1256     return unless defined $value;
1257     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1258     $data_el->setAttribute( 'key', $key );
1259     $data_el->appendText( $value );
1260 }
1261
1262 =head2 as_csv
1263
1264 Returns a CSV alignment table representation of the collation graph, one
1265 row per witness (or witness uncorrected.)
1266
1267 =cut
1268
1269 sub as_csv {
1270     my( $self ) = @_;
1271     my $table = $self->alignment_table;
1272     my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1273     my @result;
1274     # Make the header row
1275     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1276         push( @result, decode_utf8( $csv->string ) );
1277     # Make the rest of the rows
1278     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1279         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1280         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1281         $csv->combine( @row );
1282         push( @result, decode_utf8( $csv->string ) );
1283     }
1284     return join( "\n", @result );
1285 }
1286
1287 =head2 alignment_table( $use_refs, $include_witnesses )
1288
1289 Return a reference to an alignment table, in a slightly enhanced CollateX
1290 format which looks like this:
1291
1292  $table = { alignment => [ { witness => "SIGIL",
1293                              tokens => [ { t => "TEXT" }, ... ] },
1294                            { witness => "SIG2",
1295                              tokens => [ { t => "TEXT" }, ... ] },
1296                            ... ],
1297             length => TEXTLEN };
1298
1299 If $use_refs is set to 1, the reading object is returned in the table
1300 instead of READINGTEXT; if not, the text of the reading is returned.
1301
1302 If $include_witnesses is set to a hashref, only the witnesses whose sigil
1303 keys have a true hash value will be included.
1304
1305 =cut
1306
1307 sub alignment_table {
1308     my( $self ) = @_;
1309     $self->calculate_ranks() unless $self->_graphcalc_done;
1310     return $self->cached_table if $self->has_cached_table;
1311
1312     # Make sure we can do this
1313         throw( "Need a linear graph in order to make an alignment table" )
1314                 unless $self->linear;
1315         $self->calculate_ranks unless $self->end->has_rank;
1316
1317     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1318     my @all_pos = ( 1 .. $self->end->rank - 1 );
1319     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1320         # say STDERR "Making witness row(s) for " . $wit->sigil;
1321         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1322         my @row = _make_witness_row( \@wit_path, \@all_pos );
1323         push( @{$table->{'alignment'}},
1324                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1325         if( $wit->is_layered ) {
1326                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1327                         $wit->sigil.$self->ac_label );
1328             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1329                         push( @{$table->{'alignment'}},
1330                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
1331         }
1332     }
1333     $self->cached_table( $table );
1334     return $table;
1335 }
1336
1337 sub _make_witness_row {
1338     my( $path, $positions ) = @_;
1339     my %char_hash;
1340     map { $char_hash{$_} = undef } @$positions;
1341     my $debug = 0;
1342     foreach my $rdg ( @$path ) {
1343         my $rtext = $rdg->text;
1344         $rtext = '#LACUNA#' if $rdg->is_lacuna;
1345         say STDERR "rank " . $rdg->rank if $debug;
1346         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1347         $char_hash{$rdg->rank} = { 't' => $rdg };
1348     }
1349     my @row = map { $char_hash{$_} } @$positions;
1350     # Fill in lacuna markers for undef spots in the row
1351     my $last_el = shift @row;
1352     my @filled_row = ( $last_el );
1353     foreach my $el ( @row ) {
1354         # If we are using node reference, make the lacuna node appear many times
1355         # in the table.  If not, use the lacuna tag.
1356         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1357             $el = $last_el;
1358         }
1359         push( @filled_row, $el );
1360         $last_el = $el;
1361     }
1362     return @filled_row;
1363 }
1364
1365 =head1 NAVIGATION METHODS
1366
1367 =head2 reading_sequence( $first, $last, $sigil, $backup )
1368
1369 Returns the ordered list of readings, starting with $first and ending
1370 with $last, for the witness given in $sigil. If a $backup sigil is
1371 specified (e.g. when walking a layered witness), it will be used wherever
1372 no $sigil path exists.  If there is a base text reading, that will be
1373 used wherever no path exists for $sigil or $backup.
1374
1375 =cut
1376
1377 # TODO Think about returning some lazy-eval iterator.
1378 # TODO Get rid of backup; we should know from what witness is whether we need it.
1379
1380 sub reading_sequence {
1381     my( $self, $start, $end, $witness ) = @_;
1382
1383     $witness = $self->baselabel unless $witness;
1384     my @readings = ( $start );
1385     my %seen;
1386     my $n = $start;
1387     while( $n && $n->id ne $end->id ) {
1388         if( exists( $seen{$n->id} ) ) {
1389             throw( "Detected loop for $witness at " . $n->id );
1390         }
1391         $seen{$n->id} = 1;
1392
1393         my $next = $self->next_reading( $n, $witness );
1394         unless( $next ) {
1395             throw( "Did not find any path for $witness from reading " . $n->id );
1396         }
1397         push( @readings, $next );
1398         $n = $next;
1399     }
1400     # Check that the last reading is our end reading.
1401     my $last = $readings[$#readings];
1402     throw( "Last reading found from " . $start->text .
1403         " for witness $witness is not the end!" ) # TODO do we get this far?
1404         unless $last->id eq $end->id;
1405
1406     return @readings;
1407 }
1408
1409 =head2 next_reading( $reading, $sigil );
1410
1411 Returns the reading that follows the given reading along the given witness
1412 path.
1413
1414 =cut
1415
1416 sub next_reading {
1417     # Return the successor via the corresponding path.
1418     my $self = shift;
1419     my $answer = $self->_find_linked_reading( 'next', @_ );
1420         return undef unless $answer;
1421     return $self->reading( $answer );
1422 }
1423
1424 =head2 prior_reading( $reading, $sigil )
1425
1426 Returns the reading that precedes the given reading along the given witness
1427 path.
1428
1429 =cut
1430
1431 sub prior_reading {
1432     # Return the predecessor via the corresponding path.
1433     my $self = shift;
1434     my $answer = $self->_find_linked_reading( 'prior', @_ );
1435     return $self->reading( $answer );
1436 }
1437
1438 sub _find_linked_reading {
1439     my( $self, $direction, $node, $path ) = @_;
1440
1441     # Get a backup if we are dealing with a layered witness
1442     my $alt_path;
1443     my $aclabel = $self->ac_label;
1444     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1445         $alt_path = $1;
1446     }
1447
1448     my @linked_paths = $direction eq 'next'
1449         ? $self->sequence->edges_from( $node )
1450         : $self->sequence->edges_to( $node );
1451     return undef unless scalar( @linked_paths );
1452
1453     # We have to find the linked path that contains all of the
1454     # witnesses supplied in $path.
1455     my( @path_wits, @alt_path_wits );
1456     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1457     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1458     my $base_le;
1459     my $alt_le;
1460     foreach my $le ( @linked_paths ) {
1461         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1462             $base_le = $le;
1463         }
1464                 my @le_wits = sort $self->path_witnesses( $le );
1465                 if( _is_within( \@path_wits, \@le_wits ) ) {
1466                         # This is the right path.
1467                         return $direction eq 'next' ? $le->[1] : $le->[0];
1468                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1469                         $alt_le = $le;
1470                 }
1471     }
1472     # Got this far? Return the alternate path if it exists.
1473     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1474         if $alt_le;
1475
1476     # Got this far? Return the base path if it exists.
1477     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1478         if $base_le;
1479
1480     # Got this far? We have no appropriate path.
1481     warn "Could not find $direction node from " . $node->id
1482         . " along path $path";
1483     return undef;
1484 }
1485
1486 # Some set logic.
1487 sub _is_within {
1488     my( $set1, $set2 ) = @_;
1489     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1490     foreach my $el ( @$set1 ) {
1491         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1492     }
1493     return $ret;
1494 }
1495
1496 # Return the string that joins together a list of witnesses for
1497 # display on a single path.
1498 sub _witnesses_of_label {
1499     my( $self, $label ) = @_;
1500     my $regex = $self->wit_list_separator;
1501     my @answer = split( /\Q$regex\E/, $label );
1502     return @answer;
1503 }
1504
1505 =head2 common_readings
1506
1507 Returns the list of common readings in the graph (i.e. those readings that are
1508 shared by all non-lacunose witnesses.)
1509
1510 =cut
1511
1512 sub common_readings {
1513         my $self = shift;
1514         my @common = grep { $_->is_common } $self->readings;
1515         return @common;
1516 }
1517
1518 =head2 path_text( $sigil, [, $start, $end ] )
1519
1520 Returns the text of a witness (plus its backup, if we are using a layer)
1521 as stored in the collation.  The text is returned as a string, where the
1522 individual readings are joined with spaces and the meta-readings (e.g.
1523 lacunae) are omitted.  Optional specification of $start and $end allows
1524 the generation of a subset of the witness text.
1525
1526 =cut
1527
1528 sub path_text {
1529         my( $self, $wit, $start, $end ) = @_;
1530         $start = $self->start unless $start;
1531         $end = $self->end unless $end;
1532         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1533         my $pathtext = '';
1534         my $last;
1535         foreach my $r ( @path ) {
1536                 unless ( $r->join_prior || !$last || $last->join_next ) {
1537                         $pathtext .= ' ';
1538                 }
1539                 $pathtext .= $r->text;
1540                 $last = $r;
1541         }
1542         return $pathtext;
1543 }
1544
1545 =head1 INITIALIZATION METHODS
1546
1547 These are mostly for use by parsers.
1548
1549 =head2 make_witness_path( $witness )
1550
1551 Link the array of readings contained in $witness->path (and in
1552 $witness->uncorrected_path if it exists) into collation paths.
1553 Clear out the arrays when finished.
1554
1555 =head2 make_witness_paths
1556
1557 Call make_witness_path for all witnesses in the tradition.
1558
1559 =cut
1560
1561 # For use when a collation is constructed from a base text and an apparatus.
1562 # We have the sequences of readings and just need to add path edges.
1563 # When we are done, clear out the witness path attributes, as they are no
1564 # longer needed.
1565 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1566
1567 sub make_witness_paths {
1568     my( $self ) = @_;
1569     foreach my $wit ( $self->tradition->witnesses ) {
1570         # say STDERR "Making path for " . $wit->sigil;
1571         $self->make_witness_path( $wit );
1572     }
1573 }
1574
1575 sub make_witness_path {
1576     my( $self, $wit ) = @_;
1577     my @chain = @{$wit->path};
1578     my $sig = $wit->sigil;
1579     # Add start and end if necessary
1580     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1581     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1582     foreach my $idx ( 0 .. $#chain-1 ) {
1583         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1584     }
1585     if( $wit->is_layered ) {
1586         @chain = @{$wit->uncorrected_path};
1587                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1588                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1589         foreach my $idx( 0 .. $#chain-1 ) {
1590             my $source = $chain[$idx];
1591             my $target = $chain[$idx+1];
1592             $self->add_path( $source, $target, $sig.$self->ac_label )
1593                 unless $self->has_path( $source, $target, $sig );
1594         }
1595     }
1596     $wit->clear_path;
1597     $wit->clear_uncorrected_path;
1598 }
1599
1600 =head2 calculate_ranks
1601
1602 Calculate the reading ranks (that is, their aligned positions relative
1603 to each other) for the graph.  This can only be called on linear collations.
1604
1605 =begin testing
1606
1607 use Text::Tradition;
1608
1609 my $cxfile = 't/data/Collatex-16.xml';
1610 my $t = Text::Tradition->new(
1611     'name'  => 'inline',
1612     'input' => 'CollateX',
1613     'file'  => $cxfile,
1614     );
1615 my $c = $t->collation;
1616
1617 # Make an svg
1618 my $table = $c->alignment_table;
1619 ok( $c->has_cached_table, "Alignment table was cached" );
1620 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1621 $c->calculate_ranks;
1622 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1623 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1624 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1625
1626 =end testing
1627
1628 =cut
1629
1630 sub calculate_ranks {
1631     my $self = shift;
1632     # Save the existing ranks, in case we need to invalidate the cached SVG.
1633     my %existing_ranks;
1634     map { $existing_ranks{$_} = $_->rank } $self->readings;
1635
1636     # Do the rankings based on the relationship equivalence graph, starting
1637     # with the start node.
1638     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1639
1640     # Transfer our rankings from the topological graph to the real one.
1641     foreach my $r ( $self->readings ) {
1642         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1643             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1644         } else {
1645                 # Die. Find the last rank we calculated.
1646                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1647                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1648                         $self->readings;
1649                 my $last = pop @all_defined;
1650             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1651         }
1652     }
1653     # Do we need to invalidate the cached data?
1654     if( $self->has_cached_svg || $self->has_cached_table ) {
1655         foreach my $r ( $self->readings ) {
1656                 next if defined( $existing_ranks{$r} )
1657                         && $existing_ranks{$r} == $r->rank;
1658                 # Something has changed, so clear the cache
1659                 $self->_clear_cache;
1660                         # ...and recalculate the common readings.
1661                         $self->calculate_common_readings();
1662                 last;
1663         }
1664     }
1665         # The graph calculation information is now up to date.
1666         $self->_graphcalc_done(1);
1667 }
1668
1669 sub _clear_cache {
1670         my $self = shift;
1671         $self->wipe_svg if $self->has_cached_svg;
1672         $self->wipe_table if $self->has_cached_table;
1673 }
1674
1675
1676 =head2 flatten_ranks
1677
1678 A convenience method for parsing collation data.  Searches the graph for readings
1679 with the same text at the same rank, and merges any that are found.
1680
1681 =cut
1682
1683 sub flatten_ranks {
1684     my $self = shift;
1685     my %unique_rank_rdg;
1686     my $changed;
1687     foreach my $rdg ( $self->readings ) {
1688         next unless $rdg->has_rank;
1689         my $key = $rdg->rank . "||" . $rdg->text;
1690         if( exists $unique_rank_rdg{$key} ) {
1691                 # Make sure they don't have different grammatical forms
1692                         my $ur = $unique_rank_rdg{$key};
1693                         if( $rdg->disambiguated && $ur->disambiguated ) {
1694                                 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1695                                 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1696                                 next unless $rform eq $uform;
1697                         } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1698                                 next;
1699                         }
1700             # Combine!
1701                 #say STDERR "Combining readings at same rank: $key";
1702                 $changed = 1;
1703             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1704             # TODO see if this now makes a common point.
1705         } else {
1706             $unique_rank_rdg{$key} = $rdg;
1707         }
1708     }
1709     # If we merged readings, the ranks are still fine but the alignment
1710     # table is wrong. Wipe it.
1711     $self->wipe_table() if $changed;
1712 }
1713
1714
1715 =head2 calculate_common_readings
1716
1717 Goes through the graph identifying the readings that appear in every witness
1718 (apart from those with lacunae at that spot.) Marks them as common and returns
1719 the list.
1720
1721 =begin testing
1722
1723 use Text::Tradition;
1724
1725 my $cxfile = 't/data/Collatex-16.xml';
1726 my $t = Text::Tradition->new(
1727     'name'  => 'inline',
1728     'input' => 'CollateX',
1729     'file'  => $cxfile,
1730     );
1731 my $c = $t->collation;
1732
1733 my @common = $c->calculate_common_readings();
1734 is( scalar @common, 8, "Found correct number of common readings" );
1735 my @marked = sort $c->common_readings();
1736 is( scalar @common, 8, "All common readings got marked as such" );
1737 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1738 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1739
1740 =end testing
1741
1742 =cut
1743
1744 sub calculate_common_readings {
1745         my $self = shift;
1746         my @common;
1747         map { $_->is_common( 0 ) } $self->readings;
1748         # Implicitly calls calculate_ranks
1749         my $table = $self->alignment_table;
1750         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1751                 my @row = map { $_->{'tokens'}->[$idx]
1752                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1753                                         @{$table->{'alignment'}};
1754                 my %hash;
1755                 foreach my $r ( @row ) {
1756                         if( $r ) {
1757                                 $hash{$r->id} = $r unless $r->is_meta;
1758                         } else {
1759                                 $hash{'UNDEF'} = $r;
1760                         }
1761                 }
1762                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1763                         my( $r ) = values %hash;
1764                         $r->is_common( 1 );
1765                         push( @common, $r );
1766                 }
1767         }
1768         return @common;
1769 }
1770
1771 =head2 text_from_paths
1772
1773 Calculate the text array for all witnesses from the path, for later consistency
1774 checking.  Only to be used if there is no non-graph-based way to know the
1775 original texts.
1776
1777 =cut
1778
1779 sub text_from_paths {
1780         my $self = shift;
1781     foreach my $wit ( $self->tradition->witnesses ) {
1782         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1783         my @text;
1784         foreach my $r ( @readings ) {
1785                 next if $r->is_meta;
1786                 push( @text, $r->text );
1787         }
1788         $wit->text( \@text );
1789         if( $wit->is_layered ) {
1790                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1791                                                                                                   $wit->sigil.$self->ac_label );
1792                         my @uctext;
1793                         foreach my $r ( @ucrdgs ) {
1794                                 next if $r->is_meta;
1795                                 push( @uctext, $r->text );
1796                         }
1797                         $wit->layertext( \@uctext );
1798         }
1799     }
1800 }
1801
1802 =head1 UTILITY FUNCTIONS
1803
1804 =head2 common_predecessor( $reading_a, $reading_b )
1805
1806 Find the last reading that occurs in sequence before both the given readings.
1807 At the very least this should be $self->start.
1808
1809 =head2 common_successor( $reading_a, $reading_b )
1810
1811 Find the first reading that occurs in sequence after both the given readings.
1812 At the very least this should be $self->end.
1813
1814 =begin testing
1815
1816 use Text::Tradition;
1817
1818 my $cxfile = 't/data/Collatex-16.xml';
1819 my $t = Text::Tradition->new(
1820     'name'  => 'inline',
1821     'input' => 'CollateX',
1822     'file'  => $cxfile,
1823     );
1824 my $c = $t->collation;
1825
1826 is( $c->common_predecessor( 'n24', 'n23' )->id,
1827     'n20', "Found correct common predecessor" );
1828 is( $c->common_successor( 'n24', 'n23' )->id,
1829     '__END__', "Found correct common successor" );
1830
1831 is( $c->common_predecessor( 'n19', 'n17' )->id,
1832     'n16', "Found correct common predecessor for readings on same path" );
1833 is( $c->common_successor( 'n21', 'n10' )->id,
1834     '__END__', "Found correct common successor for readings on same path" );
1835
1836 =end testing
1837
1838 =cut
1839
1840 ## Return the closest reading that is a predecessor of both the given readings.
1841 sub common_predecessor {
1842         my $self = shift;
1843         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1844         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1845 }
1846
1847 sub common_successor {
1848         my $self = shift;
1849         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1850         return $self->_common_in_path( $r1, $r2, 'successors' );
1851 }
1852
1853
1854 # TODO think about how to do this without ranks...
1855 sub _common_in_path {
1856         my( $self, $r1, $r2, $dir ) = @_;
1857         my $iter = $self->end->rank;
1858         my @candidates;
1859         my @last_r1 = ( $r1 );
1860         my @last_r2 = ( $r2 );
1861         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1862         my %all_seen;
1863         # say STDERR "Finding common $dir for $r1, $r2";
1864         while( !@candidates ) {
1865                 last unless $iter--;  # Avoid looping infinitely
1866                 # Iterate separately down the graph from r1 and r2
1867                 my( @new_lc1, @new_lc2 );
1868                 foreach my $lc ( @last_r1 ) {
1869                         foreach my $p ( $lc->$dir ) {
1870                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1871                                         # say STDERR "Path candidate $p from $lc";
1872                                         push( @candidates, $p );
1873                                 } elsif( !$all_seen{$p->id} ) {
1874                                         $all_seen{$p->id} = 'r1';
1875                                         push( @new_lc1, $p );
1876                                 }
1877                         }
1878                 }
1879                 foreach my $lc ( @last_r2 ) {
1880                         foreach my $p ( $lc->$dir ) {
1881                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1882                                         # say STDERR "Path candidate $p from $lc";
1883                                         push( @candidates, $p );
1884                                 } elsif( !$all_seen{$p->id} ) {
1885                                         $all_seen{$p->id} = 'r2';
1886                                         push( @new_lc2, $p );
1887                                 }
1888                         }
1889                 }
1890                 @last_r1 = @new_lc1;
1891                 @last_r2 = @new_lc2;
1892         }
1893         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1894         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1895 }
1896
1897 sub throw {
1898         Text::Tradition::Error->throw(
1899                 'ident' => 'Collation error',
1900                 'message' => $_[0],
1901                 );
1902 }
1903
1904 no Moose;
1905 __PACKAGE__->meta->make_immutable;
1906
1907 =head1 LICENSE
1908
1909 This package is free software and is provided "as is" without express
1910 or implied warranty.  You can redistribute it and/or modify it under
1911 the same terms as Perl itself.
1912
1913 =head1 AUTHOR
1914
1915 Tara L Andrews E<lt>aurum@cpan.orgE<gt>