lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30                 del_relationship => 'del_relationship',
  31         },
  32         writer => '_set_relations',
  33         );
  34
  35 has 'tradition' => (
  36     is => 'ro',
  37     isa => 'Text::Tradition',
  38     weak_ref => 1,
  39     );
  40
  41 has 'readings' => (
  42         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  43         traits => ['Hash'],
  44     handles => {
  45         reading     => 'get',
  46         _add_reading => 'set',
  47         del_reading => 'delete',
  48         has_reading => 'exists',
  49         readings   => 'values',
  50     },
  51     default => sub { {} },
  52         );
  53
  54 has 'wit_list_separator' => (
  55     is => 'rw',
  56     isa => 'Str',
  57     default => ', ',
  58     );
  59
  60 has 'baselabel' => (
  61     is => 'rw',
  62     isa => 'Str',
  63     default => 'base text',
  64     );
  65
  66 has 'linear' => (
  67     is => 'rw',
  68     isa => 'Bool',
  69     default => 1,
  70     );
  71
  72 has 'ac_label' => (
  73     is => 'rw',
  74     isa => 'Str',
  75     default => ' (a.c.)',
  76     );
  77
  78 has 'start' => (
  79         is => 'ro',
  80         isa => 'Text::Tradition::Collation::Reading',
  81         writer => '_set_start',
  82         weak_ref => 1,
  83         );
  84
  85 has 'end' => (
  86         is => 'ro',
  87         isa => 'Text::Tradition::Collation::Reading',
  88         writer => '_set_end',
  89         weak_ref => 1,
  90         );
  91
  92 has 'cached_svg' => (
  93         is => 'rw',
  94         isa => 'Str',
  95         predicate => 'has_cached_svg',
  96         clearer => 'wipe_svg',
  97         );
  98
  99 has 'cached_table' => (
 100         is => 'rw',
 101         isa => 'HashRef',
 102         predicate => 'has_cached_table',
 103         clearer => 'wipe_table',
 104         );
 105
 106 =head1 NAME
 107
 108 Text::Tradition::Collation - a software model for a text collation
 109
 110 =head1 SYNOPSIS
 111
 112   use Text::Tradition;
 113   my $t = Text::Tradition->new(
 114     'name' => 'this is a text',
 115     'input' => 'TEI',
 116     'file' => '/path/to/tei_parallel_seg_file.xml' );
 117
 118   my $c = $t->collation;
 119   my @readings = $c->readings;
 120   my @paths = $c->paths;
 121   my @relationships = $c->relationships;
 122
 123   my $svg_variant_graph = $t->collation->as_svg();
 124
 125 =head1 DESCRIPTION
 126
 127 Text::Tradition is a library for representation and analysis of collated
 128 texts, particularly medieval ones.  The Collation is the central feature of
 129 a Tradition, where the text, its sequence of readings, and its relationships
 130 between readings are actually kept.
 131
 132 =head1 CONSTRUCTOR
 133
 134 =head2 new
 135
 136 The constructor.  Takes a hash or hashref of the following arguments:
 137
 138 =over
 139
 140 =item * tradition - The Text::Tradition object to which the collation
 141 belongs. Required.
 142
 143 =item * linear - Whether the collation should be linear; that is, whether
 144 transposed readings should be treated as two linked readings rather than one,
 145 and therefore whether the collation graph is acyclic.  Defaults to true.
 146
 147 =item * baselabel - The default label for the path taken by a base text
 148 (if any). Defaults to 'base text'.
 149
 150 =item * wit_list_separator - The string to join a list of witnesses for
 151 purposes of making labels in display graphs.  Defaults to ', '.
 152
 153 =item * ac_label - The extra label to tack onto a witness sigil when
 154 representing another layer of path for the given witness - that is, when
 155 a text has more than one possible reading due to scribal corrections or
 156 the like.  Defaults to ' (a.c.)'.
 157
 158 =back
 159
 160 =head1 ACCESSORS
 161
 162 =head2 tradition
 163
 164 =head2 linear
 165
 166 =head2 wit_list_separator
 167
 168 =head2 baselabel
 169
 170 =head2 ac_label
 171
 172 Simple accessors for collation attributes.
 173
 174 =head2 start
 175
 176 The meta-reading at the start of every witness path.
 177
 178 =head2 end
 179
 180 The meta-reading at the end of every witness path.
 181
 182 =head2 readings
 183
 184 Returns all Reading objects in the graph.
 185
 186 =head2 reading( $id )
 187
 188 Returns the Reading object corresponding to the given ID.
 189
 190 =head2 add_reading( $reading_args )
 191
 192 Adds a new reading object to the collation.
 193 See L<Text::Tradition::Collation::Reading> for the available arguments.
 194
 195 =head2 del_reading( $object_or_id )
 196
 197 Removes the given reading from the collation, implicitly removing its
 198 paths and relationships.
 199
 200 =head2 merge_readings( $main, $second )
 201
 202 Merges the $second reading into the $main one.
 203 The arguments may be either readings or reading IDs.
 204
 205 =head2 has_reading( $id )
 206
 207 Predicate to see whether a given reading ID is in the graph.
 208
 209 =head2 reading_witnesses( $object_or_id )
 210
 211 Returns a list of sigils whose witnesses contain the reading.
 212
 213 =head2 paths
 214
 215 Returns all reading paths within the document - that is, all edges in the
 216 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 217
 218 =head2 add_path( $source, $target, $sigil )
 219
 220 Links the given readings in the collation in sequence, under the given witness
 221 sigil.  The readings may be specified by object or ID.
 222
 223 =head2 del_path( $source, $target, $sigil )
 224
 225 Links the given readings in the collation in sequence, under the given witness
 226 sigil.  The readings may be specified by object or ID.
 227
 228 =head2 has_path( $source, $target );
 229
 230 Returns true if the two readings are linked in sequence in any witness.
 231 The readings may be specified by object or ID.
 232
 233 =head2 relationships
 234
 235 Returns all Relationship objects in the collation.
 236
 237 =head2 add_relationship( $reading, $other_reading, $options )
 238
 239 Adds a new relationship of the type given in $options between the two readings,
 240 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 241 where $status is true on success, and @vectors is a list of relationship edges
 242 that were ultimately added.
 243 See L<Text::Tradition::Collation::Relationship> for the available options.
 244
 245 =cut
 246
 247 sub BUILD {
 248     my $self = shift;
 249     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 250     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 251     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 252 }
 253
 254 ### Reading construct/destruct functions
 255
 256 sub _clear_cache {
 257         my $self = shift;
 258         $self->wipe_svg if $self->has_cached_svg;
 259         $self->wipe_table if $self->has_cached_table;
 260 }
 261
 262 sub add_reading {
 263         my( $self, $reading ) = @_;
 264         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 265                 my %args = %$reading;
 266                 $reading = Text::Tradition::Collation::Reading->new(
 267                         'collation' => $self,
 268                         %args );
 269         }
 270         # First check to see if a reading with this ID exists.
 271         if( $self->reading( $reading->id ) ) {
 272                 throw( "Collation already has a reading with id " . $reading->id );
 273         }
 274         $self->_clear_cache;
 275         $self->_add_reading( $reading->id => $reading );
 276         # Once the reading has been added, put it in both graphs.
 277         $self->sequence->add_vertex( $reading->id );
 278         $self->relations->add_reading( $reading->id );
 279         return $reading;
 280 };
 281
 282 around del_reading => sub {
 283         my $orig = shift;
 284         my $self = shift;
 285         my $arg = shift;
 286
 287         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 288                 $arg = $arg->id;
 289         }
 290         # Remove the reading from the graphs.
 291         $self->_clear_cache;
 292         $self->sequence->delete_vertex( $arg );
 293         $self->relations->delete_reading( $arg );
 294
 295         # Carry on.
 296         $self->$orig( $arg );
 297 };
 298
 299 # merge_readings( $main, $to_be_deleted );
 300
 301 sub merge_readings {
 302         my $self = shift;
 303
 304         # We only need the IDs for adding paths to the graph, not the reading
 305         # objects themselves.
 306     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 307         $self->_clear_cache;
 308
 309     # The kept reading should inherit the paths and the relationships
 310     # of the deleted reading.
 311         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 312                 my @vector = ( $kept );
 313                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 314                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 315                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 316                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 317                 $self->sequence->add_edge( @vector );
 318                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 319                 @wits{keys %$fwits} = values %$fwits;
 320                 $self->sequence->set_edge_attributes( @vector, \%wits );
 321         }
 322         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 323
 324         # Do the deletion deed.
 325         if( $combine_char ) {
 326                 my $kept_obj = $self->reading( $kept );
 327                 my $new_text = join( $combine_char, $kept_obj->text,
 328                         $self->reading( $deleted )->text );
 329                 $kept_obj->alter_text( $new_text );
 330         }
 331         $self->del_reading( $deleted );
 332 }
 333
 334
 335 # Helper function for manipulating the graph.
 336 sub _stringify_args {
 337         my( $self, $first, $second, $arg ) = @_;
 338     $first = $first->id
 339         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 340     $second = $second->id
 341         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 342     return( $first, $second, $arg );
 343 }
 344
 345 # Helper function for manipulating the graph.
 346 sub _objectify_args {
 347         my( $self, $first, $second, $arg ) = @_;
 348     $first = $self->reading( $first )
 349         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 350     $second = $self->reading( $second )
 351         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 352     return( $first, $second, $arg );
 353 }
 354 ### Path logic
 355
 356 sub add_path {
 357         my $self = shift;
 358
 359         # We only need the IDs for adding paths to the graph, not the reading
 360         # objects themselves.
 361     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 362
 363         $self->_clear_cache;
 364         # Connect the readings
 365     $self->sequence->add_edge( $source, $target );
 366     # Note the witness in question
 367     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 368 };
 369
 370 sub del_path {
 371         my $self = shift;
 372         my @args;
 373         if( ref( $_[0] ) eq 'ARRAY' ) {
 374                 my $e = shift @_;
 375                 @args = ( @$e, @_ );
 376         } else {
 377                 @args = @_;
 378         }
 379
 380         # We only need the IDs for adding paths to the graph, not the reading
 381         # objects themselves.
 382     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 383
 384         $self->_clear_cache;
 385         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 386                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 387         }
 388         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 389                 $self->sequence->delete_edge( $source, $target );
 390         }
 391 }
 392
 393
 394 # Extra graph-alike utility
 395 sub has_path {
 396         my $self = shift;
 397     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 398         return undef unless $self->sequence->has_edge( $source, $target );
 399         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 400 }
 401
 402 =head2 clear_witness( @sigil_list )
 403
 404 Clear the given witnesses out of the collation entirely, removing references
 405 to them in paths, and removing readings that belong only to them.  Should only
 406 be called via $tradition->del_witness.
 407
 408 =cut
 409
 410 sub clear_witness {
 411         my( $self, @sigils ) = @_;
 412
 413         $self->_clear_cache;
 414         # Clear the witness(es) out of the paths
 415         foreach my $e ( $self->paths ) {
 416                 foreach my $sig ( @sigils ) {
 417                         $self->del_path( $e, $sig );
 418                 }
 419         }
 420
 421         # Clear out the newly unused readings
 422         foreach my $r ( $self->readings ) {
 423                 unless( $self->reading_witnesses( $r ) ) {
 424                         $self->del_reading( $r );
 425                 }
 426         }
 427 }
 428
 429 sub add_relationship {
 430         my $self = shift;
 431     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 432     my( @vectors ) = $self->relations->add_relationship( $source,
 433         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 434     # Force a full rank recalculation every time. Yuck.
 435     $self->calculate_ranks() if $self->end->has_rank;
 436         $self->_clear_cache;
 437     return @vectors;
 438 }
 439
 440 =head2 reading_witnesses( $reading )
 441
 442 Return a list of sigils corresponding to the witnesses in which the reading appears.
 443
 444 =cut
 445
 446 sub reading_witnesses {
 447         my( $self, $reading ) = @_;
 448         # We need only check either the incoming or the outgoing edges; I have
 449         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 450         if( $reading eq $self->start ) {
 451                 return map { $_->sigil } $self->tradition->witnesses;
 452         }
 453         my %all_witnesses;
 454         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 455                 my $wits = $self->sequence->get_edge_attributes( @$e );
 456                 @all_witnesses{ keys %$wits } = 1;
 457         }
 458         return keys %all_witnesses;
 459 }
 460
 461 =head1 OUTPUT METHODS
 462
 463 =head2 as_svg( \%options )
 464
 465 Returns an SVG string that represents the graph, via as_dot and graphviz.
 466 See as_dot for a list of options.
 467
 468 =cut
 469
 470 sub as_svg {
 471     my( $self, $opts ) = @_;
 472     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 473     if( !$self->has_cached_svg || $opts->{'recalc'}     || $want_subgraph ) {
 474                 my @cmd = qw/dot -Tsvg/;
 475                 my( $svg, $err );
 476                 my $dotfile = File::Temp->new();
 477                 ## USE FOR DEBUGGING
 478                 # $dotfile->unlink_on_destroy(0);
 479                 binmode $dotfile, ':utf8';
 480                 print $dotfile $self->as_dot( $opts );
 481                 push( @cmd, $dotfile->filename );
 482                 run( \@cmd, ">", binary(), \$svg );
 483                 $svg = decode_utf8( $svg );
 484                 $self->cached_svg( $svg ) unless $want_subgraph;
 485                 return $svg;
 486         } else {
 487                 return $self->cached_svg;
 488         }
 489 }
 490
 491
 492 =head2 as_dot( \%options )
 493
 494 Returns a string that is the collation graph expressed in dot
 495 (i.e. GraphViz) format.  Options include:
 496
 497 =over 4
 498
 499 =item * from
 500
 501 =item * to
 502
 503 =item * color_common
 504
 505 =back
 506
 507 =cut
 508
 509 sub as_dot {
 510     my( $self, $opts ) = @_;
 511     my $startrank = $opts->{'from'} if $opts;
 512     my $endrank = $opts->{'to'} if $opts;
 513     my $color_common = $opts->{'color_common'} if $opts;
 514     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 515        && $self->end->rank > 100;
 516
 517     # Check the arguments
 518     if( $startrank ) {
 519         return if $endrank && $startrank > $endrank;
 520         return if $startrank > $self->end->rank;
 521         }
 522         if( defined $endrank ) {
 523                 return if $endrank < 0;
 524                 $endrank = undef if $endrank == $self->end->rank;
 525         }
 526
 527     my $graph_name = $self->tradition->name;
 528     $graph_name =~ s/[^\w\s]//g;
 529     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 530
 531     my %graph_attrs = (
 532         'rankdir' => 'LR',
 533         'bgcolor' => 'none',
 534         );
 535     my %node_attrs = (
 536         'fontsize' => 14,
 537         'fillcolor' => 'white',
 538         'style' => 'filled',
 539         'shape' => 'ellipse'
 540         );
 541     my %edge_attrs = (
 542         'arrowhead' => 'open',
 543         'color' => '#000000',
 544         'fontcolor' => '#000000',
 545         );
 546
 547     my $dot = sprintf( "digraph %s {\n", $graph_name );
 548     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 549     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 550
 551         # Output substitute start/end readings if necessary
 552         if( $startrank ) {
 553                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 554         }
 555         if( $endrank ) {
 556                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 557         }
 558         if( $STRAIGHTENHACK ) {
 559                 ## HACK part 1
 560                 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
 561                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 562         }
 563         my %used;  # Keep track of the readings that actually appear in the graph
 564         # Sort the readings by rank if we have ranks; this speeds layout.
 565         my @all_readings = $self->end->has_rank
 566                 ? sort { $a->rank <=> $b->rank } $self->readings
 567                 : $self->readings;
 568         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 569     foreach my $reading ( @all_readings ) {
 570         # Only output readings within our rank range.
 571         next if $startrank && $reading->rank < $startrank;
 572         next if $endrank && $reading->rank > $endrank;
 573         $used{$reading->id} = 1;
 574         # Need not output nodes without separate labels
 575         next if $reading->id eq $reading->text;
 576         my $rattrs;
 577         my $label = $reading->text;
 578         $label =~ s/\"/\\\"/g;
 579                 $rattrs->{'label'} = $label;
 580                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 581         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 582     }
 583
 584         # Add the real edges. Need to weight one edge per rank jump, in a
 585         # continuous line.
 586         # my $weighted = $self->_add_edge_weights;
 587     my @edges = $self->paths;
 588         my( %substart, %subend );
 589     foreach my $edge ( @edges ) {
 590         # Do we need to output this edge?
 591         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 592                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 593                         my $variables = { %edge_attrs, 'label' => $label };
 594
 595                         # Account for the rank gap if necessary
 596                         my $rank0 = $self->reading( $edge->[0] )->rank
 597                                 if $self->reading( $edge->[0] )->has_rank;
 598                         my $rank1 = $self->reading( $edge->[1] )->rank
 599                                 if $self->reading( $edge->[1] )->has_rank;
 600                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 601                                 $variables->{'minlen'} = $rank1 - $rank0;
 602                         }
 603
 604                         # Add the calculated edge weights
 605                         # if( exists $weighted->{$edge->[0]}
 606                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 607                         #       # $variables->{'color'} = 'red';
 608                         #       $variables->{'weight'} = 3.0;
 609                         # }
 610
 611                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 612                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 613                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 614
 615                         my $varopts = _dot_attr_string( $variables );
 616                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 617                                 $edge->[0], $edge->[1], $varopts );
 618         } elsif( $used{$edge->[0]} ) {
 619                 $subend{$edge->[0]} = 1;
 620         } elsif( $used{$edge->[1]} ) {
 621                 $substart{$edge->[1]} = 1;
 622         }
 623     }
 624     # Add substitute start and end edges if necessary
 625     foreach my $node ( keys %substart ) {
 626         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 627         my $variables = { %edge_attrs, 'label' => $witstr };
 628         my $varopts = _dot_attr_string( $variables );
 629         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 630         }
 631     foreach my $node ( keys %subend ) {
 632         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 633         my $variables = { %edge_attrs, 'label' => $witstr };
 634         my $varopts = _dot_attr_string( $variables );
 635         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 636         }
 637         # HACK part 2
 638         if( $STRAIGHTENHACK ) {
 639                 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 640         }
 641
 642     $dot .= "}\n";
 643     return $dot;
 644 }
 645
 646 sub _dot_attr_string {
 647         my( $hash ) = @_;
 648         my @attrs;
 649         foreach my $k ( sort keys %$hash ) {
 650                 my $v = $hash->{$k};
 651                 push( @attrs, $k.'="'.$v.'"' );
 652         }
 653         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 654 }
 655
 656 sub _add_edge_weights {
 657         my $self = shift;
 658         # Walk the graph from START to END, choosing the successor node with
 659         # the largest number of witness paths each time.
 660         my $weighted = {};
 661         my $curr = $self->start->id;
 662         my $ranked = $self->end->has_rank;
 663         while( $curr ne $self->end->id ) {
 664                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 665                 my @succ = sort { $self->path_witnesses( $curr, $a )
 666                                                         <=> $self->path_witnesses( $curr, $b ) }
 667                         $self->sequence->successors( $curr );
 668                 my $next = pop @succ;
 669                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 670                 # Try to avoid lacunae in the weighted path.
 671                 while( @succ &&
 672                            ( $self->reading( $next )->is_lacuna ||
 673                                  $nextrank - $rank > 1 ) ){
 674                         $next = pop @succ;
 675                 }
 676                 $weighted->{$curr} = $next;
 677                 $curr = $next;
 678         }
 679         return $weighted;
 680 }
 681
 682 =head2 path_witnesses( $edge )
 683
 684 Returns the list of sigils whose witnesses are associated with the given edge.
 685 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 686
 687 =cut
 688
 689 sub path_witnesses {
 690         my( $self, @edge ) = @_;
 691         # If edge is an arrayref, cope.
 692         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 693                 my $e = shift @edge;
 694                 @edge = @$e;
 695         }
 696         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 697         return @wits;
 698 }
 699
 700 sub _path_display_label {
 701         my $self = shift;
 702         my @wits = sort @_;
 703         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 704         if( scalar @wits > $maj ) {
 705                 # TODO break out a.c. wits
 706                 return 'majority';
 707         } else {
 708                 return join( ', ', @wits );
 709         }
 710 }
 711
 712 =head2 witnesses_at_rank
 713
 714 Returns a list of witnesses that are not lacunose, for a given rank.
 715
 716 =cut
 717
 718 sub witnesses_at_rank {
 719         my( $self, $rank ) = @_;
 720 }
 721
 722 =head2 as_graphml
 723
 724 Returns a GraphML representation of the collation.  The GraphML will contain
 725 two graphs. The first expresses the attributes of the readings and the witness
 726 paths that link them; the second expresses the relationships that link the
 727 readings.  This is the native transfer format for a tradition.
 728
 729 =begin testing
 730
 731 use Text::Tradition;
 732
 733 my $READINGS = 311;
 734 my $PATHS = 361;
 735
 736 my $datafile = 't/data/florilegium_tei_ps.xml';
 737 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 738                                       'name' => 'test0',
 739                                       'file' => $datafile,
 740                                       'linear' => 1 );
 741
 742 ok( $tradition, "Got a tradition object" );
 743 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 744 ok( $tradition->collation, "Tradition has a collation" );
 745
 746 my $c = $tradition->collation;
 747 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 748 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 749 is( scalar $c->relationships, 0, "Collation has all relationships" );
 750
 751 # Add a few relationships
 752 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 753 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 754 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 755
 756 # Now write it to GraphML and parse it again.
 757
 758 my $graphml = $c->as_graphml;
 759 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 760 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 761 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 762 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 763
 764 =end testing
 765
 766 =cut
 767
 768 sub as_graphml {
 769     my( $self ) = @_;
 770
 771     # Some namespaces
 772     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 773     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 774     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 775         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 776
 777     # Create the document and root node
 778     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 779     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 780     $graphml->setDocumentElement( $root );
 781     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 782     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 783
 784     # Add the data keys for the graph
 785     my %graph_data_keys;
 786     my $gdi = 0;
 787     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 788     foreach my $datum ( @graph_attributes ) {
 789         $graph_data_keys{$datum} = 'dg'.$gdi++;
 790         my $key = $root->addNewChild( $graphml_ns, 'key' );
 791         $key->setAttribute( 'attr.name', $datum );
 792         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 793         $key->setAttribute( 'for', 'graph' );
 794         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 795     }
 796
 797     # Add the data keys for nodes
 798     my %node_data_keys;
 799     my $ndi = 0;
 800     my %node_data = (
 801         id => 'string',
 802         text => 'string',
 803         rank => 'string',
 804         is_start => 'boolean',
 805         is_end => 'boolean',
 806         is_lacuna => 'boolean',
 807         );
 808     foreach my $datum ( keys %node_data ) {
 809         $node_data_keys{$datum} = 'dn'.$ndi++;
 810         my $key = $root->addNewChild( $graphml_ns, 'key' );
 811         $key->setAttribute( 'attr.name', $datum );
 812         $key->setAttribute( 'attr.type', $node_data{$datum} );
 813         $key->setAttribute( 'for', 'node' );
 814         $key->setAttribute( 'id', $node_data_keys{$datum} );
 815     }
 816
 817     # Add the data keys for edges, i.e. witnesses
 818     my $edi = 0;
 819     my %edge_data_keys;
 820     my %edge_data = (
 821         class => 'string',                              # Class, deprecated soon
 822         witness => 'string',                    # ID/label for a path
 823         relationship => 'string',               # ID/label for a relationship
 824         extra => 'boolean',                             # Path key
 825         scope => 'string',                              # Relationship key
 826         non_correctable => 'boolean',   # Relationship key
 827         non_independent => 'boolean',   # Relationship key
 828         );
 829     foreach my $datum ( keys %edge_data ) {
 830         $edge_data_keys{$datum} = 'de'.$edi++;
 831         my $key = $root->addNewChild( $graphml_ns, 'key' );
 832         $key->setAttribute( 'attr.name', $datum );
 833         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 834         $key->setAttribute( 'for', 'edge' );
 835         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 836     }
 837
 838     # Add the collation graph itself
 839     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 840     $sgraph->setAttribute( 'edgedefault', 'directed' );
 841     $sgraph->setAttribute( 'id', $self->tradition->name );
 842     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 843     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 844     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 845     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 846     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 847
 848     # Collation attribute data
 849     foreach my $datum ( @graph_attributes ) {
 850         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 851                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 852         }
 853
 854     my $node_ctr = 0;
 855     my %node_hash;
 856     # Add our readings to the graph
 857     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 858         # Add to the main graph
 859         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 860         my $node_xmlid = 'n' . $node_ctr++;
 861         $node_hash{ $n->id } = $node_xmlid;
 862         $node_el->setAttribute( 'id', $node_xmlid );
 863         foreach my $d ( keys %node_data ) {
 864                 my $nval = $n->$d;
 865                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 866                         if defined $nval;
 867         }
 868     }
 869
 870     # Add the path edges to the sequence graph
 871     my $edge_ctr = 0;
 872     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 873         # We add an edge in the graphml for every witness in $e.
 874         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 875                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 876                                                                                 $node_hash{ $e->[0] },
 877                                                                                 $node_hash{ $e->[1] } );
 878                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 879                         $edge_el->setAttribute( 'source', $from );
 880                         $edge_el->setAttribute( 'target', $to );
 881                         $edge_el->setAttribute( 'id', $id );
 882
 883                         # It's a witness path, so add the witness
 884                         my $base = $wit;
 885                         my $key = $edge_data_keys{'witness'};
 886                         # Is this an ante-corr witness?
 887                         my $aclabel = $self->ac_label;
 888                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 889                                 # Keep the base witness
 890                                 $base = $1;
 891                                 # ...and record that this is an 'extra' reading path
 892                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 893                         }
 894                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 895                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 896                 }
 897         }
 898
 899         # Add the relationship graph to the XML
 900         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 901                 $node_data_keys{'id'}, \%edge_data_keys );
 902
 903     # Save and return the thing
 904     my $result = decode_utf8( $graphml->toString(1) );
 905     return $result;
 906 }
 907
 908 sub _add_graphml_data {
 909     my( $el, $key, $value ) = @_;
 910     return unless defined $value;
 911     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 912     $data_el->setAttribute( 'key', $key );
 913     $data_el->appendText( $value );
 914 }
 915
 916 =head2 as_csv
 917
 918 Returns a CSV alignment table representation of the collation graph, one
 919 row per witness (or witness uncorrected.)
 920
 921 =cut
 922
 923 sub as_csv {
 924     my( $self ) = @_;
 925     my $table = $self->alignment_table;
 926     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 927     my @result;
 928     # Make the header row
 929     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 930         push( @result, decode_utf8( $csv->string ) );
 931     # Make the rest of the rows
 932     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 933         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 934         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
 935         $csv->combine( @row );
 936         push( @result, decode_utf8( $csv->string ) );
 937     }
 938     return join( "\n", @result );
 939 }
 940
 941 =head2 alignment_table( $use_refs, $include_witnesses )
 942
 943 Return a reference to an alignment table, in a slightly enhanced CollateX
 944 format which looks like this:
 945
 946  $table = { alignment => [ { witness => "SIGIL",
 947                              tokens => [ { t => "TEXT" }, ... ] },
 948                            { witness => "SIG2",
 949                              tokens => [ { t => "TEXT" }, ... ] },
 950                            ... ],
 951             length => TEXTLEN };
 952
 953 If $use_refs is set to 1, the reading object is returned in the table
 954 instead of READINGTEXT; if not, the text of the reading is returned.
 955
 956 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 957 keys have a true hash value will be included.
 958
 959 =cut
 960
 961 sub alignment_table {
 962     my( $self ) = @_;
 963     my $include; # see if we can ditch this
 964     ## DEBUG
 965     return $self->cached_table if $self->has_cached_table;
 966
 967     # Make sure we can do this
 968         throw( "Need a linear graph in order to make an alignment table" )
 969                 unless $self->linear;
 970         $self->calculate_ranks unless $self->end->has_rank;
 971
 972     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 973     my @all_pos = ( 1 .. $self->end->rank - 1 );
 974     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 975         if( $include ) {
 976                 next unless $include->{$wit->sigil};
 977         }
 978         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 979         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 980         my @row = _make_witness_row( \@wit_path, \@all_pos );
 981         push( @{$table->{'alignment'}},
 982                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 983         if( $wit->is_layered ) {
 984                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 985                         $wit->sigil.$self->ac_label );
 986             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
 987                         push( @{$table->{'alignment'}},
 988                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 989         }
 990     }
 991     $self->cached_table( $table );
 992     return $table;
 993 }
 994
 995 sub _make_witness_row {
 996     my( $path, $positions ) = @_;
 997     my %char_hash;
 998     map { $char_hash{$_} = undef } @$positions;
 999     my $debug = 0;
1000     foreach my $rdg ( @$path ) {
1001         my $rtext = $rdg->text;
1002         $rtext = '#LACUNA#' if $rdg->is_lacuna;
1003         print STDERR "rank " . $rdg->rank . "\n" if $debug;
1004         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1005         $char_hash{$rdg->rank} = { 't' => $rdg };
1006     }
1007     my @row = map { $char_hash{$_} } @$positions;
1008     # Fill in lacuna markers for undef spots in the row
1009     my $last_el = shift @row;
1010     my @filled_row = ( $last_el );
1011     foreach my $el ( @row ) {
1012         # If we are using node reference, make the lacuna node appear many times
1013         # in the table.  If not, use the lacuna tag.
1014         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1015             $el = $last_el;
1016         }
1017         push( @filled_row, $el );
1018         $last_el = $el;
1019     }
1020     return @filled_row;
1021 }
1022
1023 =head1 NAVIGATION METHODS
1024
1025 =head2 reading_sequence( $first, $last, $sigil, $backup )
1026
1027 Returns the ordered list of readings, starting with $first and ending
1028 with $last, for the witness given in $sigil. If a $backup sigil is
1029 specified (e.g. when walking a layered witness), it will be used wherever
1030 no $sigil path exists.  If there is a base text reading, that will be
1031 used wherever no path exists for $sigil or $backup.
1032
1033 =cut
1034
1035 # TODO Think about returning some lazy-eval iterator.
1036 # TODO Get rid of backup; we should know from what witness is whether we need it.
1037
1038 sub reading_sequence {
1039     my( $self, $start, $end, $witness ) = @_;
1040
1041     $witness = $self->baselabel unless $witness;
1042     my @readings = ( $start );
1043     my %seen;
1044     my $n = $start;
1045     while( $n && $n->id ne $end->id ) {
1046         if( exists( $seen{$n->id} ) ) {
1047             throw( "Detected loop for $witness at " . $n->id );
1048         }
1049         $seen{$n->id} = 1;
1050
1051         my $next = $self->next_reading( $n, $witness );
1052         unless( $next ) {
1053             throw( "Did not find any path for $witness from reading " . $n->id );
1054         }
1055         push( @readings, $next );
1056         $n = $next;
1057     }
1058     # Check that the last reading is our end reading.
1059     my $last = $readings[$#readings];
1060     throw( "Last reading found from " . $start->text .
1061         " for witness $witness is not the end!" ) # TODO do we get this far?
1062         unless $last->id eq $end->id;
1063
1064     return @readings;
1065 }
1066
1067 =head2 next_reading( $reading, $sigil );
1068
1069 Returns the reading that follows the given reading along the given witness
1070 path.
1071
1072 =cut
1073
1074 sub next_reading {
1075     # Return the successor via the corresponding path.
1076     my $self = shift;
1077     my $answer = $self->_find_linked_reading( 'next', @_ );
1078         return undef unless $answer;
1079     return $self->reading( $answer );
1080 }
1081
1082 =head2 prior_reading( $reading, $sigil )
1083
1084 Returns the reading that precedes the given reading along the given witness
1085 path.
1086
1087 =cut
1088
1089 sub prior_reading {
1090     # Return the predecessor via the corresponding path.
1091     my $self = shift;
1092     my $answer = $self->_find_linked_reading( 'prior', @_ );
1093     return $self->reading( $answer );
1094 }
1095
1096 sub _find_linked_reading {
1097     my( $self, $direction, $node, $path ) = @_;
1098
1099     # Get a backup if we are dealing with a layered witness
1100     my $alt_path;
1101     my $aclabel = $self->ac_label;
1102     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1103         $alt_path = $1;
1104     }
1105
1106     my @linked_paths = $direction eq 'next'
1107         ? $self->sequence->edges_from( $node )
1108         : $self->sequence->edges_to( $node );
1109     return undef unless scalar( @linked_paths );
1110
1111     # We have to find the linked path that contains all of the
1112     # witnesses supplied in $path.
1113     my( @path_wits, @alt_path_wits );
1114     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1115     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1116     my $base_le;
1117     my $alt_le;
1118     foreach my $le ( @linked_paths ) {
1119         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1120             $base_le = $le;
1121         }
1122                 my @le_wits = sort $self->path_witnesses( $le );
1123                 if( _is_within( \@path_wits, \@le_wits ) ) {
1124                         # This is the right path.
1125                         return $direction eq 'next' ? $le->[1] : $le->[0];
1126                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1127                         $alt_le = $le;
1128                 }
1129     }
1130     # Got this far? Return the alternate path if it exists.
1131     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1132         if $alt_le;
1133
1134     # Got this far? Return the base path if it exists.
1135     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1136         if $base_le;
1137
1138     # Got this far? We have no appropriate path.
1139     warn "Could not find $direction node from " . $node->id
1140         . " along path $path";
1141     return undef;
1142 }
1143
1144 # Some set logic.
1145 sub _is_within {
1146     my( $set1, $set2 ) = @_;
1147     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1148     foreach my $el ( @$set1 ) {
1149         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1150     }
1151     return $ret;
1152 }
1153
1154 # Return the string that joins together a list of witnesses for
1155 # display on a single path.
1156 sub _witnesses_of_label {
1157     my( $self, $label ) = @_;
1158     my $regex = $self->wit_list_separator;
1159     my @answer = split( /\Q$regex\E/, $label );
1160     return @answer;
1161 }
1162
1163 =head2 common_readings
1164
1165 Returns the list of common readings in the graph (i.e. those readings that are
1166 shared by all non-lacunose witnesses.)
1167
1168 =cut
1169
1170 sub common_readings {
1171         my $self = shift;
1172         my @common = grep { $_->is_common } $self->readings;
1173         return @common;
1174 }
1175
1176 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1177
1178 Returns the text of a witness (plus its backup, if we are using a layer)
1179 as stored in the collation.  The text is returned as a string, where the
1180 individual readings are joined with spaces and the meta-readings (e.g.
1181 lacunae) are omitted.  Optional specification of $start and $end allows
1182 the generation of a subset of the witness text.
1183
1184 =cut
1185
1186 sub path_text {
1187         my( $self, $wit, $start, $end ) = @_;
1188         $start = $self->start unless $start;
1189         $end = $self->end unless $end;
1190         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1191         return join( ' ', map { $_->text } @path );
1192 }
1193
1194 =head1 INITIALIZATION METHODS
1195
1196 These are mostly for use by parsers.
1197
1198 =head2 make_witness_path( $witness )
1199
1200 Link the array of readings contained in $witness->path (and in
1201 $witness->uncorrected_path if it exists) into collation paths.
1202 Clear out the arrays when finished.
1203
1204 =head2 make_witness_paths
1205
1206 Call make_witness_path for all witnesses in the tradition.
1207
1208 =cut
1209
1210 # For use when a collation is constructed from a base text and an apparatus.
1211 # We have the sequences of readings and just need to add path edges.
1212 # When we are done, clear out the witness path attributes, as they are no
1213 # longer needed.
1214 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1215
1216 sub make_witness_paths {
1217     my( $self ) = @_;
1218     foreach my $wit ( $self->tradition->witnesses ) {
1219         # print STDERR "Making path for " . $wit->sigil . "\n";
1220         $self->make_witness_path( $wit );
1221     }
1222 }
1223
1224 sub make_witness_path {
1225     my( $self, $wit ) = @_;
1226     my @chain = @{$wit->path};
1227     my $sig = $wit->sigil;
1228     foreach my $idx ( 0 .. $#chain-1 ) {
1229         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1230     }
1231     if( $wit->is_layered ) {
1232         @chain = @{$wit->uncorrected_path};
1233         foreach my $idx( 0 .. $#chain-1 ) {
1234             my $source = $chain[$idx];
1235             my $target = $chain[$idx+1];
1236             $self->add_path( $source, $target, $sig.$self->ac_label )
1237                 unless $self->has_path( $source, $target, $sig );
1238         }
1239     }
1240     $wit->clear_path;
1241     $wit->clear_uncorrected_path;
1242 }
1243
1244 =head2 calculate_ranks
1245
1246 Calculate the reading ranks (that is, their aligned positions relative
1247 to each other) for the graph.  This can only be called on linear collations.
1248
1249 =begin testing
1250
1251 use Text::Tradition;
1252
1253 my $cxfile = 't/data/Collatex-16.xml';
1254 my $t = Text::Tradition->new(
1255     'name'  => 'inline',
1256     'input' => 'CollateX',
1257     'file'  => $cxfile,
1258     );
1259 my $c = $t->collation;
1260
1261 # Make an svg
1262 my $svg = $c->as_svg;
1263 is( substr( $svg, 0, 5 ), '<?xml', "Got XML doc for svg" );
1264 ok( $c->has_cached_svg, "SVG was cached" );
1265 is( $c->as_svg, $svg, "Cached SVG returned upon second call" );
1266 $c->calculate_ranks;
1267 is( $c->as_svg, $svg, "Cached SVG retained with no rank change" );
1268 $c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
1269 isnt( $c->as_svg, $svg, "SVG changed after relationship add" );
1270
1271 =end testing
1272
1273 =cut
1274
1275 sub calculate_ranks {
1276     my $self = shift;
1277     # Save the existing ranks, in case we need to invalidate the cached SVG.
1278     my %existing_ranks;
1279     # Walk a version of the graph where every node linked by a relationship
1280     # edge is fundamentally the same node, and do a topological ranking on
1281     # the nodes in this graph.
1282     my $topo_graph = Graph->new();
1283     my %rel_containers;
1284     my $rel_ctr = 0;
1285     # Add the nodes
1286     foreach my $r ( $self->readings ) {
1287         next if exists $rel_containers{$r->id};
1288         my @rels = $r->related_readings( 'colocated' );
1289         if( @rels ) {
1290             # Make a relationship container.
1291             push( @rels, $r );
1292             my $rn = 'rel_container_' . $rel_ctr++;
1293             $topo_graph->add_vertex( $rn );
1294             foreach( @rels ) {
1295                 $rel_containers{$_->id} = $rn;
1296             }
1297         } else {
1298             # Add a new node to mirror the old node.
1299             $rel_containers{$r->id} = $r->id;
1300             $topo_graph->add_vertex( $r->id );
1301         }
1302     }
1303
1304     # Add the edges.
1305     foreach my $r ( $self->readings ) {
1306                 $existing_ranks{$r} = $r->rank;
1307         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1308                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1309                         $rel_containers{$n} );
1310                 # $DB::single = 1 unless $tfrom && $tto;
1311             $topo_graph->add_edge( $tfrom, $tto );
1312         }
1313     }
1314
1315     # Now do the rankings, starting with the start node.
1316     my $topo_start = $rel_containers{$self->start->id};
1317     my $node_ranks = { $topo_start => 0 };
1318     my @curr_origin = ( $topo_start );
1319     # A little iterative function.
1320     while( @curr_origin ) {
1321         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1322     }
1323     # Transfer our rankings from the topological graph to the real one.
1324     foreach my $r ( $self->readings ) {
1325         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1326             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1327         } else {
1328                 # Die. Find the last rank we calculated.
1329                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1330                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1331                         $self->readings;
1332                 my $last = pop @all_defined;
1333             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1334         }
1335     }
1336     # Do we need to invalidate the cached SVG?
1337     if( $self->has_cached_svg ) {
1338         foreach my $r ( $self->readings ) {
1339                 next if $existing_ranks{$r} == $r->rank;
1340                 $self->wipe_svg;
1341                 last;
1342         }
1343     }
1344 }
1345
1346 sub _assign_rank {
1347     my( $graph, $node_ranks, @current_nodes ) = @_;
1348     # Look at each of the children of @current_nodes.  If all the child's
1349     # parents have a rank, assign it the highest rank + 1 and add it to
1350     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1351     # parent gets a rank.
1352     my @next_nodes;
1353     foreach my $c ( @current_nodes ) {
1354         warn "Current reading $c has no rank!"
1355             unless exists $node_ranks->{$c};
1356         # print STDERR "Looking at child of node $c, rank "
1357         #     . $node_ranks->{$c} . "\n";
1358         foreach my $child ( $graph->successors( $c ) ) {
1359             next if exists $node_ranks->{$child};
1360             my $highest_rank = -1;
1361             my $skip = 0;
1362             foreach my $parent ( $graph->predecessors( $child ) ) {
1363                 if( exists $node_ranks->{$parent} ) {
1364                     $highest_rank = $node_ranks->{$parent}
1365                         if $highest_rank <= $node_ranks->{$parent};
1366                 } else {
1367                     $skip = 1;
1368                     last;
1369                 }
1370             }
1371             next if $skip;
1372             my $c_rank = $highest_rank + 1;
1373             # print STDERR "Assigning rank $c_rank to node $child \n";
1374             $node_ranks->{$child} = $c_rank;
1375             push( @next_nodes, $child );
1376         }
1377     }
1378     return @next_nodes;
1379 }
1380
1381 =head2 flatten_ranks
1382
1383 A convenience method for parsing collation data.  Searches the graph for readings
1384 with the same text at the same rank, and merges any that are found.
1385
1386 =cut
1387
1388 sub flatten_ranks {
1389     my $self = shift;
1390     my %unique_rank_rdg;
1391     foreach my $rdg ( $self->readings ) {
1392         next unless $rdg->has_rank;
1393         my $key = $rdg->rank . "||" . $rdg->text;
1394         if( exists $unique_rank_rdg{$key} ) {
1395             # Combine!
1396                 # print STDERR "Combining readings at same rank: $key\n";
1397             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1398             # TODO see if this now makes a common point.
1399         } else {
1400             $unique_rank_rdg{$key} = $rdg;
1401         }
1402     }
1403 }
1404
1405 =head2 remove_collations
1406
1407 Another convenience method for parsing. Removes all 'collation' relationships
1408 that were defined in order to get the reading ranks to be correct.
1409
1410 =begin testing
1411
1412 use Text::Tradition;
1413
1414 my $cxfile = 't/data/Collatex-16.xml';
1415 my $t = Text::Tradition->new(
1416     'name'  => 'inline',
1417     'input' => 'CollateX',
1418     'file'  => $cxfile,
1419     );
1420 my $c = $t->collation;
1421
1422 isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
1423 $c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );
1424 is( scalar $c->relationships, 4, "Found all expected relationships" );
1425 $c->remove_collations;
1426 is( scalar $c->relationships, 3, "Collated relationships now gone" );
1427 is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
1428
1429 =end testing
1430
1431 =cut
1432
1433 sub remove_collations {
1434         my $self = shift;
1435         foreach my $reledge ( $self->relationships ) {
1436                 my $relobj = $self->relations->get_relationship( $reledge );
1437                 if( $relobj && $relobj->type eq 'collated' ) {
1438                         $self->relations->delete_relationship( $reledge );
1439                 }
1440         }
1441 }
1442
1443
1444 =head2 calculate_common_readings
1445
1446 Goes through the graph identifying the readings that appear in every witness
1447 (apart from those with lacunae at that spot.) Marks them as common and returns
1448 the list.
1449
1450 =begin testing
1451
1452 use Text::Tradition;
1453
1454 my $cxfile = 't/data/Collatex-16.xml';
1455 my $t = Text::Tradition->new(
1456     'name'  => 'inline',
1457     'input' => 'CollateX',
1458     'file'  => $cxfile,
1459     );
1460 my $c = $t->collation;
1461
1462 my @common = $c->calculate_common_readings();
1463 is( scalar @common, 8, "Found correct number of common readings" );
1464 my @marked = sort $c->common_readings();
1465 is( scalar @common, 8, "All common readings got marked as such" );
1466 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1467 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1468
1469 =end testing
1470
1471 =cut
1472
1473 sub calculate_common_readings {
1474         my $self = shift;
1475         my @common;
1476         my $table = $self->alignment_table;
1477         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1478                 my @row = map { $_->{'tokens'}->[$idx]
1479                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1480                                         @{$table->{'alignment'}};
1481                 my %hash;
1482                 foreach my $r ( @row ) {
1483                         if( $r ) {
1484                                 $hash{$r->id} = $r unless $r->is_meta;
1485                         } else {
1486                                 $hash{'UNDEF'} = $r;
1487                         }
1488                 }
1489                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1490                         my( $r ) = values %hash;
1491                         $r->is_common( 1 );
1492                         push( @common, $r );
1493                 }
1494         }
1495         return @common;
1496 }
1497
1498 =head2 text_from_paths
1499
1500 Calculate the text array for all witnesses from the path, for later consistency
1501 checking.  Only to be used if there is no non-graph-based way to know the
1502 original texts.
1503
1504 =cut
1505
1506 sub text_from_paths {
1507         my $self = shift;
1508     foreach my $wit ( $self->tradition->witnesses ) {
1509         my @text = split( /\s+/,
1510                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1511         $wit->text( \@text );
1512         if( $wit->is_layered ) {
1513                         my @uctext = split( /\s+/,
1514                                 $self->reading_sequence( $self->start, $self->end,
1515                                         $wit->sigil.$self->ac_label ) );
1516                         $wit->text( \@uctext );
1517         }
1518     }
1519 }
1520
1521 =head1 UTILITY FUNCTIONS
1522
1523 =head2 common_predecessor( $reading_a, $reading_b )
1524
1525 Find the last reading that occurs in sequence before both the given readings.
1526
1527 =head2 common_successor( $reading_a, $reading_b )
1528
1529 Find the first reading that occurs in sequence after both the given readings.
1530
1531 =begin testing
1532
1533 use Text::Tradition;
1534
1535 my $cxfile = 't/data/Collatex-16.xml';
1536 my $t = Text::Tradition->new(
1537     'name'  => 'inline',
1538     'input' => 'CollateX',
1539     'file'  => $cxfile,
1540     );
1541 my $c = $t->collation;
1542
1543 is( $c->common_predecessor( 'n9', 'n23' )->id,
1544     'n20', "Found correct common predecessor" );
1545 is( $c->common_successor( 'n9', 'n23' )->id,
1546     '#END#', "Found correct common successor" );
1547
1548 is( $c->common_predecessor( 'n19', 'n17' )->id,
1549     'n16', "Found correct common predecessor for readings on same path" );
1550 is( $c->common_successor( 'n21', 'n26' )->id,
1551     '#END#', "Found correct common successor for readings on same path" );
1552
1553 =end testing
1554
1555 =cut
1556
1557 ## Return the closest reading that is a predecessor of both the given readings.
1558 sub common_predecessor {
1559         my $self = shift;
1560         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1561         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1562 }
1563
1564 sub common_successor {
1565         my $self = shift;
1566         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1567         return $self->_common_in_path( $r1, $r2, 'successors' );
1568 }
1569
1570 sub _common_in_path {
1571         my( $self, $r1, $r2, $dir ) = @_;
1572         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1573         $iter = $self->end->rank - $iter if $dir eq 'successors';
1574         my @candidates;
1575         my @last_checked = ( $r1, $r2 );
1576         my %all_seen;
1577         while( !@candidates ) {
1578                 my @new_lc;
1579                 foreach my $lc ( @last_checked ) {
1580                         foreach my $p ( $lc->$dir ) {
1581                                 if( $all_seen{$p->id} ) {
1582                                         push( @candidates, $p );
1583                                 } else {
1584                                         $all_seen{$p->id} = 1;
1585                                         push( @new_lc, $p );
1586                                 }
1587                         }
1588                 }
1589                 @last_checked = @new_lc;
1590         }
1591         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1592         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1593 }
1594
1595 sub throw {
1596         Text::Tradition::Error->throw(
1597                 'ident' => 'Collation error',
1598                 'message' => $_[0],
1599                 );
1600 }
1601
1602 no Moose;
1603 __PACKAGE__->meta->make_immutable;
1604
1605 =head1 LICENSE
1606
1607 This package is free software and is provided "as is" without express
1608 or implied warranty.  You can redistribute it and/or modify it under
1609 the same terms as Perl itself.
1610
1611 =head1 AUTHOR
1612
1613 Tara L Andrews E<lt>aurum@cpan.orgE<gt>