lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30         },
  31         writer => '_set_relations',
  32         );
  33
  34 has 'tradition' => (
  35     is => 'ro',
  36     isa => 'Text::Tradition',
  37     weak_ref => 1,
  38     );
  39
  40 has 'readings' => (
  41         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  42         traits => ['Hash'],
  43     handles => {
  44         reading     => 'get',
  45         _add_reading => 'set',
  46         del_reading => 'delete',
  47         has_reading => 'exists',
  48         readings   => 'values',
  49     },
  50     default => sub { {} },
  51         );
  52
  53 has 'wit_list_separator' => (
  54     is => 'rw',
  55     isa => 'Str',
  56     default => ', ',
  57     );
  58
  59 has 'baselabel' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => 'base text',
  63     );
  64
  65 has 'linear' => (
  66     is => 'rw',
  67     isa => 'Bool',
  68     default => 1,
  69     );
  70
  71 has 'ac_label' => (
  72     is => 'rw',
  73     isa => 'Str',
  74     default => ' (a.c.)',
  75     );
  76
  77 has 'start' => (
  78         is => 'ro',
  79         isa => 'Text::Tradition::Collation::Reading',
  80         writer => '_set_start',
  81         weak_ref => 1,
  82         );
  83
  84 has 'end' => (
  85         is => 'ro',
  86         isa => 'Text::Tradition::Collation::Reading',
  87         writer => '_set_end',
  88         weak_ref => 1,
  89         );
  90
  91 has 'cached_svg' => (
  92         is => 'rw',
  93         isa => 'Str',
  94         predicate => 'has_cached_svg',
  95         clearer => 'wipe_svg',
  96         );
  97
  98 has 'cached_table' => (
  99         is => 'rw',
 100         isa => 'HashRef',
 101         predicate => 'has_cached_table',
 102         clearer => 'wipe_table',
 103         );
 104
 105 =head1 NAME
 106
 107 Text::Tradition::Collation - a software model for a text collation
 108
 109 =head1 SYNOPSIS
 110
 111   use Text::Tradition;
 112   my $t = Text::Tradition->new(
 113     'name' => 'this is a text',
 114     'input' => 'TEI',
 115     'file' => '/path/to/tei_parallel_seg_file.xml' );
 116
 117   my $c = $t->collation;
 118   my @readings = $c->readings;
 119   my @paths = $c->paths;
 120   my @relationships = $c->relationships;
 121
 122   my $svg_variant_graph = $t->collation->as_svg();
 123
 124 =head1 DESCRIPTION
 125
 126 Text::Tradition is a library for representation and analysis of collated
 127 texts, particularly medieval ones.  The Collation is the central feature of
 128 a Tradition, where the text, its sequence of readings, and its relationships
 129 between readings are actually kept.
 130
 131 =head1 CONSTRUCTOR
 132
 133 =head2 new
 134
 135 The constructor.  Takes a hash or hashref of the following arguments:
 136
 137 =over
 138
 139 =item * tradition - The Text::Tradition object to which the collation
 140 belongs. Required.
 141
 142 =item * linear - Whether the collation should be linear; that is, whether
 143 transposed readings should be treated as two linked readings rather than one,
 144 and therefore whether the collation graph is acyclic.  Defaults to true.
 145
 146 =item * baselabel - The default label for the path taken by a base text
 147 (if any). Defaults to 'base text'.
 148
 149 =item * wit_list_separator - The string to join a list of witnesses for
 150 purposes of making labels in display graphs.  Defaults to ', '.
 151
 152 =item * ac_label - The extra label to tack onto a witness sigil when
 153 representing another layer of path for the given witness - that is, when
 154 a text has more than one possible reading due to scribal corrections or
 155 the like.  Defaults to ' (a.c.)'.
 156
 157 =back
 158
 159 =head1 ACCESSORS
 160
 161 =head2 tradition
 162
 163 =head2 linear
 164
 165 =head2 wit_list_separator
 166
 167 =head2 baselabel
 168
 169 =head2 ac_label
 170
 171 Simple accessors for collation attributes.
 172
 173 =head2 start
 174
 175 The meta-reading at the start of every witness path.
 176
 177 =head2 end
 178
 179 The meta-reading at the end of every witness path.
 180
 181 =head2 readings
 182
 183 Returns all Reading objects in the graph.
 184
 185 =head2 reading( $id )
 186
 187 Returns the Reading object corresponding to the given ID.
 188
 189 =head2 add_reading( $reading_args )
 190
 191 Adds a new reading object to the collation.
 192 See L<Text::Tradition::Collation::Reading> for the available arguments.
 193
 194 =head2 del_reading( $object_or_id )
 195
 196 Removes the given reading from the collation, implicitly removing its
 197 paths and relationships.
 198
 199 =head2 merge_readings( $main, $second )
 200
 201 Merges the $second reading into the $main one.
 202 The arguments may be either readings or reading IDs.
 203
 204 =head2 has_reading( $id )
 205
 206 Predicate to see whether a given reading ID is in the graph.
 207
 208 =head2 reading_witnesses( $object_or_id )
 209
 210 Returns a list of sigils whose witnesses contain the reading.
 211
 212 =head2 paths
 213
 214 Returns all reading paths within the document - that is, all edges in the
 215 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 216
 217 =head2 add_path( $source, $target, $sigil )
 218
 219 Links the given readings in the collation in sequence, under the given witness
 220 sigil.  The readings may be specified by object or ID.
 221
 222 =head2 del_path( $source, $target, $sigil )
 223
 224 Links the given readings in the collation in sequence, under the given witness
 225 sigil.  The readings may be specified by object or ID.
 226
 227 =head2 has_path( $source, $target );
 228
 229 Returns true if the two readings are linked in sequence in any witness.
 230 The readings may be specified by object or ID.
 231
 232 =head2 relationships
 233
 234 Returns all Relationship objects in the collation.
 235
 236 =head2 add_relationship( $reading, $other_reading, $options )
 237
 238 Adds a new relationship of the type given in $options between the two readings,
 239 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 240 where $status is true on success, and @vectors is a list of relationship edges
 241 that were ultimately added.
 242 See L<Text::Tradition::Collation::Relationship> for the available options.
 243
 244 =cut
 245
 246 sub BUILD {
 247     my $self = shift;
 248     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 249     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 250     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 251 }
 252
 253 ### Reading construct/destruct functions
 254
 255 sub _clear_cache {
 256         my $self = shift;
 257         $self->wipe_svg if $self->has_cached_svg;
 258         $self->wipe_table if $self->has_cached_table;
 259 }
 260
 261 sub add_reading {
 262         my( $self, $reading ) = @_;
 263         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 264                 my %args = %$reading;
 265                 $reading = Text::Tradition::Collation::Reading->new(
 266                         'collation' => $self,
 267                         %args );
 268         }
 269         # First check to see if a reading with this ID exists.
 270         if( $self->reading( $reading->id ) ) {
 271                 throw( "Collation already has a reading with id " . $reading->id );
 272         }
 273         $self->_clear_cache;
 274         $self->_add_reading( $reading->id => $reading );
 275         # Once the reading has been added, put it in both graphs.
 276         $self->sequence->add_vertex( $reading->id );
 277         $self->relations->add_reading( $reading->id );
 278         return $reading;
 279 };
 280
 281 around del_reading => sub {
 282         my $orig = shift;
 283         my $self = shift;
 284         my $arg = shift;
 285
 286         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 287                 $arg = $arg->id;
 288         }
 289         # Remove the reading from the graphs.
 290         $self->_clear_cache;
 291         $self->sequence->delete_vertex( $arg );
 292         $self->relations->delete_reading( $arg );
 293
 294         # Carry on.
 295         $self->$orig( $arg );
 296 };
 297
 298 # merge_readings( $main, $to_be_deleted );
 299
 300 sub merge_readings {
 301         my $self = shift;
 302
 303         # We only need the IDs for adding paths to the graph, not the reading
 304         # objects themselves.
 305     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 306         $self->_clear_cache;
 307
 308     # The kept reading should inherit the paths and the relationships
 309     # of the deleted reading.
 310         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 311                 my @vector = ( $kept );
 312                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 313                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 314                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 315                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 316                 $self->sequence->add_edge( @vector );
 317                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 318                 @wits{keys %$fwits} = values %$fwits;
 319                 $self->sequence->set_edge_attributes( @vector, \%wits );
 320         }
 321         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 322
 323         # Do the deletion deed.
 324         if( $combine_char ) {
 325                 my $kept_obj = $self->reading( $kept );
 326                 my $new_text = join( $combine_char, $kept_obj->text,
 327                         $self->reading( $deleted )->text );
 328                 $kept_obj->alter_text( $new_text );
 329         }
 330         $self->del_reading( $deleted );
 331 }
 332
 333
 334 # Helper function for manipulating the graph.
 335 sub _stringify_args {
 336         my( $self, $first, $second, $arg ) = @_;
 337     $first = $first->id
 338         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 339     $second = $second->id
 340         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 341     return( $first, $second, $arg );
 342 }
 343
 344 # Helper function for manipulating the graph.
 345 sub _objectify_args {
 346         my( $self, $first, $second, $arg ) = @_;
 347     $first = $self->reading( $first )
 348         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 349     $second = $self->reading( $second )
 350         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 351     return( $first, $second, $arg );
 352 }
 353 ### Path logic
 354
 355 sub add_path {
 356         my $self = shift;
 357
 358         # We only need the IDs for adding paths to the graph, not the reading
 359         # objects themselves.
 360     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 361
 362         $self->_clear_cache;
 363         # Connect the readings
 364     $self->sequence->add_edge( $source, $target );
 365     # Note the witness in question
 366     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 367 };
 368
 369 sub del_path {
 370         my $self = shift;
 371         my @args;
 372         if( ref( $_[0] ) eq 'ARRAY' ) {
 373                 my $e = shift @_;
 374                 @args = ( @$e, @_ );
 375         } else {
 376                 @args = @_;
 377         }
 378
 379         # We only need the IDs for adding paths to the graph, not the reading
 380         # objects themselves.
 381     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 382
 383         $self->_clear_cache;
 384         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 385                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 386         }
 387         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 388                 $self->sequence->delete_edge( $source, $target );
 389         }
 390 }
 391
 392
 393 # Extra graph-alike utility
 394 sub has_path {
 395         my $self = shift;
 396     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 397         return undef unless $self->sequence->has_edge( $source, $target );
 398         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 399 }
 400
 401 =head2 clear_witness( @sigil_list )
 402
 403 Clear the given witnesses out of the collation entirely, removing references
 404 to them in paths, and removing readings that belong only to them.  Should only
 405 be called via $tradition->del_witness.
 406
 407 =cut
 408
 409 sub clear_witness {
 410         my( $self, @sigils ) = @_;
 411
 412         $self->_clear_cache;
 413         # Clear the witness(es) out of the paths
 414         foreach my $e ( $self->paths ) {
 415                 foreach my $sig ( @sigils ) {
 416                         $self->del_path( $e, $sig );
 417                 }
 418         }
 419
 420         # Clear out the newly unused readings
 421         foreach my $r ( $self->readings ) {
 422                 unless( $self->reading_witnesses( $r ) ) {
 423                         $self->del_reading( $r );
 424                 }
 425         }
 426 }
 427
 428 sub add_relationship {
 429         my $self = shift;
 430     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 431     my( @vectors ) = $self->relations->add_relationship( $source,
 432         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 433     # Force a full rank recalculation every time. Yuck.
 434     $self->calculate_ranks() if $self->end->has_rank;
 435         $self->_clear_cache;
 436     return @vectors;
 437 }
 438
 439 =head2 reading_witnesses( $reading )
 440
 441 Return a list of sigils corresponding to the witnesses in which the reading appears.
 442
 443 =cut
 444
 445 sub reading_witnesses {
 446         my( $self, $reading ) = @_;
 447         # We need only check either the incoming or the outgoing edges; I have
 448         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 449         if( $reading eq $self->start ) {
 450                 return map { $_->sigil } $self->tradition->witnesses;
 451         }
 452         my %all_witnesses;
 453         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 454                 my $wits = $self->sequence->get_edge_attributes( @$e );
 455                 @all_witnesses{ keys %$wits } = 1;
 456         }
 457         return keys %all_witnesses;
 458 }
 459
 460 =head1 OUTPUT METHODS
 461
 462 =head2 as_svg( \%options )
 463
 464 Returns an SVG string that represents the graph, via as_dot and graphviz.
 465 See as_dot for a list of options.
 466
 467 =cut
 468
 469 sub as_svg {
 470     my( $self, $opts ) = @_;
 471     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 472     if( !$self->has_cached_svg || $opts->{'recalc'}     || $want_subgraph ) {
 473                 my @cmd = qw/dot -Tsvg/;
 474                 my( $svg, $err );
 475                 my $dotfile = File::Temp->new();
 476                 ## USE FOR DEBUGGING
 477                 # $dotfile->unlink_on_destroy(0);
 478                 binmode $dotfile, ':utf8';
 479                 print $dotfile $self->as_dot( $opts );
 480                 push( @cmd, $dotfile->filename );
 481                 run( \@cmd, ">", binary(), \$svg );
 482                 $svg = decode_utf8( $svg );
 483                 $self->cached_svg( $svg ) unless $want_subgraph;
 484                 return $svg;
 485         } else {
 486                 return $self->cached_svg;
 487         }
 488 }
 489
 490
 491 =head2 as_dot( \%options )
 492
 493 Returns a string that is the collation graph expressed in dot
 494 (i.e. GraphViz) format.  Options include:
 495
 496 =over 4
 497
 498 =item * from
 499
 500 =item * to
 501
 502 =item * color_common
 503
 504 =back
 505
 506 =cut
 507
 508 sub as_dot {
 509     my( $self, $opts ) = @_;
 510     my $startrank = $opts->{'from'} if $opts;
 511     my $endrank = $opts->{'to'} if $opts;
 512     my $color_common = $opts->{'color_common'} if $opts;
 513     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 514        && $self->end->rank > 100;
 515
 516     # Check the arguments
 517     if( $startrank ) {
 518         return if $endrank && $startrank > $endrank;
 519         return if $startrank > $self->end->rank;
 520         }
 521         if( defined $endrank ) {
 522                 return if $endrank < 0;
 523                 $endrank = undef if $endrank == $self->end->rank;
 524         }
 525
 526     my $graph_name = $self->tradition->name;
 527     $graph_name =~ s/[^\w\s]//g;
 528     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 529
 530     my %graph_attrs = (
 531         'rankdir' => 'LR',
 532         'bgcolor' => 'none',
 533         );
 534     my %node_attrs = (
 535         'fontsize' => 14,
 536         'fillcolor' => 'white',
 537         'style' => 'filled',
 538         'shape' => 'ellipse'
 539         );
 540     my %edge_attrs = (
 541         'arrowhead' => 'open',
 542         'color' => '#000000',
 543         'fontcolor' => '#000000',
 544         );
 545
 546     my $dot = sprintf( "digraph %s {\n", $graph_name );
 547     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 548     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 549
 550         # Output substitute start/end readings if necessary
 551         if( $startrank ) {
 552                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 553         }
 554         if( $endrank ) {
 555                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 556         }
 557         if( $STRAIGHTENHACK ) {
 558                 ## HACK part 1
 559                 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
 560                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 561         }
 562         my %used;  # Keep track of the readings that actually appear in the graph
 563         # Sort the readings by rank if we have ranks; this speeds layout.
 564         my @all_readings = $self->end->has_rank
 565                 ? sort { $a->rank <=> $b->rank } $self->readings
 566                 : $self->readings;
 567         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 568     foreach my $reading ( @all_readings ) {
 569         # Only output readings within our rank range.
 570         next if $startrank && $reading->rank < $startrank;
 571         next if $endrank && $reading->rank > $endrank;
 572         $used{$reading->id} = 1;
 573         # Need not output nodes without separate labels
 574         next if $reading->id eq $reading->text;
 575         my $rattrs;
 576         my $label = $reading->text;
 577         $label =~ s/\"/\\\"/g;
 578                 $rattrs->{'label'} = $label;
 579                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 580         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 581     }
 582
 583         # Add the real edges. Need to weight one edge per rank jump, in a
 584         # continuous line.
 585         # my $weighted = $self->_add_edge_weights;
 586     my @edges = $self->paths;
 587         my( %substart, %subend );
 588     foreach my $edge ( @edges ) {
 589         # Do we need to output this edge?
 590         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 591                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 592                         my $variables = { %edge_attrs, 'label' => $label };
 593
 594                         # Account for the rank gap if necessary
 595                         my $rank0 = $self->reading( $edge->[0] )->rank
 596                                 if $self->reading( $edge->[0] )->has_rank;
 597                         my $rank1 = $self->reading( $edge->[1] )->rank
 598                                 if $self->reading( $edge->[1] )->has_rank;
 599                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 600                                 $variables->{'minlen'} = $rank1 - $rank0;
 601                         }
 602
 603                         # Add the calculated edge weights
 604                         # if( exists $weighted->{$edge->[0]}
 605                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 606                         #       # $variables->{'color'} = 'red';
 607                         #       $variables->{'weight'} = 3.0;
 608                         # }
 609
 610                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 611                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 612                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 613
 614                         my $varopts = _dot_attr_string( $variables );
 615                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 616                                 $edge->[0], $edge->[1], $varopts );
 617         } elsif( $used{$edge->[0]} ) {
 618                 $subend{$edge->[0]} = 1;
 619         } elsif( $used{$edge->[1]} ) {
 620                 $substart{$edge->[1]} = 1;
 621         }
 622     }
 623     # Add substitute start and end edges if necessary
 624     foreach my $node ( keys %substart ) {
 625         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 626         my $variables = { %edge_attrs, 'label' => $witstr };
 627         my $varopts = _dot_attr_string( $variables );
 628         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 629         }
 630     foreach my $node ( keys %subend ) {
 631         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 632         my $variables = { %edge_attrs, 'label' => $witstr };
 633         my $varopts = _dot_attr_string( $variables );
 634         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 635         }
 636         # HACK part 2
 637         if( $STRAIGHTENHACK ) {
 638                 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 639         }
 640
 641     $dot .= "}\n";
 642     return $dot;
 643 }
 644
 645 sub _dot_attr_string {
 646         my( $hash ) = @_;
 647         my @attrs;
 648         foreach my $k ( sort keys %$hash ) {
 649                 my $v = $hash->{$k};
 650                 push( @attrs, $k.'="'.$v.'"' );
 651         }
 652         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 653 }
 654
 655 sub _add_edge_weights {
 656         my $self = shift;
 657         # Walk the graph from START to END, choosing the successor node with
 658         # the largest number of witness paths each time.
 659         my $weighted = {};
 660         my $curr = $self->start->id;
 661         my $ranked = $self->end->has_rank;
 662         while( $curr ne $self->end->id ) {
 663                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 664                 my @succ = sort { $self->path_witnesses( $curr, $a )
 665                                                         <=> $self->path_witnesses( $curr, $b ) }
 666                         $self->sequence->successors( $curr );
 667                 my $next = pop @succ;
 668                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 669                 # Try to avoid lacunae in the weighted path.
 670                 while( @succ &&
 671                            ( $self->reading( $next )->is_lacuna ||
 672                                  $nextrank - $rank > 1 ) ){
 673                         $next = pop @succ;
 674                 }
 675                 $weighted->{$curr} = $next;
 676                 $curr = $next;
 677         }
 678         return $weighted;
 679 }
 680
 681 =head2 path_witnesses( $edge )
 682
 683 Returns the list of sigils whose witnesses are associated with the given edge.
 684 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 685
 686 =cut
 687
 688 sub path_witnesses {
 689         my( $self, @edge ) = @_;
 690         # If edge is an arrayref, cope.
 691         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 692                 my $e = shift @edge;
 693                 @edge = @$e;
 694         }
 695         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 696         return @wits;
 697 }
 698
 699 sub _path_display_label {
 700         my $self = shift;
 701         my @wits = sort @_;
 702         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 703         if( scalar @wits > $maj ) {
 704                 # TODO break out a.c. wits
 705                 return 'majority';
 706         } else {
 707                 return join( ', ', @wits );
 708         }
 709 }
 710
 711 =head2 witnesses_at_rank
 712
 713 Returns a list of witnesses that are not lacunose, for a given rank.
 714
 715 =cut
 716
 717 sub witnesses_at_rank {
 718         my( $self, $rank ) = @_;
 719 }
 720
 721 =head2 as_graphml
 722
 723 Returns a GraphML representation of the collation.  The GraphML will contain
 724 two graphs. The first expresses the attributes of the readings and the witness
 725 paths that link them; the second expresses the relationships that link the
 726 readings.  This is the native transfer format for a tradition.
 727
 728 =begin testing
 729
 730 use Text::Tradition;
 731
 732 my $READINGS = 311;
 733 my $PATHS = 361;
 734
 735 my $datafile = 't/data/florilegium_tei_ps.xml';
 736 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 737                                       'name' => 'test0',
 738                                       'file' => $datafile,
 739                                       'linear' => 1 );
 740
 741 ok( $tradition, "Got a tradition object" );
 742 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 743 ok( $tradition->collation, "Tradition has a collation" );
 744
 745 my $c = $tradition->collation;
 746 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 747 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 748 is( scalar $c->relationships, 0, "Collation has all relationships" );
 749
 750 # Add a few relationships
 751 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 752 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 753 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 754
 755 # Now write it to GraphML and parse it again.
 756
 757 my $graphml = $c->as_graphml;
 758 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 759 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 760 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 761 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 762
 763 =end testing
 764
 765 =cut
 766
 767 sub as_graphml {
 768     my( $self ) = @_;
 769
 770     # Some namespaces
 771     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 772     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 773     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 774         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 775
 776     # Create the document and root node
 777     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 778     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 779     $graphml->setDocumentElement( $root );
 780     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 781     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 782
 783     # Add the data keys for the graph
 784     my %graph_data_keys;
 785     my $gdi = 0;
 786     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 787     foreach my $datum ( @graph_attributes ) {
 788         $graph_data_keys{$datum} = 'dg'.$gdi++;
 789         my $key = $root->addNewChild( $graphml_ns, 'key' );
 790         $key->setAttribute( 'attr.name', $datum );
 791         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 792         $key->setAttribute( 'for', 'graph' );
 793         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 794     }
 795
 796     # Add the data keys for nodes
 797     my %node_data_keys;
 798     my $ndi = 0;
 799     my %node_data = (
 800         id => 'string',
 801         text => 'string',
 802         rank => 'string',
 803         is_start => 'boolean',
 804         is_end => 'boolean',
 805         is_lacuna => 'boolean',
 806         );
 807     foreach my $datum ( keys %node_data ) {
 808         $node_data_keys{$datum} = 'dn'.$ndi++;
 809         my $key = $root->addNewChild( $graphml_ns, 'key' );
 810         $key->setAttribute( 'attr.name', $datum );
 811         $key->setAttribute( 'attr.type', $node_data{$datum} );
 812         $key->setAttribute( 'for', 'node' );
 813         $key->setAttribute( 'id', $node_data_keys{$datum} );
 814     }
 815
 816     # Add the data keys for edges, i.e. witnesses
 817     my $edi = 0;
 818     my %edge_data_keys;
 819     my %edge_data = (
 820         class => 'string',                              # Class, deprecated soon
 821         witness => 'string',                    # ID/label for a path
 822         relationship => 'string',               # ID/label for a relationship
 823         extra => 'boolean',                             # Path key
 824         scope => 'string',                              # Relationship key
 825         non_correctable => 'boolean',   # Relationship key
 826         non_independent => 'boolean',   # Relationship key
 827         );
 828     foreach my $datum ( keys %edge_data ) {
 829         $edge_data_keys{$datum} = 'de'.$edi++;
 830         my $key = $root->addNewChild( $graphml_ns, 'key' );
 831         $key->setAttribute( 'attr.name', $datum );
 832         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 833         $key->setAttribute( 'for', 'edge' );
 834         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 835     }
 836
 837     # Add the collation graph itself
 838     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 839     $sgraph->setAttribute( 'edgedefault', 'directed' );
 840     $sgraph->setAttribute( 'id', $self->tradition->name );
 841     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 842     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 843     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 844     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 845     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 846
 847     # Collation attribute data
 848     foreach my $datum ( @graph_attributes ) {
 849         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 850                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 851         }
 852
 853     my $node_ctr = 0;
 854     my %node_hash;
 855     # Add our readings to the graph
 856     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 857         # Add to the main graph
 858         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 859         my $node_xmlid = 'n' . $node_ctr++;
 860         $node_hash{ $n->id } = $node_xmlid;
 861         $node_el->setAttribute( 'id', $node_xmlid );
 862         foreach my $d ( keys %node_data ) {
 863                 my $nval = $n->$d;
 864                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 865                         if defined $nval;
 866         }
 867     }
 868
 869     # Add the path edges to the sequence graph
 870     my $edge_ctr = 0;
 871     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 872         # We add an edge in the graphml for every witness in $e.
 873         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 874                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 875                                                                                 $node_hash{ $e->[0] },
 876                                                                                 $node_hash{ $e->[1] } );
 877                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 878                         $edge_el->setAttribute( 'source', $from );
 879                         $edge_el->setAttribute( 'target', $to );
 880                         $edge_el->setAttribute( 'id', $id );
 881
 882                         # It's a witness path, so add the witness
 883                         my $base = $wit;
 884                         my $key = $edge_data_keys{'witness'};
 885                         # Is this an ante-corr witness?
 886                         my $aclabel = $self->ac_label;
 887                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 888                                 # Keep the base witness
 889                                 $base = $1;
 890                                 # ...and record that this is an 'extra' reading path
 891                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 892                         }
 893                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 894                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 895                 }
 896         }
 897
 898         # Add the relationship graph to the XML
 899         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 900                 $node_data_keys{'id'}, \%edge_data_keys );
 901
 902     # Save and return the thing
 903     my $result = decode_utf8( $graphml->toString(1) );
 904     return $result;
 905 }
 906
 907 sub _add_graphml_data {
 908     my( $el, $key, $value ) = @_;
 909     return unless defined $value;
 910     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 911     $data_el->setAttribute( 'key', $key );
 912     $data_el->appendText( $value );
 913 }
 914
 915 =head2 as_csv
 916
 917 Returns a CSV alignment table representation of the collation graph, one
 918 row per witness (or witness uncorrected.)
 919
 920 =cut
 921
 922 sub as_csv {
 923     my( $self ) = @_;
 924     my $table = $self->alignment_table;
 925     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 926     my @result;
 927     # Make the header row
 928     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 929         push( @result, decode_utf8( $csv->string ) );
 930     # Make the rest of the rows
 931     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 932         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 933         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
 934         $csv->combine( @row );
 935         push( @result, decode_utf8( $csv->string ) );
 936     }
 937     return join( "\n", @result );
 938 }
 939
 940 =head2 alignment_table( $use_refs, $include_witnesses )
 941
 942 Return a reference to an alignment table, in a slightly enhanced CollateX
 943 format which looks like this:
 944
 945  $table = { alignment => [ { witness => "SIGIL",
 946                              tokens => [ { t => "TEXT" }, ... ] },
 947                            { witness => "SIG2",
 948                              tokens => [ { t => "TEXT" }, ... ] },
 949                            ... ],
 950             length => TEXTLEN };
 951
 952 If $use_refs is set to 1, the reading object is returned in the table
 953 instead of READINGTEXT; if not, the text of the reading is returned.
 954
 955 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 956 keys have a true hash value will be included.
 957
 958 =cut
 959
 960 sub alignment_table {
 961     my( $self ) = @_;
 962     my $include; # see if we can ditch this
 963     return $self->cached_table if $self->has_cached_table;
 964
 965     # Make sure we can do this
 966         throw( "Need a linear graph in order to make an alignment table" )
 967                 unless $self->linear;
 968         $self->calculate_ranks unless $self->end->has_rank;
 969
 970     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 971     my @all_pos = ( 1 .. $self->end->rank - 1 );
 972     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 973         if( $include ) {
 974                 next unless $include->{$wit->sigil};
 975         }
 976         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 977         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 978         my @row = _make_witness_row( \@wit_path, \@all_pos );
 979         push( @{$table->{'alignment'}},
 980                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 981         if( $wit->is_layered ) {
 982                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 983                         $wit->sigil.$self->ac_label );
 984             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
 985                         push( @{$table->{'alignment'}},
 986                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 987         }
 988     }
 989     $self->cached_table( $table );
 990     return $table;
 991 }
 992
 993 sub _make_witness_row {
 994     my( $path, $positions ) = @_;
 995     my %char_hash;
 996     map { $char_hash{$_} = undef } @$positions;
 997     my $debug = 0;
 998     foreach my $rdg ( @$path ) {
 999         my $rtext = $rdg->text;
1000         $rtext = '#LACUNA#' if $rdg->is_lacuna;
1001         print STDERR "rank " . $rdg->rank . "\n" if $debug;
1002         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1003         $char_hash{$rdg->rank} = { 't' => $rdg };
1004     }
1005     my @row = map { $char_hash{$_} } @$positions;
1006     # Fill in lacuna markers for undef spots in the row
1007     my $last_el = shift @row;
1008     my @filled_row = ( $last_el );
1009     foreach my $el ( @row ) {
1010         # If we are using node reference, make the lacuna node appear many times
1011         # in the table.  If not, use the lacuna tag.
1012         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1013             $el = $last_el;
1014         }
1015         push( @filled_row, $el );
1016         $last_el = $el;
1017     }
1018     return @filled_row;
1019 }
1020
1021 =head1 NAVIGATION METHODS
1022
1023 =head2 reading_sequence( $first, $last, $sigil, $backup )
1024
1025 Returns the ordered list of readings, starting with $first and ending
1026 with $last, for the witness given in $sigil. If a $backup sigil is
1027 specified (e.g. when walking a layered witness), it will be used wherever
1028 no $sigil path exists.  If there is a base text reading, that will be
1029 used wherever no path exists for $sigil or $backup.
1030
1031 =cut
1032
1033 # TODO Think about returning some lazy-eval iterator.
1034 # TODO Get rid of backup; we should know from what witness is whether we need it.
1035
1036 sub reading_sequence {
1037     my( $self, $start, $end, $witness ) = @_;
1038
1039     $witness = $self->baselabel unless $witness;
1040     my @readings = ( $start );
1041     my %seen;
1042     my $n = $start;
1043     while( $n && $n->id ne $end->id ) {
1044         if( exists( $seen{$n->id} ) ) {
1045             throw( "Detected loop for $witness at " . $n->id );
1046         }
1047         $seen{$n->id} = 1;
1048
1049         my $next = $self->next_reading( $n, $witness );
1050         unless( $next ) {
1051             throw( "Did not find any path for $witness from reading " . $n->id );
1052         }
1053         push( @readings, $next );
1054         $n = $next;
1055     }
1056     # Check that the last reading is our end reading.
1057     my $last = $readings[$#readings];
1058     throw( "Last reading found from " . $start->text .
1059         " for witness $witness is not the end!" ) # TODO do we get this far?
1060         unless $last->id eq $end->id;
1061
1062     return @readings;
1063 }
1064
1065 =head2 next_reading( $reading, $sigil );
1066
1067 Returns the reading that follows the given reading along the given witness
1068 path.
1069
1070 =cut
1071
1072 sub next_reading {
1073     # Return the successor via the corresponding path.
1074     my $self = shift;
1075     my $answer = $self->_find_linked_reading( 'next', @_ );
1076         return undef unless $answer;
1077     return $self->reading( $answer );
1078 }
1079
1080 =head2 prior_reading( $reading, $sigil )
1081
1082 Returns the reading that precedes the given reading along the given witness
1083 path.
1084
1085 =cut
1086
1087 sub prior_reading {
1088     # Return the predecessor via the corresponding path.
1089     my $self = shift;
1090     my $answer = $self->_find_linked_reading( 'prior', @_ );
1091     return $self->reading( $answer );
1092 }
1093
1094 sub _find_linked_reading {
1095     my( $self, $direction, $node, $path ) = @_;
1096
1097     # Get a backup if we are dealing with a layered witness
1098     my $alt_path;
1099     my $aclabel = $self->ac_label;
1100     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1101         $alt_path = $1;
1102     }
1103
1104     my @linked_paths = $direction eq 'next'
1105         ? $self->sequence->edges_from( $node )
1106         : $self->sequence->edges_to( $node );
1107     return undef unless scalar( @linked_paths );
1108
1109     # We have to find the linked path that contains all of the
1110     # witnesses supplied in $path.
1111     my( @path_wits, @alt_path_wits );
1112     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1113     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1114     my $base_le;
1115     my $alt_le;
1116     foreach my $le ( @linked_paths ) {
1117         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1118             $base_le = $le;
1119         }
1120                 my @le_wits = sort $self->path_witnesses( $le );
1121                 if( _is_within( \@path_wits, \@le_wits ) ) {
1122                         # This is the right path.
1123                         return $direction eq 'next' ? $le->[1] : $le->[0];
1124                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1125                         $alt_le = $le;
1126                 }
1127     }
1128     # Got this far? Return the alternate path if it exists.
1129     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1130         if $alt_le;
1131
1132     # Got this far? Return the base path if it exists.
1133     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1134         if $base_le;
1135
1136     # Got this far? We have no appropriate path.
1137     warn "Could not find $direction node from " . $node->id
1138         . " along path $path";
1139     return undef;
1140 }
1141
1142 # Some set logic.
1143 sub _is_within {
1144     my( $set1, $set2 ) = @_;
1145     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1146     foreach my $el ( @$set1 ) {
1147         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1148     }
1149     return $ret;
1150 }
1151
1152 # Return the string that joins together a list of witnesses for
1153 # display on a single path.
1154 sub _witnesses_of_label {
1155     my( $self, $label ) = @_;
1156     my $regex = $self->wit_list_separator;
1157     my @answer = split( /\Q$regex\E/, $label );
1158     return @answer;
1159 }
1160
1161 =head2 common_readings
1162
1163 Returns the list of common readings in the graph (i.e. those readings that are
1164 shared by all non-lacunose witnesses.)
1165
1166 =cut
1167
1168 sub common_readings {
1169         my $self = shift;
1170         my @common = grep { $_->is_common } $self->readings;
1171         return @common;
1172 }
1173
1174 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1175
1176 Returns the text of a witness (plus its backup, if we are using a layer)
1177 as stored in the collation.  The text is returned as a string, where the
1178 individual readings are joined with spaces and the meta-readings (e.g.
1179 lacunae) are omitted.  Optional specification of $start and $end allows
1180 the generation of a subset of the witness text.
1181
1182 =cut
1183
1184 sub path_text {
1185         my( $self, $wit, $start, $end ) = @_;
1186         $start = $self->start unless $start;
1187         $end = $self->end unless $end;
1188         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1189         return join( ' ', map { $_->text } @path );
1190 }
1191
1192 =head1 INITIALIZATION METHODS
1193
1194 These are mostly for use by parsers.
1195
1196 =head2 make_witness_path( $witness )
1197
1198 Link the array of readings contained in $witness->path (and in
1199 $witness->uncorrected_path if it exists) into collation paths.
1200 Clear out the arrays when finished.
1201
1202 =head2 make_witness_paths
1203
1204 Call make_witness_path for all witnesses in the tradition.
1205
1206 =cut
1207
1208 # For use when a collation is constructed from a base text and an apparatus.
1209 # We have the sequences of readings and just need to add path edges.
1210 # When we are done, clear out the witness path attributes, as they are no
1211 # longer needed.
1212 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1213
1214 sub make_witness_paths {
1215     my( $self ) = @_;
1216     foreach my $wit ( $self->tradition->witnesses ) {
1217         # print STDERR "Making path for " . $wit->sigil . "\n";
1218         $self->make_witness_path( $wit );
1219     }
1220 }
1221
1222 sub make_witness_path {
1223     my( $self, $wit ) = @_;
1224     my @chain = @{$wit->path};
1225     my $sig = $wit->sigil;
1226     foreach my $idx ( 0 .. $#chain-1 ) {
1227         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1228     }
1229     if( $wit->is_layered ) {
1230         @chain = @{$wit->uncorrected_path};
1231         foreach my $idx( 0 .. $#chain-1 ) {
1232             my $source = $chain[$idx];
1233             my $target = $chain[$idx+1];
1234             $self->add_path( $source, $target, $sig.$self->ac_label )
1235                 unless $self->has_path( $source, $target, $sig );
1236         }
1237     }
1238     $wit->clear_path;
1239     $wit->clear_uncorrected_path;
1240 }
1241
1242 =head2 calculate_ranks
1243
1244 Calculate the reading ranks (that is, their aligned positions relative
1245 to each other) for the graph.  This can only be called on linear collations.
1246
1247 =begin testing
1248
1249 use Text::Tradition;
1250
1251 my $cxfile = 't/data/Collatex-16.xml';
1252 my $t = Text::Tradition->new(
1253     'name'  => 'inline',
1254     'input' => 'CollateX',
1255     'file'  => $cxfile,
1256     );
1257 my $c = $t->collation;
1258
1259 # Make an svg
1260 my $svg = $c->as_svg;
1261 is( substr( $svg, 0, 5 ), '<?xml', "Got XML doc for svg" );
1262 ok( $c->has_cached_svg, "SVG was cached" );
1263 is( $c->as_svg, $svg, "Cached SVG returned upon second call" );
1264 $c->calculate_ranks;
1265 is( $c->as_svg, $svg, "Cached SVG retained with no rank change" );
1266 $c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
1267 isnt( $c->as_svg, $svg, "SVG changed after relationship add" );
1268
1269 =end testing
1270
1271 =cut
1272
1273 sub calculate_ranks {
1274     my $self = shift;
1275     # Save the existing ranks, in case we need to invalidate the cached SVG.
1276     my %existing_ranks;
1277     # Walk a version of the graph where every node linked by a relationship
1278     # edge is fundamentally the same node, and do a topological ranking on
1279     # the nodes in this graph.
1280     my $topo_graph = Graph->new();
1281     my %rel_containers;
1282     my $rel_ctr = 0;
1283     # Add the nodes
1284     foreach my $r ( $self->readings ) {
1285         next if exists $rel_containers{$r->id};
1286         my @rels = $r->related_readings( 'colocated' );
1287         if( @rels ) {
1288             # Make a relationship container.
1289             push( @rels, $r );
1290             my $rn = 'rel_container_' . $rel_ctr++;
1291             $topo_graph->add_vertex( $rn );
1292             foreach( @rels ) {
1293                 $rel_containers{$_->id} = $rn;
1294             }
1295         } else {
1296             # Add a new node to mirror the old node.
1297             $rel_containers{$r->id} = $r->id;
1298             $topo_graph->add_vertex( $r->id );
1299         }
1300     }
1301
1302     # Add the edges.
1303     foreach my $r ( $self->readings ) {
1304                 $existing_ranks{$r} = $r->rank;
1305         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1306                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1307                         $rel_containers{$n} );
1308                 # $DB::single = 1 unless $tfrom && $tto;
1309             $topo_graph->add_edge( $tfrom, $tto );
1310         }
1311     }
1312
1313     # Now do the rankings, starting with the start node.
1314     my $topo_start = $rel_containers{$self->start->id};
1315     my $node_ranks = { $topo_start => 0 };
1316     my @curr_origin = ( $topo_start );
1317     # A little iterative function.
1318     while( @curr_origin ) {
1319         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1320     }
1321     # Transfer our rankings from the topological graph to the real one.
1322     foreach my $r ( $self->readings ) {
1323         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1324             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1325         } else {
1326                 # Die. Find the last rank we calculated.
1327                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1328                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1329                         $self->readings;
1330                 my $last = pop @all_defined;
1331             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1332         }
1333     }
1334     # Do we need to invalidate the cached SVG?
1335     if( $self->has_cached_svg ) {
1336         foreach my $r ( $self->readings ) {
1337                 next if $existing_ranks{$r} == $r->rank;
1338                 $self->wipe_svg;
1339                 last;
1340         }
1341     }
1342 }
1343
1344 sub _assign_rank {
1345     my( $graph, $node_ranks, @current_nodes ) = @_;
1346     # Look at each of the children of @current_nodes.  If all the child's
1347     # parents have a rank, assign it the highest rank + 1 and add it to
1348     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1349     # parent gets a rank.
1350     my @next_nodes;
1351     foreach my $c ( @current_nodes ) {
1352         warn "Current reading $c has no rank!"
1353             unless exists $node_ranks->{$c};
1354         # print STDERR "Looking at child of node $c, rank "
1355         #     . $node_ranks->{$c} . "\n";
1356         foreach my $child ( $graph->successors( $c ) ) {
1357             next if exists $node_ranks->{$child};
1358             my $highest_rank = -1;
1359             my $skip = 0;
1360             foreach my $parent ( $graph->predecessors( $child ) ) {
1361                 if( exists $node_ranks->{$parent} ) {
1362                     $highest_rank = $node_ranks->{$parent}
1363                         if $highest_rank <= $node_ranks->{$parent};
1364                 } else {
1365                     $skip = 1;
1366                     last;
1367                 }
1368             }
1369             next if $skip;
1370             my $c_rank = $highest_rank + 1;
1371             # print STDERR "Assigning rank $c_rank to node $child \n";
1372             $node_ranks->{$child} = $c_rank;
1373             push( @next_nodes, $child );
1374         }
1375     }
1376     return @next_nodes;
1377 }
1378
1379 =head2 flatten_ranks
1380
1381 A convenience method for parsing collation data.  Searches the graph for readings
1382 with the same text at the same rank, and merges any that are found.
1383
1384 =cut
1385
1386 sub flatten_ranks {
1387     my $self = shift;
1388     my %unique_rank_rdg;
1389     foreach my $rdg ( $self->readings ) {
1390         next unless $rdg->has_rank;
1391         my $key = $rdg->rank . "||" . $rdg->text;
1392         if( exists $unique_rank_rdg{$key} ) {
1393             # Combine!
1394                 # print STDERR "Combining readings at same rank: $key\n";
1395             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1396             # TODO see if this now makes a common point.
1397         } else {
1398             $unique_rank_rdg{$key} = $rdg;
1399         }
1400     }
1401 }
1402
1403 =head2 remove_collations
1404
1405 Another convenience method for parsing. Removes all 'collation' relationships
1406 that were defined in order to get the reading ranks to be correct.
1407
1408 =begin testing
1409
1410 use Text::Tradition;
1411
1412 my $cxfile = 't/data/Collatex-16.xml';
1413 my $t = Text::Tradition->new(
1414     'name'  => 'inline',
1415     'input' => 'CollateX',
1416     'file'  => $cxfile,
1417     );
1418 my $c = $t->collation;
1419
1420 isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
1421 $c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );
1422 is( scalar $c->relationships, 4, "Found all expected relationships" );
1423 $c->remove_collations;
1424 is( scalar $c->relationships, 3, "Collated relationships now gone" );
1425 is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
1426
1427 =end testing
1428
1429 =cut
1430
1431 sub remove_collations {
1432         my $self = shift;
1433         foreach my $reledge ( $self->relationships ) {
1434                 my $relobj = $self->relations->get_relationship( $reledge );
1435                 if( $relobj && $relobj->type eq 'collated' ) {
1436                         $self->relations->delete_relationship( $reledge );
1437                 }
1438         }
1439 }
1440
1441
1442 =head2 calculate_common_readings
1443
1444 Goes through the graph identifying the readings that appear in every witness
1445 (apart from those with lacunae at that spot.) Marks them as common and returns
1446 the list.
1447
1448 =begin testing
1449
1450 use Text::Tradition;
1451
1452 my $cxfile = 't/data/Collatex-16.xml';
1453 my $t = Text::Tradition->new(
1454     'name'  => 'inline',
1455     'input' => 'CollateX',
1456     'file'  => $cxfile,
1457     );
1458 my $c = $t->collation;
1459
1460 my @common = $c->calculate_common_readings();
1461 is( scalar @common, 8, "Found correct number of common readings" );
1462 my @marked = sort $c->common_readings();
1463 is( scalar @common, 8, "All common readings got marked as such" );
1464 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1465 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1466
1467 =end testing
1468
1469 =cut
1470
1471 sub calculate_common_readings {
1472         my $self = shift;
1473         my @common;
1474         my $table = $self->alignment_table;
1475         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1476                 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1477                 my %hash;
1478                 foreach my $r ( @row ) {
1479                         if( $r ) {
1480                                 $hash{$r->id} = $r unless $r->is_meta;
1481                         } else {
1482                                 $hash{'UNDEF'} = $r;
1483                         }
1484                 }
1485                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1486                         my( $r ) = values %hash;
1487                         $r->is_common( 1 );
1488                         push( @common, $r );
1489                 }
1490         }
1491         return @common;
1492 }
1493
1494 =head2 text_from_paths
1495
1496 Calculate the text array for all witnesses from the path, for later consistency
1497 checking.  Only to be used if there is no non-graph-based way to know the
1498 original texts.
1499
1500 =cut
1501
1502 sub text_from_paths {
1503         my $self = shift;
1504     foreach my $wit ( $self->tradition->witnesses ) {
1505         my @text = split( /\s+/,
1506                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1507         $wit->text( \@text );
1508         if( $wit->is_layered ) {
1509                         my @uctext = split( /\s+/,
1510                                 $self->reading_sequence( $self->start, $self->end,
1511                                         $wit->sigil.$self->ac_label ) );
1512                         $wit->text( \@uctext );
1513         }
1514     }
1515 }
1516
1517 =head1 UTILITY FUNCTIONS
1518
1519 =head2 common_predecessor( $reading_a, $reading_b )
1520
1521 Find the last reading that occurs in sequence before both the given readings.
1522
1523 =head2 common_successor( $reading_a, $reading_b )
1524
1525 Find the first reading that occurs in sequence after both the given readings.
1526
1527 =begin testing
1528
1529 use Text::Tradition;
1530
1531 my $cxfile = 't/data/Collatex-16.xml';
1532 my $t = Text::Tradition->new(
1533     'name'  => 'inline',
1534     'input' => 'CollateX',
1535     'file'  => $cxfile,
1536     );
1537 my $c = $t->collation;
1538
1539 is( $c->common_predecessor( 'n9', 'n23' )->id,
1540     'n20', "Found correct common predecessor" );
1541 is( $c->common_successor( 'n9', 'n23' )->id,
1542     '#END#', "Found correct common successor" );
1543
1544 is( $c->common_predecessor( 'n19', 'n17' )->id,
1545     'n16', "Found correct common predecessor for readings on same path" );
1546 is( $c->common_successor( 'n21', 'n26' )->id,
1547     '#END#', "Found correct common successor for readings on same path" );
1548
1549 =end testing
1550
1551 =cut
1552
1553 ## Return the closest reading that is a predecessor of both the given readings.
1554 sub common_predecessor {
1555         my $self = shift;
1556         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1557         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1558 }
1559
1560 sub common_successor {
1561         my $self = shift;
1562         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1563         return $self->_common_in_path( $r1, $r2, 'successors' );
1564 }
1565
1566 sub _common_in_path {
1567         my( $self, $r1, $r2, $dir ) = @_;
1568         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1569         $iter = $self->end->rank - $iter if $dir eq 'successors';
1570         my @candidates;
1571         my @last_checked = ( $r1, $r2 );
1572         my %all_seen;
1573         while( !@candidates ) {
1574                 my @new_lc;
1575                 foreach my $lc ( @last_checked ) {
1576                         foreach my $p ( $lc->$dir ) {
1577                                 if( $all_seen{$p->id} ) {
1578                                         push( @candidates, $p );
1579                                 } else {
1580                                         $all_seen{$p->id} = 1;
1581                                         push( @new_lc, $p );
1582                                 }
1583                         }
1584                 }
1585                 @last_checked = @new_lc;
1586         }
1587         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1588         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1589 }
1590
1591 sub throw {
1592         Text::Tradition::Error->throw(
1593                 'ident' => 'Collation error',
1594                 'message' => $_[0],
1595                 );
1596 }
1597
1598 no Moose;
1599 __PACKAGE__->meta->make_immutable;
1600
1601 =head1 LICENSE
1602
1603 This package is free software and is provided "as is" without express
1604 or implied warranty.  You can redistribute it and/or modify it under
1605 the same terms as Perl itself.
1606
1607 =head1 AUTHOR
1608
1609 Tara L Andrews E<lt>aurum@cpan.orgE<gt>