lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30         },
  31         writer => '_set_relations',
  32         );
  33
  34 has 'tradition' => (
  35     is => 'ro',
  36     isa => 'Text::Tradition',
  37     weak_ref => 1,
  38     );
  39
  40 has 'readings' => (
  41         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  42         traits => ['Hash'],
  43     handles => {
  44         reading     => 'get',
  45         _add_reading => 'set',
  46         del_reading => 'delete',
  47         has_reading => 'exists',
  48         readings   => 'values',
  49     },
  50     default => sub { {} },
  51         );
  52
  53 has 'wit_list_separator' => (
  54     is => 'rw',
  55     isa => 'Str',
  56     default => ', ',
  57     );
  58
  59 has 'baselabel' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => 'base text',
  63     );
  64
  65 has 'linear' => (
  66     is => 'rw',
  67     isa => 'Bool',
  68     default => 1,
  69     );
  70
  71 has 'ac_label' => (
  72     is => 'rw',
  73     isa => 'Str',
  74     default => ' (a.c.)',
  75     );
  76
  77 has 'start' => (
  78         is => 'ro',
  79         isa => 'Text::Tradition::Collation::Reading',
  80         writer => '_set_start',
  81         weak_ref => 1,
  82         );
  83
  84 has 'end' => (
  85         is => 'ro',
  86         isa => 'Text::Tradition::Collation::Reading',
  87         writer => '_set_end',
  88         weak_ref => 1,
  89         );
  90
  91 has 'cached_svg' => (
  92         is => 'rw',
  93         isa => 'Str',
  94         predicate => 'has_cached_svg',
  95         clearer => 'wipe_svg',
  96         );
  97
  98 =head1 NAME
  99
 100 Text::Tradition::Collation - a software model for a text collation
 101
 102 =head1 SYNOPSIS
 103
 104   use Text::Tradition;
 105   my $t = Text::Tradition->new(
 106     'name' => 'this is a text',
 107     'input' => 'TEI',
 108     'file' => '/path/to/tei_parallel_seg_file.xml' );
 109
 110   my $c = $t->collation;
 111   my @readings = $c->readings;
 112   my @paths = $c->paths;
 113   my @relationships = $c->relationships;
 114
 115   my $svg_variant_graph = $t->collation->as_svg();
 116
 117 =head1 DESCRIPTION
 118
 119 Text::Tradition is a library for representation and analysis of collated
 120 texts, particularly medieval ones.  The Collation is the central feature of
 121 a Tradition, where the text, its sequence of readings, and its relationships
 122 between readings are actually kept.
 123
 124 =head1 CONSTRUCTOR
 125
 126 =head2 new
 127
 128 The constructor.  Takes a hash or hashref of the following arguments:
 129
 130 =over
 131
 132 =item * tradition - The Text::Tradition object to which the collation
 133 belongs. Required.
 134
 135 =item * linear - Whether the collation should be linear; that is, whether
 136 transposed readings should be treated as two linked readings rather than one,
 137 and therefore whether the collation graph is acyclic.  Defaults to true.
 138
 139 =item * baselabel - The default label for the path taken by a base text
 140 (if any). Defaults to 'base text'.
 141
 142 =item * wit_list_separator - The string to join a list of witnesses for
 143 purposes of making labels in display graphs.  Defaults to ', '.
 144
 145 =item * ac_label - The extra label to tack onto a witness sigil when
 146 representing another layer of path for the given witness - that is, when
 147 a text has more than one possible reading due to scribal corrections or
 148 the like.  Defaults to ' (a.c.)'.
 149
 150 =back
 151
 152 =head1 ACCESSORS
 153
 154 =head2 tradition
 155
 156 =head2 linear
 157
 158 =head2 wit_list_separator
 159
 160 =head2 baselabel
 161
 162 =head2 ac_label
 163
 164 Simple accessors for collation attributes.
 165
 166 =head2 start
 167
 168 The meta-reading at the start of every witness path.
 169
 170 =head2 end
 171
 172 The meta-reading at the end of every witness path.
 173
 174 =head2 readings
 175
 176 Returns all Reading objects in the graph.
 177
 178 =head2 reading( $id )
 179
 180 Returns the Reading object corresponding to the given ID.
 181
 182 =head2 add_reading( $reading_args )
 183
 184 Adds a new reading object to the collation.
 185 See L<Text::Tradition::Collation::Reading> for the available arguments.
 186
 187 =head2 del_reading( $object_or_id )
 188
 189 Removes the given reading from the collation, implicitly removing its
 190 paths and relationships.
 191
 192 =head2 merge_readings( $main, $second )
 193
 194 Merges the $second reading into the $main one.
 195 The arguments may be either readings or reading IDs.
 196
 197 =head2 has_reading( $id )
 198
 199 Predicate to see whether a given reading ID is in the graph.
 200
 201 =head2 reading_witnesses( $object_or_id )
 202
 203 Returns a list of sigils whose witnesses contain the reading.
 204
 205 =head2 paths
 206
 207 Returns all reading paths within the document - that is, all edges in the
 208 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 209
 210 =head2 add_path( $source, $target, $sigil )
 211
 212 Links the given readings in the collation in sequence, under the given witness
 213 sigil.  The readings may be specified by object or ID.
 214
 215 =head2 del_path( $source, $target, $sigil )
 216
 217 Links the given readings in the collation in sequence, under the given witness
 218 sigil.  The readings may be specified by object or ID.
 219
 220 =head2 has_path( $source, $target );
 221
 222 Returns true if the two readings are linked in sequence in any witness.
 223 The readings may be specified by object or ID.
 224
 225 =head2 relationships
 226
 227 Returns all Relationship objects in the collation.
 228
 229 =head2 add_relationship( $reading, $other_reading, $options )
 230
 231 Adds a new relationship of the type given in $options between the two readings,
 232 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 233 where $status is true on success, and @vectors is a list of relationship edges
 234 that were ultimately added.
 235 See L<Text::Tradition::Collation::Relationship> for the available options.
 236
 237 =cut
 238
 239 sub BUILD {
 240     my $self = shift;
 241     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 242     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 243     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 244 }
 245
 246 ### Reading construct/destruct functions
 247
 248 sub add_reading {
 249         my( $self, $reading ) = @_;
 250         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 251                 my %args = %$reading;
 252                 $reading = Text::Tradition::Collation::Reading->new(
 253                         'collation' => $self,
 254                         %args );
 255         }
 256         # First check to see if a reading with this ID exists.
 257         if( $self->reading( $reading->id ) ) {
 258                 throw( "Collation already has a reading with id " . $reading->id );
 259         }
 260         $self->_add_reading( $reading->id => $reading );
 261         # Once the reading has been added, put it in both graphs.
 262         $self->sequence->add_vertex( $reading->id );
 263         $self->relations->add_reading( $reading->id );
 264         return $reading;
 265 };
 266
 267 around del_reading => sub {
 268         my $orig = shift;
 269         my $self = shift;
 270         my $arg = shift;
 271
 272         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 273                 $arg = $arg->id;
 274         }
 275         # Remove the reading from the graphs.
 276         $self->sequence->delete_vertex( $arg );
 277         $self->relations->delete_reading( $arg );
 278
 279         # Carry on.
 280         $self->$orig( $arg );
 281 };
 282
 283 # merge_readings( $main, $to_be_deleted );
 284
 285 sub merge_readings {
 286         my $self = shift;
 287
 288         # We only need the IDs for adding paths to the graph, not the reading
 289         # objects themselves.
 290     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 291
 292     # The kept reading should inherit the paths and the relationships
 293     # of the deleted reading.
 294         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 295                 my @vector = ( $kept );
 296                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 297                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 298                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 299                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 300                 $self->sequence->add_edge( @vector );
 301                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 302                 @wits{keys %$fwits} = values %$fwits;
 303                 $self->sequence->set_edge_attributes( @vector, \%wits );
 304         }
 305         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 306
 307         # Do the deletion deed.
 308         if( $combine_char ) {
 309                 my $kept_obj = $self->reading( $kept );
 310                 my $new_text = join( $combine_char, $kept_obj->text,
 311                         $self->reading( $deleted )->text );
 312                 $kept_obj->alter_text( $new_text );
 313         }
 314         $self->del_reading( $deleted );
 315 }
 316
 317
 318 # Helper function for manipulating the graph.
 319 sub _stringify_args {
 320         my( $self, $first, $second, $arg ) = @_;
 321     $first = $first->id
 322         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 323     $second = $second->id
 324         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 325     return( $first, $second, $arg );
 326 }
 327
 328 # Helper function for manipulating the graph.
 329 sub _objectify_args {
 330         my( $self, $first, $second, $arg ) = @_;
 331     $first = $self->reading( $first )
 332         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 333     $second = $self->reading( $second )
 334         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 335     return( $first, $second, $arg );
 336 }
 337 ### Path logic
 338
 339 sub add_path {
 340         my $self = shift;
 341
 342         # We only need the IDs for adding paths to the graph, not the reading
 343         # objects themselves.
 344     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 345
 346         # Connect the readings
 347     $self->sequence->add_edge( $source, $target );
 348     # Note the witness in question
 349     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 350 };
 351
 352 sub del_path {
 353         my $self = shift;
 354         my @args;
 355         if( ref( $_[0] ) eq 'ARRAY' ) {
 356                 my $e = shift @_;
 357                 @args = ( @$e, @_ );
 358         } else {
 359                 @args = @_;
 360         }
 361
 362         # We only need the IDs for adding paths to the graph, not the reading
 363         # objects themselves.
 364     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 365
 366         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 367                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 368         }
 369         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 370                 $self->sequence->delete_edge( $source, $target );
 371         }
 372 }
 373
 374
 375 # Extra graph-alike utility
 376 sub has_path {
 377         my $self = shift;
 378     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 379         return undef unless $self->sequence->has_edge( $source, $target );
 380         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 381 }
 382
 383 =head2 clear_witness( @sigil_list )
 384
 385 Clear the given witnesses out of the collation entirely, removing references
 386 to them in paths, and removing readings that belong only to them.  Should only
 387 be called via $tradition->del_witness.
 388
 389 =cut
 390
 391 sub clear_witness {
 392         my( $self, @sigils ) = @_;
 393
 394         # Clear the witness(es) out of the paths
 395         foreach my $e ( $self->paths ) {
 396                 foreach my $sig ( @sigils ) {
 397                         $self->del_path( $e, $sig );
 398                 }
 399         }
 400
 401         # Clear out the newly unused readings
 402         foreach my $r ( $self->readings ) {
 403                 unless( $self->reading_witnesses( $r ) ) {
 404                         $self->del_reading( $r );
 405                 }
 406         }
 407 }
 408
 409 sub add_relationship {
 410         my $self = shift;
 411     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 412     my( @vectors ) = $self->relations->add_relationship( $source,
 413         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 414     # Force a full rank recalculation every time. Yuck.
 415     $self->calculate_ranks() if $self->end->has_rank;
 416     return @vectors;
 417 }
 418
 419 =head2 reading_witnesses( $reading )
 420
 421 Return a list of sigils corresponding to the witnesses in which the reading appears.
 422
 423 =cut
 424
 425 sub reading_witnesses {
 426         my( $self, $reading ) = @_;
 427         # We need only check either the incoming or the outgoing edges; I have
 428         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 429         if( $reading eq $self->start ) {
 430                 return map { $_->sigil } $self->tradition->witnesses;
 431         }
 432         my %all_witnesses;
 433         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 434                 my $wits = $self->sequence->get_edge_attributes( @$e );
 435                 @all_witnesses{ keys %$wits } = 1;
 436         }
 437         return keys %all_witnesses;
 438 }
 439
 440 =head1 OUTPUT METHODS
 441
 442 =head2 as_svg( \%options )
 443
 444 Returns an SVG string that represents the graph, via as_dot and graphviz.
 445 See as_dot for a list of options.
 446
 447 =cut
 448
 449 sub as_svg {
 450     my( $self, $opts ) = @_;
 451     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 452     if( !$self->has_cached_svg || $opts->{'recalc'}     || $want_subgraph ) {
 453                 my @cmd = qw/dot -Tsvg/;
 454                 my( $svg, $err );
 455                 my $dotfile = File::Temp->new();
 456                 ## USE FOR DEBUGGING
 457                 # $dotfile->unlink_on_destroy(0);
 458                 binmode $dotfile, ':utf8';
 459                 print $dotfile $self->as_dot( $opts );
 460                 push( @cmd, $dotfile->filename );
 461                 run( \@cmd, ">", binary(), \$svg );
 462                 $svg = decode_utf8( $svg );
 463                 $self->cached_svg( $svg ) unless $want_subgraph;
 464                 return $svg;
 465         } else {
 466                 return $self->cached_svg;
 467         }
 468 }
 469
 470
 471 =head2 as_dot( \%options )
 472
 473 Returns a string that is the collation graph expressed in dot
 474 (i.e. GraphViz) format.  Options include:
 475
 476 =over 4
 477
 478 =item * from
 479
 480 =item * to
 481
 482 =item * color_common
 483
 484 =back
 485
 486 =cut
 487
 488 sub as_dot {
 489     my( $self, $opts ) = @_;
 490     my $startrank = $opts->{'from'} if $opts;
 491     my $endrank = $opts->{'to'} if $opts;
 492     my $color_common = $opts->{'color_common'} if $opts;
 493     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 494        && $self->end->rank > 100;
 495
 496     # Check the arguments
 497     if( $startrank ) {
 498         return if $endrank && $startrank > $endrank;
 499         return if $startrank > $self->end->rank;
 500         }
 501         if( defined $endrank ) {
 502                 return if $endrank < 0;
 503                 $endrank = undef if $endrank == $self->end->rank;
 504         }
 505
 506     my $graph_name = $self->tradition->name;
 507     $graph_name =~ s/[^\w\s]//g;
 508     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 509
 510     my %graph_attrs = (
 511         'rankdir' => 'LR',
 512         'bgcolor' => 'none',
 513         );
 514     my %node_attrs = (
 515         'fontsize' => 14,
 516         'fillcolor' => 'white',
 517         'style' => 'filled',
 518         'shape' => 'ellipse'
 519         );
 520     my %edge_attrs = (
 521         'arrowhead' => 'open',
 522         'color' => '#000000',
 523         'fontcolor' => '#000000',
 524         );
 525
 526     my $dot = sprintf( "digraph %s {\n", $graph_name );
 527     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 528     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 529
 530         # Output substitute start/end readings if necessary
 531         if( $startrank ) {
 532                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 533         }
 534         if( $endrank ) {
 535                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 536         }
 537         if( $STRAIGHTENHACK ) {
 538                 ## HACK part 1
 539                 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
 540                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 541         }
 542         my %used;  # Keep track of the readings that actually appear in the graph
 543         # Sort the readings by rank if we have ranks; this speeds layout.
 544         my @all_readings = $self->end->has_rank
 545                 ? sort { $a->rank <=> $b->rank } $self->readings
 546                 : $self->readings;
 547         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 548     foreach my $reading ( @all_readings ) {
 549         # Only output readings within our rank range.
 550         next if $startrank && $reading->rank < $startrank;
 551         next if $endrank && $reading->rank > $endrank;
 552         $used{$reading->id} = 1;
 553         # Need not output nodes without separate labels
 554         next if $reading->id eq $reading->text;
 555         my $rattrs;
 556         my $label = $reading->text;
 557         $label =~ s/\"/\\\"/g;
 558                 $rattrs->{'label'} = $label;
 559                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 560         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 561     }
 562
 563         # Add the real edges. Need to weight one edge per rank jump, in a
 564         # continuous line.
 565         # my $weighted = $self->_add_edge_weights;
 566     my @edges = $self->paths;
 567         my( %substart, %subend );
 568     foreach my $edge ( @edges ) {
 569         # Do we need to output this edge?
 570         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 571                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 572                         my $variables = { %edge_attrs, 'label' => $label };
 573
 574                         # Account for the rank gap if necessary
 575                         my $rank0 = $self->reading( $edge->[0] )->rank
 576                                 if $self->reading( $edge->[0] )->has_rank;
 577                         my $rank1 = $self->reading( $edge->[1] )->rank
 578                                 if $self->reading( $edge->[1] )->has_rank;
 579                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 580                                 $variables->{'minlen'} = $rank1 - $rank0;
 581                         }
 582
 583                         # Add the calculated edge weights
 584                         # if( exists $weighted->{$edge->[0]}
 585                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 586                         #       # $variables->{'color'} = 'red';
 587                         #       $variables->{'weight'} = 3.0;
 588                         # }
 589
 590                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 591                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 592                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 593
 594                         my $varopts = _dot_attr_string( $variables );
 595                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 596                                 $edge->[0], $edge->[1], $varopts );
 597         } elsif( $used{$edge->[0]} ) {
 598                 $subend{$edge->[0]} = 1;
 599         } elsif( $used{$edge->[1]} ) {
 600                 $substart{$edge->[1]} = 1;
 601         }
 602     }
 603     # Add substitute start and end edges if necessary
 604     foreach my $node ( keys %substart ) {
 605         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 606         my $variables = { %edge_attrs, 'label' => $witstr };
 607         my $varopts = _dot_attr_string( $variables );
 608         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 609         }
 610     foreach my $node ( keys %subend ) {
 611         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 612         my $variables = { %edge_attrs, 'label' => $witstr };
 613         my $varopts = _dot_attr_string( $variables );
 614         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 615         }
 616         # HACK part 2
 617         if( $STRAIGHTENHACK ) {
 618                 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 619         }
 620
 621     $dot .= "}\n";
 622     return $dot;
 623 }
 624
 625 sub _dot_attr_string {
 626         my( $hash ) = @_;
 627         my @attrs;
 628         foreach my $k ( sort keys %$hash ) {
 629                 my $v = $hash->{$k};
 630                 push( @attrs, $k.'="'.$v.'"' );
 631         }
 632         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 633 }
 634
 635 sub _add_edge_weights {
 636         my $self = shift;
 637         # Walk the graph from START to END, choosing the successor node with
 638         # the largest number of witness paths each time.
 639         my $weighted = {};
 640         my $curr = $self->start->id;
 641         my $ranked = $self->end->has_rank;
 642         while( $curr ne $self->end->id ) {
 643                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 644                 my @succ = sort { $self->path_witnesses( $curr, $a )
 645                                                         <=> $self->path_witnesses( $curr, $b ) }
 646                         $self->sequence->successors( $curr );
 647                 my $next = pop @succ;
 648                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 649                 # Try to avoid lacunae in the weighted path.
 650                 while( @succ &&
 651                            ( $self->reading( $next )->is_lacuna ||
 652                                  $nextrank - $rank > 1 ) ){
 653                         $next = pop @succ;
 654                 }
 655                 $weighted->{$curr} = $next;
 656                 $curr = $next;
 657         }
 658         return $weighted;
 659 }
 660
 661 =head2 path_witnesses( $edge )
 662
 663 Returns the list of sigils whose witnesses are associated with the given edge.
 664 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 665
 666 =cut
 667
 668 sub path_witnesses {
 669         my( $self, @edge ) = @_;
 670         # If edge is an arrayref, cope.
 671         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 672                 my $e = shift @edge;
 673                 @edge = @$e;
 674         }
 675         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 676         return @wits;
 677 }
 678
 679 sub _path_display_label {
 680         my $self = shift;
 681         my @wits = sort @_;
 682         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 683         if( scalar @wits > $maj ) {
 684                 # TODO break out a.c. wits
 685                 return 'majority';
 686         } else {
 687                 return join( ', ', @wits );
 688         }
 689 }
 690
 691
 692 =head2 as_graphml
 693
 694 Returns a GraphML representation of the collation.  The GraphML will contain
 695 two graphs. The first expresses the attributes of the readings and the witness
 696 paths that link them; the second expresses the relationships that link the
 697 readings.  This is the native transfer format for a tradition.
 698
 699 =begin testing
 700
 701 use Text::Tradition;
 702
 703 my $READINGS = 311;
 704 my $PATHS = 361;
 705
 706 my $datafile = 't/data/florilegium_tei_ps.xml';
 707 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 708                                       'name' => 'test0',
 709                                       'file' => $datafile,
 710                                       'linear' => 1 );
 711
 712 ok( $tradition, "Got a tradition object" );
 713 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 714 ok( $tradition->collation, "Tradition has a collation" );
 715
 716 my $c = $tradition->collation;
 717 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 718 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 719 is( scalar $c->relationships, 0, "Collation has all relationships" );
 720
 721 # Add a few relationships
 722 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 723 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 724 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 725
 726 # Now write it to GraphML and parse it again.
 727
 728 my $graphml = $c->as_graphml;
 729 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 730 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 731 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 732 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 733
 734 =end testing
 735
 736 =cut
 737
 738 sub as_graphml {
 739     my( $self ) = @_;
 740
 741     # Some namespaces
 742     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 743     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 744     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 745         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 746
 747     # Create the document and root node
 748     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 749     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 750     $graphml->setDocumentElement( $root );
 751     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 752     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 753
 754     # Add the data keys for the graph
 755     my %graph_data_keys;
 756     my $gdi = 0;
 757     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 758     foreach my $datum ( @graph_attributes ) {
 759         $graph_data_keys{$datum} = 'dg'.$gdi++;
 760         my $key = $root->addNewChild( $graphml_ns, 'key' );
 761         $key->setAttribute( 'attr.name', $datum );
 762         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 763         $key->setAttribute( 'for', 'graph' );
 764         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 765     }
 766
 767     # Add the data keys for nodes
 768     my %node_data_keys;
 769     my $ndi = 0;
 770     my %node_data = (
 771         id => 'string',
 772         text => 'string',
 773         rank => 'string',
 774         is_start => 'boolean',
 775         is_end => 'boolean',
 776         is_lacuna => 'boolean',
 777         );
 778     foreach my $datum ( keys %node_data ) {
 779         $node_data_keys{$datum} = 'dn'.$ndi++;
 780         my $key = $root->addNewChild( $graphml_ns, 'key' );
 781         $key->setAttribute( 'attr.name', $datum );
 782         $key->setAttribute( 'attr.type', $node_data{$datum} );
 783         $key->setAttribute( 'for', 'node' );
 784         $key->setAttribute( 'id', $node_data_keys{$datum} );
 785     }
 786
 787     # Add the data keys for edges, i.e. witnesses
 788     my $edi = 0;
 789     my %edge_data_keys;
 790     my %edge_data = (
 791         class => 'string',                              # Class, deprecated soon
 792         witness => 'string',                    # ID/label for a path
 793         relationship => 'string',               # ID/label for a relationship
 794         extra => 'boolean',                             # Path key
 795         scope => 'string',                              # Relationship key
 796         non_correctable => 'boolean',   # Relationship key
 797         non_independent => 'boolean',   # Relationship key
 798         );
 799     foreach my $datum ( keys %edge_data ) {
 800         $edge_data_keys{$datum} = 'de'.$edi++;
 801         my $key = $root->addNewChild( $graphml_ns, 'key' );
 802         $key->setAttribute( 'attr.name', $datum );
 803         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 804         $key->setAttribute( 'for', 'edge' );
 805         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 806     }
 807
 808     # Add the collation graph itself
 809     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 810     $sgraph->setAttribute( 'edgedefault', 'directed' );
 811     $sgraph->setAttribute( 'id', $self->tradition->name );
 812     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 813     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 814     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 815     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 816     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 817
 818     # Collation attribute data
 819     foreach my $datum ( @graph_attributes ) {
 820         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 821                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 822         }
 823
 824     my $node_ctr = 0;
 825     my %node_hash;
 826     # Add our readings to the graph
 827     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 828         # Add to the main graph
 829         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 830         my $node_xmlid = 'n' . $node_ctr++;
 831         $node_hash{ $n->id } = $node_xmlid;
 832         $node_el->setAttribute( 'id', $node_xmlid );
 833         foreach my $d ( keys %node_data ) {
 834                 my $nval = $n->$d;
 835                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 836                         if defined $nval;
 837         }
 838     }
 839
 840     # Add the path edges to the sequence graph
 841     my $edge_ctr = 0;
 842     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 843         # We add an edge in the graphml for every witness in $e.
 844         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 845                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 846                                                                                 $node_hash{ $e->[0] },
 847                                                                                 $node_hash{ $e->[1] } );
 848                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 849                         $edge_el->setAttribute( 'source', $from );
 850                         $edge_el->setAttribute( 'target', $to );
 851                         $edge_el->setAttribute( 'id', $id );
 852
 853                         # It's a witness path, so add the witness
 854                         my $base = $wit;
 855                         my $key = $edge_data_keys{'witness'};
 856                         # Is this an ante-corr witness?
 857                         my $aclabel = $self->ac_label;
 858                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 859                                 # Keep the base witness
 860                                 $base = $1;
 861                                 # ...and record that this is an 'extra' reading path
 862                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 863                         }
 864                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 865                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 866                 }
 867         }
 868
 869         # Add the relationship graph to the XML
 870         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 871                 $node_data_keys{'id'}, \%edge_data_keys );
 872
 873     # Save and return the thing
 874     my $result = decode_utf8( $graphml->toString(1) );
 875     return $result;
 876 }
 877
 878 sub _add_graphml_data {
 879     my( $el, $key, $value ) = @_;
 880     return unless defined $value;
 881     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 882     $data_el->setAttribute( 'key', $key );
 883     $data_el->appendText( $value );
 884 }
 885
 886 =head2 as_csv
 887
 888 Returns a CSV alignment table representation of the collation graph, one
 889 row per witness (or witness uncorrected.)
 890
 891 =cut
 892
 893 sub as_csv {
 894     my( $self ) = @_;
 895     my $table = $self->make_alignment_table;
 896     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 897     my @result;
 898     # Make the header row
 899     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 900         push( @result, decode_utf8( $csv->string ) );
 901     # Make the rest of the rows
 902     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 903         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 904         my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
 905         $csv->combine( @row );
 906         push( @result, decode_utf8( $csv->string ) );
 907     }
 908     return join( "\n", @result );
 909 }
 910
 911 =head2 make_alignment_table( $use_refs, $include_witnesses )
 912
 913 Return a reference to an alignment table, in a slightly enhanced CollateX
 914 format which looks like this:
 915
 916  $table = { alignment => [ { witness => "SIGIL",
 917                              tokens => [ { t => "TEXT" }, ... ] },
 918                            { witness => "SIG2",
 919                              tokens => [ { t => "TEXT" }, ... ] },
 920                            ... ],
 921             length => TEXTLEN };
 922
 923 If $use_refs is set to 1, the reading object is returned in the table
 924 instead of READINGTEXT; if not, the text of the reading is returned.
 925
 926 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 927 keys have a true hash value will be included.
 928
 929 =cut
 930
 931 sub make_alignment_table {
 932     my( $self, $noderefs, $include ) = @_;
 933     # Make sure we can do this
 934         throw( "Need a linear graph in order to make an alignment table" )
 935                 unless $self->linear;
 936         $self->calculate_ranks unless $self->end->has_rank;
 937
 938     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 939     my @all_pos = ( 1 .. $self->end->rank - 1 );
 940     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 941         if( $include ) {
 942                 next unless $include->{$wit->sigil};
 943         }
 944         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 945         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 946         my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
 947         push( @{$table->{'alignment'}},
 948                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 949         if( $wit->is_layered ) {
 950                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 951                         $wit->sigil.$self->ac_label );
 952             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
 953                         push( @{$table->{'alignment'}},
 954                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 955         }
 956     }
 957         return $table;
 958 }
 959
 960 sub _make_witness_row {
 961     my( $path, $positions, $noderefs ) = @_;
 962     my %char_hash;
 963     map { $char_hash{$_} = undef } @$positions;
 964     my $debug = 0;
 965     foreach my $rdg ( @$path ) {
 966         my $rtext = $rdg->text;
 967         $rtext = '#LACUNA#' if $rdg->is_lacuna;
 968         print STDERR "rank " . $rdg->rank . "\n" if $debug;
 969         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
 970         $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
 971                                                                            : { 't' => $rtext };
 972     }
 973     my @row = map { $char_hash{$_} } @$positions;
 974     # Fill in lacuna markers for undef spots in the row
 975     my $last_el = shift @row;
 976     my @filled_row = ( $last_el );
 977     foreach my $el ( @row ) {
 978         # If we are using node reference, make the lacuna node appear many times
 979         # in the table.  If not, use the lacuna tag.
 980         if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
 981             $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
 982         }
 983         push( @filled_row, $el );
 984         $last_el = $el;
 985     }
 986     return @filled_row;
 987 }
 988
 989 # Tiny utility function to say if a table element is a lacuna
 990 sub _el_is_lacuna {
 991     my $el = shift;
 992     return 1 if $el->{'t'} eq '#LACUNA#';
 993     return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
 994         && $el->{'t'}->is_lacuna;
 995     return 0;
 996 }
 997
 998 # Helper to turn the witnesses along columns rather than rows.  Assumes
 999 # equal-sized rows.
1000 sub _turn_table {
1001     my( $table ) = @_;
1002     my $result = [];
1003     return $result unless scalar @$table;
1004     my $nrows = scalar @{$table->[0]};
1005     foreach my $idx ( 0 .. $nrows - 1 ) {
1006         foreach my $wit ( 0 .. $#{$table} ) {
1007             $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
1008         }
1009     }
1010     return $result;
1011 }
1012
1013 =head1 NAVIGATION METHODS
1014
1015 =head2 reading_sequence( $first, $last, $sigil, $backup )
1016
1017 Returns the ordered list of readings, starting with $first and ending
1018 with $last, for the witness given in $sigil. If a $backup sigil is
1019 specified (e.g. when walking a layered witness), it will be used wherever
1020 no $sigil path exists.  If there is a base text reading, that will be
1021 used wherever no path exists for $sigil or $backup.
1022
1023 =cut
1024
1025 # TODO Think about returning some lazy-eval iterator.
1026 # TODO Get rid of backup; we should know from what witness is whether we need it.
1027
1028 sub reading_sequence {
1029     my( $self, $start, $end, $witness ) = @_;
1030
1031     $witness = $self->baselabel unless $witness;
1032     my @readings = ( $start );
1033     my %seen;
1034     my $n = $start;
1035     while( $n && $n->id ne $end->id ) {
1036         if( exists( $seen{$n->id} ) ) {
1037             throw( "Detected loop for $witness at " . $n->id );
1038         }
1039         $seen{$n->id} = 1;
1040
1041         my $next = $self->next_reading( $n, $witness );
1042         unless( $next ) {
1043             throw( "Did not find any path for $witness from reading " . $n->id );
1044         }
1045         push( @readings, $next );
1046         $n = $next;
1047     }
1048     # Check that the last reading is our end reading.
1049     my $last = $readings[$#readings];
1050     throw( "Last reading found from " . $start->text .
1051         " for witness $witness is not the end!" ) # TODO do we get this far?
1052         unless $last->id eq $end->id;
1053
1054     return @readings;
1055 }
1056
1057 =head2 next_reading( $reading, $sigil );
1058
1059 Returns the reading that follows the given reading along the given witness
1060 path.
1061
1062 =cut
1063
1064 sub next_reading {
1065     # Return the successor via the corresponding path.
1066     my $self = shift;
1067     my $answer = $self->_find_linked_reading( 'next', @_ );
1068         return undef unless $answer;
1069     return $self->reading( $answer );
1070 }
1071
1072 =head2 prior_reading( $reading, $sigil )
1073
1074 Returns the reading that precedes the given reading along the given witness
1075 path.
1076
1077 =cut
1078
1079 sub prior_reading {
1080     # Return the predecessor via the corresponding path.
1081     my $self = shift;
1082     my $answer = $self->_find_linked_reading( 'prior', @_ );
1083     return $self->reading( $answer );
1084 }
1085
1086 sub _find_linked_reading {
1087     my( $self, $direction, $node, $path ) = @_;
1088
1089     # Get a backup if we are dealing with a layered witness
1090     my $alt_path;
1091     my $aclabel = $self->ac_label;
1092     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1093         $alt_path = $1;
1094     }
1095
1096     my @linked_paths = $direction eq 'next'
1097         ? $self->sequence->edges_from( $node )
1098         : $self->sequence->edges_to( $node );
1099     return undef unless scalar( @linked_paths );
1100
1101     # We have to find the linked path that contains all of the
1102     # witnesses supplied in $path.
1103     my( @path_wits, @alt_path_wits );
1104     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1105     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1106     my $base_le;
1107     my $alt_le;
1108     foreach my $le ( @linked_paths ) {
1109         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1110             $base_le = $le;
1111         }
1112                 my @le_wits = sort $self->path_witnesses( $le );
1113                 if( _is_within( \@path_wits, \@le_wits ) ) {
1114                         # This is the right path.
1115                         return $direction eq 'next' ? $le->[1] : $le->[0];
1116                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1117                         $alt_le = $le;
1118                 }
1119     }
1120     # Got this far? Return the alternate path if it exists.
1121     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1122         if $alt_le;
1123
1124     # Got this far? Return the base path if it exists.
1125     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1126         if $base_le;
1127
1128     # Got this far? We have no appropriate path.
1129     warn "Could not find $direction node from " . $node->id
1130         . " along path $path";
1131     return undef;
1132 }
1133
1134 # Some set logic.
1135 sub _is_within {
1136     my( $set1, $set2 ) = @_;
1137     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1138     foreach my $el ( @$set1 ) {
1139         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1140     }
1141     return $ret;
1142 }
1143
1144 # Return the string that joins together a list of witnesses for
1145 # display on a single path.
1146 sub _witnesses_of_label {
1147     my( $self, $label ) = @_;
1148     my $regex = $self->wit_list_separator;
1149     my @answer = split( /\Q$regex\E/, $label );
1150     return @answer;
1151 }
1152
1153 =head2 common_readings
1154
1155 Returns the list of common readings in the graph (i.e. those readings that are
1156 shared by all non-lacunose witnesses.)
1157
1158 =cut
1159
1160 sub common_readings {
1161         my $self = shift;
1162         my @common = grep { $_->is_common } $self->readings;
1163         return @common;
1164 }
1165
1166 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1167
1168 Returns the text of a witness (plus its backup, if we are using a layer)
1169 as stored in the collation.  The text is returned as a string, where the
1170 individual readings are joined with spaces and the meta-readings (e.g.
1171 lacunae) are omitted.  Optional specification of $start and $end allows
1172 the generation of a subset of the witness text.
1173
1174 =cut
1175
1176 sub path_text {
1177         my( $self, $wit, $start, $end ) = @_;
1178         $start = $self->start unless $start;
1179         $end = $self->end unless $end;
1180         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1181         return join( ' ', map { $_->text } @path );
1182 }
1183
1184 =head1 INITIALIZATION METHODS
1185
1186 These are mostly for use by parsers.
1187
1188 =head2 make_witness_path( $witness )
1189
1190 Link the array of readings contained in $witness->path (and in
1191 $witness->uncorrected_path if it exists) into collation paths.
1192 Clear out the arrays when finished.
1193
1194 =head2 make_witness_paths
1195
1196 Call make_witness_path for all witnesses in the tradition.
1197
1198 =cut
1199
1200 # For use when a collation is constructed from a base text and an apparatus.
1201 # We have the sequences of readings and just need to add path edges.
1202 # When we are done, clear out the witness path attributes, as they are no
1203 # longer needed.
1204 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1205
1206 sub make_witness_paths {
1207     my( $self ) = @_;
1208     foreach my $wit ( $self->tradition->witnesses ) {
1209         # print STDERR "Making path for " . $wit->sigil . "\n";
1210         $self->make_witness_path( $wit );
1211     }
1212 }
1213
1214 sub make_witness_path {
1215     my( $self, $wit ) = @_;
1216     my @chain = @{$wit->path};
1217     my $sig = $wit->sigil;
1218     foreach my $idx ( 0 .. $#chain-1 ) {
1219         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1220     }
1221     if( $wit->is_layered ) {
1222         @chain = @{$wit->uncorrected_path};
1223         foreach my $idx( 0 .. $#chain-1 ) {
1224             my $source = $chain[$idx];
1225             my $target = $chain[$idx+1];
1226             $self->add_path( $source, $target, $sig.$self->ac_label )
1227                 unless $self->has_path( $source, $target, $sig );
1228         }
1229     }
1230     $wit->clear_path;
1231     $wit->clear_uncorrected_path;
1232 }
1233
1234 =head2 calculate_ranks
1235
1236 Calculate the reading ranks (that is, their aligned positions relative
1237 to each other) for the graph.  This can only be called on linear collations.
1238
1239 =begin testing
1240
1241 use Text::Tradition;
1242
1243 my $cxfile = 't/data/Collatex-16.xml';
1244 my $t = Text::Tradition->new(
1245     'name'  => 'inline',
1246     'input' => 'CollateX',
1247     'file'  => $cxfile,
1248     );
1249 my $c = $t->collation;
1250
1251 # Make an svg
1252 my $svg = $c->as_svg;
1253 is( substr( $svg, 0, 5 ), '<?xml', "Got XML doc for svg" );
1254 ok( $c->has_cached_svg, "SVG was cached" );
1255 is( $c->as_svg, $svg, "Cached SVG returned upon second call" );
1256 $c->calculate_ranks;
1257 is( $c->as_svg, $svg, "Cached SVG retained with no rank change" );
1258 $c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
1259 isnt( $c->as_svg, $svg, "SVG changed after relationship add" );
1260
1261 =end testing
1262
1263 =cut
1264
1265 sub calculate_ranks {
1266     my $self = shift;
1267     # Save the existing ranks, in case we need to invalidate the cached SVG.
1268     my %existing_ranks;
1269     # Walk a version of the graph where every node linked by a relationship
1270     # edge is fundamentally the same node, and do a topological ranking on
1271     # the nodes in this graph.
1272     my $topo_graph = Graph->new();
1273     my %rel_containers;
1274     my $rel_ctr = 0;
1275     # Add the nodes
1276     foreach my $r ( $self->readings ) {
1277         next if exists $rel_containers{$r->id};
1278         my @rels = $r->related_readings( 'colocated' );
1279         if( @rels ) {
1280             # Make a relationship container.
1281             push( @rels, $r );
1282             my $rn = 'rel_container_' . $rel_ctr++;
1283             $topo_graph->add_vertex( $rn );
1284             foreach( @rels ) {
1285                 $rel_containers{$_->id} = $rn;
1286             }
1287         } else {
1288             # Add a new node to mirror the old node.
1289             $rel_containers{$r->id} = $r->id;
1290             $topo_graph->add_vertex( $r->id );
1291         }
1292     }
1293
1294     # Add the edges.
1295     foreach my $r ( $self->readings ) {
1296                 $existing_ranks{$r} = $r->rank;
1297         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1298                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1299                         $rel_containers{$n} );
1300                 # $DB::single = 1 unless $tfrom && $tto;
1301             $topo_graph->add_edge( $tfrom, $tto );
1302         }
1303     }
1304
1305     # Now do the rankings, starting with the start node.
1306     my $topo_start = $rel_containers{$self->start->id};
1307     my $node_ranks = { $topo_start => 0 };
1308     my @curr_origin = ( $topo_start );
1309     # A little iterative function.
1310     while( @curr_origin ) {
1311         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1312     }
1313     # Transfer our rankings from the topological graph to the real one.
1314     foreach my $r ( $self->readings ) {
1315         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1316             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1317         } else {
1318                 # Die. Find the last rank we calculated.
1319                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1320                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1321                         $self->readings;
1322                 my $last = pop @all_defined;
1323             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1324         }
1325     }
1326     # Do we need to invalidate the cached SVG?
1327     if( $self->has_cached_svg ) {
1328         foreach my $r ( $self->readings ) {
1329                 next if $existing_ranks{$r} == $r->rank;
1330                 $self->wipe_svg;
1331                 last;
1332         }
1333     }
1334 }
1335
1336 sub _assign_rank {
1337     my( $graph, $node_ranks, @current_nodes ) = @_;
1338     # Look at each of the children of @current_nodes.  If all the child's
1339     # parents have a rank, assign it the highest rank + 1 and add it to
1340     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1341     # parent gets a rank.
1342     my @next_nodes;
1343     foreach my $c ( @current_nodes ) {
1344         warn "Current reading $c has no rank!"
1345             unless exists $node_ranks->{$c};
1346         # print STDERR "Looking at child of node $c, rank "
1347         #     . $node_ranks->{$c} . "\n";
1348         foreach my $child ( $graph->successors( $c ) ) {
1349             next if exists $node_ranks->{$child};
1350             my $highest_rank = -1;
1351             my $skip = 0;
1352             foreach my $parent ( $graph->predecessors( $child ) ) {
1353                 if( exists $node_ranks->{$parent} ) {
1354                     $highest_rank = $node_ranks->{$parent}
1355                         if $highest_rank <= $node_ranks->{$parent};
1356                 } else {
1357                     $skip = 1;
1358                     last;
1359                 }
1360             }
1361             next if $skip;
1362             my $c_rank = $highest_rank + 1;
1363             # print STDERR "Assigning rank $c_rank to node $child \n";
1364             $node_ranks->{$child} = $c_rank;
1365             push( @next_nodes, $child );
1366         }
1367     }
1368     return @next_nodes;
1369 }
1370
1371 =head2 flatten_ranks
1372
1373 A convenience method for parsing collation data.  Searches the graph for readings
1374 with the same text at the same rank, and merges any that are found.
1375
1376 =cut
1377
1378 sub flatten_ranks {
1379     my $self = shift;
1380     my %unique_rank_rdg;
1381     foreach my $rdg ( $self->readings ) {
1382         next unless $rdg->has_rank;
1383         my $key = $rdg->rank . "||" . $rdg->text;
1384         if( exists $unique_rank_rdg{$key} ) {
1385             # Combine!
1386                 # print STDERR "Combining readings at same rank: $key\n";
1387             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1388             # TODO see if this now makes a common point.
1389         } else {
1390             $unique_rank_rdg{$key} = $rdg;
1391         }
1392     }
1393 }
1394
1395 =head2 remove_collations
1396
1397 Another convenience method for parsing. Removes all 'collation' relationships
1398 that were defined in order to get the reading ranks to be correct.
1399
1400 =begin testing
1401
1402 use Text::Tradition;
1403
1404 my $cxfile = 't/data/Collatex-16.xml';
1405 my $t = Text::Tradition->new(
1406     'name'  => 'inline',
1407     'input' => 'CollateX',
1408     'file'  => $cxfile,
1409     );
1410 my $c = $t->collation;
1411
1412 isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
1413 $c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );
1414 is( scalar $c->relationships, 4, "Found all expected relationships" );
1415 $c->remove_collations;
1416 is( scalar $c->relationships, 3, "Collated relationships now gone" );
1417 is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
1418
1419 =end testing
1420
1421 =cut
1422
1423 sub remove_collations {
1424         my $self = shift;
1425         foreach my $reledge ( $self->relationships ) {
1426                 my $relobj = $self->relations->get_relationship( $reledge );
1427                 if( $relobj && $relobj->type eq 'collated' ) {
1428                         $self->relations->delete_relationship( $reledge );
1429                 }
1430         }
1431 }
1432
1433
1434 =head2 calculate_common_readings
1435
1436 Goes through the graph identifying the readings that appear in every witness
1437 (apart from those with lacunae at that spot.) Marks them as common and returns
1438 the list.
1439
1440 =begin testing
1441
1442 use Text::Tradition;
1443
1444 my $cxfile = 't/data/Collatex-16.xml';
1445 my $t = Text::Tradition->new(
1446     'name'  => 'inline',
1447     'input' => 'CollateX',
1448     'file'  => $cxfile,
1449     );
1450 my $c = $t->collation;
1451
1452 my @common = $c->calculate_common_readings();
1453 is( scalar @common, 8, "Found correct number of common readings" );
1454 my @marked = sort $c->common_readings();
1455 is( scalar @common, 8, "All common readings got marked as such" );
1456 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1457 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1458
1459 =end testing
1460
1461 =cut
1462
1463 sub calculate_common_readings {
1464         my $self = shift;
1465         my @common;
1466         my $table = $self->make_alignment_table( 1 );
1467         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1468                 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1469                 my %hash;
1470                 foreach my $r ( @row ) {
1471                         if( $r ) {
1472                                 $hash{$r->id} = $r unless $r->is_meta;
1473                         } else {
1474                                 $hash{'UNDEF'} = $r;
1475                         }
1476                 }
1477                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1478                         my( $r ) = values %hash;
1479                         $r->is_common( 1 );
1480                         push( @common, $r );
1481                 }
1482         }
1483         return @common;
1484 }
1485
1486 =head2 text_from_paths
1487
1488 Calculate the text array for all witnesses from the path, for later consistency
1489 checking.  Only to be used if there is no non-graph-based way to know the
1490 original texts.
1491
1492 =cut
1493
1494 sub text_from_paths {
1495         my $self = shift;
1496     foreach my $wit ( $self->tradition->witnesses ) {
1497         my @text = split( /\s+/,
1498                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1499         $wit->text( \@text );
1500         if( $wit->is_layered ) {
1501                         my @uctext = split( /\s+/,
1502                                 $self->reading_sequence( $self->start, $self->end,
1503                                         $wit->sigil.$self->ac_label ) );
1504                         $wit->text( \@uctext );
1505         }
1506     }
1507 }
1508
1509 =head1 UTILITY FUNCTIONS
1510
1511 =head2 common_predecessor( $reading_a, $reading_b )
1512
1513 Find the last reading that occurs in sequence before both the given readings.
1514
1515 =head2 common_successor( $reading_a, $reading_b )
1516
1517 Find the first reading that occurs in sequence after both the given readings.
1518
1519 =begin testing
1520
1521 use Text::Tradition;
1522
1523 my $cxfile = 't/data/Collatex-16.xml';
1524 my $t = Text::Tradition->new(
1525     'name'  => 'inline',
1526     'input' => 'CollateX',
1527     'file'  => $cxfile,
1528     );
1529 my $c = $t->collation;
1530
1531 is( $c->common_predecessor( 'n9', 'n23' )->id,
1532     'n20', "Found correct common predecessor" );
1533 is( $c->common_successor( 'n9', 'n23' )->id,
1534     '#END#', "Found correct common successor" );
1535
1536 is( $c->common_predecessor( 'n19', 'n17' )->id,
1537     'n16', "Found correct common predecessor for readings on same path" );
1538 is( $c->common_successor( 'n21', 'n26' )->id,
1539     '#END#', "Found correct common successor for readings on same path" );
1540
1541 =end testing
1542
1543 =cut
1544
1545 ## Return the closest reading that is a predecessor of both the given readings.
1546 sub common_predecessor {
1547         my $self = shift;
1548         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1549         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1550 }
1551
1552 sub common_successor {
1553         my $self = shift;
1554         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1555         return $self->_common_in_path( $r1, $r2, 'successors' );
1556 }
1557
1558 sub _common_in_path {
1559         my( $self, $r1, $r2, $dir ) = @_;
1560         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1561         $iter = $self->end->rank - $iter if $dir eq 'successors';
1562         my @candidates;
1563         my @last_checked = ( $r1, $r2 );
1564         my %all_seen;
1565         while( !@candidates ) {
1566                 my @new_lc;
1567                 foreach my $lc ( @last_checked ) {
1568                         foreach my $p ( $lc->$dir ) {
1569                                 if( $all_seen{$p->id} ) {
1570                                         push( @candidates, $p );
1571                                 } else {
1572                                         $all_seen{$p->id} = 1;
1573                                         push( @new_lc, $p );
1574                                 }
1575                         }
1576                 }
1577                 @last_checked = @new_lc;
1578         }
1579         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1580         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1581 }
1582
1583 sub throw {
1584         Text::Tradition::Error->throw(
1585                 'ident' => 'Collation error',
1586                 'message' => $_[0],
1587                 );
1588 }
1589
1590 no Moose;
1591 __PACKAGE__->meta->make_immutable;
1592
1593 =head1 LICENSE
1594
1595 This package is free software and is provided "as is" without express
1596 or implied warranty.  You can redistribute it and/or modify it under
1597 the same terms as Perl itself.
1598
1599 =head1 AUTHOR
1600
1601 Tara L Andrews E<lt>aurum@cpan.orgE<gt>