lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30         },
  31         writer => '_set_relations',
  32         );
  33
  34 has 'tradition' => (
  35     is => 'ro',
  36     isa => 'Text::Tradition',
  37     weak_ref => 1,
  38     );
  39
  40 has 'readings' => (
  41         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  42         traits => ['Hash'],
  43     handles => {
  44         reading     => 'get',
  45         _add_reading => 'set',
  46         del_reading => 'delete',
  47         has_reading => 'exists',
  48         readings   => 'values',
  49     },
  50     default => sub { {} },
  51         );
  52
  53 has 'wit_list_separator' => (
  54     is => 'rw',
  55     isa => 'Str',
  56     default => ', ',
  57     );
  58
  59 has 'baselabel' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => 'base text',
  63     );
  64
  65 has 'linear' => (
  66     is => 'rw',
  67     isa => 'Bool',
  68     default => 1,
  69     );
  70
  71 has 'ac_label' => (
  72     is => 'rw',
  73     isa => 'Str',
  74     default => ' (a.c.)',
  75     );
  76
  77 has 'start' => (
  78         is => 'ro',
  79         isa => 'Text::Tradition::Collation::Reading',
  80         writer => '_set_start',
  81         weak_ref => 1,
  82         );
  83
  84 has 'end' => (
  85         is => 'ro',
  86         isa => 'Text::Tradition::Collation::Reading',
  87         writer => '_set_end',
  88         weak_ref => 1,
  89         );
  90
  91 has 'cached_svg' => (
  92         is => 'rw',
  93         isa => 'Str',
  94         predicate => 'has_cached_svg',
  95         clearer => 'wipe_svg',
  96         );
  97
  98 =head1 NAME
  99
 100 Text::Tradition::Collation - a software model for a text collation
 101
 102 =head1 SYNOPSIS
 103
 104   use Text::Tradition;
 105   my $t = Text::Tradition->new(
 106     'name' => 'this is a text',
 107     'input' => 'TEI',
 108     'file' => '/path/to/tei_parallel_seg_file.xml' );
 109
 110   my $c = $t->collation;
 111   my @readings = $c->readings;
 112   my @paths = $c->paths;
 113   my @relationships = $c->relationships;
 114
 115   my $svg_variant_graph = $t->collation->as_svg();
 116
 117 =head1 DESCRIPTION
 118
 119 Text::Tradition is a library for representation and analysis of collated
 120 texts, particularly medieval ones.  The Collation is the central feature of
 121 a Tradition, where the text, its sequence of readings, and its relationships
 122 between readings are actually kept.
 123
 124 =head1 CONSTRUCTOR
 125
 126 =head2 new
 127
 128 The constructor.  Takes a hash or hashref of the following arguments:
 129
 130 =over
 131
 132 =item * tradition - The Text::Tradition object to which the collation
 133 belongs. Required.
 134
 135 =item * linear - Whether the collation should be linear; that is, whether
 136 transposed readings should be treated as two linked readings rather than one,
 137 and therefore whether the collation graph is acyclic.  Defaults to true.
 138
 139 =item * baselabel - The default label for the path taken by a base text
 140 (if any). Defaults to 'base text'.
 141
 142 =item * wit_list_separator - The string to join a list of witnesses for
 143 purposes of making labels in display graphs.  Defaults to ', '.
 144
 145 =item * ac_label - The extra label to tack onto a witness sigil when
 146 representing another layer of path for the given witness - that is, when
 147 a text has more than one possible reading due to scribal corrections or
 148 the like.  Defaults to ' (a.c.)'.
 149
 150 =back
 151
 152 =head1 ACCESSORS
 153
 154 =head2 tradition
 155
 156 =head2 linear
 157
 158 =head2 wit_list_separator
 159
 160 =head2 baselabel
 161
 162 =head2 ac_label
 163
 164 Simple accessors for collation attributes.
 165
 166 =head2 start
 167
 168 The meta-reading at the start of every witness path.
 169
 170 =head2 end
 171
 172 The meta-reading at the end of every witness path.
 173
 174 =head2 readings
 175
 176 Returns all Reading objects in the graph.
 177
 178 =head2 reading( $id )
 179
 180 Returns the Reading object corresponding to the given ID.
 181
 182 =head2 add_reading( $reading_args )
 183
 184 Adds a new reading object to the collation.
 185 See L<Text::Tradition::Collation::Reading> for the available arguments.
 186
 187 =head2 del_reading( $object_or_id )
 188
 189 Removes the given reading from the collation, implicitly removing its
 190 paths and relationships.
 191
 192 =head2 merge_readings( $main, $second )
 193
 194 Merges the $second reading into the $main one.
 195 The arguments may be either readings or reading IDs.
 196
 197 =head2 has_reading( $id )
 198
 199 Predicate to see whether a given reading ID is in the graph.
 200
 201 =head2 reading_witnesses( $object_or_id )
 202
 203 Returns a list of sigils whose witnesses contain the reading.
 204
 205 =head2 paths
 206
 207 Returns all reading paths within the document - that is, all edges in the
 208 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 209
 210 =head2 add_path( $source, $target, $sigil )
 211
 212 Links the given readings in the collation in sequence, under the given witness
 213 sigil.  The readings may be specified by object or ID.
 214
 215 =head2 del_path( $source, $target, $sigil )
 216
 217 Links the given readings in the collation in sequence, under the given witness
 218 sigil.  The readings may be specified by object or ID.
 219
 220 =head2 has_path( $source, $target );
 221
 222 Returns true if the two readings are linked in sequence in any witness.
 223 The readings may be specified by object or ID.
 224
 225 =head2 relationships
 226
 227 Returns all Relationship objects in the collation.
 228
 229 =head2 add_relationship( $reading, $other_reading, $options )
 230
 231 Adds a new relationship of the type given in $options between the two readings,
 232 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 233 where $status is true on success, and @vectors is a list of relationship edges
 234 that were ultimately added.
 235 See L<Text::Tradition::Collation::Relationship> for the available options.
 236
 237 =cut
 238
 239 sub BUILD {
 240     my $self = shift;
 241     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 242     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 243     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 244 }
 245
 246 ### Reading construct/destruct functions
 247
 248 sub add_reading {
 249         my( $self, $reading ) = @_;
 250         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 251                 my %args = %$reading;
 252                 $reading = Text::Tradition::Collation::Reading->new(
 253                         'collation' => $self,
 254                         %args );
 255         }
 256         # First check to see if a reading with this ID exists.
 257         if( $self->reading( $reading->id ) ) {
 258                 throw( "Collation already has a reading with id " . $reading->id );
 259         }
 260         $self->_add_reading( $reading->id => $reading );
 261         # Once the reading has been added, put it in both graphs.
 262         $self->sequence->add_vertex( $reading->id );
 263         $self->relations->add_reading( $reading->id );
 264         return $reading;
 265 };
 266
 267 around del_reading => sub {
 268         my $orig = shift;
 269         my $self = shift;
 270         my $arg = shift;
 271
 272         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 273                 $arg = $arg->id;
 274         }
 275         # Remove the reading from the graphs.
 276         $self->sequence->delete_vertex( $arg );
 277         $self->relations->delete_reading( $arg );
 278
 279         # Carry on.
 280         $self->$orig( $arg );
 281 };
 282
 283 # merge_readings( $main, $to_be_deleted );
 284
 285 sub merge_readings {
 286         my $self = shift;
 287
 288         # We only need the IDs for adding paths to the graph, not the reading
 289         # objects themselves.
 290     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 291
 292     # The kept reading should inherit the paths and the relationships
 293     # of the deleted reading.
 294         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 295                 my @vector = ( $kept );
 296                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 297                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 298                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 299                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 300                 $self->sequence->add_edge( @vector );
 301                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 302                 @wits{keys %$fwits} = values %$fwits;
 303                 $self->sequence->set_edge_attributes( @vector, \%wits );
 304         }
 305         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 306
 307         # Do the deletion deed.
 308         if( $combine_char ) {
 309                 my $kept_obj = $self->reading( $kept );
 310                 my $new_text = join( $combine_char, $kept_obj->text,
 311                         $self->reading( $deleted )->text );
 312                 $kept_obj->alter_text( $new_text );
 313         }
 314         $self->del_reading( $deleted );
 315 }
 316
 317
 318 # Helper function for manipulating the graph.
 319 sub _stringify_args {
 320         my( $self, $first, $second, $arg ) = @_;
 321     $first = $first->id
 322         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 323     $second = $second->id
 324         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 325     return( $first, $second, $arg );
 326 }
 327
 328 # Helper function for manipulating the graph.
 329 sub _objectify_args {
 330         my( $self, $first, $second, $arg ) = @_;
 331     $first = $self->reading( $first )
 332         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 333     $second = $self->reading( $second )
 334         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 335     return( $first, $second, $arg );
 336 }
 337 ### Path logic
 338
 339 sub add_path {
 340         my $self = shift;
 341
 342         # We only need the IDs for adding paths to the graph, not the reading
 343         # objects themselves.
 344     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 345
 346         # Connect the readings
 347     $self->sequence->add_edge( $source, $target );
 348     # Note the witness in question
 349     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 350 };
 351
 352 sub del_path {
 353         my $self = shift;
 354         my @args;
 355         if( ref( $_[0] ) eq 'ARRAY' ) {
 356                 my $e = shift @_;
 357                 @args = ( @$e, @_ );
 358         } else {
 359                 @args = @_;
 360         }
 361
 362         # We only need the IDs for adding paths to the graph, not the reading
 363         # objects themselves.
 364     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 365
 366         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 367                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 368         }
 369         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 370                 $self->sequence->delete_edge( $source, $target );
 371         }
 372 }
 373
 374
 375 # Extra graph-alike utility
 376 sub has_path {
 377         my $self = shift;
 378     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 379         return undef unless $self->sequence->has_edge( $source, $target );
 380         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 381 }
 382
 383 =head2 clear_witness( @sigil_list )
 384
 385 Clear the given witnesses out of the collation entirely, removing references
 386 to them in paths, and removing readings that belong only to them.  Should only
 387 be called via $tradition->del_witness.
 388
 389 =cut
 390
 391 sub clear_witness {
 392         my( $self, @sigils ) = @_;
 393
 394         # Clear the witness(es) out of the paths
 395         foreach my $e ( $self->paths ) {
 396                 foreach my $sig ( @sigils ) {
 397                         $self->del_path( $e, $sig );
 398                 }
 399         }
 400
 401         # Clear out the newly unused readings
 402         foreach my $r ( $self->readings ) {
 403                 unless( $self->reading_witnesses( $r ) ) {
 404                         $self->del_reading( $r );
 405                 }
 406         }
 407 }
 408
 409 sub add_relationship {
 410         my $self = shift;
 411     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 412     my( @vectors ) = $self->relations->add_relationship( $source,
 413         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 414     # Force a full rank recalculation every time. Yuck.
 415     $self->calculate_ranks() if $self->end->has_rank;
 416     return @vectors;
 417 }
 418
 419 =head2 reading_witnesses( $reading )
 420
 421 Return a list of sigils corresponding to the witnesses in which the reading appears.
 422
 423 =cut
 424
 425 sub reading_witnesses {
 426         my( $self, $reading ) = @_;
 427         # We need only check either the incoming or the outgoing edges; I have
 428         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 429         if( $reading eq $self->start ) {
 430                 return map { $_->sigil } $self->tradition->witnesses;
 431         }
 432         my %all_witnesses;
 433         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 434                 my $wits = $self->sequence->get_edge_attributes( @$e );
 435                 @all_witnesses{ keys %$wits } = 1;
 436         }
 437         return keys %all_witnesses;
 438 }
 439
 440 =head1 OUTPUT METHODS
 441
 442 =head2 as_svg( \%options )
 443
 444 Returns an SVG string that represents the graph, via as_dot and graphviz.
 445 See as_dot for a list of options.
 446
 447 =cut
 448
 449 sub as_svg {
 450     my( $self, $opts ) = @_;
 451     if( !$self->has_cached_svg || $opts->{'recalc'} ) {
 452                 my @cmd = qw/dot -Tsvg/;
 453                 my( $svg, $err );
 454                 my $dotfile = File::Temp->new();
 455                 ## USE FOR DEBUGGING
 456                 # $dotfile->unlink_on_destroy(0);
 457                 binmode $dotfile, ':utf8';
 458                 print $dotfile $self->as_dot( $opts );
 459                 push( @cmd, $dotfile->filename );
 460                 run( \@cmd, ">", binary(), \$svg );
 461                 $self->cached_svg( decode_utf8( $svg ) );
 462         }
 463         return $self->cached_svg;
 464 }
 465
 466
 467 =head2 as_dot( \%options )
 468
 469 Returns a string that is the collation graph expressed in dot
 470 (i.e. GraphViz) format.  Options include:
 471
 472 =over 4
 473
 474 =item * from
 475
 476 =item * to
 477
 478 =item * color_common
 479
 480 =back
 481
 482 =cut
 483
 484 sub as_dot {
 485     my( $self, $opts ) = @_;
 486     my $startrank = $opts->{'from'} if $opts;
 487     my $endrank = $opts->{'to'} if $opts;
 488     my $color_common = $opts->{'color_common'} if $opts;
 489     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 490        && $self->end->rank > 100;
 491
 492     # Check the arguments
 493     if( $startrank ) {
 494         return if $endrank && $startrank > $endrank;
 495         return if $startrank > $self->end->rank;
 496         }
 497         if( defined $endrank ) {
 498                 return if $endrank < 0;
 499                 $endrank = undef if $endrank == $self->end->rank;
 500         }
 501
 502     my $graph_name = $self->tradition->name;
 503     $graph_name =~ s/[^\w\s]//g;
 504     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 505
 506     my %graph_attrs = (
 507         'rankdir' => 'LR',
 508         'bgcolor' => 'none',
 509         );
 510     my %node_attrs = (
 511         'fontsize' => 14,
 512         'fillcolor' => 'white',
 513         'style' => 'filled',
 514         'shape' => 'ellipse'
 515         );
 516     my %edge_attrs = (
 517         'arrowhead' => 'open',
 518         'color' => '#000000',
 519         'fontcolor' => '#000000',
 520         );
 521
 522     my $dot = sprintf( "digraph %s {\n", $graph_name );
 523     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 524     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 525
 526         # Output substitute start/end readings if necessary
 527         if( $startrank ) {
 528                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 529         }
 530         if( $endrank ) {
 531                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 532         }
 533         if( $STRAIGHTENHACK ) {
 534                 ## HACK part 1
 535                 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
 536                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 537         }
 538         my %used;  # Keep track of the readings that actually appear in the graph
 539         # Sort the readings by rank if we have ranks; this speeds layout.
 540         my @all_readings = $self->end->has_rank
 541                 ? sort { $a->rank <=> $b->rank } $self->readings
 542                 : $self->readings;
 543         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 544     foreach my $reading ( @all_readings ) {
 545         # Only output readings within our rank range.
 546         next if $startrank && $reading->rank < $startrank;
 547         next if $endrank && $reading->rank > $endrank;
 548         $used{$reading->id} = 1;
 549         # Need not output nodes without separate labels
 550         next if $reading->id eq $reading->text;
 551         my $rattrs;
 552         my $label = $reading->text;
 553         $label =~ s/\"/\\\"/g;
 554                 $rattrs->{'label'} = $label;
 555                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 556         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 557     }
 558
 559         # Add the real edges. Need to weight one edge per rank jump, in a
 560         # continuous line.
 561         # my $weighted = $self->_add_edge_weights;
 562     my @edges = $self->paths;
 563         my( %substart, %subend );
 564     foreach my $edge ( @edges ) {
 565         # Do we need to output this edge?
 566         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 567                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 568                         my $variables = { %edge_attrs, 'label' => $label };
 569
 570                         # Account for the rank gap if necessary
 571                         my $rank0 = $self->reading( $edge->[0] )->rank
 572                                 if $self->reading( $edge->[0] )->has_rank;
 573                         my $rank1 = $self->reading( $edge->[1] )->rank
 574                                 if $self->reading( $edge->[1] )->has_rank;
 575                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 576                                 $variables->{'minlen'} = $rank1 - $rank0;
 577                         }
 578
 579                         # Add the calculated edge weights
 580                         # if( exists $weighted->{$edge->[0]}
 581 #                               && $weighted->{$edge->[0]} eq $edge->[1] ) {
 582 #                               # $variables->{'color'} = 'red';
 583 #                               $variables->{'weight'} = 3.0;
 584 #                       }
 585
 586                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 587                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 588                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 589
 590                         my $varopts = _dot_attr_string( $variables );
 591                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 592                                 $edge->[0], $edge->[1], $varopts );
 593         } elsif( $used{$edge->[0]} ) {
 594                 $subend{$edge->[0]} = 1;
 595         } elsif( $used{$edge->[1]} ) {
 596                 $substart{$edge->[1]} = 1;
 597         }
 598     }
 599     # Add substitute start and end edges if necessary
 600     foreach my $node ( keys %substart ) {
 601         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 602         my $variables = { %edge_attrs, 'label' => $witstr };
 603         my $varopts = _dot_attr_string( $variables );
 604         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 605         }
 606     foreach my $node ( keys %subend ) {
 607         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 608         my $variables = { %edge_attrs, 'label' => $witstr };
 609         my $varopts = _dot_attr_string( $variables );
 610         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 611         }
 612         # HACK part 2
 613         if( $STRAIGHTENHACK ) {
 614                 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 615         }
 616
 617     $dot .= "}\n";
 618     return $dot;
 619 }
 620
 621 sub _dot_attr_string {
 622         my( $hash ) = @_;
 623         my @attrs;
 624         foreach my $k ( sort keys %$hash ) {
 625                 my $v = $hash->{$k};
 626                 push( @attrs, $k.'="'.$v.'"' );
 627         }
 628         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 629 }
 630
 631 sub _add_edge_weights {
 632         my $self = shift;
 633         # Walk the graph from START to END, choosing the successor node with
 634         # the largest number of witness paths each time.
 635         my $weighted = {};
 636         my $curr = $self->start->id;
 637         my $ranked = $self->end->has_rank;
 638         while( $curr ne $self->end->id ) {
 639                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 640                 my @succ = sort { $self->path_witnesses( $curr, $a )
 641                                                         <=> $self->path_witnesses( $curr, $b ) }
 642                         $self->sequence->successors( $curr );
 643                 my $next = pop @succ;
 644                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 645                 # Try to avoid lacunae in the weighted path.
 646                 while( @succ &&
 647                            ( $self->reading( $next )->is_lacuna ||
 648                                  $nextrank - $rank > 1 ) ){
 649                         $next = pop @succ;
 650                 }
 651                 $weighted->{$curr} = $next;
 652                 $curr = $next;
 653         }
 654         return $weighted;
 655 }
 656
 657 =head2 path_witnesses( $edge )
 658
 659 Returns the list of sigils whose witnesses are associated with the given edge.
 660 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 661
 662 =cut
 663
 664 sub path_witnesses {
 665         my( $self, @edge ) = @_;
 666         # If edge is an arrayref, cope.
 667         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 668                 my $e = shift @edge;
 669                 @edge = @$e;
 670         }
 671         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 672         return @wits;
 673 }
 674
 675 sub _path_display_label {
 676         my $self = shift;
 677         my @wits = sort @_;
 678         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 679         if( scalar @wits > $maj ) {
 680                 # TODO break out a.c. wits
 681                 return 'majority';
 682         } else {
 683                 return join( ', ', @wits );
 684         }
 685 }
 686
 687
 688 =head2 as_graphml
 689
 690 Returns a GraphML representation of the collation.  The GraphML will contain
 691 two graphs. The first expresses the attributes of the readings and the witness
 692 paths that link them; the second expresses the relationships that link the
 693 readings.  This is the native transfer format for a tradition.
 694
 695 =begin testing
 696
 697 use Text::Tradition;
 698
 699 my $READINGS = 311;
 700 my $PATHS = 361;
 701
 702 my $datafile = 't/data/florilegium_tei_ps.xml';
 703 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 704                                       'name' => 'test0',
 705                                       'file' => $datafile,
 706                                       'linear' => 1 );
 707
 708 ok( $tradition, "Got a tradition object" );
 709 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 710 ok( $tradition->collation, "Tradition has a collation" );
 711
 712 my $c = $tradition->collation;
 713 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 714 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 715 is( scalar $c->relationships, 0, "Collation has all relationships" );
 716
 717 # Add a few relationships
 718 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 719 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 720 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 721
 722 # Now write it to GraphML and parse it again.
 723
 724 my $graphml = $c->as_graphml;
 725 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 726 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 727 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 728 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 729
 730 =end testing
 731
 732 =cut
 733
 734 sub as_graphml {
 735     my( $self ) = @_;
 736
 737     # Some namespaces
 738     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 739     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 740     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 741         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 742
 743     # Create the document and root node
 744     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 745     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 746     $graphml->setDocumentElement( $root );
 747     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 748     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 749
 750     # Add the data keys for the graph
 751     my %graph_data_keys;
 752     my $gdi = 0;
 753     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 754     foreach my $datum ( @graph_attributes ) {
 755         $graph_data_keys{$datum} = 'dg'.$gdi++;
 756         my $key = $root->addNewChild( $graphml_ns, 'key' );
 757         $key->setAttribute( 'attr.name', $datum );
 758         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 759         $key->setAttribute( 'for', 'graph' );
 760         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 761     }
 762
 763     # Add the data keys for nodes
 764     my %node_data_keys;
 765     my $ndi = 0;
 766     my %node_data = (
 767         id => 'string',
 768         text => 'string',
 769         rank => 'string',
 770         is_start => 'boolean',
 771         is_end => 'boolean',
 772         is_lacuna => 'boolean',
 773         );
 774     foreach my $datum ( keys %node_data ) {
 775         $node_data_keys{$datum} = 'dn'.$ndi++;
 776         my $key = $root->addNewChild( $graphml_ns, 'key' );
 777         $key->setAttribute( 'attr.name', $datum );
 778         $key->setAttribute( 'attr.type', $node_data{$datum} );
 779         $key->setAttribute( 'for', 'node' );
 780         $key->setAttribute( 'id', $node_data_keys{$datum} );
 781     }
 782
 783     # Add the data keys for edges, i.e. witnesses
 784     my $edi = 0;
 785     my %edge_data_keys;
 786     my %edge_data = (
 787         class => 'string',                              # Class, deprecated soon
 788         witness => 'string',                    # ID/label for a path
 789         relationship => 'string',               # ID/label for a relationship
 790         extra => 'boolean',                             # Path key
 791         scope => 'string',                              # Relationship key
 792         non_correctable => 'boolean',   # Relationship key
 793         non_independent => 'boolean',   # Relationship key
 794         );
 795     foreach my $datum ( keys %edge_data ) {
 796         $edge_data_keys{$datum} = 'de'.$edi++;
 797         my $key = $root->addNewChild( $graphml_ns, 'key' );
 798         $key->setAttribute( 'attr.name', $datum );
 799         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 800         $key->setAttribute( 'for', 'edge' );
 801         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 802     }
 803
 804     # Add the collation graph itself
 805     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 806     $sgraph->setAttribute( 'edgedefault', 'directed' );
 807     $sgraph->setAttribute( 'id', $self->tradition->name );
 808     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 809     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 810     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 811     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 812     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 813
 814     # Collation attribute data
 815     foreach my $datum ( @graph_attributes ) {
 816         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 817                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 818         }
 819
 820     my $node_ctr = 0;
 821     my %node_hash;
 822     # Add our readings to the graph
 823     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 824         # Add to the main graph
 825         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 826         my $node_xmlid = 'n' . $node_ctr++;
 827         $node_hash{ $n->id } = $node_xmlid;
 828         $node_el->setAttribute( 'id', $node_xmlid );
 829         foreach my $d ( keys %node_data ) {
 830                 my $nval = $n->$d;
 831                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 832                         if defined $nval;
 833         }
 834     }
 835
 836     # Add the path edges to the sequence graph
 837     my $edge_ctr = 0;
 838     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 839         # We add an edge in the graphml for every witness in $e.
 840         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 841                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 842                                                                                 $node_hash{ $e->[0] },
 843                                                                                 $node_hash{ $e->[1] } );
 844                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 845                         $edge_el->setAttribute( 'source', $from );
 846                         $edge_el->setAttribute( 'target', $to );
 847                         $edge_el->setAttribute( 'id', $id );
 848
 849                         # It's a witness path, so add the witness
 850                         my $base = $wit;
 851                         my $key = $edge_data_keys{'witness'};
 852                         # Is this an ante-corr witness?
 853                         my $aclabel = $self->ac_label;
 854                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 855                                 # Keep the base witness
 856                                 $base = $1;
 857                                 # ...and record that this is an 'extra' reading path
 858                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 859                         }
 860                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 861                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 862                 }
 863         }
 864
 865         # Add the relationship graph to the XML
 866         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 867                 $node_data_keys{'id'}, \%edge_data_keys );
 868
 869     # Save and return the thing
 870     my $result = decode_utf8( $graphml->toString(1) );
 871     return $result;
 872 }
 873
 874 sub _add_graphml_data {
 875     my( $el, $key, $value ) = @_;
 876     return unless defined $value;
 877     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 878     $data_el->setAttribute( 'key', $key );
 879     $data_el->appendText( $value );
 880 }
 881
 882 =head2 as_csv
 883
 884 Returns a CSV alignment table representation of the collation graph, one
 885 row per witness (or witness uncorrected.)
 886
 887 =cut
 888
 889 sub as_csv {
 890     my( $self ) = @_;
 891     my $table = $self->make_alignment_table;
 892     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 893     my @result;
 894     # Make the header row
 895     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 896         push( @result, decode_utf8( $csv->string ) );
 897     # Make the rest of the rows
 898     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 899         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 900         my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
 901         $csv->combine( @row );
 902         push( @result, decode_utf8( $csv->string ) );
 903     }
 904     return join( "\n", @result );
 905 }
 906
 907 =head2 make_alignment_table( $use_refs, $include_witnesses )
 908
 909 Return a reference to an alignment table, in a slightly enhanced CollateX
 910 format which looks like this:
 911
 912  $table = { alignment => [ { witness => "SIGIL",
 913                              tokens => [ { t => "TEXT" }, ... ] },
 914                            { witness => "SIG2",
 915                              tokens => [ { t => "TEXT" }, ... ] },
 916                            ... ],
 917             length => TEXTLEN };
 918
 919 If $use_refs is set to 1, the reading object is returned in the table
 920 instead of READINGTEXT; if not, the text of the reading is returned.
 921
 922 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 923 keys have a true hash value will be included.
 924
 925 =cut
 926
 927 sub make_alignment_table {
 928     my( $self, $noderefs, $include ) = @_;
 929     # Make sure we can do this
 930         throw( "Need a linear graph in order to make an alignment table" )
 931                 unless $self->linear;
 932         $self->calculate_ranks unless $self->end->has_rank;
 933
 934     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 935     my @all_pos = ( 1 .. $self->end->rank - 1 );
 936     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 937         if( $include ) {
 938                 next unless $include->{$wit->sigil};
 939         }
 940         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 941         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 942         my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
 943         push( @{$table->{'alignment'}},
 944                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 945         if( $wit->is_layered ) {
 946                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 947                         $wit->sigil.$self->ac_label );
 948             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
 949                         push( @{$table->{'alignment'}},
 950                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 951         }
 952     }
 953         return $table;
 954 }
 955
 956 sub _make_witness_row {
 957     my( $path, $positions, $noderefs ) = @_;
 958     my %char_hash;
 959     map { $char_hash{$_} = undef } @$positions;
 960     my $debug = 0;
 961     foreach my $rdg ( @$path ) {
 962         my $rtext = $rdg->text;
 963         $rtext = '#LACUNA#' if $rdg->is_lacuna;
 964         print STDERR "rank " . $rdg->rank . "\n" if $debug;
 965         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
 966         $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
 967                                                                            : { 't' => $rtext };
 968     }
 969     my @row = map { $char_hash{$_} } @$positions;
 970     # Fill in lacuna markers for undef spots in the row
 971     my $last_el = shift @row;
 972     my @filled_row = ( $last_el );
 973     foreach my $el ( @row ) {
 974         # If we are using node reference, make the lacuna node appear many times
 975         # in the table.  If not, use the lacuna tag.
 976         if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
 977             $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
 978         }
 979         push( @filled_row, $el );
 980         $last_el = $el;
 981     }
 982     return @filled_row;
 983 }
 984
 985 # Tiny utility function to say if a table element is a lacuna
 986 sub _el_is_lacuna {
 987     my $el = shift;
 988     return 1 if $el->{'t'} eq '#LACUNA#';
 989     return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
 990         && $el->{'t'}->is_lacuna;
 991     return 0;
 992 }
 993
 994 # Helper to turn the witnesses along columns rather than rows.  Assumes
 995 # equal-sized rows.
 996 sub _turn_table {
 997     my( $table ) = @_;
 998     my $result = [];
 999     return $result unless scalar @$table;
1000     my $nrows = scalar @{$table->[0]};
1001     foreach my $idx ( 0 .. $nrows - 1 ) {
1002         foreach my $wit ( 0 .. $#{$table} ) {
1003             $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
1004         }
1005     }
1006     return $result;
1007 }
1008
1009 =head1 NAVIGATION METHODS
1010
1011 =head2 reading_sequence( $first, $last, $sigil, $backup )
1012
1013 Returns the ordered list of readings, starting with $first and ending
1014 with $last, for the witness given in $sigil. If a $backup sigil is
1015 specified (e.g. when walking a layered witness), it will be used wherever
1016 no $sigil path exists.  If there is a base text reading, that will be
1017 used wherever no path exists for $sigil or $backup.
1018
1019 =cut
1020
1021 # TODO Think about returning some lazy-eval iterator.
1022 # TODO Get rid of backup; we should know from what witness is whether we need it.
1023
1024 sub reading_sequence {
1025     my( $self, $start, $end, $witness ) = @_;
1026
1027     $witness = $self->baselabel unless $witness;
1028     my @readings = ( $start );
1029     my %seen;
1030     my $n = $start;
1031     while( $n && $n->id ne $end->id ) {
1032         if( exists( $seen{$n->id} ) ) {
1033             throw( "Detected loop for $witness at " . $n->id );
1034         }
1035         $seen{$n->id} = 1;
1036
1037         my $next = $self->next_reading( $n, $witness );
1038         unless( $next ) {
1039             throw( "Did not find any path for $witness from reading " . $n->id );
1040         }
1041         push( @readings, $next );
1042         $n = $next;
1043     }
1044     # Check that the last reading is our end reading.
1045     my $last = $readings[$#readings];
1046     throw( "Last reading found from " . $start->text .
1047         " for witness $witness is not the end!" ) # TODO do we get this far?
1048         unless $last->id eq $end->id;
1049
1050     return @readings;
1051 }
1052
1053 =head2 next_reading( $reading, $sigil );
1054
1055 Returns the reading that follows the given reading along the given witness
1056 path.
1057
1058 =cut
1059
1060 sub next_reading {
1061     # Return the successor via the corresponding path.
1062     my $self = shift;
1063     my $answer = $self->_find_linked_reading( 'next', @_ );
1064         return undef unless $answer;
1065     return $self->reading( $answer );
1066 }
1067
1068 =head2 prior_reading( $reading, $sigil )
1069
1070 Returns the reading that precedes the given reading along the given witness
1071 path.
1072
1073 =cut
1074
1075 sub prior_reading {
1076     # Return the predecessor via the corresponding path.
1077     my $self = shift;
1078     my $answer = $self->_find_linked_reading( 'prior', @_ );
1079     return $self->reading( $answer );
1080 }
1081
1082 sub _find_linked_reading {
1083     my( $self, $direction, $node, $path ) = @_;
1084
1085     # Get a backup if we are dealing with a layered witness
1086     my $alt_path;
1087     my $aclabel = $self->ac_label;
1088     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1089         $alt_path = $1;
1090     }
1091
1092     my @linked_paths = $direction eq 'next'
1093         ? $self->sequence->edges_from( $node )
1094         : $self->sequence->edges_to( $node );
1095     return undef unless scalar( @linked_paths );
1096
1097     # We have to find the linked path that contains all of the
1098     # witnesses supplied in $path.
1099     my( @path_wits, @alt_path_wits );
1100     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1101     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1102     my $base_le;
1103     my $alt_le;
1104     foreach my $le ( @linked_paths ) {
1105         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1106             $base_le = $le;
1107         }
1108                 my @le_wits = sort $self->path_witnesses( $le );
1109                 if( _is_within( \@path_wits, \@le_wits ) ) {
1110                         # This is the right path.
1111                         return $direction eq 'next' ? $le->[1] : $le->[0];
1112                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1113                         $alt_le = $le;
1114                 }
1115     }
1116     # Got this far? Return the alternate path if it exists.
1117     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1118         if $alt_le;
1119
1120     # Got this far? Return the base path if it exists.
1121     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1122         if $base_le;
1123
1124     # Got this far? We have no appropriate path.
1125     warn "Could not find $direction node from " . $node->id
1126         . " along path $path";
1127     return undef;
1128 }
1129
1130 # Some set logic.
1131 sub _is_within {
1132     my( $set1, $set2 ) = @_;
1133     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1134     foreach my $el ( @$set1 ) {
1135         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1136     }
1137     return $ret;
1138 }
1139
1140 # Return the string that joins together a list of witnesses for
1141 # display on a single path.
1142 sub _witnesses_of_label {
1143     my( $self, $label ) = @_;
1144     my $regex = $self->wit_list_separator;
1145     my @answer = split( /\Q$regex\E/, $label );
1146     return @answer;
1147 }
1148
1149 =head2 common_readings
1150
1151 Returns the list of common readings in the graph (i.e. those readings that are
1152 shared by all non-lacunose witnesses.)
1153
1154 =cut
1155
1156 sub common_readings {
1157         my $self = shift;
1158         my @common = grep { $_->is_common } $self->readings;
1159         return @common;
1160 }
1161
1162 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1163
1164 Returns the text of a witness (plus its backup, if we are using a layer)
1165 as stored in the collation.  The text is returned as a string, where the
1166 individual readings are joined with spaces and the meta-readings (e.g.
1167 lacunae) are omitted.  Optional specification of $start and $end allows
1168 the generation of a subset of the witness text.
1169
1170 =cut
1171
1172 sub path_text {
1173         my( $self, $wit, $start, $end ) = @_;
1174         $start = $self->start unless $start;
1175         $end = $self->end unless $end;
1176         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1177         return join( ' ', map { $_->text } @path );
1178 }
1179
1180 =head1 INITIALIZATION METHODS
1181
1182 These are mostly for use by parsers.
1183
1184 =head2 make_witness_path( $witness )
1185
1186 Link the array of readings contained in $witness->path (and in
1187 $witness->uncorrected_path if it exists) into collation paths.
1188 Clear out the arrays when finished.
1189
1190 =head2 make_witness_paths
1191
1192 Call make_witness_path for all witnesses in the tradition.
1193
1194 =cut
1195
1196 # For use when a collation is constructed from a base text and an apparatus.
1197 # We have the sequences of readings and just need to add path edges.
1198 # When we are done, clear out the witness path attributes, as they are no
1199 # longer needed.
1200 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1201
1202 sub make_witness_paths {
1203     my( $self ) = @_;
1204     foreach my $wit ( $self->tradition->witnesses ) {
1205         # print STDERR "Making path for " . $wit->sigil . "\n";
1206         $self->make_witness_path( $wit );
1207     }
1208 }
1209
1210 sub make_witness_path {
1211     my( $self, $wit ) = @_;
1212     my @chain = @{$wit->path};
1213     my $sig = $wit->sigil;
1214     foreach my $idx ( 0 .. $#chain-1 ) {
1215         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1216     }
1217     if( $wit->is_layered ) {
1218         @chain = @{$wit->uncorrected_path};
1219         foreach my $idx( 0 .. $#chain-1 ) {
1220             my $source = $chain[$idx];
1221             my $target = $chain[$idx+1];
1222             $self->add_path( $source, $target, $sig.$self->ac_label )
1223                 unless $self->has_path( $source, $target, $sig );
1224         }
1225     }
1226     $wit->clear_path;
1227     $wit->clear_uncorrected_path;
1228 }
1229
1230 =head2 calculate_ranks
1231
1232 Calculate the reading ranks (that is, their aligned positions relative
1233 to each other) for the graph.  This can only be called on linear collations.
1234
1235 =begin testing
1236
1237 use Text::Tradition;
1238
1239 my $cxfile = 't/data/Collatex-16.xml';
1240 my $t = Text::Tradition->new(
1241     'name'  => 'inline',
1242     'input' => 'CollateX',
1243     'file'  => $cxfile,
1244     );
1245 my $c = $t->collation;
1246
1247 # Make an svg
1248 my $svg = $c->as_svg;
1249 is( substr( $svg, 0, 5 ), '<?xml', "Got XML doc for svg" );
1250 ok( $c->has_cached_svg, "SVG was cached" );
1251 is( $c->as_svg, $svg, "Cached SVG returned upon second call" );
1252 $c->calculate_ranks;
1253 is( $c->as_svg, $svg, "Cached SVG retained with no rank change" );
1254 $c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
1255 isnt( $c->as_svg, $svg, "SVG changed after relationship add" );
1256
1257 =end testing
1258
1259 =cut
1260
1261 sub calculate_ranks {
1262     my $self = shift;
1263     # Save the existing ranks, in case we need to invalidate the cached SVG.
1264     my %existing_ranks;
1265     # Walk a version of the graph where every node linked by a relationship
1266     # edge is fundamentally the same node, and do a topological ranking on
1267     # the nodes in this graph.
1268     my $topo_graph = Graph->new();
1269     my %rel_containers;
1270     my $rel_ctr = 0;
1271     # Add the nodes
1272     foreach my $r ( $self->readings ) {
1273         next if exists $rel_containers{$r->id};
1274         my @rels = $r->related_readings( 'colocated' );
1275         if( @rels ) {
1276             # Make a relationship container.
1277             push( @rels, $r );
1278             my $rn = 'rel_container_' . $rel_ctr++;
1279             $topo_graph->add_vertex( $rn );
1280             foreach( @rels ) {
1281                 $rel_containers{$_->id} = $rn;
1282             }
1283         } else {
1284             # Add a new node to mirror the old node.
1285             $rel_containers{$r->id} = $r->id;
1286             $topo_graph->add_vertex( $r->id );
1287         }
1288     }
1289
1290     # Add the edges.
1291     foreach my $r ( $self->readings ) {
1292                 $existing_ranks{$r} = $r->rank;
1293         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1294                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1295                         $rel_containers{$n} );
1296                 # $DB::single = 1 unless $tfrom && $tto;
1297             $topo_graph->add_edge( $tfrom, $tto );
1298         }
1299     }
1300
1301     # Now do the rankings, starting with the start node.
1302     my $topo_start = $rel_containers{$self->start->id};
1303     my $node_ranks = { $topo_start => 0 };
1304     my @curr_origin = ( $topo_start );
1305     # A little iterative function.
1306     while( @curr_origin ) {
1307         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1308     }
1309     # Transfer our rankings from the topological graph to the real one.
1310     foreach my $r ( $self->readings ) {
1311         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1312             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1313         } else {
1314                 # Die. Find the last rank we calculated.
1315                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1316                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1317                         $self->readings;
1318                 my $last = pop @all_defined;
1319             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1320         }
1321     }
1322     # Do we need to invalidate the cached SVG?
1323     if( $self->has_cached_svg ) {
1324         foreach my $r ( $self->readings ) {
1325                 next if $existing_ranks{$r} == $r->rank;
1326                 $self->wipe_svg;
1327                 last;
1328         }
1329     }
1330 }
1331
1332 sub _assign_rank {
1333     my( $graph, $node_ranks, @current_nodes ) = @_;
1334     # Look at each of the children of @current_nodes.  If all the child's
1335     # parents have a rank, assign it the highest rank + 1 and add it to
1336     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1337     # parent gets a rank.
1338     my @next_nodes;
1339     foreach my $c ( @current_nodes ) {
1340         warn "Current reading $c has no rank!"
1341             unless exists $node_ranks->{$c};
1342         # print STDERR "Looking at child of node $c, rank "
1343         #     . $node_ranks->{$c} . "\n";
1344         foreach my $child ( $graph->successors( $c ) ) {
1345             next if exists $node_ranks->{$child};
1346             my $highest_rank = -1;
1347             my $skip = 0;
1348             foreach my $parent ( $graph->predecessors( $child ) ) {
1349                 if( exists $node_ranks->{$parent} ) {
1350                     $highest_rank = $node_ranks->{$parent}
1351                         if $highest_rank <= $node_ranks->{$parent};
1352                 } else {
1353                     $skip = 1;
1354                     last;
1355                 }
1356             }
1357             next if $skip;
1358             my $c_rank = $highest_rank + 1;
1359             # print STDERR "Assigning rank $c_rank to node $child \n";
1360             $node_ranks->{$child} = $c_rank;
1361             push( @next_nodes, $child );
1362         }
1363     }
1364     return @next_nodes;
1365 }
1366
1367 =head2 flatten_ranks
1368
1369 A convenience method for parsing collation data.  Searches the graph for readings
1370 with the same text at the same rank, and merges any that are found.
1371
1372 =cut
1373
1374 sub flatten_ranks {
1375     my $self = shift;
1376     my %unique_rank_rdg;
1377     foreach my $rdg ( $self->readings ) {
1378         next unless $rdg->has_rank;
1379         my $key = $rdg->rank . "||" . $rdg->text;
1380         if( exists $unique_rank_rdg{$key} ) {
1381             # Combine!
1382                 # print STDERR "Combining readings at same rank: $key\n";
1383             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1384             # TODO see if this now makes a common point.
1385         } else {
1386             $unique_rank_rdg{$key} = $rdg;
1387         }
1388     }
1389 }
1390
1391 =head2 remove_collations
1392
1393 Another convenience method for parsing. Removes all 'collation' relationships
1394 that were defined in order to get the reading ranks to be correct.
1395
1396 =begin testing
1397
1398 use Text::Tradition;
1399
1400 my $cxfile = 't/data/Collatex-16.xml';
1401 my $t = Text::Tradition->new(
1402     'name'  => 'inline',
1403     'input' => 'CollateX',
1404     'file'  => $cxfile,
1405     );
1406 my $c = $t->collation;
1407
1408 isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
1409 $c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );
1410 is( scalar $c->relationships, 4, "Found all expected relationships" );
1411 $c->remove_collations;
1412 is( scalar $c->relationships, 3, "Collated relationships now gone" );
1413 is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
1414
1415 =end testing
1416
1417 =cut
1418
1419 sub remove_collations {
1420         my $self = shift;
1421         foreach my $reledge ( $self->relationships ) {
1422                 my $relobj = $self->relations->get_relationship( $reledge );
1423                 if( $relobj && $relobj->type eq 'collated' ) {
1424                         $self->relations->delete_relationship( $reledge );
1425                 }
1426         }
1427 }
1428
1429
1430 =head2 calculate_common_readings
1431
1432 Goes through the graph identifying the readings that appear in every witness
1433 (apart from those with lacunae at that spot.) Marks them as common and returns
1434 the list.
1435
1436 =begin testing
1437
1438 use Text::Tradition;
1439
1440 my $cxfile = 't/data/Collatex-16.xml';
1441 my $t = Text::Tradition->new(
1442     'name'  => 'inline',
1443     'input' => 'CollateX',
1444     'file'  => $cxfile,
1445     );
1446 my $c = $t->collation;
1447
1448 my @common = $c->calculate_common_readings();
1449 is( scalar @common, 8, "Found correct number of common readings" );
1450 my @marked = sort $c->common_readings();
1451 is( scalar @common, 8, "All common readings got marked as such" );
1452 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1453 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1454
1455 =end testing
1456
1457 =cut
1458
1459 sub calculate_common_readings {
1460         my $self = shift;
1461         my @common;
1462         my $table = $self->make_alignment_table( 1 );
1463         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1464                 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1465                 my %hash;
1466                 foreach my $r ( @row ) {
1467                         if( $r ) {
1468                                 $hash{$r->id} = $r unless $r->is_meta;
1469                         } else {
1470                                 $hash{'UNDEF'} = $r;
1471                         }
1472                 }
1473                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1474                         my( $r ) = values %hash;
1475                         $r->is_common( 1 );
1476                         push( @common, $r );
1477                 }
1478         }
1479         return @common;
1480 }
1481
1482 =head2 text_from_paths
1483
1484 Calculate the text array for all witnesses from the path, for later consistency
1485 checking.  Only to be used if there is no non-graph-based way to know the
1486 original texts.
1487
1488 =cut
1489
1490 sub text_from_paths {
1491         my $self = shift;
1492     foreach my $wit ( $self->tradition->witnesses ) {
1493         my @text = split( /\s+/,
1494                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1495         $wit->text( \@text );
1496         if( $wit->is_layered ) {
1497                         my @uctext = split( /\s+/,
1498                                 $self->reading_sequence( $self->start, $self->end,
1499                                         $wit->sigil.$self->ac_label ) );
1500                         $wit->text( \@uctext );
1501         }
1502     }
1503 }
1504
1505 =head1 UTILITY FUNCTIONS
1506
1507 =head2 common_predecessor( $reading_a, $reading_b )
1508
1509 Find the last reading that occurs in sequence before both the given readings.
1510
1511 =head2 common_successor( $reading_a, $reading_b )
1512
1513 Find the first reading that occurs in sequence after both the given readings.
1514
1515 =begin testing
1516
1517 use Text::Tradition;
1518
1519 my $cxfile = 't/data/Collatex-16.xml';
1520 my $t = Text::Tradition->new(
1521     'name'  => 'inline',
1522     'input' => 'CollateX',
1523     'file'  => $cxfile,
1524     );
1525 my $c = $t->collation;
1526
1527 is( $c->common_predecessor( 'n9', 'n23' )->id,
1528     'n20', "Found correct common predecessor" );
1529 is( $c->common_successor( 'n9', 'n23' )->id,
1530     '#END#', "Found correct common successor" );
1531
1532 is( $c->common_predecessor( 'n19', 'n17' )->id,
1533     'n16', "Found correct common predecessor for readings on same path" );
1534 is( $c->common_successor( 'n21', 'n26' )->id,
1535     '#END#', "Found correct common successor for readings on same path" );
1536
1537 =end testing
1538
1539 =cut
1540
1541 ## Return the closest reading that is a predecessor of both the given readings.
1542 sub common_predecessor {
1543         my $self = shift;
1544         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1545         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1546 }
1547
1548 sub common_successor {
1549         my $self = shift;
1550         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1551         return $self->_common_in_path( $r1, $r2, 'successors' );
1552 }
1553
1554 sub _common_in_path {
1555         my( $self, $r1, $r2, $dir ) = @_;
1556         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1557         $iter = $self->end->rank - $iter if $dir eq 'successors';
1558         my @candidates;
1559         my @last_checked = ( $r1, $r2 );
1560         my %all_seen;
1561         while( !@candidates ) {
1562                 my @new_lc;
1563                 foreach my $lc ( @last_checked ) {
1564                         foreach my $p ( $lc->$dir ) {
1565                                 if( $all_seen{$p->id} ) {
1566                                         push( @candidates, $p );
1567                                 } else {
1568                                         $all_seen{$p->id} = 1;
1569                                         push( @new_lc, $p );
1570                                 }
1571                         }
1572                 }
1573                 @last_checked = @new_lc;
1574         }
1575         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1576         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1577 }
1578
1579 sub throw {
1580         Text::Tradition::Error->throw(
1581                 'ident' => 'Collation error',
1582                 'message' => $_[0],
1583                 );
1584 }
1585
1586 no Moose;
1587 __PACKAGE__->meta->make_immutable;
1588
1589 =head1 LICENSE
1590
1591 This package is free software and is provided "as is" without express
1592 or implied warranty.  You can redistribute it and/or modify it under
1593 the same terms as Perl itself.
1594
1595 =head1 AUTHOR
1596
1597 Tara L Andrews E<lt>aurum@cpan.orgE<gt>