lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30                 del_relationship => 'del_relationship',
  31         },
  32         writer => '_set_relations',
  33         );
  34
  35 has 'tradition' => (
  36     is => 'ro',
  37     isa => 'Text::Tradition',
  38     weak_ref => 1,
  39     );
  40
  41 has 'readings' => (
  42         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  43         traits => ['Hash'],
  44     handles => {
  45         reading     => 'get',
  46         _add_reading => 'set',
  47         del_reading => 'delete',
  48         has_reading => 'exists',
  49         readings   => 'values',
  50     },
  51     default => sub { {} },
  52         );
  53
  54 has 'wit_list_separator' => (
  55     is => 'rw',
  56     isa => 'Str',
  57     default => ', ',
  58     );
  59
  60 has 'baselabel' => (
  61     is => 'rw',
  62     isa => 'Str',
  63     default => 'base text',
  64     );
  65
  66 has 'linear' => (
  67     is => 'rw',
  68     isa => 'Bool',
  69     default => 1,
  70     );
  71
  72 has 'ac_label' => (
  73     is => 'rw',
  74     isa => 'Str',
  75     default => ' (a.c.)',
  76     );
  77
  78 has 'start' => (
  79         is => 'ro',
  80         isa => 'Text::Tradition::Collation::Reading',
  81         writer => '_set_start',
  82         weak_ref => 1,
  83         );
  84
  85 has 'end' => (
  86         is => 'ro',
  87         isa => 'Text::Tradition::Collation::Reading',
  88         writer => '_set_end',
  89         weak_ref => 1,
  90         );
  91
  92 =head1 NAME
  93
  94 Text::Tradition::Collation - a software model for a text collation
  95
  96 =head1 SYNOPSIS
  97
  98   use Text::Tradition;
  99   my $t = Text::Tradition->new(
 100     'name' => 'this is a text',
 101     'input' => 'TEI',
 102     'file' => '/path/to/tei_parallel_seg_file.xml' );
 103
 104   my $c = $t->collation;
 105   my @readings = $c->readings;
 106   my @paths = $c->paths;
 107   my @relationships = $c->relationships;
 108
 109   my $svg_variant_graph = $t->collation->as_svg();
 110
 111 =head1 DESCRIPTION
 112
 113 Text::Tradition is a library for representation and analysis of collated
 114 texts, particularly medieval ones.  The Collation is the central feature of
 115 a Tradition, where the text, its sequence of readings, and its relationships
 116 between readings are actually kept.
 117
 118 =head1 CONSTRUCTOR
 119
 120 =head2 new
 121
 122 The constructor.  Takes a hash or hashref of the following arguments:
 123
 124 =over
 125
 126 =item * tradition - The Text::Tradition object to which the collation
 127 belongs. Required.
 128
 129 =item * linear - Whether the collation should be linear; that is, whether
 130 transposed readings should be treated as two linked readings rather than one,
 131 and therefore whether the collation graph is acyclic.  Defaults to true.
 132
 133 =item * baselabel - The default label for the path taken by a base text
 134 (if any). Defaults to 'base text'.
 135
 136 =item * wit_list_separator - The string to join a list of witnesses for
 137 purposes of making labels in display graphs.  Defaults to ', '.
 138
 139 =item * ac_label - The extra label to tack onto a witness sigil when
 140 representing another layer of path for the given witness - that is, when
 141 a text has more than one possible reading due to scribal corrections or
 142 the like.  Defaults to ' (a.c.)'.
 143
 144 =back
 145
 146 =head1 ACCESSORS
 147
 148 =head2 tradition
 149
 150 =head2 linear
 151
 152 =head2 wit_list_separator
 153
 154 =head2 baselabel
 155
 156 =head2 ac_label
 157
 158 Simple accessors for collation attributes.
 159
 160 =head2 start
 161
 162 The meta-reading at the start of every witness path.
 163
 164 =head2 end
 165
 166 The meta-reading at the end of every witness path.
 167
 168 =head2 readings
 169
 170 Returns all Reading objects in the graph.
 171
 172 =head2 reading( $id )
 173
 174 Returns the Reading object corresponding to the given ID.
 175
 176 =head2 add_reading( $reading_args )
 177
 178 Adds a new reading object to the collation.
 179 See L<Text::Tradition::Collation::Reading> for the available arguments.
 180
 181 =head2 del_reading( $object_or_id )
 182
 183 Removes the given reading from the collation, implicitly removing its
 184 paths and relationships.
 185
 186 =head2 merge_readings( $main, $second )
 187
 188 Merges the $second reading into the $main one.
 189 The arguments may be either readings or reading IDs.
 190
 191 =head2 has_reading( $id )
 192
 193 Predicate to see whether a given reading ID is in the graph.
 194
 195 =head2 reading_witnesses( $object_or_id )
 196
 197 Returns a list of sigils whose witnesses contain the reading.
 198
 199 =head2 paths
 200
 201 Returns all reading paths within the document - that is, all edges in the
 202 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 203
 204 =head2 add_path( $source, $target, $sigil )
 205
 206 Links the given readings in the collation in sequence, under the given witness
 207 sigil.  The readings may be specified by object or ID.
 208
 209 =head2 del_path( $source, $target, $sigil )
 210
 211 Links the given readings in the collation in sequence, under the given witness
 212 sigil.  The readings may be specified by object or ID.
 213
 214 =head2 has_path( $source, $target );
 215
 216 Returns true if the two readings are linked in sequence in any witness.
 217 The readings may be specified by object or ID.
 218
 219 =head2 relationships
 220
 221 Returns all Relationship objects in the collation.
 222
 223 =head2 add_relationship( $reading, $other_reading, $options )
 224
 225 Adds a new relationship of the type given in $options between the two readings,
 226 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 227 where $status is true on success, and @vectors is a list of relationship edges
 228 that were ultimately added.
 229 See L<Text::Tradition::Collation::Relationship> for the available options.
 230
 231 =cut
 232
 233 sub BUILD {
 234     my $self = shift;
 235     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 236     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 237     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 238 }
 239
 240 ### Reading construct/destruct functions
 241
 242 sub add_reading {
 243         my( $self, $reading ) = @_;
 244         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 245                 my %args = %$reading;
 246                 $reading = Text::Tradition::Collation::Reading->new(
 247                         'collation' => $self,
 248                         %args );
 249         }
 250         # First check to see if a reading with this ID exists.
 251         if( $self->reading( $reading->id ) ) {
 252                 throw( "Collation already has a reading with id " . $reading->id );
 253         }
 254         $self->_add_reading( $reading->id => $reading );
 255         # Once the reading has been added, put it in both graphs.
 256         $self->sequence->add_vertex( $reading->id );
 257         $self->relations->add_reading( $reading->id );
 258         return $reading;
 259 };
 260
 261 around del_reading => sub {
 262         my $orig = shift;
 263         my $self = shift;
 264         my $arg = shift;
 265
 266         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 267                 $arg = $arg->id;
 268         }
 269         # Remove the reading from the graphs.
 270         $self->sequence->delete_vertex( $arg );
 271         $self->relations->delete_reading( $arg );
 272
 273         # Carry on.
 274         $self->$orig( $arg );
 275 };
 276
 277 # merge_readings( $main, $to_be_deleted );
 278
 279 sub merge_readings {
 280         my $self = shift;
 281
 282         # We only need the IDs for adding paths to the graph, not the reading
 283         # objects themselves.
 284     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 285
 286     # The kept reading should inherit the paths and the relationships
 287     # of the deleted reading.
 288         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 289                 my @vector = ( $kept );
 290                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 291                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 292                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 293                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 294                 $self->sequence->add_edge( @vector );
 295                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 296                 @wits{keys %$fwits} = values %$fwits;
 297                 $self->sequence->set_edge_attributes( @vector, \%wits );
 298         }
 299         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 300
 301         # Do the deletion deed.
 302         if( $combine_char ) {
 303                 my $kept_obj = $self->reading( $kept );
 304                 my $new_text = join( $combine_char, $kept_obj->text,
 305                         $self->reading( $deleted )->text );
 306                 $kept_obj->alter_text( $new_text );
 307         }
 308         $self->del_reading( $deleted );
 309 }
 310
 311
 312 # Helper function for manipulating the graph.
 313 sub _stringify_args {
 314         my( $self, $first, $second, $arg ) = @_;
 315     $first = $first->id
 316         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 317     $second = $second->id
 318         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 319     return( $first, $second, $arg );
 320 }
 321
 322 # Helper function for manipulating the graph.
 323 sub _objectify_args {
 324         my( $self, $first, $second, $arg ) = @_;
 325     $first = $self->reading( $first )
 326         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 327     $second = $self->reading( $second )
 328         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 329     return( $first, $second, $arg );
 330 }
 331 ### Path logic
 332
 333 sub add_path {
 334         my $self = shift;
 335
 336         # We only need the IDs for adding paths to the graph, not the reading
 337         # objects themselves.
 338     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 339
 340         # Connect the readings
 341     $self->sequence->add_edge( $source, $target );
 342     # Note the witness in question
 343     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 344 };
 345
 346 sub del_path {
 347         my $self = shift;
 348         my @args;
 349         if( ref( $_[0] ) eq 'ARRAY' ) {
 350                 my $e = shift @_;
 351                 @args = ( @$e, @_ );
 352         } else {
 353                 @args = @_;
 354         }
 355
 356         # We only need the IDs for adding paths to the graph, not the reading
 357         # objects themselves.
 358     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 359
 360         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 361                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 362         }
 363         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 364                 $self->sequence->delete_edge( $source, $target );
 365         }
 366 }
 367
 368
 369 # Extra graph-alike utility
 370 sub has_path {
 371         my $self = shift;
 372     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 373         return undef unless $self->sequence->has_edge( $source, $target );
 374         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 375 }
 376
 377 =head2 clear_witness( @sigil_list )
 378
 379 Clear the given witnesses out of the collation entirely, removing references
 380 to them in paths, and removing readings that belong only to them.  Should only
 381 be called via $tradition->del_witness.
 382
 383 =cut
 384
 385 sub clear_witness {
 386         my( $self, @sigils ) = @_;
 387
 388         # Clear the witness(es) out of the paths
 389         foreach my $e ( $self->paths ) {
 390                 foreach my $sig ( @sigils ) {
 391                         $self->del_path( $e, $sig );
 392                 }
 393         }
 394
 395         # Clear out the newly unused readings
 396         foreach my $r ( $self->readings ) {
 397                 unless( $self->reading_witnesses( $r ) ) {
 398                         $self->del_reading( $r );
 399                 }
 400         }
 401 }
 402
 403 sub add_relationship {
 404         my $self = shift;
 405     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 406     my( @vectors ) = $self->relations->add_relationship( $source,
 407         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 408     # Force a full rank recalculation every time. Yuck.
 409     $self->calculate_ranks() if $self->end->has_rank;
 410     return @vectors;
 411 }
 412
 413 =head2 reading_witnesses( $reading )
 414
 415 Return a list of sigils corresponding to the witnesses in which the reading appears.
 416
 417 =cut
 418
 419 sub reading_witnesses {
 420         my( $self, $reading ) = @_;
 421         # We need only check either the incoming or the outgoing edges; I have
 422         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 423         if( $reading eq $self->start ) {
 424                 return map { $_->sigil } $self->tradition->witnesses;
 425         }
 426         my %all_witnesses;
 427         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 428                 my $wits = $self->sequence->get_edge_attributes( @$e );
 429                 @all_witnesses{ keys %$wits } = 1;
 430         }
 431         return keys %all_witnesses;
 432 }
 433
 434 =head1 OUTPUT METHODS
 435
 436 =head2 as_svg( \%options )
 437
 438 Returns an SVG string that represents the graph, via as_dot and graphviz.
 439 See as_dot for a list of options.
 440
 441 =cut
 442
 443 sub as_svg {
 444     my( $self, $opts ) = @_;
 445
 446     my @cmd = qw/dot -Tsvg/;
 447     my( $svg, $err );
 448     my $dotfile = File::Temp->new();
 449     ## USE FOR DEBUGGING
 450     # $dotfile->unlink_on_destroy(0);
 451     binmode $dotfile, ':utf8';
 452     print $dotfile $self->as_dot( $opts );
 453     push( @cmd, $dotfile->filename );
 454     run( \@cmd, ">", binary(), \$svg );
 455     return decode_utf8( $svg );
 456 }
 457
 458
 459 =head2 as_dot( \%options )
 460
 461 Returns a string that is the collation graph expressed in dot
 462 (i.e. GraphViz) format.  Options include:
 463
 464 =over 4
 465
 466 =item * from
 467
 468 =item * to
 469
 470 =item * color_common
 471
 472 =back
 473
 474 =cut
 475
 476 sub as_dot {
 477     my( $self, $opts ) = @_;
 478     my $startrank = $opts->{'from'} if $opts;
 479     my $endrank = $opts->{'to'} if $opts;
 480     my $color_common = $opts->{'color_common'} if $opts;
 481
 482     # Check the arguments
 483     if( $startrank ) {
 484         return if $endrank && $startrank > $endrank;
 485         return if $startrank > $self->end->rank;
 486         }
 487         if( defined $endrank ) {
 488                 return if $endrank < 0;
 489                 $endrank = undef if $endrank == $self->end->rank;
 490         }
 491
 492     # TODO consider making some of these things configurable
 493     my $graph_name = $self->tradition->name;
 494     $graph_name =~ s/[^\w\s]//g;
 495     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 496
 497     my %graph_attrs = (
 498         'rankdir' => 'LR',
 499         'bgcolor' => 'none',
 500         );
 501     my %node_attrs = (
 502         'fontsize' => 14,
 503         'fillcolor' => 'white',
 504         'style' => 'filled',
 505         'shape' => 'ellipse'
 506         );
 507     my %edge_attrs = (
 508         'arrowhead' => 'open',
 509         'color' => '#000000',
 510         'fontcolor' => '#000000',
 511         );
 512
 513     my $dot = sprintf( "digraph %s {\n", $graph_name );
 514     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 515     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 516
 517         # Output substitute start/end readings if necessary
 518         if( $startrank ) {
 519                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 520         }
 521         if( $endrank ) {
 522                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 523         }
 524
 525         my %used;  # Keep track of the readings that actually appear in the graph
 526         # Sort the readings by rank if we have ranks; this speeds layout.
 527         my @all_readings = $self->end->has_rank
 528                 ? sort { $a->rank <=> $b->rank } $self->readings
 529                 : $self->readings;
 530     foreach my $reading ( @all_readings ) {
 531         # Only output readings within our rank range.
 532         next if $startrank && $reading->rank < $startrank;
 533         next if $endrank && $reading->rank > $endrank;
 534         $used{$reading->id} = 1;
 535         # Need not output nodes without separate labels
 536         next if $reading->id eq $reading->text;
 537         my $rattrs;
 538         my $label = $reading->text;
 539         $label =~ s/\"/\\\"/g;
 540                 $rattrs->{'label'} = $label;
 541                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 542         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 543     }
 544
 545         # Add the real edges. Need to weight one edge per rank jump, in a
 546         # continuous line.
 547         my $weighted = $self->_add_edge_weights;
 548     my @edges = $self->paths;
 549         my( %substart, %subend );
 550     foreach my $edge ( @edges ) {
 551         # Do we need to output this edge?
 552         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 553                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 554                         my $variables = { %edge_attrs, 'label' => $label };
 555
 556                         # Account for the rank gap if necessary
 557                         my $rank0 = $self->reading( $edge->[0] )->rank
 558                                 if $self->reading( $edge->[0] )->has_rank;
 559                         my $rank1 = $self->reading( $edge->[1] )->rank
 560                                 if $self->reading( $edge->[1] )->has_rank;
 561                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 562                                 $variables->{'minlen'} = $rank1 - $rank0;
 563                         }
 564
 565                         # Add the calculated edge weights
 566                         if( exists $weighted->{$edge->[0]}
 567                                 && $weighted->{$edge->[0]} eq $edge->[1] ) {
 568                                 # $variables->{'color'} = 'red';
 569                                 $variables->{'weight'} = 3.0;
 570                         }
 571
 572                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 573                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 574                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 575
 576                         my $varopts = _dot_attr_string( $variables );
 577                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 578                                 $edge->[0], $edge->[1], $varopts );
 579         } elsif( $used{$edge->[0]} ) {
 580                 $subend{$edge->[0]} = 1;
 581         } elsif( $used{$edge->[1]} ) {
 582                 $substart{$edge->[1]} = 1;
 583         }
 584     }
 585     # Add substitute start and end edges if necessary
 586     foreach my $node ( keys %substart ) {
 587         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 588         my $variables = { %edge_attrs, 'label' => $witstr };
 589         my $varopts = _dot_attr_string( $variables );
 590         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 591         }
 592     foreach my $node ( keys %subend ) {
 593         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 594         my $variables = { %edge_attrs, 'label' => $witstr };
 595         my $varopts = _dot_attr_string( $variables );
 596         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 597         }
 598
 599     $dot .= "}\n";
 600     return $dot;
 601 }
 602
 603 sub _dot_attr_string {
 604         my( $hash ) = @_;
 605         my @attrs;
 606         foreach my $k ( sort keys %$hash ) {
 607                 my $v = $hash->{$k};
 608                 push( @attrs, $k.'="'.$v.'"' );
 609         }
 610         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 611 }
 612
 613 sub _add_edge_weights {
 614         my $self = shift;
 615         # Walk the graph from START to END, choosing the successor node with
 616         # the largest number of witness paths each time.
 617         my $weighted = {};
 618         my $curr = $self->start->id;
 619         while( $curr ne $self->end->id ) {
 620                 my @succ = sort { $self->path_witnesses( $curr, $a )
 621                                                         <=> $self->path_witnesses( $curr, $b ) }
 622                         $self->sequence->successors( $curr );
 623                 my $next = pop @succ;
 624                 # Try to avoid lacunae in the weighted path.
 625                 while( $self->reading( $next )->is_lacuna && @succ ) {
 626                         $next = pop @succ;
 627                 }
 628                 $weighted->{$curr} = $next;
 629                 $curr = $next;
 630         }
 631         return $weighted;
 632 }
 633
 634 =head2 path_witnesses( $edge )
 635
 636 Returns the list of sigils whose witnesses are associated with the given edge.
 637 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 638
 639 =cut
 640
 641 sub path_witnesses {
 642         my( $self, @edge ) = @_;
 643         # If edge is an arrayref, cope.
 644         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 645                 my $e = shift @edge;
 646                 @edge = @$e;
 647         }
 648         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 649         return @wits;
 650 }
 651
 652 sub _path_display_label {
 653         my $self = shift;
 654         my @wits = sort @_;
 655         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 656         if( scalar @wits > $maj ) {
 657                 # TODO break out a.c. wits
 658                 return 'majority';
 659         } else {
 660                 return join( ', ', @wits );
 661         }
 662 }
 663
 664
 665 =head2 as_graphml
 666
 667 Returns a GraphML representation of the collation.  The GraphML will contain
 668 two graphs. The first expresses the attributes of the readings and the witness
 669 paths that link them; the second expresses the relationships that link the
 670 readings.  This is the native transfer format for a tradition.
 671
 672 =begin testing
 673
 674 use Text::Tradition;
 675
 676 my $READINGS = 311;
 677 my $PATHS = 361;
 678
 679 my $datafile = 't/data/florilegium_tei_ps.xml';
 680 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 681                                       'name' => 'test0',
 682                                       'file' => $datafile,
 683                                       'linear' => 1 );
 684
 685 ok( $tradition, "Got a tradition object" );
 686 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 687 ok( $tradition->collation, "Tradition has a collation" );
 688
 689 my $c = $tradition->collation;
 690 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 691 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 692 is( scalar $c->relationships, 0, "Collation has all relationships" );
 693
 694 # Add a few relationships
 695 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 696 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 697 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 698
 699 # Now write it to GraphML and parse it again.
 700
 701 my $graphml = $c->as_graphml;
 702 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 703 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 704 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 705 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 706
 707 =end testing
 708
 709 =cut
 710
 711 sub as_graphml {
 712     my( $self ) = @_;
 713
 714     # Some namespaces
 715     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 716     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 717     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 718         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 719
 720     # Create the document and root node
 721     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 722     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 723     $graphml->setDocumentElement( $root );
 724     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 725     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 726
 727     # Add the data keys for the graph
 728     my %graph_data_keys;
 729     my $gdi = 0;
 730     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 731     foreach my $datum ( @graph_attributes ) {
 732         $graph_data_keys{$datum} = 'dg'.$gdi++;
 733         my $key = $root->addNewChild( $graphml_ns, 'key' );
 734         $key->setAttribute( 'attr.name', $datum );
 735         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 736         $key->setAttribute( 'for', 'graph' );
 737         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 738     }
 739
 740     # Add the data keys for nodes
 741     my %node_data_keys;
 742     my $ndi = 0;
 743     my %node_data = (
 744         id => 'string',
 745         text => 'string',
 746         rank => 'string',
 747         is_start => 'boolean',
 748         is_end => 'boolean',
 749         is_lacuna => 'boolean',
 750         );
 751     foreach my $datum ( keys %node_data ) {
 752         $node_data_keys{$datum} = 'dn'.$ndi++;
 753         my $key = $root->addNewChild( $graphml_ns, 'key' );
 754         $key->setAttribute( 'attr.name', $datum );
 755         $key->setAttribute( 'attr.type', $node_data{$datum} );
 756         $key->setAttribute( 'for', 'node' );
 757         $key->setAttribute( 'id', $node_data_keys{$datum} );
 758     }
 759
 760     # Add the data keys for edges, i.e. witnesses
 761     my $edi = 0;
 762     my %edge_data_keys;
 763     my %edge_data = (
 764         class => 'string',                              # Class, deprecated soon
 765         witness => 'string',                    # ID/label for a path
 766         relationship => 'string',               # ID/label for a relationship
 767         extra => 'boolean',                             # Path key
 768         scope => 'string',                              # Relationship key
 769         non_correctable => 'boolean',   # Relationship key
 770         non_independent => 'boolean',   # Relationship key
 771         );
 772     foreach my $datum ( keys %edge_data ) {
 773         $edge_data_keys{$datum} = 'de'.$edi++;
 774         my $key = $root->addNewChild( $graphml_ns, 'key' );
 775         $key->setAttribute( 'attr.name', $datum );
 776         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 777         $key->setAttribute( 'for', 'edge' );
 778         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 779     }
 780
 781     # Add the collation graph itself
 782     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 783     $sgraph->setAttribute( 'edgedefault', 'directed' );
 784     $sgraph->setAttribute( 'id', $self->tradition->name );
 785     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 786     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 787     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 788     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 789     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 790
 791     # Collation attribute data
 792     foreach my $datum ( @graph_attributes ) {
 793         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 794                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 795         }
 796
 797     my $node_ctr = 0;
 798     my %node_hash;
 799     # Add our readings to the graph
 800     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 801         # Add to the main graph
 802         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 803         my $node_xmlid = 'n' . $node_ctr++;
 804         $node_hash{ $n->id } = $node_xmlid;
 805         $node_el->setAttribute( 'id', $node_xmlid );
 806         foreach my $d ( keys %node_data ) {
 807                 my $nval = $n->$d;
 808                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 809                         if defined $nval;
 810         }
 811     }
 812
 813     # Add the path edges to the sequence graph
 814     my $edge_ctr = 0;
 815     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 816         # We add an edge in the graphml for every witness in $e.
 817         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 818                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 819                                                                                 $node_hash{ $e->[0] },
 820                                                                                 $node_hash{ $e->[1] } );
 821                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 822                         $edge_el->setAttribute( 'source', $from );
 823                         $edge_el->setAttribute( 'target', $to );
 824                         $edge_el->setAttribute( 'id', $id );
 825
 826                         # It's a witness path, so add the witness
 827                         my $base = $wit;
 828                         my $key = $edge_data_keys{'witness'};
 829                         # Is this an ante-corr witness?
 830                         my $aclabel = $self->ac_label;
 831                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 832                                 # Keep the base witness
 833                                 $base = $1;
 834                                 # ...and record that this is an 'extra' reading path
 835                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 836                         }
 837                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 838                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 839                 }
 840         }
 841
 842         # Add the relationship graph to the XML
 843         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 844                 $node_data_keys{'id'}, \%edge_data_keys );
 845
 846     # Save and return the thing
 847     my $result = decode_utf8( $graphml->toString(1) );
 848     return $result;
 849 }
 850
 851 sub _add_graphml_data {
 852     my( $el, $key, $value ) = @_;
 853     return unless defined $value;
 854     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 855     $data_el->setAttribute( 'key', $key );
 856     $data_el->appendText( $value );
 857 }
 858
 859 =head2 as_csv
 860
 861 Returns a CSV alignment table representation of the collation graph, one
 862 row per witness (or witness uncorrected.)
 863
 864 =cut
 865
 866 sub as_csv {
 867     my( $self ) = @_;
 868     my $table = $self->make_alignment_table;
 869     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 870     my @result;
 871     # Make the header row
 872     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 873         push( @result, decode_utf8( $csv->string ) );
 874     # Make the rest of the rows
 875     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 876         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 877         my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
 878         $csv->combine( @row );
 879         push( @result, decode_utf8( $csv->string ) );
 880     }
 881     return join( "\n", @result );
 882 }
 883
 884 =head2 make_alignment_table( $use_refs, $include_witnesses )
 885
 886 Return a reference to an alignment table, in a slightly enhanced CollateX
 887 format which looks like this:
 888
 889  $table = { alignment => [ { witness => "SIGIL",
 890                              tokens => [ { t => "TEXT" }, ... ] },
 891                            { witness => "SIG2",
 892                              tokens => [ { t => "TEXT" }, ... ] },
 893                            ... ],
 894             length => TEXTLEN };
 895
 896 If $use_refs is set to 1, the reading object is returned in the table
 897 instead of READINGTEXT; if not, the text of the reading is returned.
 898
 899 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 900 keys have a true hash value will be included.
 901
 902 =cut
 903
 904 sub make_alignment_table {
 905     my( $self, $noderefs, $include ) = @_;
 906     # Make sure we can do this
 907         throw( "Need a linear graph in order to make an alignment table" )
 908                 unless $self->linear;
 909         $self->calculate_ranks unless $self->end->has_rank;
 910
 911     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 912     my @all_pos = ( 1 .. $self->end->rank - 1 );
 913     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 914         if( $include ) {
 915                 next unless $include->{$wit->sigil};
 916         }
 917         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 918         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 919         my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
 920         push( @{$table->{'alignment'}},
 921                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 922         if( $wit->is_layered ) {
 923                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 924                         $wit->sigil.$self->ac_label );
 925             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
 926                         push( @{$table->{'alignment'}},
 927                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 928         }
 929     }
 930         return $table;
 931 }
 932
 933 sub _make_witness_row {
 934     my( $path, $positions, $noderefs ) = @_;
 935     my %char_hash;
 936     map { $char_hash{$_} = undef } @$positions;
 937     my $debug = 0;
 938     foreach my $rdg ( @$path ) {
 939         my $rtext = $rdg->text;
 940         $rtext = '#LACUNA#' if $rdg->is_lacuna;
 941         print STDERR "rank " . $rdg->rank . "\n" if $debug;
 942         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
 943         $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
 944                                                                            : { 't' => $rtext };
 945     }
 946     my @row = map { $char_hash{$_} } @$positions;
 947     # Fill in lacuna markers for undef spots in the row
 948     my $last_el = shift @row;
 949     my @filled_row = ( $last_el );
 950     foreach my $el ( @row ) {
 951         # If we are using node reference, make the lacuna node appear many times
 952         # in the table.  If not, use the lacuna tag.
 953         if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
 954             $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
 955         }
 956         push( @filled_row, $el );
 957         $last_el = $el;
 958     }
 959     return @filled_row;
 960 }
 961
 962 # Tiny utility function to say if a table element is a lacuna
 963 sub _el_is_lacuna {
 964     my $el = shift;
 965     return 1 if $el->{'t'} eq '#LACUNA#';
 966     return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
 967         && $el->{'t'}->is_lacuna;
 968     return 0;
 969 }
 970
 971 # Helper to turn the witnesses along columns rather than rows.  Assumes
 972 # equal-sized rows.
 973 sub _turn_table {
 974     my( $table ) = @_;
 975     my $result = [];
 976     return $result unless scalar @$table;
 977     my $nrows = scalar @{$table->[0]};
 978     foreach my $idx ( 0 .. $nrows - 1 ) {
 979         foreach my $wit ( 0 .. $#{$table} ) {
 980             $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
 981         }
 982     }
 983     return $result;
 984 }
 985
 986 =head1 NAVIGATION METHODS
 987
 988 =head2 reading_sequence( $first, $last, $sigil, $backup )
 989
 990 Returns the ordered list of readings, starting with $first and ending
 991 with $last, for the witness given in $sigil. If a $backup sigil is
 992 specified (e.g. when walking a layered witness), it will be used wherever
 993 no $sigil path exists.  If there is a base text reading, that will be
 994 used wherever no path exists for $sigil or $backup.
 995
 996 =cut
 997
 998 # TODO Think about returning some lazy-eval iterator.
 999 # TODO Get rid of backup; we should know from what witness is whether we need it.
1000
1001 sub reading_sequence {
1002     my( $self, $start, $end, $witness ) = @_;
1003
1004     $witness = $self->baselabel unless $witness;
1005     my @readings = ( $start );
1006     my %seen;
1007     my $n = $start;
1008     while( $n && $n->id ne $end->id ) {
1009         if( exists( $seen{$n->id} ) ) {
1010             throw( "Detected loop for $witness at " . $n->id );
1011         }
1012         $seen{$n->id} = 1;
1013
1014         my $next = $self->next_reading( $n, $witness );
1015         unless( $next ) {
1016             throw( "Did not find any path for $witness from reading " . $n->id );
1017         }
1018         push( @readings, $next );
1019         $n = $next;
1020     }
1021     # Check that the last reading is our end reading.
1022     my $last = $readings[$#readings];
1023     throw( "Last reading found from " . $start->text .
1024         " for witness $witness is not the end!" ) # TODO do we get this far?
1025         unless $last->id eq $end->id;
1026
1027     return @readings;
1028 }
1029
1030 =head2 next_reading( $reading, $sigil );
1031
1032 Returns the reading that follows the given reading along the given witness
1033 path.
1034
1035 =cut
1036
1037 sub next_reading {
1038     # Return the successor via the corresponding path.
1039     my $self = shift;
1040     my $answer = $self->_find_linked_reading( 'next', @_ );
1041         return undef unless $answer;
1042     return $self->reading( $answer );
1043 }
1044
1045 =head2 prior_reading( $reading, $sigil )
1046
1047 Returns the reading that precedes the given reading along the given witness
1048 path.
1049
1050 =cut
1051
1052 sub prior_reading {
1053     # Return the predecessor via the corresponding path.
1054     my $self = shift;
1055     my $answer = $self->_find_linked_reading( 'prior', @_ );
1056     return $self->reading( $answer );
1057 }
1058
1059 sub _find_linked_reading {
1060     my( $self, $direction, $node, $path ) = @_;
1061
1062     # Get a backup if we are dealing with a layered witness
1063     my $alt_path;
1064     my $aclabel = $self->ac_label;
1065     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1066         $alt_path = $1;
1067     }
1068
1069     my @linked_paths = $direction eq 'next'
1070         ? $self->sequence->edges_from( $node )
1071         : $self->sequence->edges_to( $node );
1072     return undef unless scalar( @linked_paths );
1073
1074     # We have to find the linked path that contains all of the
1075     # witnesses supplied in $path.
1076     my( @path_wits, @alt_path_wits );
1077     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1078     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1079     my $base_le;
1080     my $alt_le;
1081     foreach my $le ( @linked_paths ) {
1082         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1083             $base_le = $le;
1084         }
1085                 my @le_wits = sort $self->path_witnesses( $le );
1086                 if( _is_within( \@path_wits, \@le_wits ) ) {
1087                         # This is the right path.
1088                         return $direction eq 'next' ? $le->[1] : $le->[0];
1089                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1090                         $alt_le = $le;
1091                 }
1092     }
1093     # Got this far? Return the alternate path if it exists.
1094     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1095         if $alt_le;
1096
1097     # Got this far? Return the base path if it exists.
1098     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1099         if $base_le;
1100
1101     # Got this far? We have no appropriate path.
1102     warn "Could not find $direction node from " . $node->id
1103         . " along path $path";
1104     return undef;
1105 }
1106
1107 # Some set logic.
1108 sub _is_within {
1109     my( $set1, $set2 ) = @_;
1110     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1111     foreach my $el ( @$set1 ) {
1112         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1113     }
1114     return $ret;
1115 }
1116
1117 # Return the string that joins together a list of witnesses for
1118 # display on a single path.
1119 sub _witnesses_of_label {
1120     my( $self, $label ) = @_;
1121     my $regex = $self->wit_list_separator;
1122     my @answer = split( /\Q$regex\E/, $label );
1123     return @answer;
1124 }
1125
1126 =head2 common_readings
1127
1128 Returns the list of common readings in the graph (i.e. those readings that are
1129 shared by all non-lacunose witnesses.)
1130
1131 =cut
1132
1133 sub common_readings {
1134         my $self = shift;
1135         my @common = grep { $_->is_common } $self->readings;
1136         return @common;
1137 }
1138
1139 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1140
1141 Returns the text of a witness (plus its backup, if we are using a layer)
1142 as stored in the collation.  The text is returned as a string, where the
1143 individual readings are joined with spaces and the meta-readings (e.g.
1144 lacunae) are omitted.  Optional specification of $start and $end allows
1145 the generation of a subset of the witness text.
1146
1147 =cut
1148
1149 sub path_text {
1150         my( $self, $wit, $start, $end ) = @_;
1151         $start = $self->start unless $start;
1152         $end = $self->end unless $end;
1153         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1154         return join( ' ', map { $_->text } @path );
1155 }
1156
1157 =head1 INITIALIZATION METHODS
1158
1159 These are mostly for use by parsers.
1160
1161 =head2 make_witness_path( $witness )
1162
1163 Link the array of readings contained in $witness->path (and in
1164 $witness->uncorrected_path if it exists) into collation paths.
1165 Clear out the arrays when finished.
1166
1167 =head2 make_witness_paths
1168
1169 Call make_witness_path for all witnesses in the tradition.
1170
1171 =cut
1172
1173 # For use when a collation is constructed from a base text and an apparatus.
1174 # We have the sequences of readings and just need to add path edges.
1175 # When we are done, clear out the witness path attributes, as they are no
1176 # longer needed.
1177 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1178
1179 sub make_witness_paths {
1180     my( $self ) = @_;
1181     foreach my $wit ( $self->tradition->witnesses ) {
1182         # print STDERR "Making path for " . $wit->sigil . "\n";
1183         $self->make_witness_path( $wit );
1184     }
1185 }
1186
1187 sub make_witness_path {
1188     my( $self, $wit ) = @_;
1189     my @chain = @{$wit->path};
1190     my $sig = $wit->sigil;
1191     foreach my $idx ( 0 .. $#chain-1 ) {
1192         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1193     }
1194     if( $wit->is_layered ) {
1195         @chain = @{$wit->uncorrected_path};
1196         foreach my $idx( 0 .. $#chain-1 ) {
1197             my $source = $chain[$idx];
1198             my $target = $chain[$idx+1];
1199             $self->add_path( $source, $target, $sig.$self->ac_label )
1200                 unless $self->has_path( $source, $target, $sig );
1201         }
1202     }
1203     $wit->clear_path;
1204     $wit->clear_uncorrected_path;
1205 }
1206
1207 =head2 calculate_ranks
1208
1209 Calculate the reading ranks (that is, their aligned positions relative
1210 to each other) for the graph.  This can only be called on linear collations.
1211
1212 =cut
1213
1214 sub calculate_ranks {
1215     my $self = shift;
1216     # Walk a version of the graph where every node linked by a relationship
1217     # edge is fundamentally the same node, and do a topological ranking on
1218     # the nodes in this graph.
1219     my $topo_graph = Graph->new();
1220     my %rel_containers;
1221     my $rel_ctr = 0;
1222     # Add the nodes
1223     foreach my $r ( $self->readings ) {
1224         next if exists $rel_containers{$r->id};
1225         my @rels = $r->related_readings( 'colocated' );
1226         if( @rels ) {
1227             # Make a relationship container.
1228             push( @rels, $r );
1229             my $rn = 'rel_container_' . $rel_ctr++;
1230             $topo_graph->add_vertex( $rn );
1231             foreach( @rels ) {
1232                 $rel_containers{$_->id} = $rn;
1233             }
1234         } else {
1235             # Add a new node to mirror the old node.
1236             $rel_containers{$r->id} = $r->id;
1237             $topo_graph->add_vertex( $r->id );
1238         }
1239     }
1240
1241     # Add the edges.
1242     foreach my $r ( $self->readings ) {
1243         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1244                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1245                         $rel_containers{$n} );
1246                 # $DB::single = 1 unless $tfrom && $tto;
1247             $topo_graph->add_edge( $tfrom, $tto );
1248         }
1249     }
1250
1251     # Now do the rankings, starting with the start node.
1252     my $topo_start = $rel_containers{$self->start->id};
1253     my $node_ranks = { $topo_start => 0 };
1254     my @curr_origin = ( $topo_start );
1255     # A little iterative function.
1256     while( @curr_origin ) {
1257         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1258     }
1259     # Transfer our rankings from the topological graph to the real one.
1260     foreach my $r ( $self->readings ) {
1261         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1262             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1263         } else {
1264                 # Die. Find the last rank we calculated.
1265                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1266                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1267                         $self->readings;
1268                 my $last = pop @all_defined;
1269             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1270         }
1271     }
1272 }
1273
1274 sub _assign_rank {
1275     my( $graph, $node_ranks, @current_nodes ) = @_;
1276     # Look at each of the children of @current_nodes.  If all the child's
1277     # parents have a rank, assign it the highest rank + 1 and add it to
1278     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1279     # parent gets a rank.
1280     my @next_nodes;
1281     foreach my $c ( @current_nodes ) {
1282         warn "Current reading $c has no rank!"
1283             unless exists $node_ranks->{$c};
1284         # print STDERR "Looking at child of node $c, rank "
1285         #     . $node_ranks->{$c} . "\n";
1286         foreach my $child ( $graph->successors( $c ) ) {
1287             next if exists $node_ranks->{$child};
1288             my $highest_rank = -1;
1289             my $skip = 0;
1290             foreach my $parent ( $graph->predecessors( $child ) ) {
1291                 if( exists $node_ranks->{$parent} ) {
1292                     $highest_rank = $node_ranks->{$parent}
1293                         if $highest_rank <= $node_ranks->{$parent};
1294                 } else {
1295                     $skip = 1;
1296                     last;
1297                 }
1298             }
1299             next if $skip;
1300             my $c_rank = $highest_rank + 1;
1301             # print STDERR "Assigning rank $c_rank to node $child \n";
1302             $node_ranks->{$child} = $c_rank;
1303             push( @next_nodes, $child );
1304         }
1305     }
1306     return @next_nodes;
1307 }
1308
1309 =head2 flatten_ranks
1310
1311 A convenience method for parsing collation data.  Searches the graph for readings
1312 with the same text at the same rank, and merges any that are found.
1313
1314 =cut
1315
1316 sub flatten_ranks {
1317     my $self = shift;
1318     my %unique_rank_rdg;
1319     foreach my $rdg ( $self->readings ) {
1320         next unless $rdg->has_rank;
1321         my $key = $rdg->rank . "||" . $rdg->text;
1322         if( exists $unique_rank_rdg{$key} ) {
1323             # Combine!
1324                 # print STDERR "Combining readings at same rank: $key\n";
1325             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1326             # TODO see if this now makes a common point.
1327         } else {
1328             $unique_rank_rdg{$key} = $rdg;
1329         }
1330     }
1331 }
1332
1333 =head2 calculate_common_readings
1334
1335 Goes through the graph identifying the readings that appear in every witness
1336 (apart from those with lacunae at that spot.) Marks them as common and returns
1337 the list.
1338
1339 =begin testing
1340
1341 use Text::Tradition;
1342
1343 my $cxfile = 't/data/Collatex-16.xml';
1344 my $t = Text::Tradition->new(
1345     'name'  => 'inline',
1346     'input' => 'CollateX',
1347     'file'  => $cxfile,
1348     );
1349 my $c = $t->collation;
1350
1351 my @common = $c->calculate_common_readings();
1352 is( scalar @common, 8, "Found correct number of common readings" );
1353 my @marked = sort $c->common_readings();
1354 is( scalar @common, 8, "All common readings got marked as such" );
1355 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1356 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1357
1358 =end testing
1359
1360 =cut
1361
1362 sub calculate_common_readings {
1363         my $self = shift;
1364         my @common;
1365         my $table = $self->make_alignment_table( 1 );
1366         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1367                 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1368                 my %hash;
1369                 foreach my $r ( @row ) {
1370                         if( $r ) {
1371                                 $hash{$r->id} = $r unless $r->is_meta;
1372                         } else {
1373                                 $hash{'UNDEF'} = $r;
1374                         }
1375                 }
1376                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1377                         my( $r ) = values %hash;
1378                         $r->is_common( 1 );
1379                         push( @common, $r );
1380                 }
1381         }
1382         return @common;
1383 }
1384
1385 =head2 text_from_paths
1386
1387 Calculate the text array for all witnesses from the path, for later consistency
1388 checking.  Only to be used if there is no non-graph-based way to know the
1389 original texts.
1390
1391 =cut
1392
1393 sub text_from_paths {
1394         my $self = shift;
1395     foreach my $wit ( $self->tradition->witnesses ) {
1396         my @text = split( /\s+/,
1397                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1398         $wit->text( \@text );
1399         if( $wit->is_layered ) {
1400                         my @uctext = split( /\s+/,
1401                                 $self->reading_sequence( $self->start, $self->end,
1402                                         $wit->sigil.$self->ac_label ) );
1403                         $wit->text( \@uctext );
1404         }
1405     }
1406 }
1407
1408 =head1 UTILITY FUNCTIONS
1409
1410 =head2 common_predecessor( $reading_a, $reading_b )
1411
1412 Find the last reading that occurs in sequence before both the given readings.
1413
1414 =head2 common_successor( $reading_a, $reading_b )
1415
1416 Find the first reading that occurs in sequence after both the given readings.
1417
1418 =begin testing
1419
1420 use Text::Tradition;
1421
1422 my $cxfile = 't/data/Collatex-16.xml';
1423 my $t = Text::Tradition->new(
1424     'name'  => 'inline',
1425     'input' => 'CollateX',
1426     'file'  => $cxfile,
1427     );
1428 my $c = $t->collation;
1429
1430 is( $c->common_predecessor( 'n9', 'n23' )->id,
1431     'n20', "Found correct common predecessor" );
1432 is( $c->common_successor( 'n9', 'n23' )->id,
1433     '#END#', "Found correct common successor" );
1434
1435 is( $c->common_predecessor( 'n19', 'n17' )->id,
1436     'n16', "Found correct common predecessor for readings on same path" );
1437 is( $c->common_successor( 'n21', 'n26' )->id,
1438     '#END#', "Found correct common successor for readings on same path" );
1439
1440 =end testing
1441
1442 =cut
1443
1444 ## Return the closest reading that is a predecessor of both the given readings.
1445 sub common_predecessor {
1446         my $self = shift;
1447         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1448         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1449 }
1450
1451 sub common_successor {
1452         my $self = shift;
1453         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1454         return $self->_common_in_path( $r1, $r2, 'successors' );
1455 }
1456
1457 sub _common_in_path {
1458         my( $self, $r1, $r2, $dir ) = @_;
1459         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1460         $iter = $self->end->rank - $iter if $dir eq 'successors';
1461         my @candidates;
1462         my @last_checked = ( $r1, $r2 );
1463         my %all_seen;
1464         while( !@candidates ) {
1465                 my @new_lc;
1466                 foreach my $lc ( @last_checked ) {
1467                         foreach my $p ( $lc->$dir ) {
1468                                 if( $all_seen{$p->id} ) {
1469                                         push( @candidates, $p );
1470                                 } else {
1471                                         $all_seen{$p->id} = 1;
1472                                         push( @new_lc, $p );
1473                                 }
1474                         }
1475                 }
1476                 @last_checked = @new_lc;
1477         }
1478         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1479         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1480 }
1481
1482 sub throw {
1483         Text::Tradition::Error->throw(
1484                 'ident' => 'Collation error',
1485                 'message' => $_[0],
1486                 );
1487 }
1488
1489 no Moose;
1490 __PACKAGE__->meta->make_immutable;
1491
1492 =head1 LICENSE
1493
1494 This package is free software and is provided "as is" without express
1495 or implied warranty.  You can redistribute it and/or modify it under
1496 the same terms as Perl itself.
1497
1498 =head1 AUTHOR
1499
1500 Tara L Andrews E<lt>aurum@cpan.orgE<gt>