lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Encode qw( decode_utf8 );
   4 use File::Temp;
   5 use Graph;
   6 use IPC::Run qw( run binary );
   7 use Text::CSV_XS;
   8 use Text::Tradition::Collation::Reading;
   9 use Text::Tradition::Collation::RelationshipStore;
  10 use Text::Tradition::Error;
  11 use XML::LibXML;
  12 use XML::LibXML::XPathContext;
  13 use Moose;
  14
  15 has 'sequence' => (
  16     is => 'ro',
  17     isa => 'Graph',
  18     default => sub { Graph->new() },
  19     handles => {
  20         paths => 'edges',
  21     },
  22     );
  23
  24 has 'relations' => (
  25         is => 'ro',
  26         isa => 'Text::Tradition::Collation::RelationshipStore',
  27         handles => {
  28                 relationships => 'relationships',
  29                 related_readings => 'related_readings',
  30         },
  31         writer => '_set_relations',
  32         );
  33
  34 has 'tradition' => (
  35     is => 'ro',
  36     isa => 'Text::Tradition',
  37     weak_ref => 1,
  38     );
  39
  40 has 'readings' => (
  41         isa => 'HashRef[Text::Tradition::Collation::Reading]',
  42         traits => ['Hash'],
  43     handles => {
  44         reading     => 'get',
  45         _add_reading => 'set',
  46         del_reading => 'delete',
  47         has_reading => 'exists',
  48         readings   => 'values',
  49     },
  50     default => sub { {} },
  51         );
  52
  53 has 'wit_list_separator' => (
  54     is => 'rw',
  55     isa => 'Str',
  56     default => ', ',
  57     );
  58
  59 has 'baselabel' => (
  60     is => 'rw',
  61     isa => 'Str',
  62     default => 'base text',
  63     );
  64
  65 has 'linear' => (
  66     is => 'rw',
  67     isa => 'Bool',
  68     default => 1,
  69     );
  70
  71 has 'ac_label' => (
  72     is => 'rw',
  73     isa => 'Str',
  74     default => ' (a.c.)',
  75     );
  76
  77 has 'start' => (
  78         is => 'ro',
  79         isa => 'Text::Tradition::Collation::Reading',
  80         writer => '_set_start',
  81         weak_ref => 1,
  82         );
  83
  84 has 'end' => (
  85         is => 'ro',
  86         isa => 'Text::Tradition::Collation::Reading',
  87         writer => '_set_end',
  88         weak_ref => 1,
  89         );
  90
  91 =head1 NAME
  92
  93 Text::Tradition::Collation - a software model for a text collation
  94
  95 =head1 SYNOPSIS
  96
  97   use Text::Tradition;
  98   my $t = Text::Tradition->new(
  99     'name' => 'this is a text',
 100     'input' => 'TEI',
 101     'file' => '/path/to/tei_parallel_seg_file.xml' );
 102
 103   my $c = $t->collation;
 104   my @readings = $c->readings;
 105   my @paths = $c->paths;
 106   my @relationships = $c->relationships;
 107
 108   my $svg_variant_graph = $t->collation->as_svg();
 109
 110 =head1 DESCRIPTION
 111
 112 Text::Tradition is a library for representation and analysis of collated
 113 texts, particularly medieval ones.  The Collation is the central feature of
 114 a Tradition, where the text, its sequence of readings, and its relationships
 115 between readings are actually kept.
 116
 117 =head1 CONSTRUCTOR
 118
 119 =head2 new
 120
 121 The constructor.  Takes a hash or hashref of the following arguments:
 122
 123 =over
 124
 125 =item * tradition - The Text::Tradition object to which the collation
 126 belongs. Required.
 127
 128 =item * linear - Whether the collation should be linear; that is, whether
 129 transposed readings should be treated as two linked readings rather than one,
 130 and therefore whether the collation graph is acyclic.  Defaults to true.
 131
 132 =item * baselabel - The default label for the path taken by a base text
 133 (if any). Defaults to 'base text'.
 134
 135 =item * wit_list_separator - The string to join a list of witnesses for
 136 purposes of making labels in display graphs.  Defaults to ', '.
 137
 138 =item * ac_label - The extra label to tack onto a witness sigil when
 139 representing another layer of path for the given witness - that is, when
 140 a text has more than one possible reading due to scribal corrections or
 141 the like.  Defaults to ' (a.c.)'.
 142
 143 =back
 144
 145 =head1 ACCESSORS
 146
 147 =head2 tradition
 148
 149 =head2 linear
 150
 151 =head2 wit_list_separator
 152
 153 =head2 baselabel
 154
 155 =head2 ac_label
 156
 157 Simple accessors for collation attributes.
 158
 159 =head2 start
 160
 161 The meta-reading at the start of every witness path.
 162
 163 =head2 end
 164
 165 The meta-reading at the end of every witness path.
 166
 167 =head2 readings
 168
 169 Returns all Reading objects in the graph.
 170
 171 =head2 reading( $id )
 172
 173 Returns the Reading object corresponding to the given ID.
 174
 175 =head2 add_reading( $reading_args )
 176
 177 Adds a new reading object to the collation.
 178 See L<Text::Tradition::Collation::Reading> for the available arguments.
 179
 180 =head2 del_reading( $object_or_id )
 181
 182 Removes the given reading from the collation, implicitly removing its
 183 paths and relationships.
 184
 185 =head2 merge_readings( $main, $second )
 186
 187 Merges the $second reading into the $main one.
 188 The arguments may be either readings or reading IDs.
 189
 190 =head2 has_reading( $id )
 191
 192 Predicate to see whether a given reading ID is in the graph.
 193
 194 =head2 reading_witnesses( $object_or_id )
 195
 196 Returns a list of sigils whose witnesses contain the reading.
 197
 198 =head2 paths
 199
 200 Returns all reading paths within the document - that is, all edges in the
 201 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 202
 203 =head2 add_path( $source, $target, $sigil )
 204
 205 Links the given readings in the collation in sequence, under the given witness
 206 sigil.  The readings may be specified by object or ID.
 207
 208 =head2 del_path( $source, $target, $sigil )
 209
 210 Links the given readings in the collation in sequence, under the given witness
 211 sigil.  The readings may be specified by object or ID.
 212
 213 =head2 has_path( $source, $target );
 214
 215 Returns true if the two readings are linked in sequence in any witness.
 216 The readings may be specified by object or ID.
 217
 218 =head2 relationships
 219
 220 Returns all Relationship objects in the collation.
 221
 222 =head2 add_relationship( $reading, $other_reading, $options )
 223
 224 Adds a new relationship of the type given in $options between the two readings,
 225 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 226 where $status is true on success, and @vectors is a list of relationship edges
 227 that were ultimately added.
 228 See L<Text::Tradition::Collation::Relationship> for the available options.
 229
 230 =cut
 231
 232 sub BUILD {
 233     my $self = shift;
 234     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 235     $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
 236     $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 237 }
 238
 239 ### Reading construct/destruct functions
 240
 241 sub add_reading {
 242         my( $self, $reading ) = @_;
 243         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 244                 my %args = %$reading;
 245                 $reading = Text::Tradition::Collation::Reading->new(
 246                         'collation' => $self,
 247                         %args );
 248         }
 249         # First check to see if a reading with this ID exists.
 250         if( $self->reading( $reading->id ) ) {
 251                 throw( "Collation already has a reading with id " . $reading->id );
 252         }
 253         $self->_add_reading( $reading->id => $reading );
 254         # Once the reading has been added, put it in both graphs.
 255         $self->sequence->add_vertex( $reading->id );
 256         $self->relations->add_reading( $reading->id );
 257         return $reading;
 258 };
 259
 260 around del_reading => sub {
 261         my $orig = shift;
 262         my $self = shift;
 263         my $arg = shift;
 264
 265         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 266                 $arg = $arg->id;
 267         }
 268         # Remove the reading from the graphs.
 269         $self->sequence->delete_vertex( $arg );
 270         $self->relations->delete_reading( $arg );
 271
 272         # Carry on.
 273         $self->$orig( $arg );
 274 };
 275
 276 # merge_readings( $main, $to_be_deleted );
 277
 278 sub merge_readings {
 279         my $self = shift;
 280
 281         # We only need the IDs for adding paths to the graph, not the reading
 282         # objects themselves.
 283     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
 284
 285     # The kept reading should inherit the paths and the relationships
 286     # of the deleted reading.
 287         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 288                 my @vector = ( $kept );
 289                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 290                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 291                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 292                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 293                 $self->sequence->add_edge( @vector );
 294                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 295                 @wits{keys %$fwits} = values %$fwits;
 296                 $self->sequence->set_edge_attributes( @vector, \%wits );
 297         }
 298         $self->relations->merge_readings( $kept, $deleted, $combine_char );
 299
 300         # Do the deletion deed.
 301         if( $combine_char ) {
 302                 my $kept_obj = $self->reading( $kept );
 303                 my $new_text = join( $combine_char, $kept_obj->text,
 304                         $self->reading( $deleted )->text );
 305                 $kept_obj->alter_text( $new_text );
 306         }
 307         $self->del_reading( $deleted );
 308 }
 309
 310
 311 # Helper function for manipulating the graph.
 312 sub _stringify_args {
 313         my( $self, $first, $second, $arg ) = @_;
 314     $first = $first->id
 315         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 316     $second = $second->id
 317         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 318     return( $first, $second, $arg );
 319 }
 320
 321 # Helper function for manipulating the graph.
 322 sub _objectify_args {
 323         my( $self, $first, $second, $arg ) = @_;
 324     $first = $self->reading( $first )
 325         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 326     $second = $self->reading( $second )
 327         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 328     return( $first, $second, $arg );
 329 }
 330 ### Path logic
 331
 332 sub add_path {
 333         my $self = shift;
 334
 335         # We only need the IDs for adding paths to the graph, not the reading
 336         # objects themselves.
 337     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 338
 339         # Connect the readings
 340     $self->sequence->add_edge( $source, $target );
 341     # Note the witness in question
 342     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 343 };
 344
 345 sub del_path {
 346         my $self = shift;
 347         my @args;
 348         if( ref( $_[0] ) eq 'ARRAY' ) {
 349                 my $e = shift @_;
 350                 @args = ( @$e, @_ );
 351         } else {
 352                 @args = @_;
 353         }
 354
 355         # We only need the IDs for adding paths to the graph, not the reading
 356         # objects themselves.
 357     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 358
 359         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 360                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 361         }
 362         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 363                 $self->sequence->delete_edge( $source, $target );
 364         }
 365 }
 366
 367
 368 # Extra graph-alike utility
 369 sub has_path {
 370         my $self = shift;
 371     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 372         return undef unless $self->sequence->has_edge( $source, $target );
 373         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 374 }
 375
 376 =head2 clear_witness( @sigil_list )
 377
 378 Clear the given witnesses out of the collation entirely, removing references
 379 to them in paths, and removing readings that belong only to them.  Should only
 380 be called via $tradition->del_witness.
 381
 382 =cut
 383
 384 sub clear_witness {
 385         my( $self, @sigils ) = @_;
 386
 387         # Clear the witness(es) out of the paths
 388         foreach my $e ( $self->paths ) {
 389                 foreach my $sig ( @sigils ) {
 390                         $self->del_path( $e, $sig );
 391                 }
 392         }
 393
 394         # Clear out the newly unused readings
 395         foreach my $r ( $self->readings ) {
 396                 unless( $self->reading_witnesses( $r ) ) {
 397                         $self->del_reading( $r );
 398                 }
 399         }
 400 }
 401
 402 sub add_relationship {
 403         my $self = shift;
 404     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 405     my( @vectors ) = $self->relations->add_relationship( $source,
 406         $self->reading( $source ), $target, $self->reading( $target ), $opts );
 407     # Force a full rank recalculation every time. Yuck.
 408     $self->calculate_ranks() if $self->end->has_rank;
 409     return @vectors;
 410 }
 411
 412 =head2 reading_witnesses( $reading )
 413
 414 Return a list of sigils corresponding to the witnesses in which the reading appears.
 415
 416 =cut
 417
 418 sub reading_witnesses {
 419         my( $self, $reading ) = @_;
 420         # We need only check either the incoming or the outgoing edges; I have
 421         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 422         if( $reading eq $self->start ) {
 423                 return map { $_->sigil } $self->tradition->witnesses;
 424         }
 425         my %all_witnesses;
 426         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 427                 my $wits = $self->sequence->get_edge_attributes( @$e );
 428                 @all_witnesses{ keys %$wits } = 1;
 429         }
 430         return keys %all_witnesses;
 431 }
 432
 433 =head1 OUTPUT METHODS
 434
 435 =head2 as_svg( \%options )
 436
 437 Returns an SVG string that represents the graph, via as_dot and graphviz.
 438 See as_dot for a list of options.
 439
 440 =cut
 441
 442 sub as_svg {
 443     my( $self, $opts ) = @_;
 444
 445     my @cmd = qw/dot -Tsvg/;
 446     my( $svg, $err );
 447     my $dotfile = File::Temp->new();
 448     ## USE FOR DEBUGGING
 449     # $dotfile->unlink_on_destroy(0);
 450     binmode $dotfile, ':utf8';
 451     print $dotfile $self->as_dot( $opts );
 452     push( @cmd, $dotfile->filename );
 453     run( \@cmd, ">", binary(), \$svg );
 454     return decode_utf8( $svg );
 455 }
 456
 457
 458 =head2 as_dot( \%options )
 459
 460 Returns a string that is the collation graph expressed in dot
 461 (i.e. GraphViz) format.  Options include:
 462
 463 =over 4
 464
 465 =item * from
 466
 467 =item * to
 468
 469 =item * color_common
 470
 471 =back
 472
 473 =cut
 474
 475 sub as_dot {
 476     my( $self, $opts ) = @_;
 477     my $startrank = $opts->{'from'} if $opts;
 478     my $endrank = $opts->{'to'} if $opts;
 479     my $color_common = $opts->{'color_common'} if $opts;
 480     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 481         && $self->end->rank > 100;
 482
 483     # Check the arguments
 484     if( $startrank ) {
 485         return if $endrank && $startrank > $endrank;
 486         return if $startrank > $self->end->rank;
 487         }
 488         if( defined $endrank ) {
 489                 return if $endrank < 0;
 490                 $endrank = undef if $endrank == $self->end->rank;
 491         }
 492
 493     # TODO consider making some of these things configurable
 494     my $graph_name = $self->tradition->name;
 495     $graph_name =~ s/[^\w\s]//g;
 496     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 497
 498     my %graph_attrs = (
 499         'rankdir' => 'LR',
 500         'bgcolor' => 'none',
 501         );
 502     my %node_attrs = (
 503         'fontsize' => 11,
 504         'fillcolor' => 'white',
 505         'style' => 'filled',
 506         'shape' => 'ellipse'
 507         );
 508     my %edge_attrs = (
 509         'arrowhead' => 'open',
 510         'color' => '#000000',
 511         'fontcolor' => '#000000',
 512         );
 513
 514     my $dot = sprintf( "digraph %s {\n", $graph_name );
 515     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 516     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 517
 518         # Output substitute start/end readings if necessary
 519         if( $startrank ) {
 520                 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
 521         }
 522         if( $endrank ) {
 523                 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
 524         }
 525         if( $STRAIGHTENHACK ) {
 526                 ## HACK part 1
 527                 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
 528                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 529         }
 530         my %used;  # Keep track of the readings that actually appear in the graph
 531     foreach my $reading ( $self->readings ) {
 532         # Only output readings within our rank range.
 533         next if $startrank && $reading->rank < $startrank;
 534         next if $endrank && $reading->rank > $endrank;
 535         $used{$reading->id} = 1;
 536         # Need not output nodes without separate labels
 537         next if $reading->id eq $reading->text;
 538         my $rattrs;
 539         my $label = $reading->text;
 540         $label =~ s/\"/\\\"/g;
 541                 $rattrs->{'label'} = $label;
 542                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 543         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 544     }
 545
 546         # Add the real edges
 547     my @edges = $self->paths;
 548         my( %substart, %subend );
 549     foreach my $edge ( @edges ) {
 550         # Do we need to output this edge?
 551         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 552                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 553                         my $variables = { %edge_attrs, 'label' => $label };
 554                         # Account for the rank gap if necessary
 555                         if( $self->reading( $edge->[1] )->has_rank
 556                                 && $self->reading( $edge->[0] )->has_rank
 557                                 && $self->reading( $edge->[1] )->rank
 558                                 - $self->reading( $edge->[0] )->rank > 1 ) {
 559                                 $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
 560                                 - $self->reading( $edge->[0] )->rank;
 561                         }
 562                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 563                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 564                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 565
 566                         my $varopts = _dot_attr_string( $variables );
 567                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 568                                 $edge->[0], $edge->[1], $varopts );
 569         } elsif( $used{$edge->[0]} ) {
 570                 $subend{$edge->[0]} = 1;
 571         } elsif( $used{$edge->[1]} ) {
 572                 $substart{$edge->[1]} = 1;
 573         }
 574     }
 575     # Add substitute start and end edges if necessary
 576     foreach my $node ( keys %substart ) {
 577         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 578         my $variables = { %edge_attrs, 'label' => $witstr };
 579         my $varopts = _dot_attr_string( $variables );
 580         $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
 581         }
 582     foreach my $node ( keys %subend ) {
 583         my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
 584         my $variables = { %edge_attrs, 'label' => $witstr };
 585         my $varopts = _dot_attr_string( $variables );
 586         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 587         }
 588         # HACK part 2
 589         if( $STRAIGHTENHACK ) {
 590                 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 591         }
 592
 593     $dot .= "}\n";
 594     return $dot;
 595 }
 596
 597 sub _dot_attr_string {
 598         my( $hash ) = @_;
 599         my @attrs;
 600         foreach my $k ( sort keys %$hash ) {
 601                 my $v = $hash->{$k};
 602                 push( @attrs, $k.'="'.$v.'"' );
 603         }
 604         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 605 }
 606
 607 =head2 path_witnesses( $edge )
 608
 609 Returns the list of sigils whose witnesses are associated with the given edge.
 610 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 611
 612 =cut
 613
 614 sub path_witnesses {
 615         my( $self, @edge ) = @_;
 616         # If edge is an arrayref, cope.
 617         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 618                 my $e = shift @edge;
 619                 @edge = @$e;
 620         }
 621         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 622         return @wits;
 623 }
 624
 625 sub _path_display_label {
 626         my $self = shift;
 627         my @wits = sort @_;
 628         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 629         if( scalar @wits > $maj ) {
 630                 # TODO break out a.c. wits
 631                 return 'majority';
 632         } else {
 633                 return join( ', ', @wits );
 634         }
 635 }
 636
 637
 638 =head2 as_graphml
 639
 640 Returns a GraphML representation of the collation.  The GraphML will contain
 641 two graphs. The first expresses the attributes of the readings and the witness
 642 paths that link them; the second expresses the relationships that link the
 643 readings.  This is the native transfer format for a tradition.
 644
 645 =begin testing
 646
 647 use Text::Tradition;
 648
 649 my $READINGS = 311;
 650 my $PATHS = 361;
 651
 652 my $datafile = 't/data/florilegium_tei_ps.xml';
 653 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 654                                       'name' => 'test0',
 655                                       'file' => $datafile,
 656                                       'linear' => 1 );
 657
 658 ok( $tradition, "Got a tradition object" );
 659 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 660 ok( $tradition->collation, "Tradition has a collation" );
 661
 662 my $c = $tradition->collation;
 663 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 664 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 665 is( scalar $c->relationships, 0, "Collation has all relationships" );
 666
 667 # Add a few relationships
 668 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 669 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 670 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 671
 672 # Now write it to GraphML and parse it again.
 673
 674 my $graphml = $c->as_graphml;
 675 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 676 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 677 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 678 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 679
 680 =end testing
 681
 682 =cut
 683
 684 sub as_graphml {
 685     my( $self ) = @_;
 686
 687     # Some namespaces
 688     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 689     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 690     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 691         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 692
 693     # Create the document and root node
 694     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 695     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 696     $graphml->setDocumentElement( $root );
 697     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 698     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 699
 700     # Add the data keys for the graph
 701     my %graph_data_keys;
 702     my $gdi = 0;
 703     my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
 704     foreach my $datum ( @graph_attributes ) {
 705         $graph_data_keys{$datum} = 'dg'.$gdi++;
 706         my $key = $root->addNewChild( $graphml_ns, 'key' );
 707         $key->setAttribute( 'attr.name', $datum );
 708         $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
 709         $key->setAttribute( 'for', 'graph' );
 710         $key->setAttribute( 'id', $graph_data_keys{$datum} );
 711     }
 712
 713     # Add the data keys for nodes
 714     my %node_data_keys;
 715     my $ndi = 0;
 716     my %node_data = (
 717         id => 'string',
 718         text => 'string',
 719         rank => 'string',
 720         is_start => 'boolean',
 721         is_end => 'boolean',
 722         is_lacuna => 'boolean',
 723         );
 724     foreach my $datum ( keys %node_data ) {
 725         $node_data_keys{$datum} = 'dn'.$ndi++;
 726         my $key = $root->addNewChild( $graphml_ns, 'key' );
 727         $key->setAttribute( 'attr.name', $datum );
 728         $key->setAttribute( 'attr.type', $node_data{$datum} );
 729         $key->setAttribute( 'for', 'node' );
 730         $key->setAttribute( 'id', $node_data_keys{$datum} );
 731     }
 732
 733     # Add the data keys for edges, i.e. witnesses
 734     my $edi = 0;
 735     my %edge_data_keys;
 736     my %edge_data = (
 737         class => 'string',                              # Class, deprecated soon
 738         witness => 'string',                    # ID/label for a path
 739         relationship => 'string',               # ID/label for a relationship
 740         extra => 'boolean',                             # Path key
 741         scope => 'string',                              # Relationship key
 742         non_correctable => 'boolean',   # Relationship key
 743         non_independent => 'boolean',   # Relationship key
 744         );
 745     foreach my $datum ( keys %edge_data ) {
 746         $edge_data_keys{$datum} = 'de'.$edi++;
 747         my $key = $root->addNewChild( $graphml_ns, 'key' );
 748         $key->setAttribute( 'attr.name', $datum );
 749         $key->setAttribute( 'attr.type', $edge_data{$datum} );
 750         $key->setAttribute( 'for', 'edge' );
 751         $key->setAttribute( 'id', $edge_data_keys{$datum} );
 752     }
 753
 754     # Add the collation graph itself
 755     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
 756     $sgraph->setAttribute( 'edgedefault', 'directed' );
 757     $sgraph->setAttribute( 'id', $self->tradition->name );
 758     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
 759     $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
 760     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
 761     $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
 762     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
 763
 764     # Collation attribute data
 765     foreach my $datum ( @graph_attributes ) {
 766         my $value = $datum eq 'version' ? '3.0' : $self->$datum;
 767                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
 768         }
 769
 770     my $node_ctr = 0;
 771     my %node_hash;
 772     # Add our readings to the graph
 773     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
 774         # Add to the main graph
 775         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
 776         my $node_xmlid = 'n' . $node_ctr++;
 777         $node_hash{ $n->id } = $node_xmlid;
 778         $node_el->setAttribute( 'id', $node_xmlid );
 779         foreach my $d ( keys %node_data ) {
 780                 my $nval = $n->$d;
 781                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
 782                         if defined $nval;
 783         }
 784     }
 785
 786     # Add the path edges to the sequence graph
 787     my $edge_ctr = 0;
 788     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
 789         # We add an edge in the graphml for every witness in $e.
 790         foreach my $wit ( sort $self->path_witnesses( $e ) ) {
 791                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
 792                                                                                 $node_hash{ $e->[0] },
 793                                                                                 $node_hash{ $e->[1] } );
 794                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
 795                         $edge_el->setAttribute( 'source', $from );
 796                         $edge_el->setAttribute( 'target', $to );
 797                         $edge_el->setAttribute( 'id', $id );
 798
 799                         # It's a witness path, so add the witness
 800                         my $base = $wit;
 801                         my $key = $edge_data_keys{'witness'};
 802                         # Is this an ante-corr witness?
 803                         my $aclabel = $self->ac_label;
 804                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
 805                                 # Keep the base witness
 806                                 $base = $1;
 807                                 # ...and record that this is an 'extra' reading path
 808                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
 809                         }
 810                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
 811                         _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
 812                 }
 813         }
 814
 815         # Add the relationship graph to the XML
 816         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
 817                 $node_data_keys{'id'}, \%edge_data_keys );
 818
 819     # Save and return the thing
 820     my $result = decode_utf8( $graphml->toString(1) );
 821     return $result;
 822 }
 823
 824 sub _add_graphml_data {
 825     my( $el, $key, $value ) = @_;
 826     return unless defined $value;
 827     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
 828     $data_el->setAttribute( 'key', $key );
 829     $data_el->appendText( $value );
 830 }
 831
 832 =head2 as_csv
 833
 834 Returns a CSV alignment table representation of the collation graph, one
 835 row per witness (or witness uncorrected.)
 836
 837 =cut
 838
 839 sub as_csv {
 840     my( $self ) = @_;
 841     my $table = $self->make_alignment_table;
 842     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
 843     my @result;
 844     # Make the header row
 845     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
 846         push( @result, decode_utf8( $csv->string ) );
 847     # Make the rest of the rows
 848     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
 849         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
 850         my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
 851         $csv->combine( @row );
 852         push( @result, decode_utf8( $csv->string ) );
 853     }
 854     return join( "\n", @result );
 855 }
 856
 857 =head2 make_alignment_table( $use_refs, $include_witnesses )
 858
 859 Return a reference to an alignment table, in a slightly enhanced CollateX
 860 format which looks like this:
 861
 862  $table = { alignment => [ { witness => "SIGIL",
 863                              tokens => [ { t => "TEXT" }, ... ] },
 864                            { witness => "SIG2",
 865                              tokens => [ { t => "TEXT" }, ... ] },
 866                            ... ],
 867             length => TEXTLEN };
 868
 869 If $use_refs is set to 1, the reading object is returned in the table
 870 instead of READINGTEXT; if not, the text of the reading is returned.
 871
 872 If $include_witnesses is set to a hashref, only the witnesses whose sigil
 873 keys have a true hash value will be included.
 874
 875 =cut
 876
 877 sub make_alignment_table {
 878     my( $self, $noderefs, $include ) = @_;
 879     # Make sure we can do this
 880         throw( "Need a linear graph in order to make an alignment table" )
 881                 unless $self->linear;
 882         $self->calculate_ranks unless $self->end->has_rank;
 883
 884     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
 885     my @all_pos = ( 1 .. $self->end->rank - 1 );
 886     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
 887         if( $include ) {
 888                 next unless $include->{$wit->sigil};
 889         }
 890         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
 891         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
 892         my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
 893         push( @{$table->{'alignment'}},
 894                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
 895         if( $wit->is_layered ) {
 896                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
 897                         $wit->sigil.$self->ac_label );
 898             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
 899                         push( @{$table->{'alignment'}},
 900                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
 901         }
 902     }
 903         return $table;
 904 }
 905
 906 sub _make_witness_row {
 907     my( $path, $positions, $noderefs ) = @_;
 908     my %char_hash;
 909     map { $char_hash{$_} = undef } @$positions;
 910     my $debug = 0;
 911     foreach my $rdg ( @$path ) {
 912         my $rtext = $rdg->text;
 913         $rtext = '#LACUNA#' if $rdg->is_lacuna;
 914         print STDERR "rank " . $rdg->rank . "\n" if $debug;
 915         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
 916         $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
 917                                                                            : { 't' => $rtext };
 918     }
 919     my @row = map { $char_hash{$_} } @$positions;
 920     # Fill in lacuna markers for undef spots in the row
 921     my $last_el = shift @row;
 922     my @filled_row = ( $last_el );
 923     foreach my $el ( @row ) {
 924         # If we are using node reference, make the lacuna node appear many times
 925         # in the table.  If not, use the lacuna tag.
 926         if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
 927             $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
 928         }
 929         push( @filled_row, $el );
 930         $last_el = $el;
 931     }
 932     return @filled_row;
 933 }
 934
 935 # Tiny utility function to say if a table element is a lacuna
 936 sub _el_is_lacuna {
 937     my $el = shift;
 938     return 1 if $el->{'t'} eq '#LACUNA#';
 939     return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
 940         && $el->{'t'}->is_lacuna;
 941     return 0;
 942 }
 943
 944 # Helper to turn the witnesses along columns rather than rows.  Assumes
 945 # equal-sized rows.
 946 sub _turn_table {
 947     my( $table ) = @_;
 948     my $result = [];
 949     return $result unless scalar @$table;
 950     my $nrows = scalar @{$table->[0]};
 951     foreach my $idx ( 0 .. $nrows - 1 ) {
 952         foreach my $wit ( 0 .. $#{$table} ) {
 953             $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
 954         }
 955     }
 956     return $result;
 957 }
 958
 959 =head1 NAVIGATION METHODS
 960
 961 =head2 reading_sequence( $first, $last, $sigil, $backup )
 962
 963 Returns the ordered list of readings, starting with $first and ending
 964 with $last, for the witness given in $sigil. If a $backup sigil is
 965 specified (e.g. when walking a layered witness), it will be used wherever
 966 no $sigil path exists.  If there is a base text reading, that will be
 967 used wherever no path exists for $sigil or $backup.
 968
 969 =cut
 970
 971 # TODO Think about returning some lazy-eval iterator.
 972 # TODO Get rid of backup; we should know from what witness is whether we need it.
 973
 974 sub reading_sequence {
 975     my( $self, $start, $end, $witness ) = @_;
 976
 977     $witness = $self->baselabel unless $witness;
 978     my @readings = ( $start );
 979     my %seen;
 980     my $n = $start;
 981     while( $n && $n->id ne $end->id ) {
 982         if( exists( $seen{$n->id} ) ) {
 983             throw( "Detected loop for $witness at " . $n->id );
 984         }
 985         $seen{$n->id} = 1;
 986
 987         my $next = $self->next_reading( $n, $witness );
 988         unless( $next ) {
 989             throw( "Did not find any path for $witness from reading " . $n->id );
 990         }
 991         push( @readings, $next );
 992         $n = $next;
 993     }
 994     # Check that the last reading is our end reading.
 995     my $last = $readings[$#readings];
 996     throw( "Last reading found from " . $start->text .
 997         " for witness $witness is not the end!" ) # TODO do we get this far?
 998         unless $last->id eq $end->id;
 999
1000     return @readings;
1001 }
1002
1003 =head2 next_reading( $reading, $sigil );
1004
1005 Returns the reading that follows the given reading along the given witness
1006 path.
1007
1008 =cut
1009
1010 sub next_reading {
1011     # Return the successor via the corresponding path.
1012     my $self = shift;
1013     my $answer = $self->_find_linked_reading( 'next', @_ );
1014         return undef unless $answer;
1015     return $self->reading( $answer );
1016 }
1017
1018 =head2 prior_reading( $reading, $sigil )
1019
1020 Returns the reading that precedes the given reading along the given witness
1021 path.
1022
1023 =cut
1024
1025 sub prior_reading {
1026     # Return the predecessor via the corresponding path.
1027     my $self = shift;
1028     my $answer = $self->_find_linked_reading( 'prior', @_ );
1029     return $self->reading( $answer );
1030 }
1031
1032 sub _find_linked_reading {
1033     my( $self, $direction, $node, $path ) = @_;
1034
1035     # Get a backup if we are dealing with a layered witness
1036     my $alt_path;
1037     my $aclabel = $self->ac_label;
1038     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1039         $alt_path = $1;
1040     }
1041
1042     my @linked_paths = $direction eq 'next'
1043         ? $self->sequence->edges_from( $node )
1044         : $self->sequence->edges_to( $node );
1045     return undef unless scalar( @linked_paths );
1046
1047     # We have to find the linked path that contains all of the
1048     # witnesses supplied in $path.
1049     my( @path_wits, @alt_path_wits );
1050     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1051     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1052     my $base_le;
1053     my $alt_le;
1054     foreach my $le ( @linked_paths ) {
1055         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1056             $base_le = $le;
1057         }
1058                 my @le_wits = sort $self->path_witnesses( $le );
1059                 if( _is_within( \@path_wits, \@le_wits ) ) {
1060                         # This is the right path.
1061                         return $direction eq 'next' ? $le->[1] : $le->[0];
1062                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1063                         $alt_le = $le;
1064                 }
1065     }
1066     # Got this far? Return the alternate path if it exists.
1067     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1068         if $alt_le;
1069
1070     # Got this far? Return the base path if it exists.
1071     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1072         if $base_le;
1073
1074     # Got this far? We have no appropriate path.
1075     warn "Could not find $direction node from " . $node->id
1076         . " along path $path";
1077     return undef;
1078 }
1079
1080 # Some set logic.
1081 sub _is_within {
1082     my( $set1, $set2 ) = @_;
1083     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1084     foreach my $el ( @$set1 ) {
1085         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1086     }
1087     return $ret;
1088 }
1089
1090 # Return the string that joins together a list of witnesses for
1091 # display on a single path.
1092 sub _witnesses_of_label {
1093     my( $self, $label ) = @_;
1094     my $regex = $self->wit_list_separator;
1095     my @answer = split( /\Q$regex\E/, $label );
1096     return @answer;
1097 }
1098
1099 =head2 common_readings
1100
1101 Returns the list of common readings in the graph (i.e. those readings that are
1102 shared by all non-lacunose witnesses.)
1103
1104 =cut
1105
1106 sub common_readings {
1107         my $self = shift;
1108         my @common = grep { $_->is_common } $self->readings;
1109         return @common;
1110 }
1111
1112 =head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1113
1114 Returns the text of a witness (plus its backup, if we are using a layer)
1115 as stored in the collation.  The text is returned as a string, where the
1116 individual readings are joined with spaces and the meta-readings (e.g.
1117 lacunae) are omitted.  Optional specification of $start and $end allows
1118 the generation of a subset of the witness text.
1119
1120 =cut
1121
1122 sub path_text {
1123         my( $self, $wit, $start, $end ) = @_;
1124         $start = $self->start unless $start;
1125         $end = $self->end unless $end;
1126         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1127         return join( ' ', map { $_->text } @path );
1128 }
1129
1130 =head1 INITIALIZATION METHODS
1131
1132 These are mostly for use by parsers.
1133
1134 =head2 make_witness_path( $witness )
1135
1136 Link the array of readings contained in $witness->path (and in
1137 $witness->uncorrected_path if it exists) into collation paths.
1138 Clear out the arrays when finished.
1139
1140 =head2 make_witness_paths
1141
1142 Call make_witness_path for all witnesses in the tradition.
1143
1144 =cut
1145
1146 # For use when a collation is constructed from a base text and an apparatus.
1147 # We have the sequences of readings and just need to add path edges.
1148 # When we are done, clear out the witness path attributes, as they are no
1149 # longer needed.
1150 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1151
1152 sub make_witness_paths {
1153     my( $self ) = @_;
1154     foreach my $wit ( $self->tradition->witnesses ) {
1155         # print STDERR "Making path for " . $wit->sigil . "\n";
1156         $self->make_witness_path( $wit );
1157     }
1158 }
1159
1160 sub make_witness_path {
1161     my( $self, $wit ) = @_;
1162     my @chain = @{$wit->path};
1163     my $sig = $wit->sigil;
1164     foreach my $idx ( 0 .. $#chain-1 ) {
1165         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1166     }
1167     if( $wit->is_layered ) {
1168         @chain = @{$wit->uncorrected_path};
1169         foreach my $idx( 0 .. $#chain-1 ) {
1170             my $source = $chain[$idx];
1171             my $target = $chain[$idx+1];
1172             $self->add_path( $source, $target, $sig.$self->ac_label )
1173                 unless $self->has_path( $source, $target, $sig );
1174         }
1175     }
1176     $wit->clear_path;
1177     $wit->clear_uncorrected_path;
1178 }
1179
1180 =head2 calculate_ranks
1181
1182 Calculate the reading ranks (that is, their aligned positions relative
1183 to each other) for the graph.  This can only be called on linear collations.
1184
1185 =cut
1186
1187 sub calculate_ranks {
1188     my $self = shift;
1189     # Walk a version of the graph where every node linked by a relationship
1190     # edge is fundamentally the same node, and do a topological ranking on
1191     # the nodes in this graph.
1192     my $topo_graph = Graph->new();
1193     my %rel_containers;
1194     my $rel_ctr = 0;
1195     # Add the nodes
1196     foreach my $r ( $self->readings ) {
1197         next if exists $rel_containers{$r->id};
1198         my @rels = $r->related_readings( 'colocated' );
1199         if( @rels ) {
1200             # Make a relationship container.
1201             push( @rels, $r );
1202             my $rn = 'rel_container_' . $rel_ctr++;
1203             $topo_graph->add_vertex( $rn );
1204             foreach( @rels ) {
1205                 $rel_containers{$_->id} = $rn;
1206             }
1207         } else {
1208             # Add a new node to mirror the old node.
1209             $rel_containers{$r->id} = $r->id;
1210             $topo_graph->add_vertex( $r->id );
1211         }
1212     }
1213
1214     # Add the edges.
1215     foreach my $r ( $self->readings ) {
1216         foreach my $n ( $self->sequence->successors( $r->id ) ) {
1217                 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1218                         $rel_containers{$n} );
1219                 # $DB::single = 1 unless $tfrom && $tto;
1220             $topo_graph->add_edge( $tfrom, $tto );
1221         }
1222     }
1223
1224     # Now do the rankings, starting with the start node.
1225     my $topo_start = $rel_containers{$self->start->id};
1226     my $node_ranks = { $topo_start => 0 };
1227     my @curr_origin = ( $topo_start );
1228     # A little iterative function.
1229     while( @curr_origin ) {
1230         @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1231     }
1232     # Transfer our rankings from the topological graph to the real one.
1233     foreach my $r ( $self->readings ) {
1234         if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1235             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1236         } else {
1237                 # Die. Find the last rank we calculated.
1238                 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1239                                  <=> $node_ranks->{$rel_containers{$b->id}} }
1240                         $self->readings;
1241                 my $last = pop @all_defined;
1242             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1243         }
1244     }
1245 }
1246
1247 sub _assign_rank {
1248     my( $graph, $node_ranks, @current_nodes ) = @_;
1249     # Look at each of the children of @current_nodes.  If all the child's
1250     # parents have a rank, assign it the highest rank + 1 and add it to
1251     # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
1252     # parent gets a rank.
1253     my @next_nodes;
1254     foreach my $c ( @current_nodes ) {
1255         warn "Current reading $c has no rank!"
1256             unless exists $node_ranks->{$c};
1257         # print STDERR "Looking at child of node $c, rank "
1258         #     . $node_ranks->{$c} . "\n";
1259         foreach my $child ( $graph->successors( $c ) ) {
1260             next if exists $node_ranks->{$child};
1261             my $highest_rank = -1;
1262             my $skip = 0;
1263             foreach my $parent ( $graph->predecessors( $child ) ) {
1264                 if( exists $node_ranks->{$parent} ) {
1265                     $highest_rank = $node_ranks->{$parent}
1266                         if $highest_rank <= $node_ranks->{$parent};
1267                 } else {
1268                     $skip = 1;
1269                     last;
1270                 }
1271             }
1272             next if $skip;
1273             my $c_rank = $highest_rank + 1;
1274             # print STDERR "Assigning rank $c_rank to node $child \n";
1275             $node_ranks->{$child} = $c_rank;
1276             push( @next_nodes, $child );
1277         }
1278     }
1279     return @next_nodes;
1280 }
1281
1282 =head2 flatten_ranks
1283
1284 A convenience method for parsing collation data.  Searches the graph for readings
1285 with the same text at the same rank, and merges any that are found.
1286
1287 =cut
1288
1289 sub flatten_ranks {
1290     my $self = shift;
1291     my %unique_rank_rdg;
1292     foreach my $rdg ( $self->readings ) {
1293         next unless $rdg->has_rank;
1294         my $key = $rdg->rank . "||" . $rdg->text;
1295         if( exists $unique_rank_rdg{$key} ) {
1296             # Combine!
1297                 # print STDERR "Combining readings at same rank: $key\n";
1298             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1299             # TODO see if this now makes a common point.
1300         } else {
1301             $unique_rank_rdg{$key} = $rdg;
1302         }
1303     }
1304 }
1305
1306 =head2 calculate_common_readings
1307
1308 Goes through the graph identifying the readings that appear in every witness
1309 (apart from those with lacunae at that spot.) Marks them as common and returns
1310 the list.
1311
1312 =begin testing
1313
1314 use Text::Tradition;
1315
1316 my $cxfile = 't/data/Collatex-16.xml';
1317 my $t = Text::Tradition->new(
1318     'name'  => 'inline',
1319     'input' => 'CollateX',
1320     'file'  => $cxfile,
1321     );
1322 my $c = $t->collation;
1323
1324 my @common = $c->calculate_common_readings();
1325 is( scalar @common, 8, "Found correct number of common readings" );
1326 my @marked = sort $c->common_readings();
1327 is( scalar @common, 8, "All common readings got marked as such" );
1328 my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1329 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1330
1331 =end testing
1332
1333 =cut
1334
1335 sub calculate_common_readings {
1336         my $self = shift;
1337         my @common;
1338         my $table = $self->make_alignment_table( 1 );
1339         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1340                 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1341                 my %hash;
1342                 foreach my $r ( @row ) {
1343                         if( $r ) {
1344                                 $hash{$r->id} = $r unless $r->is_meta;
1345                         } else {
1346                                 $hash{'UNDEF'} = $r;
1347                         }
1348                 }
1349                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1350                         my( $r ) = values %hash;
1351                         $r->is_common( 1 );
1352                         push( @common, $r );
1353                 }
1354         }
1355         return @common;
1356 }
1357
1358 =head2 text_from_paths
1359
1360 Calculate the text array for all witnesses from the path, for later consistency
1361 checking.  Only to be used if there is no non-graph-based way to know the
1362 original texts.
1363
1364 =cut
1365
1366 sub text_from_paths {
1367         my $self = shift;
1368     foreach my $wit ( $self->tradition->witnesses ) {
1369         my @text = split( /\s+/,
1370                 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1371         $wit->text( \@text );
1372         if( $wit->is_layered ) {
1373                         my @uctext = split( /\s+/,
1374                                 $self->reading_sequence( $self->start, $self->end,
1375                                         $wit->sigil.$self->ac_label ) );
1376                         $wit->text( \@uctext );
1377         }
1378     }
1379 }
1380
1381 =head1 UTILITY FUNCTIONS
1382
1383 =head2 common_predecessor( $reading_a, $reading_b )
1384
1385 Find the last reading that occurs in sequence before both the given readings.
1386
1387 =head2 common_successor( $reading_a, $reading_b )
1388
1389 Find the first reading that occurs in sequence after both the given readings.
1390
1391 =begin testing
1392
1393 use Text::Tradition;
1394
1395 my $cxfile = 't/data/Collatex-16.xml';
1396 my $t = Text::Tradition->new(
1397     'name'  => 'inline',
1398     'input' => 'CollateX',
1399     'file'  => $cxfile,
1400     );
1401 my $c = $t->collation;
1402
1403 is( $c->common_predecessor( 'n9', 'n23' )->id,
1404     'n20', "Found correct common predecessor" );
1405 is( $c->common_successor( 'n9', 'n23' )->id,
1406     '#END#', "Found correct common successor" );
1407
1408 is( $c->common_predecessor( 'n19', 'n17' )->id,
1409     'n16', "Found correct common predecessor for readings on same path" );
1410 is( $c->common_successor( 'n21', 'n26' )->id,
1411     '#END#', "Found correct common successor for readings on same path" );
1412
1413 =end testing
1414
1415 =cut
1416
1417 ## Return the closest reading that is a predecessor of both the given readings.
1418 sub common_predecessor {
1419         my $self = shift;
1420         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1421         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1422 }
1423
1424 sub common_successor {
1425         my $self = shift;
1426         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1427         return $self->_common_in_path( $r1, $r2, 'successors' );
1428 }
1429
1430 sub _common_in_path {
1431         my( $self, $r1, $r2, $dir ) = @_;
1432         my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1433         $iter = $self->end->rank - $iter if $dir eq 'successors';
1434         my @candidates;
1435         my @last_checked = ( $r1, $r2 );
1436         my %all_seen;
1437         while( !@candidates ) {
1438                 my @new_lc;
1439                 foreach my $lc ( @last_checked ) {
1440                         foreach my $p ( $lc->$dir ) {
1441                                 if( $all_seen{$p->id} ) {
1442                                         push( @candidates, $p );
1443                                 } else {
1444                                         $all_seen{$p->id} = 1;
1445                                         push( @new_lc, $p );
1446                                 }
1447                         }
1448                 }
1449                 @last_checked = @new_lc;
1450         }
1451         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1452         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1453 }
1454
1455 sub throw {
1456         Text::Tradition::Error->throw(
1457                 'ident' => 'Collation error',
1458                 'message' => $_[0],
1459                 );
1460 }
1461
1462 no Moose;
1463 __PACKAGE__->meta->make_immutable;
1464
1465 =head1 LICENSE
1466
1467 This package is free software and is provided "as is" without express
1468 or implied warranty.  You can redistribute it and/or modify it under
1469 the same terms as Perl itself.
1470
1471 =head1 AUTHOR
1472
1473 Tara L Andrews E<lt>aurum@cpan.orgE<gt>