base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use JSON qw/ to_json /;
  10 use Text::CSV;
  11 use Text::Tradition::Collation::Data;
  12 use Text::Tradition::Collation::Reading;
  13 use Text::Tradition::Collation::RelationshipStore;
  14 use Text::Tradition::Error;
  15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  16 use XML::LibXML;
  17 use XML::LibXML::XPathContext;
  18 use Moose;
  19
  20 has _data => (
  21         isa      => 'Text::Tradition::Collation::Data',
  22         is       => 'ro',
  23         required => 1,
  24         handles  => [ qw(
  25                 sequence
  26                 paths
  27                 _set_relations
  28                 relations
  29                 _set_start
  30                 _set_end
  31                 ac_label
  32                 has_cached_table
  33                 relationships
  34                 relationship_types
  35                 related_readings
  36                 get_relationship
  37                 del_relationship
  38                 equivalence
  39                 equivalence_graph
  40                 readings
  41                 reading
  42                 _add_reading
  43                 del_reading
  44                 has_reading
  45                 wit_list_separator
  46                 baselabel
  47                 linear
  48                 wordsep
  49                 start
  50                 end
  51                 cached_table
  52                 _graphcalc_done
  53                 has_cached_svg
  54                 wipe_table
  55         )]
  56 );
  57
  58 has 'tradition' => (
  59     is => 'ro',
  60     isa => 'Text::Tradition',
  61     writer => '_set_tradition',
  62     weak_ref => 1,
  63     );
  64
  65 =encoding utf8
  66
  67 =head1 NAME
  68
  69 Text::Tradition::Collation - a software model for a text collation
  70
  71 =head1 SYNOPSIS
  72
  73   use Text::Tradition;
  74   my $t = Text::Tradition->new(
  75     'name' => 'this is a text',
  76     'input' => 'TEI',
  77     'file' => '/path/to/tei_parallel_seg_file.xml' );
  78
  79   my $c = $t->collation;
  80   my @readings = $c->readings;
  81   my @paths = $c->paths;
  82   my @relationships = $c->relationships;
  83
  84   my $svg_variant_graph = $t->collation->as_svg();
  85
  86 =head1 DESCRIPTION
  87
  88 Text::Tradition is a library for representation and analysis of collated
  89 texts, particularly medieval ones.  The Collation is the central feature of
  90 a Tradition, where the text, its sequence of readings, and its relationships
  91 between readings are actually kept.
  92
  93 =head1 CONSTRUCTOR
  94
  95 =head2 new
  96
  97 The constructor.  Takes a hash or hashref of the following arguments:
  98
  99 =over
 100
 101 =item * tradition - The Text::Tradition object to which the collation
 102 belongs. Required.
 103
 104 =item * linear - Whether the collation should be linear; that is, whether
 105 transposed readings should be treated as two linked readings rather than one,
 106 and therefore whether the collation graph is acyclic.  Defaults to true.
 107
 108 =item * baselabel - The default label for the path taken by a base text
 109 (if any). Defaults to 'base text'.
 110
 111 =item * wit_list_separator - The string to join a list of witnesses for
 112 purposes of making labels in display graphs.  Defaults to ', '.
 113
 114 =item * ac_label - The extra label to tack onto a witness sigil when
 115 representing another layer of path for the given witness - that is, when
 116 a text has more than one possible reading due to scribal corrections or
 117 the like.  Defaults to ' (a.c.)'.
 118
 119 =item * wordsep - The string used to separate words in the original text.
 120 Defaults to ' '.
 121
 122 =back
 123
 124 =head1 ACCESSORS
 125
 126 =head2 tradition
 127
 128 =head2 linear
 129
 130 =head2 wit_list_separator
 131
 132 =head2 baselabel
 133
 134 =head2 ac_label
 135
 136 =head2 wordsep
 137
 138 Simple accessors for collation attributes.
 139
 140 =head2 start
 141
 142 The meta-reading at the start of every witness path.
 143
 144 =head2 end
 145
 146 The meta-reading at the end of every witness path.
 147
 148 =head2 readings
 149
 150 Returns all Reading objects in the graph.
 151
 152 =head2 reading( $id )
 153
 154 Returns the Reading object corresponding to the given ID.
 155
 156 =head2 add_reading( $reading_args )
 157
 158 Adds a new reading object to the collation.
 159 See L<Text::Tradition::Collation::Reading> for the available arguments.
 160
 161 =head2 del_reading( $object_or_id )
 162
 163 Removes the given reading from the collation, implicitly removing its
 164 paths and relationships.
 165
 166 =head2 has_reading( $id )
 167
 168 Predicate to see whether a given reading ID is in the graph.
 169
 170 =head2 reading_witnesses( $object_or_id )
 171
 172 Returns a list of sigils whose witnesses contain the reading.
 173
 174 =head2 paths
 175
 176 Returns all reading paths within the document - that is, all edges in the
 177 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 178
 179 =head2 add_path( $source, $target, $sigil )
 180
 181 Links the given readings in the collation in sequence, under the given witness
 182 sigil.  The readings may be specified by object or ID.
 183
 184 =head2 del_path( $source, $target, $sigil )
 185
 186 Links the given readings in the collation in sequence, under the given witness
 187 sigil.  The readings may be specified by object or ID.
 188
 189 =head2 has_path( $source, $target );
 190
 191 Returns true if the two readings are linked in sequence in any witness.
 192 The readings may be specified by object or ID.
 193
 194 =head2 relationships
 195
 196 Returns all Relationship objects in the collation.
 197
 198 =head2 add_relationship( $reading, $other_reading, $options )
 199
 200 Adds a new relationship of the type given in $options between the two readings,
 201 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 202 where $status is true on success, and @vectors is a list of relationship edges
 203 that were ultimately added.
 204 See L<Text::Tradition::Collation::Relationship> for the available options.
 205
 206 =cut
 207
 208 sub BUILDARGS {
 209         my ( $class, @args ) = @_;
 210         my %args = @args == 1 ? %{ $args[0] } : @args;
 211         # TODO determine these from the Moose::Meta object
 212         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 213                 linear wordsep start end cached_table _graphcalc_done);
 214         my %data_args;
 215         for my $attr (@delegate_attrs) {
 216                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 217         }
 218         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 219         return \%args;
 220 }
 221
 222 sub BUILD {
 223     my $self = shift;
 224     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 225     $self->_set_start( $self->add_reading(
 226         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 227     $self->_set_end( $self->add_reading(
 228         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 229 }
 230
 231 =head2 register_relationship_type( %relationship_definition )
 232
 233 Add a relationship type definition to this collation. The argument can be either a
 234 hash or a hashref, defining the properties of the relationship. For relationship types
 235 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
 236
 237 =head2 get_relationship_type( $relationship_name )
 238
 239 Retrieve the RelationshipType object for the relationship with the given name.
 240
 241 =cut
 242
 243 sub register_relationship_type {
 244         my $self = shift;
 245         my %args = @_ == 1 ? %{$_[0]} : @_;
 246         if( $self->relations->has_type( $args{name} ) ) {
 247                 throw( 'Relationship type ' . $args{name} . ' already registered' );
 248         }
 249         $self->relations->add_type( %args );
 250 }
 251
 252 sub get_relationship_type {
 253         my( $self, $name ) = @_;
 254                 return $self->relations->has_type( $name )
 255                         ? $self->relations->type( $name ) : undef;
 256 }
 257
 258 ### Reading construct/destruct functions
 259
 260 sub add_reading {
 261         my( $self, $reading ) = @_;
 262         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 263                 my %args = %$reading;
 264                 if( $args{'init'} ) {
 265                         # If we are initializing an empty collation, don't assume that we
 266                         # have set a tradition.
 267                         delete $args{'init'};
 268                 } elsif( $self->tradition->can('language') && $self->tradition->has_language
 269                         && !exists $args{'language'} ) {
 270                         $args{'language'} = $self->tradition->language;
 271                 }
 272                 $reading = Text::Tradition::Collation::Reading->new(
 273                         'collation' => $self,
 274                         %args );
 275         }
 276         # First check to see if a reading with this ID exists.
 277         if( $self->reading( $reading->id ) ) {
 278                 throw( "Collation already has a reading with id " . $reading->id );
 279         }
 280         $self->_graphcalc_done(0);
 281         $self->_add_reading( $reading->id => $reading );
 282         # Once the reading has been added, put it in both graphs.
 283         $self->sequence->add_vertex( $reading->id );
 284         $self->relations->add_reading( $reading->id );
 285         return $reading;
 286 };
 287
 288 around del_reading => sub {
 289         my $orig = shift;
 290         my $self = shift;
 291         my $arg = shift;
 292
 293         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 294                 $arg = $arg->id;
 295         }
 296         # Remove the reading from the graphs.
 297         $self->_graphcalc_done(0);
 298         $self->_clear_cache; # Explicitly clear caches to GC the reading
 299         $self->sequence->delete_vertex( $arg );
 300         $self->relations->delete_reading( $arg );
 301
 302         # Carry on.
 303         $self->$orig( $arg );
 304 };
 305
 306 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 307
 308 Merges the $second reading into the $main one. If $concatenate is true, then
 309 the merged node will carry the text of both readings, concatenated with either
 310 $with_str (if specified) or a sensible default (the empty string if the
 311 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 312
 313 The first two arguments may be either readings or reading IDs.
 314
 315 =begin testing
 316
 317 use Text::Tradition;
 318 use TryCatch;
 319
 320 my $cxfile = 't/data/Collatex-16.xml';
 321 my $t = Text::Tradition->new(
 322     'name'  => 'inline',
 323     'input' => 'CollateX',
 324     'file'  => $cxfile,
 325     );
 326 my $c = $t->collation;
 327
 328 my $rno = scalar $c->readings;
 329 # Split n21 ('unto') for testing purposes
 330 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 331 my $old_r = $c->reading( 'n21' );
 332 $old_r->alter_text( 'to' );
 333 $c->del_path( 'n20', 'n21', 'A' );
 334 $c->add_path( 'n20', 'n21p0', 'A' );
 335 $c->add_path( 'n21p0', 'n21', 'A' );
 336 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
 337 $c->flatten_ranks();
 338 ok( $c->reading( 'n21p0' ), "New reading exists" );
 339 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 340
 341 # Combine n3 and n4 ( with his )
 342 $c->merge_readings( 'n3', 'n4', 1 );
 343 ok( !$c->reading('n4'), "Reading n4 is gone" );
 344 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 345
 346 # Collapse n9 and n10 ( rood / root )
 347 $c->merge_readings( 'n9', 'n10' );
 348 ok( !$c->reading('n10'), "Reading n10 is gone" );
 349 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 350
 351 # Try to combine n21 and n21p0. This should break.
 352 my $remaining = $c->reading('n21');
 353 $remaining ||= $c->reading('n22');  # one of these should still exist
 354 try {
 355         $c->merge_readings( 'n21p0', $remaining, 1 );
 356         ok( 0, "Bad reading merge changed the graph" );
 357 } catch( Text::Tradition::Error $e ) {
 358         like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
 359 } catch {
 360         ok( 0, "Unexpected error on bad reading merge: $@" );
 361 }
 362
 363 try {
 364         $c->calculate_ranks();
 365         ok( 1, "Graph is still evidently whole" );
 366 } catch( Text::Tradition::Error $e ) {
 367         ok( 0, "Caught a rank exception: " . $e->message );
 368 }
 369
 370 =end testing
 371
 372 =cut
 373
 374 sub merge_readings {
 375         my $self = shift;
 376
 377         # Sanity check
 378         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 379         my $mergemeta = $kept_obj->is_meta;
 380         throw( "Cannot merge meta and non-meta reading" )
 381                 unless ( $mergemeta && $del_obj->is_meta )
 382                         || ( !$mergemeta && !$del_obj->is_meta );
 383         if( $mergemeta ) {
 384                 throw( "Cannot merge with start or end node" )
 385                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 386                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 387                 throw( "Cannot combine text of meta readings" ) if $combine;
 388         }
 389         # We can only merge readings in a linear graph if:
 390         # - they are contiguous with only one edge between them, OR
 391         # - they are at equivalent ranks in the graph.
 392         if( $self->linear ) {
 393                 my @delpred = $del_obj->predecessors;
 394                 my @keptsuc = $kept_obj->successors;
 395                 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
 396                         && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
 397                         my( $is_ok, $msg ) = $self->relations->relationship_valid(
 398                                 $kept_obj, $del_obj, 'collated' );
 399                         unless( $is_ok ) {
 400                                 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
 401                         }
 402                 }
 403         }
 404
 405         # We only need the IDs for adding paths to the graph, not the reading
 406         # objects themselves.
 407         my $kept = $kept_obj->id;
 408         my $deleted = $del_obj->id;
 409         $self->_graphcalc_done(0);
 410
 411     # The kept reading should inherit the paths and the relationships
 412     # of the deleted reading.
 413         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 414                 my @vector = ( $kept );
 415                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 416                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 417                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 418                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 419                 $self->sequence->add_edge( @vector );
 420                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 421                 @wits{keys %$fwits} = values %$fwits;
 422                 $self->sequence->set_edge_attributes( @vector, \%wits );
 423         }
 424         $self->relations->merge_readings( $kept, $deleted, $combine );
 425
 426         # Do the deletion deed.
 427         if( $combine ) {
 428                 # Combine the text of the readings
 429                 my $joinstr = $combine_char;
 430                 unless( defined $joinstr ) {
 431                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 432                         $joinstr = $self->wordsep unless defined $joinstr;
 433                 }
 434                 $kept_obj->_combine( $del_obj, $joinstr );
 435         }
 436         $self->del_reading( $deleted );
 437 }
 438
 439 =head2 merge_related( @relationship_types )
 440
 441 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
 442
 443 WARNING: This operation cannot be undone.
 444
 445 =cut
 446
 447 =begin testing
 448
 449 use Test::Warn;
 450 use Text::Tradition;
 451 use TryCatch;
 452
 453 my $t;
 454 warnings_exist {
 455         $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
 456 } [qr/Cannot set relationship on a meta reading/],
 457         "Got expected relationship drop warning on parse";
 458
 459 my $c = $t->collation;
 460
 461 my %rdg_ids;
 462 map { $rdg_ids{$_} = 1 } $c->readings;
 463 $c->merge_related( 'orthographic' );
 464 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
 465         "Successfully collapsed orthographic variation" );
 466 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
 467 foreach my $rid ( keys %rdg_ids ) {
 468         my $exp = $rdg_ids{$rid};
 469         is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
 470                 ( $exp ? "retained" : "removed" ) );
 471 }
 472 ok( $c->linear, "Graph is still linear" );
 473 try {
 474         $c->calculate_ranks; # This should succeed
 475         ok( 1, "Can still calculate ranks on the new graph" );
 476 } catch {
 477         ok( 0, "Rank calculation on merged graph failed: $@" );
 478 }
 479
 480 # Now add some transpositions
 481 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
 482 $c->merge_related( 'transposition' );
 483 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
 484         "Transposed relationship is merged away" );
 485 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
 486 ok( !$c->linear, "Graph is no longer linear" );
 487 try {
 488         $c->calculate_ranks; # This should fail
 489         ok( 0, "Rank calculation happened on nonlinear graph?!" );
 490 } catch ( Text::Tradition::Error $e ) {
 491         is( $e->message, 'Cannot calculate ranks on a non-linear graph',
 492                 "Rank calculation on merged graph threw an error" );
 493 }
 494
 495 =end testing
 496
 497 =cut
 498
 499 # TODO: there should be a way to display merged without affecting the underlying data!
 500
 501 sub merge_related {
 502         my $self = shift;
 503         my %reltypehash;
 504         map { $reltypehash{$_} = 1 } @_;
 505
 506         # Set up the filter for finding related readings
 507         my $filter = sub {
 508                 exists $reltypehash{$_[0]->type};
 509         };
 510
 511         # Go through all readings looking for related ones
 512         foreach my $r ( $self->readings ) {
 513                 next unless $self->reading( "$r" ); # might have been deleted meanwhile
 514                 while( my @related = $self->related_readings( $r, $filter ) ) {
 515                         push( @related, $r );
 516                         @related = sort {
 517                                         scalar $b->witnesses <=> scalar $a->witnesses
 518                                 } @related;
 519                         my $keep = shift @related;
 520                         foreach my $delr ( @related ) {
 521                                 $self->linear( 0 )
 522                                         unless( $self->get_relationship( $keep, $delr )->colocated );
 523                                 $self->merge_readings( $keep, $delr );
 524                         }
 525                 }
 526         }
 527 }
 528
 529 =head2 compress_readings
 530
 531 Where possible in the graph, compresses plain sequences of readings into a
 532 single reading. The sequences must consist of readings with no
 533 relationships to other readings, with only a single witness path between
 534 them and no other witness paths from either that would skip the other. The
 535 readings must also not be marked as nonsense or bad grammar.
 536
 537 WARNING: This operation cannot be undone.
 538
 539 =cut
 540
 541 sub compress_readings {
 542         my $self = shift;
 543         # Sanity check: first save the original text of each witness.
 544         my %origtext;
 545         foreach my $wit ( $self->tradition->witnesses ) {
 546                 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
 547                 if( $wit->is_layered ) {
 548                         my $acsig = $wit->sigil . $self->ac_label;
 549                         $origtext{$acsig} = $self->path_text( $acsig );
 550                 }
 551         }
 552
 553         # Now do the deed.
 554         # Anywhere in the graph that there is a reading that joins only to a single
 555         # successor, and neither of these have any relationships, just join the two
 556         # readings.
 557         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 558                 # Now look for readings that can be joined to their successors.
 559                 next unless $rdg->is_combinable;
 560                 my %seen;
 561                 while( $self->sequence->successors( $rdg ) == 1 ) {
 562                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 563                         throw( "Infinite loop" ) if $seen{$next->id};
 564                         $seen{$next->id} = 1;
 565                         last if $self->sequence->predecessors( $next ) > 1;
 566                         last unless $next->is_combinable;
 567                         say "Joining readings $rdg and $next";
 568                         $self->merge_readings( $rdg, $next, 1 );
 569                 }
 570         }
 571
 572         # Finally, make sure we haven't screwed anything up.
 573         foreach my $wit ( $self->tradition->witnesses ) {
 574                 my $pathtext = $self->path_text( $wit->sigil );
 575                 throw( "Text differs for witness " . $wit->sigil )
 576                         unless $pathtext eq $origtext{$wit->sigil};
 577                 if( $wit->is_layered ) {
 578                         my $acsig = $wit->sigil . $self->ac_label;
 579                         $pathtext = $self->path_text( $acsig );
 580                         throw( "Layered text differs for witness " . $wit->sigil )
 581                                 unless $pathtext eq $origtext{$acsig};
 582                 }
 583         }
 584
 585         $self->relations->rebuild_equivalence();
 586         $self->calculate_ranks();
 587 }
 588
 589 # Helper function for manipulating the graph.
 590 sub _stringify_args {
 591         my( $self, $first, $second, @args ) = @_;
 592     $first = $first->id
 593         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 594     $second = $second->id
 595         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 596     return( $first, $second, @args );
 597 }
 598
 599 # Helper function for manipulating the graph.
 600 sub _objectify_args {
 601         my( $self, $first, $second, $arg ) = @_;
 602     $first = $self->reading( $first )
 603         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 604     $second = $self->reading( $second )
 605         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 606     return( $first, $second, $arg );
 607 }
 608
 609 =head2 duplicate_reading( $reading, @witlist )
 610
 611 Split the given reading into two, so that the new reading is in the path for
 612 the witnesses given in @witlist. If the result is that certain non-colocated
 613 relationships (e.g. transpositions) are no longer valid, these will be removed.
 614 Returns the newly-created reading.
 615
 616 =begin testing
 617
 618 use Test::More::UTF8;
 619 use Text::Tradition;
 620 use TryCatch;
 621
 622 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
 623 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
 624 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
 625
 626 my $sc = $st->collation;
 627 my $numr = 17;
 628 ok( $sc->reading('n131'), "Tradition has the affected reading" );
 629 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
 630 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
 631
 632 # Detach the erroneously collated reading
 633 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
 634 ok( $newr, "New reading was created" );
 635 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
 636 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
 637 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
 638 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
 639 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
 640
 641 # Check that the bad transposition is gone
 642 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
 643 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
 644
 645 # The collation should not be fixed
 646 my @pairs = $sc->identical_readings();
 647 is( scalar @pairs, 0, "Not re-collated yet" );
 648 # Fix the collation
 649 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
 650 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
 651 is( scalar @pairs, 3, "Found three more identical readings" );
 652 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
 653 $sc->flatten_ranks();
 654 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
 655
 656 # Check that we can't "duplicate" a reading with no wits or with all wits
 657 try {
 658         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
 659         ok( 0, "Reading duplication without witnesses throws an error" );
 660 } catch( Text::Tradition::Error $e ) {
 661         like( $e->message, qr/Must specify one or more witnesses/,
 662                 "Reading duplication without witnesses throws the expected error" );
 663 } catch {
 664         ok( 0, "Reading duplication without witnesses threw the wrong error" );
 665 }
 666
 667 try {
 668         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
 669         ok( 0, "Reading duplication with all witnesses throws an error" );
 670 } catch( Text::Tradition::Error $e ) {
 671         like( $e->message, qr/Cannot join all witnesses/,
 672                 "Reading duplication with all witnesses throws the expected error" );
 673 } catch {
 674         ok( 0, "Reading duplication with all witnesses threw the wrong error" );
 675 }
 676
 677 try {
 678         $sc->calculate_ranks();
 679         ok( 1, "Graph is still evidently whole" );
 680 } catch( Text::Tradition::Error $e ) {
 681         ok( 0, "Caught a rank exception: " . $e->message );
 682 }
 683
 684 =end testing
 685
 686 =cut
 687
 688 sub duplicate_reading {
 689         my( $self, $r, @wits ) = @_;
 690         # Check that we are not doing anything unwise.
 691         throw( "Must specify one or more witnesses for the duplicated reading" )
 692                 unless @wits;
 693         unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
 694                 $r = $self->reading( $r );
 695         }
 696         throw( "Cannot duplicate a meta-reading" )
 697                 if $r->is_meta;
 698         throw( "Cannot join all witnesses to the new reading" )
 699                 if scalar( @wits ) == scalar( $r->witnesses );
 700
 701         # Get all the reading attributes and duplicate them.
 702         my $rmeta = Text::Tradition::Collation::Reading->meta;
 703         my %args;
 704     foreach my $attr( $rmeta->get_all_attributes ) {
 705                 next if $attr->name =~ /^_/;
 706                 my $acc = $attr->get_read_method;
 707                 if( !$acc && $attr->has_applied_traits ) {
 708                         my $tr = $attr->applied_traits;
 709                         if( $tr->[0] =~ /::(Array|Hash)$/ ) {
 710                                 my $which = $1;
 711                                 my %methods = reverse %{$attr->handles};
 712                                 $acc = $methods{elements};
 713                                 $args{$attr->name} = $which eq 'Array'
 714                                         ? [ $r->$acc ] : { $r->$acc };
 715                         }
 716                 } else {
 717                         $args{$attr->name} = $r->$acc if $acc;
 718                 }
 719         }
 720         # By definition the new reading will no longer be common.
 721         $args{is_common} = 0;
 722         # The new reading also needs its own ID.
 723         $args{id} = $self->_generate_dup_id( $r->id );
 724
 725         # Try to make the new reading.
 726         my $newr = $self->add_reading( \%args );
 727         # The old reading is also no longer common.
 728         $r->is_common( 0 );
 729
 730         # For each of the witnesses, dissociate from the old reading and
 731         # associate with the new.
 732         foreach my $wit ( @wits ) {
 733                 my $prior = $self->prior_reading( $r, $wit );
 734                 my $next = $self->next_reading( $r, $wit );
 735                 $self->del_path( $prior, $r, $wit );
 736                 $self->add_path( $prior, $newr, $wit );
 737                 $self->del_path( $r, $next, $wit );
 738                 $self->add_path( $newr, $next, $wit );
 739         }
 740
 741         # If the graph is ranked, we need to look for relationships that are now
 742         # invalid (i.e. 'non-colocation' types that might now be colocated) and
 743         # remove them. If not, we can skip it.
 744         my $succ;
 745         my %rrk;
 746         my @deleted_relations;
 747         if( $self->end->has_rank ) {
 748                 # Find the point where we can stop checking
 749                 $succ = $self->common_successor( $r, $newr );
 750
 751                 # Hash the existing ranks
 752                 foreach my $rdg ( $self->readings ) {
 753                         $rrk{$rdg->id} = $rdg->rank;
 754                 }
 755                 # Calculate the new ranks
 756                 $self->calculate_ranks();
 757
 758                 # Check for invalid non-colocated relationships among changed-rank readings
 759                 # from where the ranks start changing up to $succ
 760                 my $lastrank = $succ->rank;
 761                 foreach my $rdg ( $self->readings ) {
 762                         next if $rdg->rank > $lastrank;
 763                         next if $rdg->rank == $rrk{$rdg->id};
 764                         my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
 765                         next unless @noncolo;
 766                         foreach my $nc ( @noncolo ) {
 767                                 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
 768                                         push( @deleted_relations, [ $rdg->id, $nc->id ] );
 769                                 }
 770                         }
 771                 }
 772         }
 773         return ( $newr, @deleted_relations );
 774 }
 775
 776 sub _generate_dup_id {
 777         my( $self, $rid ) = @_;
 778         my $newid;
 779         my $i = 0;
 780         while( !$newid ) {
 781                 $newid = $rid."_$i";
 782                 if( $self->has_reading( $newid ) ) {
 783                         $newid = '';
 784                         $i++;
 785                 }
 786         }
 787         return $newid;
 788 }
 789
 790 ### Path logic
 791
 792 sub add_path {
 793         my $self = shift;
 794
 795         # We only need the IDs for adding paths to the graph, not the reading
 796         # objects themselves.
 797     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 798
 799         $self->_graphcalc_done(0);
 800         # Connect the readings
 801         unless( $self->sequence->has_edge( $source, $target ) ) {
 802             $self->sequence->add_edge( $source, $target );
 803             $self->relations->add_equivalence_edge( $source, $target );
 804         }
 805     # Note the witness in question
 806     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 807 }
 808
 809 sub del_path {
 810         my $self = shift;
 811         my @args;
 812         if( ref( $_[0] ) eq 'ARRAY' ) {
 813                 my $e = shift @_;
 814                 @args = ( @$e, @_ );
 815         } else {
 816                 @args = @_;
 817         }
 818
 819         # We only need the IDs for removing paths from the graph, not the reading
 820         # objects themselves.
 821     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 822
 823         $self->_graphcalc_done(0);
 824         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 825                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 826         }
 827         unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
 828                 $self->sequence->delete_edge( $source, $target );
 829                 $self->relations->delete_equivalence_edge( $source, $target );
 830         }
 831 }
 832
 833
 834 # Extra graph-alike utility
 835 sub has_path {
 836         my $self = shift;
 837     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 838         return undef unless $self->sequence->has_edge( $source, $target );
 839         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 840 }
 841
 842 =head2 clear_witness( @sigil_list )
 843
 844 Clear the given witnesses out of the collation entirely, removing references
 845 to them in paths, and removing readings that belong only to them.  Should only
 846 be called via $tradition->del_witness.
 847
 848 =cut
 849
 850 sub clear_witness {
 851         my( $self, @sigils ) = @_;
 852
 853         $self->_graphcalc_done(0);
 854         # Clear the witness(es) out of the paths
 855         foreach my $e ( $self->paths ) {
 856                 foreach my $sig ( @sigils ) {
 857                         $self->del_path( $e, $sig );
 858                 }
 859         }
 860
 861         # Clear out the newly unused readings
 862         foreach my $r ( $self->readings ) {
 863                 unless( $self->reading_witnesses( $r ) ) {
 864                         $self->del_reading( $r );
 865                 }
 866         }
 867 }
 868
 869 sub add_relationship {
 870         my $self = shift;
 871     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 872     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 873     foreach my $v ( @vectors ) {
 874         next unless $self->get_relationship( $v )->colocated;
 875         if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
 876                 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
 877                         $self->_graphcalc_done(0);
 878                         $self->_clear_cache;
 879                         last;
 880         }
 881     }
 882     return @vectors;
 883 }
 884
 885 around qw/ get_relationship del_relationship / => sub {
 886         my $orig = shift;
 887         my $self = shift;
 888         my @args = @_;
 889         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 890                 @args = @{$_[0]};
 891         }
 892         my @stringargs = $self->_stringify_args( @args );
 893         $self->$orig( @stringargs );
 894 };
 895
 896 =head2 reading_witnesses( $reading )
 897
 898 Return a list of sigils corresponding to the witnesses in which the reading appears.
 899
 900 =cut
 901
 902 sub reading_witnesses {
 903         my( $self, $reading ) = @_;
 904         # We need only check either the incoming or the outgoing edges; I have
 905         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 906         if( $reading eq $self->start ) {
 907                 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
 908         }
 909         my %all_witnesses;
 910         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 911                 my $wits = $self->sequence->get_edge_attributes( @$e );
 912                 @all_witnesses{ keys %$wits } = 1;
 913         }
 914         my $acstr = $self->ac_label;
 915         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 916                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 917         }
 918         return keys %all_witnesses;
 919 }
 920
 921 =head1 OUTPUT METHODS
 922
 923 =head2 as_svg( \%options )
 924
 925 Returns an SVG string that represents the graph, via as_dot and graphviz.
 926 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 927
 928 =cut
 929
 930 sub as_svg {
 931     my( $self, $opts ) = @_;
 932     throw( "Need GraphViz installed to output SVG" )
 933         unless File::Which::which( 'dot' );
 934     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 935     $self->calculate_ranks()
 936         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 937         my @cmd = qw/dot -Tsvg/;
 938         my( $svg, $err );
 939         my $dotfile = File::Temp->new();
 940         ## USE FOR DEBUGGING
 941         # $dotfile->unlink_on_destroy(0);
 942         binmode $dotfile, ':utf8';
 943         print $dotfile $self->as_dot( $opts );
 944         push( @cmd, $dotfile->filename );
 945         run( \@cmd, ">", binary(), \$svg );
 946         $svg = decode_utf8( $svg );
 947         return $svg;
 948 }
 949
 950
 951 =head2 as_dot( \%options )
 952
 953 Returns a string that is the collation graph expressed in dot
 954 (i.e. GraphViz) format.  Options include:
 955
 956 =over 4
 957
 958 =item * from
 959
 960 =item * to
 961
 962 =item * color_common
 963
 964 =back
 965
 966 =cut
 967
 968 sub as_dot {
 969     my( $self, $opts ) = @_;
 970     my $startrank = $opts->{'from'} if $opts;
 971     my $endrank = $opts->{'to'} if $opts;
 972     my $color_common = $opts->{'color_common'} if $opts;
 973     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 974        && $self->end->rank > 100;
 975     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 976
 977     # Check the arguments
 978     if( $startrank ) {
 979         return if $endrank && $startrank > $endrank;
 980         return if $startrank > $self->end->rank;
 981         }
 982         if( defined $endrank ) {
 983                 return if $endrank < 0;
 984                 $endrank = undef if $endrank == $self->end->rank;
 985         }
 986
 987     my $graph_name = $self->tradition->name;
 988     $graph_name =~ s/[^\w\s]//g;
 989     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 990
 991     my %graph_attrs = (
 992         'rankdir' => 'LR',
 993         'bgcolor' => 'none',
 994         );
 995     my %node_attrs = (
 996         'fontsize' => 14,
 997         'fillcolor' => 'white',
 998         'style' => 'filled',
 999         'shape' => 'ellipse'
1000         );
1001     my %edge_attrs = (
1002         'arrowhead' => 'open',
1003         'color' => '#000000',
1004         'fontcolor' => '#000000',
1005         );
1006
1007     my $dot = sprintf( "digraph %s {\n", $graph_name );
1008     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1009     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1010
1011         # Output substitute start/end readings if necessary
1012         if( $startrank ) {
1013                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1014         }
1015         if( $endrank ) {
1016                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1017         }
1018         if( $STRAIGHTENHACK ) {
1019                 ## HACK part 1
1020                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1021                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1022                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1023         }
1024         my %used;  # Keep track of the readings that actually appear in the graph
1025         # Sort the readings by rank if we have ranks; this speeds layout.
1026         my @all_readings = $self->end->has_rank
1027                 ? sort { $a->rank <=> $b->rank } $self->readings
1028                 : $self->readings;
1029         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1030     foreach my $reading ( @all_readings ) {
1031         # Only output readings within our rank range.
1032         next if $startrank && $reading->rank < $startrank;
1033         next if $endrank && $reading->rank > $endrank;
1034         $used{$reading->id} = 1;
1035         # Need not output nodes without separate labels
1036         next if $reading->id eq $reading->text;
1037         my $rattrs;
1038         my $label = $reading->text;
1039         unless( $label =~ /^[[:punct:]]+$/ ) {
1040                 $label .= '-' if $reading->join_next;
1041             $label = "-$label" if $reading->join_prior;
1042         }
1043         $label =~ s/\"/\\\"/g;
1044                 $rattrs->{'label'} = $label;
1045                 $rattrs->{'id'} = $reading->id;
1046                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1047         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1048     }
1049
1050         # Add the real edges.
1051     my @edges = $self->paths;
1052         my( %substart, %subend );
1053     foreach my $edge ( @edges ) {
1054         # Do we need to output this edge?
1055         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1056                 my $label = $self->_path_display_label( $opts,
1057                         $self->path_witnesses( $edge ) );
1058                         my $variables = { %edge_attrs, 'label' => $label };
1059
1060                         # Account for the rank gap if necessary
1061                         my $rank0 = $self->reading( $edge->[0] )->rank
1062                                 if $self->reading( $edge->[0] )->has_rank;
1063                         my $rank1 = $self->reading( $edge->[1] )->rank
1064                                 if $self->reading( $edge->[1] )->has_rank;
1065                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1066                                 $variables->{'minlen'} = $rank1 - $rank0;
1067                         }
1068
1069                         # EXPERIMENTAL: make edge width reflect no. of witnesses
1070                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1071                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1072
1073                         my $varopts = _dot_attr_string( $variables );
1074                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1075                                 $edge->[0], $edge->[1], $varopts );
1076         } elsif( $used{$edge->[0]} ) {
1077                 $subend{$edge->[0]} = $edge->[1];
1078         } elsif( $used{$edge->[1]} ) {
1079                 $substart{$edge->[1]} = $edge->[0];
1080         }
1081     }
1082
1083     # If we are asked to, add relationship links
1084     if( exists $opts->{show_relations} ) {
1085         my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1086         if( $filter eq 'transposition' ) {
1087                 $filter =~ qr/^transposition$/;
1088         }
1089         my %typecolors;
1090         my @types = sort( map { $_->name } $self->relations->types );
1091         if( exists $opts->{graphcolors} ) {
1092                 foreach my $tdx ( 0 .. $#types ) {
1093                         $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1094                 }
1095         } else {
1096                 map { $typecolors{$_} = '#FFA14F' } @types;
1097         }
1098         foreach my $redge ( $self->relationships ) {
1099                 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1100                                 my $rel = $self->get_relationship( $redge );
1101                                 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1102                                 my $variables = {
1103                                         arrowhead => 'none',
1104                                         color => $typecolors{$rel->type},
1105                                         constraint => 'false',
1106                                         penwidth => '3',
1107                                 };
1108                                 unless( exists $opts->{graphcolors} ) {
1109                                         $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1110                                 }
1111                                 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1112                                         $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1113                 }
1114         }
1115     }
1116
1117     # Add substitute start and end edges if necessary
1118     foreach my $node ( keys %substart ) {
1119         my $witstr = $self->_path_display_label( $opts,
1120                 $self->path_witnesses( $substart{$node}, $node ) );
1121         my $variables = { %edge_attrs, 'label' => $witstr };
1122         my $nrdg = $self->reading( $node );
1123         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1124                 # Substart is actually one lower than $startrank
1125                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1126         }
1127         my $varopts = _dot_attr_string( $variables );
1128         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1129         }
1130     foreach my $node ( keys %subend ) {
1131         my $witstr = $self->_path_display_label( $opts,
1132                 $self->path_witnesses( $node, $subend{$node} ) );
1133         my $variables = { %edge_attrs, 'label' => $witstr };
1134         my $varopts = _dot_attr_string( $variables );
1135         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1136         }
1137         # HACK part 2
1138         if( $STRAIGHTENHACK ) {
1139                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1140                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1141         }
1142
1143     $dot .= "}\n";
1144     return $dot;
1145 }
1146
1147 sub _dot_attr_string {
1148         my( $hash ) = @_;
1149         my @attrs;
1150         foreach my $k ( sort keys %$hash ) {
1151                 my $v = $hash->{$k};
1152                 push( @attrs, $k.'="'.$v.'"' );
1153         }
1154         return( '[ ' . join( ', ', @attrs ) . ' ]' );
1155 }
1156
1157 =head2 path_witnesses( $edge )
1158
1159 Returns the list of sigils whose witnesses are associated with the given edge.
1160 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1161
1162 =cut
1163
1164 sub path_witnesses {
1165         my( $self, @edge ) = @_;
1166         # If edge is an arrayref, cope.
1167         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1168                 my $e = shift @edge;
1169                 @edge = @$e;
1170         }
1171         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1172         return @wits;
1173 }
1174
1175 # Helper function. Make a display label for the given witnesses, showing a.c.
1176 # witnesses only where the main witness is not also in the list.
1177 sub _path_display_label {
1178         my $self = shift;
1179         my $opts = shift;
1180         my %wits;
1181         map { $wits{$_} = 1 } @_;
1182
1183         # If an a.c. wit is listed, remove it if the main wit is also listed.
1184         # Otherwise keep it for explicit listing.
1185         my $aclabel = $self->ac_label;
1186         my @disp_ac;
1187         foreach my $w ( sort keys %wits ) {
1188                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1189                         if( exists $wits{$1} ) {
1190                                 delete $wits{$w};
1191                         } else {
1192                                 push( @disp_ac, $w );
1193                         }
1194                 }
1195         }
1196
1197         if( $opts->{'explicit_wits'} ) {
1198                 return join( ', ', sort keys %wits );
1199         } else {
1200                 # See if we are in a majority situation.
1201                 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1202                 $maj = $maj > 5 ? $maj : 5;
1203                 if( scalar keys %wits > $maj ) {
1204                         unshift( @disp_ac, 'majority' );
1205                         return join( ', ', @disp_ac );
1206                 } else {
1207                         return join( ', ', sort keys %wits );
1208                 }
1209         }
1210 }
1211
1212 =head2 as_adjacency_list
1213
1214 Returns a JSON structure that represents the collation sequence graph.
1215
1216 =begin testing
1217
1218 use JSON qw/ from_json /;
1219 use Text::Tradition;
1220
1221 my $t = Text::Tradition->new(
1222         'input' => 'Self',
1223         'file' => 't/data/florilegium_graphml.xml' );
1224 my $c = $t->collation;
1225
1226 # Make a connection so we can test rank preservation
1227 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1228
1229 # Create an adjacency list of the whole thing; test the output.
1230 my $adj_whole = from_json( $c->as_adjacency_list() );
1231 is( scalar @$adj_whole, scalar $c->readings(),
1232         "Same number of nodes in graph and adjacency list" );
1233 my @adj_whole_edges;
1234 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1235 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1236         "Same number of edges in graph and adjacency list" );
1237 # Find the reading whose rank should be preserved
1238 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1239 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1240 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1241
1242 # Now create an adjacency list of just a portion. w76 to w122
1243 my $adj_part = from_json( $c->as_adjacency_list(
1244         { from => $c->reading('w76')->rank,
1245           to   => $c->reading('w122')->rank }));
1246 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1247 my @adj_part_edges;
1248 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1249 is( scalar @adj_part_edges, 58,
1250         "Same number of edges in partial graph and adjacency list" );
1251 # Check for consistency
1252 my %part_nodes;
1253 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1254 foreach my $edge ( @adj_part_edges ) {
1255         my $testid = $edge->{id};
1256         ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1257 }
1258
1259 =end testing
1260
1261 =cut
1262
1263 sub as_adjacency_list {
1264         my( $self, $opts ) = @_;
1265         # Make a structure that contains all the nodes, the nodes they point to,
1266         # and the attributes of the edges that connect them.
1267         # [ { id: 'n0', label: 'Gallia', adjacent: [
1268         #               { id: 'n1', label: 'P Q' } ,
1269         #               { id: 'n2', label: 'R S', minlen: 2 } ] },
1270         #   { id: 'n1', label: 'est', adjacent: [ ... ] },
1271         #   ... ]
1272         my $startrank = $opts->{'from'} || 0;
1273         my $endrank = $opts->{'to'} || $self->end->rank;
1274
1275     $self->calculate_ranks()
1276         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1277         my $list = [];
1278         foreach my $rdg ( $self->readings ) {
1279                 my @successors;
1280                 my $phony = '';
1281                 # Figure out what the node's successors should be.
1282                 if( $rdg eq $self->start && $startrank > 0 ) {
1283                         # Connect the start node with all the nodes at startrank.
1284                         # Lacunas should be included only if the node really has that rank.
1285                         @successors = $self->readings_at_rank( $startrank, 1 );
1286                         $phony = 'start';
1287                 } elsif( $rdg->rank < $startrank
1288                                  || $rdg->rank > $endrank && $rdg ne $self->end ) {
1289                         next;
1290                 } else {
1291                         @successors = $rdg->successors;
1292                 }
1293                 # Make sure that the end node is at the end of the successors
1294                 # list if it is needed.
1295                 if( grep { $_ eq $self->end } @successors ) {
1296                         my @ts = grep { $_ ne $self->end } @successors;
1297                         @successors = ( @ts, $self->end );
1298                 } elsif ( grep { $_->rank > $endrank } @successors ) {
1299                         push( @successors, $self->end );
1300                 }
1301
1302                 my $listitem = { id => $rdg->id, label => $rdg->text };
1303                 my $adjacent = [];
1304                 my @endwits;
1305                 foreach my $succ ( @successors ) {
1306                         my @edgewits;
1307                         if( $phony eq 'start' ) {
1308                                 @edgewits = $succ->witnesses;
1309                         } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1310                                 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1311                         }
1312
1313                         if( $succ eq $self->end ) {
1314                                 @edgewits = @endwits;
1315                         } elsif( $succ->rank > $endrank ) {
1316                                 # These witnesses will point to 'end' instead, not to the
1317                                 # actual successor.
1318                                 push( @endwits, @edgewits );
1319                                 next;
1320                         }
1321                         my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1322                         my $edgedef = { id => $succ->id, label => $edgelabel };
1323                         my $rankoffset = $succ->rank - $rdg->rank;
1324                         if( $rankoffset > 1 and $succ ne $self->end ) {
1325                                 $edgedef->{minlen} = $rankoffset;
1326                         }
1327                         push( @$adjacent, $edgedef );
1328                 }
1329                 $listitem->{adjacent} = $adjacent;
1330                 push( @$list, $listitem );
1331         }
1332         return to_json( $list );
1333 }
1334
1335 =head2 as_graphml
1336
1337 Returns a GraphML representation of the collation.  The GraphML will contain
1338 two graphs. The first expresses the attributes of the readings and the witness
1339 paths that link them; the second expresses the relationships that link the
1340 readings.  This is the native transfer format for a tradition.
1341
1342 =begin testing
1343
1344 use Text::Tradition;
1345 use TryCatch;
1346
1347 my $READINGS = 311;
1348 my $PATHS = 361;
1349
1350 my $datafile = 't/data/florilegium_tei_ps.xml';
1351 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1352                                       'name' => 'test0',
1353                                       'file' => $datafile,
1354                                       'linear' => 1 );
1355
1356 ok( $tradition, "Got a tradition object" );
1357 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1358 ok( $tradition->collation, "Tradition has a collation" );
1359
1360 my $c = $tradition->collation;
1361 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1362 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1363 is( scalar $c->relationships, 0, "Collation has all relationships" );
1364
1365 # Add a few relationships
1366 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1367 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1368 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1369                                           'is_significant' => 'yes' } );
1370
1371 # Now write it to GraphML and parse it again.
1372
1373 my $graphml = $c->as_graphml;
1374 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1375 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1376 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1377 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1378 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1379 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1380
1381 # Now add a stemma, write to GraphML, and look at the output.
1382 SKIP: {
1383         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1384         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1385         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1386         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1387         $graphml = $c->as_graphml;
1388         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1389 }
1390
1391 =end testing
1392
1393 =cut
1394
1395 ## TODO MOVE this to Tradition.pm and modularize it better
1396 sub as_graphml {
1397     my( $self, $options ) = @_;
1398         $self->calculate_ranks unless $self->_graphcalc_done;
1399
1400         my $start = $options->{'from'}
1401                 ? $self->reading( $options->{'from'} ) : $self->start;
1402         my $end = $options->{'to'}
1403                 ? $self->reading( $options->{'to'} ) : $self->end;
1404         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1405                 throw( 'Start node must be before end node' );
1406         }
1407         # The readings need to be ranked for this to work.
1408         $start = $self->start unless $start->has_rank;
1409         $end = $self->end unless $end->has_rank;
1410         my $rankoffset = 0;
1411         unless( $start eq $self->start ) {
1412                 $rankoffset = $start->rank - 1;
1413         }
1414         my %use_readings;
1415
1416     # Some namespaces
1417     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1418     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1419     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1420         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1421
1422     # Create the document and root node
1423     require XML::LibXML;
1424     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1425     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1426     $graphml->setDocumentElement( $root );
1427     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1428     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1429
1430     # List of attribute types to save on our objects and their corresponding
1431     # GraphML types
1432     my %save_types = (
1433         'Str' => 'string',
1434         'Int' => 'int',
1435         'Bool' => 'boolean',
1436         'ReadingID' => 'string',
1437         'RelationshipType' => 'string',
1438         'RelationshipScope' => 'string',
1439         'Ternary' => 'string',
1440     );
1441
1442     # Add the data keys for the graph. Include an extra key 'version' for the
1443     # GraphML output version.
1444     my %graph_data_keys;
1445     my $gdi = 0;
1446     my %graph_attributes = ( 'version' => 'string' );
1447         # Graph attributes include those of Tradition and those of Collation.
1448         my %gattr_from;
1449         # TODO Use meta introspection method from duplicate_reading to do this
1450         # instead of naming custom keys.
1451         my $tmeta = $self->tradition->meta;
1452         my $cmeta = $self->meta;
1453         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1454         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1455         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1456                 next if $attr->name =~ /^_/;
1457                 next unless $save_types{$attr->type_constraint->name};
1458                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1459         }
1460     # Extra custom keys for complex objects that should be saved in some form.
1461     # The subroutine should return a string, or undef/empty.
1462     if( $tmeta->has_method('stemmata') ) {
1463                 $graph_attributes{'stemmata'} = sub {
1464                         my @stemstrs;
1465                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1466                                 $self->tradition->stemmata;
1467                         join( "\n", @stemstrs );
1468                 };
1469         }
1470
1471         if( $tmeta->has_method('user') ) {
1472                 $graph_attributes{'user'} = sub {
1473                         $self->tradition->user ? $self->tradition->user->id : undef
1474                 };
1475         }
1476
1477     foreach my $datum ( sort keys %graph_attributes ) {
1478         $graph_data_keys{$datum} = 'dg'.$gdi++;
1479         my $key = $root->addNewChild( $graphml_ns, 'key' );
1480         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1481                 : $graph_attributes{$datum};
1482         $key->setAttribute( 'attr.name', $datum );
1483         $key->setAttribute( 'attr.type', $dtype );
1484         $key->setAttribute( 'for', 'graph' );
1485         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1486     }
1487
1488     # Add the data keys for reading nodes
1489     my %reading_attributes;
1490     my $rmeta = Text::Tradition::Collation::Reading->meta;
1491     foreach my $attr( $rmeta->get_all_attributes ) {
1492                 next if $attr->name =~ /^_/;
1493                 next unless $save_types{$attr->type_constraint->name};
1494                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1495         }
1496         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1497                 # Extra custom key for the reading morphology
1498                 $reading_attributes{'lexemes'} = 'string';
1499         }
1500
1501     my %node_data_keys;
1502     my $ndi = 0;
1503     foreach my $datum ( sort keys %reading_attributes ) {
1504         $node_data_keys{$datum} = 'dn'.$ndi++;
1505         my $key = $root->addNewChild( $graphml_ns, 'key' );
1506         $key->setAttribute( 'attr.name', $datum );
1507         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1508         $key->setAttribute( 'for', 'node' );
1509         $key->setAttribute( 'id', $node_data_keys{$datum} );
1510     }
1511
1512     # Add the data keys for edges, that is, paths and relationships. Path
1513     # data does not come from a Moose class so is here manually.
1514     my $edi = 0;
1515     my %edge_data_keys;
1516     my %edge_attributes = (
1517         witness => 'string',                    # ID/label for a path
1518         extra => 'boolean',                             # Path key
1519         );
1520     my @path_attributes = keys %edge_attributes; # track our manual additions
1521     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1522     foreach my $attr( $pmeta->get_all_attributes ) {
1523                 next if $attr->name =~ /^_/;
1524                 next unless $save_types{$attr->type_constraint->name};
1525                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1526         }
1527     foreach my $datum ( sort keys %edge_attributes ) {
1528         $edge_data_keys{$datum} = 'de'.$edi++;
1529         my $key = $root->addNewChild( $graphml_ns, 'key' );
1530         $key->setAttribute( 'attr.name', $datum );
1531         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1532         $key->setAttribute( 'for', 'edge' );
1533         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1534     }
1535
1536     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1537     my $xmlidname = $self->tradition->name;
1538     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1539     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1540         $xmlidname = '_'.$xmlidname;
1541     }
1542     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1543     $sgraph->setAttribute( 'edgedefault', 'directed' );
1544     $sgraph->setAttribute( 'id', $xmlidname );
1545     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1546     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1547     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1548     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1549     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1550
1551     # Tradition/collation attribute data
1552     foreach my $datum ( keys %graph_attributes ) {
1553         my $value;
1554         if( $datum eq 'version' ) {
1555                 $value = '3.2';
1556         } elsif( ref( $graph_attributes{$datum} ) ) {
1557                 my $sub = $graph_attributes{$datum};
1558                 $value = &$sub();
1559         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1560                 $value = $self->tradition->$datum;
1561         } else {
1562                 $value = $self->$datum;
1563         }
1564                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1565         }
1566
1567     my $node_ctr = 0;
1568     my %node_hash;
1569     # Add our readings to the graph
1570     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1571         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1572                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1573         $use_readings{$n->id} = 1;
1574         # Add to the main graph
1575         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1576         my $node_xmlid = 'n' . $node_ctr++;
1577         $node_hash{ $n->id } = $node_xmlid;
1578         $node_el->setAttribute( 'id', $node_xmlid );
1579         foreach my $d ( keys %reading_attributes ) {
1580                 my $nval = $n->$d;
1581                 # Custom serialization
1582                 if( $d eq 'lexemes' ) {
1583                                 # If nval is a true value, we have lexemes so we need to
1584                                 # serialize them. Otherwise set nval to undef so that the
1585                                 # key is excluded from this reading.
1586                         $nval = $nval ? $n->_serialize_lexemes : undef;
1587                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1588                         $nval = undef;
1589                 }
1590                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1591                         # Adjust the ranks within the subgraph.
1592                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1593                                 : $nval - $rankoffset;
1594                 }
1595                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1596                         if defined $nval;
1597         }
1598     }
1599
1600     # Add the path edges to the sequence graph
1601     my $edge_ctr = 0;
1602     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1603         # We add an edge in the graphml for every witness in $e.
1604         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1605         my @edge_wits = sort $self->path_witnesses( $e );
1606         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1607         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1608         # Skip any path from start to end; that witness is not in the subgraph.
1609         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1610         foreach my $wit ( @edge_wits ) {
1611                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1612                                                                                 $node_hash{ $e->[0] },
1613                                                                                 $node_hash{ $e->[1] } );
1614                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1615                         $edge_el->setAttribute( 'source', $from );
1616                         $edge_el->setAttribute( 'target', $to );
1617                         $edge_el->setAttribute( 'id', $id );
1618
1619                         # It's a witness path, so add the witness
1620                         my $base = $wit;
1621                         my $key = $edge_data_keys{'witness'};
1622                         # Is this an ante-corr witness?
1623                         my $aclabel = $self->ac_label;
1624                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1625                                 # Keep the base witness
1626                                 $base = $1;
1627                                 # ...and record that this is an 'extra' reading path
1628                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1629                         }
1630                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1631                 }
1632         }
1633
1634         # Report the actual number of nodes and edges that went in
1635         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1636         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1637
1638         # Add the relationship graph to the XML
1639         map { delete $edge_data_keys{$_} } @path_attributes;
1640         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1641                 $node_data_keys{'id'}, \%edge_data_keys );
1642
1643     # Save and return the thing
1644     my $result = decode_utf8( $graphml->toString(1) );
1645     return $result;
1646 }
1647
1648 sub _add_graphml_data {
1649     my( $el, $key, $value ) = @_;
1650     return unless defined $value;
1651     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1652     $data_el->setAttribute( 'key', $key );
1653     $data_el->appendText( $value );
1654 }
1655
1656 =head2 as_csv
1657
1658 Returns a CSV alignment table representation of the collation graph, one
1659 row per witness (or witness uncorrected.)
1660
1661 =head2 as_tsv
1662
1663 Returns a tab-separated alignment table representation of the collation graph,
1664 one row per witness (or witness uncorrected.)
1665
1666 =begin testing
1667
1668 use Text::Tradition;
1669 use Text::CSV;
1670
1671 my $READINGS = 311;
1672 my $PATHS = 361;
1673 my $WITS = 13;
1674 my $WITAC = 4;
1675
1676 my $datafile = 't/data/florilegium_tei_ps.xml';
1677 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1678                                       'name' => 'test0',
1679                                       'file' => $datafile,
1680                                       'linear' => 1 );
1681
1682 my $c = $tradition->collation;
1683 # Export the thing to CSV
1684 my $csvstr = $c->as_csv();
1685 # Count the columns
1686 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1687 my @lines = split(/\n/, $csvstr );
1688 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1689 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1690 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1691 ok( @q_ac, "Found a layered witness" );
1692
1693 my $t2 = Text::Tradition->new( input => 'Tabular',
1694                                                            name => 'test2',
1695                                                            string => $csvstr,
1696                                                            sep_char => ',' );
1697 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1698 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1699
1700 # Now do it with TSV
1701 my $tsvstr = $c->as_tsv();
1702 my $t3 = Text::Tradition->new( input => 'Tabular',
1703                                                            name => 'test3',
1704                                                            string => $tsvstr,
1705                                                            sep_char => "\t" );
1706 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1707 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1708
1709 my $table = $c->alignment_table;
1710 my $noaccsv = $c->as_csv({ noac => 1 });
1711 my @noaclines = split(/\n/, $noaccsv );
1712 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1713 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1714 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1715
1716 my $safecsv = $c->as_csv({ safe_ac => 1});
1717 my @safelines = split(/\n/, $safecsv );
1718 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1719 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1720 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1721 ok( @q_ac, "Found a sanitized layered witness" );
1722 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1723
1724 # Test relationship collapse
1725 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1726 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1727
1728 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1729 my $t4 = Text::Tradition->new( input => 'Tabular',
1730                                                            name => 'test4',
1731                                                            string => $mergedtsv,
1732                                                            sep_char => "\t" );
1733 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1734 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1735
1736 =end testing
1737
1738 =cut
1739
1740 sub _tabular {
1741     my( $self, $opts ) = @_;
1742     my $table = $self->alignment_table( $opts );
1743         my $csv_options = { binary => 1, quote_null => 0 };
1744         $csv_options->{'sep_char'} = $opts->{fieldsep};
1745         if( $opts->{fieldsep} eq "\t" ) {
1746                 # If it is really tab separated, nothing is an escape char.
1747                 $csv_options->{'quote_char'} = undef;
1748                 $csv_options->{'escape_char'} = '';
1749         }
1750     my $csv = Text::CSV->new( $csv_options );
1751     my @result;
1752     # Make the header row
1753     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1754         push( @result, $csv->string );
1755     # Make the rest of the rows
1756     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1757         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1758         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1759         # Quick and dirty collapse of requested relationship types
1760         if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1761                 # Now substitute the reading in the relevant index of @row
1762                 # for its merge-related reading
1763                 my %substitutes;
1764                 while( @rowobjs ) {
1765                         my $thisr = shift @rowobjs;
1766                         next unless $thisr;
1767                                 next if exists $substitutes{$thisr->{t}->text};
1768                                 # Make sure we don't have A <-> B substitutions.
1769                                 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1770                         foreach my $thatr ( @rowobjs ) {
1771                                 next unless $thatr;
1772                                 next if exists $substitutes{$thatr->{t}->text};
1773                                 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1774                                 next unless $ttrel;
1775                                 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1776                                 # If we have got this far then we need to merge them.
1777                                 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1778                         }
1779                 }
1780                 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1781         }
1782         $csv->combine( @row );
1783         push( @result, $csv->string );
1784     }
1785     return join( "\n", @result );
1786 }
1787
1788 sub as_csv {
1789         my $self = shift;
1790         my $opts = shift || {};
1791         $opts->{fieldsep} = ',';
1792         return $self->_tabular( $opts );
1793 }
1794
1795 sub as_tsv {
1796         my $self = shift;
1797         my $opts = shift || {};
1798         $opts->{fieldsep} = "\t";
1799         return $self->_tabular( $opts );
1800 }
1801
1802 =head2 alignment_table
1803
1804 Return a reference to an alignment table, in a slightly enhanced CollateX
1805 format which looks like this:
1806
1807  $table = { alignment => [ { witness => "SIGIL",
1808                              tokens => [ { t => "TEXT" }, ... ] },
1809                            { witness => "SIG2",
1810                              tokens => [ { t => "TEXT" }, ... ] },
1811                            ... ],
1812             length => TEXTLEN };
1813
1814 =cut
1815
1816 sub alignment_table {
1817     my( $self, $opts ) = @_;
1818     if( $self->has_cached_table ) {
1819                 return $self->cached_table
1820                         unless $opts->{noac} || $opts->{safe_ac};
1821     }
1822
1823     # Make sure we can do this
1824         throw( "Need a linear graph in order to make an alignment table" )
1825                 unless $self->linear;
1826     $self->calculate_ranks()
1827         unless $self->_graphcalc_done && $self->end->has_rank;
1828
1829     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1830     my @all_pos = ( 1 .. $self->end->rank - 1 );
1831     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1832         # say STDERR "Making witness row(s) for " . $wit->sigil;
1833         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1834         my @row = _make_witness_row( \@wit_path, \@all_pos );
1835         my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1836         $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1837         push( @{$table->{'alignment'}}, $witobj );
1838         if( $wit->is_layered && !$opts->{noac} ) {
1839                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1840                         $wit->sigil.$self->ac_label );
1841             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1842             my $witlabel = $opts->{safe_ac}
1843                 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1844             my $witacobj = { 'witness' => $witlabel,
1845                 'tokens' => \@ac_row };
1846             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1847                         push( @{$table->{'alignment'}}, $witacobj );
1848         }
1849     }
1850     unless( $opts->{noac} || $opts->{safe_ac} ) {
1851             $self->cached_table( $table );
1852         }
1853     return $table;
1854 }
1855
1856 sub _make_witness_row {
1857     my( $path, $positions ) = @_;
1858     my %char_hash;
1859     map { $char_hash{$_} = undef } @$positions;
1860     my $debug = 0;
1861     foreach my $rdg ( @$path ) {
1862         say STDERR "rank " . $rdg->rank if $debug;
1863         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1864         $char_hash{$rdg->rank} = { 't' => $rdg };
1865     }
1866     my @row = map { $char_hash{$_} } @$positions;
1867     # Fill in lacuna markers for undef spots in the row
1868     my $last_el = shift @row;
1869     my @filled_row = ( $last_el );
1870     foreach my $el ( @row ) {
1871         # If we are using node reference, make the lacuna node appear many times
1872         # in the table.  If not, use the lacuna tag.
1873         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1874             $el = $last_el;
1875         }
1876         push( @filled_row, $el );
1877         $last_el = $el;
1878     }
1879     return @filled_row;
1880 }
1881
1882
1883 =head1 NAVIGATION METHODS
1884
1885 =head2 reading_sequence( $first, $last, $sigil, $backup )
1886
1887 Returns the ordered list of readings, starting with $first and ending
1888 with $last, for the witness given in $sigil. If a $backup sigil is
1889 specified (e.g. when walking a layered witness), it will be used wherever
1890 no $sigil path exists.  If there is a base text reading, that will be
1891 used wherever no path exists for $sigil or $backup.
1892
1893 =cut
1894
1895 # TODO Think about returning some lazy-eval iterator.
1896 # TODO Get rid of backup; we should know from what witness is whether we need it.
1897
1898 sub reading_sequence {
1899     my( $self, $start, $end, $witness ) = @_;
1900
1901     $witness = $self->baselabel unless $witness;
1902     my @readings = ( $start );
1903     my %seen;
1904     my $n = $start;
1905     while( $n && $n->id ne $end->id ) {
1906         if( exists( $seen{$n->id} ) ) {
1907             throw( "Detected loop for $witness at " . $n->id );
1908         }
1909         $seen{$n->id} = 1;
1910
1911         my $next = $self->next_reading( $n, $witness );
1912         unless( $next ) {
1913             throw( "Did not find any path for $witness from reading " . $n->id );
1914         }
1915         push( @readings, $next );
1916         $n = $next;
1917     }
1918     # Check that the last reading is our end reading.
1919     my $last = $readings[$#readings];
1920     throw( "Last reading found from " . $start->text .
1921         " for witness $witness is not the end!" ) # TODO do we get this far?
1922         unless $last->id eq $end->id;
1923
1924     return @readings;
1925 }
1926
1927 =head2 readings_at_rank( $rank )
1928
1929 Returns a list of readings at a given rank, taken from the alignment table.
1930
1931 =cut
1932
1933 sub readings_at_rank {
1934         my( $self, $rank, $nolacuna ) = @_;
1935         my $table = $self->alignment_table;
1936         # Table rank is real rank - 1.
1937         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1938         my %readings;
1939         foreach my $e ( @elements ) {
1940                 next unless ref( $e ) eq 'HASH';
1941                 next unless exists $e->{'t'};
1942                 my $rdg = $e->{'t'};
1943                 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
1944                 $readings{$e->{'t'}->id} = $e->{'t'};
1945         }
1946         return values %readings;
1947 }
1948
1949 =head2 next_reading( $reading, $sigil );
1950
1951 Returns the reading that follows the given reading along the given witness
1952 path.
1953
1954 =cut
1955
1956 sub next_reading {
1957     # Return the successor via the corresponding path.
1958     my $self = shift;
1959     my $answer = $self->_find_linked_reading( 'next', @_ );
1960         return undef unless $answer;
1961     return $self->reading( $answer );
1962 }
1963
1964 =head2 prior_reading( $reading, $sigil )
1965
1966 Returns the reading that precedes the given reading along the given witness
1967 path.
1968
1969 =cut
1970
1971 sub prior_reading {
1972     # Return the predecessor via the corresponding path.
1973     my $self = shift;
1974     my $answer = $self->_find_linked_reading( 'prior', @_ );
1975     return $self->reading( $answer );
1976 }
1977
1978 sub _find_linked_reading {
1979     my( $self, $direction, $node, $path ) = @_;
1980
1981     # Get a backup if we are dealing with a layered witness
1982     my $alt_path;
1983     my $aclabel = $self->ac_label;
1984     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1985         $alt_path = $1;
1986     }
1987
1988     my @linked_paths = $direction eq 'next'
1989         ? $self->sequence->edges_from( $node )
1990         : $self->sequence->edges_to( $node );
1991     return undef unless scalar( @linked_paths );
1992
1993     # We have to find the linked path that contains all of the
1994     # witnesses supplied in $path.
1995     my( @path_wits, @alt_path_wits );
1996     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1997     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1998     my $base_le;
1999     my $alt_le;
2000     foreach my $le ( @linked_paths ) {
2001         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2002             $base_le = $le;
2003         }
2004                 my @le_wits = sort $self->path_witnesses( $le );
2005                 if( _is_within( \@path_wits, \@le_wits ) ) {
2006                         # This is the right path.
2007                         return $direction eq 'next' ? $le->[1] : $le->[0];
2008                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2009                         $alt_le = $le;
2010                 }
2011     }
2012     # Got this far? Return the alternate path if it exists.
2013     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2014         if $alt_le;
2015
2016     # Got this far? Return the base path if it exists.
2017     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2018         if $base_le;
2019
2020     # Got this far? We have no appropriate path.
2021     warn "Could not find $direction node from " . $node->id
2022         . " along path $path";
2023     return undef;
2024 }
2025
2026 # Some set logic.
2027 sub _is_within {
2028     my( $set1, $set2 ) = @_;
2029     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2030     foreach my $el ( @$set1 ) {
2031         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2032     }
2033     return $ret;
2034 }
2035
2036 # Return the string that joins together a list of witnesses for
2037 # display on a single path.
2038 sub _witnesses_of_label {
2039     my( $self, $label ) = @_;
2040     my $regex = $self->wit_list_separator;
2041     my @answer = split( /\Q$regex\E/, $label );
2042     return @answer;
2043 }
2044
2045 =head2 common_readings
2046
2047 Returns the list of common readings in the graph (i.e. those readings that are
2048 shared by all non-lacunose witnesses.)
2049
2050 =cut
2051
2052 sub common_readings {
2053         my $self = shift;
2054         my @common = grep { $_->is_common } $self->readings;
2055         return @common;
2056 }
2057
2058 =head2 path_text( $sigil, [, $start, $end ] )
2059
2060 Returns the text of a witness (plus its backup, if we are using a layer)
2061 as stored in the collation.  The text is returned as a string, where the
2062 individual readings are joined with spaces and the meta-readings (e.g.
2063 lacunae) are omitted.  Optional specification of $start and $end allows
2064 the generation of a subset of the witness text.
2065
2066 =cut
2067
2068 sub path_text {
2069         my( $self, $wit, $start, $end ) = @_;
2070         $start = $self->start unless $start;
2071         $end = $self->end unless $end;
2072         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2073         my $pathtext = '';
2074         my $last;
2075         foreach my $r ( @path ) {
2076                 unless ( $r->join_prior || !$last || $last->join_next ) {
2077                         $pathtext .= ' ';
2078                 }
2079                 $pathtext .= $r->text;
2080                 $last = $r;
2081         }
2082         return $pathtext;
2083 }
2084
2085 =head1 INITIALIZATION METHODS
2086
2087 These are mostly for use by parsers.
2088
2089 =head2 make_witness_path( $witness )
2090
2091 Link the array of readings contained in $witness->path (and in
2092 $witness->uncorrected_path if it exists) into collation paths.
2093 Clear out the arrays when finished.
2094
2095 =head2 make_witness_paths
2096
2097 Call make_witness_path for all witnesses in the tradition.
2098
2099 =cut
2100
2101 # For use when a collation is constructed from a base text and an apparatus.
2102 # We have the sequences of readings and just need to add path edges.
2103 # When we are done, clear out the witness path attributes, as they are no
2104 # longer needed.
2105 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2106
2107 sub make_witness_paths {
2108     my( $self ) = @_;
2109     foreach my $wit ( $self->tradition->witnesses ) {
2110         # say STDERR "Making path for " . $wit->sigil;
2111         $self->make_witness_path( $wit );
2112     }
2113 }
2114
2115 sub make_witness_path {
2116     my( $self, $wit ) = @_;
2117     my @chain = @{$wit->path};
2118     my $sig = $wit->sigil;
2119     # Add start and end if necessary
2120     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2121     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2122     foreach my $idx ( 0 .. $#chain-1 ) {
2123         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2124     }
2125     if( $wit->is_layered ) {
2126         @chain = @{$wit->uncorrected_path};
2127                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2128                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2129         foreach my $idx( 0 .. $#chain-1 ) {
2130             my $source = $chain[$idx];
2131             my $target = $chain[$idx+1];
2132             $self->add_path( $source, $target, $sig.$self->ac_label )
2133                 unless $self->has_path( $source, $target, $sig );
2134         }
2135     }
2136     $wit->clear_path;
2137     $wit->clear_uncorrected_path;
2138 }
2139
2140 =head2 calculate_ranks
2141
2142 Calculate the reading ranks (that is, their aligned positions relative
2143 to each other) for the graph.  This can only be called on linear collations.
2144
2145 =begin testing
2146
2147 use Text::Tradition;
2148
2149 my $cxfile = 't/data/Collatex-16.xml';
2150 my $t = Text::Tradition->new(
2151     'name'  => 'inline',
2152     'input' => 'CollateX',
2153     'file'  => $cxfile,
2154     );
2155 my $c = $t->collation;
2156
2157 # Make an svg
2158 my $table = $c->alignment_table;
2159 ok( $c->has_cached_table, "Alignment table was cached" );
2160 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2161 $c->calculate_ranks;
2162 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2163 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2164 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2165 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2166 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2167
2168 =end testing
2169
2170 =cut
2171
2172 sub calculate_ranks {
2173     my $self = shift;
2174     # Save the existing ranks, in case we need to invalidate the cached SVG.
2175     throw( "Cannot calculate ranks on a non-linear graph" )
2176         unless $self->linear;
2177     my %existing_ranks;
2178     map { $existing_ranks{$_} = $_->rank } $self->readings;
2179
2180     # Do the rankings based on the relationship equivalence graph, starting
2181     # with the start node.
2182     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2183
2184     # Transfer our rankings from the topological graph to the real one.
2185     foreach my $r ( $self->readings ) {
2186         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2187             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2188         } else {
2189                 # Die. Find the last rank we calculated.
2190                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2191                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2192                         $self->readings;
2193                 my $last = pop @all_defined;
2194             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2195         }
2196     }
2197     # Do we need to invalidate the cached data?
2198     if( $self->has_cached_table ) {
2199         foreach my $r ( $self->readings ) {
2200                 next if defined( $existing_ranks{$r} )
2201                         && $existing_ranks{$r} == $r->rank;
2202                 # Something has changed, so clear the cache
2203                 $self->_clear_cache;
2204                         # ...and recalculate the common readings.
2205                         $self->calculate_common_readings();
2206                 last;
2207         }
2208     }
2209         # The graph calculation information is now up to date.
2210         $self->_graphcalc_done(1);
2211 }
2212
2213 sub _clear_cache {
2214         my $self = shift;
2215         $self->wipe_table if $self->has_cached_table;
2216 }
2217
2218
2219 =head2 flatten_ranks
2220
2221 A convenience method for parsing collation data.  Searches the graph for readings
2222 with the same text at the same rank, and merges any that are found.
2223
2224 =cut
2225
2226 sub flatten_ranks {
2227     my ( $self, %args ) = shift;
2228     my %unique_rank_rdg;
2229     my $changed;
2230     foreach my $p ( $self->identical_readings( %args ) ) {
2231                 # say STDERR "Combining readings at same rank: @$p";
2232                 $changed = 1;
2233                 $self->merge_readings( @$p );
2234                 # TODO see if this now makes a common point.
2235     }
2236     # If we merged readings, the ranks are still fine but the alignment
2237     # table is wrong. Wipe it.
2238     $self->wipe_table() if $changed;
2239 }
2240
2241 =head2 identical_readings
2242 =head2 identical_readings( start => $startnode, end => $endnode )
2243 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2244
2245 Goes through the graph identifying all pairs of readings that appear to be
2246 identical, and therefore able to be merged into a single reading. Returns the
2247 relevant identical pairs. Can be restricted to run over only a part of the
2248 graph, specified either by node or by rank.
2249
2250 =cut
2251
2252 sub identical_readings {
2253         my ( $self, %args ) = @_;
2254     # Find where we should start and end.
2255     my $startrank = $args{startrank} || 0;
2256     if( $args{start} ) {
2257         throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2258                 && $self->reading( $args{start} )->has_rank;
2259         $startrank = $self->reading( $args{start} )->rank;
2260     }
2261     my $endrank = $args{endrank} || $self->end->rank;
2262     if( $args{end} ) {
2263         throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2264                 && $self->reading( $args{end} )->has_rank;
2265         $endrank = $self->reading( $args{end} )->rank;
2266     }
2267
2268     # Make sure the ranks are correct.
2269     unless( $self->_graphcalc_done ) {
2270         $self->calculate_ranks;
2271     }
2272     # Go through the readings looking for duplicates.
2273     my %unique_rank_rdg;
2274     my @pairs;
2275     foreach my $rdg ( $self->readings ) {
2276         next unless $rdg->has_rank;
2277         my $rk = $rdg->rank;
2278         next if $rk > $endrank || $rk < $startrank;
2279         my $key = $rk . "||" . $rdg->text;
2280         if( exists $unique_rank_rdg{$key} ) {
2281                 # Make sure they don't have different grammatical forms
2282                         my $ur = $unique_rank_rdg{$key};
2283                 if( $rdg->is_identical( $ur ) ) {
2284                                 push( @pairs, [ $ur, $rdg ] );
2285                         }
2286         } else {
2287             $unique_rank_rdg{$key} = $rdg;
2288         }
2289     }
2290
2291     return @pairs;
2292 }
2293
2294
2295 =head2 calculate_common_readings
2296
2297 Goes through the graph identifying the readings that appear in every witness
2298 (apart from those with lacunae at that spot.) Marks them as common and returns
2299 the list.
2300
2301 =begin testing
2302
2303 use Text::Tradition;
2304
2305 my $cxfile = 't/data/Collatex-16.xml';
2306 my $t = Text::Tradition->new(
2307     'name'  => 'inline',
2308     'input' => 'CollateX',
2309     'file'  => $cxfile,
2310     );
2311 my $c = $t->collation;
2312
2313 my @common = $c->calculate_common_readings();
2314 is( scalar @common, 8, "Found correct number of common readings" );
2315 my @marked = sort $c->common_readings();
2316 is( scalar @common, 8, "All common readings got marked as such" );
2317 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2318 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2319
2320 =end testing
2321
2322 =cut
2323
2324 sub calculate_common_readings {
2325         my $self = shift;
2326         my @common;
2327         map { $_->is_common( 0 ) } $self->readings;
2328         # Implicitly calls calculate_ranks
2329         my $table = $self->alignment_table;
2330         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2331                 my @row = map { $_->{'tokens'}->[$idx]
2332                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2333                                         @{$table->{'alignment'}};
2334                 my %hash;
2335                 foreach my $r ( @row ) {
2336                         if( $r ) {
2337                                 $hash{$r->id} = $r unless $r->is_meta;
2338                         } else {
2339                                 $hash{'UNDEF'} = $r;
2340                         }
2341                 }
2342                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2343                         my( $r ) = values %hash;
2344                         $r->is_common( 1 );
2345                         push( @common, $r );
2346                 }
2347         }
2348         return @common;
2349 }
2350
2351 =head2 text_from_paths
2352
2353 Calculate the text array for all witnesses from the path, for later consistency
2354 checking.  Only to be used if there is no non-graph-based way to know the
2355 original texts.
2356
2357 =cut
2358
2359 sub text_from_paths {
2360         my $self = shift;
2361     foreach my $wit ( $self->tradition->witnesses ) {
2362         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2363         my @text;
2364         foreach my $r ( @readings ) {
2365                 next if $r->is_meta;
2366                 push( @text, $r->text );
2367         }
2368         $wit->text( \@text );
2369         if( $wit->is_layered ) {
2370                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2371                                                                                                   $wit->sigil.$self->ac_label );
2372                         my @uctext;
2373                         foreach my $r ( @ucrdgs ) {
2374                                 next if $r->is_meta;
2375                                 push( @uctext, $r->text );
2376                         }
2377                         $wit->layertext( \@uctext );
2378         }
2379     }
2380 }
2381
2382 =head1 UTILITY FUNCTIONS
2383
2384 =head2 common_predecessor( $reading_a, $reading_b )
2385
2386 Find the last reading that occurs in sequence before both the given readings.
2387 At the very least this should be $self->start.
2388
2389 =head2 common_successor( $reading_a, $reading_b )
2390
2391 Find the first reading that occurs in sequence after both the given readings.
2392 At the very least this should be $self->end.
2393
2394 =begin testing
2395
2396 use Text::Tradition;
2397
2398 my $cxfile = 't/data/Collatex-16.xml';
2399 my $t = Text::Tradition->new(
2400     'name'  => 'inline',
2401     'input' => 'CollateX',
2402     'file'  => $cxfile,
2403     );
2404 my $c = $t->collation;
2405
2406 is( $c->common_predecessor( 'n24', 'n23' )->id,
2407     'n20', "Found correct common predecessor" );
2408 is( $c->common_successor( 'n24', 'n23' )->id,
2409     '__END__', "Found correct common successor" );
2410
2411 is( $c->common_predecessor( 'n19', 'n17' )->id,
2412     'n16', "Found correct common predecessor for readings on same path" );
2413 is( $c->common_successor( 'n21', 'n10' )->id,
2414     '__END__', "Found correct common successor for readings on same path" );
2415
2416 =end testing
2417
2418 =cut
2419
2420 ## Return the closest reading that is a predecessor of both the given readings.
2421 sub common_predecessor {
2422         my $self = shift;
2423         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2424         return $self->_common_in_path( $r1, $r2, 'predecessors' );
2425 }
2426
2427 sub common_successor {
2428         my $self = shift;
2429         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2430         return $self->_common_in_path( $r1, $r2, 'successors' );
2431 }
2432
2433
2434 # TODO think about how to do this without ranks...
2435 sub _common_in_path {
2436         my( $self, $r1, $r2, $dir ) = @_;
2437         my $iter = $self->end->rank;
2438         my @candidates;
2439         my @last_r1 = ( $r1 );
2440         my @last_r2 = ( $r2 );
2441         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2442         my %all_seen;
2443         # say STDERR "Finding common $dir for $r1, $r2";
2444         while( !@candidates ) {
2445                 last unless $iter--;  # Avoid looping infinitely
2446                 # Iterate separately down the graph from r1 and r2
2447                 my( @new_lc1, @new_lc2 );
2448                 foreach my $lc ( @last_r1 ) {
2449                         foreach my $p ( $lc->$dir ) {
2450                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2451                                         # say STDERR "Path candidate $p from $lc";
2452                                         push( @candidates, $p );
2453                                 } elsif( !$all_seen{$p->id} ) {
2454                                         $all_seen{$p->id} = 'r1';
2455                                         push( @new_lc1, $p );
2456                                 }
2457                         }
2458                 }
2459                 foreach my $lc ( @last_r2 ) {
2460                         foreach my $p ( $lc->$dir ) {
2461                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2462                                         # say STDERR "Path candidate $p from $lc";
2463                                         push( @candidates, $p );
2464                                 } elsif( !$all_seen{$p->id} ) {
2465                                         $all_seen{$p->id} = 'r2';
2466                                         push( @new_lc2, $p );
2467                                 }
2468                         }
2469                 }
2470                 @last_r1 = @new_lc1;
2471                 @last_r2 = @new_lc2;
2472         }
2473         my @answer = sort { $a->rank <=> $b->rank } @candidates;
2474         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2475 }
2476
2477 sub throw {
2478         Text::Tradition::Error->throw(
2479                 'ident' => 'Collation error',
2480                 'message' => $_[0],
2481                 );
2482 }
2483
2484 no Moose;
2485 __PACKAGE__->meta->make_immutable;
2486
2487 =head1 BUGS/TODO
2488
2489 =over
2490
2491 =item * Rework XML serialization in a more modular way
2492
2493 =back
2494
2495 =head1 LICENSE
2496
2497 This package is free software and is provided "as is" without express
2498 or implied warranty.  You can redistribute it and/or modify it under
2499 the same terms as Perl itself.
2500
2501 =head1 AUTHOR
2502
2503 Tara L Andrews E<lt>aurum@cpan.orgE<gt>