base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use JSON qw/ to_json /;
  10 use Text::CSV;
  11 use Text::Tradition::Collation::Data;
  12 use Text::Tradition::Collation::Reading;
  13 use Text::Tradition::Collation::RelationshipStore;
  14 use Text::Tradition::Error;
  15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  16 use XML::LibXML;
  17 use XML::LibXML::XPathContext;
  18 use Moose;
  19
  20 has _data => (
  21         isa      => 'Text::Tradition::Collation::Data',
  22         is       => 'ro',
  23         required => 1,
  24         handles  => [ qw(
  25                 sequence
  26                 paths
  27                 _set_relations
  28                 relations
  29                 _set_start
  30                 _set_end
  31                 ac_label
  32                 has_cached_table
  33                 relationships
  34                 relationship_types
  35                 related_readings
  36                 get_relationship
  37                 del_relationship
  38                 equivalence
  39                 equivalence_graph
  40                 readings
  41                 reading
  42                 _add_reading
  43                 del_reading
  44                 has_reading
  45                 wit_list_separator
  46                 baselabel
  47                 linear
  48                 wordsep
  49                 start
  50                 end
  51                 cached_table
  52                 _graphcalc_done
  53                 has_cached_svg
  54                 wipe_table
  55         )]
  56 );
  57
  58 has 'tradition' => (
  59     is => 'ro',
  60     isa => 'Text::Tradition',
  61     writer => '_set_tradition',
  62     weak_ref => 1,
  63     );
  64
  65 =encoding utf8
  66
  67 =head1 NAME
  68
  69 Text::Tradition::Collation - a software model for a text collation
  70
  71 =head1 SYNOPSIS
  72
  73   use Text::Tradition;
  74   my $t = Text::Tradition->new(
  75     'name' => 'this is a text',
  76     'input' => 'TEI',
  77     'file' => '/path/to/tei_parallel_seg_file.xml' );
  78
  79   my $c = $t->collation;
  80   my @readings = $c->readings;
  81   my @paths = $c->paths;
  82   my @relationships = $c->relationships;
  83
  84   my $svg_variant_graph = $t->collation->as_svg();
  85
  86 =head1 DESCRIPTION
  87
  88 Text::Tradition is a library for representation and analysis of collated
  89 texts, particularly medieval ones.  The Collation is the central feature of
  90 a Tradition, where the text, its sequence of readings, and its relationships
  91 between readings are actually kept.
  92
  93 =head1 CONSTRUCTOR
  94
  95 =head2 new
  96
  97 The constructor.  Takes a hash or hashref of the following arguments:
  98
  99 =over
 100
 101 =item * tradition - The Text::Tradition object to which the collation
 102 belongs. Required.
 103
 104 =item * linear - Whether the collation should be linear; that is, whether
 105 transposed readings should be treated as two linked readings rather than one,
 106 and therefore whether the collation graph is acyclic.  Defaults to true.
 107
 108 =item * baselabel - The default label for the path taken by a base text
 109 (if any). Defaults to 'base text'.
 110
 111 =item * wit_list_separator - The string to join a list of witnesses for
 112 purposes of making labels in display graphs.  Defaults to ', '.
 113
 114 =item * ac_label - The extra label to tack onto a witness sigil when
 115 representing another layer of path for the given witness - that is, when
 116 a text has more than one possible reading due to scribal corrections or
 117 the like.  Defaults to ' (a.c.)'.
 118
 119 =item * wordsep - The string used to separate words in the original text.
 120 Defaults to ' '.
 121
 122 =back
 123
 124 =head1 ACCESSORS
 125
 126 =head2 tradition
 127
 128 =head2 linear
 129
 130 =head2 wit_list_separator
 131
 132 =head2 baselabel
 133
 134 =head2 ac_label
 135
 136 =head2 wordsep
 137
 138 Simple accessors for collation attributes.
 139
 140 =head2 start
 141
 142 The meta-reading at the start of every witness path.
 143
 144 =head2 end
 145
 146 The meta-reading at the end of every witness path.
 147
 148 =head2 readings
 149
 150 Returns all Reading objects in the graph.
 151
 152 =head2 reading( $id )
 153
 154 Returns the Reading object corresponding to the given ID.
 155
 156 =head2 add_reading( $reading_args )
 157
 158 Adds a new reading object to the collation.
 159 See L<Text::Tradition::Collation::Reading> for the available arguments.
 160
 161 =head2 del_reading( $object_or_id )
 162
 163 Removes the given reading from the collation, implicitly removing its
 164 paths and relationships.
 165
 166 =head2 has_reading( $id )
 167
 168 Predicate to see whether a given reading ID is in the graph.
 169
 170 =head2 reading_witnesses( $object_or_id )
 171
 172 Returns a list of sigils whose witnesses contain the reading.
 173
 174 =head2 paths
 175
 176 Returns all reading paths within the document - that is, all edges in the
 177 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 178
 179 =head2 add_path( $source, $target, $sigil )
 180
 181 Links the given readings in the collation in sequence, under the given witness
 182 sigil.  The readings may be specified by object or ID.
 183
 184 =head2 del_path( $source, $target, $sigil )
 185
 186 Links the given readings in the collation in sequence, under the given witness
 187 sigil.  The readings may be specified by object or ID.
 188
 189 =head2 has_path( $source, $target );
 190
 191 Returns true if the two readings are linked in sequence in any witness.
 192 The readings may be specified by object or ID.
 193
 194 =head2 relationships
 195
 196 Returns all Relationship objects in the collation.
 197
 198 =head2 add_relationship( $reading, $other_reading, $options )
 199
 200 Adds a new relationship of the type given in $options between the two readings,
 201 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 202 where $status is true on success, and @vectors is a list of relationship edges
 203 that were ultimately added.
 204 See L<Text::Tradition::Collation::Relationship> for the available options.
 205
 206 =cut
 207
 208 sub BUILDARGS {
 209         my ( $class, @args ) = @_;
 210         my %args = @args == 1 ? %{ $args[0] } : @args;
 211         # TODO determine these from the Moose::Meta object
 212         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 213                 linear wordsep start end cached_table _graphcalc_done);
 214         my %data_args;
 215         for my $attr (@delegate_attrs) {
 216                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 217         }
 218         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 219         return \%args;
 220 }
 221
 222 sub BUILD {
 223     my $self = shift;
 224     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 225     $self->_set_start( $self->add_reading(
 226         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 227     $self->_set_end( $self->add_reading(
 228         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 229 }
 230
 231 =head2 register_relationship_type( %relationship_definition )
 232
 233 Add a relationship type definition to this collation. The argument can be either a
 234 hash or a hashref, defining the properties of the relationship. For relationship types
 235 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
 236
 237 =head2 get_relationship_type( $relationship_name )
 238
 239 Retrieve the RelationshipType object for the relationship with the given name.
 240
 241 =cut
 242
 243 sub register_relationship_type {
 244         my $self = shift;
 245         my %args = @_ == 1 ? %{$_[0]} : @_;
 246         if( $self->relations->has_type( $args{name} ) ) {
 247                 throw( 'Relationship type ' . $args{name} . ' already registered' );
 248         }
 249         $self->relations->add_type( %args );
 250 }
 251
 252 sub get_relationship_type {
 253         my( $self, $name ) = @_;
 254                 return $self->relations->has_type( $name )
 255                         ? $self->relations->type( $name ) : undef;
 256 }
 257
 258 ### Reading construct/destruct functions
 259
 260 sub add_reading {
 261         my( $self, $reading ) = @_;
 262         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 263                 my %args = %$reading;
 264                 if( $args{'init'} ) {
 265                         # If we are initializing an empty collation, don't assume that we
 266                         # have set a tradition.
 267                         delete $args{'init'};
 268                 } elsif( $self->tradition->can('language') && $self->tradition->has_language
 269                         && !exists $args{'language'} ) {
 270                         $args{'language'} = $self->tradition->language;
 271                 }
 272                 $reading = Text::Tradition::Collation::Reading->new(
 273                         'collation' => $self,
 274                         %args );
 275         }
 276         # First check to see if a reading with this ID exists.
 277         if( $self->reading( $reading->id ) ) {
 278                 throw( "Collation already has a reading with id " . $reading->id );
 279         }
 280         $self->_graphcalc_done(0);
 281         $self->_add_reading( $reading->id => $reading );
 282         # Once the reading has been added, put it in both graphs.
 283         $self->sequence->add_vertex( $reading->id );
 284         $self->relations->add_reading( $reading->id );
 285         return $reading;
 286 };
 287
 288 around del_reading => sub {
 289         my $orig = shift;
 290         my $self = shift;
 291         my $arg = shift;
 292
 293         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 294                 $arg = $arg->id;
 295         }
 296         # Remove the reading from the graphs.
 297         $self->_graphcalc_done(0);
 298         $self->_clear_cache; # Explicitly clear caches to GC the reading
 299         $self->sequence->delete_vertex( $arg );
 300         $self->relations->delete_reading( $arg );
 301
 302         # Carry on.
 303         $self->$orig( $arg );
 304 };
 305
 306 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 307
 308 Merges the $second reading into the $main one. If $concatenate is true, then
 309 the merged node will carry the text of both readings, concatenated with either
 310 $with_str (if specified) or a sensible default (the empty string if the
 311 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 312
 313 The first two arguments may be either readings or reading IDs.
 314
 315 =begin testing
 316
 317 use Text::Tradition;
 318 use TryCatch;
 319
 320 my $cxfile = 't/data/Collatex-16.xml';
 321 my $t = Text::Tradition->new(
 322     'name'  => 'inline',
 323     'input' => 'CollateX',
 324     'file'  => $cxfile,
 325     );
 326 my $c = $t->collation;
 327
 328 my $rno = scalar $c->readings;
 329 # Split n21 ('unto') for testing purposes
 330 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 331 my $old_r = $c->reading( 'n21' );
 332 $old_r->alter_text( 'to' );
 333 $c->del_path( 'n20', 'n21', 'A' );
 334 $c->add_path( 'n20', 'n21p0', 'A' );
 335 $c->add_path( 'n21p0', 'n21', 'A' );
 336 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
 337 $c->flatten_ranks();
 338 ok( $c->reading( 'n21p0' ), "New reading exists" );
 339 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 340
 341 # Combine n3 and n4 ( with his )
 342 $c->merge_readings( 'n3', 'n4', 1 );
 343 ok( !$c->reading('n4'), "Reading n4 is gone" );
 344 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 345
 346 # Collapse n9 and n10 ( rood / root )
 347 $c->merge_readings( 'n9', 'n10' );
 348 ok( !$c->reading('n10'), "Reading n10 is gone" );
 349 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 350
 351 # Try to combine n21 and n21p0. This should break.
 352 my $remaining = $c->reading('n21');
 353 $remaining ||= $c->reading('n22');  # one of these should still exist
 354 try {
 355         $c->merge_readings( 'n21p0', $remaining, 1 );
 356         ok( 0, "Bad reading merge changed the graph" );
 357 } catch( Text::Tradition::Error $e ) {
 358         like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
 359 } catch {
 360         ok( 0, "Unexpected error on bad reading merge: $@" );
 361 }
 362
 363 try {
 364         $c->calculate_ranks();
 365         ok( 1, "Graph is still evidently whole" );
 366 } catch( Text::Tradition::Error $e ) {
 367         ok( 0, "Caught a rank exception: " . $e->message );
 368 }
 369
 370 =end testing
 371
 372 =cut
 373
 374 sub merge_readings {
 375         my $self = shift;
 376
 377         # Sanity check
 378         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 379         my $mergemeta = $kept_obj->is_meta;
 380         throw( "Cannot merge meta and non-meta reading" )
 381                 unless ( $mergemeta && $del_obj->is_meta )
 382                         || ( !$mergemeta && !$del_obj->is_meta );
 383         if( $mergemeta ) {
 384                 throw( "Cannot merge with start or end node" )
 385                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 386                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 387                 throw( "Cannot combine text of meta readings" ) if $combine;
 388         }
 389         # We can only merge readings in a linear graph if:
 390         # - they are contiguous with only one edge between them, OR
 391         # - they are at equivalent ranks in the graph.
 392         if( $self->linear ) {
 393                 my @delpred = $del_obj->predecessors;
 394                 my @keptsuc = $kept_obj->successors;
 395                 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
 396                         && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
 397                         my( $is_ok, $msg ) = $self->relations->relationship_valid(
 398                                 $kept_obj, $del_obj, 'collated' );
 399                         unless( $is_ok ) {
 400                                 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
 401                         }
 402                 }
 403         }
 404
 405         # We only need the IDs for adding paths to the graph, not the reading
 406         # objects themselves.
 407         my $kept = $kept_obj->id;
 408         my $deleted = $del_obj->id;
 409         $self->_graphcalc_done(0);
 410
 411     # The kept reading should inherit the paths and the relationships
 412     # of the deleted reading.
 413         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 414                 my @vector = ( $kept );
 415                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 416                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 417                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 418                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 419                 $self->sequence->add_edge( @vector );
 420                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 421                 @wits{keys %$fwits} = values %$fwits;
 422                 $self->sequence->set_edge_attributes( @vector, \%wits );
 423         }
 424         $self->relations->merge_readings( $kept, $deleted, $combine );
 425
 426         # Do the deletion deed.
 427         if( $combine ) {
 428                 # Combine the text of the readings
 429                 my $joinstr = $combine_char;
 430                 unless( defined $joinstr ) {
 431                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 432                         $joinstr = $self->wordsep unless defined $joinstr;
 433                 }
 434                 $kept_obj->_combine( $del_obj, $joinstr );
 435         }
 436         $self->del_reading( $deleted );
 437 }
 438
 439 =head2 merge_related( @relationship_types )
 440
 441 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
 442
 443 WARNING: This operation cannot be undone.
 444
 445 =cut
 446
 447 =begin testing
 448
 449 use Test::Warn;
 450 use Text::Tradition;
 451 use TryCatch;
 452
 453 my $t;
 454 warnings_exist {
 455         $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
 456 } [qr/Cannot set relationship on a meta reading/],
 457         "Got expected relationship drop warning on parse";
 458
 459 my $c = $t->collation;
 460
 461 my %rdg_ids;
 462 map { $rdg_ids{$_} = 1 } $c->readings;
 463 $c->merge_related( 'orthographic' );
 464 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
 465         "Successfully collapsed orthographic variation" );
 466 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
 467 foreach my $rid ( keys %rdg_ids ) {
 468         my $exp = $rdg_ids{$rid};
 469         is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
 470                 ( $exp ? "retained" : "removed" ) );
 471 }
 472 ok( $c->linear, "Graph is still linear" );
 473 try {
 474         $c->calculate_ranks; # This should succeed
 475         ok( 1, "Can still calculate ranks on the new graph" );
 476 } catch {
 477         ok( 0, "Rank calculation on merged graph failed: $@" );
 478 }
 479
 480 # Now add some transpositions
 481 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
 482 $c->merge_related( 'transposition' );
 483 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
 484         "Transposed relationship is merged away" );
 485 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
 486 ok( !$c->linear, "Graph is no longer linear" );
 487 try {
 488         $c->calculate_ranks; # This should fail
 489         ok( 0, "Rank calculation happened on nonlinear graph?!" );
 490 } catch ( Text::Tradition::Error $e ) {
 491         is( $e->message, 'Cannot calculate ranks on a non-linear graph',
 492                 "Rank calculation on merged graph threw an error" );
 493 }
 494
 495 =end testing
 496
 497 =cut
 498
 499 # TODO: there should be a way to display merged without affecting the underlying data!
 500
 501 sub merge_related {
 502         my $self = shift;
 503         my %reltypehash;
 504         map { $reltypehash{$_} = 1 } @_;
 505
 506         # Set up the filter for finding related readings
 507         my $filter = sub {
 508                 exists $reltypehash{$_[0]->type};
 509         };
 510
 511         # Go through all readings looking for related ones
 512         foreach my $r ( $self->readings ) {
 513                 next unless $self->reading( "$r" ); # might have been deleted meanwhile
 514                 while( my @related = $self->related_readings( $r, $filter ) ) {
 515                         push( @related, $r );
 516                         @related = sort {
 517                                         scalar $b->witnesses <=> scalar $a->witnesses
 518                                 } @related;
 519                         my $keep = shift @related;
 520                         foreach my $delr ( @related ) {
 521                                 $self->linear( 0 )
 522                                         unless( $self->get_relationship( $keep, $delr )->colocated );
 523                                 $self->merge_readings( $keep, $delr );
 524                         }
 525                 }
 526         }
 527 }
 528
 529 =head2 compress_readings
 530
 531 Where possible in the graph, compresses plain sequences of readings into a
 532 single reading. The sequences must consist of readings with no
 533 relationships to other readings, with only a single witness path between
 534 them and no other witness paths from either that would skip the other. The
 535 readings must also not be marked as nonsense or bad grammar.
 536
 537 WARNING: This operation cannot be undone.
 538
 539 =cut
 540
 541 sub compress_readings {
 542         my $self = shift;
 543         # Sanity check: first save the original text of each witness.
 544         my %origtext;
 545         foreach my $wit ( $self->tradition->witnesses ) {
 546                 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
 547                 if( $wit->is_layered ) {
 548                         my $acsig = $wit->sigil . $self->ac_label;
 549                         $origtext{$acsig} = $self->path_text( $acsig );
 550                 }
 551         }
 552
 553         # Now do the deed.
 554         # Anywhere in the graph that there is a reading that joins only to a single
 555         # successor, and neither of these have any relationships, just join the two
 556         # readings.
 557         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 558                 # Now look for readings that can be joined to their successors.
 559                 next unless $rdg->is_combinable;
 560                 my %seen;
 561                 while( $self->sequence->successors( $rdg ) == 1 ) {
 562                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 563                         throw( "Infinite loop" ) if $seen{$next->id};
 564                         $seen{$next->id} = 1;
 565                         last if $self->sequence->predecessors( $next ) > 1;
 566                         last unless $next->is_combinable;
 567                         say "Joining readings $rdg and $next";
 568                         $self->merge_readings( $rdg, $next, 1 );
 569                 }
 570         }
 571
 572         # Finally, make sure we haven't screwed anything up.
 573         foreach my $wit ( $self->tradition->witnesses ) {
 574                 my $pathtext = $self->path_text( $wit->sigil );
 575                 throw( "Text differs for witness " . $wit->sigil )
 576                         unless $pathtext eq $origtext{$wit->sigil};
 577                 if( $wit->is_layered ) {
 578                         my $acsig = $wit->sigil . $self->ac_label;
 579                         $pathtext = $self->path_text( $acsig );
 580                         throw( "Layered text differs for witness " . $wit->sigil )
 581                                 unless $pathtext eq $origtext{$acsig};
 582                 }
 583         }
 584
 585         $self->relations->rebuild_equivalence();
 586         $self->calculate_ranks();
 587 }
 588
 589 # Helper function for manipulating the graph.
 590 sub _stringify_args {
 591         my( $self, $first, $second, @args ) = @_;
 592     $first = $first->id
 593         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 594     $second = $second->id
 595         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 596     return( $first, $second, @args );
 597 }
 598
 599 # Helper function for manipulating the graph.
 600 sub _objectify_args {
 601         my( $self, $first, $second, $arg ) = @_;
 602     $first = $self->reading( $first )
 603         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 604     $second = $self->reading( $second )
 605         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 606     return( $first, $second, $arg );
 607 }
 608
 609 =head2 duplicate_reading( $reading, @witlist )
 610
 611 Split the given reading into two, so that the new reading is in the path for
 612 the witnesses given in @witlist. If the result is that certain non-colocated
 613 relationships (e.g. transpositions) are no longer valid, these will be removed.
 614 Returns the newly-created reading.
 615
 616 =begin testing
 617
 618 use Test::More::UTF8;
 619 use Text::Tradition;
 620 use TryCatch;
 621
 622 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
 623 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
 624 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
 625
 626 my $sc = $st->collation;
 627 my $numr = 17;
 628 ok( $sc->reading('n131'), "Tradition has the affected reading" );
 629 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
 630 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
 631
 632 # Detach the erroneously collated reading
 633 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
 634 ok( $newr, "New reading was created" );
 635 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
 636 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
 637 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
 638 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
 639 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
 640
 641 # Check that the bad transposition is gone
 642 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
 643 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
 644
 645 # The collation should not be fixed
 646 my @pairs = $sc->identical_readings();
 647 is( scalar @pairs, 0, "Not re-collated yet" );
 648 # Fix the collation
 649 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
 650 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
 651 is( scalar @pairs, 3, "Found three more identical readings" );
 652 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
 653 $sc->flatten_ranks();
 654 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
 655
 656 # Check that we can't "duplicate" a reading with no wits or with all wits
 657 try {
 658         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
 659         ok( 0, "Reading duplication without witnesses throws an error" );
 660 } catch( Text::Tradition::Error $e ) {
 661         like( $e->message, qr/Must specify one or more witnesses/,
 662                 "Reading duplication without witnesses throws the expected error" );
 663 } catch {
 664         ok( 0, "Reading duplication without witnesses threw the wrong error" );
 665 }
 666
 667 try {
 668         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
 669         ok( 0, "Reading duplication with all witnesses throws an error" );
 670 } catch( Text::Tradition::Error $e ) {
 671         like( $e->message, qr/Cannot join all witnesses/,
 672                 "Reading duplication with all witnesses throws the expected error" );
 673 } catch {
 674         ok( 0, "Reading duplication with all witnesses threw the wrong error" );
 675 }
 676
 677 try {
 678         $sc->calculate_ranks();
 679         ok( 1, "Graph is still evidently whole" );
 680 } catch( Text::Tradition::Error $e ) {
 681         ok( 0, "Caught a rank exception: " . $e->message );
 682 }
 683
 684 =end testing
 685
 686 =cut
 687
 688 sub duplicate_reading {
 689         my( $self, $r, @wits ) = @_;
 690         # Check that we are not doing anything unwise.
 691         throw( "Must specify one or more witnesses for the duplicated reading" )
 692                 unless @wits;
 693         unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
 694                 $r = $self->reading( $r );
 695         }
 696         throw( "Cannot duplicate a meta-reading" )
 697                 if $r->is_meta;
 698         throw( "Cannot join all witnesses to the new reading" )
 699                 if scalar( @wits ) == scalar( $r->witnesses );
 700
 701         # Get all the reading attributes and duplicate them.
 702         my $rmeta = Text::Tradition::Collation::Reading->meta;
 703         my %args;
 704     foreach my $attr( $rmeta->get_all_attributes ) {
 705                 next if $attr->name =~ /^_/;
 706                 my $acc = $attr->get_read_method;
 707                 if( !$acc && $attr->has_applied_traits ) {
 708                         my $tr = $attr->applied_traits;
 709                         if( $tr->[0] =~ /::(Array|Hash)$/ ) {
 710                                 my $which = $1;
 711                                 my %methods = reverse %{$attr->handles};
 712                                 $acc = $methods{elements};
 713                                 $args{$attr->name} = $which eq 'Array'
 714                                         ? [ $r->$acc ] : { $r->$acc };
 715                         }
 716                 } else {
 717                         $args{$attr->name} = $r->$acc if $acc;
 718                 }
 719         }
 720         # By definition the new reading will no longer be common.
 721         $args{is_common} = 0;
 722         # The new reading also needs its own ID.
 723         $args{id} = $self->_generate_dup_id( $r->id );
 724
 725         # Try to make the new reading.
 726         my $newr = $self->add_reading( \%args );
 727         # The old reading is also no longer common.
 728         $r->is_common( 0 );
 729
 730         # For each of the witnesses, dissociate from the old reading and
 731         # associate with the new.
 732         foreach my $wit ( @wits ) {
 733                 my $prior = $self->prior_reading( $r, $wit );
 734                 my $next = $self->next_reading( $r, $wit );
 735                 $self->del_path( $prior, $r, $wit );
 736                 $self->add_path( $prior, $newr, $wit );
 737                 $self->del_path( $r, $next, $wit );
 738                 $self->add_path( $newr, $next, $wit );
 739         }
 740
 741         # If the graph is ranked, we need to look for relationships that are now
 742         # invalid (i.e. 'non-colocation' types that might now be colocated) and
 743         # remove them. If not, we can skip it.
 744         my $succ;
 745         my %rrk;
 746         my @deleted_relations;
 747         if( $self->end->has_rank ) {
 748                 # Find the point where we can stop checking
 749                 $succ = $self->common_successor( $r, $newr );
 750
 751                 # Hash the existing ranks
 752                 foreach my $rdg ( $self->readings ) {
 753                         $rrk{$rdg->id} = $rdg->rank;
 754                 }
 755                 # Calculate the new ranks
 756                 $self->calculate_ranks();
 757
 758                 # Check for invalid non-colocated relationships among changed-rank readings
 759                 # from where the ranks start changing up to $succ
 760                 my $lastrank = $succ->rank;
 761                 foreach my $rdg ( $self->readings ) {
 762                         next if $rdg->rank > $lastrank;
 763                         next if $rdg->rank == $rrk{$rdg->id};
 764                         my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
 765                         next unless @noncolo;
 766                         foreach my $nc ( @noncolo ) {
 767                                 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
 768                                         push( @deleted_relations, [ $rdg->id, $nc->id ] );
 769                                 }
 770                         }
 771                 }
 772         }
 773         return ( $newr, @deleted_relations );
 774 }
 775
 776 sub _generate_dup_id {
 777         my( $self, $rid ) = @_;
 778         my $newid;
 779         my $i = 0;
 780         while( !$newid ) {
 781                 $newid = $rid."_$i";
 782                 if( $self->has_reading( $newid ) ) {
 783                         $newid = '';
 784                         $i++;
 785                 }
 786         }
 787         return $newid;
 788 }
 789
 790 ### Path logic
 791
 792 sub add_path {
 793         my $self = shift;
 794
 795         # We only need the IDs for adding paths to the graph, not the reading
 796         # objects themselves.
 797     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 798
 799         $self->_graphcalc_done(0);
 800         # Connect the readings
 801         unless( $self->sequence->has_edge( $source, $target ) ) {
 802             $self->sequence->add_edge( $source, $target );
 803             $self->relations->add_equivalence_edge( $source, $target );
 804         }
 805     # Note the witness in question
 806     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 807 }
 808
 809 sub del_path {
 810         my $self = shift;
 811         my @args;
 812         if( ref( $_[0] ) eq 'ARRAY' ) {
 813                 my $e = shift @_;
 814                 @args = ( @$e, @_ );
 815         } else {
 816                 @args = @_;
 817         }
 818
 819         # We only need the IDs for removing paths from the graph, not the reading
 820         # objects themselves.
 821     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 822
 823         $self->_graphcalc_done(0);
 824         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 825                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 826         }
 827         unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
 828                 $self->sequence->delete_edge( $source, $target );
 829                 $self->relations->delete_equivalence_edge( $source, $target );
 830         }
 831 }
 832
 833
 834 # Extra graph-alike utility
 835 sub has_path {
 836         my $self = shift;
 837     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 838         return undef unless $self->sequence->has_edge( $source, $target );
 839         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 840 }
 841
 842 =head2 clear_witness( @sigil_list )
 843
 844 Clear the given witnesses out of the collation entirely, removing references
 845 to them in paths, and removing readings that belong only to them.  Should only
 846 be called via $tradition->del_witness.
 847
 848 =cut
 849
 850 sub clear_witness {
 851         my( $self, @sigils ) = @_;
 852
 853         $self->_graphcalc_done(0);
 854         # Clear the witness(es) out of the paths
 855         foreach my $e ( $self->paths ) {
 856                 foreach my $sig ( @sigils ) {
 857                         $self->del_path( $e, $sig );
 858                 }
 859         }
 860
 861         # Clear out the newly unused readings
 862         foreach my $r ( $self->readings ) {
 863                 unless( $self->reading_witnesses( $r ) ) {
 864                         $self->del_reading( $r );
 865                 }
 866         }
 867 }
 868
 869 sub add_relationship {
 870         my $self = shift;
 871     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 872     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 873     foreach my $v ( @vectors ) {
 874         next unless $self->get_relationship( $v )->colocated;
 875         if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
 876                 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
 877                         $self->_graphcalc_done(0);
 878                         $self->_clear_cache;
 879                         last;
 880         }
 881     }
 882     return @vectors;
 883 }
 884
 885 around qw/ get_relationship del_relationship / => sub {
 886         my $orig = shift;
 887         my $self = shift;
 888         my @args = @_;
 889         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 890                 @args = @{$_[0]};
 891         }
 892         my @stringargs = $self->_stringify_args( @args );
 893         $self->$orig( @stringargs );
 894 };
 895
 896 =head2 reading_witnesses( $reading )
 897
 898 Return a list of sigils corresponding to the witnesses in which the reading appears.
 899
 900 =cut
 901
 902 sub reading_witnesses {
 903         my( $self, $reading ) = @_;
 904         # We need only check either the incoming or the outgoing edges; I have
 905         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 906         if( $reading eq $self->start ) {
 907                 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
 908         }
 909         my %all_witnesses;
 910         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 911                 my $wits = $self->sequence->get_edge_attributes( @$e );
 912                 @all_witnesses{ keys %$wits } = 1;
 913         }
 914         my $acstr = $self->ac_label;
 915         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 916                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 917         }
 918         return keys %all_witnesses;
 919 }
 920
 921 =head1 OUTPUT METHODS
 922
 923 =head2 as_svg( \%options )
 924
 925 Returns an SVG string that represents the graph, via as_dot and graphviz.
 926 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 927
 928 =cut
 929
 930 sub as_svg {
 931     my( $self, $opts ) = @_;
 932     throw( "Need GraphViz installed to output SVG" )
 933         unless File::Which::which( 'dot' );
 934     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 935     $self->calculate_ranks()
 936         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 937         my @cmd = qw/dot -Tsvg/;
 938         my( $svg, $err );
 939         my $dotfile = File::Temp->new();
 940         ## USE FOR DEBUGGING
 941         # $dotfile->unlink_on_destroy(0);
 942         binmode $dotfile, ':utf8';
 943         print $dotfile $self->as_dot( $opts );
 944         push( @cmd, $dotfile->filename );
 945         run( \@cmd, ">", binary(), \$svg );
 946         $svg = decode_utf8( $svg );
 947         return $svg;
 948 }
 949
 950
 951 =head2 as_dot( \%options )
 952
 953 Returns a string that is the collation graph expressed in dot
 954 (i.e. GraphViz) format.  Options include:
 955
 956 =over 4
 957
 958 =item * from
 959
 960 =item * to
 961
 962 =item * color_common
 963
 964 =back
 965
 966 =cut
 967
 968 sub as_dot {
 969     my( $self, $opts ) = @_;
 970     my $startrank = $opts->{'from'} if $opts;
 971     my $endrank = $opts->{'to'} if $opts;
 972     my $color_common = $opts->{'color_common'} if $opts;
 973     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 974        && $self->end->rank > 100;
 975     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 976
 977     # Check the arguments
 978     if( $startrank ) {
 979         return if $endrank && $startrank > $endrank;
 980         return if $startrank > $self->end->rank;
 981         }
 982         if( defined $endrank ) {
 983                 return if $endrank < 0;
 984                 $endrank = undef if $endrank == $self->end->rank;
 985         }
 986
 987     my $graph_name = $self->tradition->name;
 988     $graph_name =~ s/[^\w\s]//g;
 989     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 990
 991     my %graph_attrs = (
 992         'rankdir' => 'LR',
 993         'bgcolor' => 'none',
 994         );
 995     my %node_attrs = (
 996         'fontsize' => 14,
 997         'fillcolor' => 'white',
 998         'style' => 'filled',
 999         'shape' => 'ellipse'
1000         );
1001     my %edge_attrs = (
1002         'arrowhead' => 'open',
1003         'color' => '#000000',
1004         'fontcolor' => '#000000',
1005         );
1006
1007     my $dot = sprintf( "digraph %s {\n", $graph_name );
1008     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1009     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1010
1011         # Output substitute start/end readings if necessary
1012         if( $startrank ) {
1013                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1014         }
1015         if( $endrank ) {
1016                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1017         }
1018         if( $STRAIGHTENHACK ) {
1019                 ## HACK part 1
1020                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1021                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1022                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1023         }
1024         my %used;  # Keep track of the readings that actually appear in the graph
1025         # Sort the readings by rank if we have ranks; this speeds layout.
1026         my @all_readings = $self->end->has_rank
1027                 ? sort { $a->rank <=> $b->rank } $self->readings
1028                 : $self->readings;
1029         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1030     foreach my $reading ( @all_readings ) {
1031         # Only output readings within our rank range.
1032         next if $startrank && $reading->rank < $startrank;
1033         next if $endrank && $reading->rank > $endrank;
1034         $used{$reading->id} = 1;
1035         # Need not output nodes without separate labels
1036         next if $reading->id eq $reading->text;
1037         my $rattrs;
1038         my $label = $reading->text;
1039         unless( $label =~ /^[[:punct:]]+$/ ) {
1040                 $label .= '-' if $reading->join_next;
1041             $label = "-$label" if $reading->join_prior;
1042         }
1043         $label =~ s/\"/\\\"/g;
1044                 $rattrs->{'label'} = $label;
1045                 $rattrs->{'id'} = $reading->id;
1046                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1047         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1048     }
1049
1050         # Add the real edges.
1051     my @edges = $self->paths;
1052         my( %substart, %subend );
1053     foreach my $edge ( @edges ) {
1054         # Do we need to output this edge?
1055         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1056                 my $label = $self->_path_display_label( $opts,
1057                         $self->path_witnesses( $edge ) );
1058                         my $variables = { %edge_attrs, 'label' => $label };
1059
1060                         # Account for the rank gap if necessary
1061                         my $rank0 = $self->reading( $edge->[0] )->rank
1062                                 if $self->reading( $edge->[0] )->has_rank;
1063                         my $rank1 = $self->reading( $edge->[1] )->rank
1064                                 if $self->reading( $edge->[1] )->has_rank;
1065                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1066                                 $variables->{'minlen'} = $rank1 - $rank0;
1067                         }
1068
1069                         # EXPERIMENTAL: make edge width reflect no. of witnesses
1070                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1071                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1072
1073                         my $varopts = _dot_attr_string( $variables );
1074                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1075                                 $edge->[0], $edge->[1], $varopts );
1076         } elsif( $used{$edge->[0]} ) {
1077                 $subend{$edge->[0]} = $edge->[1];
1078         } elsif( $used{$edge->[1]} ) {
1079                 $substart{$edge->[1]} = $edge->[0];
1080         }
1081     }
1082
1083     # If we are asked to, add relationship links
1084     if( exists $opts->{show_relations} ) {
1085         my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1086         if( $filter eq 'transposition' ) {
1087                 $filter =~ qr/^transposition$/;
1088         }
1089         my %typecolors;
1090         my @types = sort( map { $_->name } $self->relations->types );
1091         if( exists $opts->{graphcolors} ) {
1092                 foreach my $tdx ( 0 .. $#types ) {
1093                         $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1094                 }
1095         } else {
1096                 map { $typecolors{$_} = '#FFA14F' } @types;
1097         }
1098         foreach my $redge ( $self->relationships ) {
1099                 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1100                                 my $rel = $self->get_relationship( $redge );
1101                                 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1102                                 my $variables = {
1103                                         arrowhead => 'none',
1104                                         color => $typecolors{$rel->type},
1105                                         constraint => 'false',
1106                                         penwidth => '3',
1107                                 };
1108                                 unless( exists $opts->{graphcolors} ) {
1109                                         $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1110                                 }
1111                                 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1112                                         $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1113                 }
1114         }
1115     }
1116
1117     # Add substitute start and end edges if necessary
1118     foreach my $node ( keys %substart ) {
1119         my $witstr = $self->_path_display_label( $opts,
1120                 $self->path_witnesses( $substart{$node}, $node ) );
1121         my $variables = { %edge_attrs, 'label' => $witstr };
1122         my $nrdg = $self->reading( $node );
1123         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1124                 # Substart is actually one lower than $startrank
1125                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1126         }
1127         my $varopts = _dot_attr_string( $variables );
1128         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1129         }
1130     foreach my $node ( keys %subend ) {
1131         my $witstr = $self->_path_display_label( $opts,
1132                 $self->path_witnesses( $node, $subend{$node} ) );
1133         my $variables = { %edge_attrs, 'label' => $witstr };
1134         my $varopts = _dot_attr_string( $variables );
1135         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1136         }
1137         # HACK part 2
1138         if( $STRAIGHTENHACK ) {
1139                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1140                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1141         }
1142
1143     $dot .= "}\n";
1144     return $dot;
1145 }
1146
1147 sub _dot_attr_string {
1148         my( $hash ) = @_;
1149         my @attrs;
1150         foreach my $k ( sort keys %$hash ) {
1151                 my $v = $hash->{$k};
1152                 push( @attrs, $k.'="'.$v.'"' );
1153         }
1154         return( '[ ' . join( ', ', @attrs ) . ' ]' );
1155 }
1156
1157 =head2 path_witnesses( $edge )
1158
1159 Returns the list of sigils whose witnesses are associated with the given edge.
1160 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1161
1162 =cut
1163
1164 sub path_witnesses {
1165         my( $self, @edge ) = @_;
1166         # If edge is an arrayref, cope.
1167         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1168                 my $e = shift @edge;
1169                 @edge = @$e;
1170         }
1171         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1172         return @wits;
1173 }
1174
1175 # Helper function. Make a display label for the given witnesses, showing a.c.
1176 # witnesses only where the main witness is not also in the list.
1177 sub _path_display_label {
1178         my $self = shift;
1179         my $opts = shift;
1180         my %wits;
1181         map { $wits{$_} = 1 } @_;
1182
1183         # If an a.c. wit is listed, remove it if the main wit is also listed.
1184         # Otherwise keep it for explicit listing.
1185         my $aclabel = $self->ac_label;
1186         my @disp_ac;
1187         foreach my $w ( sort keys %wits ) {
1188                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1189                         if( exists $wits{$1} ) {
1190                                 delete $wits{$w};
1191                         } else {
1192                                 push( @disp_ac, $w );
1193                         }
1194                 }
1195         }
1196
1197         if( $opts->{'explicit_wits'} ) {
1198                 return join( ', ', sort keys %wits );
1199         } else {
1200                 # See if we are in a majority situation.
1201                 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1202                 $maj = $maj > 5 ? $maj : 5;
1203                 if( scalar keys %wits > $maj ) {
1204                         unshift( @disp_ac, 'majority' );
1205                         return join( ', ', @disp_ac );
1206                 } else {
1207                         return join( ', ', sort keys %wits );
1208                 }
1209         }
1210 }
1211
1212 =head2 as_adjacency_list
1213
1214 Returns a JSON structure that represents the collation sequence graph.
1215
1216 =begin testing
1217
1218 use JSON qw/ from_json /;
1219 use Text::Tradition;
1220
1221 my $t = Text::Tradition->new(
1222         'input' => 'Self',
1223         'file' => 't/data/florilegium_graphml.xml' );
1224 my $c = $t->collation;
1225
1226 # Make a connection so we can test rank preservation
1227 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1228
1229 # Create an adjacency list of the whole thing; test the output.
1230 my $adj_whole = from_json( $c->as_adjacency_list() );
1231 is( scalar @$adj_whole, scalar $c->readings(),
1232         "Same number of nodes in graph and adjacency list" );
1233 my @adj_whole_edges;
1234 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1235 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1236         "Same number of edges in graph and adjacency list" );
1237 # Find the reading whose rank should be preserved
1238 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1239 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1240 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1241
1242 # Now create an adjacency list of just a portion. w76 to w122
1243 my $adj_part = from_json( $c->as_adjacency_list(
1244         { from => $c->reading('w76')->rank,
1245           to   => $c->reading('w122')->rank }));
1246 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1247 my @adj_part_edges;
1248 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1249 is( scalar @adj_part_edges, 58,
1250         "Same number of edges in partial graph and adjacency list" );
1251 # Check for consistency
1252 my %part_nodes;
1253 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1254 foreach my $edge ( @adj_part_edges ) {
1255         my $testid = $edge->{id};
1256         ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1257 }
1258
1259 =end testing
1260
1261 =cut
1262
1263 sub as_adjacency_list {
1264         my( $self, $opts ) = @_;
1265         # Make a structure that contains all the nodes, the nodes they point to,
1266         # and the attributes of the edges that connect them.
1267         # [ { id: 'n0', label: 'Gallia', adjacent: [
1268         #               { id: 'n1', label: 'P Q' } ,
1269         #               { id: 'n2', label: 'R S', minlen: 2 } ] },
1270         #   { id: 'n1', label: 'est', adjacent: [ ... ] },
1271         #   ... ]
1272         my $startrank = $opts->{'from'} || 0;
1273         my $endrank = $opts->{'to'} || $self->end->rank;
1274
1275     $self->calculate_ranks()
1276         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1277         my $list = [];
1278         foreach my $rdg ( $self->readings ) {
1279                 my @successors;
1280                 my $phony = '';
1281                 # Figure out what the node's successors should be.
1282                 if( $rdg eq $self->start && $startrank > 0 ) {
1283                         # Connect the start node with all the nodes at startrank.
1284                         # Lacunas should be included only if the node really has that rank.
1285                         @successors = $self->readings_at_rank( $startrank, 1 );
1286                         $phony = 'start';
1287                 } elsif( $rdg->rank < $startrank
1288                                  || $rdg->rank > $endrank && $rdg ne $self->end ) {
1289                         next;
1290                 } else {
1291                         @successors = $rdg->successors;
1292                 }
1293                 # Make sure that the end node is at the end of the successors
1294                 # list if it is needed.
1295                 if( grep { $_ eq $self->end } @successors ) {
1296                         my @ts = grep { $_ ne $self->end } @successors;
1297                         @successors = ( @ts, $self->end );
1298                 } elsif ( grep { $_->rank > $endrank } @successors ) {
1299                         push( @successors, $self->end );
1300                 }
1301
1302                 my $listitem = { id => $rdg->id, label => $rdg->text };
1303                 my $adjacent = [];
1304                 my @endwits;
1305                 foreach my $succ ( @successors ) {
1306                         my @edgewits;
1307                         if( $phony eq 'start' ) {
1308                                 @edgewits = $succ->witnesses;
1309                         } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1310                                 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1311                         }
1312
1313                         if( $succ eq $self->end ) {
1314                                 @edgewits = @endwits;
1315                         } elsif( $succ->rank > $endrank ) {
1316                                 # These witnesses will point to 'end' instead, not to the
1317                                 # actual successor.
1318                                 push( @endwits, @edgewits );
1319                                 next;
1320                         }
1321                         my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1322                         my $edgedef = { id => $succ->id, label => $edgelabel };
1323                         my $rankoffset = $succ->rank - $rdg->rank;
1324                         if( $rankoffset > 1 and $succ ne $self->end ) {
1325                                 $edgedef->{minlen} = $rankoffset;
1326                         }
1327                         push( @$adjacent, $edgedef );
1328                 }
1329                 $listitem->{adjacent} = $adjacent;
1330                 push( @$list, $listitem );
1331         }
1332         return to_json( $list );
1333 }
1334
1335 =head2 as_graphml
1336
1337 Returns a GraphML representation of the collation.  The GraphML will contain
1338 two graphs. The first expresses the attributes of the readings and the witness
1339 paths that link them; the second expresses the relationships that link the
1340 readings.  This is the native transfer format for a tradition.
1341
1342 =begin testing
1343
1344 use Text::Tradition;
1345 use TryCatch;
1346
1347 my $READINGS = 311;
1348 my $PATHS = 361;
1349
1350 my $datafile = 't/data/florilegium_tei_ps.xml';
1351 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1352                                       'name' => 'test0',
1353                                       'file' => $datafile,
1354                                       'linear' => 1 );
1355
1356 ok( $tradition, "Got a tradition object" );
1357 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1358 ok( $tradition->collation, "Tradition has a collation" );
1359
1360 my $c = $tradition->collation;
1361 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1362 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1363 is( scalar $c->relationships, 0, "Collation has all relationships" );
1364
1365 # Add a few relationships
1366 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1367 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1368 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1369                                           'is_significant' => 'yes' } );
1370
1371 # Now write it to GraphML and parse it again.
1372
1373 my $graphml = $c->as_graphml;
1374 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1375 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1376 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1377 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1378 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1379 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1380
1381 # Now add a stemma, write to GraphML, and look at the output.
1382 SKIP: {
1383         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1384         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1385         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1386         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1387         $graphml = $c->as_graphml;
1388         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1389 }
1390
1391 =end testing
1392
1393 =cut
1394
1395 ## TODO MOVE this to Tradition.pm and modularize it better
1396 sub as_graphml {
1397     my( $self, $options ) = @_;
1398         $self->calculate_ranks unless $self->_graphcalc_done;
1399
1400         my $start = $options->{'from'}
1401                 ? $self->reading( $options->{'from'} ) : $self->start;
1402         my $end = $options->{'to'}
1403                 ? $self->reading( $options->{'to'} ) : $self->end;
1404         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1405                 throw( 'Start node must be before end node' );
1406         }
1407         # The readings need to be ranked for this to work.
1408         $start = $self->start unless $start->has_rank;
1409         $end = $self->end unless $end->has_rank;
1410         my $rankoffset = 0;
1411         unless( $start eq $self->start ) {
1412                 $rankoffset = $start->rank - 1;
1413         }
1414         my %use_readings;
1415
1416     # Some namespaces
1417     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1418     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1419     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1420         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1421
1422     # Create the document and root node
1423     require XML::LibXML;
1424     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1425     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1426     $graphml->setDocumentElement( $root );
1427     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1428     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1429
1430     # List of attribute types to save on our objects and their corresponding
1431     # GraphML types
1432     my %save_types = (
1433         'Str' => 'string',
1434         'Int' => 'int',
1435         'Bool' => 'boolean',
1436         'ReadingID' => 'string',
1437         'RelationshipType' => 'string',
1438         'RelationshipScope' => 'string',
1439         'Ternary' => 'string',
1440     );
1441
1442     # Add the data keys for the graph. Include an extra key 'version' for the
1443     # GraphML output version.
1444     my %graph_data_keys;
1445     my $gdi = 0;
1446     my %graph_attributes = ( 'version' => 'string' );
1447         # Graph attributes include those of Tradition and those of Collation.
1448         my %gattr_from;
1449         # TODO Use meta introspection method from duplicate_reading to do this
1450         # instead of naming custom keys.
1451         my $tmeta = $self->tradition->meta;
1452         my $cmeta = $self->meta;
1453         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1454         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1455         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1456                 next if $attr->name =~ /^_/;
1457                 next unless $save_types{$attr->type_constraint->name};
1458                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1459         }
1460     # Extra custom keys for complex objects that should be saved in some form.
1461     # The subroutine should return a string, or undef/empty.
1462     if( $tmeta->has_method('stemmata') ) {
1463                 $graph_attributes{'stemmata'} = sub {
1464                         my @stemstrs;
1465                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1466                                 $self->tradition->stemmata;
1467                         join( "\n", @stemstrs );
1468                 };
1469         }
1470
1471         if( $tmeta->has_method('user') ) {
1472                 $graph_attributes{'user'} = sub {
1473                         $self->tradition->user ? $self->tradition->user->id : undef
1474                 };
1475         }
1476
1477     foreach my $datum ( sort keys %graph_attributes ) {
1478         $graph_data_keys{$datum} = 'dg'.$gdi++;
1479         my $key = $root->addNewChild( $graphml_ns, 'key' );
1480         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1481                 : $graph_attributes{$datum};
1482         $key->setAttribute( 'attr.name', $datum );
1483         $key->setAttribute( 'attr.type', $dtype );
1484         $key->setAttribute( 'for', 'graph' );
1485         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1486     }
1487
1488     # Add the data keys for reading nodes
1489     my %reading_attributes;
1490     my $rmeta = Text::Tradition::Collation::Reading->meta;
1491     foreach my $attr( $rmeta->get_all_attributes ) {
1492                 next if $attr->name =~ /^_/;
1493                 next unless $save_types{$attr->type_constraint->name};
1494                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1495         }
1496         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1497                 # Extra custom key for the reading morphology
1498                 $reading_attributes{'lexemes'} = 'string';
1499         }
1500
1501     my %node_data_keys;
1502     my $ndi = 0;
1503     foreach my $datum ( sort keys %reading_attributes ) {
1504         $node_data_keys{$datum} = 'dn'.$ndi++;
1505         my $key = $root->addNewChild( $graphml_ns, 'key' );
1506         $key->setAttribute( 'attr.name', $datum );
1507         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1508         $key->setAttribute( 'for', 'node' );
1509         $key->setAttribute( 'id', $node_data_keys{$datum} );
1510     }
1511
1512     # Add the data keys for edges, that is, paths and relationships. Path
1513     # data does not come from a Moose class so is here manually.
1514     my $edi = 0;
1515     my %edge_data_keys;
1516     my %edge_attributes = (
1517         witness => 'string',                    # ID/label for a path
1518         extra => 'boolean',                             # Path key
1519         );
1520     my @path_attributes = keys %edge_attributes; # track our manual additions
1521     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1522     foreach my $attr( $pmeta->get_all_attributes ) {
1523                 next if $attr->name =~ /^_/;
1524                 next unless $save_types{$attr->type_constraint->name};
1525                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1526         }
1527     foreach my $datum ( sort keys %edge_attributes ) {
1528         $edge_data_keys{$datum} = 'de'.$edi++;
1529         my $key = $root->addNewChild( $graphml_ns, 'key' );
1530         $key->setAttribute( 'attr.name', $datum );
1531         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1532         $key->setAttribute( 'for', 'edge' );
1533         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1534     }
1535
1536     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1537     my $xmlidname = $self->tradition->name;
1538     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1539     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1540         $xmlidname = '_'.$xmlidname;
1541     }
1542     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1543     $sgraph->setAttribute( 'edgedefault', 'directed' );
1544     $sgraph->setAttribute( 'id', $xmlidname );
1545     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1546     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1547     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1548     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1549     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1550
1551     # Tradition/collation attribute data
1552     foreach my $datum ( keys %graph_attributes ) {
1553         my $value;
1554         if( $datum eq 'version' ) {
1555                 $value = '3.2';
1556         } elsif( ref( $graph_attributes{$datum} ) ) {
1557                 my $sub = $graph_attributes{$datum};
1558                 $value = &$sub();
1559         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1560                 $value = $self->tradition->$datum;
1561         } else {
1562                 $value = $self->$datum;
1563         }
1564                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1565         }
1566
1567     my $node_ctr = 0;
1568     my %node_hash;
1569     # Add our readings to the graph
1570     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1571         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1572                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1573         $use_readings{$n->id} = 1;
1574         # Add to the main graph
1575         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1576         my $node_xmlid = 'n' . $node_ctr++;
1577         $node_hash{ $n->id } = $node_xmlid;
1578         $node_el->setAttribute( 'id', $node_xmlid );
1579         foreach my $d ( keys %reading_attributes ) {
1580                 my $nval = $n->$d;
1581                 # Custom serialization
1582                 if( $d eq 'lexemes' ) {
1583                                 # If nval is a true value, we have lexemes so we need to
1584                                 # serialize them. Otherwise set nval to undef so that the
1585                                 # key is excluded from this reading.
1586                         $nval = $nval ? $n->_serialize_lexemes : undef;
1587                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1588                         $nval = undef;
1589                 }
1590                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1591                         # Adjust the ranks within the subgraph.
1592                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1593                                 : $nval - $rankoffset;
1594                 }
1595                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1596                         if defined $nval;
1597         }
1598     }
1599
1600     # Add the path edges to the sequence graph
1601     my $edge_ctr = 0;
1602     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1603         # We add an edge in the graphml for every witness in $e.
1604         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1605         my @edge_wits = sort $self->path_witnesses( $e );
1606         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1607         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1608         # Skip any path from start to end; that witness is not in the subgraph.
1609         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1610         foreach my $wit ( @edge_wits ) {
1611                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1612                                                                                 $node_hash{ $e->[0] },
1613                                                                                 $node_hash{ $e->[1] } );
1614                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1615                         $edge_el->setAttribute( 'source', $from );
1616                         $edge_el->setAttribute( 'target', $to );
1617                         $edge_el->setAttribute( 'id', $id );
1618
1619                         # It's a witness path, so add the witness
1620                         my $base = $wit;
1621                         my $key = $edge_data_keys{'witness'};
1622                         # Is this an ante-corr witness?
1623                         my $aclabel = $self->ac_label;
1624                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1625                                 # Keep the base witness
1626                                 $base = $1;
1627                                 # ...and record that this is an 'extra' reading path
1628                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1629                         }
1630                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1631                 }
1632         }
1633
1634         # Report the actual number of nodes and edges that went in
1635         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1636         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1637
1638         # Add the relationship graph to the XML
1639         map { delete $edge_data_keys{$_} } @path_attributes;
1640         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1641                 $node_data_keys{'id'}, \%edge_data_keys );
1642
1643     # Save and return the thing
1644     my $result = decode_utf8( $graphml->toString(1) );
1645     return $result;
1646 }
1647
1648 sub _add_graphml_data {
1649     my( $el, $key, $value ) = @_;
1650     return unless defined $value;
1651     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1652     $data_el->setAttribute( 'key', $key );
1653     $data_el->appendText( $value );
1654 }
1655
1656 =head2 as_csv
1657
1658 Returns a CSV alignment table representation of the collation graph, one
1659 row per witness (or witness uncorrected.)
1660
1661 =head2 as_tsv
1662
1663 Returns a tab-separated alignment table representation of the collation graph,
1664 one row per witness (or witness uncorrected.)
1665
1666 =begin testing
1667
1668 use Text::Tradition;
1669 use Text::CSV;
1670
1671 my $READINGS = 311;
1672 my $PATHS = 361;
1673 my $WITS = 13;
1674 my $WITAC = 4;
1675
1676 my $datafile = 't/data/florilegium_tei_ps.xml';
1677 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1678                                       'name' => 'test0',
1679                                       'file' => $datafile,
1680                                       'linear' => 1 );
1681
1682 my $c = $tradition->collation;
1683 # Export the thing to CSV
1684 my $csvstr = $c->as_csv();
1685 # Count the columns
1686 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1687 my @lines = split(/\n/, $csvstr );
1688 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1689 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1690 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1691 ok( @q_ac, "Found a layered witness" );
1692
1693 my $t2 = Text::Tradition->new( input => 'Tabular',
1694                                                            name => 'test2',
1695                                                            string => $csvstr,
1696                                                            sep_char => ',' );
1697 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1698 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1699
1700 # Now do it with TSV
1701 my $tsvstr = $c->as_tsv();
1702 my $t3 = Text::Tradition->new( input => 'Tabular',
1703                                                            name => 'test3',
1704                                                            string => $tsvstr,
1705                                                            sep_char => "\t" );
1706 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1707 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1708
1709 my $table = $c->alignment_table;
1710 my $noaccsv = $c->as_csv({ noac => 1 });
1711 my @noaclines = split(/\n/, $noaccsv );
1712 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1713 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1714 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1715
1716 my $safecsv = $c->as_csv({ safe_ac => 1});
1717 my @safelines = split(/\n/, $safecsv );
1718 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1719 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1720 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1721 ok( @q_ac, "Found a sanitized layered witness" );
1722 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1723
1724 # Test relationship collapse
1725 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1726 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1727
1728 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1729 my $t4 = Text::Tradition->new( input => 'Tabular',
1730                                                            name => 'test4',
1731                                                            string => $mergedtsv,
1732                                                            sep_char => "\t" );
1733 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1734 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1735
1736 # Test non-ASCII sigla
1737 my $t5 = Text::Tradition->new( input => 'Tabular',
1738                                                            name => 'nonascii',
1739                                                            file => 't/data/armexample.xlsx',
1740                                                            excel => 'xlsx' );
1741 my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1742 my @awitlines = split( /\n/, $awittsv );
1743 like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1744
1745 =end testing
1746
1747 =cut
1748
1749 sub _tabular {
1750     my( $self, $opts ) = @_;
1751     my $table = $self->alignment_table( $opts );
1752         my $csv_options = { binary => 1, quote_null => 0 };
1753         $csv_options->{'sep_char'} = $opts->{fieldsep};
1754         if( $opts->{fieldsep} eq "\t" ) {
1755                 # If it is really tab separated, nothing is an escape char.
1756                 $csv_options->{'quote_char'} = undef;
1757                 $csv_options->{'escape_char'} = '';
1758         }
1759     my $csv = Text::CSV->new( $csv_options );
1760     my @result;
1761
1762     # Make the header row
1763     my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1764     if( $opts->{ascii} ) {
1765         # TODO think of a fix for this
1766         throw( "Cannot currently produce ASCII sigla with witness layers" )
1767                 unless $opts->{noac};
1768         my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1769         @witnesses = @awits;
1770     }
1771     $csv->combine( @witnesses );
1772         push( @result, $csv->string );
1773
1774     # Make the rest of the rows
1775     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1776         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1777         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1778         # Quick and dirty collapse of requested relationship types
1779         if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1780                 # Now substitute the reading in the relevant index of @row
1781                 # for its merge-related reading
1782                 my %substitutes;
1783                 while( @rowobjs ) {
1784                         my $thisr = shift @rowobjs;
1785                         next unless $thisr;
1786                                 next if exists $substitutes{$thisr->{t}->text};
1787                                 # Make sure we don't have A <-> B substitutions.
1788                                 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1789                         foreach my $thatr ( @rowobjs ) {
1790                                 next unless $thatr;
1791                                 next if exists $substitutes{$thatr->{t}->text};
1792                                 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1793                                 next unless $ttrel;
1794                                 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1795                                 # If we have got this far then we need to merge them.
1796                                 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1797                         }
1798                 }
1799                 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1800         }
1801         $csv->combine( @row );
1802         push( @result, $csv->string );
1803     }
1804     return join( "\n", @result );
1805 }
1806
1807 sub as_csv {
1808         my $self = shift;
1809         my $opts = shift || {};
1810         $opts->{fieldsep} = ',';
1811         return $self->_tabular( $opts );
1812 }
1813
1814 sub as_tsv {
1815         my $self = shift;
1816         my $opts = shift || {};
1817         $opts->{fieldsep} = "\t";
1818         return $self->_tabular( $opts );
1819 }
1820
1821 =head2 alignment_table
1822
1823 Return a reference to an alignment table, in a slightly enhanced CollateX
1824 format which looks like this:
1825
1826  $table = { alignment => [ { witness => "SIGIL",
1827                              tokens => [ { t => "TEXT" }, ... ] },
1828                            { witness => "SIG2",
1829                              tokens => [ { t => "TEXT" }, ... ] },
1830                            ... ],
1831             length => TEXTLEN };
1832
1833 =cut
1834
1835 sub alignment_table {
1836     my( $self, $opts ) = @_;
1837     if( $self->has_cached_table ) {
1838                 return $self->cached_table
1839                         unless $opts->{noac} || $opts->{safe_ac};
1840     }
1841
1842     # Make sure we can do this
1843         throw( "Need a linear graph in order to make an alignment table" )
1844                 unless $self->linear;
1845     $self->calculate_ranks()
1846         unless $self->_graphcalc_done && $self->end->has_rank;
1847
1848     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1849     my @all_pos = ( 1 .. $self->end->rank - 1 );
1850     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1851         # say STDERR "Making witness row(s) for " . $wit->sigil;
1852         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1853         my @row = _make_witness_row( \@wit_path, \@all_pos );
1854         my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1855         $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1856         push( @{$table->{'alignment'}}, $witobj );
1857         if( $wit->is_layered && !$opts->{noac} ) {
1858                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1859                         $wit->sigil.$self->ac_label );
1860             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1861             my $witlabel = $opts->{safe_ac}
1862                 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1863             my $witacobj = { 'witness' => $witlabel,
1864                 'tokens' => \@ac_row };
1865             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1866                         push( @{$table->{'alignment'}}, $witacobj );
1867         }
1868     }
1869     unless( $opts->{noac} || $opts->{safe_ac} ) {
1870             $self->cached_table( $table );
1871         }
1872     return $table;
1873 }
1874
1875 sub _make_witness_row {
1876     my( $path, $positions ) = @_;
1877     my %char_hash;
1878     map { $char_hash{$_} = undef } @$positions;
1879     my $debug = 0;
1880     foreach my $rdg ( @$path ) {
1881         say STDERR "rank " . $rdg->rank if $debug;
1882         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1883         $char_hash{$rdg->rank} = { 't' => $rdg };
1884     }
1885     my @row = map { $char_hash{$_} } @$positions;
1886     # Fill in lacuna markers for undef spots in the row
1887     my $last_el = shift @row;
1888     my @filled_row = ( $last_el );
1889     foreach my $el ( @row ) {
1890         # If we are using node reference, make the lacuna node appear many times
1891         # in the table.  If not, use the lacuna tag.
1892         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1893             $el = $last_el;
1894         }
1895         push( @filled_row, $el );
1896         $last_el = $el;
1897     }
1898     return @filled_row;
1899 }
1900
1901
1902 =head1 NAVIGATION METHODS
1903
1904 =head2 reading_sequence( $first, $last, $sigil, $backup )
1905
1906 Returns the ordered list of readings, starting with $first and ending
1907 with $last, for the witness given in $sigil. If a $backup sigil is
1908 specified (e.g. when walking a layered witness), it will be used wherever
1909 no $sigil path exists.  If there is a base text reading, that will be
1910 used wherever no path exists for $sigil or $backup.
1911
1912 =cut
1913
1914 # TODO Think about returning some lazy-eval iterator.
1915 # TODO Get rid of backup; we should know from what witness is whether we need it.
1916
1917 sub reading_sequence {
1918     my( $self, $start, $end, $witness ) = @_;
1919
1920     $witness = $self->baselabel unless $witness;
1921     my @readings = ( $start );
1922     my %seen;
1923     my $n = $start;
1924     while( $n && $n->id ne $end->id ) {
1925         if( exists( $seen{$n->id} ) ) {
1926             throw( "Detected loop for $witness at " . $n->id );
1927         }
1928         $seen{$n->id} = 1;
1929
1930         my $next = $self->next_reading( $n, $witness );
1931         unless( $next ) {
1932             throw( "Did not find any path for $witness from reading " . $n->id );
1933         }
1934         push( @readings, $next );
1935         $n = $next;
1936     }
1937     # Check that the last reading is our end reading.
1938     my $last = $readings[$#readings];
1939     throw( "Last reading found from " . $start->text .
1940         " for witness $witness is not the end!" ) # TODO do we get this far?
1941         unless $last->id eq $end->id;
1942
1943     return @readings;
1944 }
1945
1946 =head2 readings_at_rank( $rank )
1947
1948 Returns a list of readings at a given rank, taken from the alignment table.
1949
1950 =cut
1951
1952 sub readings_at_rank {
1953         my( $self, $rank, $nolacuna ) = @_;
1954         my $table = $self->alignment_table;
1955         # Table rank is real rank - 1.
1956         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1957         my %readings;
1958         foreach my $e ( @elements ) {
1959                 next unless ref( $e ) eq 'HASH';
1960                 next unless exists $e->{'t'};
1961                 my $rdg = $e->{'t'};
1962                 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
1963                 $readings{$e->{'t'}->id} = $e->{'t'};
1964         }
1965         return values %readings;
1966 }
1967
1968 =head2 next_reading( $reading, $sigil );
1969
1970 Returns the reading that follows the given reading along the given witness
1971 path.
1972
1973 =cut
1974
1975 sub next_reading {
1976     # Return the successor via the corresponding path.
1977     my $self = shift;
1978     my $answer = $self->_find_linked_reading( 'next', @_ );
1979         return undef unless $answer;
1980     return $self->reading( $answer );
1981 }
1982
1983 =head2 prior_reading( $reading, $sigil )
1984
1985 Returns the reading that precedes the given reading along the given witness
1986 path.
1987
1988 =cut
1989
1990 sub prior_reading {
1991     # Return the predecessor via the corresponding path.
1992     my $self = shift;
1993     my $answer = $self->_find_linked_reading( 'prior', @_ );
1994     return $self->reading( $answer );
1995 }
1996
1997 sub _find_linked_reading {
1998     my( $self, $direction, $node, $path ) = @_;
1999
2000     # Get a backup if we are dealing with a layered witness
2001     my $alt_path;
2002     my $aclabel = $self->ac_label;
2003     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2004         $alt_path = $1;
2005     }
2006
2007     my @linked_paths = $direction eq 'next'
2008         ? $self->sequence->edges_from( $node )
2009         : $self->sequence->edges_to( $node );
2010     return undef unless scalar( @linked_paths );
2011
2012     # We have to find the linked path that contains all of the
2013     # witnesses supplied in $path.
2014     my( @path_wits, @alt_path_wits );
2015     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2016     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2017     my $base_le;
2018     my $alt_le;
2019     foreach my $le ( @linked_paths ) {
2020         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2021             $base_le = $le;
2022         }
2023                 my @le_wits = sort $self->path_witnesses( $le );
2024                 if( _is_within( \@path_wits, \@le_wits ) ) {
2025                         # This is the right path.
2026                         return $direction eq 'next' ? $le->[1] : $le->[0];
2027                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2028                         $alt_le = $le;
2029                 }
2030     }
2031     # Got this far? Return the alternate path if it exists.
2032     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2033         if $alt_le;
2034
2035     # Got this far? Return the base path if it exists.
2036     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2037         if $base_le;
2038
2039     # Got this far? We have no appropriate path.
2040     warn "Could not find $direction node from " . $node->id
2041         . " along path $path";
2042     return undef;
2043 }
2044
2045 # Some set logic.
2046 sub _is_within {
2047     my( $set1, $set2 ) = @_;
2048     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2049     foreach my $el ( @$set1 ) {
2050         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2051     }
2052     return $ret;
2053 }
2054
2055 # Return the string that joins together a list of witnesses for
2056 # display on a single path.
2057 sub _witnesses_of_label {
2058     my( $self, $label ) = @_;
2059     my $regex = $self->wit_list_separator;
2060     my @answer = split( /\Q$regex\E/, $label );
2061     return @answer;
2062 }
2063
2064 =head2 common_readings
2065
2066 Returns the list of common readings in the graph (i.e. those readings that are
2067 shared by all non-lacunose witnesses.)
2068
2069 =cut
2070
2071 sub common_readings {
2072         my $self = shift;
2073         my @common = grep { $_->is_common } $self->readings;
2074         return @common;
2075 }
2076
2077 =head2 path_text( $sigil, [, $start, $end ] )
2078
2079 Returns the text of a witness (plus its backup, if we are using a layer)
2080 as stored in the collation.  The text is returned as a string, where the
2081 individual readings are joined with spaces and the meta-readings (e.g.
2082 lacunae) are omitted.  Optional specification of $start and $end allows
2083 the generation of a subset of the witness text.
2084
2085 =cut
2086
2087 sub path_text {
2088         my( $self, $wit, $start, $end ) = @_;
2089         $start = $self->start unless $start;
2090         $end = $self->end unless $end;
2091         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2092         my $pathtext = '';
2093         my $last;
2094         foreach my $r ( @path ) {
2095                 unless ( $r->join_prior || !$last || $last->join_next ) {
2096                         $pathtext .= ' ';
2097                 }
2098                 $pathtext .= $r->text;
2099                 $last = $r;
2100         }
2101         return $pathtext;
2102 }
2103
2104 =head1 INITIALIZATION METHODS
2105
2106 These are mostly for use by parsers.
2107
2108 =head2 make_witness_path( $witness )
2109
2110 Link the array of readings contained in $witness->path (and in
2111 $witness->uncorrected_path if it exists) into collation paths.
2112 Clear out the arrays when finished.
2113
2114 =head2 make_witness_paths
2115
2116 Call make_witness_path for all witnesses in the tradition.
2117
2118 =cut
2119
2120 # For use when a collation is constructed from a base text and an apparatus.
2121 # We have the sequences of readings and just need to add path edges.
2122 # When we are done, clear out the witness path attributes, as they are no
2123 # longer needed.
2124 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2125
2126 sub make_witness_paths {
2127     my( $self ) = @_;
2128     foreach my $wit ( $self->tradition->witnesses ) {
2129         # say STDERR "Making path for " . $wit->sigil;
2130         $self->make_witness_path( $wit );
2131     }
2132 }
2133
2134 sub make_witness_path {
2135     my( $self, $wit ) = @_;
2136     my @chain = @{$wit->path};
2137     my $sig = $wit->sigil;
2138     # Add start and end if necessary
2139     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2140     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2141     foreach my $idx ( 0 .. $#chain-1 ) {
2142         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2143     }
2144     if( $wit->is_layered ) {
2145         @chain = @{$wit->uncorrected_path};
2146                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2147                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2148         foreach my $idx( 0 .. $#chain-1 ) {
2149             my $source = $chain[$idx];
2150             my $target = $chain[$idx+1];
2151             $self->add_path( $source, $target, $sig.$self->ac_label )
2152                 unless $self->has_path( $source, $target, $sig );
2153         }
2154     }
2155     $wit->clear_path;
2156     $wit->clear_uncorrected_path;
2157 }
2158
2159 =head2 calculate_ranks
2160
2161 Calculate the reading ranks (that is, their aligned positions relative
2162 to each other) for the graph.  This can only be called on linear collations.
2163
2164 =begin testing
2165
2166 use Text::Tradition;
2167
2168 my $cxfile = 't/data/Collatex-16.xml';
2169 my $t = Text::Tradition->new(
2170     'name'  => 'inline',
2171     'input' => 'CollateX',
2172     'file'  => $cxfile,
2173     );
2174 my $c = $t->collation;
2175
2176 # Make an svg
2177 my $table = $c->alignment_table;
2178 ok( $c->has_cached_table, "Alignment table was cached" );
2179 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2180 $c->calculate_ranks;
2181 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2182 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2183 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2184 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2185 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2186
2187 =end testing
2188
2189 =cut
2190
2191 sub calculate_ranks {
2192     my $self = shift;
2193     # Save the existing ranks, in case we need to invalidate the cached SVG.
2194     throw( "Cannot calculate ranks on a non-linear graph" )
2195         unless $self->linear;
2196     my %existing_ranks;
2197     map { $existing_ranks{$_} = $_->rank } $self->readings;
2198
2199     # Do the rankings based on the relationship equivalence graph, starting
2200     # with the start node.
2201     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2202
2203     # Transfer our rankings from the topological graph to the real one.
2204     foreach my $r ( $self->readings ) {
2205         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2206             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2207         } else {
2208                 # Die. Find the last rank we calculated.
2209                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2210                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2211                         $self->readings;
2212                 my $last = pop @all_defined;
2213             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2214         }
2215     }
2216     # Do we need to invalidate the cached data?
2217     if( $self->has_cached_table ) {
2218         foreach my $r ( $self->readings ) {
2219                 next if defined( $existing_ranks{$r} )
2220                         && $existing_ranks{$r} == $r->rank;
2221                 # Something has changed, so clear the cache
2222                 $self->_clear_cache;
2223                         # ...and recalculate the common readings.
2224                         $self->calculate_common_readings();
2225                 last;
2226         }
2227     }
2228         # The graph calculation information is now up to date.
2229         $self->_graphcalc_done(1);
2230 }
2231
2232 sub _clear_cache {
2233         my $self = shift;
2234         $self->wipe_table if $self->has_cached_table;
2235 }
2236
2237
2238 =head2 flatten_ranks
2239
2240 A convenience method for parsing collation data.  Searches the graph for readings
2241 with the same text at the same rank, and merges any that are found.
2242
2243 =cut
2244
2245 sub flatten_ranks {
2246     my ( $self, %args ) = shift;
2247     my %unique_rank_rdg;
2248     my $changed;
2249     foreach my $p ( $self->identical_readings( %args ) ) {
2250                 # say STDERR "Combining readings at same rank: @$p";
2251                 $changed = 1;
2252                 $self->merge_readings( @$p );
2253                 # TODO see if this now makes a common point.
2254     }
2255     # If we merged readings, the ranks are still fine but the alignment
2256     # table is wrong. Wipe it.
2257     $self->wipe_table() if $changed;
2258 }
2259
2260 =head2 identical_readings
2261 =head2 identical_readings( start => $startnode, end => $endnode )
2262 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2263
2264 Goes through the graph identifying all pairs of readings that appear to be
2265 identical, and therefore able to be merged into a single reading. Returns the
2266 relevant identical pairs. Can be restricted to run over only a part of the
2267 graph, specified either by node or by rank.
2268
2269 =cut
2270
2271 sub identical_readings {
2272         my ( $self, %args ) = @_;
2273     # Find where we should start and end.
2274     my $startrank = $args{startrank} || 0;
2275     if( $args{start} ) {
2276         throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2277                 && $self->reading( $args{start} )->has_rank;
2278         $startrank = $self->reading( $args{start} )->rank;
2279     }
2280     my $endrank = $args{endrank} || $self->end->rank;
2281     if( $args{end} ) {
2282         throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2283                 && $self->reading( $args{end} )->has_rank;
2284         $endrank = $self->reading( $args{end} )->rank;
2285     }
2286
2287     # Make sure the ranks are correct.
2288     unless( $self->_graphcalc_done ) {
2289         $self->calculate_ranks;
2290     }
2291     # Go through the readings looking for duplicates.
2292     my %unique_rank_rdg;
2293     my @pairs;
2294     foreach my $rdg ( $self->readings ) {
2295         next unless $rdg->has_rank;
2296         my $rk = $rdg->rank;
2297         next if $rk > $endrank || $rk < $startrank;
2298         my $key = $rk . "||" . $rdg->text;
2299         if( exists $unique_rank_rdg{$key} ) {
2300                 # Make sure they don't have different grammatical forms
2301                         my $ur = $unique_rank_rdg{$key};
2302                 if( $rdg->is_identical( $ur ) ) {
2303                                 push( @pairs, [ $ur, $rdg ] );
2304                         }
2305         } else {
2306             $unique_rank_rdg{$key} = $rdg;
2307         }
2308     }
2309
2310     return @pairs;
2311 }
2312
2313
2314 =head2 calculate_common_readings
2315
2316 Goes through the graph identifying the readings that appear in every witness
2317 (apart from those with lacunae at that spot.) Marks them as common and returns
2318 the list.
2319
2320 =begin testing
2321
2322 use Text::Tradition;
2323
2324 my $cxfile = 't/data/Collatex-16.xml';
2325 my $t = Text::Tradition->new(
2326     'name'  => 'inline',
2327     'input' => 'CollateX',
2328     'file'  => $cxfile,
2329     );
2330 my $c = $t->collation;
2331
2332 my @common = $c->calculate_common_readings();
2333 is( scalar @common, 8, "Found correct number of common readings" );
2334 my @marked = sort $c->common_readings();
2335 is( scalar @common, 8, "All common readings got marked as such" );
2336 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2337 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2338
2339 =end testing
2340
2341 =cut
2342
2343 sub calculate_common_readings {
2344         my $self = shift;
2345         my @common;
2346         map { $_->is_common( 0 ) } $self->readings;
2347         # Implicitly calls calculate_ranks
2348         my $table = $self->alignment_table;
2349         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2350                 my @row = map { $_->{'tokens'}->[$idx]
2351                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2352                                         @{$table->{'alignment'}};
2353                 my %hash;
2354                 foreach my $r ( @row ) {
2355                         if( $r ) {
2356                                 $hash{$r->id} = $r unless $r->is_meta;
2357                         } else {
2358                                 $hash{'UNDEF'} = $r;
2359                         }
2360                 }
2361                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2362                         my( $r ) = values %hash;
2363                         $r->is_common( 1 );
2364                         push( @common, $r );
2365                 }
2366         }
2367         return @common;
2368 }
2369
2370 =head2 text_from_paths
2371
2372 Calculate the text array for all witnesses from the path, for later consistency
2373 checking.  Only to be used if there is no non-graph-based way to know the
2374 original texts.
2375
2376 =cut
2377
2378 sub text_from_paths {
2379         my $self = shift;
2380     foreach my $wit ( $self->tradition->witnesses ) {
2381         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2382         my @text;
2383         foreach my $r ( @readings ) {
2384                 next if $r->is_meta;
2385                 push( @text, $r->text );
2386         }
2387         $wit->text( \@text );
2388         if( $wit->is_layered ) {
2389                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2390                                                                                                   $wit->sigil.$self->ac_label );
2391                         my @uctext;
2392                         foreach my $r ( @ucrdgs ) {
2393                                 next if $r->is_meta;
2394                                 push( @uctext, $r->text );
2395                         }
2396                         $wit->layertext( \@uctext );
2397         }
2398     }
2399 }
2400
2401 =head1 UTILITY FUNCTIONS
2402
2403 =head2 common_predecessor( $reading_a, $reading_b )
2404
2405 Find the last reading that occurs in sequence before both the given readings.
2406 At the very least this should be $self->start.
2407
2408 =head2 common_successor( $reading_a, $reading_b )
2409
2410 Find the first reading that occurs in sequence after both the given readings.
2411 At the very least this should be $self->end.
2412
2413 =begin testing
2414
2415 use Text::Tradition;
2416
2417 my $cxfile = 't/data/Collatex-16.xml';
2418 my $t = Text::Tradition->new(
2419     'name'  => 'inline',
2420     'input' => 'CollateX',
2421     'file'  => $cxfile,
2422     );
2423 my $c = $t->collation;
2424
2425 is( $c->common_predecessor( 'n24', 'n23' )->id,
2426     'n20', "Found correct common predecessor" );
2427 is( $c->common_successor( 'n24', 'n23' )->id,
2428     '__END__', "Found correct common successor" );
2429
2430 is( $c->common_predecessor( 'n19', 'n17' )->id,
2431     'n16', "Found correct common predecessor for readings on same path" );
2432 is( $c->common_successor( 'n21', 'n10' )->id,
2433     '__END__', "Found correct common successor for readings on same path" );
2434
2435 =end testing
2436
2437 =cut
2438
2439 ## Return the closest reading that is a predecessor of both the given readings.
2440 sub common_predecessor {
2441         my $self = shift;
2442         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2443         return $self->_common_in_path( $r1, $r2, 'predecessors' );
2444 }
2445
2446 sub common_successor {
2447         my $self = shift;
2448         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2449         return $self->_common_in_path( $r1, $r2, 'successors' );
2450 }
2451
2452
2453 # TODO think about how to do this without ranks...
2454 sub _common_in_path {
2455         my( $self, $r1, $r2, $dir ) = @_;
2456         my $iter = $self->end->rank;
2457         my @candidates;
2458         my @last_r1 = ( $r1 );
2459         my @last_r2 = ( $r2 );
2460         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2461         my %all_seen;
2462         # say STDERR "Finding common $dir for $r1, $r2";
2463         while( !@candidates ) {
2464                 last unless $iter--;  # Avoid looping infinitely
2465                 # Iterate separately down the graph from r1 and r2
2466                 my( @new_lc1, @new_lc2 );
2467                 foreach my $lc ( @last_r1 ) {
2468                         foreach my $p ( $lc->$dir ) {
2469                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2470                                         # say STDERR "Path candidate $p from $lc";
2471                                         push( @candidates, $p );
2472                                 } elsif( !$all_seen{$p->id} ) {
2473                                         $all_seen{$p->id} = 'r1';
2474                                         push( @new_lc1, $p );
2475                                 }
2476                         }
2477                 }
2478                 foreach my $lc ( @last_r2 ) {
2479                         foreach my $p ( $lc->$dir ) {
2480                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2481                                         # say STDERR "Path candidate $p from $lc";
2482                                         push( @candidates, $p );
2483                                 } elsif( !$all_seen{$p->id} ) {
2484                                         $all_seen{$p->id} = 'r2';
2485                                         push( @new_lc2, $p );
2486                                 }
2487                         }
2488                 }
2489                 @last_r1 = @new_lc1;
2490                 @last_r2 = @new_lc2;
2491         }
2492         my @answer = sort { $a->rank <=> $b->rank } @candidates;
2493         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2494 }
2495
2496 sub throw {
2497         Text::Tradition::Error->throw(
2498                 'ident' => 'Collation error',
2499                 'message' => $_[0],
2500                 );
2501 }
2502
2503 no Moose;
2504 __PACKAGE__->meta->make_immutable;
2505
2506 =head1 BUGS/TODO
2507
2508 =over
2509
2510 =item * Rework XML serialization in a more modular way
2511
2512 =back
2513
2514 =head1 LICENSE
2515
2516 This package is free software and is provided "as is" without express
2517 or implied warranty.  You can redistribute it and/or modify it under
2518 the same terms as Perl itself.
2519
2520 =head1 AUTHOR
2521
2522 Tara L Andrews E<lt>aurum@cpan.orgE<gt>