base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use JSON qw/ to_json /;
  10 use Text::CSV;
  11 use Text::Tradition::Collation::Data;
  12 use Text::Tradition::Collation::Reading;
  13 use Text::Tradition::Collation::RelationshipStore;
  14 use Text::Tradition::Error;
  15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  16 use XML::LibXML;
  17 use XML::LibXML::XPathContext;
  18 use Moose;
  19
  20 has _data => (
  21         isa      => 'Text::Tradition::Collation::Data',
  22         is       => 'ro',
  23         required => 1,
  24         handles  => [ qw(
  25                 sequence
  26                 paths
  27                 _set_relations
  28                 relations
  29                 _set_start
  30                 _set_end
  31                 ac_label
  32                 has_cached_table
  33                 relationships
  34                 relationship_types
  35                 related_readings
  36                 get_relationship
  37                 del_relationship
  38                 equivalence
  39                 equivalence_graph
  40                 readings
  41                 reading
  42                 _add_reading
  43                 del_reading
  44                 has_reading
  45                 wit_list_separator
  46                 baselabel
  47                 linear
  48                 wordsep
  49                 start
  50                 end
  51                 cached_table
  52                 _graphcalc_done
  53                 has_cached_svg
  54                 wipe_table
  55         )]
  56 );
  57
  58 has 'tradition' => (
  59     is => 'ro',
  60     isa => 'Text::Tradition',
  61     writer => '_set_tradition',
  62     weak_ref => 1,
  63     );
  64
  65 =encoding utf8
  66
  67 =head1 NAME
  68
  69 Text::Tradition::Collation - a software model for a text collation
  70
  71 =head1 SYNOPSIS
  72
  73   use Text::Tradition;
  74   my $t = Text::Tradition->new(
  75     'name' => 'this is a text',
  76     'input' => 'TEI',
  77     'file' => '/path/to/tei_parallel_seg_file.xml' );
  78
  79   my $c = $t->collation;
  80   my @readings = $c->readings;
  81   my @paths = $c->paths;
  82   my @relationships = $c->relationships;
  83
  84   my $svg_variant_graph = $t->collation->as_svg();
  85
  86 =head1 DESCRIPTION
  87
  88 Text::Tradition is a library for representation and analysis of collated
  89 texts, particularly medieval ones.  The Collation is the central feature of
  90 a Tradition, where the text, its sequence of readings, and its relationships
  91 between readings are actually kept.
  92
  93 =head1 CONSTRUCTOR
  94
  95 =head2 new
  96
  97 The constructor.  Takes a hash or hashref of the following arguments:
  98
  99 =over
 100
 101 =item * tradition - The Text::Tradition object to which the collation
 102 belongs. Required.
 103
 104 =item * linear - Whether the collation should be linear; that is, whether
 105 transposed readings should be treated as two linked readings rather than one,
 106 and therefore whether the collation graph is acyclic.  Defaults to true.
 107
 108 =item * baselabel - The default label for the path taken by a base text
 109 (if any). Defaults to 'base text'.
 110
 111 =item * wit_list_separator - The string to join a list of witnesses for
 112 purposes of making labels in display graphs.  Defaults to ', '.
 113
 114 =item * ac_label - The extra label to tack onto a witness sigil when
 115 representing another layer of path for the given witness - that is, when
 116 a text has more than one possible reading due to scribal corrections or
 117 the like.  Defaults to ' (a.c.)'.
 118
 119 =item * wordsep - The string used to separate words in the original text.
 120 Defaults to ' '.
 121
 122 =back
 123
 124 =head1 ACCESSORS
 125
 126 =head2 tradition
 127
 128 =head2 linear
 129
 130 =head2 wit_list_separator
 131
 132 =head2 baselabel
 133
 134 =head2 ac_label
 135
 136 =head2 wordsep
 137
 138 Simple accessors for collation attributes.
 139
 140 =head2 start
 141
 142 The meta-reading at the start of every witness path.
 143
 144 =head2 end
 145
 146 The meta-reading at the end of every witness path.
 147
 148 =head2 readings
 149
 150 Returns all Reading objects in the graph.
 151
 152 =head2 reading( $id )
 153
 154 Returns the Reading object corresponding to the given ID.
 155
 156 =head2 add_reading( $reading_args )
 157
 158 Adds a new reading object to the collation.
 159 See L<Text::Tradition::Collation::Reading> for the available arguments.
 160
 161 =head2 del_reading( $object_or_id )
 162
 163 Removes the given reading from the collation, implicitly removing its
 164 paths and relationships.
 165
 166 =head2 has_reading( $id )
 167
 168 Predicate to see whether a given reading ID is in the graph.
 169
 170 =head2 reading_witnesses( $object_or_id )
 171
 172 Returns a list of sigils whose witnesses contain the reading.
 173
 174 =head2 paths
 175
 176 Returns all reading paths within the document - that is, all edges in the
 177 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 178
 179 =head2 add_path( $source, $target, $sigil )
 180
 181 Links the given readings in the collation in sequence, under the given witness
 182 sigil.  The readings may be specified by object or ID.
 183
 184 =head2 del_path( $source, $target, $sigil )
 185
 186 Links the given readings in the collation in sequence, under the given witness
 187 sigil.  The readings may be specified by object or ID.
 188
 189 =head2 has_path( $source, $target );
 190
 191 Returns true if the two readings are linked in sequence in any witness.
 192 The readings may be specified by object or ID.
 193
 194 =head2 relationships
 195
 196 Returns all Relationship objects in the collation.
 197
 198 =head2 add_relationship( $reading, $other_reading, $options )
 199
 200 Adds a new relationship of the type given in $options between the two readings,
 201 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 202 where $status is true on success, and @vectors is a list of relationship edges
 203 that were ultimately added.
 204 See L<Text::Tradition::Collation::Relationship> for the available options.
 205
 206 =cut
 207
 208 sub BUILDARGS {
 209         my ( $class, @args ) = @_;
 210         my %args = @args == 1 ? %{ $args[0] } : @args;
 211         # TODO determine these from the Moose::Meta object
 212         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 213                 linear wordsep start end cached_table _graphcalc_done);
 214         my %data_args;
 215         for my $attr (@delegate_attrs) {
 216                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 217         }
 218         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 219         return \%args;
 220 }
 221
 222 sub BUILD {
 223     my $self = shift;
 224     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 225     $self->_set_start( $self->add_reading(
 226         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 227     $self->_set_end( $self->add_reading(
 228         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 229 }
 230
 231 =head2 register_relationship_type( %relationship_definition )
 232
 233 Add a relationship type definition to this collation. The argument can be either a
 234 hash or a hashref, defining the properties of the relationship. For relationship types
 235 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
 236
 237 =head2 get_relationship_type( $relationship_name )
 238
 239 Retrieve the RelationshipType object for the relationship with the given name.
 240
 241 =cut
 242
 243 sub register_relationship_type {
 244         my $self = shift;
 245         my %args = @_ == 1 ? %{$_[0]} : @_;
 246         if( $self->relations->has_type( $args{name} ) ) {
 247                 throw( 'Relationship type ' . $args{name} . ' already registered' );
 248         }
 249         $self->relations->add_type( %args );
 250 }
 251
 252 sub get_relationship_type {
 253         my( $self, $name ) = @_;
 254                 return $self->relations->has_type( $name )
 255                         ? $self->relations->type( $name ) : undef;
 256 }
 257
 258 ### Reading construct/destruct functions
 259
 260 sub add_reading {
 261         my( $self, $reading ) = @_;
 262         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 263                 my %args = %$reading;
 264                 if( $args{'init'} ) {
 265                         # If we are initializing an empty collation, don't assume that we
 266                         # have set a tradition.
 267                         delete $args{'init'};
 268                 } elsif( $self->tradition->can('language') && $self->tradition->has_language
 269                         && !exists $args{'language'} ) {
 270                         $args{'language'} = $self->tradition->language;
 271                 }
 272                 $reading = Text::Tradition::Collation::Reading->new(
 273                         'collation' => $self,
 274                         %args );
 275         }
 276         # First check to see if a reading with this ID exists.
 277         if( $self->reading( $reading->id ) ) {
 278                 throw( "Collation already has a reading with id " . $reading->id );
 279         }
 280         $self->_graphcalc_done(0);
 281         $self->_add_reading( $reading->id => $reading );
 282         # Once the reading has been added, put it in both graphs.
 283         $self->sequence->add_vertex( $reading->id );
 284         $self->relations->add_reading( $reading->id );
 285         return $reading;
 286 };
 287
 288 around del_reading => sub {
 289         my $orig = shift;
 290         my $self = shift;
 291         my $arg = shift;
 292
 293         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 294                 $arg = $arg->id;
 295         }
 296         # Remove the reading from the graphs.
 297         $self->_graphcalc_done(0);
 298         $self->_clear_cache; # Explicitly clear caches to GC the reading
 299         $self->sequence->delete_vertex( $arg );
 300         $self->relations->delete_reading( $arg );
 301
 302         # Carry on.
 303         $self->$orig( $arg );
 304 };
 305
 306 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 307
 308 Merges the $second reading into the $main one. If $concatenate is true, then
 309 the merged node will carry the text of both readings, concatenated with either
 310 $with_str (if specified) or a sensible default (the empty string if the
 311 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 312
 313 The first two arguments may be either readings or reading IDs.
 314
 315 =begin testing
 316
 317 use Text::Tradition;
 318 use TryCatch;
 319
 320 my $cxfile = 't/data/Collatex-16.xml';
 321 my $t = Text::Tradition->new(
 322     'name'  => 'inline',
 323     'input' => 'CollateX',
 324     'file'  => $cxfile,
 325     );
 326 my $c = $t->collation;
 327
 328 my $rno = scalar $c->readings;
 329 # Split n21 ('unto') for testing purposes
 330 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 331 my $old_r = $c->reading( 'n21' );
 332 $old_r->alter_text( 'to' );
 333 $c->del_path( 'n20', 'n21', 'A' );
 334 $c->add_path( 'n20', 'n21p0', 'A' );
 335 $c->add_path( 'n21p0', 'n21', 'A' );
 336 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
 337 $c->flatten_ranks();
 338 ok( $c->reading( 'n21p0' ), "New reading exists" );
 339 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 340
 341 # Combine n3 and n4 ( with his )
 342 $c->merge_readings( 'n3', 'n4', 1 );
 343 ok( !$c->reading('n4'), "Reading n4 is gone" );
 344 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 345
 346 # Collapse n9 and n10 ( rood / root )
 347 $c->merge_readings( 'n9', 'n10' );
 348 ok( !$c->reading('n10'), "Reading n10 is gone" );
 349 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 350
 351 # Try to combine n21 and n21p0. This should break.
 352 my $remaining = $c->reading('n21');
 353 $remaining ||= $c->reading('n22');  # one of these should still exist
 354 try {
 355         $c->merge_readings( 'n21p0', $remaining, 1 );
 356         ok( 0, "Bad reading merge changed the graph" );
 357 } catch( Text::Tradition::Error $e ) {
 358         like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
 359 } catch {
 360         ok( 0, "Unexpected error on bad reading merge: $@" );
 361 }
 362
 363 try {
 364         $c->calculate_ranks();
 365         ok( 1, "Graph is still evidently whole" );
 366 } catch( Text::Tradition::Error $e ) {
 367         ok( 0, "Caught a rank exception: " . $e->message );
 368 }
 369
 370 =end testing
 371
 372 =cut
 373
 374 sub merge_readings {
 375         my $self = shift;
 376
 377         # Sanity check
 378         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 379         my $mergemeta = $kept_obj->is_meta;
 380         throw( "Cannot merge meta and non-meta reading" )
 381                 unless ( $mergemeta && $del_obj->is_meta )
 382                         || ( !$mergemeta && !$del_obj->is_meta );
 383         if( $mergemeta ) {
 384                 throw( "Cannot merge with start or end node" )
 385                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 386                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 387                 throw( "Cannot combine text of meta readings" ) if $combine;
 388         }
 389         # We can only merge readings in a linear graph if:
 390         # - they are contiguous with only one edge between them, OR
 391         # - they are at equivalent ranks in the graph.
 392         if( $self->linear ) {
 393                 my @delpred = $del_obj->predecessors;
 394                 my @keptsuc = $kept_obj->successors;
 395                 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
 396                         && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
 397                         my( $is_ok, $msg ) = $self->relations->relationship_valid(
 398                                 $kept_obj, $del_obj, 'collated' );
 399                         unless( $is_ok ) {
 400                                 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
 401                         }
 402                 }
 403         }
 404
 405         # We only need the IDs for adding paths to the graph, not the reading
 406         # objects themselves.
 407         my $kept = $kept_obj->id;
 408         my $deleted = $del_obj->id;
 409         $self->_graphcalc_done(0);
 410
 411     # The kept reading should inherit the paths and the relationships
 412     # of the deleted reading.
 413         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 414                 my @vector = ( $kept );
 415                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 416                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 417                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 418                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 419                 $self->sequence->add_edge( @vector );
 420                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 421                 @wits{keys %$fwits} = values %$fwits;
 422                 $self->sequence->set_edge_attributes( @vector, \%wits );
 423         }
 424         $self->relations->merge_readings( $kept, $deleted, $combine );
 425
 426         # Do the deletion deed.
 427         if( $combine ) {
 428                 # Combine the text of the readings
 429                 my $joinstr = $combine_char;
 430                 unless( defined $joinstr ) {
 431                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 432                         $joinstr = $self->wordsep unless defined $joinstr;
 433                 }
 434                 $kept_obj->_combine( $del_obj, $joinstr );
 435         }
 436         $self->del_reading( $deleted );
 437 }
 438
 439 =head2 merge_related( @relationship_types )
 440
 441 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
 442
 443 WARNING: This operation cannot be undone.
 444
 445 =cut
 446
 447 =begin testing
 448
 449 use Test::Warn;
 450 use Text::Tradition;
 451 use TryCatch;
 452
 453 my $t;
 454 warnings_exist {
 455         $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
 456 } [qr/Cannot set relationship on a meta reading/],
 457         "Got expected relationship drop warning on parse";
 458
 459 my $c = $t->collation;
 460 # Force the transitive propagation of all existing relationships.
 461 $c->relations->propagate_all_relationships();
 462
 463 my %rdg_ids;
 464 map { $rdg_ids{$_} = 1 } $c->readings;
 465 $c->merge_related( 'orthographic' );
 466 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
 467         "Successfully collapsed orthographic variation" );
 468 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
 469 foreach my $rid ( keys %rdg_ids ) {
 470         my $exp = $rdg_ids{$rid};
 471         is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
 472                 ( $exp ? "retained" : "removed" ) );
 473 }
 474 ok( $c->linear, "Graph is still linear" );
 475 try {
 476         $c->calculate_ranks; # This should succeed
 477         ok( 1, "Can still calculate ranks on the new graph" );
 478 } catch {
 479         ok( 0, "Rank calculation on merged graph failed: $@" );
 480 }
 481
 482 # Now add some transpositions
 483 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
 484 $c->merge_related( 'transposition' );
 485 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
 486         "Transposed relationship is merged away" );
 487 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
 488 ok( !$c->linear, "Graph is no longer linear" );
 489 try {
 490         $c->calculate_ranks; # This should fail
 491         ok( 0, "Rank calculation happened on nonlinear graph?!" );
 492 } catch ( Text::Tradition::Error $e ) {
 493         is( $e->message, 'Cannot calculate ranks on a non-linear graph',
 494                 "Rank calculation on merged graph threw an error" );
 495 }
 496
 497 =end testing
 498
 499 =cut
 500
 501 # TODO: there should be a way to display merged without affecting the underlying data!
 502
 503 sub merge_related {
 504         my $self = shift;
 505         my %reltypehash;
 506         map { $reltypehash{$_} = 1 } @_;
 507
 508         # Set up the filter for finding related readings
 509         my $filter = sub {
 510                 exists $reltypehash{$_[0]->type};
 511         };
 512
 513         # Go through all readings looking for related ones
 514         foreach my $r ( $self->readings ) {
 515                 next unless $self->reading( "$r" ); # might have been deleted meanwhile
 516                 while( my @related = $self->related_readings( $r, $filter ) ) {
 517                         push( @related, $r );
 518                         @related = sort {
 519                                         scalar $b->witnesses <=> scalar $a->witnesses
 520                                 } @related;
 521                         my $keep = shift @related;
 522                         foreach my $delr ( @related ) {
 523                                 $self->linear( 0 )
 524                                         unless( $self->get_relationship( $keep, $delr )->colocated );
 525                                 $self->merge_readings( $keep, $delr );
 526                         }
 527                 }
 528         }
 529 }
 530
 531 =head2 compress_readings
 532
 533 Where possible in the graph, compresses plain sequences of readings into a
 534 single reading. The sequences must consist of readings with no
 535 relationships to other readings, with only a single witness path between
 536 them and no other witness paths from either that would skip the other. The
 537 readings must also not be marked as nonsense or bad grammar.
 538
 539 WARNING: This operation cannot be undone.
 540
 541 =begin testing
 542
 543 use Text::Tradition;
 544
 545 my $t = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 546 my $c = $t->collation;
 547 my $n = scalar $c->readings;
 548 $c->compress_readings();
 549 is( scalar $c->readings, $n - 6, "Compressing readings seems to work" );
 550
 551 # Now put in a join-word and make sure the thing still works.
 552 my $t2 = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 553 my $c2 = $t2->collation;
 554 # Split n21 ('unto') for testing purposes
 555 my $new_r = $c2->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 556 my $old_r = $c2->reading( 'n21' );
 557 $old_r->alter_text( 'to' );
 558 $c2->del_path( 'n20', 'n21', 'A' );
 559 $c2->add_path( 'n20', 'n21p0', 'A' );
 560 $c2->add_path( 'n21p0', 'n21', 'A' );
 561 $c2->calculate_ranks();
 562 is( scalar $c2->readings, $n + 1, "We have our extra test reading" );
 563 $c2->compress_readings();
 564 is( scalar $c2->readings, $n - 6, "Compressing readings also works with join_next" );
 565 is( $c2->reading( 'n21p0' )->text, 'unto', "The joined word has no space" );
 566
 567
 568 =end testing
 569
 570 =cut
 571
 572 sub compress_readings {
 573         my $self = shift;
 574         # Sanity check: first save the original text of each witness.
 575         my %origtext;
 576         foreach my $wit ( $self->tradition->witnesses ) {
 577                 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
 578                 if( $wit->is_layered ) {
 579                         my $acsig = $wit->sigil . $self->ac_label;
 580                         $origtext{$acsig} = $self->path_text( $acsig );
 581                 }
 582         }
 583
 584         # Now do the deed.
 585         # Anywhere in the graph that there is a reading that joins only to a single
 586         # successor, and neither of these have any relationships, just join the two
 587         # readings.
 588         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 589                 # Now look for readings that can be joined to their successors.
 590                 next unless $rdg->is_combinable;
 591                 my %seen;
 592                 while( $self->sequence->successors( $rdg ) == 1 ) {
 593                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 594                         throw( "Infinite loop" ) if $seen{$next->id};
 595                         $seen{$next->id} = 1;
 596                         last if $self->sequence->predecessors( $next ) > 1;
 597                         last unless $next->is_combinable;
 598                         say "Joining readings $rdg and $next";
 599                         $self->merge_readings( $rdg, $next, 1 );
 600                 }
 601         }
 602
 603         # Finally, make sure we haven't screwed anything up.
 604         foreach my $wit ( $self->tradition->witnesses ) {
 605                 my $pathtext = $self->path_text( $wit->sigil );
 606                 throw( "Text differs for witness " . $wit->sigil )
 607                         unless $pathtext eq $origtext{$wit->sigil};
 608                 if( $wit->is_layered ) {
 609                         my $acsig = $wit->sigil . $self->ac_label;
 610                         $pathtext = $self->path_text( $acsig );
 611                         throw( "Layered text differs for witness " . $wit->sigil )
 612                                 unless $pathtext eq $origtext{$acsig};
 613                 }
 614         }
 615
 616         $self->relations->rebuild_equivalence();
 617         $self->calculate_ranks();
 618 }
 619
 620 # Helper function for manipulating the graph.
 621 sub _stringify_args {
 622         my( $self, $first, $second, @args ) = @_;
 623     $first = $first->id
 624         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 625     $second = $second->id
 626         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 627     return( $first, $second, @args );
 628 }
 629
 630 # Helper function for manipulating the graph.
 631 sub _objectify_args {
 632         my( $self, $first, $second, $arg ) = @_;
 633     $first = $self->reading( $first )
 634         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 635     $second = $self->reading( $second )
 636         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 637     return( $first, $second, $arg );
 638 }
 639
 640 =head2 duplicate_reading( $reading, @witlist )
 641
 642 Split the given reading into two, so that the new reading is in the path for
 643 the witnesses given in @witlist. If the result is that certain non-colocated
 644 relationships (e.g. transpositions) are no longer valid, these will be removed.
 645 Returns the newly-created reading.
 646
 647 =begin testing
 648
 649 use Test::More::UTF8;
 650 use Text::Tradition;
 651 use TryCatch;
 652
 653 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
 654 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
 655 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
 656
 657 my $sc = $st->collation;
 658 my $numr = 17;
 659 ok( $sc->reading('n131'), "Tradition has the affected reading" );
 660 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
 661 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
 662
 663 # Detach the erroneously collated reading
 664 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
 665 ok( $newr, "New reading was created" );
 666 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
 667 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
 668 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
 669 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
 670 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
 671
 672 # Check that the bad transposition is gone
 673 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
 674 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
 675
 676 # The collation should not be fixed
 677 my @pairs = $sc->identical_readings();
 678 is( scalar @pairs, 0, "Not re-collated yet" );
 679 # Fix the collation
 680 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
 681 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
 682 is( scalar @pairs, 3, "Found three more identical readings" );
 683 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
 684 $sc->flatten_ranks();
 685 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
 686
 687 # Check that we can't "duplicate" a reading with no wits or with all wits
 688 try {
 689         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
 690         ok( 0, "Reading duplication without witnesses throws an error" );
 691 } catch( Text::Tradition::Error $e ) {
 692         like( $e->message, qr/Must specify one or more witnesses/,
 693                 "Reading duplication without witnesses throws the expected error" );
 694 } catch {
 695         ok( 0, "Reading duplication without witnesses threw the wrong error" );
 696 }
 697
 698 try {
 699         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
 700         ok( 0, "Reading duplication with all witnesses throws an error" );
 701 } catch( Text::Tradition::Error $e ) {
 702         like( $e->message, qr/Cannot join all witnesses/,
 703                 "Reading duplication with all witnesses throws the expected error" );
 704 } catch {
 705         ok( 0, "Reading duplication with all witnesses threw the wrong error" );
 706 }
 707
 708 try {
 709         $sc->calculate_ranks();
 710         ok( 1, "Graph is still evidently whole" );
 711 } catch( Text::Tradition::Error $e ) {
 712         ok( 0, "Caught a rank exception: " . $e->message );
 713 }
 714
 715 =end testing
 716
 717 =cut
 718
 719 sub duplicate_reading {
 720         my( $self, $r, @wits ) = @_;
 721         # Check that we are not doing anything unwise.
 722         throw( "Must specify one or more witnesses for the duplicated reading" )
 723                 unless @wits;
 724         unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
 725                 $r = $self->reading( $r );
 726         }
 727         throw( "Cannot duplicate a meta-reading" )
 728                 if $r->is_meta;
 729         throw( "Cannot join all witnesses to the new reading" )
 730                 if scalar( @wits ) == scalar( $r->witnesses );
 731
 732         # Get all the reading attributes and duplicate them.
 733         my $rmeta = Text::Tradition::Collation::Reading->meta;
 734         my %args;
 735     foreach my $attr( $rmeta->get_all_attributes ) {
 736                 next if $attr->name =~ /^_/;
 737                 my $acc = $attr->get_read_method;
 738                 if( !$acc && $attr->has_applied_traits ) {
 739                         my $tr = $attr->applied_traits;
 740                         if( $tr->[0] =~ /::(Array|Hash)$/ ) {
 741                                 my $which = $1;
 742                                 my %methods = reverse %{$attr->handles};
 743                                 $acc = $methods{elements};
 744                                 $args{$attr->name} = $which eq 'Array'
 745                                         ? [ $r->$acc ] : { $r->$acc };
 746                         }
 747                 } else {
 748                         $args{$attr->name} = $r->$acc if $acc;
 749                 }
 750         }
 751         # By definition the new reading will no longer be common.
 752         $args{is_common} = 0;
 753         # The new reading also needs its own ID.
 754         $args{id} = $self->_generate_dup_id( $r->id );
 755
 756         # Try to make the new reading.
 757         my $newr = $self->add_reading( \%args );
 758         # The old reading is also no longer common.
 759         $r->is_common( 0 );
 760
 761         # For each of the witnesses, dissociate from the old reading and
 762         # associate with the new.
 763         foreach my $wit ( @wits ) {
 764                 my $prior = $self->prior_reading( $r, $wit );
 765                 my $next = $self->next_reading( $r, $wit );
 766                 $self->del_path( $prior, $r, $wit );
 767                 $self->add_path( $prior, $newr, $wit );
 768                 $self->del_path( $r, $next, $wit );
 769                 $self->add_path( $newr, $next, $wit );
 770         }
 771
 772         # If the graph is ranked, we need to look for relationships that are now
 773         # invalid (i.e. 'non-colocation' types that might now be colocated) and
 774         # remove them. If not, we can skip it.
 775         my $succ;
 776         my %rrk;
 777         my @deleted_relations;
 778         if( $self->end->has_rank ) {
 779                 # Find the point where we can stop checking
 780                 $succ = $self->common_successor( $r, $newr );
 781
 782                 # Hash the existing ranks
 783                 foreach my $rdg ( $self->readings ) {
 784                         $rrk{$rdg->id} = $rdg->rank;
 785                 }
 786                 # Calculate the new ranks
 787                 $self->calculate_ranks();
 788
 789                 # Check for invalid non-colocated relationships among changed-rank readings
 790                 # from where the ranks start changing up to $succ
 791                 my $lastrank = $succ->rank;
 792                 foreach my $rdg ( $self->readings ) {
 793                         next if $rdg->rank > $lastrank;
 794                         next if $rdg->rank == $rrk{$rdg->id};
 795                         my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
 796                         next unless @noncolo;
 797                         foreach my $nc ( @noncolo ) {
 798                                 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
 799                                         push( @deleted_relations, [ $rdg->id, $nc->id ] );
 800                                 }
 801                         }
 802                 }
 803         }
 804         return ( $newr, @deleted_relations );
 805 }
 806
 807 sub _generate_dup_id {
 808         my( $self, $rid ) = @_;
 809         my $newid;
 810         my $i = 0;
 811         while( !$newid ) {
 812                 $newid = $rid."_$i";
 813                 if( $self->has_reading( $newid ) ) {
 814                         $newid = '';
 815                         $i++;
 816                 }
 817         }
 818         return $newid;
 819 }
 820
 821 ### Path logic
 822
 823 sub add_path {
 824         my $self = shift;
 825
 826         # We only need the IDs for adding paths to the graph, not the reading
 827         # objects themselves.
 828     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 829
 830         $self->_graphcalc_done(0);
 831         # Connect the readings
 832         unless( $self->sequence->has_edge( $source, $target ) ) {
 833             $self->sequence->add_edge( $source, $target );
 834             $self->relations->add_equivalence_edge( $source, $target );
 835         }
 836     # Note the witness in question
 837     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 838 }
 839
 840 sub del_path {
 841         my $self = shift;
 842         my @args;
 843         if( ref( $_[0] ) eq 'ARRAY' ) {
 844                 my $e = shift @_;
 845                 @args = ( @$e, @_ );
 846         } else {
 847                 @args = @_;
 848         }
 849
 850         # We only need the IDs for removing paths from the graph, not the reading
 851         # objects themselves.
 852     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 853
 854         $self->_graphcalc_done(0);
 855         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 856                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 857         }
 858         unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
 859                 $self->sequence->delete_edge( $source, $target );
 860                 $self->relations->delete_equivalence_edge( $source, $target );
 861         }
 862 }
 863
 864
 865 # Extra graph-alike utility
 866 sub has_path {
 867         my $self = shift;
 868     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 869         return undef unless $self->sequence->has_edge( $source, $target );
 870         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 871 }
 872
 873 =head2 clear_witness( @sigil_list )
 874
 875 Clear the given witnesses out of the collation entirely, removing references
 876 to them in paths, and removing readings that belong only to them.  Should only
 877 be called via $tradition->del_witness.
 878
 879 =cut
 880
 881 sub clear_witness {
 882         my( $self, @sigils ) = @_;
 883
 884         $self->_graphcalc_done(0);
 885         # Clear the witness(es) out of the paths
 886         foreach my $e ( $self->paths ) {
 887                 foreach my $sig ( @sigils ) {
 888                         $self->del_path( $e, $sig );
 889                 }
 890         }
 891
 892         # Clear out the newly unused readings
 893         foreach my $r ( $self->readings ) {
 894                 unless( $self->reading_witnesses( $r ) ) {
 895                         $self->del_reading( $r );
 896                 }
 897         }
 898 }
 899
 900 sub add_relationship {
 901         my $self = shift;
 902     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 903     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 904     my $did_recalc;
 905     foreach my $v ( @vectors ) {
 906         my $rel = $self->get_relationship( $v );
 907         next unless $rel->colocated;
 908         my $r1 = $self->reading( $v->[0] );
 909         my $r2 =  $self->reading( $v->[1] );
 910         # If it's a spelling or orthographic relationship, and one is marked
 911         # as a lemma, set the normal form on the non-lemma to reflect that.
 912         if( $r1->does( 'Text::Tradition::Morphology' ) ) {
 913                 $r1->relationship_added( $r2, $rel );
 914         }
 915         next if $did_recalc;
 916         if( $r1->has_rank && $r2->has_rank && $r1->rank ne $r2->rank ) {
 917                         $self->_graphcalc_done(0);
 918                         $self->_clear_cache;
 919                         $did_recalc = 1;
 920         }
 921     }
 922     return @vectors;
 923 }
 924
 925 around qw/ get_relationship del_relationship / => sub {
 926         my $orig = shift;
 927         my $self = shift;
 928         my @args = @_;
 929         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 930                 @args = @{$_[0]};
 931         }
 932         my @stringargs = $self->_stringify_args( @args );
 933         $self->$orig( @stringargs );
 934 };
 935
 936 =head2 reading_witnesses( $reading )
 937
 938 Return a list of sigils corresponding to the witnesses in which the reading appears.
 939
 940 =cut
 941
 942 sub reading_witnesses {
 943         my( $self, $reading ) = @_;
 944         # We need only check either the incoming or the outgoing edges; I have
 945         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 946         if( $reading eq $self->start ) {
 947                 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
 948         }
 949         my %all_witnesses;
 950         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 951                 my $wits = $self->sequence->get_edge_attributes( @$e );
 952                 @all_witnesses{ keys %$wits } = 1;
 953         }
 954         my $acstr = $self->ac_label;
 955         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 956                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 957         }
 958         return keys %all_witnesses;
 959 }
 960
 961 =head1 OUTPUT METHODS
 962
 963 =head2 as_svg( \%options )
 964
 965 Returns an SVG string that represents the graph, via as_dot and graphviz.
 966 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 967
 968 =cut
 969
 970 sub as_svg {
 971     my( $self, $opts ) = @_;
 972     throw( "Need GraphViz installed to output SVG" )
 973         unless File::Which::which( 'dot' );
 974     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 975     $self->calculate_ranks()
 976         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 977         my @cmd = qw/dot -Tsvg/;
 978         my( $svg, $err );
 979         my $dotfile = File::Temp->new();
 980         ## USE FOR DEBUGGING
 981         # $dotfile->unlink_on_destroy(0);
 982         binmode $dotfile, ':utf8';
 983         print $dotfile $self->as_dot( $opts );
 984         push( @cmd, $dotfile->filename );
 985         run( \@cmd, ">", binary(), \$svg );
 986         $svg = decode_utf8( $svg );
 987         return $svg;
 988 }
 989
 990
 991 =head2 as_dot( \%options )
 992
 993 Returns a string that is the collation graph expressed in dot
 994 (i.e. GraphViz) format.  Options include:
 995
 996 =over 4
 997
 998 =item * from
 999
1000 =item * to
1001
1002 =item * color_common
1003
1004 =back
1005
1006 =cut
1007
1008 sub as_dot {
1009     my( $self, $opts ) = @_;
1010     my $startrank = $opts->{'from'} if $opts;
1011     my $endrank = $opts->{'to'} if $opts;
1012     my $color_common = $opts->{'color_common'} if $opts;
1013     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
1014        && $self->end->rank > 100;
1015     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
1016
1017     # Check the arguments
1018     if( $startrank ) {
1019         return if $endrank && $startrank > $endrank;
1020         return if $startrank > $self->end->rank;
1021         }
1022         if( defined $endrank ) {
1023                 return if $endrank < 0;
1024                 $endrank = undef if $endrank == $self->end->rank;
1025         }
1026
1027     my $graph_name = $self->tradition->name;
1028     $graph_name =~ s/[^\w\s]//g;
1029     $graph_name = join( '_', split( /\s+/, $graph_name ) );
1030
1031     my %graph_attrs = (
1032         'rankdir' => 'LR',
1033         'bgcolor' => 'none',
1034         );
1035     my %node_attrs = (
1036         'fontsize' => 14,
1037         'fillcolor' => 'white',
1038         'style' => 'filled',
1039         'shape' => 'ellipse'
1040         );
1041     my %edge_attrs = (
1042         'arrowhead' => 'open',
1043         'color' => '#000000',
1044         'fontcolor' => '#000000',
1045         );
1046
1047     my $dot = sprintf( "digraph %s {\n", $graph_name );
1048     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1049     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1050
1051         # Output substitute start/end readings if necessary
1052         if( $startrank ) {
1053                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1054         }
1055         if( $endrank ) {
1056                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1057         }
1058         if( $STRAIGHTENHACK ) {
1059                 ## HACK part 1
1060                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1061                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1062                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1063         }
1064         my %used;  # Keep track of the readings that actually appear in the graph
1065         # Sort the readings by rank if we have ranks; this speeds layout.
1066         my @all_readings = $self->end->has_rank
1067                 ? sort { $a->rank <=> $b->rank } $self->readings
1068                 : $self->readings;
1069         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1070     foreach my $reading ( @all_readings ) {
1071         # Only output readings within our rank range.
1072         next if $startrank && $reading->rank < $startrank;
1073         next if $endrank && $reading->rank > $endrank;
1074         $used{$reading->id} = 1;
1075         # Need not output nodes without separate labels
1076         next if $reading->id eq $reading->text;
1077         my $rattrs;
1078         my $label = $reading->text;
1079         unless( $label =~ /^[[:punct:]]+$/ ) {
1080                 $label .= '-' if $reading->join_next;
1081             $label = "-$label" if $reading->join_prior;
1082         }
1083         $label =~ s/\"/\\\"/g;
1084                 $rattrs->{'label'} = $label;
1085                 $rattrs->{'id'} = $reading->id;
1086                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1087         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1088     }
1089
1090         # Add the real edges.
1091     my @edges = $self->paths;
1092         my( %substart, %subend );
1093     foreach my $edge ( @edges ) {
1094         # Do we need to output this edge?
1095         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1096                 my $label = $self->_path_display_label( $opts,
1097                         $self->path_witnesses( $edge ) );
1098                         my $variables = { %edge_attrs, 'label' => $label };
1099
1100                         # Account for the rank gap if necessary
1101                         my $rank0 = $self->reading( $edge->[0] )->rank
1102                                 if $self->reading( $edge->[0] )->has_rank;
1103                         my $rank1 = $self->reading( $edge->[1] )->rank
1104                                 if $self->reading( $edge->[1] )->has_rank;
1105                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1106                                 $variables->{'minlen'} = $rank1 - $rank0;
1107                         }
1108
1109                         # EXPERIMENTAL: make edge width reflect no. of witnesses
1110                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1111                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1112
1113                         my $varopts = _dot_attr_string( $variables );
1114                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1115                                 $edge->[0], $edge->[1], $varopts );
1116         } elsif( $used{$edge->[0]} ) {
1117                 $subend{$edge->[0]} = $edge->[1];
1118         } elsif( $used{$edge->[1]} ) {
1119                 $substart{$edge->[1]} = $edge->[0];
1120         }
1121     }
1122
1123     # If we are asked to, add relationship links
1124     if( exists $opts->{show_relations} ) {
1125         my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1126         if( $filter eq 'transposition' ) {
1127                 $filter =~ qr/^transposition$/;
1128         }
1129         my %typecolors;
1130         my @types = sort( map { $_->name } $self->relations->types );
1131         if( exists $opts->{graphcolors} ) {
1132                 foreach my $tdx ( 0 .. $#types ) {
1133                         $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1134                 }
1135         } else {
1136                 map { $typecolors{$_} = '#FFA14F' } @types;
1137         }
1138         foreach my $redge ( $self->relationships ) {
1139                 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1140                                 my $rel = $self->get_relationship( $redge );
1141                                 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1142                                 my $variables = {
1143                                         arrowhead => 'none',
1144                                         color => $typecolors{$rel->type},
1145                                         constraint => 'false',
1146                                         penwidth => '3',
1147                                 };
1148                                 unless( exists $opts->{graphcolors} ) {
1149                                         $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1150                                 }
1151                                 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1152                                         $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1153                 }
1154         }
1155     }
1156
1157     # Add substitute start and end edges if necessary
1158     foreach my $node ( keys %substart ) {
1159         my $witstr = $self->_path_display_label( $opts,
1160                 $self->path_witnesses( $substart{$node}, $node ) );
1161         my $variables = { %edge_attrs, 'label' => $witstr };
1162         my $nrdg = $self->reading( $node );
1163         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1164                 # Substart is actually one lower than $startrank
1165                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1166         }
1167         my $varopts = _dot_attr_string( $variables );
1168         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1169         }
1170     foreach my $node ( keys %subend ) {
1171         my $witstr = $self->_path_display_label( $opts,
1172                 $self->path_witnesses( $node, $subend{$node} ) );
1173         my $variables = { %edge_attrs, 'label' => $witstr };
1174         my $varopts = _dot_attr_string( $variables );
1175         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1176         }
1177         # HACK part 2
1178         if( $STRAIGHTENHACK ) {
1179                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1180                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1181         }
1182
1183     $dot .= "}\n";
1184     return $dot;
1185 }
1186
1187 sub _dot_attr_string {
1188         my( $hash ) = @_;
1189         my @attrs;
1190         foreach my $k ( sort keys %$hash ) {
1191                 my $v = $hash->{$k};
1192                 push( @attrs, $k.'="'.$v.'"' );
1193         }
1194         return( '[ ' . join( ', ', @attrs ) . ' ]' );
1195 }
1196
1197 =head2 path_witnesses( $edge )
1198
1199 Returns the list of sigils whose witnesses are associated with the given edge.
1200 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1201
1202 =cut
1203
1204 sub path_witnesses {
1205         my( $self, @edge ) = @_;
1206         # If edge is an arrayref, cope.
1207         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1208                 my $e = shift @edge;
1209                 @edge = @$e;
1210         }
1211         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1212         return @wits;
1213 }
1214
1215 # Helper function. Make a display label for the given witnesses, showing a.c.
1216 # witnesses only where the main witness is not also in the list.
1217 sub _path_display_label {
1218         my $self = shift;
1219         my $opts = shift;
1220         my %wits;
1221         map { $wits{$_} = 1 } @_;
1222
1223         # If an a.c. wit is listed, remove it if the main wit is also listed.
1224         # Otherwise keep it for explicit listing.
1225         my $aclabel = $self->ac_label;
1226         my @disp_ac;
1227         foreach my $w ( sort keys %wits ) {
1228                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1229                         if( exists $wits{$1} ) {
1230                                 delete $wits{$w};
1231                         } else {
1232                                 push( @disp_ac, $w );
1233                         }
1234                 }
1235         }
1236
1237         if( $opts->{'explicit_wits'} ) {
1238                 return join( ', ', sort keys %wits );
1239         } else {
1240                 # See if we are in a majority situation.
1241                 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1242                 $maj = $maj > 5 ? $maj : 5;
1243                 if( scalar keys %wits > $maj ) {
1244                         unshift( @disp_ac, 'majority' );
1245                         return join( ', ', @disp_ac );
1246                 } else {
1247                         return join( ', ', sort keys %wits );
1248                 }
1249         }
1250 }
1251
1252 =head2 as_adjacency_list
1253
1254 Returns a JSON structure that represents the collation sequence graph.
1255
1256 =begin testing
1257
1258 use JSON qw/ from_json /;
1259 use Text::Tradition;
1260
1261 my $t = Text::Tradition->new(
1262         'input' => 'Self',
1263         'file' => 't/data/florilegium_graphml.xml' );
1264 my $c = $t->collation;
1265
1266 # Make a connection so we can test rank preservation
1267 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1268
1269 # Create an adjacency list of the whole thing; test the output.
1270 my $adj_whole = from_json( $c->as_adjacency_list() );
1271 is( scalar @$adj_whole, scalar $c->readings(),
1272         "Same number of nodes in graph and adjacency list" );
1273 my @adj_whole_edges;
1274 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1275 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1276         "Same number of edges in graph and adjacency list" );
1277 # Find the reading whose rank should be preserved
1278 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1279 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1280 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1281
1282 # Now create an adjacency list of just a portion. w76 to w122
1283 my $adj_part = from_json( $c->as_adjacency_list(
1284         { from => $c->reading('w76')->rank,
1285           to   => $c->reading('w122')->rank }));
1286 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1287 my @adj_part_edges;
1288 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1289 is( scalar @adj_part_edges, 58,
1290         "Same number of edges in partial graph and adjacency list" );
1291 # Check for consistency
1292 my %part_nodes;
1293 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1294 foreach my $edge ( @adj_part_edges ) {
1295         my $testid = $edge->{id};
1296         ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1297 }
1298
1299 =end testing
1300
1301 =cut
1302
1303 sub as_adjacency_list {
1304         my( $self, $opts ) = @_;
1305         # Make a structure that contains all the nodes, the nodes they point to,
1306         # and the attributes of the edges that connect them.
1307         # [ { id: 'n0', label: 'Gallia', adjacent: [
1308         #               { id: 'n1', label: 'P Q' } ,
1309         #               { id: 'n2', label: 'R S', minlen: 2 } ] },
1310         #   { id: 'n1', label: 'est', adjacent: [ ... ] },
1311         #   ... ]
1312         my $startrank = $opts->{'from'} || 0;
1313         my $endrank = $opts->{'to'} || $self->end->rank;
1314
1315     $self->calculate_ranks()
1316         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1317         my $list = [];
1318         foreach my $rdg ( $self->readings ) {
1319                 my @successors;
1320                 my $phony = '';
1321                 # Figure out what the node's successors should be.
1322                 if( $rdg eq $self->start && $startrank > 0 ) {
1323                         # Connect the start node with all the nodes at startrank.
1324                         # Lacunas should be included only if the node really has that rank.
1325                         @successors = $self->readings_at_rank( $startrank, 1 );
1326                         $phony = 'start';
1327                 } elsif( $rdg->rank < $startrank
1328                                  || $rdg->rank > $endrank && $rdg ne $self->end ) {
1329                         next;
1330                 } else {
1331                         @successors = $rdg->successors;
1332                 }
1333                 # Make sure that the end node is at the end of the successors
1334                 # list if it is needed.
1335                 if( grep { $_ eq $self->end } @successors ) {
1336                         my @ts = grep { $_ ne $self->end } @successors;
1337                         @successors = ( @ts, $self->end );
1338                 } elsif ( grep { $_->rank > $endrank } @successors ) {
1339                         push( @successors, $self->end );
1340                 }
1341
1342                 my $listitem = { id => $rdg->id, label => $rdg->text };
1343                 my $adjacent = [];
1344                 my @endwits;
1345                 foreach my $succ ( @successors ) {
1346                         my @edgewits;
1347                         if( $phony eq 'start' ) {
1348                                 @edgewits = $succ->witnesses;
1349                         } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1350                                 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1351                         }
1352
1353                         if( $succ eq $self->end ) {
1354                                 @edgewits = @endwits;
1355                         } elsif( $succ->rank > $endrank ) {
1356                                 # These witnesses will point to 'end' instead, not to the
1357                                 # actual successor.
1358                                 push( @endwits, @edgewits );
1359                                 next;
1360                         }
1361                         my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1362                         my $edgedef = { id => $succ->id, label => $edgelabel };
1363                         my $rankoffset = $succ->rank - $rdg->rank;
1364                         if( $rankoffset > 1 and $succ ne $self->end ) {
1365                                 $edgedef->{minlen} = $rankoffset;
1366                         }
1367                         push( @$adjacent, $edgedef );
1368                 }
1369                 $listitem->{adjacent} = $adjacent;
1370                 push( @$list, $listitem );
1371         }
1372         return to_json( $list );
1373 }
1374
1375 =head2 as_graphml
1376
1377 Returns a GraphML representation of the collation.  The GraphML will contain
1378 two graphs. The first expresses the attributes of the readings and the witness
1379 paths that link them; the second expresses the relationships that link the
1380 readings.  This is the native transfer format for a tradition.
1381
1382 =begin testing
1383
1384 use Text::Tradition;
1385 use TryCatch;
1386
1387 my $READINGS = 311;
1388 my $PATHS = 361;
1389
1390 my $datafile = 't/data/florilegium_tei_ps.xml';
1391 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1392                                       'name' => 'test0',
1393                                       'file' => $datafile,
1394                                       'linear' => 1 );
1395
1396 ok( $tradition, "Got a tradition object" );
1397 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1398 ok( $tradition->collation, "Tradition has a collation" );
1399
1400 my $c = $tradition->collation;
1401 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1402 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1403 is( scalar $c->relationships, 0, "Collation has all relationships" );
1404
1405 # Add a few relationships
1406 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1407 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1408 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1409                                           'is_significant' => 'yes' } );
1410
1411 # Now write it to GraphML and parse it again.
1412
1413 my $graphml = $c->as_graphml;
1414 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1415 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1416 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1417 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1418 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1419 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1420
1421 # Now add a stemma, write to GraphML, and look at the output.
1422 SKIP: {
1423         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1424         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1425         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1426         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1427         $graphml = $c->as_graphml;
1428         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1429 }
1430
1431 =end testing
1432
1433 =cut
1434
1435 ## TODO MOVE this to Tradition.pm and modularize it better
1436 sub as_graphml {
1437     my( $self, $options ) = @_;
1438         $self->calculate_ranks unless $self->_graphcalc_done;
1439
1440         my $start = $options->{'from'}
1441                 ? $self->reading( $options->{'from'} ) : $self->start;
1442         my $end = $options->{'to'}
1443                 ? $self->reading( $options->{'to'} ) : $self->end;
1444         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1445                 throw( 'Start node must be before end node' );
1446         }
1447         # The readings need to be ranked for this to work.
1448         $start = $self->start unless $start->has_rank;
1449         $end = $self->end unless $end->has_rank;
1450         my $rankoffset = 0;
1451         unless( $start eq $self->start ) {
1452                 $rankoffset = $start->rank - 1;
1453         }
1454         my %use_readings;
1455
1456     # Some namespaces
1457     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1458     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1459     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1460         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1461
1462     # Create the document and root node
1463     require XML::LibXML;
1464     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1465     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1466     $graphml->setDocumentElement( $root );
1467     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1468     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1469
1470     # List of attribute types to save on our objects and their corresponding
1471     # GraphML types
1472     my %save_types = (
1473         'Str' => 'string',
1474         'Int' => 'int',
1475         'Bool' => 'boolean',
1476         'ReadingID' => 'string',
1477         'RelationshipType' => 'string',
1478         'RelationshipScope' => 'string',
1479         'Ternary' => 'string',
1480     );
1481
1482     # Add the data keys for the graph. Include an extra key 'version' for the
1483     # GraphML output version.
1484     my %graph_data_keys;
1485     my $gdi = 0;
1486     my %graph_attributes = ( 'version' => 'string' );
1487         # Graph attributes include those of Tradition and those of Collation.
1488         my %gattr_from;
1489         # TODO Use meta introspection method from duplicate_reading to do this
1490         # instead of naming custom keys.
1491         my $tmeta = $self->tradition->meta;
1492         my $cmeta = $self->meta;
1493         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1494         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1495         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1496                 next if $attr->name =~ /^_/;
1497                 next unless $save_types{$attr->type_constraint->name};
1498                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1499         }
1500     # Extra custom keys for complex objects that should be saved in some form.
1501     # The subroutine should return a string, or undef/empty.
1502     if( $tmeta->has_method('stemmata') ) {
1503                 $graph_attributes{'stemmata'} = sub {
1504                         my @stemstrs;
1505                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1506                                 $self->tradition->stemmata;
1507                         join( "\n", @stemstrs );
1508                 };
1509         }
1510
1511         if( $tmeta->has_method('user') ) {
1512                 $graph_attributes{'user'} = sub {
1513                         $self->tradition->user ? $self->tradition->user->id : undef
1514                 };
1515         }
1516
1517     foreach my $datum ( sort keys %graph_attributes ) {
1518         $graph_data_keys{$datum} = 'dg'.$gdi++;
1519         my $key = $root->addNewChild( $graphml_ns, 'key' );
1520         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1521                 : $graph_attributes{$datum};
1522         $key->setAttribute( 'attr.name', $datum );
1523         $key->setAttribute( 'attr.type', $dtype );
1524         $key->setAttribute( 'for', 'graph' );
1525         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1526     }
1527
1528     # Add the data keys for reading nodes
1529     my %reading_attributes;
1530     my $rmeta = Text::Tradition::Collation::Reading->meta;
1531     foreach my $attr( $rmeta->get_all_attributes ) {
1532                 next if $attr->name =~ /^_/;
1533                 next unless $save_types{$attr->type_constraint->name};
1534                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1535         }
1536         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1537                 # Extra custom key for the reading morphology
1538                 $reading_attributes{'lexemes'} = 'string';
1539         }
1540
1541     my %node_data_keys;
1542     my $ndi = 0;
1543     foreach my $datum ( sort keys %reading_attributes ) {
1544         $node_data_keys{$datum} = 'dn'.$ndi++;
1545         my $key = $root->addNewChild( $graphml_ns, 'key' );
1546         $key->setAttribute( 'attr.name', $datum );
1547         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1548         $key->setAttribute( 'for', 'node' );
1549         $key->setAttribute( 'id', $node_data_keys{$datum} );
1550     }
1551
1552     # Add the data keys for edges, that is, paths and relationships. Path
1553     # data does not come from a Moose class so is here manually.
1554     my $edi = 0;
1555     my %edge_data_keys;
1556     my %edge_attributes = (
1557         witness => 'string',                    # ID/label for a path
1558         extra => 'boolean',                             # Path key
1559         );
1560     my @path_attributes = keys %edge_attributes; # track our manual additions
1561     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1562     foreach my $attr( $pmeta->get_all_attributes ) {
1563                 next if $attr->name =~ /^_/;
1564                 next unless $save_types{$attr->type_constraint->name};
1565                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1566         }
1567     foreach my $datum ( sort keys %edge_attributes ) {
1568         $edge_data_keys{$datum} = 'de'.$edi++;
1569         my $key = $root->addNewChild( $graphml_ns, 'key' );
1570         $key->setAttribute( 'attr.name', $datum );
1571         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1572         $key->setAttribute( 'for', 'edge' );
1573         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1574     }
1575
1576     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1577     my $xmlidname = $self->tradition->name;
1578     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1579     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1580         $xmlidname = '_'.$xmlidname;
1581     }
1582     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1583     $sgraph->setAttribute( 'edgedefault', 'directed' );
1584     $sgraph->setAttribute( 'id', $xmlidname );
1585     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1586     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1587     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1588     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1589     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1590
1591     # Tradition/collation attribute data
1592     foreach my $datum ( keys %graph_attributes ) {
1593         my $value;
1594         if( $datum eq 'version' ) {
1595                 $value = '3.2';
1596         } elsif( ref( $graph_attributes{$datum} ) ) {
1597                 my $sub = $graph_attributes{$datum};
1598                 $value = &$sub();
1599         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1600                 $value = $self->tradition->$datum;
1601         } else {
1602                 $value = $self->$datum;
1603         }
1604                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1605         }
1606
1607     my $node_ctr = 0;
1608     my %node_hash;
1609     # Add our readings to the graph
1610     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1611         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1612                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1613         $use_readings{$n->id} = 1;
1614         # Add to the main graph
1615         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1616         my $node_xmlid = 'n' . $node_ctr++;
1617         $node_hash{ $n->id } = $node_xmlid;
1618         $node_el->setAttribute( 'id', $node_xmlid );
1619         foreach my $d ( keys %reading_attributes ) {
1620                 my $nval = $n->$d;
1621                 # Custom serialization
1622                 if( $d eq 'lexemes' ) {
1623                                 # If nval is a true value, we have lexemes so we need to
1624                                 # serialize them. Otherwise set nval to undef so that the
1625                                 # key is excluded from this reading.
1626                         $nval = $nval ? $n->_serialize_lexemes : undef;
1627                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1628                         $nval = undef;
1629                 }
1630                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1631                         # Adjust the ranks within the subgraph.
1632                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1633                                 : $nval - $rankoffset;
1634                 }
1635                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1636                         if defined $nval;
1637         }
1638     }
1639
1640     # Add the path edges to the sequence graph
1641     my $edge_ctr = 0;
1642     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1643         # We add an edge in the graphml for every witness in $e.
1644         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1645         my @edge_wits = sort $self->path_witnesses( $e );
1646         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1647         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1648         # Skip any path from start to end; that witness is not in the subgraph.
1649         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1650         foreach my $wit ( @edge_wits ) {
1651                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1652                                                                                 $node_hash{ $e->[0] },
1653                                                                                 $node_hash{ $e->[1] } );
1654                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1655                         $edge_el->setAttribute( 'source', $from );
1656                         $edge_el->setAttribute( 'target', $to );
1657                         $edge_el->setAttribute( 'id', $id );
1658
1659                         # It's a witness path, so add the witness
1660                         my $base = $wit;
1661                         my $key = $edge_data_keys{'witness'};
1662                         # Is this an ante-corr witness?
1663                         my $aclabel = $self->ac_label;
1664                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1665                                 # Keep the base witness
1666                                 $base = $1;
1667                                 # ...and record that this is an 'extra' reading path
1668                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1669                         }
1670                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1671                 }
1672         }
1673
1674         # Report the actual number of nodes and edges that went in
1675         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1676         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1677
1678         # Add the relationship graph to the XML
1679         map { delete $edge_data_keys{$_} } @path_attributes;
1680         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1681                 $node_data_keys{'id'}, \%edge_data_keys );
1682
1683     # Save and return the thing
1684     my $result = decode_utf8( $graphml->toString(1) );
1685     return $result;
1686 }
1687
1688 sub _add_graphml_data {
1689     my( $el, $key, $value ) = @_;
1690     return unless defined $value;
1691     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1692     $data_el->setAttribute( 'key', $key );
1693     $data_el->appendText( $value );
1694 }
1695
1696 =head2 as_csv
1697
1698 Returns a CSV alignment table representation of the collation graph, one
1699 row per witness (or witness uncorrected.)
1700
1701 =head2 as_tsv
1702
1703 Returns a tab-separated alignment table representation of the collation graph,
1704 one row per witness (or witness uncorrected.)
1705
1706 =begin testing
1707
1708 use Text::Tradition;
1709 use Text::CSV;
1710
1711 my $READINGS = 311;
1712 my $PATHS = 361;
1713 my $WITS = 13;
1714 my $WITAC = 4;
1715
1716 my $datafile = 't/data/florilegium_tei_ps.xml';
1717 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1718                                       'name' => 'test0',
1719                                       'file' => $datafile,
1720                                       'linear' => 1 );
1721
1722 my $c = $tradition->collation;
1723 # Export the thing to CSV
1724 my $csvstr = $c->as_csv();
1725 # Count the columns
1726 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1727 my @lines = split(/\n/, $csvstr );
1728 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1729 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1730 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1731 ok( @q_ac, "Found a layered witness" );
1732
1733 my $t2 = Text::Tradition->new( input => 'Tabular',
1734                                                            name => 'test2',
1735                                                            string => $csvstr,
1736                                                            sep_char => ',' );
1737 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1738 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1739
1740 # Now do it with TSV
1741 my $tsvstr = $c->as_tsv();
1742 my $t3 = Text::Tradition->new( input => 'Tabular',
1743                                                            name => 'test3',
1744                                                            string => $tsvstr,
1745                                                            sep_char => "\t" );
1746 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1747 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1748
1749 my $table = $c->alignment_table;
1750 my $noaccsv = $c->as_csv({ noac => 1 });
1751 my @noaclines = split(/\n/, $noaccsv );
1752 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1753 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1754 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1755
1756 my $safecsv = $c->as_csv({ safe_ac => 1});
1757 my @safelines = split(/\n/, $safecsv );
1758 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1759 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1760 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1761 ok( @q_ac, "Found a sanitized layered witness" );
1762 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1763
1764 # Test relationship collapse
1765 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1766 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1767
1768 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1769 my $t4 = Text::Tradition->new( input => 'Tabular',
1770                                                            name => 'test4',
1771                                                            string => $mergedtsv,
1772                                                            sep_char => "\t" );
1773 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1774 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1775
1776 # Test non-ASCII sigla
1777 my $t5 = Text::Tradition->new( input => 'Tabular',
1778                                                            name => 'nonascii',
1779                                                            file => 't/data/armexample.xlsx',
1780                                                            excel => 'xlsx' );
1781 my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1782 my @awitlines = split( /\n/, $awittsv );
1783 like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1784
1785 =end testing
1786
1787 =cut
1788
1789 sub _tabular {
1790     my( $self, $opts ) = @_;
1791     my $table = $self->alignment_table( $opts );
1792         my $csv_options = { binary => 1, quote_null => 0 };
1793         $csv_options->{'sep_char'} = $opts->{fieldsep};
1794         if( $opts->{fieldsep} eq "\t" ) {
1795                 # If it is really tab separated, nothing is an escape char.
1796                 $csv_options->{'quote_char'} = undef;
1797                 $csv_options->{'escape_char'} = '';
1798         }
1799     my $csv = Text::CSV->new( $csv_options );
1800     my @result;
1801
1802     # Make the header row
1803     my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1804     if( $opts->{ascii} ) {
1805         # TODO think of a fix for this
1806         throw( "Cannot currently produce ASCII sigla with witness layers" )
1807                 unless $opts->{noac};
1808         my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1809         @witnesses = @awits;
1810     }
1811     $csv->combine( @witnesses );
1812         push( @result, $csv->string );
1813
1814     # Make the rest of the rows
1815     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1816         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1817         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1818         # Quick and dirty collapse of requested relationship types
1819         if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1820                 # Now substitute the reading in the relevant index of @row
1821                 # for its merge-related reading
1822                 my %substitutes;
1823                 while( @rowobjs ) {
1824                         my $thisr = shift @rowobjs;
1825                         next unless $thisr;
1826                                 next if exists $substitutes{$thisr->{t}->text};
1827                                 # Make sure we don't have A <-> B substitutions.
1828                                 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1829                         foreach my $thatr ( @rowobjs ) {
1830                                 next unless $thatr;
1831                                 next if exists $substitutes{$thatr->{t}->text};
1832                                 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1833                                 next unless $ttrel;
1834                                 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1835                                 # If we have got this far then we need to merge them.
1836                                 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1837                         }
1838                 }
1839                 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1840         }
1841         $csv->combine( @row );
1842         push( @result, $csv->string );
1843     }
1844     return join( "\n", @result );
1845 }
1846
1847 sub as_csv {
1848         my $self = shift;
1849         my $opts = shift || {};
1850         $opts->{fieldsep} = ',';
1851         return $self->_tabular( $opts );
1852 }
1853
1854 sub as_tsv {
1855         my $self = shift;
1856         my $opts = shift || {};
1857         $opts->{fieldsep} = "\t";
1858         return $self->_tabular( $opts );
1859 }
1860
1861 =head2 alignment_table
1862
1863 Return a reference to an alignment table, in a slightly enhanced CollateX
1864 format which looks like this:
1865
1866  $table = { alignment => [ { witness => "SIGIL",
1867                              tokens => [ { t => "TEXT" }, ... ] },
1868                            { witness => "SIG2",
1869                              tokens => [ { t => "TEXT" }, ... ] },
1870                            ... ],
1871             length => TEXTLEN };
1872
1873 =cut
1874
1875 sub alignment_table {
1876     my( $self, $opts ) = @_;
1877     if( $self->has_cached_table ) {
1878                 return $self->cached_table
1879                         unless $opts->{noac} || $opts->{safe_ac};
1880     }
1881
1882     # Make sure we can do this
1883         throw( "Need a linear graph in order to make an alignment table" )
1884                 unless $self->linear;
1885     $self->calculate_ranks()
1886         unless $self->_graphcalc_done && $self->end->has_rank;
1887
1888     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1889     my @all_pos = ( 1 .. $self->end->rank - 1 );
1890     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1891         # say STDERR "Making witness row(s) for " . $wit->sigil;
1892         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1893         my @row = _make_witness_row( \@wit_path, \@all_pos );
1894         my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1895         $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1896         push( @{$table->{'alignment'}}, $witobj );
1897         if( $wit->is_layered && !$opts->{noac} ) {
1898                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1899                         $wit->sigil.$self->ac_label );
1900             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1901             my $witlabel = $opts->{safe_ac}
1902                 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1903             my $witacobj = { 'witness' => $witlabel,
1904                 'tokens' => \@ac_row };
1905             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1906                         push( @{$table->{'alignment'}}, $witacobj );
1907         }
1908     }
1909     unless( $opts->{noac} || $opts->{safe_ac} ) {
1910             $self->cached_table( $table );
1911         }
1912     return $table;
1913 }
1914
1915 sub _make_witness_row {
1916     my( $path, $positions ) = @_;
1917     my %char_hash;
1918     map { $char_hash{$_} = undef } @$positions;
1919     my $debug = 0;
1920     foreach my $rdg ( @$path ) {
1921         say STDERR "rank " . $rdg->rank if $debug;
1922         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1923         $char_hash{$rdg->rank} = { 't' => $rdg };
1924     }
1925     my @row = map { $char_hash{$_} } @$positions;
1926     # Fill in lacuna markers for undef spots in the row
1927     my $last_el = shift @row;
1928     my @filled_row = ( $last_el );
1929     foreach my $el ( @row ) {
1930         # If we are using node reference, make the lacuna node appear many times
1931         # in the table.  If not, use the lacuna tag.
1932         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1933             $el = $last_el;
1934         }
1935         push( @filled_row, $el );
1936         $last_el = $el;
1937     }
1938     return @filled_row;
1939 }
1940
1941
1942 =head1 NAVIGATION METHODS
1943
1944 =head2 reading_sequence( $first, $last, $sigil, $backup )
1945
1946 Returns the ordered list of readings, starting with $first and ending
1947 with $last, for the witness given in $sigil. If a $backup sigil is
1948 specified (e.g. when walking a layered witness), it will be used wherever
1949 no $sigil path exists.  If there is a base text reading, that will be
1950 used wherever no path exists for $sigil or $backup.
1951
1952 =cut
1953
1954 # TODO Think about returning some lazy-eval iterator.
1955 # TODO Get rid of backup; we should know from what witness is whether we need it.
1956
1957 sub reading_sequence {
1958     my( $self, $start, $end, $witness ) = @_;
1959
1960     $witness = $self->baselabel unless $witness;
1961     my @readings = ( $start );
1962     my %seen;
1963     my $n = $start;
1964     while( $n && $n->id ne $end->id ) {
1965         if( exists( $seen{$n->id} ) ) {
1966             throw( "Detected loop for $witness at " . $n->id );
1967         }
1968         $seen{$n->id} = 1;
1969
1970         my $next = $self->next_reading( $n, $witness );
1971         unless( $next ) {
1972             throw( "Did not find any path for $witness from reading " . $n->id );
1973         }
1974         push( @readings, $next );
1975         $n = $next;
1976     }
1977     # Check that the last reading is our end reading.
1978     my $last = $readings[$#readings];
1979     throw( "Last reading found from " . $start->text .
1980         " for witness $witness is not the end!" ) # TODO do we get this far?
1981         unless $last->id eq $end->id;
1982
1983     return @readings;
1984 }
1985
1986 =head2 readings_at_rank( $rank )
1987
1988 Returns a list of readings at a given rank, taken from the alignment table.
1989
1990 =cut
1991
1992 sub readings_at_rank {
1993         my( $self, $rank, $nolacuna ) = @_;
1994         my $table = $self->alignment_table;
1995         # Table rank is real rank - 1.
1996         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1997         my %readings;
1998         foreach my $e ( @elements ) {
1999                 next unless ref( $e ) eq 'HASH';
2000                 next unless exists $e->{'t'};
2001                 my $rdg = $e->{'t'};
2002                 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
2003                 $readings{$e->{'t'}->id} = $e->{'t'};
2004         }
2005         return values %readings;
2006 }
2007
2008 =head2 next_reading( $reading, $sigil );
2009
2010 Returns the reading that follows the given reading along the given witness
2011 path.
2012
2013 =cut
2014
2015 sub next_reading {
2016     # Return the successor via the corresponding path.
2017     my $self = shift;
2018     my $answer = $self->_find_linked_reading( 'next', @_ );
2019         return undef unless $answer;
2020     return $self->reading( $answer );
2021 }
2022
2023 =head2 prior_reading( $reading, $sigil )
2024
2025 Returns the reading that precedes the given reading along the given witness
2026 path.
2027
2028 =cut
2029
2030 sub prior_reading {
2031     # Return the predecessor via the corresponding path.
2032     my $self = shift;
2033     my $answer = $self->_find_linked_reading( 'prior', @_ );
2034     return $self->reading( $answer );
2035 }
2036
2037 sub _find_linked_reading {
2038     my( $self, $direction, $node, $path ) = @_;
2039
2040     # Get a backup if we are dealing with a layered witness
2041     my $alt_path;
2042     my $aclabel = $self->ac_label;
2043     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2044         $alt_path = $1;
2045     }
2046
2047     my @linked_paths = $direction eq 'next'
2048         ? $self->sequence->edges_from( $node )
2049         : $self->sequence->edges_to( $node );
2050     return undef unless scalar( @linked_paths );
2051
2052     # We have to find the linked path that contains all of the
2053     # witnesses supplied in $path.
2054     my( @path_wits, @alt_path_wits );
2055     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2056     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2057     my $base_le;
2058     my $alt_le;
2059     foreach my $le ( @linked_paths ) {
2060         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2061             $base_le = $le;
2062         }
2063                 my @le_wits = sort $self->path_witnesses( $le );
2064                 if( _is_within( \@path_wits, \@le_wits ) ) {
2065                         # This is the right path.
2066                         return $direction eq 'next' ? $le->[1] : $le->[0];
2067                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2068                         $alt_le = $le;
2069                 }
2070     }
2071     # Got this far? Return the alternate path if it exists.
2072     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2073         if $alt_le;
2074
2075     # Got this far? Return the base path if it exists.
2076     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2077         if $base_le;
2078
2079     # Got this far? We have no appropriate path.
2080     warn "Could not find $direction node from " . $node->id
2081         . " along path $path";
2082     return undef;
2083 }
2084
2085 # Some set logic.
2086 sub _is_within {
2087     my( $set1, $set2 ) = @_;
2088     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2089     foreach my $el ( @$set1 ) {
2090         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2091     }
2092     return $ret;
2093 }
2094
2095 # Return the string that joins together a list of witnesses for
2096 # display on a single path.
2097 sub _witnesses_of_label {
2098     my( $self, $label ) = @_;
2099     my $regex = $self->wit_list_separator;
2100     my @answer = split( /\Q$regex\E/, $label );
2101     return @answer;
2102 }
2103
2104 =head2 common_readings
2105
2106 Returns the list of common readings in the graph (i.e. those readings that are
2107 shared by all non-lacunose witnesses.)
2108
2109 =cut
2110
2111 sub common_readings {
2112         my $self = shift;
2113         my @common = grep { $_->is_common } $self->readings;
2114         return @common;
2115 }
2116
2117 =head2 path_text( $sigil, [, $start, $end ] )
2118
2119 Returns the text of a witness (plus its backup, if we are using a layer)
2120 as stored in the collation.  The text is returned as a string, where the
2121 individual readings are joined with spaces and the meta-readings (e.g.
2122 lacunae) are omitted.  Optional specification of $start and $end allows
2123 the generation of a subset of the witness text.
2124
2125 =cut
2126
2127 sub path_text {
2128         my( $self, $wit, $start, $end ) = @_;
2129         $start = $self->start unless $start;
2130         $end = $self->end unless $end;
2131         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2132         my $pathtext = '';
2133         my $last;
2134         foreach my $r ( @path ) {
2135                 unless ( $r->join_prior || !$last || $last->join_next ) {
2136                         $pathtext .= ' ';
2137                 }
2138                 $pathtext .= $r->text;
2139                 $last = $r;
2140         }
2141         return $pathtext;
2142 }
2143
2144 =head1 INITIALIZATION METHODS
2145
2146 These are mostly for use by parsers.
2147
2148 =head2 make_witness_path( $witness )
2149
2150 Link the array of readings contained in $witness->path (and in
2151 $witness->uncorrected_path if it exists) into collation paths.
2152 Clear out the arrays when finished.
2153
2154 =head2 make_witness_paths
2155
2156 Call make_witness_path for all witnesses in the tradition.
2157
2158 =cut
2159
2160 # For use when a collation is constructed from a base text and an apparatus.
2161 # We have the sequences of readings and just need to add path edges.
2162 # When we are done, clear out the witness path attributes, as they are no
2163 # longer needed.
2164 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2165
2166 sub make_witness_paths {
2167     my( $self ) = @_;
2168     foreach my $wit ( $self->tradition->witnesses ) {
2169         # say STDERR "Making path for " . $wit->sigil;
2170         $self->make_witness_path( $wit );
2171     }
2172 }
2173
2174 sub make_witness_path {
2175     my( $self, $wit ) = @_;
2176     my @chain = @{$wit->path};
2177     my $sig = $wit->sigil;
2178     # Add start and end if necessary
2179     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2180     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2181     foreach my $idx ( 0 .. $#chain-1 ) {
2182         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2183     }
2184     if( $wit->is_layered ) {
2185         @chain = @{$wit->uncorrected_path};
2186                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2187                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2188         foreach my $idx( 0 .. $#chain-1 ) {
2189             my $source = $chain[$idx];
2190             my $target = $chain[$idx+1];
2191             $self->add_path( $source, $target, $sig.$self->ac_label )
2192                 unless $self->has_path( $source, $target, $sig );
2193         }
2194     }
2195     $wit->clear_path;
2196     $wit->clear_uncorrected_path;
2197 }
2198
2199 =head2 calculate_ranks
2200
2201 Calculate the reading ranks (that is, their aligned positions relative
2202 to each other) for the graph.  This can only be called on linear collations.
2203
2204 =begin testing
2205
2206 use Text::Tradition;
2207
2208 my $cxfile = 't/data/Collatex-16.xml';
2209 my $t = Text::Tradition->new(
2210     'name'  => 'inline',
2211     'input' => 'CollateX',
2212     'file'  => $cxfile,
2213     );
2214 my $c = $t->collation;
2215
2216 # Make an svg
2217 my $table = $c->alignment_table;
2218 ok( $c->has_cached_table, "Alignment table was cached" );
2219 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2220 $c->calculate_ranks;
2221 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2222 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2223 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2224 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2225 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2226
2227 =end testing
2228
2229 =cut
2230
2231 sub calculate_ranks {
2232     my $self = shift;
2233     # Save the existing ranks, in case we need to invalidate the cached SVG.
2234     throw( "Cannot calculate ranks on a non-linear graph" )
2235         unless $self->linear;
2236     my %existing_ranks;
2237     map { $existing_ranks{$_} = $_->rank } $self->readings;
2238
2239     # Do the rankings based on the relationship equivalence graph, starting
2240     # with the start node.
2241     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2242
2243     # Transfer our rankings from the topological graph to the real one.
2244     foreach my $r ( $self->readings ) {
2245         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2246             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2247         } else {
2248                 # Die. Find the last rank we calculated.
2249                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2250                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2251                         $self->readings;
2252                 my $last = pop @all_defined;
2253             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2254         }
2255     }
2256     # Do we need to invalidate the cached data?
2257     if( $self->has_cached_table ) {
2258         foreach my $r ( $self->readings ) {
2259                 next if defined( $existing_ranks{$r} )
2260                         && $existing_ranks{$r} == $r->rank;
2261                 # Something has changed, so clear the cache
2262                 $self->_clear_cache;
2263                         # ...and recalculate the common readings.
2264                         $self->calculate_common_readings();
2265                 last;
2266         }
2267     }
2268         # The graph calculation information is now up to date.
2269         $self->_graphcalc_done(1);
2270 }
2271
2272 sub _clear_cache {
2273         my $self = shift;
2274         $self->wipe_table if $self->has_cached_table;
2275 }
2276
2277
2278 =head2 flatten_ranks
2279
2280 A convenience method for parsing collation data.  Searches the graph for readings
2281 with the same text at the same rank, and merges any that are found.
2282
2283 =cut
2284
2285 sub flatten_ranks {
2286     my ( $self, %args ) = shift;
2287     my %unique_rank_rdg;
2288     my $changed;
2289     foreach my $p ( $self->identical_readings( %args ) ) {
2290                 # say STDERR "Combining readings at same rank: @$p";
2291                 $changed = 1;
2292                 $self->merge_readings( @$p );
2293                 # TODO see if this now makes a common point.
2294     }
2295     # If we merged readings, the ranks are still fine but the alignment
2296     # table is wrong. Wipe it.
2297     $self->wipe_table() if $changed;
2298 }
2299
2300 =head2 identical_readings
2301 =head2 identical_readings( start => $startnode, end => $endnode )
2302 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2303
2304 Goes through the graph identifying all pairs of readings that appear to be
2305 identical, and therefore able to be merged into a single reading. Returns the
2306 relevant identical pairs. Can be restricted to run over only a part of the
2307 graph, specified either by node or by rank.
2308
2309 =cut
2310
2311 sub identical_readings {
2312         my ( $self, %args ) = @_;
2313     # Find where we should start and end.
2314     my $startrank = $args{startrank} || 0;
2315     if( $args{start} ) {
2316         throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2317                 && $self->reading( $args{start} )->has_rank;
2318         $startrank = $self->reading( $args{start} )->rank;
2319     }
2320     my $endrank = $args{endrank} || $self->end->rank;
2321     if( $args{end} ) {
2322         throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2323                 && $self->reading( $args{end} )->has_rank;
2324         $endrank = $self->reading( $args{end} )->rank;
2325     }
2326
2327     # Make sure the ranks are correct.
2328     unless( $self->_graphcalc_done ) {
2329         $self->calculate_ranks;
2330     }
2331     # Go through the readings looking for duplicates.
2332     my %unique_rank_rdg;
2333     my @pairs;
2334     foreach my $rdg ( $self->readings ) {
2335         next unless $rdg->has_rank;
2336         my $rk = $rdg->rank;
2337         next if $rk > $endrank || $rk < $startrank;
2338         my $key = $rk . "||" . $rdg->text;
2339         if( exists $unique_rank_rdg{$key} ) {
2340                 # Make sure they don't have different grammatical forms
2341                         my $ur = $unique_rank_rdg{$key};
2342                 if( $rdg->is_identical( $ur ) ) {
2343                                 push( @pairs, [ $ur, $rdg ] );
2344                         }
2345         } else {
2346             $unique_rank_rdg{$key} = $rdg;
2347         }
2348     }
2349
2350     return @pairs;
2351 }
2352
2353
2354 =head2 calculate_common_readings
2355
2356 Goes through the graph identifying the readings that appear in every witness
2357 (apart from those with lacunae at that spot.) Marks them as common and returns
2358 the list.
2359
2360 =begin testing
2361
2362 use Text::Tradition;
2363
2364 my $cxfile = 't/data/Collatex-16.xml';
2365 my $t = Text::Tradition->new(
2366     'name'  => 'inline',
2367     'input' => 'CollateX',
2368     'file'  => $cxfile,
2369     );
2370 my $c = $t->collation;
2371
2372 my @common = $c->calculate_common_readings();
2373 is( scalar @common, 8, "Found correct number of common readings" );
2374 my @marked = sort $c->common_readings();
2375 is( scalar @common, 8, "All common readings got marked as such" );
2376 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2377 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2378
2379 =end testing
2380
2381 =cut
2382
2383 sub calculate_common_readings {
2384         my $self = shift;
2385         my @common;
2386         map { $_->is_common( 0 ) } $self->readings;
2387         # Implicitly calls calculate_ranks
2388         my $table = $self->alignment_table;
2389         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2390                 my @row = map { $_->{'tokens'}->[$idx]
2391                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2392                                         @{$table->{'alignment'}};
2393                 my %hash;
2394                 foreach my $r ( @row ) {
2395                         if( $r ) {
2396                                 $hash{$r->id} = $r unless $r->is_meta;
2397                         } else {
2398                                 $hash{'UNDEF'} = $r;
2399                         }
2400                 }
2401                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2402                         my( $r ) = values %hash;
2403                         $r->is_common( 1 );
2404                         push( @common, $r );
2405                 }
2406         }
2407         return @common;
2408 }
2409
2410 =head2 text_from_paths
2411
2412 Calculate the text array for all witnesses from the path, for later consistency
2413 checking.  Only to be used if there is no non-graph-based way to know the
2414 original texts.
2415
2416 =cut
2417
2418 sub text_from_paths {
2419         my $self = shift;
2420     foreach my $wit ( $self->tradition->witnesses ) {
2421         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2422         my @text;
2423         foreach my $r ( @readings ) {
2424                 next if $r->is_meta;
2425                 push( @text, $r->text );
2426         }
2427         $wit->text( \@text );
2428         if( $wit->is_layered ) {
2429                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2430                                                                                                   $wit->sigil.$self->ac_label );
2431                         my @uctext;
2432                         foreach my $r ( @ucrdgs ) {
2433                                 next if $r->is_meta;
2434                                 push( @uctext, $r->text );
2435                         }
2436                         $wit->layertext( \@uctext );
2437         }
2438     }
2439 }
2440
2441 =head1 UTILITY FUNCTIONS
2442
2443 =head2 common_predecessor( $reading_a, $reading_b )
2444
2445 Find the last reading that occurs in sequence before both the given readings.
2446 At the very least this should be $self->start.
2447
2448 =head2 common_successor( $reading_a, $reading_b )
2449
2450 Find the first reading that occurs in sequence after both the given readings.
2451 At the very least this should be $self->end.
2452
2453 =begin testing
2454
2455 use Text::Tradition;
2456
2457 my $cxfile = 't/data/Collatex-16.xml';
2458 my $t = Text::Tradition->new(
2459     'name'  => 'inline',
2460     'input' => 'CollateX',
2461     'file'  => $cxfile,
2462     );
2463 my $c = $t->collation;
2464
2465 is( $c->common_predecessor( 'n24', 'n23' )->id,
2466     'n20', "Found correct common predecessor" );
2467 is( $c->common_successor( 'n24', 'n23' )->id,
2468     '__END__', "Found correct common successor" );
2469
2470 is( $c->common_predecessor( 'n19', 'n17' )->id,
2471     'n16', "Found correct common predecessor for readings on same path" );
2472 is( $c->common_successor( 'n21', 'n10' )->id,
2473     '__END__', "Found correct common successor for readings on same path" );
2474
2475 =end testing
2476
2477 =cut
2478
2479 ## Return the closest reading that is a predecessor of both the given readings.
2480 sub common_predecessor {
2481         my $self = shift;
2482         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2483         return $self->_common_in_path( $r1, $r2, 'predecessors' );
2484 }
2485
2486 sub common_successor {
2487         my $self = shift;
2488         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2489         return $self->_common_in_path( $r1, $r2, 'successors' );
2490 }
2491
2492
2493 # TODO think about how to do this without ranks...
2494 sub _common_in_path {
2495         my( $self, $r1, $r2, $dir ) = @_;
2496         my $iter = $self->end->rank;
2497         my @candidates;
2498         my @last_r1 = ( $r1 );
2499         my @last_r2 = ( $r2 );
2500         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2501         my %all_seen;
2502         # say STDERR "Finding common $dir for $r1, $r2";
2503         while( !@candidates ) {
2504                 last unless $iter--;  # Avoid looping infinitely
2505                 # Iterate separately down the graph from r1 and r2
2506                 my( @new_lc1, @new_lc2 );
2507                 foreach my $lc ( @last_r1 ) {
2508                         foreach my $p ( $lc->$dir ) {
2509                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2510                                         # say STDERR "Path candidate $p from $lc";
2511                                         push( @candidates, $p );
2512                                 } elsif( !$all_seen{$p->id} ) {
2513                                         $all_seen{$p->id} = 'r1';
2514                                         push( @new_lc1, $p );
2515                                 }
2516                         }
2517                 }
2518                 foreach my $lc ( @last_r2 ) {
2519                         foreach my $p ( $lc->$dir ) {
2520                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2521                                         # say STDERR "Path candidate $p from $lc";
2522                                         push( @candidates, $p );
2523                                 } elsif( !$all_seen{$p->id} ) {
2524                                         $all_seen{$p->id} = 'r2';
2525                                         push( @new_lc2, $p );
2526                                 }
2527                         }
2528                 }
2529                 @last_r1 = @new_lc1;
2530                 @last_r2 = @new_lc2;
2531         }
2532         my @answer = sort { $a->rank <=> $b->rank } @candidates;
2533         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2534 }
2535
2536 sub throw {
2537         Text::Tradition::Error->throw(
2538                 'ident' => 'Collation error',
2539                 'message' => $_[0],
2540                 );
2541 }
2542
2543 no Moose;
2544 __PACKAGE__->meta->make_immutable;
2545
2546 =head1 BUGS/TODO
2547
2548 =over
2549
2550 =item * Rework XML serialization in a more modular way
2551
2552 =back
2553
2554 =head1 LICENSE
2555
2556 This package is free software and is provided "as is" without express
2557 or implied warranty.  You can redistribute it and/or modify it under
2558 the same terms as Perl itself.
2559
2560 =head1 AUTHOR
2561
2562 Tara L Andrews E<lt>aurum@cpan.orgE<gt>