base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use JSON qw/ to_json /;
  10 use Text::CSV;
  11 use Text::Tradition::Collation::Data;
  12 use Text::Tradition::Collation::Reading;
  13 use Text::Tradition::Collation::RelationshipStore;
  14 use Text::Tradition::Error;
  15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  16 use XML::LibXML;
  17 use XML::LibXML::XPathContext;
  18 use Moose;
  19
  20 has _data => (
  21         isa      => 'Text::Tradition::Collation::Data',
  22         is       => 'ro',
  23         required => 1,
  24         handles  => [ qw(
  25                 sequence
  26                 paths
  27                 _set_relations
  28                 relations
  29                 _set_start
  30                 _set_end
  31                 ac_label
  32                 has_cached_table
  33                 relationships
  34                 relationship_types
  35                 related_readings
  36                 get_relationship
  37                 del_relationship
  38                 equivalence
  39                 equivalence_graph
  40                 readings
  41                 reading
  42                 _add_reading
  43                 del_reading
  44                 has_reading
  45                 wit_list_separator
  46                 baselabel
  47                 linear
  48                 wordsep
  49                 start
  50                 end
  51                 cached_table
  52                 _graphcalc_done
  53                 has_cached_svg
  54                 wipe_table
  55         )]
  56 );
  57
  58 has 'tradition' => (
  59     is => 'ro',
  60     isa => 'Text::Tradition',
  61     writer => '_set_tradition',
  62     weak_ref => 1,
  63     );
  64
  65 =encoding utf8
  66
  67 =head1 NAME
  68
  69 Text::Tradition::Collation - a software model for a text collation
  70
  71 =head1 SYNOPSIS
  72
  73   use Text::Tradition;
  74   my $t = Text::Tradition->new(
  75     'name' => 'this is a text',
  76     'input' => 'TEI',
  77     'file' => '/path/to/tei_parallel_seg_file.xml' );
  78
  79   my $c = $t->collation;
  80   my @readings = $c->readings;
  81   my @paths = $c->paths;
  82   my @relationships = $c->relationships;
  83
  84   my $svg_variant_graph = $t->collation->as_svg();
  85
  86 =head1 DESCRIPTION
  87
  88 Text::Tradition is a library for representation and analysis of collated
  89 texts, particularly medieval ones.  The Collation is the central feature of
  90 a Tradition, where the text, its sequence of readings, and its relationships
  91 between readings are actually kept.
  92
  93 =head1 CONSTRUCTOR
  94
  95 =head2 new
  96
  97 The constructor.  Takes a hash or hashref of the following arguments:
  98
  99 =over
 100
 101 =item * tradition - The Text::Tradition object to which the collation
 102 belongs. Required.
 103
 104 =item * linear - Whether the collation should be linear; that is, whether
 105 transposed readings should be treated as two linked readings rather than one,
 106 and therefore whether the collation graph is acyclic.  Defaults to true.
 107
 108 =item * baselabel - The default label for the path taken by a base text
 109 (if any). Defaults to 'base text'.
 110
 111 =item * wit_list_separator - The string to join a list of witnesses for
 112 purposes of making labels in display graphs.  Defaults to ', '.
 113
 114 =item * ac_label - The extra label to tack onto a witness sigil when
 115 representing another layer of path for the given witness - that is, when
 116 a text has more than one possible reading due to scribal corrections or
 117 the like.  Defaults to ' (a.c.)'.
 118
 119 =item * wordsep - The string used to separate words in the original text.
 120 Defaults to ' '.
 121
 122 =back
 123
 124 =head1 ACCESSORS
 125
 126 =head2 tradition
 127
 128 =head2 linear
 129
 130 =head2 wit_list_separator
 131
 132 =head2 baselabel
 133
 134 =head2 ac_label
 135
 136 =head2 wordsep
 137
 138 Simple accessors for collation attributes.
 139
 140 =head2 start
 141
 142 The meta-reading at the start of every witness path.
 143
 144 =head2 end
 145
 146 The meta-reading at the end of every witness path.
 147
 148 =head2 readings
 149
 150 Returns all Reading objects in the graph.
 151
 152 =head2 reading( $id )
 153
 154 Returns the Reading object corresponding to the given ID.
 155
 156 =head2 add_reading( $reading_args )
 157
 158 Adds a new reading object to the collation.
 159 See L<Text::Tradition::Collation::Reading> for the available arguments.
 160
 161 =head2 del_reading( $object_or_id )
 162
 163 Removes the given reading from the collation, implicitly removing its
 164 paths and relationships.
 165
 166 =head2 has_reading( $id )
 167
 168 Predicate to see whether a given reading ID is in the graph.
 169
 170 =head2 reading_witnesses( $object_or_id )
 171
 172 Returns a list of sigils whose witnesses contain the reading.
 173
 174 =head2 paths
 175
 176 Returns all reading paths within the document - that is, all edges in the
 177 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 178
 179 =head2 add_path( $source, $target, $sigil )
 180
 181 Links the given readings in the collation in sequence, under the given witness
 182 sigil.  The readings may be specified by object or ID.
 183
 184 =head2 del_path( $source, $target, $sigil )
 185
 186 Links the given readings in the collation in sequence, under the given witness
 187 sigil.  The readings may be specified by object or ID.
 188
 189 =head2 has_path( $source, $target );
 190
 191 Returns true if the two readings are linked in sequence in any witness.
 192 The readings may be specified by object or ID.
 193
 194 =head2 relationships
 195
 196 Returns all Relationship objects in the collation.
 197
 198 =head2 add_relationship( $reading, $other_reading, $options )
 199
 200 Adds a new relationship of the type given in $options between the two readings,
 201 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 202 where $status is true on success, and @vectors is a list of relationship edges
 203 that were ultimately added.
 204 See L<Text::Tradition::Collation::Relationship> for the available options.
 205
 206 =cut
 207
 208 sub BUILDARGS {
 209         my ( $class, @args ) = @_;
 210         my %args = @args == 1 ? %{ $args[0] } : @args;
 211         # TODO determine these from the Moose::Meta object
 212         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 213                 linear wordsep start end cached_table _graphcalc_done);
 214         my %data_args;
 215         for my $attr (@delegate_attrs) {
 216                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 217         }
 218         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 219         return \%args;
 220 }
 221
 222 sub BUILD {
 223     my $self = shift;
 224     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 225     $self->_set_start( $self->add_reading(
 226         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 227     $self->_set_end( $self->add_reading(
 228         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 229 }
 230
 231 =head2 register_relationship_type( %relationship_definition )
 232
 233 Add a relationship type definition to this collation. The argument can be either a
 234 hash or a hashref, defining the properties of the relationship. For relationship types
 235 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
 236
 237 =head2 get_relationship_type( $relationship_name )
 238
 239 Retrieve the RelationshipType object for the relationship with the given name.
 240
 241 =cut
 242
 243 sub register_relationship_type {
 244         my $self = shift;
 245         my %args = @_ == 1 ? %{$_[0]} : @_;
 246         if( $self->relations->has_type( $args{name} ) ) {
 247                 throw( 'Relationship type ' . $args{name} . ' already registered' );
 248         }
 249         $self->relations->add_type( %args );
 250 }
 251
 252 sub get_relationship_type {
 253         my( $self, $name ) = @_;
 254                 return $self->relations->has_type( $name )
 255                         ? $self->relations->type( $name ) : undef;
 256 }
 257
 258 ### Reading construct/destruct functions
 259
 260 sub add_reading {
 261         my( $self, $reading ) = @_;
 262         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 263                 my %args = %$reading;
 264                 if( $args{'init'} ) {
 265                         # If we are initializing an empty collation, don't assume that we
 266                         # have set a tradition.
 267                         delete $args{'init'};
 268                 } elsif( $self->tradition->can('language') && $self->tradition->has_language
 269                         && !exists $args{'language'} ) {
 270                         $args{'language'} = $self->tradition->language;
 271                 }
 272                 $reading = Text::Tradition::Collation::Reading->new(
 273                         'collation' => $self,
 274                         %args );
 275         }
 276         # First check to see if a reading with this ID exists.
 277         if( $self->reading( $reading->id ) ) {
 278                 throw( "Collation already has a reading with id " . $reading->id );
 279         }
 280         $self->_graphcalc_done(0);
 281         $self->_add_reading( $reading->id => $reading );
 282         # Once the reading has been added, put it in both graphs.
 283         $self->sequence->add_vertex( $reading->id );
 284         $self->relations->add_reading( $reading->id );
 285         return $reading;
 286 };
 287
 288 around del_reading => sub {
 289         my $orig = shift;
 290         my $self = shift;
 291         my $arg = shift;
 292
 293         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 294                 $arg = $arg->id;
 295         }
 296         # Remove the reading from the graphs.
 297         $self->_graphcalc_done(0);
 298         $self->_clear_cache; # Explicitly clear caches to GC the reading
 299         $self->sequence->delete_vertex( $arg );
 300         $self->relations->delete_reading( $arg );
 301
 302         # Carry on.
 303         $self->$orig( $arg );
 304 };
 305
 306 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 307
 308 Merges the $second reading into the $main one. If $concatenate is true, then
 309 the merged node will carry the text of both readings, concatenated with either
 310 $with_str (if specified) or a sensible default (the empty string if the
 311 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 312
 313 The first two arguments may be either readings or reading IDs.
 314
 315 =begin testing
 316
 317 use Text::Tradition;
 318 use TryCatch;
 319
 320 my $cxfile = 't/data/Collatex-16.xml';
 321 my $t = Text::Tradition->new(
 322     'name'  => 'inline',
 323     'input' => 'CollateX',
 324     'file'  => $cxfile,
 325     );
 326 my $c = $t->collation;
 327
 328 my $rno = scalar $c->readings;
 329 # Split n21 ('unto') for testing purposes
 330 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 331 my $old_r = $c->reading( 'n21' );
 332 $old_r->alter_text( 'to' );
 333 $c->del_path( 'n20', 'n21', 'A' );
 334 $c->add_path( 'n20', 'n21p0', 'A' );
 335 $c->add_path( 'n21p0', 'n21', 'A' );
 336 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
 337 $c->flatten_ranks();
 338 ok( $c->reading( 'n21p0' ), "New reading exists" );
 339 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 340
 341 # Combine n3 and n4 ( with his )
 342 $c->merge_readings( 'n3', 'n4', 1 );
 343 ok( !$c->reading('n4'), "Reading n4 is gone" );
 344 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 345
 346 # Collapse n9 and n10 ( rood / root )
 347 $c->merge_readings( 'n9', 'n10' );
 348 ok( !$c->reading('n10'), "Reading n10 is gone" );
 349 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 350
 351 # Try to combine n21 and n21p0. This should break.
 352 my $remaining = $c->reading('n21');
 353 $remaining ||= $c->reading('n22');  # one of these should still exist
 354 try {
 355         $c->merge_readings( 'n21p0', $remaining, 1 );
 356         ok( 0, "Bad reading merge changed the graph" );
 357 } catch( Text::Tradition::Error $e ) {
 358         like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
 359 } catch {
 360         ok( 0, "Unexpected error on bad reading merge: $@" );
 361 }
 362
 363 try {
 364         $c->calculate_ranks();
 365         ok( 1, "Graph is still evidently whole" );
 366 } catch( Text::Tradition::Error $e ) {
 367         ok( 0, "Caught a rank exception: " . $e->message );
 368 }
 369
 370 =end testing
 371
 372 =cut
 373
 374 sub merge_readings {
 375         my $self = shift;
 376
 377         # Sanity check
 378         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 379         my $mergemeta = $kept_obj->is_meta;
 380         throw( "Cannot merge meta and non-meta reading" )
 381                 unless ( $mergemeta && $del_obj->is_meta )
 382                         || ( !$mergemeta && !$del_obj->is_meta );
 383         if( $mergemeta ) {
 384                 throw( "Cannot merge with start or end node" )
 385                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 386                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 387                 throw( "Cannot combine text of meta readings" ) if $combine;
 388         }
 389         # We can only merge readings in a linear graph if:
 390         # - they are contiguous with only one edge between them, OR
 391         # - they are at equivalent ranks in the graph.
 392         if( $self->linear ) {
 393                 my @delpred = $del_obj->predecessors;
 394                 my @keptsuc = $kept_obj->successors;
 395                 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
 396                         && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
 397                         my( $is_ok, $msg ) = $self->relations->relationship_valid(
 398                                 $kept_obj, $del_obj, 'collated' );
 399                         unless( $is_ok ) {
 400                                 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
 401                         }
 402                 }
 403         }
 404
 405         # We only need the IDs for adding paths to the graph, not the reading
 406         # objects themselves.
 407         my $kept = $kept_obj->id;
 408         my $deleted = $del_obj->id;
 409         $self->_graphcalc_done(0);
 410
 411     # The kept reading should inherit the paths and the relationships
 412     # of the deleted reading.
 413         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 414                 my @vector = ( $kept );
 415                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 416                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 417                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 418                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 419                 $self->sequence->add_edge( @vector );
 420                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 421                 @wits{keys %$fwits} = values %$fwits;
 422                 $self->sequence->set_edge_attributes( @vector, \%wits );
 423         }
 424         $self->relations->merge_readings( $kept, $deleted, $combine );
 425
 426         # Do the deletion deed.
 427         if( $combine ) {
 428                 # Combine the text of the readings
 429                 my $joinstr = $combine_char;
 430                 unless( defined $joinstr ) {
 431                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 432                         $joinstr = $self->wordsep unless defined $joinstr;
 433                 }
 434                 $kept_obj->_combine( $del_obj, $joinstr );
 435         }
 436         $self->del_reading( $deleted );
 437 }
 438
 439 =head2 merge_related( @relationship_types )
 440
 441 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
 442
 443 WARNING: This operation cannot be undone.
 444
 445 =cut
 446
 447 =begin testing
 448
 449 use Test::Warn;
 450 use Text::Tradition;
 451 use TryCatch;
 452
 453 my $t;
 454 warnings_exist {
 455         $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
 456 } [qr/Cannot set relationship on a meta reading/],
 457         "Got expected relationship drop warning on parse";
 458
 459 my $c = $t->collation;
 460 # Force the transitive propagation of all existing relationships.
 461 $c->relations->propagate_all_relationships();
 462
 463 my %rdg_ids;
 464 map { $rdg_ids{$_} = 1 } $c->readings;
 465 $c->merge_related( 'orthographic' );
 466 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
 467         "Successfully collapsed orthographic variation" );
 468 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
 469 foreach my $rid ( keys %rdg_ids ) {
 470         my $exp = $rdg_ids{$rid};
 471         is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
 472                 ( $exp ? "retained" : "removed" ) );
 473 }
 474 ok( $c->linear, "Graph is still linear" );
 475 try {
 476         $c->calculate_ranks; # This should succeed
 477         ok( 1, "Can still calculate ranks on the new graph" );
 478 } catch {
 479         ok( 0, "Rank calculation on merged graph failed: $@" );
 480 }
 481
 482 # Now add some transpositions
 483 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
 484 $c->merge_related( 'transposition' );
 485 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
 486         "Transposed relationship is merged away" );
 487 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
 488 ok( !$c->linear, "Graph is no longer linear" );
 489 try {
 490         $c->calculate_ranks; # This should fail
 491         ok( 0, "Rank calculation happened on nonlinear graph?!" );
 492 } catch ( Text::Tradition::Error $e ) {
 493         is( $e->message, 'Cannot calculate ranks on a non-linear graph',
 494                 "Rank calculation on merged graph threw an error" );
 495 }
 496
 497 =end testing
 498
 499 =cut
 500
 501 # TODO: there should be a way to display merged without affecting the underlying data!
 502
 503 sub merge_related {
 504         my $self = shift;
 505         my %reltypehash;
 506         map { $reltypehash{$_} = 1 } @_;
 507
 508         # Set up the filter for finding related readings
 509         my $filter = sub {
 510                 exists $reltypehash{$_[0]->type};
 511         };
 512
 513         # Go through all readings looking for related ones
 514         foreach my $r ( $self->readings ) {
 515                 next unless $self->reading( "$r" ); # might have been deleted meanwhile
 516                 while( my @related = $self->related_readings( $r, $filter ) ) {
 517                         push( @related, $r );
 518                         @related = sort {
 519                                         scalar $b->witnesses <=> scalar $a->witnesses
 520                                 } @related;
 521                         my $keep = shift @related;
 522                         foreach my $delr ( @related ) {
 523                                 $self->linear( 0 )
 524                                         unless( $self->get_relationship( $keep, $delr )->colocated );
 525                                 $self->merge_readings( $keep, $delr );
 526                         }
 527                 }
 528         }
 529 }
 530
 531 =head2 compress_readings
 532
 533 Where possible in the graph, compresses plain sequences of readings into a
 534 single reading. The sequences must consist of readings with no
 535 relationships to other readings, with only a single witness path between
 536 them and no other witness paths from either that would skip the other. The
 537 readings must also not be marked as nonsense or bad grammar.
 538
 539 WARNING: This operation cannot be undone.
 540
 541 =begin testing
 542
 543 use Text::Tradition;
 544
 545 my $t = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 546 my $c = $t->collation;
 547 my $n = scalar $c->readings;
 548 $c->compress_readings();
 549 is( scalar $c->readings, $n - 6, "Compressing readings seems to work" );
 550
 551 # Now put in a join-word and make sure the thing still works.
 552 my $t2 = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 553 my $c2 = $t2->collation;
 554 # Split n21 ('unto') for testing purposes
 555 my $new_r = $c2->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 556 my $old_r = $c2->reading( 'n21' );
 557 $old_r->alter_text( 'to' );
 558 $c2->del_path( 'n20', 'n21', 'A' );
 559 $c2->add_path( 'n20', 'n21p0', 'A' );
 560 $c2->add_path( 'n21p0', 'n21', 'A' );
 561 $c2->calculate_ranks();
 562 is( scalar $c2->readings, $n + 1, "We have our extra test reading" );
 563 $c2->compress_readings();
 564 is( scalar $c2->readings, $n - 6, "Compressing readings also works with join_next" );
 565 is( $c2->reading( 'n21p0' )->text, 'unto', "The joined word has no space" );
 566
 567
 568 =end testing
 569
 570 =cut
 571
 572 sub compress_readings {
 573         my $self = shift;
 574         # Sanity check: first save the original text of each witness.
 575         my %origtext;
 576         foreach my $wit ( $self->tradition->witnesses ) {
 577                 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
 578                 if( $wit->is_layered ) {
 579                         my $acsig = $wit->sigil . $self->ac_label;
 580                         $origtext{$acsig} = $self->path_text( $acsig );
 581                 }
 582         }
 583
 584         # Now do the deed.
 585         # Anywhere in the graph that there is a reading that joins only to a single
 586         # successor, and neither of these have any relationships, just join the two
 587         # readings.
 588         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 589                 # Now look for readings that can be joined to their successors.
 590                 next unless $rdg->is_combinable;
 591                 my %seen;
 592                 while( $self->sequence->successors( $rdg ) == 1 ) {
 593                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 594                         throw( "Infinite loop" ) if $seen{$next->id};
 595                         $seen{$next->id} = 1;
 596                         last if $self->sequence->predecessors( $next ) > 1;
 597                         last unless $next->is_combinable;
 598                         say "Joining readings $rdg and $next";
 599                         $self->merge_readings( $rdg, $next, 1 );
 600                 }
 601         }
 602
 603         # Finally, make sure we haven't screwed anything up.
 604         foreach my $wit ( $self->tradition->witnesses ) {
 605                 my $pathtext = $self->path_text( $wit->sigil );
 606                 throw( "Text differs for witness " . $wit->sigil )
 607                         unless $pathtext eq $origtext{$wit->sigil};
 608                 if( $wit->is_layered ) {
 609                         my $acsig = $wit->sigil . $self->ac_label;
 610                         $pathtext = $self->path_text( $acsig );
 611                         throw( "Layered text differs for witness " . $wit->sigil )
 612                                 unless $pathtext eq $origtext{$acsig};
 613                 }
 614         }
 615
 616         $self->relations->rebuild_equivalence();
 617         $self->calculate_ranks();
 618 }
 619
 620 # Helper function for manipulating the graph.
 621 sub _stringify_args {
 622         my( $self, $first, $second, @args ) = @_;
 623     $first = $first->id
 624         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 625     $second = $second->id
 626         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 627     return( $first, $second, @args );
 628 }
 629
 630 # Helper function for manipulating the graph.
 631 sub _objectify_args {
 632         my( $self, $first, $second, $arg ) = @_;
 633     $first = $self->reading( $first )
 634         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 635     $second = $self->reading( $second )
 636         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 637     return( $first, $second, $arg );
 638 }
 639
 640 =head2 duplicate_reading( $reading, @witlist )
 641
 642 Split the given reading into two, so that the new reading is in the path for
 643 the witnesses given in @witlist. If the result is that certain non-colocated
 644 relationships (e.g. transpositions) are no longer valid, these will be removed.
 645 Returns the newly-created reading.
 646
 647 =begin testing
 648
 649 use Test::More::UTF8;
 650 use Text::Tradition;
 651 use TryCatch;
 652
 653 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
 654 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
 655 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
 656
 657 my $sc = $st->collation;
 658 my $numr = 17;
 659 ok( $sc->reading('n131'), "Tradition has the affected reading" );
 660 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
 661 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
 662
 663 # Detach the erroneously collated reading
 664 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
 665 ok( $newr, "New reading was created" );
 666 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
 667 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
 668 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
 669 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
 670 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
 671
 672 # Check that the bad transposition is gone
 673 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
 674 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
 675
 676 # The collation should not be fixed
 677 my @pairs = $sc->identical_readings();
 678 is( scalar @pairs, 0, "Not re-collated yet" );
 679 # Fix the collation
 680 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
 681 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
 682 is( scalar @pairs, 3, "Found three more identical readings" );
 683 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
 684 $sc->flatten_ranks();
 685 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
 686
 687 # Check that we can't "duplicate" a reading with no wits or with all wits
 688 try {
 689         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
 690         ok( 0, "Reading duplication without witnesses throws an error" );
 691 } catch( Text::Tradition::Error $e ) {
 692         like( $e->message, qr/Must specify one or more witnesses/,
 693                 "Reading duplication without witnesses throws the expected error" );
 694 } catch {
 695         ok( 0, "Reading duplication without witnesses threw the wrong error" );
 696 }
 697
 698 try {
 699         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
 700         ok( 0, "Reading duplication with all witnesses throws an error" );
 701 } catch( Text::Tradition::Error $e ) {
 702         like( $e->message, qr/Cannot join all witnesses/,
 703                 "Reading duplication with all witnesses throws the expected error" );
 704 } catch {
 705         ok( 0, "Reading duplication with all witnesses threw the wrong error" );
 706 }
 707
 708 try {
 709         $sc->calculate_ranks();
 710         ok( 1, "Graph is still evidently whole" );
 711 } catch( Text::Tradition::Error $e ) {
 712         ok( 0, "Caught a rank exception: " . $e->message );
 713 }
 714
 715 =end testing
 716
 717 =cut
 718
 719 sub duplicate_reading {
 720         my( $self, $r, @wits ) = @_;
 721         # Check that we are not doing anything unwise.
 722         throw( "Must specify one or more witnesses for the duplicated reading" )
 723                 unless @wits;
 724         unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
 725                 $r = $self->reading( $r );
 726         }
 727         throw( "Cannot duplicate a meta-reading" )
 728                 if $r->is_meta;
 729         throw( "Cannot join all witnesses to the new reading" )
 730                 if scalar( @wits ) == scalar( $r->witnesses );
 731
 732         # Get all the reading attributes and duplicate them.
 733         my $rmeta = Text::Tradition::Collation::Reading->meta;
 734         my %args;
 735     foreach my $attr( $rmeta->get_all_attributes ) {
 736                 next if $attr->name =~ /^_/;
 737                 my $acc = $attr->get_read_method;
 738                 if( !$acc && $attr->has_applied_traits ) {
 739                         my $tr = $attr->applied_traits;
 740                         if( $tr->[0] =~ /::(Array|Hash)$/ ) {
 741                                 my $which = $1;
 742                                 my %methods = reverse %{$attr->handles};
 743                                 $acc = $methods{elements};
 744                                 $args{$attr->name} = $which eq 'Array'
 745                                         ? [ $r->$acc ] : { $r->$acc };
 746                         }
 747                 } else {
 748                         $args{$attr->name} = $r->$acc if $acc;
 749                 }
 750         }
 751         # By definition the new reading will no longer be common.
 752         $args{is_common} = 0;
 753         # The new reading also needs its own ID.
 754         $args{id} = $self->_generate_dup_id( $r->id );
 755
 756         # Try to make the new reading.
 757         my $newr = $self->add_reading( \%args );
 758         # The old reading is also no longer common.
 759         $r->is_common( 0 );
 760
 761         # For each of the witnesses, dissociate from the old reading and
 762         # associate with the new.
 763         foreach my $wit ( @wits ) {
 764                 my $prior = $self->prior_reading( $r, $wit );
 765                 my $next = $self->next_reading( $r, $wit );
 766                 $self->del_path( $prior, $r, $wit );
 767                 $self->add_path( $prior, $newr, $wit );
 768                 $self->del_path( $r, $next, $wit );
 769                 $self->add_path( $newr, $next, $wit );
 770         }
 771
 772         # If the graph is ranked, we need to look for relationships that are now
 773         # invalid (i.e. 'non-colocation' types that might now be colocated) and
 774         # remove them. If not, we can skip it.
 775         my $succ;
 776         my %rrk;
 777         my @deleted_relations;
 778         if( $self->end->has_rank ) {
 779                 # Find the point where we can stop checking
 780                 $succ = $self->common_successor( $r, $newr );
 781
 782                 # Hash the existing ranks
 783                 foreach my $rdg ( $self->readings ) {
 784                         $rrk{$rdg->id} = $rdg->rank;
 785                 }
 786                 # Calculate the new ranks
 787                 $self->calculate_ranks();
 788
 789                 # Check for invalid non-colocated relationships among changed-rank readings
 790                 # from where the ranks start changing up to $succ
 791                 my $lastrank = $succ->rank;
 792                 foreach my $rdg ( $self->readings ) {
 793                         next if $rdg->rank > $lastrank;
 794                         next if $rdg->rank == $rrk{$rdg->id};
 795                         my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
 796                         next unless @noncolo;
 797                         foreach my $nc ( @noncolo ) {
 798                                 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
 799                                         push( @deleted_relations, [ $rdg->id, $nc->id ] );
 800                                 }
 801                         }
 802                 }
 803         }
 804         return ( $newr, @deleted_relations );
 805 }
 806
 807 sub _generate_dup_id {
 808         my( $self, $rid ) = @_;
 809         my $newid;
 810         my $i = 0;
 811         while( !$newid ) {
 812                 $newid = $rid."_$i";
 813                 if( $self->has_reading( $newid ) ) {
 814                         $newid = '';
 815                         $i++;
 816                 }
 817         }
 818         return $newid;
 819 }
 820
 821 ### Path logic
 822
 823 sub add_path {
 824         my $self = shift;
 825
 826         # We only need the IDs for adding paths to the graph, not the reading
 827         # objects themselves.
 828     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 829
 830         $self->_graphcalc_done(0);
 831         # Connect the readings
 832         unless( $self->sequence->has_edge( $source, $target ) ) {
 833             $self->sequence->add_edge( $source, $target );
 834             $self->relations->add_equivalence_edge( $source, $target );
 835         }
 836     # Note the witness in question
 837     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 838 }
 839
 840 sub del_path {
 841         my $self = shift;
 842         my @args;
 843         if( ref( $_[0] ) eq 'ARRAY' ) {
 844                 my $e = shift @_;
 845                 @args = ( @$e, @_ );
 846         } else {
 847                 @args = @_;
 848         }
 849
 850         # We only need the IDs for removing paths from the graph, not the reading
 851         # objects themselves.
 852     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 853
 854         $self->_graphcalc_done(0);
 855         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 856                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 857         }
 858         unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
 859                 $self->sequence->delete_edge( $source, $target );
 860                 $self->relations->delete_equivalence_edge( $source, $target );
 861         }
 862 }
 863
 864
 865 # Extra graph-alike utility
 866 sub has_path {
 867         my $self = shift;
 868     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 869         return undef unless $self->sequence->has_edge( $source, $target );
 870         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 871 }
 872
 873 =head2 clear_witness( @sigil_list )
 874
 875 Clear the given witnesses out of the collation entirely, removing references
 876 to them in paths, and removing readings that belong only to them.  Should only
 877 be called via $tradition->del_witness.
 878
 879 =cut
 880
 881 sub clear_witness {
 882         my( $self, @sigils ) = @_;
 883
 884         $self->_graphcalc_done(0);
 885         # Clear the witness(es) out of the paths
 886         foreach my $e ( $self->paths ) {
 887                 foreach my $sig ( @sigils ) {
 888                         $self->del_path( $e, $sig );
 889                 }
 890         }
 891
 892         # Clear out the newly unused readings
 893         foreach my $r ( $self->readings ) {
 894                 unless( $self->reading_witnesses( $r ) ) {
 895                         $self->del_reading( $r );
 896                 }
 897         }
 898 }
 899
 900 sub add_relationship {
 901         my $self = shift;
 902     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 903     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 904     foreach my $v ( @vectors ) {
 905         next unless $self->get_relationship( $v )->colocated;
 906         if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
 907                 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
 908                         $self->_graphcalc_done(0);
 909                         $self->_clear_cache;
 910                         last;
 911         }
 912     }
 913     return @vectors;
 914 }
 915
 916 around qw/ get_relationship del_relationship / => sub {
 917         my $orig = shift;
 918         my $self = shift;
 919         my @args = @_;
 920         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 921                 @args = @{$_[0]};
 922         }
 923         my @stringargs = $self->_stringify_args( @args );
 924         $self->$orig( @stringargs );
 925 };
 926
 927 =head2 reading_witnesses( $reading )
 928
 929 Return a list of sigils corresponding to the witnesses in which the reading appears.
 930
 931 =cut
 932
 933 sub reading_witnesses {
 934         my( $self, $reading ) = @_;
 935         # We need only check either the incoming or the outgoing edges; I have
 936         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 937         if( $reading eq $self->start ) {
 938                 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
 939         }
 940         my %all_witnesses;
 941         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 942                 my $wits = $self->sequence->get_edge_attributes( @$e );
 943                 @all_witnesses{ keys %$wits } = 1;
 944         }
 945         my $acstr = $self->ac_label;
 946         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 947                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 948         }
 949         return keys %all_witnesses;
 950 }
 951
 952 =head1 OUTPUT METHODS
 953
 954 =head2 as_svg( \%options )
 955
 956 Returns an SVG string that represents the graph, via as_dot and graphviz.
 957 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 958
 959 =cut
 960
 961 sub as_svg {
 962     my( $self, $opts ) = @_;
 963     throw( "Need GraphViz installed to output SVG" )
 964         unless File::Which::which( 'dot' );
 965     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 966     $self->calculate_ranks()
 967         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 968         my @cmd = qw/dot -Tsvg/;
 969         my( $svg, $err );
 970         my $dotfile = File::Temp->new();
 971         ## USE FOR DEBUGGING
 972         # $dotfile->unlink_on_destroy(0);
 973         binmode $dotfile, ':utf8';
 974         print $dotfile $self->as_dot( $opts );
 975         push( @cmd, $dotfile->filename );
 976         run( \@cmd, ">", binary(), \$svg );
 977         $svg = decode_utf8( $svg );
 978         return $svg;
 979 }
 980
 981
 982 =head2 as_dot( \%options )
 983
 984 Returns a string that is the collation graph expressed in dot
 985 (i.e. GraphViz) format.  Options include:
 986
 987 =over 4
 988
 989 =item * from
 990
 991 =item * to
 992
 993 =item * color_common
 994
 995 =back
 996
 997 =cut
 998
 999 sub as_dot {
1000     my( $self, $opts ) = @_;
1001     my $startrank = $opts->{'from'} if $opts;
1002     my $endrank = $opts->{'to'} if $opts;
1003     my $color_common = $opts->{'color_common'} if $opts;
1004     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
1005        && $self->end->rank > 100;
1006     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
1007
1008     # Check the arguments
1009     if( $startrank ) {
1010         return if $endrank && $startrank > $endrank;
1011         return if $startrank > $self->end->rank;
1012         }
1013         if( defined $endrank ) {
1014                 return if $endrank < 0;
1015                 $endrank = undef if $endrank == $self->end->rank;
1016         }
1017
1018     my $graph_name = $self->tradition->name;
1019     $graph_name =~ s/[^\w\s]//g;
1020     $graph_name = join( '_', split( /\s+/, $graph_name ) );
1021
1022     my %graph_attrs = (
1023         'rankdir' => 'LR',
1024         'bgcolor' => 'none',
1025         );
1026     my %node_attrs = (
1027         'fontsize' => 14,
1028         'fillcolor' => 'white',
1029         'style' => 'filled',
1030         'shape' => 'ellipse'
1031         );
1032     my %edge_attrs = (
1033         'arrowhead' => 'open',
1034         'color' => '#000000',
1035         'fontcolor' => '#000000',
1036         );
1037
1038     my $dot = sprintf( "digraph %s {\n", $graph_name );
1039     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1040     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1041
1042         # Output substitute start/end readings if necessary
1043         if( $startrank ) {
1044                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1045         }
1046         if( $endrank ) {
1047                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1048         }
1049         if( $STRAIGHTENHACK ) {
1050                 ## HACK part 1
1051                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1052                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1053                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1054         }
1055         my %used;  # Keep track of the readings that actually appear in the graph
1056         # Sort the readings by rank if we have ranks; this speeds layout.
1057         my @all_readings = $self->end->has_rank
1058                 ? sort { $a->rank <=> $b->rank } $self->readings
1059                 : $self->readings;
1060         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1061     foreach my $reading ( @all_readings ) {
1062         # Only output readings within our rank range.
1063         next if $startrank && $reading->rank < $startrank;
1064         next if $endrank && $reading->rank > $endrank;
1065         $used{$reading->id} = 1;
1066         # Need not output nodes without separate labels
1067         next if $reading->id eq $reading->text;
1068         my $rattrs;
1069         my $label = $reading->text;
1070         unless( $label =~ /^[[:punct:]]+$/ ) {
1071                 $label .= '-' if $reading->join_next;
1072             $label = "-$label" if $reading->join_prior;
1073         }
1074         $label =~ s/\"/\\\"/g;
1075                 $rattrs->{'label'} = $label;
1076                 $rattrs->{'id'} = $reading->id;
1077                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1078         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1079     }
1080
1081         # Add the real edges.
1082     my @edges = $self->paths;
1083         my( %substart, %subend );
1084     foreach my $edge ( @edges ) {
1085         # Do we need to output this edge?
1086         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1087                 my $label = $self->_path_display_label( $opts,
1088                         $self->path_witnesses( $edge ) );
1089                         my $variables = { %edge_attrs, 'label' => $label };
1090
1091                         # Account for the rank gap if necessary
1092                         my $rank0 = $self->reading( $edge->[0] )->rank
1093                                 if $self->reading( $edge->[0] )->has_rank;
1094                         my $rank1 = $self->reading( $edge->[1] )->rank
1095                                 if $self->reading( $edge->[1] )->has_rank;
1096                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1097                                 $variables->{'minlen'} = $rank1 - $rank0;
1098                         }
1099
1100                         # EXPERIMENTAL: make edge width reflect no. of witnesses
1101                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1102                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1103
1104                         my $varopts = _dot_attr_string( $variables );
1105                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1106                                 $edge->[0], $edge->[1], $varopts );
1107         } elsif( $used{$edge->[0]} ) {
1108                 $subend{$edge->[0]} = $edge->[1];
1109         } elsif( $used{$edge->[1]} ) {
1110                 $substart{$edge->[1]} = $edge->[0];
1111         }
1112     }
1113
1114     # If we are asked to, add relationship links
1115     if( exists $opts->{show_relations} ) {
1116         my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1117         if( $filter eq 'transposition' ) {
1118                 $filter =~ qr/^transposition$/;
1119         }
1120         my %typecolors;
1121         my @types = sort( map { $_->name } $self->relations->types );
1122         if( exists $opts->{graphcolors} ) {
1123                 foreach my $tdx ( 0 .. $#types ) {
1124                         $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1125                 }
1126         } else {
1127                 map { $typecolors{$_} = '#FFA14F' } @types;
1128         }
1129         foreach my $redge ( $self->relationships ) {
1130                 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1131                                 my $rel = $self->get_relationship( $redge );
1132                                 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1133                                 my $variables = {
1134                                         arrowhead => 'none',
1135                                         color => $typecolors{$rel->type},
1136                                         constraint => 'false',
1137                                         penwidth => '3',
1138                                 };
1139                                 unless( exists $opts->{graphcolors} ) {
1140                                         $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1141                                 }
1142                                 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1143                                         $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1144                 }
1145         }
1146     }
1147
1148     # Add substitute start and end edges if necessary
1149     foreach my $node ( keys %substart ) {
1150         my $witstr = $self->_path_display_label( $opts,
1151                 $self->path_witnesses( $substart{$node}, $node ) );
1152         my $variables = { %edge_attrs, 'label' => $witstr };
1153         my $nrdg = $self->reading( $node );
1154         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1155                 # Substart is actually one lower than $startrank
1156                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1157         }
1158         my $varopts = _dot_attr_string( $variables );
1159         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1160         }
1161     foreach my $node ( keys %subend ) {
1162         my $witstr = $self->_path_display_label( $opts,
1163                 $self->path_witnesses( $node, $subend{$node} ) );
1164         my $variables = { %edge_attrs, 'label' => $witstr };
1165         my $varopts = _dot_attr_string( $variables );
1166         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1167         }
1168         # HACK part 2
1169         if( $STRAIGHTENHACK ) {
1170                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1171                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1172         }
1173
1174     $dot .= "}\n";
1175     return $dot;
1176 }
1177
1178 sub _dot_attr_string {
1179         my( $hash ) = @_;
1180         my @attrs;
1181         foreach my $k ( sort keys %$hash ) {
1182                 my $v = $hash->{$k};
1183                 push( @attrs, $k.'="'.$v.'"' );
1184         }
1185         return( '[ ' . join( ', ', @attrs ) . ' ]' );
1186 }
1187
1188 =head2 path_witnesses( $edge )
1189
1190 Returns the list of sigils whose witnesses are associated with the given edge.
1191 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1192
1193 =cut
1194
1195 sub path_witnesses {
1196         my( $self, @edge ) = @_;
1197         # If edge is an arrayref, cope.
1198         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1199                 my $e = shift @edge;
1200                 @edge = @$e;
1201         }
1202         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1203         return @wits;
1204 }
1205
1206 # Helper function. Make a display label for the given witnesses, showing a.c.
1207 # witnesses only where the main witness is not also in the list.
1208 sub _path_display_label {
1209         my $self = shift;
1210         my $opts = shift;
1211         my %wits;
1212         map { $wits{$_} = 1 } @_;
1213
1214         # If an a.c. wit is listed, remove it if the main wit is also listed.
1215         # Otherwise keep it for explicit listing.
1216         my $aclabel = $self->ac_label;
1217         my @disp_ac;
1218         foreach my $w ( sort keys %wits ) {
1219                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1220                         if( exists $wits{$1} ) {
1221                                 delete $wits{$w};
1222                         } else {
1223                                 push( @disp_ac, $w );
1224                         }
1225                 }
1226         }
1227
1228         if( $opts->{'explicit_wits'} ) {
1229                 return join( ', ', sort keys %wits );
1230         } else {
1231                 # See if we are in a majority situation.
1232                 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1233                 $maj = $maj > 5 ? $maj : 5;
1234                 if( scalar keys %wits > $maj ) {
1235                         unshift( @disp_ac, 'majority' );
1236                         return join( ', ', @disp_ac );
1237                 } else {
1238                         return join( ', ', sort keys %wits );
1239                 }
1240         }
1241 }
1242
1243 =head2 as_adjacency_list
1244
1245 Returns a JSON structure that represents the collation sequence graph.
1246
1247 =begin testing
1248
1249 use JSON qw/ from_json /;
1250 use Text::Tradition;
1251
1252 my $t = Text::Tradition->new(
1253         'input' => 'Self',
1254         'file' => 't/data/florilegium_graphml.xml' );
1255 my $c = $t->collation;
1256
1257 # Make a connection so we can test rank preservation
1258 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1259
1260 # Create an adjacency list of the whole thing; test the output.
1261 my $adj_whole = from_json( $c->as_adjacency_list() );
1262 is( scalar @$adj_whole, scalar $c->readings(),
1263         "Same number of nodes in graph and adjacency list" );
1264 my @adj_whole_edges;
1265 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1266 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1267         "Same number of edges in graph and adjacency list" );
1268 # Find the reading whose rank should be preserved
1269 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1270 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1271 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1272
1273 # Now create an adjacency list of just a portion. w76 to w122
1274 my $adj_part = from_json( $c->as_adjacency_list(
1275         { from => $c->reading('w76')->rank,
1276           to   => $c->reading('w122')->rank }));
1277 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1278 my @adj_part_edges;
1279 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1280 is( scalar @adj_part_edges, 58,
1281         "Same number of edges in partial graph and adjacency list" );
1282 # Check for consistency
1283 my %part_nodes;
1284 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1285 foreach my $edge ( @adj_part_edges ) {
1286         my $testid = $edge->{id};
1287         ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1288 }
1289
1290 =end testing
1291
1292 =cut
1293
1294 sub as_adjacency_list {
1295         my( $self, $opts ) = @_;
1296         # Make a structure that contains all the nodes, the nodes they point to,
1297         # and the attributes of the edges that connect them.
1298         # [ { id: 'n0', label: 'Gallia', adjacent: [
1299         #               { id: 'n1', label: 'P Q' } ,
1300         #               { id: 'n2', label: 'R S', minlen: 2 } ] },
1301         #   { id: 'n1', label: 'est', adjacent: [ ... ] },
1302         #   ... ]
1303         my $startrank = $opts->{'from'} || 0;
1304         my $endrank = $opts->{'to'} || $self->end->rank;
1305
1306     $self->calculate_ranks()
1307         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1308         my $list = [];
1309         foreach my $rdg ( $self->readings ) {
1310                 my @successors;
1311                 my $phony = '';
1312                 # Figure out what the node's successors should be.
1313                 if( $rdg eq $self->start && $startrank > 0 ) {
1314                         # Connect the start node with all the nodes at startrank.
1315                         # Lacunas should be included only if the node really has that rank.
1316                         @successors = $self->readings_at_rank( $startrank, 1 );
1317                         $phony = 'start';
1318                 } elsif( $rdg->rank < $startrank
1319                                  || $rdg->rank > $endrank && $rdg ne $self->end ) {
1320                         next;
1321                 } else {
1322                         @successors = $rdg->successors;
1323                 }
1324                 # Make sure that the end node is at the end of the successors
1325                 # list if it is needed.
1326                 if( grep { $_ eq $self->end } @successors ) {
1327                         my @ts = grep { $_ ne $self->end } @successors;
1328                         @successors = ( @ts, $self->end );
1329                 } elsif ( grep { $_->rank > $endrank } @successors ) {
1330                         push( @successors, $self->end );
1331                 }
1332
1333                 my $listitem = { id => $rdg->id, label => $rdg->text };
1334                 my $adjacent = [];
1335                 my @endwits;
1336                 foreach my $succ ( @successors ) {
1337                         my @edgewits;
1338                         if( $phony eq 'start' ) {
1339                                 @edgewits = $succ->witnesses;
1340                         } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1341                                 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1342                         }
1343
1344                         if( $succ eq $self->end ) {
1345                                 @edgewits = @endwits;
1346                         } elsif( $succ->rank > $endrank ) {
1347                                 # These witnesses will point to 'end' instead, not to the
1348                                 # actual successor.
1349                                 push( @endwits, @edgewits );
1350                                 next;
1351                         }
1352                         my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1353                         my $edgedef = { id => $succ->id, label => $edgelabel };
1354                         my $rankoffset = $succ->rank - $rdg->rank;
1355                         if( $rankoffset > 1 and $succ ne $self->end ) {
1356                                 $edgedef->{minlen} = $rankoffset;
1357                         }
1358                         push( @$adjacent, $edgedef );
1359                 }
1360                 $listitem->{adjacent} = $adjacent;
1361                 push( @$list, $listitem );
1362         }
1363         return to_json( $list );
1364 }
1365
1366 =head2 as_graphml
1367
1368 Returns a GraphML representation of the collation.  The GraphML will contain
1369 two graphs. The first expresses the attributes of the readings and the witness
1370 paths that link them; the second expresses the relationships that link the
1371 readings.  This is the native transfer format for a tradition.
1372
1373 =begin testing
1374
1375 use Text::Tradition;
1376 use TryCatch;
1377
1378 my $READINGS = 311;
1379 my $PATHS = 361;
1380
1381 my $datafile = 't/data/florilegium_tei_ps.xml';
1382 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1383                                       'name' => 'test0',
1384                                       'file' => $datafile,
1385                                       'linear' => 1 );
1386
1387 ok( $tradition, "Got a tradition object" );
1388 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1389 ok( $tradition->collation, "Tradition has a collation" );
1390
1391 my $c = $tradition->collation;
1392 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1393 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1394 is( scalar $c->relationships, 0, "Collation has all relationships" );
1395
1396 # Add a few relationships
1397 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1398 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1399 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1400                                           'is_significant' => 'yes' } );
1401
1402 # Now write it to GraphML and parse it again.
1403
1404 my $graphml = $c->as_graphml;
1405 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1406 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1407 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1408 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1409 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1410 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1411
1412 # Now add a stemma, write to GraphML, and look at the output.
1413 SKIP: {
1414         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1415         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1416         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1417         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1418         $graphml = $c->as_graphml;
1419         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1420 }
1421
1422 =end testing
1423
1424 =cut
1425
1426 ## TODO MOVE this to Tradition.pm and modularize it better
1427 sub as_graphml {
1428     my( $self, $options ) = @_;
1429         $self->calculate_ranks unless $self->_graphcalc_done;
1430
1431         my $start = $options->{'from'}
1432                 ? $self->reading( $options->{'from'} ) : $self->start;
1433         my $end = $options->{'to'}
1434                 ? $self->reading( $options->{'to'} ) : $self->end;
1435         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1436                 throw( 'Start node must be before end node' );
1437         }
1438         # The readings need to be ranked for this to work.
1439         $start = $self->start unless $start->has_rank;
1440         $end = $self->end unless $end->has_rank;
1441         my $rankoffset = 0;
1442         unless( $start eq $self->start ) {
1443                 $rankoffset = $start->rank - 1;
1444         }
1445         my %use_readings;
1446
1447     # Some namespaces
1448     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1449     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1450     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1451         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1452
1453     # Create the document and root node
1454     require XML::LibXML;
1455     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1456     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1457     $graphml->setDocumentElement( $root );
1458     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1459     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1460
1461     # List of attribute types to save on our objects and their corresponding
1462     # GraphML types
1463     my %save_types = (
1464         'Str' => 'string',
1465         'Int' => 'int',
1466         'Bool' => 'boolean',
1467         'ReadingID' => 'string',
1468         'RelationshipType' => 'string',
1469         'RelationshipScope' => 'string',
1470         'Ternary' => 'string',
1471     );
1472
1473     # Add the data keys for the graph. Include an extra key 'version' for the
1474     # GraphML output version.
1475     my %graph_data_keys;
1476     my $gdi = 0;
1477     my %graph_attributes = ( 'version' => 'string' );
1478         # Graph attributes include those of Tradition and those of Collation.
1479         my %gattr_from;
1480         # TODO Use meta introspection method from duplicate_reading to do this
1481         # instead of naming custom keys.
1482         my $tmeta = $self->tradition->meta;
1483         my $cmeta = $self->meta;
1484         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1485         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1486         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1487                 next if $attr->name =~ /^_/;
1488                 next unless $save_types{$attr->type_constraint->name};
1489                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1490         }
1491     # Extra custom keys for complex objects that should be saved in some form.
1492     # The subroutine should return a string, or undef/empty.
1493     if( $tmeta->has_method('stemmata') ) {
1494                 $graph_attributes{'stemmata'} = sub {
1495                         my @stemstrs;
1496                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1497                                 $self->tradition->stemmata;
1498                         join( "\n", @stemstrs );
1499                 };
1500         }
1501
1502         if( $tmeta->has_method('user') ) {
1503                 $graph_attributes{'user'} = sub {
1504                         $self->tradition->user ? $self->tradition->user->id : undef
1505                 };
1506         }
1507
1508     foreach my $datum ( sort keys %graph_attributes ) {
1509         $graph_data_keys{$datum} = 'dg'.$gdi++;
1510         my $key = $root->addNewChild( $graphml_ns, 'key' );
1511         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1512                 : $graph_attributes{$datum};
1513         $key->setAttribute( 'attr.name', $datum );
1514         $key->setAttribute( 'attr.type', $dtype );
1515         $key->setAttribute( 'for', 'graph' );
1516         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1517     }
1518
1519     # Add the data keys for reading nodes
1520     my %reading_attributes;
1521     my $rmeta = Text::Tradition::Collation::Reading->meta;
1522     foreach my $attr( $rmeta->get_all_attributes ) {
1523                 next if $attr->name =~ /^_/;
1524                 next unless $save_types{$attr->type_constraint->name};
1525                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1526         }
1527         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1528                 # Extra custom key for the reading morphology
1529                 $reading_attributes{'lexemes'} = 'string';
1530         }
1531
1532     my %node_data_keys;
1533     my $ndi = 0;
1534     foreach my $datum ( sort keys %reading_attributes ) {
1535         $node_data_keys{$datum} = 'dn'.$ndi++;
1536         my $key = $root->addNewChild( $graphml_ns, 'key' );
1537         $key->setAttribute( 'attr.name', $datum );
1538         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1539         $key->setAttribute( 'for', 'node' );
1540         $key->setAttribute( 'id', $node_data_keys{$datum} );
1541     }
1542
1543     # Add the data keys for edges, that is, paths and relationships. Path
1544     # data does not come from a Moose class so is here manually.
1545     my $edi = 0;
1546     my %edge_data_keys;
1547     my %edge_attributes = (
1548         witness => 'string',                    # ID/label for a path
1549         extra => 'boolean',                             # Path key
1550         );
1551     my @path_attributes = keys %edge_attributes; # track our manual additions
1552     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1553     foreach my $attr( $pmeta->get_all_attributes ) {
1554                 next if $attr->name =~ /^_/;
1555                 next unless $save_types{$attr->type_constraint->name};
1556                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1557         }
1558     foreach my $datum ( sort keys %edge_attributes ) {
1559         $edge_data_keys{$datum} = 'de'.$edi++;
1560         my $key = $root->addNewChild( $graphml_ns, 'key' );
1561         $key->setAttribute( 'attr.name', $datum );
1562         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1563         $key->setAttribute( 'for', 'edge' );
1564         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1565     }
1566
1567     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1568     my $xmlidname = $self->tradition->name;
1569     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1570     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1571         $xmlidname = '_'.$xmlidname;
1572     }
1573     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1574     $sgraph->setAttribute( 'edgedefault', 'directed' );
1575     $sgraph->setAttribute( 'id', $xmlidname );
1576     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1577     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1578     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1579     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1580     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1581
1582     # Tradition/collation attribute data
1583     foreach my $datum ( keys %graph_attributes ) {
1584         my $value;
1585         if( $datum eq 'version' ) {
1586                 $value = '3.2';
1587         } elsif( ref( $graph_attributes{$datum} ) ) {
1588                 my $sub = $graph_attributes{$datum};
1589                 $value = &$sub();
1590         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1591                 $value = $self->tradition->$datum;
1592         } else {
1593                 $value = $self->$datum;
1594         }
1595                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1596         }
1597
1598     my $node_ctr = 0;
1599     my %node_hash;
1600     # Add our readings to the graph
1601     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1602         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1603                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1604         $use_readings{$n->id} = 1;
1605         # Add to the main graph
1606         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1607         my $node_xmlid = 'n' . $node_ctr++;
1608         $node_hash{ $n->id } = $node_xmlid;
1609         $node_el->setAttribute( 'id', $node_xmlid );
1610         foreach my $d ( keys %reading_attributes ) {
1611                 my $nval = $n->$d;
1612                 # Custom serialization
1613                 if( $d eq 'lexemes' ) {
1614                                 # If nval is a true value, we have lexemes so we need to
1615                                 # serialize them. Otherwise set nval to undef so that the
1616                                 # key is excluded from this reading.
1617                         $nval = $nval ? $n->_serialize_lexemes : undef;
1618                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1619                         $nval = undef;
1620                 }
1621                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1622                         # Adjust the ranks within the subgraph.
1623                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1624                                 : $nval - $rankoffset;
1625                 }
1626                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1627                         if defined $nval;
1628         }
1629     }
1630
1631     # Add the path edges to the sequence graph
1632     my $edge_ctr = 0;
1633     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1634         # We add an edge in the graphml for every witness in $e.
1635         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1636         my @edge_wits = sort $self->path_witnesses( $e );
1637         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1638         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1639         # Skip any path from start to end; that witness is not in the subgraph.
1640         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1641         foreach my $wit ( @edge_wits ) {
1642                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1643                                                                                 $node_hash{ $e->[0] },
1644                                                                                 $node_hash{ $e->[1] } );
1645                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1646                         $edge_el->setAttribute( 'source', $from );
1647                         $edge_el->setAttribute( 'target', $to );
1648                         $edge_el->setAttribute( 'id', $id );
1649
1650                         # It's a witness path, so add the witness
1651                         my $base = $wit;
1652                         my $key = $edge_data_keys{'witness'};
1653                         # Is this an ante-corr witness?
1654                         my $aclabel = $self->ac_label;
1655                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1656                                 # Keep the base witness
1657                                 $base = $1;
1658                                 # ...and record that this is an 'extra' reading path
1659                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1660                         }
1661                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1662                 }
1663         }
1664
1665         # Report the actual number of nodes and edges that went in
1666         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1667         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1668
1669         # Add the relationship graph to the XML
1670         map { delete $edge_data_keys{$_} } @path_attributes;
1671         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1672                 $node_data_keys{'id'}, \%edge_data_keys );
1673
1674     # Save and return the thing
1675     my $result = decode_utf8( $graphml->toString(1) );
1676     return $result;
1677 }
1678
1679 sub _add_graphml_data {
1680     my( $el, $key, $value ) = @_;
1681     return unless defined $value;
1682     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1683     $data_el->setAttribute( 'key', $key );
1684     $data_el->appendText( $value );
1685 }
1686
1687 =head2 as_csv
1688
1689 Returns a CSV alignment table representation of the collation graph, one
1690 row per witness (or witness uncorrected.)
1691
1692 =head2 as_tsv
1693
1694 Returns a tab-separated alignment table representation of the collation graph,
1695 one row per witness (or witness uncorrected.)
1696
1697 =begin testing
1698
1699 use Text::Tradition;
1700 use Text::CSV;
1701
1702 my $READINGS = 311;
1703 my $PATHS = 361;
1704 my $WITS = 13;
1705 my $WITAC = 4;
1706
1707 my $datafile = 't/data/florilegium_tei_ps.xml';
1708 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1709                                       'name' => 'test0',
1710                                       'file' => $datafile,
1711                                       'linear' => 1 );
1712
1713 my $c = $tradition->collation;
1714 # Export the thing to CSV
1715 my $csvstr = $c->as_csv();
1716 # Count the columns
1717 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1718 my @lines = split(/\n/, $csvstr );
1719 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1720 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1721 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1722 ok( @q_ac, "Found a layered witness" );
1723
1724 my $t2 = Text::Tradition->new( input => 'Tabular',
1725                                                            name => 'test2',
1726                                                            string => $csvstr,
1727                                                            sep_char => ',' );
1728 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1729 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1730
1731 # Now do it with TSV
1732 my $tsvstr = $c->as_tsv();
1733 my $t3 = Text::Tradition->new( input => 'Tabular',
1734                                                            name => 'test3',
1735                                                            string => $tsvstr,
1736                                                            sep_char => "\t" );
1737 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1738 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1739
1740 my $table = $c->alignment_table;
1741 my $noaccsv = $c->as_csv({ noac => 1 });
1742 my @noaclines = split(/\n/, $noaccsv );
1743 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1744 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1745 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1746
1747 my $safecsv = $c->as_csv({ safe_ac => 1});
1748 my @safelines = split(/\n/, $safecsv );
1749 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1750 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1751 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1752 ok( @q_ac, "Found a sanitized layered witness" );
1753 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1754
1755 # Test relationship collapse
1756 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1757 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1758
1759 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1760 my $t4 = Text::Tradition->new( input => 'Tabular',
1761                                                            name => 'test4',
1762                                                            string => $mergedtsv,
1763                                                            sep_char => "\t" );
1764 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1765 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1766
1767 # Test non-ASCII sigla
1768 my $t5 = Text::Tradition->new( input => 'Tabular',
1769                                                            name => 'nonascii',
1770                                                            file => 't/data/armexample.xlsx',
1771                                                            excel => 'xlsx' );
1772 my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1773 my @awitlines = split( /\n/, $awittsv );
1774 like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1775
1776 =end testing
1777
1778 =cut
1779
1780 sub _tabular {
1781     my( $self, $opts ) = @_;
1782     my $table = $self->alignment_table( $opts );
1783         my $csv_options = { binary => 1, quote_null => 0 };
1784         $csv_options->{'sep_char'} = $opts->{fieldsep};
1785         if( $opts->{fieldsep} eq "\t" ) {
1786                 # If it is really tab separated, nothing is an escape char.
1787                 $csv_options->{'quote_char'} = undef;
1788                 $csv_options->{'escape_char'} = '';
1789         }
1790     my $csv = Text::CSV->new( $csv_options );
1791     my @result;
1792
1793     # Make the header row
1794     my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1795     if( $opts->{ascii} ) {
1796         # TODO think of a fix for this
1797         throw( "Cannot currently produce ASCII sigla with witness layers" )
1798                 unless $opts->{noac};
1799         my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1800         @witnesses = @awits;
1801     }
1802     $csv->combine( @witnesses );
1803         push( @result, $csv->string );
1804
1805     # Make the rest of the rows
1806     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1807         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1808         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1809         # Quick and dirty collapse of requested relationship types
1810         if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1811                 # Now substitute the reading in the relevant index of @row
1812                 # for its merge-related reading
1813                 my %substitutes;
1814                 while( @rowobjs ) {
1815                         my $thisr = shift @rowobjs;
1816                         next unless $thisr;
1817                                 next if exists $substitutes{$thisr->{t}->text};
1818                                 # Make sure we don't have A <-> B substitutions.
1819                                 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1820                         foreach my $thatr ( @rowobjs ) {
1821                                 next unless $thatr;
1822                                 next if exists $substitutes{$thatr->{t}->text};
1823                                 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1824                                 next unless $ttrel;
1825                                 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1826                                 # If we have got this far then we need to merge them.
1827                                 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1828                         }
1829                 }
1830                 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1831         }
1832         $csv->combine( @row );
1833         push( @result, $csv->string );
1834     }
1835     return join( "\n", @result );
1836 }
1837
1838 sub as_csv {
1839         my $self = shift;
1840         my $opts = shift || {};
1841         $opts->{fieldsep} = ',';
1842         return $self->_tabular( $opts );
1843 }
1844
1845 sub as_tsv {
1846         my $self = shift;
1847         my $opts = shift || {};
1848         $opts->{fieldsep} = "\t";
1849         return $self->_tabular( $opts );
1850 }
1851
1852 =head2 alignment_table
1853
1854 Return a reference to an alignment table, in a slightly enhanced CollateX
1855 format which looks like this:
1856
1857  $table = { alignment => [ { witness => "SIGIL",
1858                              tokens => [ { t => "TEXT" }, ... ] },
1859                            { witness => "SIG2",
1860                              tokens => [ { t => "TEXT" }, ... ] },
1861                            ... ],
1862             length => TEXTLEN };
1863
1864 =cut
1865
1866 sub alignment_table {
1867     my( $self, $opts ) = @_;
1868     if( $self->has_cached_table ) {
1869                 return $self->cached_table
1870                         unless $opts->{noac} || $opts->{safe_ac};
1871     }
1872
1873     # Make sure we can do this
1874         throw( "Need a linear graph in order to make an alignment table" )
1875                 unless $self->linear;
1876     $self->calculate_ranks()
1877         unless $self->_graphcalc_done && $self->end->has_rank;
1878
1879     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1880     my @all_pos = ( 1 .. $self->end->rank - 1 );
1881     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1882         # say STDERR "Making witness row(s) for " . $wit->sigil;
1883         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1884         my @row = _make_witness_row( \@wit_path, \@all_pos );
1885         my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1886         $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1887         push( @{$table->{'alignment'}}, $witobj );
1888         if( $wit->is_layered && !$opts->{noac} ) {
1889                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1890                         $wit->sigil.$self->ac_label );
1891             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1892             my $witlabel = $opts->{safe_ac}
1893                 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1894             my $witacobj = { 'witness' => $witlabel,
1895                 'tokens' => \@ac_row };
1896             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1897                         push( @{$table->{'alignment'}}, $witacobj );
1898         }
1899     }
1900     unless( $opts->{noac} || $opts->{safe_ac} ) {
1901             $self->cached_table( $table );
1902         }
1903     return $table;
1904 }
1905
1906 sub _make_witness_row {
1907     my( $path, $positions ) = @_;
1908     my %char_hash;
1909     map { $char_hash{$_} = undef } @$positions;
1910     my $debug = 0;
1911     foreach my $rdg ( @$path ) {
1912         say STDERR "rank " . $rdg->rank if $debug;
1913         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1914         $char_hash{$rdg->rank} = { 't' => $rdg };
1915     }
1916     my @row = map { $char_hash{$_} } @$positions;
1917     # Fill in lacuna markers for undef spots in the row
1918     my $last_el = shift @row;
1919     my @filled_row = ( $last_el );
1920     foreach my $el ( @row ) {
1921         # If we are using node reference, make the lacuna node appear many times
1922         # in the table.  If not, use the lacuna tag.
1923         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1924             $el = $last_el;
1925         }
1926         push( @filled_row, $el );
1927         $last_el = $el;
1928     }
1929     return @filled_row;
1930 }
1931
1932
1933 =head1 NAVIGATION METHODS
1934
1935 =head2 reading_sequence( $first, $last, $sigil, $backup )
1936
1937 Returns the ordered list of readings, starting with $first and ending
1938 with $last, for the witness given in $sigil. If a $backup sigil is
1939 specified (e.g. when walking a layered witness), it will be used wherever
1940 no $sigil path exists.  If there is a base text reading, that will be
1941 used wherever no path exists for $sigil or $backup.
1942
1943 =cut
1944
1945 # TODO Think about returning some lazy-eval iterator.
1946 # TODO Get rid of backup; we should know from what witness is whether we need it.
1947
1948 sub reading_sequence {
1949     my( $self, $start, $end, $witness ) = @_;
1950
1951     $witness = $self->baselabel unless $witness;
1952     my @readings = ( $start );
1953     my %seen;
1954     my $n = $start;
1955     while( $n && $n->id ne $end->id ) {
1956         if( exists( $seen{$n->id} ) ) {
1957             throw( "Detected loop for $witness at " . $n->id );
1958         }
1959         $seen{$n->id} = 1;
1960
1961         my $next = $self->next_reading( $n, $witness );
1962         unless( $next ) {
1963             throw( "Did not find any path for $witness from reading " . $n->id );
1964         }
1965         push( @readings, $next );
1966         $n = $next;
1967     }
1968     # Check that the last reading is our end reading.
1969     my $last = $readings[$#readings];
1970     throw( "Last reading found from " . $start->text .
1971         " for witness $witness is not the end!" ) # TODO do we get this far?
1972         unless $last->id eq $end->id;
1973
1974     return @readings;
1975 }
1976
1977 =head2 readings_at_rank( $rank )
1978
1979 Returns a list of readings at a given rank, taken from the alignment table.
1980
1981 =cut
1982
1983 sub readings_at_rank {
1984         my( $self, $rank, $nolacuna ) = @_;
1985         my $table = $self->alignment_table;
1986         # Table rank is real rank - 1.
1987         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1988         my %readings;
1989         foreach my $e ( @elements ) {
1990                 next unless ref( $e ) eq 'HASH';
1991                 next unless exists $e->{'t'};
1992                 my $rdg = $e->{'t'};
1993                 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
1994                 $readings{$e->{'t'}->id} = $e->{'t'};
1995         }
1996         return values %readings;
1997 }
1998
1999 =head2 next_reading( $reading, $sigil );
2000
2001 Returns the reading that follows the given reading along the given witness
2002 path.
2003
2004 =cut
2005
2006 sub next_reading {
2007     # Return the successor via the corresponding path.
2008     my $self = shift;
2009     my $answer = $self->_find_linked_reading( 'next', @_ );
2010         return undef unless $answer;
2011     return $self->reading( $answer );
2012 }
2013
2014 =head2 prior_reading( $reading, $sigil )
2015
2016 Returns the reading that precedes the given reading along the given witness
2017 path.
2018
2019 =cut
2020
2021 sub prior_reading {
2022     # Return the predecessor via the corresponding path.
2023     my $self = shift;
2024     my $answer = $self->_find_linked_reading( 'prior', @_ );
2025     return $self->reading( $answer );
2026 }
2027
2028 sub _find_linked_reading {
2029     my( $self, $direction, $node, $path ) = @_;
2030
2031     # Get a backup if we are dealing with a layered witness
2032     my $alt_path;
2033     my $aclabel = $self->ac_label;
2034     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2035         $alt_path = $1;
2036     }
2037
2038     my @linked_paths = $direction eq 'next'
2039         ? $self->sequence->edges_from( $node )
2040         : $self->sequence->edges_to( $node );
2041     return undef unless scalar( @linked_paths );
2042
2043     # We have to find the linked path that contains all of the
2044     # witnesses supplied in $path.
2045     my( @path_wits, @alt_path_wits );
2046     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2047     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2048     my $base_le;
2049     my $alt_le;
2050     foreach my $le ( @linked_paths ) {
2051         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2052             $base_le = $le;
2053         }
2054                 my @le_wits = sort $self->path_witnesses( $le );
2055                 if( _is_within( \@path_wits, \@le_wits ) ) {
2056                         # This is the right path.
2057                         return $direction eq 'next' ? $le->[1] : $le->[0];
2058                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2059                         $alt_le = $le;
2060                 }
2061     }
2062     # Got this far? Return the alternate path if it exists.
2063     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2064         if $alt_le;
2065
2066     # Got this far? Return the base path if it exists.
2067     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2068         if $base_le;
2069
2070     # Got this far? We have no appropriate path.
2071     warn "Could not find $direction node from " . $node->id
2072         . " along path $path";
2073     return undef;
2074 }
2075
2076 # Some set logic.
2077 sub _is_within {
2078     my( $set1, $set2 ) = @_;
2079     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2080     foreach my $el ( @$set1 ) {
2081         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2082     }
2083     return $ret;
2084 }
2085
2086 # Return the string that joins together a list of witnesses for
2087 # display on a single path.
2088 sub _witnesses_of_label {
2089     my( $self, $label ) = @_;
2090     my $regex = $self->wit_list_separator;
2091     my @answer = split( /\Q$regex\E/, $label );
2092     return @answer;
2093 }
2094
2095 =head2 common_readings
2096
2097 Returns the list of common readings in the graph (i.e. those readings that are
2098 shared by all non-lacunose witnesses.)
2099
2100 =cut
2101
2102 sub common_readings {
2103         my $self = shift;
2104         my @common = grep { $_->is_common } $self->readings;
2105         return @common;
2106 }
2107
2108 =head2 path_text( $sigil, [, $start, $end ] )
2109
2110 Returns the text of a witness (plus its backup, if we are using a layer)
2111 as stored in the collation.  The text is returned as a string, where the
2112 individual readings are joined with spaces and the meta-readings (e.g.
2113 lacunae) are omitted.  Optional specification of $start and $end allows
2114 the generation of a subset of the witness text.
2115
2116 =cut
2117
2118 sub path_text {
2119         my( $self, $wit, $start, $end ) = @_;
2120         $start = $self->start unless $start;
2121         $end = $self->end unless $end;
2122         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2123         my $pathtext = '';
2124         my $last;
2125         foreach my $r ( @path ) {
2126                 unless ( $r->join_prior || !$last || $last->join_next ) {
2127                         $pathtext .= ' ';
2128                 }
2129                 $pathtext .= $r->text;
2130                 $last = $r;
2131         }
2132         return $pathtext;
2133 }
2134
2135 =head1 INITIALIZATION METHODS
2136
2137 These are mostly for use by parsers.
2138
2139 =head2 make_witness_path( $witness )
2140
2141 Link the array of readings contained in $witness->path (and in
2142 $witness->uncorrected_path if it exists) into collation paths.
2143 Clear out the arrays when finished.
2144
2145 =head2 make_witness_paths
2146
2147 Call make_witness_path for all witnesses in the tradition.
2148
2149 =cut
2150
2151 # For use when a collation is constructed from a base text and an apparatus.
2152 # We have the sequences of readings and just need to add path edges.
2153 # When we are done, clear out the witness path attributes, as they are no
2154 # longer needed.
2155 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2156
2157 sub make_witness_paths {
2158     my( $self ) = @_;
2159     foreach my $wit ( $self->tradition->witnesses ) {
2160         # say STDERR "Making path for " . $wit->sigil;
2161         $self->make_witness_path( $wit );
2162     }
2163 }
2164
2165 sub make_witness_path {
2166     my( $self, $wit ) = @_;
2167     my @chain = @{$wit->path};
2168     my $sig = $wit->sigil;
2169     # Add start and end if necessary
2170     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2171     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2172     foreach my $idx ( 0 .. $#chain-1 ) {
2173         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2174     }
2175     if( $wit->is_layered ) {
2176         @chain = @{$wit->uncorrected_path};
2177                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2178                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2179         foreach my $idx( 0 .. $#chain-1 ) {
2180             my $source = $chain[$idx];
2181             my $target = $chain[$idx+1];
2182             $self->add_path( $source, $target, $sig.$self->ac_label )
2183                 unless $self->has_path( $source, $target, $sig );
2184         }
2185     }
2186     $wit->clear_path;
2187     $wit->clear_uncorrected_path;
2188 }
2189
2190 =head2 calculate_ranks
2191
2192 Calculate the reading ranks (that is, their aligned positions relative
2193 to each other) for the graph.  This can only be called on linear collations.
2194
2195 =begin testing
2196
2197 use Text::Tradition;
2198
2199 my $cxfile = 't/data/Collatex-16.xml';
2200 my $t = Text::Tradition->new(
2201     'name'  => 'inline',
2202     'input' => 'CollateX',
2203     'file'  => $cxfile,
2204     );
2205 my $c = $t->collation;
2206
2207 # Make an svg
2208 my $table = $c->alignment_table;
2209 ok( $c->has_cached_table, "Alignment table was cached" );
2210 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2211 $c->calculate_ranks;
2212 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2213 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2214 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2215 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2216 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2217
2218 =end testing
2219
2220 =cut
2221
2222 sub calculate_ranks {
2223     my $self = shift;
2224     # Save the existing ranks, in case we need to invalidate the cached SVG.
2225     throw( "Cannot calculate ranks on a non-linear graph" )
2226         unless $self->linear;
2227     my %existing_ranks;
2228     map { $existing_ranks{$_} = $_->rank } $self->readings;
2229
2230     # Do the rankings based on the relationship equivalence graph, starting
2231     # with the start node.
2232     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2233
2234     # Transfer our rankings from the topological graph to the real one.
2235     foreach my $r ( $self->readings ) {
2236         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2237             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2238         } else {
2239                 # Die. Find the last rank we calculated.
2240                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2241                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2242                         $self->readings;
2243                 my $last = pop @all_defined;
2244             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2245         }
2246     }
2247     # Do we need to invalidate the cached data?
2248     if( $self->has_cached_table ) {
2249         foreach my $r ( $self->readings ) {
2250                 next if defined( $existing_ranks{$r} )
2251                         && $existing_ranks{$r} == $r->rank;
2252                 # Something has changed, so clear the cache
2253                 $self->_clear_cache;
2254                         # ...and recalculate the common readings.
2255                         $self->calculate_common_readings();
2256                 last;
2257         }
2258     }
2259         # The graph calculation information is now up to date.
2260         $self->_graphcalc_done(1);
2261 }
2262
2263 sub _clear_cache {
2264         my $self = shift;
2265         $self->wipe_table if $self->has_cached_table;
2266 }
2267
2268
2269 =head2 flatten_ranks
2270
2271 A convenience method for parsing collation data.  Searches the graph for readings
2272 with the same text at the same rank, and merges any that are found.
2273
2274 =cut
2275
2276 sub flatten_ranks {
2277     my ( $self, %args ) = shift;
2278     my %unique_rank_rdg;
2279     my $changed;
2280     foreach my $p ( $self->identical_readings( %args ) ) {
2281                 # say STDERR "Combining readings at same rank: @$p";
2282                 $changed = 1;
2283                 $self->merge_readings( @$p );
2284                 # TODO see if this now makes a common point.
2285     }
2286     # If we merged readings, the ranks are still fine but the alignment
2287     # table is wrong. Wipe it.
2288     $self->wipe_table() if $changed;
2289 }
2290
2291 =head2 identical_readings
2292 =head2 identical_readings( start => $startnode, end => $endnode )
2293 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2294
2295 Goes through the graph identifying all pairs of readings that appear to be
2296 identical, and therefore able to be merged into a single reading. Returns the
2297 relevant identical pairs. Can be restricted to run over only a part of the
2298 graph, specified either by node or by rank.
2299
2300 =cut
2301
2302 sub identical_readings {
2303         my ( $self, %args ) = @_;
2304     # Find where we should start and end.
2305     my $startrank = $args{startrank} || 0;
2306     if( $args{start} ) {
2307         throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2308                 && $self->reading( $args{start} )->has_rank;
2309         $startrank = $self->reading( $args{start} )->rank;
2310     }
2311     my $endrank = $args{endrank} || $self->end->rank;
2312     if( $args{end} ) {
2313         throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2314                 && $self->reading( $args{end} )->has_rank;
2315         $endrank = $self->reading( $args{end} )->rank;
2316     }
2317
2318     # Make sure the ranks are correct.
2319     unless( $self->_graphcalc_done ) {
2320         $self->calculate_ranks;
2321     }
2322     # Go through the readings looking for duplicates.
2323     my %unique_rank_rdg;
2324     my @pairs;
2325     foreach my $rdg ( $self->readings ) {
2326         next unless $rdg->has_rank;
2327         my $rk = $rdg->rank;
2328         next if $rk > $endrank || $rk < $startrank;
2329         my $key = $rk . "||" . $rdg->text;
2330         if( exists $unique_rank_rdg{$key} ) {
2331                 # Make sure they don't have different grammatical forms
2332                         my $ur = $unique_rank_rdg{$key};
2333                 if( $rdg->is_identical( $ur ) ) {
2334                                 push( @pairs, [ $ur, $rdg ] );
2335                         }
2336         } else {
2337             $unique_rank_rdg{$key} = $rdg;
2338         }
2339     }
2340
2341     return @pairs;
2342 }
2343
2344
2345 =head2 calculate_common_readings
2346
2347 Goes through the graph identifying the readings that appear in every witness
2348 (apart from those with lacunae at that spot.) Marks them as common and returns
2349 the list.
2350
2351 =begin testing
2352
2353 use Text::Tradition;
2354
2355 my $cxfile = 't/data/Collatex-16.xml';
2356 my $t = Text::Tradition->new(
2357     'name'  => 'inline',
2358     'input' => 'CollateX',
2359     'file'  => $cxfile,
2360     );
2361 my $c = $t->collation;
2362
2363 my @common = $c->calculate_common_readings();
2364 is( scalar @common, 8, "Found correct number of common readings" );
2365 my @marked = sort $c->common_readings();
2366 is( scalar @common, 8, "All common readings got marked as such" );
2367 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2368 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2369
2370 =end testing
2371
2372 =cut
2373
2374 sub calculate_common_readings {
2375         my $self = shift;
2376         my @common;
2377         map { $_->is_common( 0 ) } $self->readings;
2378         # Implicitly calls calculate_ranks
2379         my $table = $self->alignment_table;
2380         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2381                 my @row = map { $_->{'tokens'}->[$idx]
2382                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2383                                         @{$table->{'alignment'}};
2384                 my %hash;
2385                 foreach my $r ( @row ) {
2386                         if( $r ) {
2387                                 $hash{$r->id} = $r unless $r->is_meta;
2388                         } else {
2389                                 $hash{'UNDEF'} = $r;
2390                         }
2391                 }
2392                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2393                         my( $r ) = values %hash;
2394                         $r->is_common( 1 );
2395                         push( @common, $r );
2396                 }
2397         }
2398         return @common;
2399 }
2400
2401 =head2 text_from_paths
2402
2403 Calculate the text array for all witnesses from the path, for later consistency
2404 checking.  Only to be used if there is no non-graph-based way to know the
2405 original texts.
2406
2407 =cut
2408
2409 sub text_from_paths {
2410         my $self = shift;
2411     foreach my $wit ( $self->tradition->witnesses ) {
2412         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2413         my @text;
2414         foreach my $r ( @readings ) {
2415                 next if $r->is_meta;
2416                 push( @text, $r->text );
2417         }
2418         $wit->text( \@text );
2419         if( $wit->is_layered ) {
2420                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2421                                                                                                   $wit->sigil.$self->ac_label );
2422                         my @uctext;
2423                         foreach my $r ( @ucrdgs ) {
2424                                 next if $r->is_meta;
2425                                 push( @uctext, $r->text );
2426                         }
2427                         $wit->layertext( \@uctext );
2428         }
2429     }
2430 }
2431
2432 =head1 UTILITY FUNCTIONS
2433
2434 =head2 common_predecessor( $reading_a, $reading_b )
2435
2436 Find the last reading that occurs in sequence before both the given readings.
2437 At the very least this should be $self->start.
2438
2439 =head2 common_successor( $reading_a, $reading_b )
2440
2441 Find the first reading that occurs in sequence after both the given readings.
2442 At the very least this should be $self->end.
2443
2444 =begin testing
2445
2446 use Text::Tradition;
2447
2448 my $cxfile = 't/data/Collatex-16.xml';
2449 my $t = Text::Tradition->new(
2450     'name'  => 'inline',
2451     'input' => 'CollateX',
2452     'file'  => $cxfile,
2453     );
2454 my $c = $t->collation;
2455
2456 is( $c->common_predecessor( 'n24', 'n23' )->id,
2457     'n20', "Found correct common predecessor" );
2458 is( $c->common_successor( 'n24', 'n23' )->id,
2459     '__END__', "Found correct common successor" );
2460
2461 is( $c->common_predecessor( 'n19', 'n17' )->id,
2462     'n16', "Found correct common predecessor for readings on same path" );
2463 is( $c->common_successor( 'n21', 'n10' )->id,
2464     '__END__', "Found correct common successor for readings on same path" );
2465
2466 =end testing
2467
2468 =cut
2469
2470 ## Return the closest reading that is a predecessor of both the given readings.
2471 sub common_predecessor {
2472         my $self = shift;
2473         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2474         return $self->_common_in_path( $r1, $r2, 'predecessors' );
2475 }
2476
2477 sub common_successor {
2478         my $self = shift;
2479         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2480         return $self->_common_in_path( $r1, $r2, 'successors' );
2481 }
2482
2483
2484 # TODO think about how to do this without ranks...
2485 sub _common_in_path {
2486         my( $self, $r1, $r2, $dir ) = @_;
2487         my $iter = $self->end->rank;
2488         my @candidates;
2489         my @last_r1 = ( $r1 );
2490         my @last_r2 = ( $r2 );
2491         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2492         my %all_seen;
2493         # say STDERR "Finding common $dir for $r1, $r2";
2494         while( !@candidates ) {
2495                 last unless $iter--;  # Avoid looping infinitely
2496                 # Iterate separately down the graph from r1 and r2
2497                 my( @new_lc1, @new_lc2 );
2498                 foreach my $lc ( @last_r1 ) {
2499                         foreach my $p ( $lc->$dir ) {
2500                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2501                                         # say STDERR "Path candidate $p from $lc";
2502                                         push( @candidates, $p );
2503                                 } elsif( !$all_seen{$p->id} ) {
2504                                         $all_seen{$p->id} = 'r1';
2505                                         push( @new_lc1, $p );
2506                                 }
2507                         }
2508                 }
2509                 foreach my $lc ( @last_r2 ) {
2510                         foreach my $p ( $lc->$dir ) {
2511                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2512                                         # say STDERR "Path candidate $p from $lc";
2513                                         push( @candidates, $p );
2514                                 } elsif( !$all_seen{$p->id} ) {
2515                                         $all_seen{$p->id} = 'r2';
2516                                         push( @new_lc2, $p );
2517                                 }
2518                         }
2519                 }
2520                 @last_r1 = @new_lc1;
2521                 @last_r2 = @new_lc2;
2522         }
2523         my @answer = sort { $a->rank <=> $b->rank } @candidates;
2524         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2525 }
2526
2527 sub throw {
2528         Text::Tradition::Error->throw(
2529                 'ident' => 'Collation error',
2530                 'message' => $_[0],
2531                 );
2532 }
2533
2534 no Moose;
2535 __PACKAGE__->meta->make_immutable;
2536
2537 =head1 BUGS/TODO
2538
2539 =over
2540
2541 =item * Rework XML serialization in a more modular way
2542
2543 =back
2544
2545 =head1 LICENSE
2546
2547 This package is free software and is provided "as is" without express
2548 or implied warranty.  You can redistribute it and/or modify it under
2549 the same terms as Perl itself.
2550
2551 =head1 AUTHOR
2552
2553 Tara L Andrews E<lt>aurum@cpan.orgE<gt>