base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use JSON qw/ to_json /;
  10 use Text::CSV;
  11 use Text::Tradition::Collation::Data;
  12 use Text::Tradition::Collation::Reading;
  13 use Text::Tradition::Collation::RelationshipStore;
  14 use Text::Tradition::Error;
  15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  16 use XML::LibXML;
  17 use XML::LibXML::XPathContext;
  18 use Moose;
  19
  20 has _data => (
  21         isa      => 'Text::Tradition::Collation::Data',
  22         is       => 'ro',
  23         required => 1,
  24         handles  => [ qw(
  25                 sequence
  26                 paths
  27                 _set_relations
  28                 relations
  29                 _set_start
  30                 _set_end
  31                 ac_label
  32                 has_cached_table
  33                 relationships
  34                 relationship_types
  35                 related_readings
  36                 get_relationship
  37                 del_relationship
  38                 equivalence
  39                 equivalence_graph
  40                 readings
  41                 reading
  42                 _add_reading
  43                 del_reading
  44                 has_reading
  45                 wit_list_separator
  46                 baselabel
  47                 linear
  48                 wordsep
  49                 direction
  50                 start
  51                 end
  52                 cached_table
  53                 _graphcalc_done
  54                 has_cached_svg
  55                 wipe_table
  56         )]
  57 );
  58
  59 has 'tradition' => (
  60     is => 'ro',
  61     isa => 'Text::Tradition',
  62     writer => '_set_tradition',
  63     weak_ref => 1,
  64     );
  65
  66 =encoding utf8
  67
  68 =head1 NAME
  69
  70 Text::Tradition::Collation - a software model for a text collation
  71
  72 =head1 SYNOPSIS
  73
  74   use Text::Tradition;
  75   my $t = Text::Tradition->new(
  76     'name' => 'this is a text',
  77     'input' => 'TEI',
  78     'file' => '/path/to/tei_parallel_seg_file.xml' );
  79
  80   my $c = $t->collation;
  81   my @readings = $c->readings;
  82   my @paths = $c->paths;
  83   my @relationships = $c->relationships;
  84
  85   my $svg_variant_graph = $t->collation->as_svg();
  86
  87 =head1 DESCRIPTION
  88
  89 Text::Tradition is a library for representation and analysis of collated
  90 texts, particularly medieval ones.  The Collation is the central feature of
  91 a Tradition, where the text, its sequence of readings, and its relationships
  92 between readings are actually kept.
  93
  94 =head1 CONSTRUCTOR
  95
  96 =head2 new
  97
  98 The constructor.  Takes a hash or hashref of the following arguments:
  99
 100 =over
 101
 102 =item * tradition - The Text::Tradition object to which the collation
 103 belongs. Required.
 104
 105 =item * linear - Whether the collation should be linear; that is, whether
 106 transposed readings should be treated as two linked readings rather than one,
 107 and therefore whether the collation graph is acyclic.  Defaults to true.
 108
 109 =item * baselabel - The default label for the path taken by a base text
 110 (if any). Defaults to 'base text'.
 111
 112 =item * wit_list_separator - The string to join a list of witnesses for
 113 purposes of making labels in display graphs.  Defaults to ', '.
 114
 115 =item * ac_label - The extra label to tack onto a witness sigil when
 116 representing another layer of path for the given witness - that is, when
 117 a text has more than one possible reading due to scribal corrections or
 118 the like.  Defaults to ' (a.c.)'.
 119
 120 =item * wordsep - The string used to separate words in the original text.
 121 Defaults to ' '.
 122
 123 =back
 124
 125 =head1 ACCESSORS
 126
 127 =head2 tradition
 128
 129 =head2 linear
 130
 131 =head2 wit_list_separator
 132
 133 =head2 baselabel
 134
 135 =head2 ac_label
 136
 137 =head2 wordsep
 138
 139 Simple accessors for collation attributes.
 140
 141 =head2 start
 142
 143 The meta-reading at the start of every witness path.
 144
 145 =head2 end
 146
 147 The meta-reading at the end of every witness path.
 148
 149 =head2 readings
 150
 151 Returns all Reading objects in the graph.
 152
 153 =head2 reading( $id )
 154
 155 Returns the Reading object corresponding to the given ID.
 156
 157 =head2 add_reading( $reading_args )
 158
 159 Adds a new reading object to the collation.
 160 See L<Text::Tradition::Collation::Reading> for the available arguments.
 161
 162 =head2 del_reading( $object_or_id )
 163
 164 Removes the given reading from the collation, implicitly removing its
 165 paths and relationships.
 166
 167 =head2 has_reading( $id )
 168
 169 Predicate to see whether a given reading ID is in the graph.
 170
 171 =head2 reading_witnesses( $object_or_id )
 172
 173 Returns a list of sigils whose witnesses contain the reading.
 174
 175 =head2 paths
 176
 177 Returns all reading paths within the document - that is, all edges in the
 178 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 179
 180 =head2 add_path( $source, $target, $sigil )
 181
 182 Links the given readings in the collation in sequence, under the given witness
 183 sigil.  The readings may be specified by object or ID.
 184
 185 =head2 del_path( $source, $target, $sigil )
 186
 187 Links the given readings in the collation in sequence, under the given witness
 188 sigil.  The readings may be specified by object or ID.
 189
 190 =head2 has_path( $source, $target );
 191
 192 Returns true if the two readings are linked in sequence in any witness.
 193 The readings may be specified by object or ID.
 194
 195 =head2 relationships
 196
 197 Returns all Relationship objects in the collation.
 198
 199 =head2 add_relationship( $reading, $other_reading, $options, $changed_readings )
 200
 201 Adds a new relationship of the type given in $options between the two readings,
 202 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 203 where $status is true on success, and @vectors is a list of relationship edges
 204 that were ultimately added. If an array reference is passed in as $changed_readings,
 205 then any readings that were altered due to the relationship creation are added to
 206 the array.
 207
 208 See L<Text::Tradition::Collation::Relationship> for the available options.
 209
 210 =cut
 211
 212 sub BUILDARGS {
 213         my ( $class, @args ) = @_;
 214         my %args = @args == 1 ? %{ $args[0] } : @args;
 215         # TODO determine these from the Moose::Meta object
 216         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 217                 linear wordsep direction start end cached_table _graphcalc_done);
 218         my %data_args;
 219         for my $attr (@delegate_attrs) {
 220                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 221         }
 222         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 223         return \%args;
 224 }
 225
 226 sub BUILD {
 227     my $self = shift;
 228     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 229     $self->_set_start( $self->add_reading(
 230         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 231     $self->_set_end( $self->add_reading(
 232         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 233 }
 234
 235 =head2 register_relationship_type( %relationship_definition )
 236
 237 Add a relationship type definition to this collation. The argument can be either a
 238 hash or a hashref, defining the properties of the relationship. For relationship types
 239 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
 240
 241 =head2 get_relationship_type( $relationship_name )
 242
 243 Retrieve the RelationshipType object for the relationship with the given name.
 244
 245 =cut
 246
 247 sub register_relationship_type {
 248         my $self = shift;
 249         my %args = @_ == 1 ? %{$_[0]} : @_;
 250         if( $self->relations->has_type( $args{name} ) ) {
 251                 throw( 'Relationship type ' . $args{name} . ' already registered' );
 252         }
 253         $self->relations->add_type( %args );
 254 }
 255
 256 sub get_relationship_type {
 257         my( $self, $name ) = @_;
 258                 return $self->relations->has_type( $name )
 259                         ? $self->relations->type( $name ) : undef;
 260 }
 261
 262 ### Reading construct/destruct functions
 263
 264 sub add_reading {
 265         my( $self, $reading ) = @_;
 266         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 267                 my %args = %$reading;
 268                 if( $args{'init'} ) {
 269                         # If we are initializing an empty collation, don't assume that we
 270                         # have set a tradition.
 271                         delete $args{'init'};
 272                 } elsif( $self->tradition->can('language') && $self->tradition->has_language
 273                         && !exists $args{'language'} ) {
 274                         $args{'language'} = $self->tradition->language;
 275                 }
 276                 $reading = Text::Tradition::Collation::Reading->new(
 277                         'collation' => $self,
 278                         %args );
 279         }
 280         # First check to see if a reading with this ID exists.
 281         if( $self->reading( $reading->id ) ) {
 282                 throw( "Collation already has a reading with id " . $reading->id );
 283         }
 284         $self->_graphcalc_done(0);
 285         $self->_add_reading( $reading->id => $reading );
 286         # Once the reading has been added, put it in both graphs.
 287         $self->sequence->add_vertex( $reading->id );
 288         $self->relations->add_reading( $reading->id );
 289         return $reading;
 290 };
 291
 292 around del_reading => sub {
 293         my $orig = shift;
 294         my $self = shift;
 295         my $arg = shift;
 296
 297         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 298                 $arg = $arg->id;
 299         }
 300         # Remove the reading from the graphs.
 301         $self->_graphcalc_done(0);
 302         $self->_clear_cache; # Explicitly clear caches to GC the reading
 303         $self->sequence->delete_vertex( $arg );
 304         $self->relations->delete_reading( $arg );
 305
 306         # Carry on.
 307         $self->$orig( $arg );
 308 };
 309
 310 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 311
 312 Merges the $second reading into the $main one. If $concatenate is true, then
 313 the merged node will carry the text of both readings, concatenated with either
 314 $with_str (if specified) or a sensible default (the empty string if the
 315 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 316
 317 The first two arguments may be either readings or reading IDs.
 318
 319 =begin testing
 320
 321 use Text::Tradition;
 322 use TryCatch;
 323
 324 my $cxfile = 't/data/Collatex-16.xml';
 325 my $t = Text::Tradition->new(
 326     'name'  => 'inline',
 327     'input' => 'CollateX',
 328     'file'  => $cxfile,
 329     );
 330 my $c = $t->collation;
 331
 332 my $rno = scalar $c->readings;
 333 # Split n21 ('unto') for testing purposes
 334 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 335 my $old_r = $c->reading( 'n21' );
 336 $old_r->alter_text( 'to' );
 337 $c->del_path( 'n20', 'n21', 'A' );
 338 $c->add_path( 'n20', 'n21p0', 'A' );
 339 $c->add_path( 'n21p0', 'n21', 'A' );
 340 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
 341 $c->flatten_ranks();
 342 ok( $c->reading( 'n21p0' ), "New reading exists" );
 343 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 344
 345 # Combine n3 and n4 ( with his )
 346 $c->merge_readings( 'n3', 'n4', 1 );
 347 ok( !$c->reading('n4'), "Reading n4 is gone" );
 348 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 349
 350 # Collapse n9 and n10 ( rood / root )
 351 $c->merge_readings( 'n9', 'n10' );
 352 ok( !$c->reading('n10'), "Reading n10 is gone" );
 353 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 354
 355 # Try to combine n21 and n21p0. This should break.
 356 my $remaining = $c->reading('n21');
 357 $remaining ||= $c->reading('n22');  # one of these should still exist
 358 try {
 359         $c->merge_readings( 'n21p0', $remaining, 1 );
 360         ok( 0, "Bad reading merge changed the graph" );
 361 } catch( Text::Tradition::Error $e ) {
 362         like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
 363 } catch {
 364         ok( 0, "Unexpected error on bad reading merge: $@" );
 365 }
 366
 367 try {
 368         $c->calculate_ranks();
 369         ok( 1, "Graph is still evidently whole" );
 370 } catch( Text::Tradition::Error $e ) {
 371         ok( 0, "Caught a rank exception: " . $e->message );
 372 }
 373
 374 =end testing
 375
 376 =cut
 377
 378 sub merge_readings {
 379         my $self = shift;
 380
 381         # Sanity check
 382         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 383         my $mergemeta = $kept_obj->is_meta;
 384         throw( "Cannot merge meta and non-meta reading" )
 385                 unless ( $mergemeta && $del_obj->is_meta )
 386                         || ( !$mergemeta && !$del_obj->is_meta );
 387         if( $mergemeta ) {
 388                 throw( "Cannot merge with start or end node" )
 389                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 390                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 391                 throw( "Cannot combine text of meta readings" ) if $combine;
 392         }
 393         # We can only merge readings in a linear graph if:
 394         # - they are contiguous with only one edge between them, OR
 395         # - they are at equivalent ranks in the graph.
 396         if( $self->linear ) {
 397                 my @delpred = $del_obj->predecessors;
 398                 my @keptsuc = $kept_obj->successors;
 399                 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
 400                         && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
 401                         my( $is_ok, $msg ) = $self->relations->relationship_valid(
 402                                 $kept_obj, $del_obj, 'collated' );
 403                         unless( $is_ok ) {
 404                                 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
 405                         }
 406                 }
 407         }
 408
 409         # We only need the IDs for adding paths to the graph, not the reading
 410         # objects themselves.
 411         my $kept = $kept_obj->id;
 412         my $deleted = $del_obj->id;
 413         $self->_graphcalc_done(0);
 414
 415     # The kept reading should inherit the paths and the relationships
 416     # of the deleted reading.
 417         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 418                 my @vector = ( $kept );
 419                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 420                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 421                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 422                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 423                 $self->sequence->add_edge( @vector );
 424                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 425                 @wits{keys %$fwits} = values %$fwits;
 426                 $self->sequence->set_edge_attributes( @vector, \%wits );
 427         }
 428         $self->relations->merge_readings( $kept, $deleted, $combine );
 429
 430         # Do the deletion deed.
 431         if( $combine ) {
 432                 # Combine the text of the readings
 433                 my $joinstr = $combine_char;
 434                 unless( defined $joinstr ) {
 435                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 436                         $joinstr = $self->wordsep unless defined $joinstr;
 437                 }
 438                 $kept_obj->_combine( $del_obj, $joinstr );
 439         }
 440         $self->del_reading( $deleted );
 441 }
 442
 443 =head2 merge_related( @relationship_types )
 444
 445 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
 446
 447 WARNING: This operation cannot be undone.
 448
 449 =cut
 450
 451 =begin testing
 452
 453 use Test::Warn;
 454 use Text::Tradition;
 455 use TryCatch;
 456
 457 my $t;
 458 warnings_exist {
 459         $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
 460 } [qr/Cannot set relationship on a meta reading/],
 461         "Got expected relationship drop warning on parse";
 462
 463 my $c = $t->collation;
 464 # Force the transitive propagation of all existing relationships.
 465 $c->relations->propagate_all_relationships();
 466
 467 my %rdg_ids;
 468 map { $rdg_ids{$_} = 1 } $c->readings;
 469 $c->merge_related( 'orthographic' );
 470 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
 471         "Successfully collapsed orthographic variation" );
 472 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
 473 foreach my $rid ( keys %rdg_ids ) {
 474         my $exp = $rdg_ids{$rid};
 475         is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
 476                 ( $exp ? "retained" : "removed" ) );
 477 }
 478 ok( $c->linear, "Graph is still linear" );
 479 try {
 480         $c->calculate_ranks; # This should succeed
 481         ok( 1, "Can still calculate ranks on the new graph" );
 482 } catch {
 483         ok( 0, "Rank calculation on merged graph failed: $@" );
 484 }
 485
 486 # Now add some transpositions
 487 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
 488 $c->merge_related( 'transposition' );
 489 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
 490         "Transposed relationship is merged away" );
 491 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
 492 ok( !$c->linear, "Graph is no longer linear" );
 493 try {
 494         $c->calculate_ranks; # This should fail
 495         ok( 0, "Rank calculation happened on nonlinear graph?!" );
 496 } catch ( Text::Tradition::Error $e ) {
 497         is( $e->message, 'Cannot calculate ranks on a non-linear graph',
 498                 "Rank calculation on merged graph threw an error" );
 499 }
 500
 501 =end testing
 502
 503 =cut
 504
 505 # TODO: there should be a way to display merged without affecting the underlying data!
 506
 507 sub merge_related {
 508         my $self = shift;
 509         my %reltypehash;
 510         map { $reltypehash{$_} = 1 } @_;
 511
 512         # Set up the filter for finding related readings
 513         my $filter = sub {
 514                 exists $reltypehash{$_[0]->type};
 515         };
 516
 517         # Go through all readings looking for related ones
 518         foreach my $r ( $self->readings ) {
 519                 next unless $self->reading( "$r" ); # might have been deleted meanwhile
 520                 while( my @related = $self->related_readings( $r, $filter ) ) {
 521                         push( @related, $r );
 522                         @related = sort {
 523                                         scalar $b->witnesses <=> scalar $a->witnesses
 524                                 } @related;
 525                         my $keep = shift @related;
 526                         foreach my $delr ( @related ) {
 527                                 $self->linear( 0 )
 528                                         unless( $self->get_relationship( $keep, $delr )->colocated );
 529                                 $self->merge_readings( $keep, $delr );
 530                         }
 531                 }
 532         }
 533 }
 534
 535 =head2 compress_readings
 536
 537 Where possible in the graph, compresses plain sequences of readings into a
 538 single reading. The sequences must consist of readings with no
 539 relationships to other readings, with only a single witness path between
 540 them and no other witness paths from either that would skip the other. The
 541 readings must also not be marked as nonsense or bad grammar.
 542
 543 WARNING: This operation cannot be undone.
 544
 545 =begin testing
 546
 547 use Text::Tradition;
 548
 549 my $t = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 550 my $c = $t->collation;
 551 my $n = scalar $c->readings;
 552 $c->compress_readings();
 553 is( scalar $c->readings, $n - 6, "Compressing readings seems to work" );
 554
 555 # Now put in a join-word and make sure the thing still works.
 556 my $t2 = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
 557 my $c2 = $t2->collation;
 558 # Split n21 ('unto') for testing purposes
 559 my $new_r = $c2->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 560 my $old_r = $c2->reading( 'n21' );
 561 $old_r->alter_text( 'to' );
 562 $c2->del_path( 'n20', 'n21', 'A' );
 563 $c2->add_path( 'n20', 'n21p0', 'A' );
 564 $c2->add_path( 'n21p0', 'n21', 'A' );
 565 $c2->calculate_ranks();
 566 is( scalar $c2->readings, $n + 1, "We have our extra test reading" );
 567 $c2->compress_readings();
 568 is( scalar $c2->readings, $n - 6, "Compressing readings also works with join_next" );
 569 is( $c2->reading( 'n21p0' )->text, 'unto', "The joined word has no space" );
 570
 571
 572 =end testing
 573
 574 =cut
 575
 576 sub compress_readings {
 577         my $self = shift;
 578         # Sanity check: first save the original text of each witness.
 579         my %origtext;
 580         foreach my $wit ( $self->tradition->witnesses ) {
 581                 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
 582                 if( $wit->is_layered ) {
 583                         my $acsig = $wit->sigil . $self->ac_label;
 584                         $origtext{$acsig} = $self->path_text( $acsig );
 585                 }
 586         }
 587
 588         # Now do the deed.
 589         # Anywhere in the graph that there is a reading that joins only to a single
 590         # successor, and neither of these have any relationships, just join the two
 591         # readings.
 592         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 593                 # Now look for readings that can be joined to their successors.
 594                 next unless $rdg->is_combinable;
 595                 my %seen;
 596                 while( $self->sequence->successors( $rdg ) == 1 ) {
 597                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 598                         throw( "Infinite loop" ) if $seen{$next->id};
 599                         $seen{$next->id} = 1;
 600                         last if $self->sequence->predecessors( $next ) > 1;
 601                         last unless $next->is_combinable;
 602                         say "Joining readings $rdg and $next";
 603                         $self->merge_readings( $rdg, $next, 1 );
 604                 }
 605         }
 606
 607         # Finally, make sure we haven't screwed anything up.
 608         foreach my $wit ( $self->tradition->witnesses ) {
 609                 my $pathtext = $self->path_text( $wit->sigil );
 610                 throw( "Text differs for witness " . $wit->sigil )
 611                         unless $pathtext eq $origtext{$wit->sigil};
 612                 if( $wit->is_layered ) {
 613                         my $acsig = $wit->sigil . $self->ac_label;
 614                         $pathtext = $self->path_text( $acsig );
 615                         throw( "Layered text differs for witness " . $wit->sigil )
 616                                 unless $pathtext eq $origtext{$acsig};
 617                 }
 618         }
 619
 620         $self->relations->rebuild_equivalence();
 621         $self->calculate_ranks();
 622 }
 623
 624 # Helper function for manipulating the graph.
 625 sub _stringify_args {
 626         my( $self, $first, $second, @args ) = @_;
 627     $first = $first->id
 628         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 629     $second = $second->id
 630         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 631     return( $first, $second, @args );
 632 }
 633
 634 # Helper function for manipulating the graph.
 635 sub _objectify_args {
 636         my( $self, $first, $second, $arg ) = @_;
 637     $first = $self->reading( $first )
 638         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 639     $second = $self->reading( $second )
 640         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 641     return( $first, $second, $arg );
 642 }
 643
 644 =head2 duplicate_reading( $reading, @witlist )
 645
 646 Split the given reading into two, so that the new reading is in the path for
 647 the witnesses given in @witlist. If the result is that certain non-colocated
 648 relationships (e.g. transpositions) are no longer valid, these will be removed.
 649 Returns the newly-created reading.
 650
 651 =begin testing
 652
 653 use Test::More::UTF8;
 654 use Text::Tradition;
 655 use TryCatch;
 656
 657 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
 658 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
 659 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
 660
 661 my $sc = $st->collation;
 662 my $numr = 17;
 663 ok( $sc->reading('n131'), "Tradition has the affected reading" );
 664 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
 665 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
 666
 667 # Detach the erroneously collated reading
 668 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
 669 ok( $newr, "New reading was created" );
 670 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
 671 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
 672 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
 673 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
 674 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
 675
 676 # Check that the bad transposition is gone
 677 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
 678 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
 679
 680 # The collation should not be fixed
 681 my @pairs = $sc->identical_readings();
 682 is( scalar @pairs, 0, "Not re-collated yet" );
 683 # Fix the collation
 684 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
 685 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
 686 is( scalar @pairs, 3, "Found three more identical readings" );
 687 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
 688 $sc->flatten_ranks();
 689 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
 690
 691 # Check that we can't "duplicate" a reading with no wits or with all wits
 692 try {
 693         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
 694         ok( 0, "Reading duplication without witnesses throws an error" );
 695 } catch( Text::Tradition::Error $e ) {
 696         like( $e->message, qr/Must specify one or more witnesses/,
 697                 "Reading duplication without witnesses throws the expected error" );
 698 } catch {
 699         ok( 0, "Reading duplication without witnesses threw the wrong error" );
 700 }
 701
 702 try {
 703         my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
 704         ok( 0, "Reading duplication with all witnesses throws an error" );
 705 } catch( Text::Tradition::Error $e ) {
 706         like( $e->message, qr/Cannot join all witnesses/,
 707                 "Reading duplication with all witnesses throws the expected error" );
 708 } catch {
 709         ok( 0, "Reading duplication with all witnesses threw the wrong error" );
 710 }
 711
 712 try {
 713         $sc->calculate_ranks();
 714         ok( 1, "Graph is still evidently whole" );
 715 } catch( Text::Tradition::Error $e ) {
 716         ok( 0, "Caught a rank exception: " . $e->message );
 717 }
 718
 719 =end testing
 720
 721 =cut
 722
 723 sub duplicate_reading {
 724         my( $self, $r, @wits ) = @_;
 725         # Check that we are not doing anything unwise.
 726         throw( "Must specify one or more witnesses for the duplicated reading" )
 727                 unless @wits;
 728         unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
 729                 $r = $self->reading( $r );
 730         }
 731         throw( "Cannot duplicate a meta-reading" )
 732                 if $r->is_meta;
 733         throw( "Cannot join all witnesses to the new reading" )
 734                 if scalar( @wits ) == scalar( $r->witnesses );
 735
 736         # Get all the reading attributes and duplicate them.
 737         my $rmeta = Text::Tradition::Collation::Reading->meta;
 738         my %args;
 739     foreach my $attr( $rmeta->get_all_attributes ) {
 740                 next if $attr->name =~ /^_/;
 741                 my $acc = $attr->get_read_method;
 742                 if( !$acc && $attr->has_applied_traits ) {
 743                         my $tr = $attr->applied_traits;
 744                         if( $tr->[0] =~ /::(Array|Hash)$/ ) {
 745                                 my $which = $1;
 746                                 my %methods = reverse %{$attr->handles};
 747                                 $acc = $methods{elements};
 748                                 $args{$attr->name} = $which eq 'Array'
 749                                         ? [ $r->$acc ] : { $r->$acc };
 750                         }
 751                 } elsif( $acc ) {
 752                         my $attrval = $r->$acc;
 753                         if( defined $attrval ) {
 754                                 $args{$attr->name} = $attrval;
 755                         }
 756                 }
 757         }
 758         # By definition the new reading will no longer be common.
 759         $args{is_common} = 0;
 760         # The new reading also needs its own ID.
 761         $args{id} = $self->_generate_dup_id( $r->id );
 762
 763         # Try to make the new reading.
 764         my $newr = $self->add_reading( \%args );
 765         # The old reading is also no longer common.
 766         $r->is_common( 0 );
 767
 768         # For each of the witnesses, dissociate from the old reading and
 769         # associate with the new.
 770         foreach my $wit ( @wits ) {
 771                 my $prior = $self->prior_reading( $r, $wit );
 772                 my $next = $self->next_reading( $r, $wit );
 773                 $self->del_path( $prior, $r, $wit );
 774                 $self->add_path( $prior, $newr, $wit );
 775                 $self->del_path( $r, $next, $wit );
 776                 $self->add_path( $newr, $next, $wit );
 777         }
 778
 779         # If the graph is ranked, we need to look for relationships that are now
 780         # invalid (i.e. 'non-colocation' types that might now be colocated) and
 781         # remove them. If not, we can skip it.
 782         my $succ;
 783         my %rrk;
 784         my @deleted_relations;
 785         if( $self->end->has_rank ) {
 786                 # Find the point where we can stop checking
 787                 $succ = $self->common_successor( $r, $newr );
 788
 789                 # Hash the existing ranks
 790                 foreach my $rdg ( $self->readings ) {
 791                         $rrk{$rdg->id} = $rdg->rank;
 792                 }
 793                 # Calculate the new ranks
 794                 $self->calculate_ranks();
 795
 796                 # Check for invalid non-colocated relationships among changed-rank readings
 797                 # from where the ranks start changing up to $succ
 798                 my $lastrank = $succ->rank;
 799                 foreach my $rdg ( $self->readings ) {
 800                         next if $rdg->rank > $lastrank;
 801                         next if $rdg->rank == $rrk{$rdg->id};
 802                         my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
 803                         next unless @noncolo;
 804                         foreach my $nc ( @noncolo ) {
 805                                 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
 806                                         push( @deleted_relations, [ $rdg->id, $nc->id ] );
 807                                 }
 808                         }
 809                 }
 810         }
 811         return ( $newr, @deleted_relations );
 812 }
 813
 814 sub _generate_dup_id {
 815         my( $self, $rid ) = @_;
 816         my $newid;
 817         my $i = 0;
 818         while( !$newid ) {
 819                 $newid = $rid."_$i";
 820                 if( $self->has_reading( $newid ) ) {
 821                         $newid = '';
 822                         $i++;
 823                 }
 824         }
 825         return $newid;
 826 }
 827
 828 ### Path logic
 829
 830 sub add_path {
 831         my $self = shift;
 832
 833         # We only need the IDs for adding paths to the graph, not the reading
 834         # objects themselves.
 835     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 836
 837         $self->_graphcalc_done(0);
 838         # Connect the readings
 839         unless( $self->sequence->has_edge( $source, $target ) ) {
 840             $self->sequence->add_edge( $source, $target );
 841             $self->relations->add_equivalence_edge( $source, $target );
 842         }
 843     # Note the witness in question
 844     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 845 }
 846
 847 sub del_path {
 848         my $self = shift;
 849         my @args;
 850         if( ref( $_[0] ) eq 'ARRAY' ) {
 851                 my $e = shift @_;
 852                 @args = ( @$e, @_ );
 853         } else {
 854                 @args = @_;
 855         }
 856
 857         # We only need the IDs for removing paths from the graph, not the reading
 858         # objects themselves.
 859     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 860
 861         $self->_graphcalc_done(0);
 862         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 863                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 864         }
 865         unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
 866                 $self->sequence->delete_edge( $source, $target );
 867                 $self->relations->delete_equivalence_edge( $source, $target );
 868         }
 869 }
 870
 871
 872 # Extra graph-alike utility
 873 sub has_path {
 874         my $self = shift;
 875     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 876         return undef unless $self->sequence->has_edge( $source, $target );
 877         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 878 }
 879
 880 =head2 clear_witness( @sigil_list )
 881
 882 Clear the given witnesses out of the collation entirely, removing references
 883 to them in paths, and removing readings that belong only to them.  Should only
 884 be called via $tradition->del_witness.
 885
 886 =cut
 887
 888 sub clear_witness {
 889         my( $self, @sigils ) = @_;
 890
 891         $self->_graphcalc_done(0);
 892         # Clear the witness(es) out of the paths
 893         foreach my $e ( $self->paths ) {
 894                 foreach my $sig ( @sigils ) {
 895                         $self->del_path( $e, $sig );
 896                 }
 897         }
 898
 899         # Clear out the newly unused readings
 900         foreach my $r ( $self->readings ) {
 901                 unless( $self->reading_witnesses( $r ) ) {
 902                         $self->del_reading( $r );
 903                 }
 904         }
 905 }
 906
 907 sub add_relationship {
 908         my $self = shift;
 909     my( $source, $target, $opts, $altered_readings ) = $self->_stringify_args( @_ );
 910     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 911     my $did_recalc;
 912     foreach my $v ( @vectors ) {
 913         my $rel = $self->get_relationship( $v );
 914         next unless $rel->colocated;
 915         my $r1 = $self->reading( $v->[0] );
 916         my $r2 =  $self->reading( $v->[1] );
 917         # If it's a spelling or orthographic relationship, and one is marked
 918         # as a lemma, set the normal form on the non-lemma to reflect that.
 919         if( $r1->does( 'Text::Tradition::Morphology' ) ) {
 920                 my @changed = $r1->relationship_added( $r2, $rel );
 921                 if( ref( $altered_readings ) eq 'ARRAY' ) {
 922                         push( @$altered_readings, @changed );
 923                 }
 924         }
 925         next if $did_recalc;
 926         if( $r1->has_rank && $r2->has_rank && $r1->rank ne $r2->rank ) {
 927                         $self->_graphcalc_done(0);
 928                         $self->_clear_cache;
 929                         $did_recalc = 1;
 930         }
 931     }
 932     return @vectors;
 933 }
 934
 935 around qw/ get_relationship del_relationship / => sub {
 936         my $orig = shift;
 937         my $self = shift;
 938         my @args = @_;
 939         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 940                 @args = @{$_[0]};
 941         }
 942         my @stringargs = $self->_stringify_args( @args );
 943         $self->$orig( @stringargs );
 944 };
 945
 946 =head2 reading_witnesses( $reading )
 947
 948 Return a list of sigils corresponding to the witnesses in which the reading appears.
 949
 950 =cut
 951
 952 sub reading_witnesses {
 953         my( $self, $reading ) = @_;
 954         # We need only check either the incoming or the outgoing edges; I have
 955         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 956         if( $reading eq $self->start ) {
 957                 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
 958         }
 959         my %all_witnesses;
 960         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 961                 my $wits = $self->sequence->get_edge_attributes( @$e );
 962                 @all_witnesses{ keys %$wits } = 1;
 963         }
 964         my $acstr = $self->ac_label;
 965         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 966                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 967         }
 968         return keys %all_witnesses;
 969 }
 970
 971 =head1 OUTPUT METHODS
 972
 973 =head2 as_svg( \%options )
 974
 975 Returns an SVG string that represents the graph, via as_dot and graphviz.
 976 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 977
 978 =cut
 979
 980 sub as_svg {
 981     my( $self, $opts ) = @_;
 982     throw( "Need GraphViz installed to output SVG" )
 983         unless File::Which::which( 'dot' );
 984     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 985     $self->calculate_ranks()
 986         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 987         my @cmd = qw/dot -Tsvg/;
 988         my( $svg, $err );
 989         my $dotfile = File::Temp->new();
 990         ## USE FOR DEBUGGING
 991         # $dotfile->unlink_on_destroy(0);
 992         binmode $dotfile, ':utf8';
 993         print $dotfile $self->as_dot( $opts );
 994         push( @cmd, $dotfile->filename );
 995         run( \@cmd, ">", binary(), \$svg );
 996         $svg = decode_utf8( $svg );
 997         return $svg;
 998 }
 999
1000
1001 =head2 as_dot( \%options )
1002
1003 Returns a string that is the collation graph expressed in dot
1004 (i.e. GraphViz) format.  Options include:
1005
1006 =over 4
1007
1008 =item * from
1009
1010 =item * to
1011
1012 =item * color_common
1013
1014 =back
1015
1016 =cut
1017
1018 sub as_dot {
1019     my( $self, $opts ) = @_;
1020     my $startrank = $opts->{'from'} if $opts;
1021     my $endrank = $opts->{'to'} if $opts;
1022     my $color_common = $opts->{'color_common'} if $opts;
1023     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
1024        && $self->end->rank > 100;
1025     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
1026
1027     # Check the arguments
1028     if( $startrank ) {
1029         return if $endrank && $startrank > $endrank;
1030         return if $startrank > $self->end->rank;
1031         }
1032         if( defined $endrank ) {
1033                 return if $endrank < 0;
1034                 $endrank = undef if $endrank == $self->end->rank;
1035         }
1036
1037     my $graph_name = $self->tradition->name;
1038     $graph_name =~ s/[^\w\s]//g;
1039     $graph_name = join( '_', split( /\s+/, $graph_name ) );
1040
1041     my %graph_attrs = (
1042         'bgcolor' => 'none',
1043         );
1044     unless( $self->direction eq 'BI' ) {
1045         $graph_attrs{rankdir} = $self->direction;
1046     }
1047     my %node_attrs = (
1048         'fontsize' => 14,
1049         'fillcolor' => 'white',
1050         'style' => 'filled',
1051         'shape' => 'ellipse'
1052         );
1053     my %edge_attrs = (
1054         'arrowhead' => 'open',
1055         'color' => '#000000',
1056         'fontcolor' => '#000000',
1057         );
1058
1059     my $dot = sprintf( "digraph %s {\n", $graph_name );
1060     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1061     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1062
1063         # Output substitute start/end readings if necessary
1064         if( $startrank ) {
1065                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1066         }
1067         if( $endrank ) {
1068                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1069         }
1070         if( $STRAIGHTENHACK ) {
1071                 ## HACK part 1
1072                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1073                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1074                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1075         }
1076         my %used;  # Keep track of the readings that actually appear in the graph
1077         # Sort the readings by rank if we have ranks; this speeds layout.
1078         my @all_readings = $self->end->has_rank
1079                 ? sort { $a->rank <=> $b->rank } $self->readings
1080                 : $self->readings;
1081         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1082     foreach my $reading ( @all_readings ) {
1083         # Only output readings within our rank range.
1084         next if $startrank && $reading->rank < $startrank;
1085         next if $endrank && $reading->rank > $endrank;
1086         $used{$reading->id} = 1;
1087         # Need not output nodes without separate labels
1088         next if $reading->id eq $reading->text;
1089         my $rattrs;
1090         my $label = $reading->text;
1091         unless( $label =~ /^[[:punct:]]+$/ ) {
1092                 $label .= '-' if $reading->join_next;
1093             $label = "-$label" if $reading->join_prior;
1094         }
1095         $label =~ s/\"/\\\"/g;
1096                 $rattrs->{'label'} = $label;
1097                 $rattrs->{'id'} = $reading->id;
1098                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1099         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1100     }
1101
1102         # Add the real edges.
1103     my @edges = $self->paths;
1104         my( %substart, %subend );
1105     foreach my $edge ( @edges ) {
1106         # Do we need to output this edge?
1107         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1108                 my $label = $self->_path_display_label( $opts,
1109                         $self->path_witnesses( $edge ) );
1110                         my $variables = { %edge_attrs, 'label' => $label };
1111
1112                         # Account for the rank gap if necessary
1113                         my $rank0 = $self->reading( $edge->[0] )->rank
1114                                 if $self->reading( $edge->[0] )->has_rank;
1115                         my $rank1 = $self->reading( $edge->[1] )->rank
1116                                 if $self->reading( $edge->[1] )->has_rank;
1117                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1118                                 $variables->{'minlen'} = $rank1 - $rank0;
1119                         }
1120
1121                         # EXPERIMENTAL: make edge width reflect no. of witnesses
1122                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1123                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1124
1125                         my $varopts = _dot_attr_string( $variables );
1126                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1127                                 $edge->[0], $edge->[1], $varopts );
1128         } elsif( $used{$edge->[0]} ) {
1129                 $subend{$edge->[0]} = $edge->[1];
1130         } elsif( $used{$edge->[1]} ) {
1131                 $substart{$edge->[1]} = $edge->[0];
1132         }
1133     }
1134
1135     # If we are asked to, add relationship links
1136     if( exists $opts->{show_relations} ) {
1137         my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1138         if( $filter eq 'transposition' ) {
1139                 $filter =~ qr/^transposition$/;
1140         }
1141         my %typecolors;
1142         my @types = sort( map { $_->name } $self->relations->types );
1143         if( exists $opts->{graphcolors} ) {
1144                 foreach my $tdx ( 0 .. $#types ) {
1145                         $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1146                 }
1147         } else {
1148                 map { $typecolors{$_} = '#FFA14F' } @types;
1149         }
1150         foreach my $redge ( $self->relationships ) {
1151                 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1152                                 my $rel = $self->get_relationship( $redge );
1153                                 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1154                                 my $variables = {
1155                                         arrowhead => 'none',
1156                                         color => $typecolors{$rel->type},
1157                                         constraint => 'false',
1158                                         penwidth => '3',
1159                                 };
1160                                 unless( exists $opts->{graphcolors} ) {
1161                                         $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1162                                 }
1163                                 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1164                                         $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1165                 }
1166         }
1167     }
1168
1169     # Add substitute start and end edges if necessary
1170     foreach my $node ( keys %substart ) {
1171         my $witstr = $self->_path_display_label( $opts,
1172                 $self->path_witnesses( $substart{$node}, $node ) );
1173         my $variables = { %edge_attrs, 'label' => $witstr };
1174         my $nrdg = $self->reading( $node );
1175         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1176                 # Substart is actually one lower than $startrank
1177                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1178         }
1179         my $varopts = _dot_attr_string( $variables );
1180         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1181         }
1182     foreach my $node ( keys %subend ) {
1183         my $witstr = $self->_path_display_label( $opts,
1184                 $self->path_witnesses( $node, $subend{$node} ) );
1185         my $variables = { %edge_attrs, 'label' => $witstr };
1186         my $varopts = _dot_attr_string( $variables );
1187         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1188         }
1189         # HACK part 2
1190         if( $STRAIGHTENHACK ) {
1191                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1192                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1193         }
1194
1195     $dot .= "}\n";
1196     return $dot;
1197 }
1198
1199 sub _dot_attr_string {
1200         my( $hash ) = @_;
1201         my @attrs;
1202         foreach my $k ( sort keys %$hash ) {
1203                 my $v = $hash->{$k};
1204                 push( @attrs, $k.'="'.$v.'"' );
1205         }
1206         return( '[ ' . join( ', ', @attrs ) . ' ]' );
1207 }
1208
1209 =head2 path_witnesses( $edge )
1210
1211 Returns the list of sigils whose witnesses are associated with the given edge.
1212 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1213
1214 =cut
1215
1216 sub path_witnesses {
1217         my( $self, @edge ) = @_;
1218         # If edge is an arrayref, cope.
1219         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1220                 my $e = shift @edge;
1221                 @edge = @$e;
1222         }
1223         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1224         return @wits;
1225 }
1226
1227 # Helper function. Make a display label for the given witnesses, showing a.c.
1228 # witnesses only where the main witness is not also in the list.
1229 sub _path_display_label {
1230         my $self = shift;
1231         my $opts = shift;
1232         my %wits;
1233         map { $wits{$_} = 1 } @_;
1234
1235         # If an a.c. wit is listed, remove it if the main wit is also listed.
1236         # Otherwise keep it for explicit listing.
1237         my $aclabel = $self->ac_label;
1238         my @disp_ac;
1239         foreach my $w ( sort keys %wits ) {
1240                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1241                         if( exists $wits{$1} ) {
1242                                 delete $wits{$w};
1243                         } else {
1244                                 push( @disp_ac, $w );
1245                         }
1246                 }
1247         }
1248
1249         if( $opts->{'explicit_wits'} ) {
1250                 return join( ', ', sort keys %wits );
1251         } else {
1252                 # See if we are in a majority situation.
1253                 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1254                 $maj = $maj > 5 ? $maj : 5;
1255                 if( scalar keys %wits > $maj ) {
1256                         unshift( @disp_ac, 'majority' );
1257                         return join( ', ', @disp_ac );
1258                 } else {
1259                         return join( ', ', sort keys %wits );
1260                 }
1261         }
1262 }
1263
1264 =head2 as_adjacency_list
1265
1266 Returns a JSON structure that represents the collation sequence graph.
1267
1268 =begin testing
1269
1270 use JSON qw/ from_json /;
1271 use Text::Tradition;
1272
1273 my $t = Text::Tradition->new(
1274         'input' => 'Self',
1275         'file' => 't/data/florilegium_graphml.xml' );
1276 my $c = $t->collation;
1277
1278 # Make a connection so we can test rank preservation
1279 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1280
1281 # Create an adjacency list of the whole thing; test the output.
1282 my $adj_whole = from_json( $c->as_adjacency_list() );
1283 is( scalar @$adj_whole, scalar $c->readings(),
1284         "Same number of nodes in graph and adjacency list" );
1285 my @adj_whole_edges;
1286 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1287 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1288         "Same number of edges in graph and adjacency list" );
1289 # Find the reading whose rank should be preserved
1290 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1291 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1292 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1293
1294 # Now create an adjacency list of just a portion. w76 to w122
1295 my $adj_part = from_json( $c->as_adjacency_list(
1296         { from => $c->reading('w76')->rank,
1297           to   => $c->reading('w122')->rank }));
1298 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1299 my @adj_part_edges;
1300 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1301 is( scalar @adj_part_edges, 58,
1302         "Same number of edges in partial graph and adjacency list" );
1303 # Check for consistency
1304 my %part_nodes;
1305 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1306 foreach my $edge ( @adj_part_edges ) {
1307         my $testid = $edge->{id};
1308         ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1309 }
1310
1311 =end testing
1312
1313 =cut
1314
1315 sub as_adjacency_list {
1316         my( $self, $opts ) = @_;
1317         # Make a structure that contains all the nodes, the nodes they point to,
1318         # and the attributes of the edges that connect them.
1319         # [ { id: 'n0', label: 'Gallia', adjacent: [
1320         #               { id: 'n1', label: 'P Q' } ,
1321         #               { id: 'n2', label: 'R S', minlen: 2 } ] },
1322         #   { id: 'n1', label: 'est', adjacent: [ ... ] },
1323         #   ... ]
1324         my $startrank = $opts->{'from'} || 0;
1325         my $endrank = $opts->{'to'} || $self->end->rank;
1326
1327     $self->calculate_ranks()
1328         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1329         my $list = [];
1330         foreach my $rdg ( $self->readings ) {
1331                 my @successors;
1332                 my $phony = '';
1333                 # Figure out what the node's successors should be.
1334                 if( $rdg eq $self->start && $startrank > 0 ) {
1335                         # Connect the start node with all the nodes at startrank.
1336                         # Lacunas should be included only if the node really has that rank.
1337                         @successors = $self->readings_at_rank( $startrank, 1 );
1338                         $phony = 'start';
1339                 } elsif( $rdg->rank < $startrank
1340                                  || $rdg->rank > $endrank && $rdg ne $self->end ) {
1341                         next;
1342                 } else {
1343                         @successors = $rdg->successors;
1344                 }
1345                 # Make sure that the end node is at the end of the successors
1346                 # list if it is needed.
1347                 if( grep { $_ eq $self->end } @successors ) {
1348                         my @ts = grep { $_ ne $self->end } @successors;
1349                         @successors = ( @ts, $self->end );
1350                 } elsif ( grep { $_->rank > $endrank } @successors ) {
1351                         push( @successors, $self->end );
1352                 }
1353
1354                 my $listitem = { id => $rdg->id, label => $rdg->text };
1355                 my $adjacent = [];
1356                 my @endwits;
1357                 foreach my $succ ( @successors ) {
1358                         my @edgewits;
1359                         if( $phony eq 'start' ) {
1360                                 @edgewits = $succ->witnesses;
1361                         } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1362                                 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1363                         }
1364
1365                         if( $succ eq $self->end ) {
1366                                 @edgewits = @endwits;
1367                         } elsif( $succ->rank > $endrank ) {
1368                                 # These witnesses will point to 'end' instead, not to the
1369                                 # actual successor.
1370                                 push( @endwits, @edgewits );
1371                                 next;
1372                         }
1373                         my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1374                         my $edgedef = { id => $succ->id, label => $edgelabel };
1375                         my $rankoffset = $succ->rank - $rdg->rank;
1376                         if( $rankoffset > 1 and $succ ne $self->end ) {
1377                                 $edgedef->{minlen} = $rankoffset;
1378                         }
1379                         push( @$adjacent, $edgedef );
1380                 }
1381                 $listitem->{adjacent} = $adjacent;
1382                 push( @$list, $listitem );
1383         }
1384         return to_json( $list );
1385 }
1386
1387 =head2 as_graphml
1388
1389 Returns a GraphML representation of the collation.  The GraphML will contain
1390 two graphs. The first expresses the attributes of the readings and the witness
1391 paths that link them; the second expresses the relationships that link the
1392 readings.  This is the native transfer format for a tradition.
1393
1394 =begin testing
1395
1396 use Text::Tradition;
1397 use TryCatch;
1398
1399 my $READINGS = 311;
1400 my $PATHS = 361;
1401
1402 my $datafile = 't/data/florilegium_tei_ps.xml';
1403 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1404                                       'name' => 'test0',
1405                                       'file' => $datafile,
1406                                       'linear' => 1 );
1407
1408 ok( $tradition, "Got a tradition object" );
1409 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1410 ok( $tradition->collation, "Tradition has a collation" );
1411
1412 my $c = $tradition->collation;
1413 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1414 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1415 is( scalar $c->relationships, 0, "Collation has all relationships" );
1416
1417 # Add a few relationships
1418 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1419 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1420 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1421                                           'is_significant' => 'yes' } );
1422
1423 # Now write it to GraphML and parse it again.
1424
1425 my $graphml = $c->as_graphml;
1426 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1427 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1428 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1429 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1430 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1431 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1432
1433 # Now add a stemma, write to GraphML, and look at the output.
1434 SKIP: {
1435         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1436         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1437         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1438         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1439         $graphml = $c->as_graphml;
1440         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1441 }
1442
1443 =end testing
1444
1445 =cut
1446
1447 ## TODO MOVE this to Tradition.pm and modularize it better
1448 sub as_graphml {
1449     my( $self, $options ) = @_;
1450         $self->calculate_ranks unless $self->_graphcalc_done;
1451
1452         my $start = $options->{'from'}
1453                 ? $self->reading( $options->{'from'} ) : $self->start;
1454         my $end = $options->{'to'}
1455                 ? $self->reading( $options->{'to'} ) : $self->end;
1456         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1457                 throw( 'Start node must be before end node' );
1458         }
1459         # The readings need to be ranked for this to work.
1460         $start = $self->start unless $start->has_rank;
1461         $end = $self->end unless $end->has_rank;
1462         my $rankoffset = 0;
1463         unless( $start eq $self->start ) {
1464                 $rankoffset = $start->rank - 1;
1465         }
1466         my %use_readings;
1467
1468     # Some namespaces
1469     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1470     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1471     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1472         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1473
1474     # Create the document and root node
1475     require XML::LibXML;
1476     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1477     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1478     $graphml->setDocumentElement( $root );
1479     $root->setNamespace( $xsi_ns, 'xsi', 0 );
1480     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1481
1482     # List of attribute types to save on our objects and their corresponding
1483     # GraphML types
1484     my %save_types = (
1485         'Str' => 'string',
1486         'Int' => 'int',
1487         'Bool' => 'boolean',
1488         'ReadingID' => 'string',
1489         'RelationshipType' => 'string',
1490         'RelationshipScope' => 'string',
1491         'Ternary' => 'string',
1492     );
1493
1494     # Add the data keys for the graph. Include an extra key 'version' for the
1495     # GraphML output version.
1496     my %graph_data_keys;
1497     my $gdi = 0;
1498     my %graph_attributes = ( 'version' => 'string' );
1499         # Graph attributes include those of Tradition and those of Collation.
1500         my %gattr_from;
1501         # TODO Use meta introspection method from duplicate_reading to do this
1502         # instead of naming custom keys.
1503         my $tmeta = $self->tradition->meta;
1504         my $cmeta = $self->meta;
1505         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1506         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1507         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1508                 next if $attr->name =~ /^_/;
1509                 next unless $save_types{$attr->type_constraint->name};
1510                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1511         }
1512     # Extra custom keys for complex objects that should be saved in some form.
1513     # The subroutine should return a string, or undef/empty.
1514     if( $tmeta->has_method('stemmata') ) {
1515                 $graph_attributes{'stemmata'} = sub {
1516                         my @stemstrs;
1517                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1518                                 $self->tradition->stemmata;
1519                         join( "\n", @stemstrs );
1520                 };
1521         }
1522
1523         if( $tmeta->has_method('user') ) {
1524                 $graph_attributes{'user'} = sub {
1525                         $self->tradition->user ? $self->tradition->user->id : undef
1526                 };
1527         }
1528
1529     foreach my $datum ( sort keys %graph_attributes ) {
1530         $graph_data_keys{$datum} = 'dg'.$gdi++;
1531         my $key = $root->addNewChild( $graphml_ns, 'key' );
1532         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1533                 : $graph_attributes{$datum};
1534         $key->setAttribute( 'attr.name', $datum );
1535         $key->setAttribute( 'attr.type', $dtype );
1536         $key->setAttribute( 'for', 'graph' );
1537         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1538     }
1539
1540     # Add the data keys for reading nodes
1541     my %reading_attributes;
1542     my $rmeta = Text::Tradition::Collation::Reading->meta;
1543     foreach my $attr( $rmeta->get_all_attributes ) {
1544                 next if $attr->name =~ /^_/;
1545                 next unless $save_types{$attr->type_constraint->name};
1546                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1547         }
1548         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1549                 # Extra custom key for the reading morphology
1550                 $reading_attributes{'lexemes'} = 'string';
1551         }
1552
1553     my %node_data_keys;
1554     my $ndi = 0;
1555     foreach my $datum ( sort keys %reading_attributes ) {
1556         $node_data_keys{$datum} = 'dn'.$ndi++;
1557         my $key = $root->addNewChild( $graphml_ns, 'key' );
1558         $key->setAttribute( 'attr.name', $datum );
1559         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1560         $key->setAttribute( 'for', 'node' );
1561         $key->setAttribute( 'id', $node_data_keys{$datum} );
1562     }
1563
1564     # Add the data keys for edges, that is, paths and relationships. Path
1565     # data does not come from a Moose class so is here manually.
1566     my $edi = 0;
1567     my %edge_data_keys;
1568     my %edge_attributes = (
1569         witness => 'string',                    # ID/label for a path
1570         extra => 'boolean',                             # Path key
1571         );
1572     my @path_attributes = keys %edge_attributes; # track our manual additions
1573     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1574     foreach my $attr( $pmeta->get_all_attributes ) {
1575                 next if $attr->name =~ /^_/;
1576                 next unless $save_types{$attr->type_constraint->name};
1577                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1578         }
1579     foreach my $datum ( sort keys %edge_attributes ) {
1580         $edge_data_keys{$datum} = 'de'.$edi++;
1581         my $key = $root->addNewChild( $graphml_ns, 'key' );
1582         $key->setAttribute( 'attr.name', $datum );
1583         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1584         $key->setAttribute( 'for', 'edge' );
1585         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1586     }
1587
1588     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1589     my $xmlidname = $self->tradition->name;
1590     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1591     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1592         $xmlidname = '_'.$xmlidname;
1593     }
1594     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1595     $sgraph->setAttribute( 'edgedefault', 'directed' );
1596     $sgraph->setAttribute( 'id', $xmlidname );
1597     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1598     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1599     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1600     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1601     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1602
1603     # Tradition/collation attribute data
1604     foreach my $datum ( keys %graph_attributes ) {
1605         my $value;
1606         if( $datum eq 'version' ) {
1607                 $value = '3.2';
1608         } elsif( ref( $graph_attributes{$datum} ) ) {
1609                 my $sub = $graph_attributes{$datum};
1610                 $value = &$sub();
1611         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1612                 $value = $self->tradition->$datum;
1613         } else {
1614                 $value = $self->$datum;
1615         }
1616                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1617         }
1618
1619     my $node_ctr = 0;
1620     my %node_hash;
1621     # Add our readings to the graph
1622     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1623         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1624                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1625         $use_readings{$n->id} = 1;
1626         # Add to the main graph
1627         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1628         my $node_xmlid = 'n' . $node_ctr++;
1629         $node_hash{ $n->id } = $node_xmlid;
1630         $node_el->setAttribute( 'id', $node_xmlid );
1631         foreach my $d ( keys %reading_attributes ) {
1632                 my $nval = $n->$d;
1633                 # Custom serialization
1634                 if( $d eq 'lexemes' ) {
1635                                 # If nval is a true value, we have lexemes so we need to
1636                                 # serialize them. Otherwise set nval to undef so that the
1637                                 # key is excluded from this reading.
1638                         $nval = $nval ? $n->_serialize_lexemes : undef;
1639                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1640                         $nval = undef;
1641                 }
1642                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1643                         # Adjust the ranks within the subgraph.
1644                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1645                                 : $nval - $rankoffset;
1646                 }
1647                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1648                         if defined $nval;
1649         }
1650     }
1651
1652     # Add the path edges to the sequence graph
1653     my $edge_ctr = 0;
1654     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1655         # We add an edge in the graphml for every witness in $e.
1656         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1657         my @edge_wits = sort $self->path_witnesses( $e );
1658         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1659         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1660         # Skip any path from start to end; that witness is not in the subgraph.
1661         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1662         foreach my $wit ( @edge_wits ) {
1663                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1664                                                                                 $node_hash{ $e->[0] },
1665                                                                                 $node_hash{ $e->[1] } );
1666                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1667                         $edge_el->setAttribute( 'source', $from );
1668                         $edge_el->setAttribute( 'target', $to );
1669                         $edge_el->setAttribute( 'id', $id );
1670
1671                         # It's a witness path, so add the witness
1672                         my $base = $wit;
1673                         my $key = $edge_data_keys{'witness'};
1674                         # Is this an ante-corr witness?
1675                         my $aclabel = $self->ac_label;
1676                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1677                                 # Keep the base witness
1678                                 $base = $1;
1679                                 # ...and record that this is an 'extra' reading path
1680                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1681                         }
1682                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1683                 }
1684         }
1685
1686         # Report the actual number of nodes and edges that went in
1687         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1688         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1689
1690         # Add the relationship graph to the XML
1691         map { delete $edge_data_keys{$_} } @path_attributes;
1692         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1693                 $node_data_keys{'id'}, \%edge_data_keys );
1694
1695     # Save and return the thing
1696     my $result = decode_utf8( $graphml->toString(1) );
1697     return $result;
1698 }
1699
1700 sub _add_graphml_data {
1701     my( $el, $key, $value ) = @_;
1702     return unless defined $value;
1703     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1704     $data_el->setAttribute( 'key', $key );
1705     $data_el->appendText( $value );
1706 }
1707
1708 =head2 as_csv
1709
1710 Returns a CSV alignment table representation of the collation graph, one
1711 row per witness (or witness uncorrected.)
1712
1713 =head2 as_tsv
1714
1715 Returns a tab-separated alignment table representation of the collation graph,
1716 one row per witness (or witness uncorrected.)
1717
1718 =begin testing
1719
1720 use Text::Tradition;
1721 use Text::CSV;
1722
1723 my $READINGS = 311;
1724 my $PATHS = 361;
1725 my $WITS = 13;
1726 my $WITAC = 4;
1727
1728 my $datafile = 't/data/florilegium_tei_ps.xml';
1729 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1730                                       'name' => 'test0',
1731                                       'file' => $datafile,
1732                                       'linear' => 1 );
1733
1734 my $c = $tradition->collation;
1735 # Export the thing to CSV
1736 my $csvstr = $c->as_csv();
1737 # Count the columns
1738 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1739 my @lines = split(/\n/, $csvstr );
1740 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1741 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1742 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1743 ok( @q_ac, "Found a layered witness" );
1744
1745 my $t2 = Text::Tradition->new( input => 'Tabular',
1746                                                            name => 'test2',
1747                                                            string => $csvstr,
1748                                                            sep_char => ',' );
1749 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1750 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1751
1752 # Now do it with TSV
1753 my $tsvstr = $c->as_tsv();
1754 my $t3 = Text::Tradition->new( input => 'Tabular',
1755                                                            name => 'test3',
1756                                                            string => $tsvstr,
1757                                                            sep_char => "\t" );
1758 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1759 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1760
1761 my $table = $c->alignment_table;
1762 my $noaccsv = $c->as_csv({ noac => 1 });
1763 my @noaclines = split(/\n/, $noaccsv );
1764 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1765 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1766 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1767
1768 my $safecsv = $c->as_csv({ safe_ac => 1});
1769 my @safelines = split(/\n/, $safecsv );
1770 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1771 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1772 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1773 ok( @q_ac, "Found a sanitized layered witness" );
1774 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1775
1776 # Test relationship collapse
1777 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1778 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1779
1780 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1781 my $t4 = Text::Tradition->new( input => 'Tabular',
1782                                                            name => 'test4',
1783                                                            string => $mergedtsv,
1784                                                            sep_char => "\t" );
1785 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1786 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1787
1788 # Test non-ASCII sigla
1789 my $t5 = Text::Tradition->new( input => 'Tabular',
1790                                                            name => 'nonascii',
1791                                                            file => 't/data/armexample.xlsx',
1792                                                            excel => 'xlsx' );
1793 my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1794 my @awitlines = split( /\n/, $awittsv );
1795 like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1796
1797 =end testing
1798
1799 =cut
1800
1801 sub _tabular {
1802     my( $self, $opts ) = @_;
1803     my $table = $self->alignment_table( $opts );
1804         my $csv_options = { binary => 1, quote_null => 0 };
1805         $csv_options->{'sep_char'} = $opts->{fieldsep};
1806         if( $opts->{fieldsep} eq "\t" ) {
1807                 # If it is really tab separated, nothing is an escape char.
1808                 $csv_options->{'quote_char'} = undef;
1809                 $csv_options->{'escape_char'} = '';
1810         }
1811     my $csv = Text::CSV->new( $csv_options );
1812     my @result;
1813
1814     # Make the header row
1815     my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1816     if( $opts->{ascii} ) {
1817         # TODO think of a fix for this
1818         throw( "Cannot currently produce ASCII sigla with witness layers" )
1819                 unless $opts->{noac};
1820         my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1821         @witnesses = @awits;
1822     }
1823     $csv->combine( @witnesses );
1824         push( @result, $csv->string );
1825
1826     # Make the rest of the rows
1827     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1828         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1829         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1830         # Quick and dirty collapse of requested relationship types
1831         if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1832                 # Now substitute the reading in the relevant index of @row
1833                 # for its merge-related reading
1834                 my %substitutes;
1835                 while( @rowobjs ) {
1836                         my $thisr = shift @rowobjs;
1837                         next unless $thisr;
1838                                 next if exists $substitutes{$thisr->{t}->text};
1839                                 # Make sure we don't have A <-> B substitutions.
1840                                 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1841                         foreach my $thatr ( @rowobjs ) {
1842                                 next unless $thatr;
1843                                 next if exists $substitutes{$thatr->{t}->text};
1844                                 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1845                                 next unless $ttrel;
1846                                 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1847                                 # If we have got this far then we need to merge them.
1848                                 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1849                         }
1850                 }
1851                 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1852         }
1853         $csv->combine( @row );
1854         push( @result, $csv->string );
1855     }
1856     return join( "\n", @result );
1857 }
1858
1859 sub as_csv {
1860         my $self = shift;
1861         my $opts = shift || {};
1862         $opts->{fieldsep} = ',';
1863         return $self->_tabular( $opts );
1864 }
1865
1866 sub as_tsv {
1867         my $self = shift;
1868         my $opts = shift || {};
1869         $opts->{fieldsep} = "\t";
1870         return $self->_tabular( $opts );
1871 }
1872
1873 =head2 alignment_table
1874
1875 Return a reference to an alignment table, in a slightly enhanced CollateX
1876 format which looks like this:
1877
1878  $table = { alignment => [ { witness => "SIGIL",
1879                              tokens => [ { t => "TEXT" }, ... ] },
1880                            { witness => "SIG2",
1881                              tokens => [ { t => "TEXT" }, ... ] },
1882                            ... ],
1883             length => TEXTLEN };
1884
1885 =cut
1886
1887 sub alignment_table {
1888     my( $self, $opts ) = @_;
1889     if( $self->has_cached_table ) {
1890                 return $self->cached_table
1891                         unless $opts->{noac} || $opts->{safe_ac};
1892     }
1893
1894     # Make sure we can do this
1895         throw( "Need a linear graph in order to make an alignment table" )
1896                 unless $self->linear;
1897     $self->calculate_ranks()
1898         unless $self->_graphcalc_done && $self->end->has_rank;
1899
1900     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1901     my @all_pos = ( 1 .. $self->end->rank - 1 );
1902     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1903         # say STDERR "Making witness row(s) for " . $wit->sigil;
1904         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1905         my @row = _make_witness_row( \@wit_path, \@all_pos );
1906         my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1907         $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1908         push( @{$table->{'alignment'}}, $witobj );
1909         if( $wit->is_layered && !$opts->{noac} ) {
1910                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1911                         $wit->sigil.$self->ac_label );
1912             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1913             my $witlabel = $opts->{safe_ac}
1914                 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1915             my $witacobj = { 'witness' => $witlabel,
1916                 'tokens' => \@ac_row };
1917             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1918                         push( @{$table->{'alignment'}}, $witacobj );
1919         }
1920     }
1921     unless( $opts->{noac} || $opts->{safe_ac} ) {
1922             $self->cached_table( $table );
1923         }
1924     return $table;
1925 }
1926
1927 sub _make_witness_row {
1928     my( $path, $positions ) = @_;
1929     my %char_hash;
1930     map { $char_hash{$_} = undef } @$positions;
1931     my $debug = 0;
1932     foreach my $rdg ( @$path ) {
1933         say STDERR "rank " . $rdg->rank if $debug;
1934         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1935         $char_hash{$rdg->rank} = { 't' => $rdg };
1936     }
1937     my @row = map { $char_hash{$_} } @$positions;
1938     # Fill in lacuna markers for undef spots in the row
1939     my $last_el = shift @row;
1940     my @filled_row = ( $last_el );
1941     foreach my $el ( @row ) {
1942         # If we are using node reference, make the lacuna node appear many times
1943         # in the table.  If not, use the lacuna tag.
1944         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1945             $el = $last_el;
1946         }
1947         push( @filled_row, $el );
1948         $last_el = $el;
1949     }
1950     return @filled_row;
1951 }
1952
1953
1954 =head1 NAVIGATION METHODS
1955
1956 =head2 reading_sequence( $first, $last, $sigil, $backup )
1957
1958 Returns the ordered list of readings, starting with $first and ending
1959 with $last, for the witness given in $sigil. If a $backup sigil is
1960 specified (e.g. when walking a layered witness), it will be used wherever
1961 no $sigil path exists.  If there is a base text reading, that will be
1962 used wherever no path exists for $sigil or $backup.
1963
1964 =cut
1965
1966 # TODO Think about returning some lazy-eval iterator.
1967 # TODO Get rid of backup; we should know from what witness is whether we need it.
1968
1969 sub reading_sequence {
1970     my( $self, $start, $end, $witness ) = @_;
1971
1972     $witness = $self->baselabel unless $witness;
1973     my @readings = ( $start );
1974     my %seen;
1975     my $n = $start;
1976     while( $n && $n->id ne $end->id ) {
1977         if( exists( $seen{$n->id} ) ) {
1978             throw( "Detected loop for $witness at " . $n->id );
1979         }
1980         $seen{$n->id} = 1;
1981
1982         my $next = $self->next_reading( $n, $witness );
1983         unless( $next ) {
1984             throw( "Did not find any path for $witness from reading " . $n->id );
1985         }
1986         push( @readings, $next );
1987         $n = $next;
1988     }
1989     # Check that the last reading is our end reading.
1990     my $last = $readings[$#readings];
1991     throw( "Last reading found from " . $start->text .
1992         " for witness $witness is not the end!" ) # TODO do we get this far?
1993         unless $last->id eq $end->id;
1994
1995     return @readings;
1996 }
1997
1998 =head2 readings_at_rank( $rank )
1999
2000 Returns a list of readings at a given rank, taken from the alignment table.
2001
2002 =cut
2003
2004 sub readings_at_rank {
2005         my( $self, $rank, $nolacuna ) = @_;
2006         my $table = $self->alignment_table;
2007         # Table rank is real rank - 1.
2008         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
2009         my %readings;
2010         foreach my $e ( @elements ) {
2011                 next unless ref( $e ) eq 'HASH';
2012                 next unless exists $e->{'t'};
2013                 my $rdg = $e->{'t'};
2014                 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
2015                 $readings{$e->{'t'}->id} = $e->{'t'};
2016         }
2017         return values %readings;
2018 }
2019
2020 =head2 next_reading( $reading, $sigil );
2021
2022 Returns the reading that follows the given reading along the given witness
2023 path.
2024
2025 =cut
2026
2027 sub next_reading {
2028     # Return the successor via the corresponding path.
2029     my $self = shift;
2030     my $answer = $self->_find_linked_reading( 'next', @_ );
2031         return undef unless $answer;
2032     return $self->reading( $answer );
2033 }
2034
2035 =head2 prior_reading( $reading, $sigil )
2036
2037 Returns the reading that precedes the given reading along the given witness
2038 path.
2039
2040 =cut
2041
2042 sub prior_reading {
2043     # Return the predecessor via the corresponding path.
2044     my $self = shift;
2045     my $answer = $self->_find_linked_reading( 'prior', @_ );
2046     return $self->reading( $answer );
2047 }
2048
2049 sub _find_linked_reading {
2050     my( $self, $direction, $node, $path ) = @_;
2051
2052     # Get a backup if we are dealing with a layered witness
2053     my $alt_path;
2054     my $aclabel = $self->ac_label;
2055     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2056         $alt_path = $1;
2057     }
2058
2059     my @linked_paths = $direction eq 'next'
2060         ? $self->sequence->edges_from( $node )
2061         : $self->sequence->edges_to( $node );
2062     return undef unless scalar( @linked_paths );
2063
2064     # We have to find the linked path that contains all of the
2065     # witnesses supplied in $path.
2066     my( @path_wits, @alt_path_wits );
2067     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2068     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2069     my $base_le;
2070     my $alt_le;
2071     foreach my $le ( @linked_paths ) {
2072         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2073             $base_le = $le;
2074         }
2075                 my @le_wits = sort $self->path_witnesses( $le );
2076                 if( _is_within( \@path_wits, \@le_wits ) ) {
2077                         # This is the right path.
2078                         return $direction eq 'next' ? $le->[1] : $le->[0];
2079                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2080                         $alt_le = $le;
2081                 }
2082     }
2083     # Got this far? Return the alternate path if it exists.
2084     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2085         if $alt_le;
2086
2087     # Got this far? Return the base path if it exists.
2088     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2089         if $base_le;
2090
2091     # Got this far? We have no appropriate path.
2092     warn "Could not find $direction node from " . $node->id
2093         . " along path $path";
2094     return undef;
2095 }
2096
2097 # Some set logic.
2098 sub _is_within {
2099     my( $set1, $set2 ) = @_;
2100     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2101     foreach my $el ( @$set1 ) {
2102         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2103     }
2104     return $ret;
2105 }
2106
2107 # Return the string that joins together a list of witnesses for
2108 # display on a single path.
2109 sub _witnesses_of_label {
2110     my( $self, $label ) = @_;
2111     my $regex = $self->wit_list_separator;
2112     my @answer = split( /\Q$regex\E/, $label );
2113     return @answer;
2114 }
2115
2116 =head2 common_readings
2117
2118 Returns the list of common readings in the graph (i.e. those readings that are
2119 shared by all non-lacunose witnesses.)
2120
2121 =cut
2122
2123 sub common_readings {
2124         my $self = shift;
2125         my @common = grep { $_->is_common } $self->readings;
2126         return @common;
2127 }
2128
2129 =head2 path_text( $sigil, [, $start, $end ] )
2130
2131 Returns the text of a witness (plus its backup, if we are using a layer)
2132 as stored in the collation.  The text is returned as a string, where the
2133 individual readings are joined with spaces and the meta-readings (e.g.
2134 lacunae) are omitted.  Optional specification of $start and $end allows
2135 the generation of a subset of the witness text.
2136
2137 =cut
2138
2139 sub path_text {
2140         my( $self, $wit, $start, $end ) = @_;
2141         $start = $self->start unless $start;
2142         $end = $self->end unless $end;
2143         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2144         my $pathtext = '';
2145         my $last;
2146         foreach my $r ( @path ) {
2147                 unless ( $r->join_prior || !$last || $last->join_next ) {
2148                         $pathtext .= ' ';
2149                 }
2150                 $pathtext .= $r->text;
2151                 $last = $r;
2152         }
2153         return $pathtext;
2154 }
2155
2156 =head1 INITIALIZATION METHODS
2157
2158 These are mostly for use by parsers.
2159
2160 =head2 make_witness_path( $witness )
2161
2162 Link the array of readings contained in $witness->path (and in
2163 $witness->uncorrected_path if it exists) into collation paths.
2164 Clear out the arrays when finished.
2165
2166 =head2 make_witness_paths
2167
2168 Call make_witness_path for all witnesses in the tradition.
2169
2170 =cut
2171
2172 # For use when a collation is constructed from a base text and an apparatus.
2173 # We have the sequences of readings and just need to add path edges.
2174 # When we are done, clear out the witness path attributes, as they are no
2175 # longer needed.
2176 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2177
2178 sub make_witness_paths {
2179     my( $self ) = @_;
2180     foreach my $wit ( $self->tradition->witnesses ) {
2181         # say STDERR "Making path for " . $wit->sigil;
2182         $self->make_witness_path( $wit );
2183     }
2184 }
2185
2186 sub make_witness_path {
2187     my( $self, $wit ) = @_;
2188     my @chain = @{$wit->path};
2189     my $sig = $wit->sigil;
2190     # Add start and end if necessary
2191     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2192     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2193     foreach my $idx ( 0 .. $#chain-1 ) {
2194         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2195     }
2196     if( $wit->is_layered ) {
2197         @chain = @{$wit->uncorrected_path};
2198                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2199                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2200         foreach my $idx( 0 .. $#chain-1 ) {
2201             my $source = $chain[$idx];
2202             my $target = $chain[$idx+1];
2203             $self->add_path( $source, $target, $sig.$self->ac_label )
2204                 unless $self->has_path( $source, $target, $sig );
2205         }
2206     }
2207     $wit->clear_path;
2208     $wit->clear_uncorrected_path;
2209 }
2210
2211 =head2 calculate_ranks
2212
2213 Calculate the reading ranks (that is, their aligned positions relative
2214 to each other) for the graph.  This can only be called on linear collations.
2215
2216 =begin testing
2217
2218 use Text::Tradition;
2219
2220 my $cxfile = 't/data/Collatex-16.xml';
2221 my $t = Text::Tradition->new(
2222     'name'  => 'inline',
2223     'input' => 'CollateX',
2224     'file'  => $cxfile,
2225     );
2226 my $c = $t->collation;
2227
2228 # Make an svg
2229 my $table = $c->alignment_table;
2230 ok( $c->has_cached_table, "Alignment table was cached" );
2231 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2232 $c->calculate_ranks;
2233 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2234 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2235 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2236 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2237 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2238
2239 =end testing
2240
2241 =cut
2242
2243 sub calculate_ranks {
2244     my $self = shift;
2245     # Save the existing ranks, in case we need to invalidate the cached SVG.
2246     throw( "Cannot calculate ranks on a non-linear graph" )
2247         unless $self->linear;
2248     my %existing_ranks;
2249     map { $existing_ranks{$_} = $_->rank } $self->readings;
2250
2251     # Do the rankings based on the relationship equivalence graph, starting
2252     # with the start node.
2253     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2254
2255     # Transfer our rankings from the topological graph to the real one.
2256     foreach my $r ( $self->readings ) {
2257         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2258             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2259         } else {
2260                 # Die. Find the last rank we calculated.
2261                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2262                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2263                         $self->readings;
2264                 my $last = pop @all_defined;
2265             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2266         }
2267     }
2268     # Do we need to invalidate the cached data?
2269     if( $self->has_cached_table ) {
2270         foreach my $r ( $self->readings ) {
2271                 next if defined( $existing_ranks{$r} )
2272                         && $existing_ranks{$r} == $r->rank;
2273                 # Something has changed, so clear the cache
2274                 $self->_clear_cache;
2275                         # ...and recalculate the common readings.
2276                         $self->calculate_common_readings();
2277                 last;
2278         }
2279     }
2280         # The graph calculation information is now up to date.
2281         $self->_graphcalc_done(1);
2282 }
2283
2284 sub _clear_cache {
2285         my $self = shift;
2286         $self->wipe_table if $self->has_cached_table;
2287 }
2288
2289
2290 =head2 flatten_ranks
2291
2292 A convenience method for parsing collation data.  Searches the graph for readings
2293 with the same text at the same rank, and merges any that are found.
2294
2295 =cut
2296
2297 sub flatten_ranks {
2298     my ( $self, %args ) = shift;
2299     my %unique_rank_rdg;
2300     my $changed;
2301     foreach my $p ( $self->identical_readings( %args ) ) {
2302                 # say STDERR "Combining readings at same rank: @$p";
2303                 $changed = 1;
2304                 $self->merge_readings( @$p );
2305                 # TODO see if this now makes a common point.
2306     }
2307     # If we merged readings, the ranks are still fine but the alignment
2308     # table is wrong. Wipe it.
2309     $self->wipe_table() if $changed;
2310 }
2311
2312 =head2 identical_readings
2313 =head2 identical_readings( start => $startnode, end => $endnode )
2314 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2315
2316 Goes through the graph identifying all pairs of readings that appear to be
2317 identical, and therefore able to be merged into a single reading. Returns the
2318 relevant identical pairs. Can be restricted to run over only a part of the
2319 graph, specified either by node or by rank.
2320
2321 =cut
2322
2323 sub identical_readings {
2324         my ( $self, %args ) = @_;
2325     # Find where we should start and end.
2326     my $startrank = $args{startrank} || 0;
2327     if( $args{start} ) {
2328         throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2329                 && $self->reading( $args{start} )->has_rank;
2330         $startrank = $self->reading( $args{start} )->rank;
2331     }
2332     my $endrank = $args{endrank} || $self->end->rank;
2333     if( $args{end} ) {
2334         throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2335                 && $self->reading( $args{end} )->has_rank;
2336         $endrank = $self->reading( $args{end} )->rank;
2337     }
2338
2339     # Make sure the ranks are correct.
2340     unless( $self->_graphcalc_done ) {
2341         $self->calculate_ranks;
2342     }
2343     # Go through the readings looking for duplicates.
2344     my %unique_rank_rdg;
2345     my @pairs;
2346     foreach my $rdg ( $self->readings ) {
2347         next unless $rdg->has_rank;
2348         my $rk = $rdg->rank;
2349         next if $rk > $endrank || $rk < $startrank;
2350         my $key = $rk . "||" . $rdg->text;
2351         if( exists $unique_rank_rdg{$key} ) {
2352                 # Make sure they don't have different grammatical forms
2353                         my $ur = $unique_rank_rdg{$key};
2354                 if( $rdg->is_identical( $ur ) ) {
2355                                 push( @pairs, [ $ur, $rdg ] );
2356                         }
2357         } else {
2358             $unique_rank_rdg{$key} = $rdg;
2359         }
2360     }
2361
2362     return @pairs;
2363 }
2364
2365
2366 =head2 calculate_common_readings
2367
2368 Goes through the graph identifying the readings that appear in every witness
2369 (apart from those with lacunae at that spot.) Marks them as common and returns
2370 the list.
2371
2372 =begin testing
2373
2374 use Text::Tradition;
2375
2376 my $cxfile = 't/data/Collatex-16.xml';
2377 my $t = Text::Tradition->new(
2378     'name'  => 'inline',
2379     'input' => 'CollateX',
2380     'file'  => $cxfile,
2381     );
2382 my $c = $t->collation;
2383
2384 my @common = $c->calculate_common_readings();
2385 is( scalar @common, 8, "Found correct number of common readings" );
2386 my @marked = sort $c->common_readings();
2387 is( scalar @common, 8, "All common readings got marked as such" );
2388 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2389 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2390
2391 =end testing
2392
2393 =cut
2394
2395 sub calculate_common_readings {
2396         my $self = shift;
2397         my @common;
2398         map { $_->is_common( 0 ) } $self->readings;
2399         # Implicitly calls calculate_ranks
2400         my $table = $self->alignment_table;
2401         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2402                 my @row = map { $_->{'tokens'}->[$idx]
2403                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2404                                         @{$table->{'alignment'}};
2405                 my %hash;
2406                 foreach my $r ( @row ) {
2407                         if( $r ) {
2408                                 $hash{$r->id} = $r unless $r->is_meta;
2409                         } else {
2410                                 $hash{'UNDEF'} = $r;
2411                         }
2412                 }
2413                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2414                         my( $r ) = values %hash;
2415                         $r->is_common( 1 );
2416                         push( @common, $r );
2417                 }
2418         }
2419         return @common;
2420 }
2421
2422 =head2 text_from_paths
2423
2424 Calculate the text array for all witnesses from the path, for later consistency
2425 checking.  Only to be used if there is no non-graph-based way to know the
2426 original texts.
2427
2428 =cut
2429
2430 sub text_from_paths {
2431         my $self = shift;
2432     foreach my $wit ( $self->tradition->witnesses ) {
2433         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2434         my @text;
2435         foreach my $r ( @readings ) {
2436                 next if $r->is_meta;
2437                 push( @text, $r->text );
2438         }
2439         $wit->text( \@text );
2440         if( $wit->is_layered ) {
2441                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2442                                                                                                   $wit->sigil.$self->ac_label );
2443                         my @uctext;
2444                         foreach my $r ( @ucrdgs ) {
2445                                 next if $r->is_meta;
2446                                 push( @uctext, $r->text );
2447                         }
2448                         $wit->layertext( \@uctext );
2449         }
2450     }
2451 }
2452
2453 =head1 UTILITY FUNCTIONS
2454
2455 =head2 common_predecessor( $reading_a, $reading_b )
2456
2457 Find the last reading that occurs in sequence before both the given readings.
2458 At the very least this should be $self->start.
2459
2460 =head2 common_successor( $reading_a, $reading_b )
2461
2462 Find the first reading that occurs in sequence after both the given readings.
2463 At the very least this should be $self->end.
2464
2465 =begin testing
2466
2467 use Text::Tradition;
2468
2469 my $cxfile = 't/data/Collatex-16.xml';
2470 my $t = Text::Tradition->new(
2471     'name'  => 'inline',
2472     'input' => 'CollateX',
2473     'file'  => $cxfile,
2474     );
2475 my $c = $t->collation;
2476
2477 is( $c->common_predecessor( 'n24', 'n23' )->id,
2478     'n20', "Found correct common predecessor" );
2479 is( $c->common_successor( 'n24', 'n23' )->id,
2480     '__END__', "Found correct common successor" );
2481
2482 is( $c->common_predecessor( 'n19', 'n17' )->id,
2483     'n16', "Found correct common predecessor for readings on same path" );
2484 is( $c->common_successor( 'n21', 'n10' )->id,
2485     '__END__', "Found correct common successor for readings on same path" );
2486
2487 =end testing
2488
2489 =cut
2490
2491 ## Return the closest reading that is a predecessor of both the given readings.
2492 sub common_predecessor {
2493         my $self = shift;
2494         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2495         return $self->_common_in_path( $r1, $r2, 'predecessors' );
2496 }
2497
2498 sub common_successor {
2499         my $self = shift;
2500         my( $r1, $r2 ) = $self->_objectify_args( @_ );
2501         return $self->_common_in_path( $r1, $r2, 'successors' );
2502 }
2503
2504
2505 # TODO think about how to do this without ranks...
2506 sub _common_in_path {
2507         my( $self, $r1, $r2, $dir ) = @_;
2508         my $iter = $self->end->rank;
2509         my @candidates;
2510         my @last_r1 = ( $r1 );
2511         my @last_r2 = ( $r2 );
2512         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2513         my %all_seen;
2514         # say STDERR "Finding common $dir for $r1, $r2";
2515         while( !@candidates ) {
2516                 last unless $iter--;  # Avoid looping infinitely
2517                 # Iterate separately down the graph from r1 and r2
2518                 my( @new_lc1, @new_lc2 );
2519                 foreach my $lc ( @last_r1 ) {
2520                         foreach my $p ( $lc->$dir ) {
2521                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2522                                         # say STDERR "Path candidate $p from $lc";
2523                                         push( @candidates, $p );
2524                                 } elsif( !$all_seen{$p->id} ) {
2525                                         $all_seen{$p->id} = 'r1';
2526                                         push( @new_lc1, $p );
2527                                 }
2528                         }
2529                 }
2530                 foreach my $lc ( @last_r2 ) {
2531                         foreach my $p ( $lc->$dir ) {
2532                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2533                                         # say STDERR "Path candidate $p from $lc";
2534                                         push( @candidates, $p );
2535                                 } elsif( !$all_seen{$p->id} ) {
2536                                         $all_seen{$p->id} = 'r2';
2537                                         push( @new_lc2, $p );
2538                                 }
2539                         }
2540                 }
2541                 @last_r1 = @new_lc1;
2542                 @last_r2 = @new_lc2;
2543         }
2544         my @answer = sort { $a->rank <=> $b->rank } @candidates;
2545         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2546 }
2547
2548 sub throw {
2549         Text::Tradition::Error->throw(
2550                 'ident' => 'Collation error',
2551                 'message' => $_[0],
2552                 );
2553 }
2554
2555 no Moose;
2556 __PACKAGE__->meta->make_immutable;
2557
2558 =head1 BUGS/TODO
2559
2560 =over
2561
2562 =item * Rework XML serialization in a more modular way
2563
2564 =back
2565
2566 =head1 LICENSE
2567
2568 This package is free software and is provided "as is" without express
2569 or implied warranty.  You can redistribute it and/or modify it under
2570 the same terms as Perl itself.
2571
2572 =head1 AUTHOR
2573
2574 Tara L Andrews E<lt>aurum@cpan.orgE<gt>