base/lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use feature 'say';
   4 use Encode qw( decode_utf8 );
   5 use File::Temp;
   6 use File::Which;
   7 use Graph;
   8 use IPC::Run qw( run binary );
   9 use Text::CSV;
  10 use Text::Tradition::Collation::Data;
  11 use Text::Tradition::Collation::Reading;
  12 use Text::Tradition::Collation::RelationshipStore;
  13 use Text::Tradition::Error;
  14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  15 use XML::LibXML;
  16 use XML::LibXML::XPathContext;
  17 use Moose;
  18
  19 has _data => (
  20         isa      => 'Text::Tradition::Collation::Data',
  21         is       => 'ro',
  22         required => 1,
  23         handles  => [ qw(
  24                 sequence
  25                 paths
  26                 _set_relations
  27                 relations
  28                 _set_start
  29                 _set_end
  30                 ac_label
  31                 has_cached_table
  32                 relationships
  33                 related_readings
  34                 get_relationship
  35                 del_relationship
  36                 equivalence
  37                 equivalence_graph
  38                 readings
  39                 reading
  40                 _add_reading
  41                 del_reading
  42                 has_reading
  43                 wit_list_separator
  44                 baselabel
  45                 linear
  46                 wordsep
  47                 start
  48                 end
  49                 cached_table
  50                 _graphcalc_done
  51                 has_cached_svg
  52                 wipe_table
  53         )]
  54 );
  55
  56 has 'tradition' => (
  57     is => 'ro',
  58     isa => 'Text::Tradition',
  59     writer => '_set_tradition',
  60     weak_ref => 1,
  61     );
  62
  63 =head1 NAME
  64
  65 Text::Tradition::Collation - a software model for a text collation
  66
  67 =head1 SYNOPSIS
  68
  69   use Text::Tradition;
  70   my $t = Text::Tradition->new(
  71     'name' => 'this is a text',
  72     'input' => 'TEI',
  73     'file' => '/path/to/tei_parallel_seg_file.xml' );
  74
  75   my $c = $t->collation;
  76   my @readings = $c->readings;
  77   my @paths = $c->paths;
  78   my @relationships = $c->relationships;
  79
  80   my $svg_variant_graph = $t->collation->as_svg();
  81
  82 =head1 DESCRIPTION
  83
  84 Text::Tradition is a library for representation and analysis of collated
  85 texts, particularly medieval ones.  The Collation is the central feature of
  86 a Tradition, where the text, its sequence of readings, and its relationships
  87 between readings are actually kept.
  88
  89 =head1 CONSTRUCTOR
  90
  91 =head2 new
  92
  93 The constructor.  Takes a hash or hashref of the following arguments:
  94
  95 =over
  96
  97 =item * tradition - The Text::Tradition object to which the collation
  98 belongs. Required.
  99
 100 =item * linear - Whether the collation should be linear; that is, whether
 101 transposed readings should be treated as two linked readings rather than one,
 102 and therefore whether the collation graph is acyclic.  Defaults to true.
 103
 104 =item * baselabel - The default label for the path taken by a base text
 105 (if any). Defaults to 'base text'.
 106
 107 =item * wit_list_separator - The string to join a list of witnesses for
 108 purposes of making labels in display graphs.  Defaults to ', '.
 109
 110 =item * ac_label - The extra label to tack onto a witness sigil when
 111 representing another layer of path for the given witness - that is, when
 112 a text has more than one possible reading due to scribal corrections or
 113 the like.  Defaults to ' (a.c.)'.
 114
 115 =item * wordsep - The string used to separate words in the original text.
 116 Defaults to ' '.
 117
 118 =back
 119
 120 =head1 ACCESSORS
 121
 122 =head2 tradition
 123
 124 =head2 linear
 125
 126 =head2 wit_list_separator
 127
 128 =head2 baselabel
 129
 130 =head2 ac_label
 131
 132 =head2 wordsep
 133
 134 Simple accessors for collation attributes.
 135
 136 =head2 start
 137
 138 The meta-reading at the start of every witness path.
 139
 140 =head2 end
 141
 142 The meta-reading at the end of every witness path.
 143
 144 =head2 readings
 145
 146 Returns all Reading objects in the graph.
 147
 148 =head2 reading( $id )
 149
 150 Returns the Reading object corresponding to the given ID.
 151
 152 =head2 add_reading( $reading_args )
 153
 154 Adds a new reading object to the collation.
 155 See L<Text::Tradition::Collation::Reading> for the available arguments.
 156
 157 =head2 del_reading( $object_or_id )
 158
 159 Removes the given reading from the collation, implicitly removing its
 160 paths and relationships.
 161
 162 =head2 merge_readings( $main, $second, $concatenate, $with_str )
 163
 164 Merges the $second reading into the $main one. If $concatenate is true, then
 165 the merged node will carry the text of both readings, concatenated with either
 166 $with_str (if specified) or a sensible default (the empty string if the
 167 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
 168
 169 The first two arguments may be either readings or reading IDs.
 170
 171 =head2 has_reading( $id )
 172
 173 Predicate to see whether a given reading ID is in the graph.
 174
 175 =head2 reading_witnesses( $object_or_id )
 176
 177 Returns a list of sigils whose witnesses contain the reading.
 178
 179 =head2 paths
 180
 181 Returns all reading paths within the document - that is, all edges in the
 182 collation graph.  Each path is an arrayref of [ $source, $target ] reading IDs.
 183
 184 =head2 add_path( $source, $target, $sigil )
 185
 186 Links the given readings in the collation in sequence, under the given witness
 187 sigil.  The readings may be specified by object or ID.
 188
 189 =head2 del_path( $source, $target, $sigil )
 190
 191 Links the given readings in the collation in sequence, under the given witness
 192 sigil.  The readings may be specified by object or ID.
 193
 194 =head2 has_path( $source, $target );
 195
 196 Returns true if the two readings are linked in sequence in any witness.
 197 The readings may be specified by object or ID.
 198
 199 =head2 relationships
 200
 201 Returns all Relationship objects in the collation.
 202
 203 =head2 add_relationship( $reading, $other_reading, $options )
 204
 205 Adds a new relationship of the type given in $options between the two readings,
 206 which may be specified by object or ID.  Returns a value of ( $status, @vectors)
 207 where $status is true on success, and @vectors is a list of relationship edges
 208 that were ultimately added.
 209 See L<Text::Tradition::Collation::Relationship> for the available options.
 210
 211 =cut
 212
 213 sub BUILDARGS {
 214         my ( $class, @args ) = @_;
 215         my %args = @args == 1 ? %{ $args[0] } : @args;
 216         # TODO determine these from the Moose::Meta object
 217         my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
 218                 linear wordsep start end cached_table _graphcalc_done);
 219         my %data_args;
 220         for my $attr (@delegate_attrs) {
 221                 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
 222         }
 223         $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
 224         return \%args;
 225 }
 226
 227 sub BUILD {
 228     my $self = shift;
 229     $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
 230     $self->_set_start( $self->add_reading(
 231         { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
 232     $self->_set_end( $self->add_reading(
 233         { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
 234 }
 235
 236 ### Reading construct/destruct functions
 237
 238 sub add_reading {
 239         my( $self, $reading ) = @_;
 240         unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
 241                 my %args = %$reading;
 242                 if( $args{'init'} ) {
 243                         # If we are initializing an empty collation, don't assume that we
 244                         # have set a tradition.
 245                         delete $args{'init'};
 246                 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
 247                         $args{'language'} = $self->tradition->language;
 248                 }
 249                 $reading = Text::Tradition::Collation::Reading->new(
 250                         'collation' => $self,
 251                         %args );
 252         }
 253         # First check to see if a reading with this ID exists.
 254         if( $self->reading( $reading->id ) ) {
 255                 throw( "Collation already has a reading with id " . $reading->id );
 256         }
 257         $self->_graphcalc_done(0);
 258         $self->_add_reading( $reading->id => $reading );
 259         # Once the reading has been added, put it in both graphs.
 260         $self->sequence->add_vertex( $reading->id );
 261         $self->relations->add_reading( $reading->id );
 262         return $reading;
 263 };
 264
 265 around del_reading => sub {
 266         my $orig = shift;
 267         my $self = shift;
 268         my $arg = shift;
 269
 270         if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
 271                 $arg = $arg->id;
 272         }
 273         # Remove the reading from the graphs.
 274         $self->_graphcalc_done(0);
 275         $self->_clear_cache; # Explicitly clear caches to GC the reading
 276         $self->sequence->delete_vertex( $arg );
 277         $self->relations->delete_reading( $arg );
 278
 279         # Carry on.
 280         $self->$orig( $arg );
 281 };
 282
 283 =begin testing
 284
 285 use Text::Tradition;
 286
 287 my $cxfile = 't/data/Collatex-16.xml';
 288 my $t = Text::Tradition->new(
 289     'name'  => 'inline',
 290     'input' => 'CollateX',
 291     'file'  => $cxfile,
 292     );
 293 my $c = $t->collation;
 294
 295 my $rno = scalar $c->readings;
 296 # Split n21 for testing purposes
 297 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
 298 my $old_r = $c->reading( 'n21' );
 299 $old_r->alter_text( 'to' );
 300 $c->del_path( 'n20', 'n21', 'A' );
 301 $c->add_path( 'n20', 'n21p0', 'A' );
 302 $c->add_path( 'n21p0', 'n21', 'A' );
 303 $c->flatten_ranks();
 304 ok( $c->reading( 'n21p0' ), "New reading exists" );
 305 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 306
 307 # Combine n3 and n4 ( with his )
 308 $c->merge_readings( 'n3', 'n4', 1 );
 309 ok( !$c->reading('n4'), "Reading n4 is gone" );
 310 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 311
 312 # Collapse n9 and n10 ( rood / root )
 313 $c->merge_readings( 'n9', 'n10' );
 314 ok( !$c->reading('n10'), "Reading n10 is gone" );
 315 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 316
 317 # Combine n21 and n21p0
 318 my $remaining = $c->reading('n21');
 319 $remaining ||= $c->reading('n22');  # one of these should still exist
 320 $c->merge_readings( 'n21p0', $remaining, 1 );
 321 ok( !$c->reading('n21'), "Reading $remaining is gone" );
 322 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
 323
 324 =end testing
 325
 326 =cut
 327
 328 sub merge_readings {
 329         my $self = shift;
 330
 331         # Sanity check
 332         my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
 333         my $mergemeta = $kept_obj->is_meta;
 334         throw( "Cannot merge meta and non-meta reading" )
 335                 unless ( $mergemeta && $del_obj->is_meta )
 336                         || ( !$mergemeta && !$del_obj->is_meta );
 337         if( $mergemeta ) {
 338                 throw( "Cannot merge with start or end node" )
 339                         if( $kept_obj eq $self->start || $kept_obj eq $self->end
 340                                 || $del_obj eq $self->start || $del_obj eq $self->end );
 341                 throw( "Cannot combine text of meta readings" ) if $combine;
 342         }
 343         # We only need the IDs for adding paths to the graph, not the reading
 344         # objects themselves.
 345         my $kept = $kept_obj->id;
 346         my $deleted = $del_obj->id;
 347         $self->_graphcalc_done(0);
 348
 349     # The kept reading should inherit the paths and the relationships
 350     # of the deleted reading.
 351         foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
 352                 my @vector = ( $kept );
 353                 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
 354                 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
 355                 next if $vector[0] eq $vector[1]; # Don't add a self loop
 356                 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
 357                 $self->sequence->add_edge( @vector );
 358                 my $fwits = $self->sequence->get_edge_attributes( @vector );
 359                 @wits{keys %$fwits} = values %$fwits;
 360                 $self->sequence->set_edge_attributes( @vector, \%wits );
 361         }
 362         $self->relations->merge_readings( $kept, $deleted, $combine );
 363
 364         # Do the deletion deed.
 365         if( $combine ) {
 366                 # Combine the text of the readings
 367                 my $joinstr = $combine_char;
 368                 unless( defined $joinstr ) {
 369                         $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
 370                         $joinstr = $self->wordsep unless defined $joinstr;
 371                 }
 372                 $kept_obj->_combine( $del_obj, $joinstr );
 373         }
 374         $self->del_reading( $deleted );
 375 }
 376
 377 =head2 compress_readings
 378
 379 Where possible in the graph, compresses plain sequences of readings into a
 380 single reading. The sequences must consist of readings with no
 381 relationships to other readings, with only a single witness path between
 382 them and no other witness paths from either that would skip the other. The
 383 readings must also not be marked as nonsense or bad grammar.
 384
 385 WARNING: This operation cannot be undone.
 386
 387 =cut
 388
 389 sub compress_readings {
 390         my $self = shift;
 391         # Anywhere in the graph that there is a reading that joins only to a single
 392         # successor, and neither of these have any relationships, just join the two
 393         # readings.
 394         foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
 395                 # Now look for readings that can be joined to their successors.
 396                 next unless $rdg->is_combinable;
 397                 my %seen;
 398                 while( $self->sequence->successors( $rdg ) == 1 ) {
 399                         my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
 400                         throw( "Infinite loop" ) if $seen{$next->id};
 401                         $seen{$next->id} = 1;
 402                         last if $self->sequence->predecessors( $next ) > 1;
 403                         last unless $next->is_combinable;
 404                         say "Joining readings $rdg and $next";
 405                         $self->merge_readings( $rdg, $next, 1 );
 406                 }
 407         }
 408         # Make sure we haven't screwed anything up
 409         foreach my $wit ( $self->tradition->witnesses ) {
 410                 my $pathtext = $self->path_text( $wit->sigil );
 411                 my $origtext = join( ' ', @{$wit->text} );
 412                 throw( "Text differs for witness " . $wit->sigil )
 413                         unless $pathtext eq $origtext;
 414                 if( $wit->is_layered ) {
 415                         $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
 416                         $origtext = join( ' ', @{$wit->layertext} );
 417                         throw( "Ante-corr text differs for witness " . $wit->sigil )
 418                                 unless $pathtext eq $origtext;
 419                 }
 420         }
 421
 422         $self->relations->rebuild_equivalence();
 423         $self->calculate_ranks();
 424 }
 425
 426 # Helper function for manipulating the graph.
 427 sub _stringify_args {
 428         my( $self, $first, $second, @args ) = @_;
 429     $first = $first->id
 430         if ref( $first ) eq 'Text::Tradition::Collation::Reading';
 431     $second = $second->id
 432         if ref( $second ) eq 'Text::Tradition::Collation::Reading';
 433     return( $first, $second, @args );
 434 }
 435
 436 # Helper function for manipulating the graph.
 437 sub _objectify_args {
 438         my( $self, $first, $second, $arg ) = @_;
 439     $first = $self->reading( $first )
 440         unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
 441     $second = $self->reading( $second )
 442         unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
 443     return( $first, $second, $arg );
 444 }
 445 ### Path logic
 446
 447 sub add_path {
 448         my $self = shift;
 449
 450         # We only need the IDs for adding paths to the graph, not the reading
 451         # objects themselves.
 452     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 453
 454         $self->_graphcalc_done(0);
 455         # Connect the readings
 456         unless( $self->sequence->has_edge( $source, $target ) ) {
 457             $self->sequence->add_edge( $source, $target );
 458             $self->relations->add_equivalence_edge( $source, $target );
 459         }
 460     # Note the witness in question
 461     $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 462 }
 463
 464 sub del_path {
 465         my $self = shift;
 466         my @args;
 467         if( ref( $_[0] ) eq 'ARRAY' ) {
 468                 my $e = shift @_;
 469                 @args = ( @$e, @_ );
 470         } else {
 471                 @args = @_;
 472         }
 473
 474         # We only need the IDs for adding paths to the graph, not the reading
 475         # objects themselves.
 476     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 477
 478         $self->_graphcalc_done(0);
 479         if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 480                 $self->sequence->delete_edge_attribute( $source, $target, $wit );
 481         }
 482         unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
 483                 $self->sequence->delete_edge( $source, $target );
 484                 $self->relations->delete_equivalence_edge( $source, $target );
 485         }
 486 }
 487
 488
 489 # Extra graph-alike utility
 490 sub has_path {
 491         my $self = shift;
 492     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 493         return undef unless $self->sequence->has_edge( $source, $target );
 494         return $self->sequence->has_edge_attribute( $source, $target, $wit );
 495 }
 496
 497 =head2 clear_witness( @sigil_list )
 498
 499 Clear the given witnesses out of the collation entirely, removing references
 500 to them in paths, and removing readings that belong only to them.  Should only
 501 be called via $tradition->del_witness.
 502
 503 =cut
 504
 505 sub clear_witness {
 506         my( $self, @sigils ) = @_;
 507
 508         $self->_graphcalc_done(0);
 509         # Clear the witness(es) out of the paths
 510         foreach my $e ( $self->paths ) {
 511                 foreach my $sig ( @sigils ) {
 512                         $self->del_path( $e, $sig );
 513                 }
 514         }
 515
 516         # Clear out the newly unused readings
 517         foreach my $r ( $self->readings ) {
 518                 unless( $self->reading_witnesses( $r ) ) {
 519                         $self->del_reading( $r );
 520                 }
 521         }
 522 }
 523
 524 sub add_relationship {
 525         my $self = shift;
 526     my( $source, $target, $opts ) = $self->_stringify_args( @_ );
 527     my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
 528         $self->_graphcalc_done(0);
 529     return @vectors;
 530 }
 531
 532 around qw/ get_relationship del_relationship / => sub {
 533         my $orig = shift;
 534         my $self = shift;
 535         my @args = @_;
 536         if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
 537                 @args = @{$_[0]};
 538         }
 539         my( $source, $target ) = $self->_stringify_args( @args );
 540         $self->$orig( $source, $target );
 541 };
 542
 543 =head2 reading_witnesses( $reading )
 544
 545 Return a list of sigils corresponding to the witnesses in which the reading appears.
 546
 547 =cut
 548
 549 sub reading_witnesses {
 550         my( $self, $reading ) = @_;
 551         # We need only check either the incoming or the outgoing edges; I have
 552         # arbitrarily chosen "incoming".  Thus, special-case the start node.
 553         if( $reading eq $self->start ) {
 554                 return map { $_->sigil } $self->tradition->witnesses;
 555         }
 556         my %all_witnesses;
 557         foreach my $e ( $self->sequence->edges_to( $reading ) ) {
 558                 my $wits = $self->sequence->get_edge_attributes( @$e );
 559                 @all_witnesses{ keys %$wits } = 1;
 560         }
 561         my $acstr = $self->ac_label;
 562         foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
 563                 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
 564         }
 565         return keys %all_witnesses;
 566 }
 567
 568 =head1 OUTPUT METHODS
 569
 570 =head2 as_svg( \%options )
 571
 572 Returns an SVG string that represents the graph, via as_dot and graphviz.
 573 See as_dot for a list of options.  Must have GraphViz (dot) installed to run.
 574
 575 =cut
 576
 577 sub as_svg {
 578     my( $self, $opts ) = @_;
 579     throw( "Need GraphViz installed to output SVG" )
 580         unless File::Which::which( 'dot' );
 581     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
 582     $self->calculate_ranks()
 583         unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
 584         my @cmd = qw/dot -Tsvg/;
 585         my( $svg, $err );
 586         my $dotfile = File::Temp->new();
 587         ## USE FOR DEBUGGING
 588         # $dotfile->unlink_on_destroy(0);
 589         binmode $dotfile, ':utf8';
 590         print $dotfile $self->as_dot( $opts );
 591         push( @cmd, $dotfile->filename );
 592         run( \@cmd, ">", binary(), \$svg );
 593         $svg = decode_utf8( $svg );
 594         return $svg;
 595 }
 596
 597
 598 =head2 as_dot( \%options )
 599
 600 Returns a string that is the collation graph expressed in dot
 601 (i.e. GraphViz) format.  Options include:
 602
 603 =over 4
 604
 605 =item * from
 606
 607 =item * to
 608
 609 =item * color_common
 610
 611 =back
 612
 613 =cut
 614
 615 sub as_dot {
 616     my( $self, $opts ) = @_;
 617     my $startrank = $opts->{'from'} if $opts;
 618     my $endrank = $opts->{'to'} if $opts;
 619     my $color_common = $opts->{'color_common'} if $opts;
 620     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
 621        && $self->end->rank > 100;
 622     $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 623
 624     # Check the arguments
 625     if( $startrank ) {
 626         return if $endrank && $startrank > $endrank;
 627         return if $startrank > $self->end->rank;
 628         }
 629         if( defined $endrank ) {
 630                 return if $endrank < 0;
 631                 $endrank = undef if $endrank == $self->end->rank;
 632         }
 633
 634     my $graph_name = $self->tradition->name;
 635     $graph_name =~ s/[^\w\s]//g;
 636     $graph_name = join( '_', split( /\s+/, $graph_name ) );
 637
 638     my %graph_attrs = (
 639         'rankdir' => 'LR',
 640         'bgcolor' => 'none',
 641         );
 642     my %node_attrs = (
 643         'fontsize' => 14,
 644         'fillcolor' => 'white',
 645         'style' => 'filled',
 646         'shape' => 'ellipse'
 647         );
 648     my %edge_attrs = (
 649         'arrowhead' => 'open',
 650         'color' => '#000000',
 651         'fontcolor' => '#000000',
 652         );
 653
 654     my $dot = sprintf( "digraph %s {\n", $graph_name );
 655     $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
 656     $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
 657
 658         # Output substitute start/end readings if necessary
 659         if( $startrank ) {
 660                 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
 661         }
 662         if( $endrank ) {
 663                 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
 664         }
 665         if( $STRAIGHTENHACK ) {
 666                 ## HACK part 1
 667                 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
 668                 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
 669                 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
 670         }
 671         my %used;  # Keep track of the readings that actually appear in the graph
 672         # Sort the readings by rank if we have ranks; this speeds layout.
 673         my @all_readings = $self->end->has_rank
 674                 ? sort { $a->rank <=> $b->rank } $self->readings
 675                 : $self->readings;
 676         # TODO Refrain from outputting lacuna nodes - just grey out the edges.
 677     foreach my $reading ( @all_readings ) {
 678         # Only output readings within our rank range.
 679         next if $startrank && $reading->rank < $startrank;
 680         next if $endrank && $reading->rank > $endrank;
 681         $used{$reading->id} = 1;
 682         # Need not output nodes without separate labels
 683         next if $reading->id eq $reading->text;
 684         my $rattrs;
 685         my $label = $reading->text;
 686         $label .= '-' if $reading->join_next;
 687         $label = "-$label" if $reading->join_prior;
 688         $label =~ s/\"/\\\"/g;
 689                 $rattrs->{'label'} = $label;
 690                 $rattrs->{'id'} = $reading->id;
 691                 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
 692         $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
 693     }
 694
 695         # Add the real edges. Need to weight one edge per rank jump, in a
 696         # continuous line.
 697         # my $weighted = $self->_add_edge_weights;
 698     my @edges = $self->paths;
 699         my( %substart, %subend );
 700     foreach my $edge ( @edges ) {
 701         # Do we need to output this edge?
 702         if( $used{$edge->[0]} && $used{$edge->[1]} ) {
 703                 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
 704                         my $variables = { %edge_attrs, 'label' => $label };
 705
 706                         # Account for the rank gap if necessary
 707                         my $rank0 = $self->reading( $edge->[0] )->rank
 708                                 if $self->reading( $edge->[0] )->has_rank;
 709                         my $rank1 = $self->reading( $edge->[1] )->rank
 710                                 if $self->reading( $edge->[1] )->has_rank;
 711                         if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
 712                                 $variables->{'minlen'} = $rank1 - $rank0;
 713                         }
 714
 715                         # Add the calculated edge weights
 716                         # if( exists $weighted->{$edge->[0]}
 717                         #       && $weighted->{$edge->[0]} eq $edge->[1] ) {
 718                         #       # $variables->{'color'} = 'red';
 719                         #       $variables->{'weight'} = 3.0;
 720                         # }
 721
 722                         # EXPERIMENTAL: make edge width reflect no. of witnesses
 723                         my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
 724                         $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
 725
 726                         my $varopts = _dot_attr_string( $variables );
 727                         $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
 728                                 $edge->[0], $edge->[1], $varopts );
 729         } elsif( $used{$edge->[0]} ) {
 730                 $subend{$edge->[0]} = $edge->[1];
 731         } elsif( $used{$edge->[1]} ) {
 732                 $substart{$edge->[1]} = $edge->[0];
 733         }
 734     }
 735     # Add substitute start and end edges if necessary
 736     foreach my $node ( keys %substart ) {
 737         my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
 738         my $variables = { %edge_attrs, 'label' => $witstr };
 739         my $nrdg = $self->reading( $node );
 740         if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
 741                 # Substart is actually one lower than $startrank
 742                 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
 743         }
 744         my $varopts = _dot_attr_string( $variables );
 745         $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
 746         }
 747     foreach my $node ( keys %subend ) {
 748         my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
 749         my $variables = { %edge_attrs, 'label' => $witstr };
 750         my $varopts = _dot_attr_string( $variables );
 751         $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
 752         }
 753         # HACK part 2
 754         if( $STRAIGHTENHACK ) {
 755                 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
 756                 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
 757         }
 758
 759     $dot .= "}\n";
 760     return $dot;
 761 }
 762
 763 sub _dot_attr_string {
 764         my( $hash ) = @_;
 765         my @attrs;
 766         foreach my $k ( sort keys %$hash ) {
 767                 my $v = $hash->{$k};
 768                 push( @attrs, $k.'="'.$v.'"' );
 769         }
 770         return( '[ ' . join( ', ', @attrs ) . ' ]' );
 771 }
 772
 773 sub _add_edge_weights {
 774         my $self = shift;
 775         # Walk the graph from START to END, choosing the successor node with
 776         # the largest number of witness paths each time.
 777         my $weighted = {};
 778         my $curr = $self->start->id;
 779         my $ranked = $self->end->has_rank;
 780         while( $curr ne $self->end->id ) {
 781                 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 782                 my @succ = sort { $self->path_witnesses( $curr, $a )
 783                                                         <=> $self->path_witnesses( $curr, $b ) }
 784                         $self->sequence->successors( $curr );
 785                 my $next = pop @succ;
 786                 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 787                 # Try to avoid lacunae in the weighted path.
 788                 while( @succ &&
 789                            ( $self->reading( $next )->is_lacuna ||
 790                                  $nextrank - $rank > 1 ) ){
 791                         $next = pop @succ;
 792                 }
 793                 $weighted->{$curr} = $next;
 794                 $curr = $next;
 795         }
 796         return $weighted;
 797 }
 798
 799 =head2 path_witnesses( $edge )
 800
 801 Returns the list of sigils whose witnesses are associated with the given edge.
 802 The edge can be passed as either an array or an arrayref of ( $source, $target ).
 803
 804 =cut
 805
 806 sub path_witnesses {
 807         my( $self, @edge ) = @_;
 808         # If edge is an arrayref, cope.
 809         if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
 810                 my $e = shift @edge;
 811                 @edge = @$e;
 812         }
 813         my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
 814         return @wits;
 815 }
 816
 817 # Helper function. Make a display label for the given witnesses, showing a.c.
 818 # witnesses only where the main witness is not also in the list.
 819 sub _path_display_label {
 820         my $self = shift;
 821         my %wits;
 822         map { $wits{$_} = 1 } @_;
 823
 824         # If an a.c. wit is listed, remove it if the main wit is also listed.
 825         # Otherwise keep it for explicit listing.
 826         my $aclabel = $self->ac_label;
 827         my @disp_ac;
 828         foreach my $w ( sort keys %wits ) {
 829                 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
 830                         if( exists $wits{$1} ) {
 831                                 delete $wits{$w};
 832                         } else {
 833                                 push( @disp_ac, $w );
 834                         }
 835                 }
 836         }
 837
 838         # See if we are in a majority situation.
 839         my $maj = scalar( $self->tradition->witnesses ) * 0.6;
 840         $maj = $maj > 5 ? $maj : 5;
 841         if( scalar keys %wits > $maj ) {
 842                 unshift( @disp_ac, 'majority' );
 843                 return join( ', ', @disp_ac );
 844         } else {
 845                 return join( ', ', sort keys %wits );
 846         }
 847 }
 848
 849 =head2 readings_at_rank( $rank )
 850
 851 Returns a list of readings at a given rank, taken from the alignment table.
 852
 853 =cut
 854
 855 sub readings_at_rank {
 856         my( $self, $rank ) = @_;
 857         my $table = $self->alignment_table;
 858         # Table rank is real rank - 1.
 859         my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
 860         my %readings;
 861         foreach my $e ( @elements ) {
 862                 next unless ref( $e ) eq 'HASH';
 863                 next unless exists $e->{'t'};
 864                 $readings{$e->{'t'}->id} = $e->{'t'};
 865         }
 866         return values %readings;
 867 }
 868
 869 =head2 as_graphml
 870
 871 Returns a GraphML representation of the collation.  The GraphML will contain
 872 two graphs. The first expresses the attributes of the readings and the witness
 873 paths that link them; the second expresses the relationships that link the
 874 readings.  This is the native transfer format for a tradition.
 875
 876 =begin testing
 877
 878 use Text::Tradition;
 879 use TryCatch;
 880
 881 my $READINGS = 311;
 882 my $PATHS = 361;
 883
 884 my $datafile = 't/data/florilegium_tei_ps.xml';
 885 my $tradition = Text::Tradition->new( 'input' => 'TEI',
 886                                       'name' => 'test0',
 887                                       'file' => $datafile,
 888                                       'linear' => 1 );
 889
 890 ok( $tradition, "Got a tradition object" );
 891 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
 892 ok( $tradition->collation, "Tradition has a collation" );
 893
 894 my $c = $tradition->collation;
 895 is( scalar $c->readings, $READINGS, "Collation has all readings" );
 896 is( scalar $c->paths, $PATHS, "Collation has all paths" );
 897 is( scalar $c->relationships, 0, "Collation has all relationships" );
 898
 899 # Add a few relationships
 900 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
 901 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
 902 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
 903
 904 # Now write it to GraphML and parse it again.
 905
 906 my $graphml = $c->as_graphml;
 907 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
 908 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
 909 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
 910 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
 911
 912 # Now add a stemma, write to GraphML, and look at the output.
 913 SKIP: {
 914         skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
 915         my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
 916         is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
 917         is( $tradition->stemmata, 1, "Tradition now has the stemma" );
 918         $graphml = $c->as_graphml;
 919         like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
 920 }
 921
 922 # Now add a user, write to GraphML, and look at the output.
 923 unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
 924 my $testuser = Text::Tradition::User->new(
 925         id => 'testuser', password => 'testpass' );
 926 is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
 927 $testuser->add_tradition( $tradition );
 928 is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
 929 $graphml = $c->as_graphml;
 930 like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
 931
 932 =end testing
 933
 934 =cut
 935
 936 ## TODO MOVE this to Tradition.pm and modularize it better
 937 sub as_graphml {
 938     my( $self, $options ) = @_;
 939         $self->calculate_ranks unless $self->_graphcalc_done;
 940
 941         my $start = $options->{'from'}
 942                 ? $self->reading( $options->{'from'} ) : $self->start;
 943         my $end = $options->{'to'}
 944                 ? $self->reading( $options->{'to'} ) : $self->end;
 945         if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
 946                 throw( 'Start node must be before end node' );
 947         }
 948         # The readings need to be ranked for this to work.
 949         $start = $self->start unless $start->has_rank;
 950         $end = $self->end unless $end->has_rank;
 951         my $rankoffset = 0;
 952         unless( $start eq $self->start ) {
 953                 $rankoffset = $start->rank - 1;
 954         }
 955         my %use_readings;
 956
 957     # Some namespaces
 958     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 959     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 960     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 961         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 962
 963     # Create the document and root node
 964     require XML::LibXML;
 965     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 966     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 967     $graphml->setDocumentElement( $root );
 968     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 969     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 970
 971     # List of attribute types to save on our objects and their corresponding
 972     # GraphML types
 973     my %save_types = (
 974         'Str' => 'string',
 975         'Int' => 'int',
 976         'Bool' => 'boolean',
 977         'ReadingID' => 'string',
 978         'RelationshipType' => 'string',
 979         'RelationshipScope' => 'string',
 980     );
 981
 982     # Add the data keys for the graph. Include an extra key 'version' for the
 983     # GraphML output version.
 984     my %graph_data_keys;
 985     my $gdi = 0;
 986     my %graph_attributes = ( 'version' => 'string' );
 987         # Graph attributes include those of Tradition and those of Collation.
 988         my %gattr_from;
 989         my $tmeta = $self->tradition->meta;
 990         my $cmeta = $self->meta;
 991         map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
 992         map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
 993         foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
 994                 next if $attr->name =~ /^_/;
 995                 next unless $save_types{$attr->type_constraint->name};
 996                 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
 997         }
 998     # Extra custom keys for complex objects that should be saved in some form.
 999     # The subroutine should return a string, or undef/empty.
1000     if( $tmeta->has_method('stemmata') ) {
1001                 $graph_attributes{'stemmata'} = sub {
1002                         my @stemstrs;
1003                         map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1004                                 $self->tradition->stemmata;
1005                         join( "\n", @stemstrs );
1006                 };
1007         }
1008
1009     $graph_attributes{'user'} = sub {
1010         $self->tradition->user ? $self->tradition->user->id : undef
1011     };
1012
1013     foreach my $datum ( sort keys %graph_attributes ) {
1014         $graph_data_keys{$datum} = 'dg'.$gdi++;
1015         my $key = $root->addNewChild( $graphml_ns, 'key' );
1016         my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1017                 : $graph_attributes{$datum};
1018         $key->setAttribute( 'attr.name', $datum );
1019         $key->setAttribute( 'attr.type', $dtype );
1020         $key->setAttribute( 'for', 'graph' );
1021         $key->setAttribute( 'id', $graph_data_keys{$datum} );
1022     }
1023
1024     # Add the data keys for reading nodes
1025     my %reading_attributes;
1026     my $rmeta = Text::Tradition::Collation::Reading->meta;
1027     foreach my $attr( $rmeta->get_all_attributes ) {
1028                 next if $attr->name =~ /^_/;
1029                 next unless $save_types{$attr->type_constraint->name};
1030                 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1031         }
1032         if( $self->start->does('Text::Tradition::Morphology' ) ) {
1033                 # Extra custom key for the reading morphology
1034                 $reading_attributes{'lexemes'} = 'string';
1035         }
1036
1037     my %node_data_keys;
1038     my $ndi = 0;
1039     foreach my $datum ( sort keys %reading_attributes ) {
1040         $node_data_keys{$datum} = 'dn'.$ndi++;
1041         my $key = $root->addNewChild( $graphml_ns, 'key' );
1042         $key->setAttribute( 'attr.name', $datum );
1043         $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1044         $key->setAttribute( 'for', 'node' );
1045         $key->setAttribute( 'id', $node_data_keys{$datum} );
1046     }
1047
1048     # Add the data keys for edges, that is, paths and relationships. Path
1049     # data does not come from a Moose class so is here manually.
1050     my $edi = 0;
1051     my %edge_data_keys;
1052     my %edge_attributes = (
1053         witness => 'string',                    # ID/label for a path
1054         extra => 'boolean',                             # Path key
1055         );
1056     my @path_attributes = keys %edge_attributes; # track our manual additions
1057     my $pmeta = Text::Tradition::Collation::Relationship->meta;
1058     foreach my $attr( $pmeta->get_all_attributes ) {
1059                 next if $attr->name =~ /^_/;
1060                 next unless $save_types{$attr->type_constraint->name};
1061                 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1062         }
1063     foreach my $datum ( sort keys %edge_attributes ) {
1064         $edge_data_keys{$datum} = 'de'.$edi++;
1065         my $key = $root->addNewChild( $graphml_ns, 'key' );
1066         $key->setAttribute( 'attr.name', $datum );
1067         $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1068         $key->setAttribute( 'for', 'edge' );
1069         $key->setAttribute( 'id', $edge_data_keys{$datum} );
1070     }
1071
1072     # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1073     my $xmlidname = $self->tradition->name;
1074     $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1075     if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1076         $xmlidname = '_'.$xmlidname;
1077     }
1078     my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1079     $sgraph->setAttribute( 'edgedefault', 'directed' );
1080     $sgraph->setAttribute( 'id', $xmlidname );
1081     $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1082     $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1083     $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1084     $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1085     $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1086
1087     # Tradition/collation attribute data
1088     foreach my $datum ( keys %graph_attributes ) {
1089         my $value;
1090         if( $datum eq 'version' ) {
1091                 $value = '3.2';
1092         } elsif( ref( $graph_attributes{$datum} ) ) {
1093                 my $sub = $graph_attributes{$datum};
1094                 $value = &$sub();
1095         } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1096                 $value = $self->tradition->$datum;
1097         } else {
1098                 $value = $self->$datum;
1099         }
1100                 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1101         }
1102
1103     my $node_ctr = 0;
1104     my %node_hash;
1105     # Add our readings to the graph
1106     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1107         next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1108                 ( $n->rank < $start->rank || $n->rank > $end->rank );
1109         $use_readings{$n->id} = 1;
1110         # Add to the main graph
1111         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1112         my $node_xmlid = 'n' . $node_ctr++;
1113         $node_hash{ $n->id } = $node_xmlid;
1114         $node_el->setAttribute( 'id', $node_xmlid );
1115         foreach my $d ( keys %reading_attributes ) {
1116                 my $nval = $n->$d;
1117                 # Custom serialization
1118                 if( $d eq 'lexemes' ) {
1119                                 # If nval is a true value, we have lexemes so we need to
1120                                 # serialize them. Otherwise set nval to undef so that the
1121                                 # key is excluded from this reading.
1122                         $nval = $nval ? $n->_serialize_lexemes : undef;
1123                 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1124                         $nval = undef;
1125                 }
1126                 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1127                         # Adjust the ranks within the subgraph.
1128                         $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1129                                 : $nval - $rankoffset;
1130                 }
1131                 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1132                         if defined $nval;
1133         }
1134     }
1135
1136     # Add the path edges to the sequence graph
1137     my $edge_ctr = 0;
1138     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1139         # We add an edge in the graphml for every witness in $e.
1140         next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1141         my @edge_wits = sort $self->path_witnesses( $e );
1142         $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1143         $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1144         # Skip any path from start to end; that witness is not in the subgraph.
1145         next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1146         foreach my $wit ( @edge_wits ) {
1147                         my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1148                                                                                 $node_hash{ $e->[0] },
1149                                                                                 $node_hash{ $e->[1] } );
1150                         my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1151                         $edge_el->setAttribute( 'source', $from );
1152                         $edge_el->setAttribute( 'target', $to );
1153                         $edge_el->setAttribute( 'id', $id );
1154
1155                         # It's a witness path, so add the witness
1156                         my $base = $wit;
1157                         my $key = $edge_data_keys{'witness'};
1158                         # Is this an ante-corr witness?
1159                         my $aclabel = $self->ac_label;
1160                         if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1161                                 # Keep the base witness
1162                                 $base = $1;
1163                                 # ...and record that this is an 'extra' reading path
1164                                 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1165                         }
1166                         _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1167                 }
1168         }
1169
1170         # Report the actual number of nodes and edges that went in
1171         $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1172         $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1173
1174         # Add the relationship graph to the XML
1175         map { delete $edge_data_keys{$_} } @path_attributes;
1176         $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1177                 $node_data_keys{'id'}, \%edge_data_keys );
1178
1179     # Save and return the thing
1180     my $result = decode_utf8( $graphml->toString(1) );
1181     return $result;
1182 }
1183
1184 sub _add_graphml_data {
1185     my( $el, $key, $value ) = @_;
1186     return unless defined $value;
1187     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1188     $data_el->setAttribute( 'key', $key );
1189     $data_el->appendText( $value );
1190 }
1191
1192 =head2 as_csv
1193
1194 Returns a CSV alignment table representation of the collation graph, one
1195 row per witness (or witness uncorrected.)
1196
1197 =cut
1198
1199 sub as_csv {
1200     my( $self ) = @_;
1201     my $table = $self->alignment_table;
1202     my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1203     my @result;
1204     # Make the header row
1205     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1206         push( @result, decode_utf8( $csv->string ) );
1207     # Make the rest of the rows
1208     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1209         my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1210         my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1211         $csv->combine( @row );
1212         push( @result, decode_utf8( $csv->string ) );
1213     }
1214     return join( "\n", @result );
1215 }
1216
1217 =head2 alignment_table( $use_refs, $include_witnesses )
1218
1219 Return a reference to an alignment table, in a slightly enhanced CollateX
1220 format which looks like this:
1221
1222  $table = { alignment => [ { witness => "SIGIL",
1223                              tokens => [ { t => "TEXT" }, ... ] },
1224                            { witness => "SIG2",
1225                              tokens => [ { t => "TEXT" }, ... ] },
1226                            ... ],
1227             length => TEXTLEN };
1228
1229 If $use_refs is set to 1, the reading object is returned in the table
1230 instead of READINGTEXT; if not, the text of the reading is returned.
1231
1232 If $include_witnesses is set to a hashref, only the witnesses whose sigil
1233 keys have a true hash value will be included.
1234
1235 =cut
1236
1237 sub alignment_table {
1238     my( $self ) = @_;
1239     $self->calculate_ranks() unless $self->_graphcalc_done;
1240     return $self->cached_table if $self->has_cached_table;
1241
1242     # Make sure we can do this
1243         throw( "Need a linear graph in order to make an alignment table" )
1244                 unless $self->linear;
1245         $self->calculate_ranks unless $self->end->has_rank;
1246
1247     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1248     my @all_pos = ( 1 .. $self->end->rank - 1 );
1249     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1250         # say STDERR "Making witness row(s) for " . $wit->sigil;
1251         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1252         my @row = _make_witness_row( \@wit_path, \@all_pos );
1253         push( @{$table->{'alignment'}},
1254                 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1255         if( $wit->is_layered ) {
1256                 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1257                         $wit->sigil.$self->ac_label );
1258             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1259                         push( @{$table->{'alignment'}},
1260                                 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
1261         }
1262     }
1263     $self->cached_table( $table );
1264     return $table;
1265 }
1266
1267 sub _make_witness_row {
1268     my( $path, $positions ) = @_;
1269     my %char_hash;
1270     map { $char_hash{$_} = undef } @$positions;
1271     my $debug = 0;
1272     foreach my $rdg ( @$path ) {
1273         say STDERR "rank " . $rdg->rank if $debug;
1274         # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1275         $char_hash{$rdg->rank} = { 't' => $rdg };
1276     }
1277     my @row = map { $char_hash{$_} } @$positions;
1278     # Fill in lacuna markers for undef spots in the row
1279     my $last_el = shift @row;
1280     my @filled_row = ( $last_el );
1281     foreach my $el ( @row ) {
1282         # If we are using node reference, make the lacuna node appear many times
1283         # in the table.  If not, use the lacuna tag.
1284         if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1285             $el = $last_el;
1286         }
1287         push( @filled_row, $el );
1288         $last_el = $el;
1289     }
1290     return @filled_row;
1291 }
1292
1293 =head1 NAVIGATION METHODS
1294
1295 =head2 reading_sequence( $first, $last, $sigil, $backup )
1296
1297 Returns the ordered list of readings, starting with $first and ending
1298 with $last, for the witness given in $sigil. If a $backup sigil is
1299 specified (e.g. when walking a layered witness), it will be used wherever
1300 no $sigil path exists.  If there is a base text reading, that will be
1301 used wherever no path exists for $sigil or $backup.
1302
1303 =cut
1304
1305 # TODO Think about returning some lazy-eval iterator.
1306 # TODO Get rid of backup; we should know from what witness is whether we need it.
1307
1308 sub reading_sequence {
1309     my( $self, $start, $end, $witness ) = @_;
1310
1311     $witness = $self->baselabel unless $witness;
1312     my @readings = ( $start );
1313     my %seen;
1314     my $n = $start;
1315     while( $n && $n->id ne $end->id ) {
1316         if( exists( $seen{$n->id} ) ) {
1317             throw( "Detected loop for $witness at " . $n->id );
1318         }
1319         $seen{$n->id} = 1;
1320
1321         my $next = $self->next_reading( $n, $witness );
1322         unless( $next ) {
1323             throw( "Did not find any path for $witness from reading " . $n->id );
1324         }
1325         push( @readings, $next );
1326         $n = $next;
1327     }
1328     # Check that the last reading is our end reading.
1329     my $last = $readings[$#readings];
1330     throw( "Last reading found from " . $start->text .
1331         " for witness $witness is not the end!" ) # TODO do we get this far?
1332         unless $last->id eq $end->id;
1333
1334     return @readings;
1335 }
1336
1337 =head2 next_reading( $reading, $sigil );
1338
1339 Returns the reading that follows the given reading along the given witness
1340 path.
1341
1342 =cut
1343
1344 sub next_reading {
1345     # Return the successor via the corresponding path.
1346     my $self = shift;
1347     my $answer = $self->_find_linked_reading( 'next', @_ );
1348         return undef unless $answer;
1349     return $self->reading( $answer );
1350 }
1351
1352 =head2 prior_reading( $reading, $sigil )
1353
1354 Returns the reading that precedes the given reading along the given witness
1355 path.
1356
1357 =cut
1358
1359 sub prior_reading {
1360     # Return the predecessor via the corresponding path.
1361     my $self = shift;
1362     my $answer = $self->_find_linked_reading( 'prior', @_ );
1363     return $self->reading( $answer );
1364 }
1365
1366 sub _find_linked_reading {
1367     my( $self, $direction, $node, $path ) = @_;
1368
1369     # Get a backup if we are dealing with a layered witness
1370     my $alt_path;
1371     my $aclabel = $self->ac_label;
1372     if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1373         $alt_path = $1;
1374     }
1375
1376     my @linked_paths = $direction eq 'next'
1377         ? $self->sequence->edges_from( $node )
1378         : $self->sequence->edges_to( $node );
1379     return undef unless scalar( @linked_paths );
1380
1381     # We have to find the linked path that contains all of the
1382     # witnesses supplied in $path.
1383     my( @path_wits, @alt_path_wits );
1384     @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1385     @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1386     my $base_le;
1387     my $alt_le;
1388     foreach my $le ( @linked_paths ) {
1389         if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1390             $base_le = $le;
1391         }
1392                 my @le_wits = sort $self->path_witnesses( $le );
1393                 if( _is_within( \@path_wits, \@le_wits ) ) {
1394                         # This is the right path.
1395                         return $direction eq 'next' ? $le->[1] : $le->[0];
1396                 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1397                         $alt_le = $le;
1398                 }
1399     }
1400     # Got this far? Return the alternate path if it exists.
1401     return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1402         if $alt_le;
1403
1404     # Got this far? Return the base path if it exists.
1405     return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1406         if $base_le;
1407
1408     # Got this far? We have no appropriate path.
1409     warn "Could not find $direction node from " . $node->id
1410         . " along path $path";
1411     return undef;
1412 }
1413
1414 # Some set logic.
1415 sub _is_within {
1416     my( $set1, $set2 ) = @_;
1417     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1418     foreach my $el ( @$set1 ) {
1419         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1420     }
1421     return $ret;
1422 }
1423
1424 # Return the string that joins together a list of witnesses for
1425 # display on a single path.
1426 sub _witnesses_of_label {
1427     my( $self, $label ) = @_;
1428     my $regex = $self->wit_list_separator;
1429     my @answer = split( /\Q$regex\E/, $label );
1430     return @answer;
1431 }
1432
1433 =head2 common_readings
1434
1435 Returns the list of common readings in the graph (i.e. those readings that are
1436 shared by all non-lacunose witnesses.)
1437
1438 =cut
1439
1440 sub common_readings {
1441         my $self = shift;
1442         my @common = grep { $_->is_common } $self->readings;
1443         return @common;
1444 }
1445
1446 =head2 path_text( $sigil, [, $start, $end ] )
1447
1448 Returns the text of a witness (plus its backup, if we are using a layer)
1449 as stored in the collation.  The text is returned as a string, where the
1450 individual readings are joined with spaces and the meta-readings (e.g.
1451 lacunae) are omitted.  Optional specification of $start and $end allows
1452 the generation of a subset of the witness text.
1453
1454 =cut
1455
1456 sub path_text {
1457         my( $self, $wit, $start, $end ) = @_;
1458         $start = $self->start unless $start;
1459         $end = $self->end unless $end;
1460         my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1461         my $pathtext = '';
1462         my $last;
1463         foreach my $r ( @path ) {
1464                 unless ( $r->join_prior || !$last || $last->join_next ) {
1465                         $pathtext .= ' ';
1466                 }
1467                 $pathtext .= $r->text;
1468                 $last = $r;
1469         }
1470         return $pathtext;
1471 }
1472
1473 =head1 INITIALIZATION METHODS
1474
1475 These are mostly for use by parsers.
1476
1477 =head2 make_witness_path( $witness )
1478
1479 Link the array of readings contained in $witness->path (and in
1480 $witness->uncorrected_path if it exists) into collation paths.
1481 Clear out the arrays when finished.
1482
1483 =head2 make_witness_paths
1484
1485 Call make_witness_path for all witnesses in the tradition.
1486
1487 =cut
1488
1489 # For use when a collation is constructed from a base text and an apparatus.
1490 # We have the sequences of readings and just need to add path edges.
1491 # When we are done, clear out the witness path attributes, as they are no
1492 # longer needed.
1493 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1494
1495 sub make_witness_paths {
1496     my( $self ) = @_;
1497     foreach my $wit ( $self->tradition->witnesses ) {
1498         # say STDERR "Making path for " . $wit->sigil;
1499         $self->make_witness_path( $wit );
1500     }
1501 }
1502
1503 sub make_witness_path {
1504     my( $self, $wit ) = @_;
1505     my @chain = @{$wit->path};
1506     my $sig = $wit->sigil;
1507     # Add start and end if necessary
1508     unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1509     push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1510     foreach my $idx ( 0 .. $#chain-1 ) {
1511         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1512     }
1513     if( $wit->is_layered ) {
1514         @chain = @{$wit->uncorrected_path};
1515                 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1516                 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1517         foreach my $idx( 0 .. $#chain-1 ) {
1518             my $source = $chain[$idx];
1519             my $target = $chain[$idx+1];
1520             $self->add_path( $source, $target, $sig.$self->ac_label )
1521                 unless $self->has_path( $source, $target, $sig );
1522         }
1523     }
1524     $wit->clear_path;
1525     $wit->clear_uncorrected_path;
1526 }
1527
1528 =head2 calculate_ranks
1529
1530 Calculate the reading ranks (that is, their aligned positions relative
1531 to each other) for the graph.  This can only be called on linear collations.
1532
1533 =begin testing
1534
1535 use Text::Tradition;
1536
1537 my $cxfile = 't/data/Collatex-16.xml';
1538 my $t = Text::Tradition->new(
1539     'name'  => 'inline',
1540     'input' => 'CollateX',
1541     'file'  => $cxfile,
1542     );
1543 my $c = $t->collation;
1544
1545 # Make an svg
1546 my $table = $c->alignment_table;
1547 ok( $c->has_cached_table, "Alignment table was cached" );
1548 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1549 $c->calculate_ranks;
1550 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1551 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1552 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1553
1554 =end testing
1555
1556 =cut
1557
1558 sub calculate_ranks {
1559     my $self = shift;
1560     # Save the existing ranks, in case we need to invalidate the cached SVG.
1561     my %existing_ranks;
1562     map { $existing_ranks{$_} = $_->rank } $self->readings;
1563
1564     # Do the rankings based on the relationship equivalence graph, starting
1565     # with the start node.
1566     my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1567
1568     # Transfer our rankings from the topological graph to the real one.
1569     foreach my $r ( $self->readings ) {
1570         if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1571             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1572         } else {
1573                 # Die. Find the last rank we calculated.
1574                 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1575                                  <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1576                         $self->readings;
1577                 my $last = pop @all_defined;
1578             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1579         }
1580     }
1581     # Do we need to invalidate the cached data?
1582     if( $self->has_cached_table ) {
1583         foreach my $r ( $self->readings ) {
1584                 next if defined( $existing_ranks{$r} )
1585                         && $existing_ranks{$r} == $r->rank;
1586                 # Something has changed, so clear the cache
1587                 $self->_clear_cache;
1588                         # ...and recalculate the common readings.
1589                         $self->calculate_common_readings();
1590                 last;
1591         }
1592     }
1593         # The graph calculation information is now up to date.
1594         $self->_graphcalc_done(1);
1595 }
1596
1597 sub _clear_cache {
1598         my $self = shift;
1599         $self->wipe_table if $self->has_cached_table;
1600 }
1601
1602
1603 =head2 flatten_ranks
1604
1605 A convenience method for parsing collation data.  Searches the graph for readings
1606 with the same text at the same rank, and merges any that are found.
1607
1608 =cut
1609
1610 sub flatten_ranks {
1611     my $self = shift;
1612     my %unique_rank_rdg;
1613     my $changed;
1614     foreach my $rdg ( $self->readings ) {
1615         next unless $rdg->has_rank;
1616         my $key = $rdg->rank . "||" . $rdg->text;
1617         if( exists $unique_rank_rdg{$key} ) {
1618                 # Make sure they don't have different grammatical forms
1619                         my $ur = $unique_rank_rdg{$key};
1620                 if( $rdg->is_identical( $ur ) ) {
1621                                 # Combine!
1622                                 #say STDERR "Combining readings at same rank: $key";
1623                                 $changed = 1;
1624                                 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1625                                 # TODO see if this now makes a common point.
1626                         }
1627         } else {
1628             $unique_rank_rdg{$key} = $rdg;
1629         }
1630     }
1631     # If we merged readings, the ranks are still fine but the alignment
1632     # table is wrong. Wipe it.
1633     $self->wipe_table() if $changed;
1634 }
1635
1636
1637 =head2 calculate_common_readings
1638
1639 Goes through the graph identifying the readings that appear in every witness
1640 (apart from those with lacunae at that spot.) Marks them as common and returns
1641 the list.
1642
1643 =begin testing
1644
1645 use Text::Tradition;
1646
1647 my $cxfile = 't/data/Collatex-16.xml';
1648 my $t = Text::Tradition->new(
1649     'name'  => 'inline',
1650     'input' => 'CollateX',
1651     'file'  => $cxfile,
1652     );
1653 my $c = $t->collation;
1654
1655 my @common = $c->calculate_common_readings();
1656 is( scalar @common, 8, "Found correct number of common readings" );
1657 my @marked = sort $c->common_readings();
1658 is( scalar @common, 8, "All common readings got marked as such" );
1659 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1660 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1661
1662 =end testing
1663
1664 =cut
1665
1666 sub calculate_common_readings {
1667         my $self = shift;
1668         my @common;
1669         map { $_->is_common( 0 ) } $self->readings;
1670         # Implicitly calls calculate_ranks
1671         my $table = $self->alignment_table;
1672         foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1673                 my @row = map { $_->{'tokens'}->[$idx]
1674                                                         ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1675                                         @{$table->{'alignment'}};
1676                 my %hash;
1677                 foreach my $r ( @row ) {
1678                         if( $r ) {
1679                                 $hash{$r->id} = $r unless $r->is_meta;
1680                         } else {
1681                                 $hash{'UNDEF'} = $r;
1682                         }
1683                 }
1684                 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1685                         my( $r ) = values %hash;
1686                         $r->is_common( 1 );
1687                         push( @common, $r );
1688                 }
1689         }
1690         return @common;
1691 }
1692
1693 =head2 text_from_paths
1694
1695 Calculate the text array for all witnesses from the path, for later consistency
1696 checking.  Only to be used if there is no non-graph-based way to know the
1697 original texts.
1698
1699 =cut
1700
1701 sub text_from_paths {
1702         my $self = shift;
1703     foreach my $wit ( $self->tradition->witnesses ) {
1704         my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1705         my @text;
1706         foreach my $r ( @readings ) {
1707                 next if $r->is_meta;
1708                 push( @text, $r->text );
1709         }
1710         $wit->text( \@text );
1711         if( $wit->is_layered ) {
1712                         my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1713                                                                                                   $wit->sigil.$self->ac_label );
1714                         my @uctext;
1715                         foreach my $r ( @ucrdgs ) {
1716                                 next if $r->is_meta;
1717                                 push( @uctext, $r->text );
1718                         }
1719                         $wit->layertext( \@uctext );
1720         }
1721     }
1722 }
1723
1724 =head1 UTILITY FUNCTIONS
1725
1726 =head2 common_predecessor( $reading_a, $reading_b )
1727
1728 Find the last reading that occurs in sequence before both the given readings.
1729 At the very least this should be $self->start.
1730
1731 =head2 common_successor( $reading_a, $reading_b )
1732
1733 Find the first reading that occurs in sequence after both the given readings.
1734 At the very least this should be $self->end.
1735
1736 =begin testing
1737
1738 use Text::Tradition;
1739
1740 my $cxfile = 't/data/Collatex-16.xml';
1741 my $t = Text::Tradition->new(
1742     'name'  => 'inline',
1743     'input' => 'CollateX',
1744     'file'  => $cxfile,
1745     );
1746 my $c = $t->collation;
1747
1748 is( $c->common_predecessor( 'n24', 'n23' )->id,
1749     'n20', "Found correct common predecessor" );
1750 is( $c->common_successor( 'n24', 'n23' )->id,
1751     '__END__', "Found correct common successor" );
1752
1753 is( $c->common_predecessor( 'n19', 'n17' )->id,
1754     'n16', "Found correct common predecessor for readings on same path" );
1755 is( $c->common_successor( 'n21', 'n10' )->id,
1756     '__END__', "Found correct common successor for readings on same path" );
1757
1758 =end testing
1759
1760 =cut
1761
1762 ## Return the closest reading that is a predecessor of both the given readings.
1763 sub common_predecessor {
1764         my $self = shift;
1765         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1766         return $self->_common_in_path( $r1, $r2, 'predecessors' );
1767 }
1768
1769 sub common_successor {
1770         my $self = shift;
1771         my( $r1, $r2 ) = $self->_objectify_args( @_ );
1772         return $self->_common_in_path( $r1, $r2, 'successors' );
1773 }
1774
1775
1776 # TODO think about how to do this without ranks...
1777 sub _common_in_path {
1778         my( $self, $r1, $r2, $dir ) = @_;
1779         my $iter = $self->end->rank;
1780         my @candidates;
1781         my @last_r1 = ( $r1 );
1782         my @last_r2 = ( $r2 );
1783         # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1784         my %all_seen;
1785         # say STDERR "Finding common $dir for $r1, $r2";
1786         while( !@candidates ) {
1787                 last unless $iter--;  # Avoid looping infinitely
1788                 # Iterate separately down the graph from r1 and r2
1789                 my( @new_lc1, @new_lc2 );
1790                 foreach my $lc ( @last_r1 ) {
1791                         foreach my $p ( $lc->$dir ) {
1792                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1793                                         # say STDERR "Path candidate $p from $lc";
1794                                         push( @candidates, $p );
1795                                 } elsif( !$all_seen{$p->id} ) {
1796                                         $all_seen{$p->id} = 'r1';
1797                                         push( @new_lc1, $p );
1798                                 }
1799                         }
1800                 }
1801                 foreach my $lc ( @last_r2 ) {
1802                         foreach my $p ( $lc->$dir ) {
1803                                 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1804                                         # say STDERR "Path candidate $p from $lc";
1805                                         push( @candidates, $p );
1806                                 } elsif( !$all_seen{$p->id} ) {
1807                                         $all_seen{$p->id} = 'r2';
1808                                         push( @new_lc2, $p );
1809                                 }
1810                         }
1811                 }
1812                 @last_r1 = @new_lc1;
1813                 @last_r2 = @new_lc2;
1814         }
1815         my @answer = sort { $a->rank <=> $b->rank } @candidates;
1816         return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1817 }
1818
1819 sub throw {
1820         Text::Tradition::Error->throw(
1821                 'ident' => 'Collation error',
1822                 'message' => $_[0],
1823                 );
1824 }
1825
1826 no Moose;
1827 __PACKAGE__->meta->make_immutable;
1828
1829 =head1 BUGS/TODO
1830
1831 =over
1832
1833 =item * Rework XML serialization in a more modular way
1834
1835 =back
1836
1837 =head1 LICENSE
1838
1839 This package is free software and is provided "as is" without express
1840 or implied warranty.  You can redistribute it and/or modify it under
1841 the same terms as Perl itself.
1842
1843 =head1 AUTHOR
1844
1845 Tara L Andrews E<lt>aurum@cpan.orgE<gt>