split stemma lib into util and object; make phylip_input microservice

[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm

index 40cc565..4b15dd1 100644 (file)
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -1,60 +1,50 @@
 package Text::Tradition::Collation;
 
-use Graph::Easy;
+use Encode qw( decode_utf8 );
+use File::Temp;
+use Graph;
 use IPC::Run qw( run binary );
-use Text::Tradition::Collation::Path;
-use Text::Tradition::Collation::Position;
+use Text::CSV_XS;
 use Text::Tradition::Collation::Reading;
-use Text::Tradition::Collation::Relationship;
-use Text::Tradition::Collation::Segment;
 use XML::LibXML;
 use Moose;
 
-has 'graph' => (
+has 'sequence' => (
     is => 'ro',
-    isa => 'Graph::Easy',
+    isa => 'Graph',
+    default => sub { Graph->new() },
     handles => {
-       add_reading => 'add_node',
-       del_reading => 'del_node',
-       add_path => 'add_edge',
-       del_path => 'del_edge',
-       reading => 'node',
-       path => 'edge',
-       readings => 'nodes',
-       segments => 'nodes',
-       paths => 'edges',
-       relationships => 'edges',
+       paths => 'edges',
     },
-    default => sub { Graph::Easy->new( undirected => 0 ) },
-    );
-               
-
-has 'tradition' => (  # TODO should this not be ro?
-    is => 'rw',
-    isa => 'Text::Tradition',
-    );
-
-has 'svg' => (
-    is => 'ro',
-    isa => 'Str',
-    writer => '_save_svg',
-    predicate => 'has_svg',
     );
+    
+has 'relations' => (
+       is => 'ro',
+       isa => 'Graph',
+       default => sub { Graph->new( undirected => 1 ) },
+    handles => {
+       relationships => 'edges',
+    },
+       );
 
-has 'graphml' => (
+has 'tradition' => (
     is => 'ro',
-    isa => 'Str',
-    writer => '_save_graphml',
-    predicate => 'has_graphml',
+    isa => 'Text::Tradition',
+    weak_ref => 1,
     );
 
-# Keeps track of the lemmas within the collation.  At most one lemma
-# per position in the graph.
-has 'lemmata' => (
-    is => 'ro',
-    isa => 'HashRef[Maybe[Str]]',
+has 'readings' => (
+       isa => 'HashRef[Text::Tradition::Collation::Reading]',
+       traits => ['Hash'],
+    handles => {
+        reading     => 'get',
+        _add_reading => 'set',
+        del_reading => 'delete',
+        has_reading => 'exists',
+        readings   => 'values',
+    },
     default => sub { {} },
-    );
+       );
 
 has 'wit_list_separator' => (
     is => 'rw',
@@ -68,11 +58,6 @@ has 'baselabel' => (
     default => 'base text',
     );
 
-has 'collapsed' => (
-    is => 'rw',
-    isa => 'Bool',
-    );
-
 has 'linear' => (
     is => 'rw',
     isa => 'Bool',
@@ -84,7 +69,20 @@ has 'ac_label' => (
     isa => 'Str',
     default => ' (a.c.)',
     );
-
+    
+has 'start' => (
+       is => 'ro',
+       isa => 'Text::Tradition::Collation::Reading',
+       writer => '_set_start',
+       weak_ref => 1,
+       );
+
+has 'end' => (
+       is => 'ro',
+       isa => 'Text::Tradition::Collation::Reading',
+       writer => '_set_end',
+       weak_ref => 1,
+       );
 
 # The collation can be created two ways:
 # 1. Collate a set of witnesses (with CollateX I guess) and process
@@ -102,156 +100,273 @@ has 'ac_label' => (
 # come through option 1.
 
 sub BUILD {
-    my( $self, $args ) = @_;
-    $self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
-    $self->graph->use_class('edge', 'Text::Tradition::Collation::Path');
-
-    # Pass through any graph-specific options.
-    my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
-    $self->graph->set_attribute( 'node', 'shape', $shape );
+    my $self = shift;
+    $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
+    $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
 }
 
-# Wrapper around add_path 
+### Reading construct/destruct functions
 
-around add_path => sub {
-    my $orig = shift;
-    my $self = shift;
+sub add_reading {
+       my( $self, $reading ) = @_;
+       unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
+               my %args = %$reading;
+               $reading = Text::Tradition::Collation::Reading->new( 
+                       'collation' => $self,
+                       %args );
+       }
+       # First check to see if a reading with this ID exists.
+       if( $self->reading( $reading->id ) ) {
+               warn "Collation already has a reading with id " . $reading->id;
+               return undef;
+       }
+       $self->_add_reading( $reading->id => $reading );
+       # Once the reading has been added, put it in both graphs.
+       $self->sequence->add_vertex( $reading->id );
+       $self->relations->add_vertex( $reading->id );
+       return $reading;
+};
 
-    # Make sure there are three arguments
-    unless( @_ == 3 ) {
-       warn "Call add_path with args source, target, witness";
-       return;
-    }
-    # Make sure the proposed path does not yet exist
-    # NOTE 'reading' will currently return readings and segments
-    my( $source, $target, $wit ) = @_;
-    $source = $self->reading( $source )
-       unless ref( $source ) eq 'Text::Tradition::Collation::Reading';
-    $target = $self->reading( $target )
-       unless ref( $target ) eq 'Text::Tradition::Collation::Reading';
-    foreach my $path ( $source->edges_to( $target ) ) {
-       if( $path->label eq $wit && $path->class eq 'edge.path' ) {
-           return;
+around del_reading => sub {
+       my $orig = shift;
+       my $self = shift;
+       my $arg = shift;
+       
+       if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
+               $arg = $arg->id;
        }
-    }
-    # Do the deed
-    $self->$orig( @_ );
+       # Remove the reading from the graphs.
+       $self->sequence->delete_vertex( $arg );
+       $self->relations->delete_vertex( $arg );
+       
+       # Carry on.
+       $self->$orig( $arg );
 };
 
-# Wrapper around paths
-around paths => sub {
-    my $orig = shift;
-    my $self = shift;
+# merge_readings( $main, $to_be_deleted );
 
-    my @result = grep { $_->sub_class eq 'path' } $self->$orig( @_ );
-    return @result;
-};
+sub merge_readings {
+       my $self = shift;
+
+       # We only need the IDs for adding paths to the graph, not the reading
+       # objects themselves.
+    my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
+
+    # The kept reading should inherit the paths and the relationships
+    # of the deleted reading.
+       foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
+               my @vector = ( $kept );
+               push( @vector, $path->[1] ) if $path->[0] eq $deleted;
+               unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
+               next if $vector[0] eq $vector[1]; # Don't add a self loop
+               my %wits = %{$self->sequence->get_edge_attributes( @$path )};
+               $self->sequence->add_edge( @vector );
+               my $fwits = $self->sequence->get_edge_attributes( @vector );
+               @wits{keys %$fwits} = values %$fwits;
+               $self->sequence->set_edge_attributes( @vector, \%wits );
+       }
+       foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
+               my @vector = ( $kept );
+               push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
+               next if $vector[0] eq $vector[1]; # Don't add a self loop
+               # Is there a relationship here already? If so, keep it.
+               # TODO Warn about conflicting relationships
+               next if $self->relations->has_edge( @vector );
+               # If not, adopt the relationship that would be deleted.
+               $self->relations->add_edge( @vector );
+               my $attr = $self->relations->get_edge_attributes( @$rel );
+               $self->relations->set_edge_attributes( @vector, $attr );
+       }
+       
+       # Do the deletion deed.
+       if( $combine_char ) {
+               my $kept_obj = $self->reading( $kept );
+               my $new_text = join( $combine_char, $kept_obj->text, 
+                       $self->reading( $deleted )->text );
+               $kept_obj->alter_text( $new_text );
+       }
+       $self->del_reading( $deleted );
+}
 
-around relationships => sub {
-    my $orig = shift;
-    my $self = shift;
-    my @result = grep { $_->sub_class eq 'relationship' } $self->$orig( @_ );
-    return @result;
-};
 
-around readings => sub {
-    my $orig = shift;
-    my $self = shift;
-    my @result = grep { $_->sub_class ne 'segment' } $self->$orig( @_ );
-    return @result;
-};
+# Helper function for manipulating the graph.
+sub _stringify_args {
+       my( $self, $first, $second, $arg ) = @_;
+    $first = $first->id
+        if ref( $first ) eq 'Text::Tradition::Collation::Reading';
+    $second = $second->id
+        if ref( $second ) eq 'Text::Tradition::Collation::Reading';        
+    return( $first, $second, $arg );
+}
 
-around segments => sub {
-    my $orig = shift;
-    my $self = shift;
-    my @result = grep { $_->sub_class eq 'segment' } $self->$orig( @_ );
-    return @result;
+### Path logic
+
+sub add_path {
+       my $self = shift;
+
+       # We only need the IDs for adding paths to the graph, not the reading
+       # objects themselves.
+    my( $source, $target, $wit ) = $self->_stringify_args( @_ );
+
+       # Connect the readings
+    $self->sequence->add_edge( $source, $target );
+    # Note the witness in question
+    $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
 };
 
-# Wrapper around merge_nodes
+sub del_path {
+       my $self = shift;
+       my @args;
+       if( ref( $_[0] ) eq 'ARRAY' ) {
+               my $e = shift @_;
+               @args = ( @$e, @_ );
+       } else {
+               @args = @_;
+       }
+
+       # We only need the IDs for adding paths to the graph, not the reading
+       # objects themselves.
+    my( $source, $target, $wit ) = $self->_stringify_args( @args );
 
-sub merge_readings {
-    my $self = shift;
-    my $first_node = shift;
-    my $second_node = shift;
-    $first_node->merge_from( $second_node );
-    unshift( @_, $first_node, $second_node );
-    return $self->graph->merge_nodes( @_ );
+       if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
+               $self->sequence->delete_edge_attribute( $source, $target, $wit );
+       }
+       unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
+               $self->sequence->delete_edge( $source, $target );
+       }
 }
 
+
 # Extra graph-alike utility
 sub has_path {
-    my( $self, $source, $target, $label ) = @_;
-    my @paths = $source->edges_to( $target );
-    my @relevant = grep { $_->label eq $label } @paths;
-    return scalar @relevant;
+       my $self = shift;
+    my( $source, $target, $wit ) = $self->_stringify_args( @_ );
+       return undef unless $self->sequence->has_edge( $source, $target );
+       return $self->sequence->has_edge_attribute( $source, $target, $wit );
 }
 
-## Dealing with groups of readings, i.e. segments.
+### Relationship logic
 
-sub add_segment {
-    my( $self, @items ) = @_;
-    my $segment = Text::Tradition::Collation::Segment->new( 'members' => \@items );
-    return $segment;
-}
+=head2 add_relationship( $reading1, $reading2, $definition )
 
-## Dealing with relationships between readings.  This is a different
-## sort of graph edge.  Return a success/failure value and a list of
-## node pairs that have been linked.
+Adds the specified relationship between the two readings.  A relationship
+is transitive (i.e. undirected), and must have the following attributes
+specified in the hashref $definition:
 
-sub add_relationship {
-    my( $self, $source, $target, $options ) = @_;
+=over 4
+
+=item * type - Can be one of spelling, orthographic, grammatical, meaning, lexical, collated, repetition, transposition.  All but the last two are only valid relationships between readings that occur at the same point in the text.
+
+=item * non_correctable - (Optional) True if the reading would not have been corrected independently.
+
+=item * non_independent - (Optional) True if the variant is unlikely to have occurred independently in unrelated witnesses.
 
+=item * global - (Optional) A meta-attribute, to set the same relationship between readings with the same text whenever they occur in the same place.
+
+=back
+
+=cut
+
+# Wouldn't it be lovely if edges could be objects, and all this type checking
+# and attribute management could be done via Moose?
+
+sub add_relationship {
+       my $self = shift;
+    my( $source, $target, $options ) = $self->_stringify_args( @_ );
+
+       # Check the options
+       if( !defined $options->{'type'} ||
+               $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lexical|collated|repetition|transposition)$/i ) {
+               my $t = $options->{'type'} ? $options->{'type'} : '';
+               return( undef, "Invalid or missing type " . $options->{'type'} );
+       }
+       unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
+               $options->{'colocated'} = 1;
+       }
+       
     # Make sure there is not another relationship between these two
-    # readings or segments already
-    $source = $self->reading( $source )
-       unless ref( $source ) && $source->isa( 'Graph::Easy::Node' );
-    $target = $self->reading( $target )
-       unless ref( $target ) && $target->isa( 'Graph::Easy::Node' );
-    foreach my $rel ( $source->edges_to( $target ), $target->edges_to( $source ) ) {
-       if( $rel->class eq 'edge.relationship' ) {
-           return ( undef, "Relationship already exists between these readings" );
+    # readings already
+    if( $self->relations->has_edge( $source, $target ) ) {
+               return ( undef, "Relationship already exists between these readings" );
+    }
+    if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) {
+        return ( undef, 'Relationship creates witness loop' );
+    }
+
+       my @vector = ( $source, $target );
+       $self->relations->add_edge( @vector );
+       $self->relations->set_edge_attributes( @vector, $options );
+    
+    # TODO Handle global relationship setting
+
+    return( 1, @vector );
+}
+
+sub relationship_valid {
+    my( $self, $source, $target, $rel ) = @_;
+    if( $rel eq 'repetition' ) {
+       return 1;
+       } elsif ( $rel eq 'transposition' ) {
+               # Check that the two readings do not appear in the same witness.
+               my %seen_wits;
+               map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source );
+               foreach my $w ( $self->reading_witnesses( $target ) ) {
+                       return 0 if $seen_wits{$w};
+               }
+               return 1;
        } else {
-           return ( undef, "There is a witness path between these readings" );
+               # Check that linking the source and target in a relationship won't lead
+               # to a path loop for any witness.  First make a lookup table of all the
+               # readings related to either the source or the target.
+               my @proposed_related = ( $source, $target );
+               push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
+               push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
+               my %pr_ids;
+               map { $pr_ids{ $_ } = 1 } @proposed_related;
+       
+               # None of these proposed related readings should have a neighbor that
+               # is also in proposed_related.
+               foreach my $pr ( keys %pr_ids ) {
+                       foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
+                               return 0 if exists $pr_ids{$neighbor};
+                       }
+               }               
+               return 1;
        }
-    }
+}
 
-    if( $source->has_position && $target->has_position ) {
-       unless( grep { $_ eq $target } $self->same_position_as( $source ) ) {
-           return( undef, "Cannot set relationship at different positions" );
+# Return a list of the witnesses in which the reading appears.
+sub reading_witnesses {
+       my( $self, $reading ) = @_;
+       # We need only check either the incoming or the outgoing edges; I have
+       # arbitrarily chosen "incoming".  Thus, special-case the start node.
+       if( $reading eq $self->start ) {
+               return map { $_->sigil } $self->tradition->witnesses;
        }
-    }
+       my %all_witnesses;
+       foreach my $e ( $self->sequence->edges_to( $reading ) ) {
+               my $wits = $self->sequence->get_edge_attributes( @$e );
+               @all_witnesses{ keys %$wits } = 1;
+       }
+       return keys %all_witnesses;
+}
 
-    my @joined = ( [ $source->name, $target->name ] );  # Keep track of the nodes we join.
-    
-    $options->{'this_relation'} = [ $source, $target ];
-    my $rel;
-    eval { $rel = Text::Tradition::Collation::Relationship->new( %$options ) };
-    if( $@ ) {
-       return ( undef, $@ );
-    }
-    $self->graph->add_edge( $source, $target, $rel );
-    if( $options->{'global'} ) {
-       # Look for all readings with the source label, and if there are
-       # colocated readings with the target label, join them too.
-       foreach my $r ( grep { $_->label eq $source->label } $self->readings() ) {
-           next if $r->name eq $source->name;
-           my @colocated = grep { $_->label eq $target->label }
-               $self->same_position_as( $r );
-           if( @colocated ) {
-               warn "Multiple readings with same label at same position!"
-                   if @colocated > 1;
-               my $colo = $colocated[0];
-               next if $colo->edges_to( $r ) || $r->edges_to( $colo );
-               $options->{'primary_relation'} = $options->{'this_relation'};
-               $options->{'this_relation'} = [ $r, $colocated[0] ];
-               my $dup_rel = Text::Tradition::Collation::Relationship->new( %$options );
-               $self->graph->add_edge( $r, $colocated[0], $dup_rel );
-               push( @joined, [ $r->name, $colocated[0]->name ] );
-           }
+sub related_readings {
+       my( $self, $reading, $colocated ) = @_;
+       my $return_object;
+       if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
+               $reading = $reading->id;
+               $return_object = 1;
+#              print STDERR "Returning related objects\n";
+#      } else {
+#              print STDERR "Returning related object names\n";
        }
-    }
-    return( 1, @joined );
+       my @related = $self->relations->all_reachable( $reading );
+       if( $colocated ) {
+               my @colo = grep { $self->relations->has_edge_attribute( $reading, $_, 'colocated' ) } @related;
+               @related = @colo;
+       } 
+       return $return_object ? map { $self->reading( $_ ) } @related : @related;
 }
 
 =head2 Output method(s)
@@ -262,25 +377,23 @@ sub add_relationship {
 
 print $graph->as_svg( $recalculate );
 
-Returns an SVG string that represents the graph.  Uses GraphViz to do
-this, because Graph::Easy doesn\'t cope well with long graphs. Unless
-$recalculate is passed (and is a true value), the method will return a
-cached copy of the SVG after the first call to the method.
+Returns an SVG string that represents the graph, via as_dot and graphviz.
 
 =cut
 
 sub as_svg {
-    my( $self, $recalc ) = @_;
-    return $self->svg if $self->has_svg;
-    
-    $self->collapse_graph_paths();
-    
+    my( $self ) = @_;
+        
     my @cmd = qw/dot -Tsvg/;
     my( $svg, $err );
-    my $in = $self->as_dot();
-    run( \@cmd, \$in, ">", binary(), \$svg );
-    $self->_save_svg( $svg );
-    $self->expand_graph_paths();
+    my $dotfile = File::Temp->new();
+    ## TODO REMOVE
+    # $dotfile->unlink_on_destroy(0);
+    binmode $dotfile, ':utf8';
+    print $dotfile $self->as_dot();
+    push( @cmd, $dotfile->filename );
+    run( \@cmd, ">", binary(), \$svg );
+    $svg = decode_utf8( $svg );
     return $svg;
 }
 
@@ -299,36 +412,68 @@ graph is produced.
 
 sub as_dot {
     my( $self, $view ) = @_;
-    $view = 'path' unless $view;
+    $view = 'sequence' unless $view;
     # TODO consider making some of these things configurable
-    my $dot = sprintf( "digraph %s {\n", $self->tradition->name );
+    my $graph_name = $self->tradition->name;
+    $graph_name =~ s/[^\w\s]//g;
+    $graph_name = join( '_', split( /\s+/, $graph_name ) );
+    my $dot = sprintf( "digraph %s {\n", $graph_name );
     $dot .= "\tedge [ arrowhead=open ];\n";
     $dot .= "\tgraph [ rankdir=LR ];\n";
     $dot .= sprintf( "\tnode [ fontsize=%d, fillcolor=%s, style=%s, shape=%s ];\n",
-                    11, "white", "filled", $self->graph->get_attribute( 'node', 'shape' ) );
+                     11, "white", "filled", "ellipse" );
 
     foreach my $reading ( $self->readings ) {
-       # Need not output nodes without separate labels
-       next if $reading->name eq $reading->label;
-       # TODO output readings or segments, but not both
-       next if $reading->class eq 'node.segment';
-       $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->name, $reading->label );
+        # Need not output nodes without separate labels
+        next if $reading->id eq $reading->text;
+        my $label = $reading->text;
+        $label =~ s/\"/\\\"/g;
+        $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
     }
+    
+    # TODO do something sensible for relationships
 
-    my @edges = $view eq 'relationship' ? $self->relationships : $self->paths;
+    my @edges = $self->paths;
     foreach my $edge ( @edges ) {
-       my %variables = ( 'color' => '#000000',
-                         'fontcolor' => '#000000',
-                         'label' => $edge->label,
-           );
-       my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
-       $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
-                        $edge->from->name, $edge->to->name, $varopts );
+        my %variables = ( 'color' => '#000000',
+                          'fontcolor' => '#000000',
+                          'label' => join( ', ', $self->path_display_label( $edge ) ),
+            );
+        my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
+        # Account for the rank gap if necessary
+        my $rankgap = $self->reading( $edge->[1] )->rank 
+               - $self->reading( $edge->[0] )->rank;
+               $varopts .= ", minlen=$rankgap" if $rankgap > 1;
+        $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
+                         $edge->[0], $edge->[1], $varopts );
     }
     $dot .= "}\n";
     return $dot;
 }
 
+sub path_witnesses {
+       my( $self, @edge ) = @_;
+       # If edge is an arrayref, cope.
+       if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
+               my $e = shift @edge;
+               @edge = @$e;
+       }
+       my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
+       return sort @wits;
+}
+
+sub path_display_label {
+       my( $self, $edge ) = @_;
+       my @wits = $self->path_witnesses( $edge );
+       my $maj = scalar( $self->tradition->witnesses ) * 0.6;
+       if( scalar @wits > $maj ) {
+               return 'majority';
+       } else {
+               return join( ', ', @wits );
+       }
+}
+               
+
 =item B<as_graphml>
 
 print $graph->as_graphml( $recalculate )
@@ -341,14 +486,13 @@ cached copy of the SVG after the first call to the method.
 =cut
 
 sub as_graphml {
-    my( $self, $recalc ) = @_;
-    return $self->graphml if $self->has_graphml;
+    my( $self ) = @_;
 
     # Some namespaces
     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
-       'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
+        'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 
     # Create the document and root node
     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
@@ -357,184 +501,295 @@ sub as_graphml {
     $root->setNamespace( $xsi_ns, 'xsi', 0 );
     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 
-    # TODO Add some global graph data
+    # Add the data keys for the graph
+    my %graph_data_keys;
+    my $gdi = 0;
+    my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
+    foreach my $datum ( @graph_attributes ) {
+       $graph_data_keys{$datum} = 'dg'.$gdi++;
+        my $key = $root->addNewChild( $graphml_ns, 'key' );
+        $key->setAttribute( 'attr.name', $datum );
+        $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
+        $key->setAttribute( 'for', 'graph' );
+        $key->setAttribute( 'id', $graph_data_keys{$datum} );          
+    }
 
     # Add the data keys for nodes
     my %node_data_keys;
     my $ndi = 0;
-    foreach my $datum ( qw/ name reading identical position class / ) {
-       $node_data_keys{$datum} = 'dn'.$ndi++;
-       my $key = $root->addNewChild( $graphml_ns, 'key' );
-       $key->setAttribute( 'attr.name', $datum );
-       $key->setAttribute( 'attr.type', 'string' );
-       $key->setAttribute( 'for', 'node' );
-       $key->setAttribute( 'id', $node_data_keys{$datum} );
+    my %node_data = ( 
+       id => 'string',
+       text => 'string',
+       rank => 'string',
+       is_start => 'boolean',
+       is_end => 'boolean',
+       is_lacuna => 'boolean',
+       );
+    foreach my $datum ( keys %node_data ) {
+        $node_data_keys{$datum} = 'dn'.$ndi++;
+        my $key = $root->addNewChild( $graphml_ns, 'key' );
+        $key->setAttribute( 'attr.name', $datum );
+        $key->setAttribute( 'attr.type', $node_data{$datum} );
+        $key->setAttribute( 'for', 'node' );
+        $key->setAttribute( 'id', $node_data_keys{$datum} );
     }
 
     # Add the data keys for edges, i.e. witnesses
     my $edi = 0;
     my %edge_data_keys;
-    foreach my $edge_key( qw/ witness_main witness_ante_corr relationship class / ) {
-       $edge_data_keys{$edge_key} = 'de'.$edi++;
-       my $key = $root->addNewChild( $graphml_ns, 'key' );
-       $key->setAttribute( 'attr.name', $edge_key );
-       $key->setAttribute( 'attr.type', 'string' );
-       $key->setAttribute( 'for', 'edge' );
-       $key->setAttribute( 'id', $edge_data_keys{$edge_key} );
+    my %edge_data = (
+       witness => 'string',                    # ID/label for a path
+       relationship => 'string',               # ID/label for a relationship
+       extra => 'boolean',                             # Path key
+       colocated => 'boolean',                 # Relationship key
+       non_correctable => 'boolean',   # Relationship key
+       non_independent => 'boolean',   # Relationship key
+       );
+    foreach my $datum ( keys %edge_data ) {
+        $edge_data_keys{$datum} = 'de'.$edi++;
+        my $key = $root->addNewChild( $graphml_ns, 'key' );
+        $key->setAttribute( 'attr.name', $datum );
+        $key->setAttribute( 'attr.type', $edge_data{$datum} );
+        $key->setAttribute( 'for', 'edge' );
+        $key->setAttribute( 'id', $edge_data_keys{$datum} );
     }
+
+    # Add the collation graphs themselves
+    my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
+    $sgraph->setAttribute( 'edgedefault', 'directed' );
+    $sgraph->setAttribute( 'id', $self->tradition->name );
+    $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
+    $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
+    $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
+    $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
+    $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
+    
+    my $rgraph = $root->addNewChild( $graphml_ns, 'graph' );
+    $rgraph->setAttribute( 'edgedefault', 'undirected' );
+    $rgraph->setAttribute( 'id', 'relationships' );
+    $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
+    $rgraph->setAttribute( 'parse.edges', scalar($self->relationships) );
+    $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
+    $rgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
+    $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
     
-    # Add the graph, its nodes, and its edges
-    my $graph = $root->addNewChild( $graphml_ns, 'graph' );
-    $graph->setAttribute( 'edgedefault', 'directed' );
-    $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
-    $graph->setAttribute( 'parse.edgeids', 'canonical' );
-    $graph->setAttribute( 'parse.edges', scalar($self->paths) );
-    $graph->setAttribute( 'parse.nodeids', 'canonical' );
-    $graph->setAttribute( 'parse.nodes', scalar($self->readings) );
-    $graph->setAttribute( 'parse.order', 'nodesfirst' );
+    # Collation attribute data
+    foreach my $datum ( @graph_attributes ) {
+       my $value = $datum eq 'version' ? '3.0' : $self->$datum;
+               _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
+       }
 
     my $node_ctr = 0;
     my %node_hash;
-    # Add our readings to the graph
-    foreach my $n ( sort { $a->name cmp $b->name } $self->readings ) {
-       my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
-       my $node_xmlid = 'n' . $node_ctr++;
-       $node_hash{ $n->name } = $node_xmlid;
-       $node_el->setAttribute( 'id', $node_xmlid );
-       _add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
-       _add_graphml_data( $node_el, $node_data_keys{'reading'}, $n->label );
-       _add_graphml_data( $node_el, $node_data_keys{'position'}, $n->position->reference )
-           if $n->has_position;
-       _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
-       _add_graphml_data( $node_el, $node_data_keys{'identical'}, $n->primary->name )
-           if $n->has_primary;
-    }
-
-    # Add any segments we have
-    foreach my $n ( sort { $a->name cmp $b->name } $self->segments ) {
-       my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
-       my $node_xmlid = 'n' . $node_ctr++;
-       $node_hash{ $n->name } = $node_xmlid;
-       $node_el->setAttribute( 'id', $node_xmlid );
-       _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
-       _add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
+    # Add our readings to the graphs
+    foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
+       # Add to the main graph
+        my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
+        my $node_xmlid = 'n' . $node_ctr++;
+        $node_hash{ $n->id } = $node_xmlid;
+        $node_el->setAttribute( 'id', $node_xmlid );
+        foreach my $d ( keys %node_data ) {
+               my $nval = $n->$d;
+               _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
+                       if defined $nval;
+        }
+        # Add to the relationships graph
+        my $rnode_el = $rgraph->addNewChild( $graphml_ns, 'node' );
+        $rnode_el->setAttribute( 'id', $node_xmlid );
     }
 
-    # Add the path, relationship, and segment edges
+    # Add the path edges to the sequence graph
     my $edge_ctr = 0;
-    foreach my $e ( sort { $a->from->name cmp $b->from->name } $self->graph->edges() ) {
-       my( $name, $from, $to ) = ( 'e'.$edge_ctr++,
-                                   $node_hash{ $e->from->name() },
-                                   $node_hash{ $e->to->name() } );
-       my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
-       $edge_el->setAttribute( 'source', $from );
-       $edge_el->setAttribute( 'target', $to );
-       $edge_el->setAttribute( 'id', $name );
-       # Add the edge class
-       _add_graphml_data( $edge_el, $edge_data_keys{'class'}, $e->sub_class );
-       if( $e->sub_class eq 'path' ) {
-           # It's a witness path, so add the witness
-           my $base = $e->label;
-           my $key = $edge_data_keys{'witness_main'};
-           # TODO kind of hacky
-           if( $e->label =~ /^(.*?)\s+(\(a\.c\.\))$/ ) {
-               $base = $1;
-               $key = $edge_data_keys{'witness_ante_corr'};
-           }
-           _add_graphml_data( $edge_el, $key, $base );
-       } elsif( $e->sub_class eq 'relationship' ) {
-           # It's a relationship
-           _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $e->label );
-       } # else a segment, nothing to record but source, target, class
+    foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
+       # We add an edge in the graphml for every witness in $e.
+       foreach my $wit ( $self->path_witnesses( $e ) ) {
+                       my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
+                                                                               $node_hash{ $e->[0] },
+                                                                               $node_hash{ $e->[1] } );
+                       my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
+                       $edge_el->setAttribute( 'source', $from );
+                       $edge_el->setAttribute( 'target', $to );
+                       $edge_el->setAttribute( 'id', $id );
+                       
+                       # It's a witness path, so add the witness
+                       my $base = $wit;
+                       my $key = $edge_data_keys{'witness'};
+                       # Is this an ante-corr witness?
+                       my $aclabel = $self->ac_label;
+                       if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
+                               # Keep the base witness
+                               $base = $1;
+                               # ...and record that this is an 'extra' reading path
+                               _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
+                       }
+                       _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
+               }
+       }
+       
+       # Add the relationship edges to the relationships graph
+       foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->relationships ) {
+               my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
+                                                                       $node_hash{ $e->[0] },
+                                                                       $node_hash{ $e->[1] } );
+               my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
+               $edge_el->setAttribute( 'source', $from );
+               $edge_el->setAttribute( 'target', $to );
+               $edge_el->setAttribute( 'id', $id );
+               
+               my $data = $self->relations->get_edge_attributes( @$e );
+               # It's a relationship, so save the relationship data
+               _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $data->{type} );
+               _add_graphml_data( $edge_el, $edge_data_keys{'colocated'}, $data->{colocated} );
+               if( exists $data->{non_correctable} ) {
+                       _add_graphml_data( $edge_el, $edge_data_keys{'non_correctable'}, 
+                               $data->{non_correctable} );
+               }
+               if( exists $data->{non_independent} ) {
+                       _add_graphml_data( $edge_el, $edge_data_keys{'non_independent'}, 
+                               $data->{non_independent} );
+               }
     }
 
-    # Return the thing
-    $self->_save_graphml( $graphml->toString(1) );
-    return $graphml->toString(1);
+    # Save and return the thing
+    my $result = decode_utf8( $graphml->toString(1) );
+    return $result;
 }
 
 sub _add_graphml_data {
     my( $el, $key, $value ) = @_;
-    my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
     return unless defined $value;
+    my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
     $data_el->setAttribute( 'key', $key );
     $data_el->appendText( $value );
 }
 
-sub collapse_graph_paths {
-    my $self = shift;
-    # Our collation graph has an path per witness.  This is great for
-    # calculation purposes, but terrible for display.  Thus we want to
-    # display only one path between any two nodes.
-
-    return if $self->collapsed;
-
-    print STDERR "Collapsing witness paths in graph...\n";
-
-    # Don't list out every witness if we have more than half to list.
-    my $majority = int( scalar( @{$self->tradition->witnesses} ) / 2 ) + 1;
-    # But don't compress if there are only a few witnesses.
-    $majority = 4 if $majority < 4;
-    foreach my $node ( $self->readings ) {
-       my $newlabels = {};
-       # We will visit each node, so we only look ahead.
-       foreach my $edge ( $node->outgoing() ) {
-           next unless $edge->class eq 'edge.path';
-           add_hash_entry( $newlabels, $edge->to->name, $edge->name );
-           $self->del_path( $edge );
-       }
+=item B<as_csv>
 
-       foreach my $newdest ( keys %$newlabels ) {
-           my $label;
-           my @compressed_wits = ();
-           if( @{$newlabels->{$newdest}} < $majority ) {
-               $label = join( ', ', sort( @{$newlabels->{$newdest}} ) );
-           } else {
-               ## TODO FIX THIS HACK
-               my @aclabels;
-               foreach my $wit ( @{$newlabels->{$newdest}} ) {
-                   if( $wit =~ /^(.*?)(\s*\(?a\.\s*c\.\)?)$/ ) {
-                       push( @aclabels, $wit );
-                   } else {
-                       push( @compressed_wits, $wit );
-                   }
-               }
-               $label = join( ', ', 'majority', sort( @aclabels ) );
-           }
-           
-           my $newpath = 
-               $self->add_path( $node, $self->reading( $newdest ), $label );
-           if( @compressed_wits ) {
-               $newpath->hidden_witnesses( \@compressed_wits );
-           }
-       }
+print $graph->as_csv( $recalculate )
+
+Returns a CSV alignment table representation of the collation graph, one
+row per witness (or witness uncorrected.) 
+
+=cut
+
+sub as_csv {
+    my( $self ) = @_;
+    my $table = $self->make_alignment_table;
+    my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );    
+    my @result;
+    # Make the header row
+    $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
+       push( @result, decode_utf8( $csv->string ) );
+    # Make the rest of the rows
+    foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
+       my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
+       my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
+        $csv->combine( @row );
+        push( @result, decode_utf8( $csv->string ) );
     }
+    return join( "\n", @result );
+}
+
+=item B<make_alignment_table>
+
+my $table = $graph->make_alignment_table( $use_refs, \@wits_to_include )
+
+Return a reference to an alignment table, in a slightly enhanced CollateX
+format which looks like this:
+
+ $table = { alignment => [ { witness => "SIGIL", 
+                             tokens => [ { t => "READINGTEXT" }, ... ] },
+                           { witness => "SIG2", 
+                             tokens => [ { t => "READINGTEXT" }, ... ] },
+                           ... ],
+            length => TEXTLEN };
 
-    $self->collapsed( 1 );
+If $use_refs is set to 1, the reading object is returned in the table 
+instead of READINGTEXT; if not, the text of the reading is returned.
+If $wits_to_include is set to a hashref, only the witnesses whose sigil
+keys have a true hash value will be included.
+
+=cut
+
+sub make_alignment_table {
+    my( $self, $noderefs, $include ) = @_;
+    unless( $self->linear ) {
+        warn "Need a linear graph in order to make an alignment table";
+        return;
+    }
+    my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
+    my @all_pos = ( 1 .. $self->end->rank - 1 );
+    foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
+       if( $include ) {
+               next unless $include->{$wit->sigil};
+       }
+        # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
+        my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
+        my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
+        push( @{$table->{'alignment'}}, 
+               { 'witness' => $wit->sigil, 'tokens' => \@row } );
+        if( $wit->is_layered ) {
+               my @wit_ac_path = $self->reading_sequence( $self->start, $self->end, 
+                       $wit->sigil.$self->ac_label, $wit->sigil );
+            my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
+                       push( @{$table->{'alignment'}},
+                               { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
+        }           
+    }
+       return $table;
 }
 
-sub expand_graph_paths {
-    my $self = shift;
-    # Our collation graph has only one path between any two nodes.
-    # This is great for display, but not so great for analysis.
-    # Expand this so that each witness has its own path between any
-    # two reading nodes.
-    return unless $self->collapsed;
-    
-    print STDERR "Expanding witness paths in graph...\n";
-    foreach my $path( $self->paths ) {
-       my $from = $path->from;
-       my $to = $path->to;
-       my @wits = split( /, /, $path->label );
-       if( $path->has_hidden_witnesses ) {
-           push( @wits, @{$path->hidden_witnesses} );
-       }
-       $self->del_path( $path );
-       foreach ( @wits ) {
-           $self->add_path( $from, $to, $_ );
-       }
+sub _make_witness_row {
+    my( $path, $positions, $noderefs ) = @_;
+    my %char_hash;
+    map { $char_hash{$_} = undef } @$positions;
+    my $debug = 0;
+    foreach my $rdg ( @$path ) {
+        my $rtext = $rdg->text;
+        $rtext = '#LACUNA#' if $rdg->is_lacuna;
+        print STDERR "rank " . $rdg->rank . "\n" if $debug;
+        # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
+        $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg } 
+                                                                          : { 't' => $rtext };
+    }
+    my @row = map { $char_hash{$_} } @$positions;
+    # Fill in lacuna markers for undef spots in the row
+    my $last_el = shift @row;
+    my @filled_row = ( $last_el );
+    foreach my $el ( @row ) {
+        # If we are using node reference, make the lacuna node appear many times
+        # in the table.  If not, use the lacuna tag.
+        if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
+            $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
+        }
+        push( @filled_row, $el );
+        $last_el = $el;
+    }
+    return @filled_row;
+}
+
+# Tiny utility function to say if a table element is a lacuna
+sub _el_is_lacuna {
+    my $el = shift;
+    return 1 if $el->{'t'} eq '#LACUNA#';
+    return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
+        && $el->{'t'}->is_lacuna;
+    return 0;
+}
+
+# Helper to turn the witnesses along columns rather than rows.  Assumes
+# equal-sized rows.
+sub _turn_table {
+    my( $table ) = @_;
+    my $result = [];
+    return $result unless scalar @$table;
+    my $nrows = scalar @{$table->[0]};
+    foreach my $idx ( 0 .. $nrows - 1 ) {
+        foreach my $wit ( 0 .. $#{$table} ) {
+            $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
+        }
     }
-    $self->collapsed( 0 );
+    return $result;        
 }
 
 =back
@@ -549,18 +804,12 @@ my $beginning = $collation->start();
 
 Returns the beginning of the collation, a meta-reading with label '#START#'.
 
-=cut
+=item B<end>
+
+my $end = $collation->end();
+
+Returns the end of the collation, a meta-reading with label '#END#'.
 
-sub start {
-    # Return the beginning reading of the graph.
-    my $self = shift;
-    my( $new_start ) = @_;
-    if( $new_start ) {
-       $self->del_reading( '#START#' );
-       $self->graph->rename_node( $new_start, '#START#' );
-    }
-    return $self->reading('#START#');
-}
 
 =item B<reading_sequence>
 
@@ -572,6 +821,8 @@ assume that the path is that of the base text (if any.)
 
 =cut
 
+# TODO Think about returning some lazy-eval iterator.
+
 sub reading_sequence {
     my( $self, $start, $end, $witness, $backup ) = @_;
 
@@ -579,24 +830,26 @@ sub reading_sequence {
     my @readings = ( $start );
     my %seen;
     my $n = $start;
-    while( $n && $n ne $end ) {
-       if( exists( $seen{$n->name()} ) ) {
-           warn "Detected loop at " . $n->name();
-           last;
-       }
-       $seen{$n->name()} = 1;
-       
-       my $next = $self->next_reading( $n, $witness, $backup );
-       warn "Did not find any path for $witness from reading " . $n->name
-           unless $next;
-       push( @readings, $next );
-       $n = $next;
+    while( $n && $n->id ne $end->id ) {
+        if( exists( $seen{$n->id} ) ) {
+            warn "Detected loop at " . $n->id;
+            last;
+        }
+        $seen{$n->id} = 1;
+        
+        my $next = $self->next_reading( $n, $witness, $backup );
+        unless( $next ) {
+            warn "Did not find any path for $witness from reading " . $n->id;
+            last;
+        }
+        push( @readings, $next );
+        $n = $next;
     }
     # Check that the last reading is our end reading.
     my $last = $readings[$#readings];
-    warn "Last reading found from " . $start->label() .
-       " for witness $witness is not the end!"
-       unless $last eq $end;
+    warn "Last reading found from " . $start->text .
+        " for witness $witness is not the end!"
+        unless $last->id eq $end->id;
     
     return @readings;
 }
@@ -613,7 +866,9 @@ path.
 sub next_reading {
     # Return the successor via the corresponding path.
     my $self = shift;
-    return $self->_find_linked_reading( 'next', @_ );
+    my $answer = $self->_find_linked_reading( 'next', @_ );
+       return undef unless $answer;
+    return $self->reading( $answer );
 }
 
 =item B<prior_reading>
@@ -628,46 +883,47 @@ path.
 sub prior_reading {
     # Return the predecessor via the corresponding path.
     my $self = shift;
-    return $self->_find_linked_reading( 'prior', @_ );
+    my $answer = $self->_find_linked_reading( 'prior', @_ );
+    return $self->reading( $answer );
 }
 
 sub _find_linked_reading {
     my( $self, $direction, $node, $path, $alt_path ) = @_;
     my @linked_paths = $direction eq 'next' 
-       ? $node->outgoing() : $node->incoming();
+        ? $self->sequence->edges_from( $node ) 
+        : $self->sequence->edges_to( $node );
     return undef unless scalar( @linked_paths );
     
     # We have to find the linked path that contains all of the
     # witnesses supplied in $path.
     my( @path_wits, @alt_path_wits );
-    @path_wits = $self->witnesses_of_label( $path ) if $path;
-    @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path;
+    @path_wits = sort( $self->witnesses_of_label( $path ) ) if $path;
+    @alt_path_wits = sort( $self->witnesses_of_label( $alt_path ) ) if $alt_path;
     my $base_le;
     my $alt_le;
     foreach my $le ( @linked_paths ) {
-       if( $le->name eq $self->baselabel ) {
-           $base_le = $le;
-       } else {
-           my @le_wits = $self->witnesses_of_label( $le->name );
-           if( _is_within( \@path_wits, \@le_wits ) ) {
-               # This is the right path.
-               return $direction eq 'next' ? $le->to() : $le->from();
-           } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
-               $alt_le = $le;
-           }
-       }
+        if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
+            $base_le = $le;
+        }
+               my @le_wits = $self->path_witnesses( $le );
+               if( _is_within( \@path_wits, \@le_wits ) ) {
+                       # This is the right path.
+                       return $direction eq 'next' ? $le->[1] : $le->[0];
+               } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
+                       $alt_le = $le;
+               }
     }
     # Got this far? Return the alternate path if it exists.
-    return $direction eq 'next' ? $alt_le->to() : $alt_le->from()
-       if $alt_le;
+    return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
+        if $alt_le;
 
     # Got this far? Return the base path if it exists.
-    return $direction eq 'next' ? $base_le->to() : $base_le->from()
-       if $base_le;
+    return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
+        if $base_le;
 
     # Got this far? We have no appropriate path.
-    warn "Could not find $direction node from " . $node->label 
-       . " along path $path";
+    warn "Could not find $direction node from " . $node->id 
+        . " along path $path";
     return undef;
 }
 
@@ -676,89 +932,26 @@ sub _is_within {
     my( $set1, $set2 ) = @_;
     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
     foreach my $el ( @$set1 ) {
-       $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
+        $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
     }
     return $ret;
 }
 
 
 ## INITIALIZATION METHODS - for use by parsers
-# Walk the paths for each witness in the graph, and return the nodes
-# that the graph has in common.  If $using_base is true, some 
-# different logic is needed.
-
-sub walk_witness_paths {
-    my( $self, $end ) = @_;
-    # For each witness, walk the path through the graph.
-    # Then we need to find the common nodes.  
-    # TODO This method is going to fall down if we have a very gappy 
-    # text in the collation.
-    my $paths = {};
-    my @common_readings;
-    foreach my $wit ( @{$self->tradition->witnesses} ) {
-       my $curr_reading = $self->start;
-       my @wit_path = $self->reading_sequence( $self->start, $end, 
-                                               $wit->sigil );
-       $wit->path( \@wit_path );
-
-       # Detect the common readings.
-       @common_readings = _find_common( \@common_readings, \@wit_path );
-    }
-
-    # Mark all the nodes as either common or not.
-    foreach my $cn ( @common_readings ) {
-       print STDERR "Setting " . $cn->name . " / " . $cn->label 
-           . " as common node\n";
-       $cn->make_common;
-    }
-    foreach my $n ( $self->readings() ) {
-       $n->make_variant unless $n->is_common;
-    }
-    # Return an array of the common nodes in order.
-    return @common_readings;
-}
-
-sub _find_common {
-    my( $common_readings, $new_path ) = @_;
-    my @cr;
-    if( @$common_readings ) {
-       foreach my $n ( @$new_path ) {
-           push( @cr, $n ) if grep { $_ eq $n } @$common_readings;
-       }
-    } else {
-       push( @cr, @$new_path );
-    }
-    return @cr;
-}
-
-sub _remove_common {
-    my( $common_readings, $divergence ) = @_;
-    my @cr;
-    my %diverged;
-    map { $diverged{$_->name} = 1 } @$divergence;
-    foreach( @$common_readings ) {
-       push( @cr, $_ ) unless $diverged{$_->name};
-    }
-    return @cr;
-}
-
 
-# An alternative to walk_witness_paths, for use when a collation is
-# constructed from a base text and an apparatus.  We have the
-# sequences of readings and just need to add path edges.
+# For use when a collation is constructed from a base text and an apparatus.
+# We have the sequences of readings and just need to add path edges.
+# When we are done, clear out the witness path attributes, as they are no
+# longer needed.
+# TODO Find a way to replace the witness path attributes with encapsulated functions?
 
 sub make_witness_paths {
     my( $self ) = @_;
-
-    my @common_readings;
-    foreach my $wit ( @{$self->tradition->witnesses} ) {
-       print STDERR "Making path for " . $wit->sigil . "\n";
-       $self->make_witness_path( $wit );
-       @common_readings = _find_common( \@common_readings, $wit->path );
-       @common_readings = _find_common( \@common_readings, $wit->uncorrected_path );
+    foreach my $wit ( $self->tradition->witnesses ) {
+        # print STDERR "Making path for " . $wit->sigil . "\n";
+        $self->make_witness_path( $wit );
     }
-    map { $_->make_common } @common_readings;
-    return @common_readings;
 }
 
 sub make_witness_path {
@@ -766,370 +959,136 @@ sub make_witness_path {
     my @chain = @{$wit->path};
     my $sig = $wit->sigil;
     foreach my $idx ( 0 .. $#chain-1 ) {
-       $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
+        $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
     }
-    @chain = @{$wit->uncorrected_path};
-    foreach my $idx( 0 .. $#chain-1 ) {
-       my $source = $chain[$idx];
-       my $target = $chain[$idx+1];
-       $self->add_path( $source, $target, $sig.$self->ac_label )
-           unless $self->has_path( $source, $target, $sig );
+    if( $wit->is_layered ) {
+        @chain = @{$wit->uncorrected_path};
+        foreach my $idx( 0 .. $#chain-1 ) {
+            my $source = $chain[$idx];
+            my $target = $chain[$idx+1];
+            $self->add_path( $source, $target, $sig.$self->ac_label )
+                unless $self->has_path( $source, $target, $sig );
+        }
     }
+    $wit->clear_path;
+    $wit->clear_uncorrected_path;
 }
 
-sub common_readings {
+sub calculate_ranks {
     my $self = shift;
-    my @common = grep { $_->is_common } $self->readings();
-    return sort { $a->position->cmp_with( $b->position ) } @common;
-}
-
-# Calculate the relative positions of nodes in the graph, if they
-# were not given to us.
-sub calculate_positions {
-    my( $self, @ordered_common ) = @_;
-
-    # First assign positions to all the common nodes.
-    my $l = 1;
-    foreach my $oc ( @ordered_common ) {
-       $oc->position( $l++, 1 );
-    }
-
-    if( $self->linear ) {
-       # For the space between each common node, we have to find all the chains
-       # from all the witnesses.  The longest chain gives us our max, and the
-       # others get min/max ranges to fit.
-       my $first = shift @ordered_common;
-       while( @ordered_common ) {
-           my %paths;
-           my $next = shift @ordered_common;
-           my $longest = 0;
-           foreach my $wit ( @{$self->tradition->witnesses} ) {
-               # Key to the path is not important; we just have to get
-               # all unique paths.
-               my $length = $self->_track_paths( \%paths, $first, $next, $wit->sigil );
-               $longest = $length unless $longest > $length;
-               if( $wit->has_ante_corr ) {
-                   my $length = $self->_track_paths( \%paths, $first, $next, 
-                                                     $wit->sigil.$self->ac_label, $wit->sigil );
-                   $longest = $length unless $longest > $length;
-               }
-           }
-           
-           # Transform the path values from unique strings to arrays.
-           my @all_paths;
-           foreach my $k ( keys %paths ) {
-               my @v = split( /\s+/, $k );
-               push( @all_paths, \@v );
-           }
-           @all_paths = sort { scalar @$b <=> scalar @$a } @all_paths;
-           
-           # Now %paths has all the unique paths, and we know how long the
-           # longest of these is.  Assign positions, starting with the
-           # longest.  All non-common positions start at 2.
-           foreach my $path ( @all_paths  ) {
-               # Initially each element has a minimum position of 2
-               # plus its position in the array (1 is the common
-               # node), and a max position of the longest array
-               # length minus its position in the array.
-               my $range = $longest - scalar @$path;
-               my $min = 2;
-               foreach my $i ( 0 .. $#{$path} ) {
-                   my $rdg = $self->reading( $path->[$i] );
-                   if( $rdg->has_position ) {
-                       # This reading has already had a more specific
-                       # position set, so we need to take that into
-                       # account when calculating the min and max for
-                       # the next reading.
-                       my $rangeminus = $rdg->position->min - $min;
-                       $min = $rdg->position->min + 1; 
-                       $range = $range - $rangeminus;
-                       if( $range < 0 ) {
-                           print STDERR "Negative range for position! " . $rdg->name . "\n"; # May remove this warning
-                           $range = 0;
-                       }
-                   } else {
-                       $rdg->position( $first->position->common, $min, $min+$range );
-                       $min++;
-                       $longest = $min+$range-2 unless $longest+2 > $min+$range;  # min starts at 2 but longest assumes 0 start
-                   }
-               }
-           }
-           # Now go through again and make sure the positions are
-           # monotonic.  Do this until they are.
-           my $monotonic = 0;
-           my $counter = 0;
-           until( $monotonic ) {
-               $monotonic = 1;
-               $counter++;
-               foreach my $path ( @all_paths ) {
-                   foreach my $i ( 0 .. $#{$path} ) { 
-                       my $rdg = $self->reading( $path->[$i] );
-                       my $prior = $self->reading( $path->[$i-1] ) if $i > 0;
-                       my $next = $self->reading( $path->[$i+1] ) if $i < $#{$path};
-                       if( $prior && $rdg->position->min <= $prior->position->min ) {
-                           $monotonic = 0; 
-                           $rdg->position->min( $prior->position->min + 1 );
-                       }
-                       if( $next && $rdg->position->max >= $next->position->max ) {
-                           $monotonic = 0; 
-                           if( $next->position->max - 1 >= $rdg->position->min ) {
-                               # If moving rdg/max down would not send it below 
-                               # rdg/min, do that.  
-                               $rdg->position->max( $next->position->max - 1 );
-                           } else {
-                               # Otherwise increase next/max.
-                               $next->position->max( $rdg->position->max + 1 );
-                               # ...min will be fixed on the next pass.
-                           }
-                       }
-                   }
-               }
-               if( $counter > $#all_paths + 1 && !$monotonic ) {
-                   # We risk an infinite loop.  Get out of here.
-                   warn "Still not monotonic after $counter passes at common point "
-                       . $first->position->common;
-                   last;
-               }
-           }
-           print STDERR "Took $counter passes for monotonicity at " . $first->position->common. "\n" 
-               if $counter > 1;
-           
-           $first = $next;
-       }
-
-    } else {
-
-       # Non-linear positions are pretty much impossible to pin down.
-       # Any reading might appear anywhere in the graph.  I guess we
-       # can do positions where there aren't transpositions...
-
+    # Walk a version of the graph where every node linked by a relationship 
+    # edge is fundamentally the same node, and do a topological ranking on
+    # the nodes in this graph.
+    my $topo_graph = Graph->new();
+    my %rel_containers;
+    my $rel_ctr = 0;
+    # Add the nodes
+    foreach my $r ( $self->readings ) {
+        next if exists $rel_containers{$r->id};
+        my @rels = $r->related_readings( 'colocated' );
+        if( @rels ) {
+            # Make a relationship container.
+            push( @rels, $r );
+            my $rn = 'rel_container_' . $rel_ctr++;
+            $topo_graph->add_vertex( $rn );
+            foreach( @rels ) {
+                $rel_containers{$_->id} = $rn;
+            }
+        } else {
+            # Add a new node to mirror the old node.
+            $rel_containers{$r->id} = $r->id;
+            $topo_graph->add_vertex( $r->id );
+        }
     }
-    $self->init_lemmata();
-}
 
-# Helper function for the guts of calculate_positions.
-sub _track_paths {
-    my $self = shift;
-    my $track_hash = shift;
-    # Args are first, last, wit, backup
-    my @path = $self->reading_sequence( @_ );
-    # Top and tail the array
-    shift @path;
-    pop @path;
-    $track_hash->{join( ' ', map { $_->name } @path )} = $_[2]
-       if @path;
-    return @path;
-}
- 
-sub possible_positions {
-    my $self = shift;
-    my @answer;
-    my %positions = ();
+    # Add the edges.
     foreach my $r ( $self->readings ) {
-       next unless $r->has_position;
-       $positions{$r->position->maxref} = 1;
+        foreach my $n ( $self->sequence->successors( $r->id ) ) {
+               my( $tfrom, $tto ) = ( $rel_containers{$r->id},
+                       $rel_containers{$n} );
+               $DB::single = 1 unless $tfrom && $tto;
+            $topo_graph->add_edge( $tfrom, $tto );
+        }
     }
-    @answer = keys %positions;
-    return @answer;
-}
-
-# TODO think about indexing this.
-sub readings_at_position {
-    my( $self, $position, $strict ) = @_;
-    unless( ref( $position ) eq 'Text::Tradition::Collation::Position' ) {
-       $position = Text::Tradition::Collation::Position->new( $position );
+    
+    # Now do the rankings, starting with the start node.
+    my $topo_start = $rel_containers{$self->start->id};
+    my $node_ranks = { $topo_start => 0 };
+    my @curr_origin = ( $topo_start );
+    # A little iterative function.
+    while( @curr_origin ) {
+        @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
     }
-    my @answer;
+    # Transfer our rankings from the topological graph to the real one.
     foreach my $r ( $self->readings ) {
-       push( @answer, $r ) if $r->is_at_position( $position, $strict );
+        if( defined $node_ranks->{$rel_containers{$r->id}} ) {
+            $r->rank( $node_ranks->{$rel_containers{$r->id}} );
+        } else {
+            $DB::single = 1;
+            die "No rank calculated for node " . $r->id 
+                . " - do you have a cycle in the graph?";
+        }
     }
-    return @answer;
 }
 
-## Lemmatizer functions
-
-sub init_lemmata {
-    my $self = shift;
-
-    foreach my $position ( $self->possible_positions ) {
-       $self->lemmata->{$position} = undef;
-    }
-
-    foreach my $cr ( $self->common_readings ) {
-       $self->lemmata->{$cr->position->maxref} = $cr->name;
+sub _assign_rank {
+    my( $graph, $node_ranks, @current_nodes ) = @_;
+    # Look at each of the children of @current_nodes.  If all the child's 
+    # parents have a rank, assign it the highest rank + 1 and add it to 
+    # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
+    # parent gets a rank.
+    my @next_nodes;
+    foreach my $c ( @current_nodes ) {
+        warn "Current reading $c has no rank!"
+            unless exists $node_ranks->{$c};
+        # print STDERR "Looking at child of node $c, rank " 
+        #     . $node_ranks->{$c} . "\n";
+        foreach my $child ( $graph->successors( $c ) ) {
+            next if exists $node_ranks->{$child};
+            my $highest_rank = -1;
+            my $skip = 0;
+            foreach my $parent ( $graph->predecessors( $child ) ) {
+                if( exists $node_ranks->{$parent} ) {
+                    $highest_rank = $node_ranks->{$parent} 
+                        if $highest_rank <= $node_ranks->{$parent};
+                } else {
+                    $skip = 1;
+                    last;
+                }
+            }
+            next if $skip;
+            my $c_rank = $highest_rank + 1;
+            # print STDERR "Assigning rank $c_rank to node $child \n";
+            $node_ranks->{$child} = $c_rank;
+            push( @next_nodes, $child );
+        }
     }
+    return @next_nodes;
 }
-    
-=item B<lemma_readings>
-
-my @state = $graph->lemma_readings( @readings_delemmatized );
-
-Takes a list of readings that have just been delemmatized, and returns
-a set of tuples of the form ['reading', 'state'] that indicates what
-changes need to be made to the graph.
-
-=over
-
-=item * 
-
-A state of 1 means 'lemmatize this reading'
-
-=item * 
 
-A state of 0 means 'delemmatize this reading'
-
-=item * 
-
-A state of undef means 'an ellipsis belongs in the text here because
-no decision has been made / an earlier decision was backed out'
-
-=back
-
-=cut
-
-sub lemma_readings {
-    my( $self, @toggled_off_nodes ) = @_;
-
-    # First get the positions of those nodes which have been
-    # toggled off.
-    my $positions_off = {};
-    map { $positions_off->{ $_->position->reference } = $_->name } 
-        @toggled_off_nodes;
-
-    # Now for each position, we have to see if a node is on, and we
-    # have to see if a node has been turned off.  The lemmata hash
-    # should contain fixed positions, range positions whose node was
-    # just turned off, and range positions whose node is on.
-    my @answer;
-    my %fixed_positions;
-    # TODO One of these is probably redundant.
-    map { $fixed_positions{$_} = 0 } keys %{$self->lemmata};
-    map { $fixed_positions{$_} = 0 } keys %{$positions_off};
-    map { $fixed_positions{$_} = 1 } $self->possible_positions;
-    foreach my $pos ( sort { Text::Tradition::Collation::Position::str_cmp( $a, $b ) } keys %fixed_positions ) {
-       # Find the state of this position.  If there is an active node,
-       # its name will be the state; otherwise the state will be 0 
-       # (nothing at this position) or undef (ellipsis at this position)
-       my $active = undef;
-       $active = $self->lemmata->{$pos} if exists $self->lemmata->{$pos};
-       
-       # Is there a formerly active node that was toggled off?
-       if( exists( $positions_off->{$pos} ) ) {
-           my $off_node = $positions_off->{$pos};
-           if( $active && $active ne $off_node) {
-               push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
-           } else {
-               unless( $fixed_positions{$pos} ) {
-                   $active = 0;
-                   delete $self->lemmata->{$pos};
-               }
-               push( @answer, [ $off_node, $active ] );
-           }
-
-       # No formerly active node, so we just see if there is a currently
-       # active one.
-       } elsif( $active ) {
-           # Push the active node, whatever it is.
-           push( @answer, [ $active, 1 ] );
-       } else {
-           # Push the state that is there. Arbitrarily use the first node
-           # at that position.
-           my @pos_nodes = $self->readings_at_position( $pos );
-           push( @answer, [ $pos_nodes[0]->name, $self->lemmata->{$pos} ] );
-           delete $self->lemmata->{$pos} unless $fixed_positions{$pos};
-       }
+# Another method to make up for rough collation methods.  If the same reading
+# appears multiple times at the same rank, collapse the nodes.
+sub flatten_ranks {
+    my $self = shift;
+    my %unique_rank_rdg;
+    foreach my $rdg ( $self->readings ) {
+        next unless $rdg->has_rank;
+        my $key = $rdg->rank . "||" . $rdg->text;
+        if( exists $unique_rank_rdg{$key} ) {
+            # Combine!
+            # print STDERR "Combining readings at same rank: $key\n";
+            $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
+        } else {
+            $unique_rank_rdg{$key} = $rdg;
+        }
     }
-
-    return @answer;
 }
 
-=item B<toggle_reading>
-
-my @readings_delemmatized = $graph->toggle_reading( $reading_name );
-
-Takes a reading node name, and either lemmatizes or de-lemmatizes
-it. Returns a list of all readings that are de-lemmatized as a result
-of the toggle.
 
-=cut
-
-sub toggle_reading {
-    my( $self, $rname ) = @_;
+## Utility functions
     
-    return unless $rname;
-    my $reading = $self->reading( $rname );
-    if( !$reading || $reading->is_common() ) {
-       # Do nothing, it's a common node.
-       return;
-    } 
-    
-    my $pos = $reading->position;
-    my $fixed = $reading->position->fixed;
-    my $old_state = $self->lemmata->{$pos->reference};
-
-    my @readings_off;
-    if( $old_state && $old_state eq $rname ) {
-       # Turn off the node. We turn on no others by default.
-       push( @readings_off, $reading );
-    } else {
-       # Turn on the node.
-       $self->lemmata->{$pos->reference} = $rname;
-       # Any other 'on' readings in the same position should be off
-       # if we have a fixed position.
-       push( @readings_off, $self->same_position_as( $reading, 1 ) )
-           if $pos->fixed;
-       # Any node that is an identical transposed one should be off.
-       push( @readings_off, $reading->identical_readings );
-    }
-    @readings_off = unique_list( @readings_off );
-       
-    # Turn off the readings that need to be turned off.
-    my @readings_delemmatized;
-    foreach my $n ( @readings_off ) {
-       my $npos = $n->position;
-       my $state = undef;
-       $state = $self->lemmata->{$npos->reference}
-           if defined $self->lemmata->{$npos->reference};
-       if( $state && $state eq $n->name ) { 
-           # this reading is still on, so turn it off
-           push( @readings_delemmatized, $n );
-           my $new_state = undef;
-           if( $npos->fixed && $n eq $reading ) {
-               # This is the reading that was clicked, so if there are no
-               # other readings there and this is a fixed position, turn off 
-               # the position.  In all other cases, restore the ellipsis.
-               my @other_n = $self->same_position_as( $n ); # TODO do we need strict?
-               $new_state = 0 unless @other_n;
-           }
-           $self->lemmata->{$npos->reference} = $new_state;
-       } elsif( $old_state && $old_state eq $n->name ) { 
-           # another reading has already been turned on here
-           push( @readings_delemmatized, $n );
-       } # else some other reading was on anyway, so pass.
-    }
-    return @readings_delemmatized;
-}
-
-sub same_position_as {
-    my( $self, $reading, $strict ) = @_;
-    my $pos = $reading->position;
-    my %onpath = ( $reading->name => 1 );
-    # TODO This might not always be sufficient.  We really want to
-    # exclude all readings on this one's path between its two
-    # common points.
-    map { $onpath{$_->name} = 1 } $reading->neighbor_readings;
-    my @same = grep { !$onpath{$_->name} } 
-        $self->readings_at_position( $reading->position, $strict );
-    return @same;
-}
-
 # Return the string that joins together a list of witnesses for
 # display on a single path.
-sub path_label {
-    my $self = shift;
-    return join( $self->wit_list_separator, @_ );
-}
-
 sub witnesses_of_label {
     my( $self, $label ) = @_;
     my $regex = $self->wit_list_separator;
@@ -1137,21 +1096,13 @@ sub witnesses_of_label {
     return @answer;
 }    
 
-sub unique_list {
-    my( @list ) = @_;
-    my %h;
-    map { $h{$_->name} = $_ } @list;
-    return values( %h );
-}
-
-sub add_hash_entry {
-    my( $hash, $key, $entry ) = @_;
-    if( exists $hash->{$key} ) {
-       push( @{$hash->{$key}}, $entry );
-    } else {
-       $hash->{$key} = [ $entry ];
-    }
-}
-
 no Moose;
 __PACKAGE__->meta->make_immutable;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Think about making Relationship objects again
+
+=back