From: tla <tla@mit.edu>
Date: Wed, 18 May 2011 15:07:39 +0000 (+0200)
Subject: UNTESTED saving work on base text parsing with new library
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e290206835dd4bc540d751ea2d1849255dd192f2;p=scpubgit%2Fstemmatology.git

UNTESTED saving work on base text parsing with new library
---

diff --git a/lib/Text/Tradition.pm b/lib/Text/Tradition.pm
index 6cce95f..8e028a9 100644
--- a/lib/Text/Tradition.pm
+++ b/lib/Text/Tradition.pm
@@ -92,7 +92,8 @@ sub witness {
     foreach my $wit ( @{$self->witnesses} ) {
 	$requested_wit = $wit if $wit->sigil eq $sigil;
     }
-    warn "No such witness $sigil" unless $requested_wit;
+    # We depend on an undef return value for no such witness.
+    # warn "No such witness $sigil" unless $requested_wit;
     return $requested_wit;
 }
 	
@@ -101,6 +102,7 @@ sub add_witness {
     my $self = shift;
     my $new_wit = Text::Tradition::Witness->new( @_ );
     push( @{$self->witnesses}, $new_wit );
+    return $new_wit;
 }
 
 # The user will usually be instantiating a Tradition object, and
diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm
index 7a75c18..aa905d2 100644
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -255,6 +255,45 @@ sub start {
     return $self->reading('#START#');
 }
 
+=item B<reading_sequence>
+
+my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
+
+Returns the ordered list of readings, starting with $first and ending
+with $last, along the given witness path.  If no path is specified,
+assume that the path is that of the base text (if any.)
+
+=cut
+
+sub reading_sequence {
+    my( $self, $start, $end, $witness, $backup ) = @_;
+
+    $witness = 'base text' unless $witness;
+    my @readings = ( $start );
+    my %seen;
+    my $n = $start;
+    while( $n ne $end ) {
+	if( exists( $seen{$n->name()} ) ) {
+	    warn "Detected loop at " . $n->name();
+	    last;
+	}
+	$seen{$n->name()} = 1;
+	
+	my $next = $self->next_reading( $n, $witness, $backup );
+	warn "Did not find any path for $witness from reading " . $n->name
+	    unless $next;
+	push( @readings, $next );
+	$n = $next;
+    }
+    # Check that the last reading is our end reading.
+    my $last = $readings[$#readings];
+    warn "Last reading found from " . $start->label() .
+	" for witness $witness is not the end!"
+	unless $last eq $end;
+    
+    return @readings;
+}
+
 =item B<next_reading>
 
 my $next_reading = $graph->next_reading( $reading, $witpath );
@@ -265,7 +304,7 @@ path.  TODO These are badly named.
 =cut
 
 sub next_reading {
-    # Return the successor via the corresponding edge.
+    # Return the successor via the corresponding path.
     my $self = shift;
     return $self->_find_linked_reading( 'next', @_ );
 }
@@ -280,30 +319,48 @@ path.  TODO These are badly named.
 =cut
 
 sub prior_reading {
-    # Return the predecessor via the corresponding edge.
+    # Return the predecessor via the corresponding path.
     my $self = shift;
     return $self->_find_linked_reading( 'prior', @_ );
 }
 
 sub _find_linked_reading {
-    my( $self, $direction, $node, $edge ) = @_;
-    $edge = 'base text' unless $edge;
-    my @linked_edges = $direction eq 'next' 
+    my( $self, $direction, $node, $path, $alt_path ) = @_;
+    my @linked_paths = $direction eq 'next' 
 	? $node->outgoing() : $node->incoming();
-    return undef unless scalar( @linked_edges );
+    return undef unless scalar( @linked_paths );
     
-    # We have to find the linked edge that contains all of the
-    # witnesses supplied in $edge.
-    my @edge_wits = $self->witnesses_of_label( $edge );
-    foreach my $le ( @linked_edges ) {
-	my @le_wits = $self->witnesses_of_label( $le->name );
-	if( _is_within( \@edge_wits, \@le_wits ) ) {
-	    # This is the right edge.
-	    return $direction eq 'next' ? $le->to() : $le->from();
+    # We have to find the linked path that contains all of the
+    # witnesses supplied in $path.
+    my( @path_wits, @alt_path_wits );
+    @path_wits = $self->witnesses_of_label( $path ) if $path;
+    @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path;
+    my $base_le;
+    my $alt_le;
+    foreach my $le ( @linked_paths ) {
+	if( $le->name eq 'base text' ) {
+	    $base_le = $le;
+	} else {
+	    my @le_wits = $self->witnesses_of_label( $le->name );
+	    if( _is_within( \@path_wits, \@le_wits ) ) {
+		# This is the right path.
+		return $direction eq 'next' ? $le->to() : $le->from();
+	    } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
+		$alt_le = $le;
+	    }
 	}
     }
+    # Got this far? Return the alternate path if it exists.
+    return $direction eq 'next' ? $alt_le->to() : $alt_le->from()
+	if $alt_le;
+
+    # Got this far? Return the base path if it exists.
+    return $direction eq 'next' ? $base_le->to() : $base_le->from()
+	if $base_le;
+
+    # Got this far? We have no appropriate path.
     warn "Could not find $direction node from " . $node->label 
-	. " along edge $edge";
+	. " along path $path";
     return undef;
 }
 
@@ -320,7 +377,8 @@ sub _is_within {
 
 ## INITIALIZATION METHODS - for use by parsers
 # Walk the paths for each witness in the graph, and return the nodes
-# that the graph has in common.
+# that the graph has in common.  If $using_base is true, some 
+# different logic is needed.
 
 sub walk_witness_paths {
     my( $self, $end ) = @_;
@@ -332,22 +390,11 @@ sub walk_witness_paths {
     my @common_readings;
     foreach my $wit ( @{$self->tradition->witnesses} ) {
 	my $curr_reading = $self->start;
-	my @wit_path = ( $curr_reading );
-	my %seen_readings;
-	# TODO Detect loops at some point
-	while( $curr_reading->name ne $end->name ) {
-	    if( $seen_readings{$curr_reading->name} ) {
-		warn "Detected loop walking path for witness " . $wit->sigil
-		    . " at reading " . $curr_reading->name;
-		last;
-	    }
-	    my $next_reading = $self->next_reading( $curr_reading, 
-						    $wit->sigil );
-	    push( @wit_path, $next_reading );
-	    $seen_readings{$curr_reading->name} = 1;
-	    $curr_reading = $next_reading;
-	}
+	my @wit_path = $self->reading_sequence( $self->start, $end, 
+						$wit->sigil );
 	$wit->path( \@wit_path );
+
+	# Detect the common readings.
 	if( @common_readings ) {
 	    my @cn;
 	    foreach my $n ( @wit_path ) {
@@ -362,7 +409,8 @@ sub walk_witness_paths {
 
     # Mark all the nodes as either common or not.
     foreach my $cn ( @common_readings ) {
-	print STDERR "Setting " . $cn->name . " / " . $cn->label . " as common node\n";
+	print STDERR "Setting " . $cn->name . " / " . $cn->label 
+	    . " as common node\n";
 	$cn->make_common;
     }
     foreach my $n ( $self->readings() ) {
@@ -372,6 +420,46 @@ sub walk_witness_paths {
     return @common_readings;
 }
 
+# An alternative to walk_witness_paths, for use when a collation is
+# constructed from a base text and an apparatus.  Also modifies the
+# collation graph to remove all 'base text' paths and replace them
+# with real witness paths.
+
+sub walk_and_expand_base {
+    my( $self, $end ) = @_;
+
+    foreach my $wit ( @{$self->tradition->witnesses} ) {
+	my $sig = $wit_sigil;
+	my $post_sig;
+	$post_sig = $wit->post_correctione 
+	    if $wit->has_post_correctione;
+	my @wit_path = ( $self->start );
+	my @wit_pc_path;
+	my $curr_rdg = $self->start;
+	my %seen;
+	while( $curr_rdg ne $end ) {
+	    if( $seen{$curr_reading->name} ) {
+		warn "Detected loop in walk_and_expand_base with witness "
+		    . "$sig on reading " . $curr_reading->name . "\n";
+		last;
+	    }
+	    my $next_rdg = $self->next_reading( $curr_reading, $sig );
+	    unless( $self->has_explicit_path( $curr_reading, 
+					      $next_reading, $sig ) ) {
+		$self->add_path( $curr_reading, $next_reading, $sig );
+	    }
+	    push( @wit_path, $next_reading );
+	    $seen{$curr_reading->name} = 1;
+	}
+	$wit->path( \@wit_path );
+
+	# Now go through this path and look for p.c. divergences.
+	# TODO decide how to handle p.c. paths
+	# BIG TODO handle case where p.c. follows the base and a.c. doesn't!
+		
+	    
+}
+
 sub common_readings {
     my $self = shift;
     my @common = grep { $_->is_common } $self->readings();
diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm
index 971a26b..59c60ca 100644
--- a/lib/Text/Tradition/Collation/Reading.pm
+++ b/lib/Text/Tradition/Collation/Reading.pm
@@ -52,6 +52,15 @@ sub BUILD {
     $self->same_as( [ $self ] );
 }
 
+sub text {
+    # Wrapper function around 'label' attribute.
+    my $self = shift;
+    if( @_ ) {
+	$self->set_attribute( 'label', $_[0] );
+    }
+    return $self->get_attribute( 'label' );
+}
+
 sub merge_from {
     my( $self, $merged_node ) = @_;
     # Adopt the identity pool of the other node.
diff --git a/lib/Text/Tradition/Parser/BaseText.pm b/lib/Text/Tradition/Parser/BaseText.pm
index d0f4816..d7d090b 100644
--- a/lib/Text/Tradition/Parser/BaseText.pm
+++ b/lib/Text/Tradition/Parser/BaseText.pm
@@ -40,12 +40,12 @@ Takes an initialized graph and a set of options, which must include:
 =cut
 
 sub parse {
-    my( $graph, %opts ) = @_;
+    my( $tradition, %opts ) = @_;
 
     my $format_mod = 'Text::Tradition::Parser::' . $opts{'format'};
     load( $format_mod );
     my @apparatus_entries = $format_mod->can('read')->( $opts{'data'} );
-    merge_base( $graph, $opts{'base'}, @apparatus_entries );
+    merge_base( $tradition->collation, $opts{'base'}, @apparatus_entries );
 }
 
 =item B<merge_base>
@@ -76,8 +76,8 @@ underscore in its name.
 =cut
 
 sub merge_base {
-    my( $graph, $base_file, @app_entries ) = @_;
-    my @base_line_starts = read_base( $base_file, $graph );
+    my( $collation, $base_file, @app_entries ) = @_;
+    my @base_line_starts = read_base( $base_file, $collation );
 
     my %all_witnesses;
     foreach my $app ( @app_entries ) {
@@ -86,7 +86,7 @@ sub merge_base {
 	# last if $line > 2;
 	# DEBUG for problematic entries
 	my $scrutinize = "";
-	my $first_line_node = $base_line_starts[ $line ];
+	my $first_line_reading = $base_line_starts[ $line ];
 	my $too_far = $base_line_starts[ $line+1 ];
 	
 	my $lemma = $app->{rdg_0};
@@ -98,7 +98,7 @@ sub merge_base {
 	my @lemma_words = split( /\s+/, $lemma );
 	
 	# Now search for the lemma words within this line.
-	my $lemma_start = $first_line_node;
+	my $lemma_start = $first_line_reading;
 	my $lemma_end;
 	my %seen;
 	while( $lemma_start ne $too_far ) {
@@ -120,17 +120,19 @@ sub merge_base {
 		if( --$seq < 1 ) {
 		    # Now we have to compare the rest of the words here.
 		    if( scalar( @lemma_words ) > 1 ) {
-			my $next_node = $graph->next_word( $lemma_start );
+			my $next_reading = 
+			    $collation->next_reading( $lemma_start );
 			foreach my $w ( @lemma_words[1..$#lemma_words] ) {
 			    printf STDERR "Now matching %s against %s\n", 
-				    cmp_str($next_node), $w
+				    cmp_str($next_reading), $w
 				if "$line.$num" eq $scrutinize;
-			    if( $w ne cmp_str($next_node) ) {
+			    if( $w ne cmp_str($next_reading) ) {
 				$unmatch = 1;
 				last;
 			    } else {
-				$lemma_end = $next_node;
-				$next_node = $graph->next_word( $lemma_end );
+				$lemma_end = $next_reading;
+				$next_reading = 
+				    $collation->next_reading( $lemma_end );
 			    }
 			}
 		    } else {
@@ -142,30 +144,33 @@ sub merge_base {
 	    }
 	    last unless ( $unmatch || !defined( $lemma_end ) );
 	    $lemma_end = undef;
-	    $lemma_start = $graph->next_word( $lemma_start );
+	    $lemma_start = $collation->next_reading( $lemma_start );
 	}
 	
 	unless( $lemma_end ) {
 	    warn "No match found for @lemma_words at $line.$num";
 	    next;
 	} else {
-	    # These are no longer common nodes; unmark them as such.
-	    my @lemma_nodes = $graph->node_sequence( $lemma_start, 
+	    # These are no longer common readings; unmark them as such.
+	    my @lemma_readings = $collation->reading_sequence( $lemma_start, 
 						     $lemma_end );
-	    map { $_->set_attribute( 'class', 'lemma' ) } @lemma_nodes;
+	    map { $_->set_attribute( 'class', 'lemma' ) } @lemma_readings;
 	}
 	
-	# Now we have our lemma nodes; we add the variant nodes to the graph.
+	# Now we have our lemma readings; we add the variant readings
+	# to the collation.
 	
 	# Keep track of the start and end point of each reading for later
-	# node collapse.
+	# reading collapse.
 	my @readings = ( $lemma_start, $lemma_end );
 
-	# For each reading that is not rdg_0, we make a chain of nodes
+	# For each reading that is not rdg_0, we make a chain of readings
 	# and connect them to the anchor.  Edges are named after the mss
 	# that are relevant.
 	foreach my $k ( grep { /^rdg/ } keys( %$app ) ) {
 	    next if $k eq 'rdg_0'; # that's the lemma.
+	    # TODO look at the lemma for any p.c. readings, and add
+	    # them explicitly!
 	    my @variant = split( /\s+/, $app->{$k} );
 	    @variant = () if $app->{$k} eq '/'; # This is an omission.
 	    my @mss = grep { $app->{$_} eq $k } keys( %$app );
@@ -175,124 +180,114 @@ sub merge_base {
 		next;
 	    }
 	    
-	    # Determine the label name for the edges here.
-	    my $edge_name = join(', ', @mss );
+	    # Keep track of what witnesses we have seen.
 	    @all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
 	    
-	    # Make the variant into a set of nodes.
+	    # Make the variant into a set of readings.
 	    my $ctr = 0;
-	    my $last_node = $graph->prior_word( $lemma_start );
+	    my $last_reading = $collation->prior_reading( $lemma_start );
 	    my $var_start;
 	    foreach my $vw ( @variant ) {
 		my $vwname = "$k/$line.$num.$ctr"; $ctr++;
-		my $vwnode = $graph->add_node( $vwname );
-		$vwnode->set_attribute( 'label', $vw );
-		$vwnode->set_attribute( 'class', 'variant' );
-		$graph->add_edge( $last_node, $vwnode, $edge_name );
-		$var_start = $vwnode unless $var_start;
-		$last_node = $vwnode;
+		my $vwreading = $collation->add_reading( $vwname );
+		$vwreading->text( $vw );
+		$vwreading->make_variant();
+		foreach ( @mss ) {
+		    $collation->add_path( $last_reading, $vwreading, $_ );
+		}
+		$var_start = $vwreading unless $var_start;
+		$last_reading = $vwreading;
 	    }
 	    # Now hook it up at the end.
-	    $graph->add_edge( $last_node, $graph->next_word( $lemma_end ),
-					$edge_name );
+	    foreach ( @mss ) {
+		$collation->add_path( $last_reading, 
+				      $collation->next_word( $lemma_end ),
+				      $_ );
+	    }
 	    
 	    if( $var_start ) { # if it wasn't an empty reading
-		push( @readings, $var_start, $last_node );
+		push( @readings, $var_start, $last_reading );
 	    }
 	}
 
-	# Now collate and collapse the identical nodes within the graph.
-	collate_variants( $graph, @readings );
+	# Now collate and collapse the identical readings within the collation.
+	collate_variants( $collation, @readings );
     }
 
-    ## Now in theory I have a graph.  I want to make it a little easier to
-    ## read.  So I collapse nodes that have only one edge in and one edge
-    ## out, and I do this by looking at the edges.
-    
-#     foreach my $edge ( $graph->edges() ) {
-# 	my @out_edges = $edge->from()->outgoing();
-# 	my @in_edges = $edge->to()->incoming();
-	
-# 	next if $edge->from() eq $graph->start();
-# 	next if $edge->to()->name() eq '#END#';
-# 	next unless scalar( @out_edges ) == 1;
-# 	next unless scalar( @in_edges ) == 1;
-# 	next unless $out_edges[0] eq $in_edges[0];
-# 	# In theory if we've got this far, we're safe, but just to
-# 	# double-check...
-# 	next unless $out_edges[0] eq $edge;
-	
-# 	$graph->merge_nodes( $edge->from(), $edge->to(), ' ' );
-#     }
-
-    # Now walk the path for each witness, so that we can do the
-    # position calculations.
-    my $paths = {};
+    # Now make the witness objects
     foreach my $w ( keys %all_witnesses ) {
-	my $back = undef;
-	if( $w =~ /^(.*)\s*\(p\.\s*c\.\)/ ) {
-	    $back = $1;
+	my $base = _is_post_corr( $w );
+	if( $base ) {
+	    my $pctag = substr( $w, length( $base ) );
+	    my $existing_wit = $collation->tradition->witness( $base );
+	    unless( $existing_wit ) {
+		$existing_wit = $collation->tradition->add_witness( $base );
+	    }
+	    $existing_wit->post_correctione( $pctag );
+	} else {
+	    $collation->tradition->add_witness( $w )
+		unless $collation->tradition->witness( $w );
 	}
-	my @wit_nodes = $graph->node_sequence( $graph->start, 
-					       $graph->node( '#END#' ), 
-					       $w, $back );
-	my @wn_names = map { $_->name() } @wit_nodes;
-	$paths->{$w} = \@wn_names;
     }
-    $DB::single = 1;
-    my @common_nodes = grep { $graph->is_common( $_ ) } $graph->nodes();
-    $graph->make_positions( \@common_nodes, $paths );
+
+    # Now walk paths and calculate positions.
+    my @common_readings = 
+	$collation->walk_and_expand_base( $collation->reading( '#END#' ) );
+    $collation->calculate_positions( @common_readings );
 }
 
 =item B<read_base>
 
-my @line_beginnings = read_base( 'reference.txt', $graph );
+my @line_beginnings = read_base( 'reference.txt', $collation );
 
-Takes a text file and a (presumed empty) graph object, adds the words
-as simple linear nodes to the graph, and returns a list of nodes that
-represent the beginning of lines. This graph is now the starting point
-for application of apparatus entries in merge_base, e.g. from a CSV
-file or a Classical Text Editor file.
+Takes a text file and a (presumed empty) collation object, adds the
+words as simple linear readings to the collation, and returns a
+list of readings that represent the beginning of lines. This collation
+is now the starting point for application of apparatus entries in
+merge_base, e.g. from a CSV file or a Classical Text Editor file.
 
 =cut
 
 sub read_base {
-    my( $base_file, $graph ) = @_;
+    my( $base_file, $collation ) = @_;
     
-    # This array gives the first node for each line.  We put the
+    # This array gives the first reading for each line.  We put the
     # common starting point in line zero.
-    my $last_node = $graph->start();
-    my $lineref_array = [ $last_node ]; # There is no line zero.
+    my $last_reading = $collation->start();
+    my $lineref_array = [ $last_reading ]; # There is no line zero.
 
     open( BASE, $base_file ) or die "Could not open file $base_file: $!";
     while(<BASE>) {
-	# Make the nodes, and connect them up for the base, but also
-	# save the first node of each line in an array for the purpose.
+	# Make the readings, and connect them up for the base, but
+	# also save the first reading of each line in an array for the
+	# purpose.
+	# TODO use configurable reading separator
 	chomp;
 	my @words = split;
 	my $started = 0;
 	my $wordref = 0;
 	my $lineref = scalar @$lineref_array;
 	foreach my $w ( @words ) {
-	    my $noderef = join( ',', $lineref, ++$wordref );
-	    my $node = $graph->add_node( $noderef );
-	    $node->set_attribute( 'label', $w );
-	    $node->set_attribute( 'class', 'common' );
+	    my $readingref = join( ',', $lineref, ++$wordref );
+	    my $reading = $collation->add_reading( $readingref );
+	    $reading->text( $w );
+	    $reading->make_common();
 	    unless( $started ) {
-		push( @$lineref_array, $node );
+		push( @$lineref_array, $reading );
 		$started = 1;
 	    }
-	    if( $last_node ) {
-		my $edge = $graph->add_edge( $last_node, $node, "base text" );
-		$edge->set_attribute( 'class', 'basetext' );
-		$last_node = $node;
+	    if( $last_reading ) {
+		my $path = $collation->add_path( $last_reading, $reading, 
+						 "base text" );
+		$path->set_attribute( 'class', 'basetext' );
+		$last_reading = $reading;
 	    } # TODO there should be no else here...
 	}
     }
     close BASE;
     # Ending point for all texts
-    my $endpoint = $graph->add_node( '#END#' );
-    $graph->add_edge( $last_node, $endpoint, "base text" );
+    my $endpoint = $collation->add_reading( '#END#' );
+    $collation->add_path( $last_reading, $endpoint, "base text" );
     push( @$lineref_array, $endpoint );
 
     return( @$lineref_array );
@@ -300,20 +295,21 @@ sub read_base {
 
 =item B<collate_variants>
 
-collate_variants( $graph, @readings )
+collate_variants( $collation, @readings )
 
 Given a set of readings in the form 
 ( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
-walks through each to identify those nodes that are identical.  The
-graph is a Text::Tradition::Graph object; the elements of @readings are
-Graph::Easy::Node objects that appear on the graph.
+walks through each to identify those readings that are identical.  The
+collation is a Text::Tradition::Collation object; the elements of
+@readings are Text::Tradition::Collation::Reading objects that appear
+on the collation graph.
 
 TODO: Handle collapsed and non-collapsed transpositions.
 
 =cut
 
 sub collate_variants {
-    my( $graph, @readings ) = @_;
+    my( $collation, @readings ) = @_;
     my $lemma_start = shift @readings;
     my $lemma_end = shift @readings;
     my $detranspose = 0;
@@ -321,112 +317,122 @@ sub collate_variants {
     # We need to calculate positions at this point, which is where
     # we are getting the implicit information from the apparatus.
 
-    # Start the list of distinct nodes with those nodes in the lemma.
-    my @distinct_nodes;
+    # Start the list of distinct readings with those readings in the lemma.
+    my @distinct_readings;
     my $position = 0;
     while( $lemma_start ne $lemma_end ) {
-	push( @distinct_nodes, [ $lemma_start, 'base text', $position++ ] );
-	$lemma_start = $graph->next_word( $lemma_start );
+	push( @distinct_readings, [ $lemma_start, 'base text', $position++ ] );
+	$lemma_start = $collation->next_word( $lemma_start );
     } 
-    push( @distinct_nodes, [ $lemma_end, 'base text', $position++ ] );
+    push( @distinct_readings, [ $lemma_end, 'base text', $position++ ] );
     
 
     while( scalar @readings ) {
 	my( $var_start, $var_end ) = splice( @readings, 0, 2 );
 
-	# I want to look at the nodes in the variant and lemma, and
-	# collapse nodes that are the same word.  This is mini-collation.
+	# I want to look at the readings in the variant and lemma, and
+	# collapse readings that are the same word.  This is mini-collation.
 	# Each word in the 'main' list can only be collapsed once with a
 	# word from the current reading.
 	my %collapsed = ();
 
-	# Get the label. There will only be one outgoing edge to start
+	# Get the label. There will only be one outgoing path to start
 	# with, so this is safe.
 	my @out = $var_start->outgoing();
 	my $var_label = $out[0]->label();
 
-	my @variant_nodes;
+	my @variant_readings;
 	while( $var_start ne $var_end ) {
-	    push( @variant_nodes, $var_start );
-	    $var_start = $graph->next_word( $var_start, $var_label );
+	    push( @variant_readings, $var_start );
+	    $var_start = $collation->next_word( $var_start, $var_label );
 	}
-	push( @variant_nodes, $var_end );
+	push( @variant_readings, $var_end );
 
-	# Go through the variant nodes, and if we find a lemma node that
-	# hasn't yet been collapsed with a node, equate them.  If we do
-	# not, keep them to push onto the end of all_nodes.
-	my @remaining_nodes;
+	# Go through the variant readings, and if we find a lemma reading that
+	# hasn't yet been collapsed with a reading, equate them.  If we do
+	# not, keep them to push onto the end of all_readings.
+	my @remaining_readings;
 	my $last_index = 0;
 	my $curr_pos = 0;
-	foreach my $w ( @variant_nodes ) {
+	foreach my $w ( @variant_readings ) {
 	    my $word = $w->label();
 	    my $matched = 0;
-	    foreach my $idx ( $last_index .. $#distinct_nodes ) {
-		my( $l, $edgelabel, $pos ) = @{$distinct_nodes[$idx]};
+	    foreach my $idx ( $last_index .. $#distinct_readings ) {
+		my( $l, $pathlabel, $pos ) = @{$distinct_readings[$idx]};
 		if( $word eq cmp_str( $l ) ) {
 		    next if exists( $collapsed{ $l->label } )
 			&& $collapsed{ $l->label } eq $l;
 		    $matched = 1;
 		    $last_index = $idx if $detranspose;
-		    # Collapse the nodes.
-		    printf STDERR "Merging nodes %s/%s and %s/%s\n", 
+		    # Collapse the readings.
+		    printf STDERR "Merging readings %s/%s and %s/%s\n", 
 		        $l->name, $l->label, $w->name, $w->label;
-		    $graph->merge_nodes( $l, $w );
+		    $collation->merge_readings( $l, $w );
 		    $collapsed{ $l->label } = $l;
-		    # Now collapse any multiple edges to and from the node.
- 		    remove_duplicate_edges( $graph, 
- 				    $graph->prior_word( $l, $edgelabel ), $l );
- 		    remove_duplicate_edges( $graph, $l, 
- 				    $graph->next_word( $l, $edgelabel ) );
+		    # Now collapse any multiple paths to and from the reading.
+ 		    remove_duplicate_paths( $collation, 
+ 				    $collation->prior_word( $l, $pathlabel ), $l );
+ 		    remove_duplicate_paths( $collation, $l, 
+ 				    $collation->next_word( $l, $pathlabel ) );
 		    $curr_pos = $pos;
 		    last;
 		}
 	    }
-	    push( @remaining_nodes, [ $w, $var_label, $curr_pos++ ] ) unless $matched;
+	    push( @remaining_readings, [ $w, $var_label, $curr_pos++ ] ) unless $matched;
 	}
-	push( @distinct_nodes, @remaining_nodes) if scalar( @remaining_nodes );
+	push( @distinct_readings, @remaining_readings) if scalar( @remaining_readings );
     }
 
-    # Now set the positions of all the nodes in this variation.
+    # Now set the positions of all the readings in this variation.
     #$DB::single = 1;
-    print STDERR "Nodes and their positions are:\n";
-    foreach my $n ( @distinct_nodes ) {
+    print STDERR "Readings and their positions are:\n";
+    foreach my $n ( @distinct_readings ) {
 	printf STDERR "\t%s (position %s)\n", $n->[0]->label(), $n->[2];
     }
 }
 
-=item B<remove_duplicate_edges>
+=item B<remove_duplicate_paths>
 
-remove_duplicate_edges( $graph, $from, $to );
+remove_duplicate_paths( $collation, $from, $to );
 
-Given two nodes, reduce the number of edges between those nodes to
-one.  If neither edge represents a base text, combine their labels.
+Given two readings, reduce the number of paths between those readings to
+one.  If neither path represents a base text, combine their labels.
 
 =cut
 
-sub remove_duplicate_edges {
-    my( $graph, $from, $to ) = @_;
-    my @edges = $from->edges_to( $to );
-    if( scalar @edges > 1 ) {
-	my @base = grep { $_->label eq 'base text' } @edges;
+sub remove_duplicate_paths {
+    my( $collation, $from, $to ) = @_;
+    my @paths = $from->paths_to( $to );
+    if( scalar @paths > 1 ) {
+	my @base = grep { $_->label eq 'base text' } @paths;
 	if ( scalar @base ) {
-	    # Remove the edges that are not base.
-	    foreach my $e ( @edges ) {
-		$graph->del_edge( $e )
+	    # Remove the paths that are not base.
+	    foreach my $e ( @paths ) {
+		$collation->del_path( $e )
 		    unless $e eq $base[0];
 	    }
 	} else {
-	    # Combine the edges into one.
-	    my $new_edge_name = join( ', ', map { $_->label() } @edges );
-	    my $new_edge = shift @edges;
-	    $new_edge->set_attribute( 'label', $new_edge_name );
-	    foreach my $e ( @edges ) {
-		$graph->del_edge( $e );
+	    # Combine the paths into one.
+	    my $new_path_name = join( ', ', map { $_->label() } @paths );
+	    my $new_path = shift @paths;
+	    $new_path->set_attribute( 'label', $new_path_name );
+	    foreach my $e ( @paths ) {
+		$collation->del_path( $e );
 	    }
 	}
     }
 }
 
+# Helper function. Given a witness sigil, if it is a post-correctione
+# sigil,return the base witness.  If not, return a false value.
+sub _is_post_corr {
+    my( $sigil ) = @_;
+    if( $sigil =~ /^(.*?)(\s*\(p\.\s*c\.\))$/ ) {
+	return $1;
+    }
+    return undef;
+}
+
 =item B<cmp_str>
 
 Pretend you never saw this method.  Really it needs to not be hardcoded.
@@ -434,8 +440,8 @@ Pretend you never saw this method.  Really it needs to not be hardcoded.
 =cut
 
 sub cmp_str {
-    my( $node ) = @_;
-    my $word = $node->label();
+    my( $reading ) = @_;
+    my $word = $reading->label();
     $word = lc( $word );
     $word =~ s/\W//g;
     $word =~ s/v/u/g;
diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm
index 5c6244c..4e59b7e 100644
--- a/lib/Text/Tradition/Parser/GraphML.pm
+++ b/lib/Text/Tradition/Parser/GraphML.pm
@@ -79,10 +79,10 @@ sub parse {
     my @nodes = $xpc->findnodes( '//g:node' );
     foreach my $n ( @nodes ) {
 	my $id = _lookup_node_data( $n, 'number' );
-	my $label = _lookup_node_data( $n, 'token' );
+	my $token = _lookup_node_data( $n, 'token' );
 	my $gnode = $collation->add_reading( $id );
 	$node_name{ $n->getAttribute('id') } = $id;
-	$gnode->set_attribute( 'label', $label );
+	$gnode->text( $token );
 
 	# Now get the rest of the data, i.e. not the ID or label
 	my $extra = {};
@@ -101,8 +101,13 @@ sub parse {
 	# Label according to the witnesses present.
 	my @wit_ids = $xpc->findnodes( './g:data/attribute::key', $e );
 	my @wit_names = map { $witnesses{ $_->getValue() } } @wit_ids;
-	my $label = $collation->path_label( @wit_names );
-	$collation->add_path( $from, $to, $label );
+	# One path per witness
+	foreach( @wit_names ) {
+	    $collation->add_path( $from, $to, $_ );
+	}
+	# Only a single path between two readings
+	# my $label = $collation->path_label( @wit_names );
+	# $collation->add_path( $from, $to, $label );
     }
 
     ## Reverse the node_name hash so that we have two-way lookup.
diff --git a/lib/Text/Tradition/Witness.pm b/lib/Text/Tradition/Witness.pm
index cb461c5..d48a350 100644
--- a/lib/Text/Tradition/Witness.pm
+++ b/lib/Text/Tradition/Witness.pm
@@ -31,6 +31,13 @@ has 'path' => (
     predicate => 'has_path',
     );	       
 
+has 'post_correctione' => (
+    is => 'rw',
+    isa => 'Str',
+    predicate => 'has_post_correctione',
+    );
+    
+
 sub BUILD {
     my $self = shift;
     if( $self->has_source ) {
diff --git a/script/svg_from_csv.pl b/script/svg_from_csv.pl
index c469bbe..40fa7f5 100644
--- a/script/svg_from_csv.pl
+++ b/script/svg_from_csv.pl
@@ -3,9 +3,9 @@
 use lib 'lib';
 use strict;
 use warnings;
-use Text::Tradition::Graph;
+use Text::Tradition;
 
-my $collation_graph = Text::Tradition::Graph->new( 
+my $collation_graph = Text::Tradition->new( 
 						   'CSV' => $ARGV[0],
 						   'base' => $ARGV[1],
 						   );