Merge branch 'master' of github.com:tla/stemmatology

[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / CTE.pm
diff --git a/lib/Text/Tradition/Parser/CTE.pm b/lib/Text/Tradition/Parser/CTE.pm

index 7390826..49bcfe2 100644 (file)
--- a/lib/Text/Tradition/Parser/CTE.pm
+++ b/lib/Text/Tradition/Parser/CTE.pm
@@ -2,6 +2,8 @@ package Text::Tradition::Parser::CTE;
 
 use strict;
 use warnings;
+use Encode qw/ decode /;
+use Text::Tradition::Parser::Util qw/ collate_variants /;
 use XML::LibXML;
 use XML::LibXML::XPathContext;
 
@@ -23,220 +25,393 @@ Classical Text Editor.
 my @apparatus = read( $xml_file );
 
 Takes a Tradition object and a TEI file exported from Classical Text
-Editor; initializes the Tradition from the file.
+Editor using double-endpoint-attachment critical apparatus encoding; 
+initializes the Tradition from the file.
 
 =cut
 
-my %seg_readings;  # Save the XML IDs for apparatus anchors.
-my %sigil_for;     # Save the XML IDs for witnesses.
-my %note_start;    # Save the readings where an apparatus entry is attached.
+my %sigil_for;  # Save the XML IDs for witnesses.
+my %apps;       # Save the apparatus XML for a given ID.    
+my %has_ac;     # Keep track of witnesses that have corrections.
 
 sub parse {
-    my( $tradition, $xml_str ) = @_;
-    my $c = $tradition->collation;  # Some shorthand
-    
-    # First, parse the XML.
-    my $parser = XML::LibXML->new();
-    my $doc = $parser->parse_string( $xml_str );
-    my $tei = $doc->documentElement();
-    my $xpc = XML::LibXML::XPathContext->new( $tei );
-
-    # CTE uses a DTD rather than any xmlns-based parsing.  Thus we
-    # need no namespace foo.
-
-    # Get the witnesses and create the witness objects.
-    foreach my $wit_el ( $xpc->findnodes( '//sourceDesc/listWit/witness' ) ) {
-       # The witness xml:id is used internally, and is *not* the sigil name.
-       my $id= $wit_el->getAttribute( 'xml:id' );
-       $id =~ s/^M/sig/;  # Stupid but there you go.
-       my @sig_parts = $xpc->findnodes( './abbr/descendant::text()', $wit_el );
-       my $sig = join( '', grep { /\w/ } @sig_parts );
-       $tradition->add_witness( sigil => $sig, source => $wit_el->toString() );
-       $sigil_for{$id} = $sig;
-    }
-
-    # Now go through the text and find the base tokens.  Tokens are
-    # either plain text to be split on whitespace, or they are wrapped
-    # in <hi/> or <seg/> elements.
-    my @base_text;
-    my $ctr = 1;
-    foreach my $pg_el ( $xpc->findnodes( '/TEI/text/p' ) ) {
-       foreach my $xn ( $pg_el->childNodes ) {
-           push( @base_text, _get_readings( $tradition, $xn ) );
+       my( $tradition, $opts ) = @_;
+       my $c = $tradition->collation;  # Some shorthand
+       
+       # First, parse the XML.
+    my( $tei, $xpc ) = _remove_formatting( $opts );
+    return unless $tei; # we have already warned.
+
+       # CTE uses a DTD rather than any xmlns-based parsing.  Thus we
+       # need no namespace handling.
+       # Get the witnesses and create the witness objects.
+       foreach my $wit_el ( $xpc->findnodes( '//sourceDesc/listWit/witness' ) ) {
+               # The witness xml:id is used internally, and is *not* the sigil name.
+               my $id= $wit_el->getAttribute( 'xml:id' );
+               my @sig_parts = $xpc->findnodes( 'descendant::text()', $wit_el );
+               my $sig = _stringify_sigil( @sig_parts );
+               print STDERR "Adding witness $sig\n";
+               $tradition->add_witness( sigil => $sig, sourcetype => 'collation' );
+               $sigil_for{'#'.$id} = $sig;  # Make life easy by keying on the ID ref syntax
        }
+       
+       # Now go through the text and find the base tokens, apparatus tags, and
+       # anchors.  Make a giant array of all of these things in sequence.
+       # TODO consider combining this with creation of graph below
+       my @base_text;
+       foreach my $pg_el ( $xpc->findnodes( '/TEI/text/body/p' ) ) {
+               foreach my $xn ( $pg_el->childNodes ) {
+                       push( @base_text, _get_base( $xn ) );
+               }
+       }
+       # We now have to work through this array applying the alternate 
+       # apparatus readings to the base text.  Essentially we will put 
+       # everything on the graph, from which we will delete the apps and
+       # anchors when we are done.
+       
+       # First, put the base tokens, apps, and anchors in the graph.
+       my $counter = 0;
+       my $last = $c->start;
+       foreach my $item ( @base_text ) {
+           my $r;
+        if( $item->{'type'} eq 'token' ) {
+            $r = $c->add_reading( { id => 'n'.$counter++, 
+                                                       text => $item->{'content'} } );
+        } elsif ( $item->{'type'} eq 'anchor' ) {
+            $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#', 
+                                                       is_ph => 1 } );
+        } elsif ( $item->{'type'} eq 'app' ) {
+            my $tag = '#APP_' . $counter++ . '#';
+            $r = $c->add_reading( { id => $tag, is_ph => 1 } );
+            $apps{$tag} = $item->{'content'};
+        }
+        $c->add_path( $last, $r, $c->baselabel );
+        $last = $r;
     }
-
-    # String together the base.
-    my $source = $c->start;
-    foreach my $b ( @base_text ) {
-       $c->add_path( $source, $b, $c->baselabel );
-       $source = $b;
-    }
-    $c->add_path( $source, $c->add_reading('#END#'), $c->baselabel );
-               
-    # Now go through the text and find all the apparatus notes, and parse them.
-    foreach my $note_el( $xpc->findnodes( '//note[attribute::type = "a1"]' ) ) {
-       my $app_start = $note_start{$note_el};
-       my $apparatus = _parse_note( $note_el, $c, $app_start );
+    $c->add_path( $last, $c->end, $c->baselabel );
+    
+    # Now we can parse the apparatus entries, and add the variant readings 
+    # to the graph.
+    
+    foreach my $app_id ( keys %apps ) {
+        _add_readings( $c, $app_id );
     }
+    
+    # Finally, add explicit witness paths, remove the base paths, and remove
+    # the app/anchor tags.
+    expand_all_paths( $c );
+
+    # Save the text for each witness so that we can ensure consistency
+    # later on
+       $tradition->collation->text_from_paths();       
+       $tradition->collation->calculate_ranks();
+       $tradition->collation->flatten_ranks();
 }
 
+sub _stringify_sigil {
+    my( @nodes ) = @_;
+    my @parts = grep { /\w/ } map { $_->data } @nodes;
+    return join( '', @parts );
+}
 
-## Recursive little helper function to help us navigate through nested
-## XML, picking out the text.
-
-sub _get_readings {
-    my( $tradition, $xn ) = @_;
-    my @readings;
-    if( $xn->nodeType == XML_TEXT_NODE ) {
-       my $str = $xn->data;
-       $str =~ s/^\s+//;
-       foreach my $w ( split( /\s+/, $str ) ) {
-           my $rdg = $tradition->collation->add_reading( 'n'.$ctr++ );
-           $rdg->text( $w );
-           push( @readings, $rdg );
-       }
-    } elsif( $xn->nodeName eq 'hi' ) {
-       foreach( $xn->childNodes ) {
-           # Recurse as if the hi weren't there.
-           push( @readings, _get_readings( $tradition, $_ ) );
+# Get rid of all the formatting elements that get in the way of tokenization.
+sub _remove_formatting {
+       my( $opts ) = @_;
+       
+       # First, parse the original XML
+       my $parser = XML::LibXML->new();
+    my $doc;
+    if( exists $opts->{'string'} ) {
+        $doc = $parser->parse_string( $opts->{'string'} );
+    } elsif ( exists $opts->{'file'} ) {
+        $doc = $parser->parse_file( $opts->{'file'} );
+    } else {
+        warn "Could not find string or file option to parse";
+        return;
+    }
+    
+    # Second, remove the formatting
+       my $xpc = XML::LibXML::XPathContext->new( $doc->documentElement );
+       my @useless = $xpc->findnodes( '//hi' );
+       foreach my $n ( @useless ) {
+               my $parent = $n->parentNode();
+               my @children = $n->childNodes();
+               my $first = shift @children;
+               $parent->replaceChild( $first, $n );
+               foreach my $c ( @children ) {
+                       $parent->insertAfter( $c, $first );
+                       $first = $c;
+               }
        }
-    } elsif( $xn->nodeName eq 'seg' ) {
-       # Read the reading, but also add the word in question as an anchor.
-       my $seg_id = $xn->getAttribute( 'xml:id' );
-       my @r;
-       foreach( $xn->childNodes ) {
-           push( @r, _get_readings( $tradition, $_ ) );
+       
+       # Third, write out and reparse to merge the text nodes.
+       my $result = decode( $doc->encoding, $doc->toString() );
+       my $tei = $parser->parse_string( $result )->documentElement;
+       $xpc = XML::LibXML::XPathContext->new( $tei );
+       return( $tei, $xpc );
+}
+
+## Helper function to help us navigate through nested XML, picking out 
+## the words, the apparatus, and the anchors.
+
+sub _get_base {
+       my( $xn ) = @_;
+       my @readings;
+       if( $xn->nodeType == XML_TEXT_NODE ) {
+           # Base text, just split the words on whitespace and add them 
+           # to our sequence.
+               my $str = $xn->data;
+               $str =~ s/^\s+//;
+               my @tokens = split( /\s+/, $str );
+               push( @readings, map { { 'type' => 'token', 'content' => $_ } } @tokens );
+       } elsif( $xn->nodeName eq 'app' ) {
+               # Apparatus, just save the entire XML node.
+               push( @readings, { 'type' => 'app', 'content' => $xn } );
+       } elsif( $xn->nodeName eq 'anchor' ) {
+               # Anchor to mark the end of some apparatus; save its ID.
+               push( @readings, { 'type' => 'anchor', 
+                   'content' => $xn->getAttribute( 'xml:id' ) } );
+       } elsif ( $xn->nodeName ne 'note' ) {  # Any tag we don't know to disregard
+           print STDERR "Unrecognized tag " . $xn->nodeName . "\n";
        }
-       warn "More than one reading found in seg $seg_id" unless @r == 1;
-       $seg_readings{'#'.$seg_id} = $r[0];
-       push( @readings, @r );
-    } elsif( $xn->nodeName eq 'note' ) {
-       # Save where we found this note.
-       $note_start{$xn} = $readings[-1];
-    }
-    return @readings;
+       return @readings;
 }
 
-## Helper function to parse apparatus entries.  This could get nasty, I mean fun.
-sub _parse_note {
-    my( $xn, $c, $app_start ) = @_;
-    my $app_end = $seg_readings{$xn->getAttribute( 'targetEnd' )};
-    my $lemma = join( ' ', map { $_->text } $c->reading_sequence( $app_start, $app_end ) );
-
-    my %seen_wits;
-    # TODO A list of active witnesses should be passed really.
-    map { $seen_wits{$_} = 0 } vals( %sigil_for );
-
-    # The note has a <p/> tag, then <mentioned/>, then 0-1 text nodes,
-    # then an assortment of <hi/> or <abbr/> elements.  If the hi
-    # contains an abbr, then it goes before, otherwise it probably
-    # goes after.
-    my @p = $xn->getChildrenByTagName( 'p' );
-    warn "More than one pg in note" unless @p == 1;
-
-    # Strip the <hi/> elements.
-    my @childnodes;
-    foreach ( $p[0]->childNodes ) {
-       if( $_->nodeName eq 'hi' ) {
-           push( @childnodes, $_->childNodes );
-       } else {
-           push( @childnodes, $_ );
+sub _append_tokens {
+       my( $list, @tokens ) = @_;
+       if( @$list && $list->[-1]->{'content'} =~ /\#JOIN\#$/ ) {
+               # The list evidently ended mid-word; join the next token onto it.
+               my $t = shift @tokens;
+               if( ref $t && $t->{'type'} eq 'token' ) {
+                       # Join the word
+                       $t = $t->{'content'};
+               } elsif( ref $t ) {
+                       # An app or anchor intervened; end the word.
+                       unshift( @tokens, $t );
+                       $t = '';
+               }
+               $list->[-1]->{'content'} =~ s/\#JOIN\#$/$t/;
        }
-    }
-       
-    # Go through and try to parse the sucker.
-    my $apparatus;
-    my $curr_rdg = '';
-    my $reading_sigla = 0;
-    my @curr_wits;
-    foreach my $pxn ( $p[0]->childNodes ) {
-       next if $pxn->nodeName eq 'mentioned';  # Redundant for us.
-       if( $pxn->nodeType == XML_TEXT_NODE ) {
-           my $pxn_str = $pxn->data;
-           $pxn_str =~ s/^\s+//;
-           $pxn_str =~ s/\s+$//;
-           my @parts = split( /,\s*/, $pxn_str );
-           if( @parts > 1 ) {
-               # Comma separation means that we are starting a new reading.
-               my $last = shift @parts;
-               if( $last =~ /^\s*a\.\s*c\.\s*$/ ) {
-                   my $sig = pop @curr_wits;
-                   $sig .= '_ac';
-                   push( @curr_wits, $sig );
+       foreach my $t ( @tokens ) {
+               unless( ref( $t ) ) {
+                       $t = { 'type' => 'token', 'content' => $t };
                }
-               $pxn_str = join( ', ', @parts );
-               # Trigger a reading interpretation.
-               $reading_sigla = 1;
-           }
-           if( $reading_sigla ) {
-               my @wits = keys %curr_wits;
-               $apparatus->{ interpret( $curr_rdg, $lemma ) } = \@wits;
-               $curr_rdg = '';
-               $reading_sigla = 0;
-               @curr_wits = ();
-           }
-
-           if( $pxn_str =~ /^\s*a\.\s*c\.\s*$/ ) {
-               my $sig = pop @curr_wits;
-               $sig .= '_ac';
-               push( @curr_wits, $sig );
-           } else {
-               $curr_rdg .= $pxn_str;
-           }
-       } elsif( $pxn->nodeName eq 'abbr' ) {
-           # It is a witness, stick it in @curr_wits
-           my $wit = $sigil_for{$pxn->getAttribute( 'n' )}
-           push( @curr_wits, $wit ) unless $curr_wits[-1] eq $wit;
-           $seen_wits{$wit} += 1;  # Keep track of a 'seen' count in case there is an a.c.
-           $reading_sigla = 1;
+               push( @$list, $t );
        }
+}
+
+sub _add_readings {
+    my( $c, $app_id ) = @_;
+    my $xn = $apps{$app_id};
+    my $anchor = _anchor_name( $xn->getAttribute( 'to' ) );
+    # Get the lemma, which is all the readings between app and anchor,
+    # excluding other apps or anchors.
+    my @lemma = _return_lemma( $c, $app_id, $anchor );
+    my $lemma_str = join( ' ', grep { $_ !~ /^\#/ } map { $_->text } @lemma );
+    
+    # For each reading, send its text to 'interpret' along with the lemma,
+    # and then save the list of witnesses that these tokens belong to.
+    my %wit_rdgs;  # Maps from witnesses to the variant text
+    my $ctr = 0;
+    my $tag = $app_id;
+    $tag =~ s/^\#APP_(.*)\#$/$1/;
+
+    foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
+        my @text;
+        foreach ( $rdg->childNodes ) {
+            push( @text, _get_base( $_ ) );
+        }
+        my( $interpreted, $flag ) = ( '', undef );
+        if( @text ) {
+               ( $interpreted, $flag ) = interpret( 
+                       join( ' ', map { $_->{'content'} } @text ), $lemma_str );
+        }
+        next if( $interpreted eq $lemma_str ) && !$flag;  # Reading is lemma.
+        
+        my @rdg_nodes;
+        if( $interpreted eq '#LACUNA#' ) {
+               push( @rdg_nodes, $c->add_reading( { id => $tag . "/" . $ctr++,
+                                                                                        is_lacuna => 1 } ) );
+        } else {
+                       foreach my $w ( split( /\s+/, $interpreted ) ) {
+                               my $r = $c->add_reading( { id => $tag . "/" . $ctr++,
+                                                                                  text => $w } );
+                               push( @rdg_nodes, $r );
+                       }
+        }
+        # For each listed wit, save the reading.
+        foreach my $wit ( split( /\s+/, $rdg->getAttribute( 'wit' ) ) ) {
+                       $wit .= $flag if $flag;
+            $wit_rdgs{$wit} = \@rdg_nodes;
+        }
+                       
+        # Does the reading have an ID? If so it probably has a witDetail
+        # attached, and we need to read it.
+        if( $rdg->hasAttribute( 'xml:id' ) ) {
+               warn "Witdetail on meta reading" if $flag; # this could get complicated.
+            my $rid = $rdg->getAttribute( 'xml:id' );
+            my $xpc = XML::LibXML::XPathContext->new( $xn );
+            my @details = $xpc->findnodes( './witDetail[@target="'.$rid.'"]' );
+            foreach my $d ( @details ) {
+                _parse_wit_detail( $d, \%wit_rdgs, \@lemma );
+            }
+        }
+    }       
+        
+    # Now collate the variant readings, since it is not done for us.
+    collate_variants( $c, \@lemma, values %wit_rdgs );
+        
+    # Now add the witness paths for each reading.
+    foreach my $wit_id ( keys %wit_rdgs ) {
+        my $witstr = get_sigil( $wit_id, $c );
+        my $rdg_list = $wit_rdgs{$wit_id};
+        _add_wit_path( $c, $rdg_list, $app_id, $anchor, $witstr );
     }
-    $apparatus->{ interpret( $curr_rdg, $lemma ) } = \@wits if $curr_rdg;
-    $apparatus->{ $lemma } = grep { $seen_wits{$_} == 0 } keys %seen_wits;
+}
 
-    return $apparatus;
+sub _anchor_name {
+    my $xmlid = shift;
+    $xmlid =~ s/^\#//;
+    return sprintf( "#ANCHOR_%s#", $xmlid );
 }
 
+sub _return_lemma {
+    my( $c, $app, $anchor ) = @_;
+    my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ } 
+        $c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ),
+               $c->baselabel );
+    return @nodes;
+}
 
 sub interpret {
-    # A utility function to change apparatus-ese into a full variant.
-    my( $reading, $lemma ) = @_;
-    return $reading if $reading eq $lemma;
-    my $oldreading = $reading;
-    $lemma =~ s/\s+[[:punct:]]+$//;
-    $reading =~ s/\s*\(?sic([\s\w.]+)?\)?$//;
-    my @words = split( /\s+/, $lemma );
-    if( $reading =~ /^(.*) praem.$/ ) {
-       $reading = "$1 $lemma";
-    } elsif( $reading =~ /^(.*) add.$/ ) {
-        $reading = "$lemma $1";
-    } elsif( $reading eq 'om.' ) {
-       $reading = '';
-    } elsif( $reading eq 'inv.' ) {
-        # Hope it is two words.
-        print STDERR "WARNING: want to invert a lemma that is not two words\n" 
-            unless scalar( @words ) == 2;
-        $reading = join( ' ', reverse( @words ) );
-    } elsif( $reading eq 'iter.' ) {
-        # Repeat the lemma
-       $reading = "$lemma $lemma";
-     } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
-        # The first and last N words captured should replace the first and
-        # last N words of the lemma.
-       my @begin = split( /\s+/, $1 );
-       my @end = split( /\s+/, $2 );
-       if( scalar( @begin ) + scalar ( @end ) > scalar( @words ) ) {
-            # Something is wrong and we can't do the splice.
-           print STDERR "ERROR: $lemma is too short to accommodate $oldreading\n";
-       } else {
-           splice( @words, 0, scalar @begin, @begin );
-           splice( @words, -(scalar @end), scalar @end, @end );
-           $reading = join( ' ', @words );
+       # A utility function to change apparatus-ese into a full variant.
+       my( $reading, $lemma ) = @_;
+       return $reading if $reading eq $lemma;
+       my $oldreading = $reading;
+       # $lemma =~ s/\s+[[:punct:]]+$//;
+       my $flag;  # In case of p.c. indications
+       my @words = split( /\s+/, $lemma );
+       if( $reading =~ /^(.*) praem.$/ ) {
+               $reading = "$1 $lemma";
+       } elsif( $reading =~ /^(.*) add.$/ ) {
+               $reading = "$lemma $1";
+       } elsif( $reading =~ /add. alia manu/
+               || $reading =~ /inscriptionem compegi e/ # TODO huh?
+               || $reading eq 'inc.'  # TODO huh?
+               ) {
+               # Ignore it.
+               $reading = $lemma;
+       } elsif( $reading =~ /locus [uv]acuus/
+           || $reading eq 'def.'
+           || $reading eq 'illeg.'
+           || $reading eq 'onleesbar'
+           ) {
+               $reading = '#LACUNA#';
+       } elsif( $reading eq 'om.' ) {
+               $reading = '';
+       } elsif( $reading =~ /^in[uv]\.$/ 
+                        || $reading eq 'transp.' ) {
+               # Hope it is two words.
+               print STDERR "WARNING: want to invert a lemma that is not two words\n" 
+                       unless scalar( @words ) == 2;
+               $reading = join( ' ', reverse( @words ) );
+       } elsif( $reading =~ /^iter(\.|at)$/ ) {
+               # Repeat the lemma
+               $reading = "$lemma $lemma";
+       } elsif( $reading eq 'in marg.' ) {
+               # There was nothing before a correction.
+               $reading = '';
+               $flag = '_ac';
+       } elsif( $reading =~ /^(.*?)\s*\(?sic([\s\w.]+)?\)?$/ ) {
+               # Discard any 'sic' notation; indeed, indeed.
+               $reading = $1;
+       } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
+               # The first and last N words captured should replace the first and
+               # last N words of the lemma.
+               my @begin = split( /\s+/, $1 );
+               my @end = split( /\s+/, $2 );
+               if( scalar( @begin ) + scalar ( @end ) > scalar( @words ) ) {
+                       # Something is wrong and we can't do the splice.
+                       print STDERR "ERROR: $lemma is too short to accommodate $oldreading\n";
+               } else {
+                       splice( @words, 0, scalar @begin, @begin );
+                       splice( @words, -(scalar @end), scalar @end, @end );
+                       $reading = join( ' ', @words );
+               }
        }
+       if( $oldreading ne $reading || $flag || $oldreading =~ /\./ ) {
+               my $int = $reading;
+               $int .= " ($flag)" if $flag;
+               print STDERR "Interpreted $oldreading as $int given $lemma\n";
+       }
+       return( $reading, $flag );
+}
+
+sub _parse_wit_detail {
+    my( $detail, $readings, $lemma ) = @_;
+    my $wit = $detail->getAttribute( 'wit' );
+    my $content = $detail->textContent;
+    if( $content =~ /a\.\s*c\./ ) {
+        # Replace the key in the $readings hash
+        my $rdg = delete $readings->{$wit};
+        $readings->{$wit.'_ac'} = $rdg;
+        $has_ac{$sigil_for{$wit}} = 1;
+    } elsif( $content =~ /p\.\s*c\./ ) {
+        # If no key for the wit a.c. exists, add one pointing to the lemma
+        unless( exists $readings->{$wit.'_ac'} ) {
+            $readings->{$wit.'_ac'} = $lemma;
+        }
+        $has_ac{$sigil_for{$wit}} = 1;
+    } # else don't bother just yet
+}
+
+sub get_sigil {
+    my( $xml_id, $c ) = @_;
+    if( $xml_id =~ /^(.*)_ac$/ ) {
+        my $real_id = $1;
+        return $sigil_for{$real_id} . $c->ac_label;
+    } else {
+        return $sigil_for{$xml_id};
+    }
+}
+
+sub expand_all_paths { 
+    my( $c ) = @_;
+    
+    # Walk the collation and fish out the paths for each witness
+    foreach my $wit ( $c->tradition->witnesses ) {
+        my $sig = $wit->sigil;
+        my @path = grep { !$_->is_ph } 
+            $c->reading_sequence( $c->start, $c->end, $sig );
+        $wit->path( \@path );
+        if( $has_ac{$sig} ) {
+            my @ac_path = grep { !$_->is_ph } 
+                $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label );
+            $wit->uncorrected_path( \@ac_path );
+        }
+    }   
+    
+    # Delete the anchors
+    foreach my $anchor ( grep { $_->is_ph } $c->readings ) {
+        $c->del_reading( $anchor );
+    }
+    # Delete the base edges
+    map { $c->del_path( $_, $c->baselabel ) } $c->paths;
+    
+    # Make the path edges
+    $c->make_witness_paths();
+}
+
+sub _add_wit_path {
+    my( $c, $rdg, $app, $anchor, $wit ) = @_;
+    my @nodes = @$rdg;
+    push( @nodes, $c->reading( $anchor ) );
+    
+    my $cur = $c->reading( $app );
+    foreach my $n ( @nodes ) {
+        $c->add_path( $cur, $n, $wit );
+        $cur = $n;
     }
-    print STDERR "Interpreted $oldreading as $reading given $lemma\n";
-    return $reading;
 }
 
 =back