X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FCTE.pm;h=e706906b6a076c248007a0b6a08e15488344edb6;hb=4633f9e4c01ef98b3bf0a2851886c3e301071b27;hp=b21da1b3600d06b4187975722ceec6fadb02f0be;hpb=f6e19c7c687a367623a47a7991c10424ac378d02;p=scpubgit%2Fstemmatology.git

diff --git a/lib/Text/Tradition/Parser/CTE.pm b/lib/Text/Tradition/Parser/CTE.pm
index b21da1b..e706906 100644
--- a/lib/Text/Tradition/Parser/CTE.pm
+++ b/lib/Text/Tradition/Parser/CTE.pm
@@ -85,15 +85,14 @@ sub parse {
 	foreach my $item ( @base_text ) {
 	    my $r;
         if( $item->{'type'} eq 'token' ) {
-            $r = $c->add_reading( 'n'.$counter++ );
-            $r->text( $item->{'content'} );
+            $r = $c->add_reading( { id => 'n'.$counter++, 
+            						text => $item->{'content'} } );
         } elsif ( $item->{'type'} eq 'anchor' ) {
-            $r = $c->add_reading( '#ANCHOR_' . $item->{'content'} . '#' );
-            $r->is_meta(1);
+            $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#', 
+            						is_ph => 1 } );
         } elsif ( $item->{'type'} eq 'app' ) {
             my $tag = '#APP_' . $counter++ . '#';
-            $r = $c->add_reading( $tag );
-            $r->is_meta(1);
+            $r = $c->add_reading( { id => $tag, is_ph => 1 } );
             $apps{$tag} = $item->{'content'};
         }
         $c->add_path( $last, $r, $c->baselabel );
@@ -111,6 +110,10 @@ sub parse {
     # Finally, add explicit witness paths, remove the base paths, and remove
     # the app/anchor tags.
     expand_all_paths( $c );
+
+    # Save the text for each witness so that we can ensure consistency
+    # later on
+	$tradition->collation->text_from_paths();	
 }
 
 sub _stringify_sigil {
@@ -132,8 +135,6 @@ sub _get_base {
 		my $str = $xn->data;
 		$str =~ s/^\s+//;
 		foreach my $w ( split( /\s+/, $str ) ) {
-		    # HACK to cope with mismatched doublequotes
-		    $w =~ s/\"//g;
 			push( @readings, { 'type' => 'token', 'content' => $w } );
 		}
 	} elsif( $xn->nodeName eq 'hi' ) {
@@ -169,29 +170,39 @@ sub _add_readings {
     my $ctr = 0;
     my $tag = $app_id;
     $tag =~ s/^\#APP_(.*)\#$/$1/;
-    $DB::single = 1 if $tag < 2;
     foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
         my @text;
         foreach ( $rdg->childNodes ) {
             push( @text, _get_base( $_ ) );
         }
-        my $interpreted = @text 
-            ? interpret( join( ' ', map { $_->{'content'} } @text ), $lemma_str ) 
-            : '';
-        my @rdg_nodes;
-        foreach my $w ( split( /\s+/, $interpreted ) ) {
-            my $r = $c->add_reading( $tag . "/" . $ctr++ );
-            $r->text( $w );
-            push( @rdg_nodes, $r );
+        my( $interpreted, $flag ) = ( '', undef );
+        if( @text ) {
+        	( $interpreted, $flag ) = interpret( 
+        		join( ' ', map { $_->{'content'} } @text ), $lemma_str );
         }
+        next if( $interpreted eq $lemma_str ) && !$flag;  # Reading is lemma.
         
+        my @rdg_nodes;
+        if( $interpreted eq '#LACUNA#' ) {
+        	push( @rdg_nodes, $c->add_reading( { id => $tag . "/" . $ctr++,
+        										 is_lacuna => 1 } ) );
+        } else {
+			foreach my $w ( split( /\s+/, $interpreted ) ) {
+				my $r = $c->add_reading( { id => $tag . "/" . $ctr++,
+										   text => $w } );
+				push( @rdg_nodes, $r );
+			}
+        }
         # For each listed wit, save the reading.
         foreach my $wit ( split( /\s+/, $rdg->getAttribute( 'wit' ) ) ) {
+			$wit .= $flag if $flag;
             $wit_rdgs{$wit} = \@rdg_nodes;
         }
+        		
         # Does the reading have an ID? If so it probably has a witDetail
         # attached, and we need to read it.
         if( $rdg->hasAttribute( 'xml:id' ) ) {
+        	warn "Witdetail on meta reading" if $flag; # this could get complicated.
             my $rid = $rdg->getAttribute( 'xml:id' );
             my $xpc = XML::LibXML::XPathContext->new( $xn );
             my @details = $xpc->findnodes( './witDetail[@target="'.$rid.'"]' );
@@ -202,8 +213,8 @@ sub _add_readings {
     }       
         
     # Now collate the variant readings, since it is not done for us.
-    collate_variants( $c, \@lemma, values %wit_rdgs );    
-    
+    collate_variants( $c, \@lemma, values %wit_rdgs );
+        
     # Now add the witness paths for each reading.
     foreach my $wit_id ( keys %wit_rdgs ) {
         my $witstr = get_sigil( $wit_id, $c );
@@ -220,10 +231,9 @@ sub _anchor_name {
 
 sub _return_lemma {
     my( $c, $app, $anchor ) = @_;
-    my $app_node = $c->graph->node( $app );
-    my $anchor_node = $c->graph->node( $anchor );
-    my @nodes = grep { $_->name !~ /^\#A(PP|NCHOR)/ } 
-        $c->reading_sequence( $app_node, $anchor_node, $c->baselabel );
+    my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ } 
+        $c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ),
+        	$c->baselabel );
     return @nodes;
 }
 
@@ -234,26 +244,37 @@ sub interpret {
 	my $oldreading = $reading;
 	# $lemma =~ s/\s+[[:punct:]]+$//;
 	# $reading =~ s/\s*\(?sic([\s\w.]+)?\)?$//;
+	my $flag;  # In case of p.c. indications
 	my @words = split( /\s+/, $lemma );
 	if( $reading =~ /^(.*) praem.$/ ) {
 		$reading = "$1 $lemma";
 	} elsif( $reading =~ /^(.*) add.$/ ) {
 		$reading = "$lemma $1";
-	} elsif( $reading eq 'om.' 
-	    || $reading =~ /locus [uv]acuus/
-	    || $reading =~ /inscriptionem compegi e/ # TODO huh?
-	    || $reading eq 'def.' # TODO huh?
+	} elsif( $reading =~ /add. alia manu/
+		|| $reading =~ /inscriptionem compegi e/ # TODO huh?
+		|| $reading eq 'inc.'  # TODO huh?
+ 		) {
+		# Ignore it.
+		$reading = $lemma;
+	} elsif( $reading =~ /locus [uv]acuus/
+	    || $reading eq 'def.'
 	    ) {
+		$reading = '#LACUNA#';
+	} elsif( $reading eq 'om.' ) {
 		$reading = '';
-	} elsif( $reading eq 'inv.' ) {
+	} elsif( $reading =~ /^in[uv]\.$/ ) {
 		# Hope it is two words.
 		print STDERR "WARNING: want to invert a lemma that is not two words\n" 
 			unless scalar( @words ) == 2;
 		$reading = join( ' ', reverse( @words ) );
-	} elsif( $reading eq 'iter.' ) {
+	} elsif( $reading =~ /^iter(\.|at)$/ ) {
 		# Repeat the lemma
 		$reading = "$lemma $lemma";
-	 } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
+	} elsif( $reading eq 'in marg.' ) {
+		# There was nothing before a correction.
+		$reading = '';
+		$flag = '_ac';
+	} elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
 		# The first and last N words captured should replace the first and
 		# last N words of the lemma.
 		my @begin = split( /\s+/, $1 );
@@ -267,8 +288,12 @@ sub interpret {
 			$reading = join( ' ', @words );
 		}
 	}
-	print STDERR "Interpreted $oldreading as $reading given $lemma\n";
-	return $reading;
+	if( $oldreading ne $reading || $flag || $oldreading =~ /\./ ) {
+		my $int = $reading;
+		$int .= " ($flag)" if $flag;
+		print STDERR "Interpreted $oldreading as $int given $lemma\n";
+	}
+	return( $reading, $flag );
 }
 
 sub _parse_wit_detail {
@@ -305,22 +330,22 @@ sub expand_all_paths {
     # Walk the collation and fish out the paths for each witness
     foreach my $wit ( $c->tradition->witnesses ) {
         my $sig = $wit->sigil;
-        my @path = grep { $_->name !~ /(APP|ANCHOR)/ } 
+        my @path = grep { !$_->is_ph } 
             $c->reading_sequence( $c->start, $c->end, $sig );
         $wit->path( \@path );
         if( $has_ac{$sig} ) {
-            my @ac_path = grep { $_->name !~ /(APP|ANCHOR)/ } 
-                $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label, $sig );
+            my @ac_path = grep { !$_->is_ph } 
+                $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label );
             $wit->uncorrected_path( \@ac_path );
         }
     }   
     
     # Delete the anchors
-    foreach my $anchor ( grep { $_->name =~ /(APP|ANCHOR)/ } $c->readings ) {
+    foreach my $anchor ( grep { $_->is_ph } $c->readings ) {
         $c->del_reading( $anchor );
     }
-    # Delete all edges
-    map { $c->del_path( $_ ) } $c->paths;
+    # Delete the base edges
+    map { $c->del_path( $_, $c->baselabel ) } $c->paths;
     
     # Make the path edges
     $c->make_witness_paths();