X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FJSON.pm;h=0734856d361c3e157146532e61083eac6388756d;hb=HEAD;hp=913a21b1cef49b22a331f35f157e0e961d65d596;hpb=30f0df340a2e9ca60316a1175f09f7388c113289;p=scpubgit%2Fstemmatology.git

diff --git a/lib/Text/Tradition/Parser/JSON.pm b/lib/Text/Tradition/Parser/JSON.pm
index 913a21b..0734856 100644
--- a/lib/Text/Tradition/Parser/JSON.pm
+++ b/lib/Text/Tradition/Parser/JSON.pm
@@ -76,6 +76,24 @@ if( $t ) {
     is( scalar $t->witnesses, 3, "Collation has all witnesses" );
 }
 
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C /;
+# Check that we have the right witnesses
+foreach my $wit ( $t->witnesses ) {
+	$seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 3, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+	ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+	my $origtext = join( ' ', @{$wit->text} );
+	my $graphtext = $t->collation->path_text( $wit->sigil );
+	is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
+
 =end testing
 
 =cut
@@ -90,14 +108,28 @@ sub parse {
 	my @witnesses; # Keep the ordered list of our witnesses
     my %ac_wits;  # Track these for later removal
     foreach my $sigil ( map { $_->{'witness'} } @{$table->{'alignment'}} ) {
-        my $wit = $tradition->add_witness( 'sigil' => $sigil );
+        my $wit = $tradition->add_witness( 
+        	'sigil' => $sigil, 'sourcetype' => 'collation' );
         $wit->path( [ $c->start ] );
         push( @witnesses, $wit );
         my $aclabel = $c->ac_label;
         if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) {
-            $ac_wits{$1} = $wit;
+            $ac_wits{$sigil} = $1;
         }
     }
+    
+    # Save the original witness text for consistency checking. We do this
+    # in a separate loop to make sure we have all base witnesses defined,
+    # and to make sure that our munging and comparing later doesn't affect
+    # the original text.
+    foreach my $intext ( @{$table->{'alignment'}} ) {
+    	my $rs = $intext->{'witness'};
+    	my $is_layer = exists $ac_wits{$rs};
+    	my $wit = $tradition->witness( $is_layer ? $ac_wits{$rs} : $rs );
+    	my @tokens = grep { $_ && $_->{'t'} !~ /^\#.*\#$/ } @{$intext->{'tokens'}};
+    	my @words = map { _restore_punct( $_ ) } @tokens;
+    	$is_layer ? $wit->layertext( \@words ) : $wit->text( \@words );
+	}
 
 	# Create the readings in each row
     my $length = exists $table->{'length'}
@@ -138,9 +170,9 @@ sub parse {
     # Fold any a.c. witnesses into their main witness objects, and
     # delete the independent a.c. versions.
     foreach my $a ( keys %ac_wits ) {
-        my $main_wit = $tradition->witness( $a );
+    	my $ac_wit = $tradition->witness( $a );
+        my $main_wit = $tradition->witness( $ac_wits{$a} );
         next unless $main_wit;
-        my $ac_wit = $ac_wits{$a};
         $main_wit->uncorrected_path( $ac_wit->path );
         $tradition->del_witness( $ac_wit );
     }
@@ -151,6 +183,9 @@ sub parse {
 	foreach my $rdg ( grep { $_->is_lacuna } $c->readings ) {
 		$c->del_reading( $rdg ) unless $c->reading_witnesses( $rdg );
 	}
+	
+	# Note that our ranks and common readings are set.
+	$c->_graphcalc_done(1);
 }
 
 =head2 make_nodes( $collation, $index, @tokenlist )
@@ -164,29 +199,54 @@ sub make_nodes {
 	my( $c, $idx, @tokens ) = @_;
 	my %unique;
 	my @readings;
+	my $commonctr = 0;
 	foreach my $j ( 0 .. $#tokens ) {
 		if( $tokens[$j] ) {
-			my $t = $tokens[$j];
+			my $word = _restore_punct( $tokens[$j] );
 			my $rdg;
-			if( exists( $unique{$t->{'t'}} ) ) {
-				$rdg = $unique{$t->{'t'}};
+			if( exists( $unique{$word} ) ) {
+				$rdg = $unique{$word};
 			} else {
-				my %args = ( 'id' => join( ',', $idx, $j+1 ),
-					'json' => $t,
+				my %args = ( 'id' => 'r' . join( '.', $idx, $j+1 ),
+					'rank' => $idx,
+					'text' => $word,
 					'collation' => $c );
-				$args{'is_lacuna'} = 1 if $t->{'t'} eq '#LACUNA#';
+				if( $word eq '#LACUNA#' ) {
+					$args{'is_lacuna'} = 1 
+				} else {
+					$commonctr++;
+				}
 				$rdg = Text::Tradition::Collation::Reading->new( %args );
-				$unique{$t->{'t'}} = $rdg;
+				$unique{$word} = $rdg;
 			}
 			push( @readings, $rdg );
 		} else {
+			$commonctr++;
 			push( @readings, undef );
 		}
 	}
+	if( $commonctr == 1 ) {
+		# Whichever reading isn't a lacuna is a common node.
+		foreach my $rdg ( values %unique ) {
+			next if $rdg->is_lacuna;
+			$rdg->is_common( 1 );
+		}
+	}
 	map { $c->add_reading( $_ ) } values( %unique );
 	return @readings;
 }
 
+# Utility function for parsing JSON from nCritic
+sub _restore_punct {
+	my( $token ) = @_;
+	my $word = $token->{'t'};
+	return $word unless exists $token->{'punctuation'};
+	foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @{$token->{'punctuation'}} ) {
+		substr( $word, $p->{pos}, 0, $p->{char} );
+	}
+	return $word;
+}	
+
 1;
 
 =head1 LICENSE