X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FJSON.pm;h=0734856d361c3e157146532e61083eac6388756d;hb=HEAD;hp=913a21b1cef49b22a331f35f157e0e961d65d596;hpb=30f0df340a2e9ca60316a1175f09f7388c113289;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/JSON.pm b/lib/Text/Tradition/Parser/JSON.pm index 913a21b..0734856 100644 --- a/lib/Text/Tradition/Parser/JSON.pm +++ b/lib/Text/Tradition/Parser/JSON.pm @@ -76,6 +76,24 @@ if( $t ) { is( scalar $t->witnesses, 3, "Collation has all witnesses" ); } +my %seen_wits; +map { $seen_wits{$_} = 0 } qw/ A B C /; +# Check that we have the right witnesses +foreach my $wit ( $t->witnesses ) { + $seen_wits{$wit->sigil} = 1; +} +is( scalar keys %seen_wits, 3, "No extra witnesses were made" ); +foreach my $k ( keys %seen_wits ) { + ok( $seen_wits{$k}, "Witness $k still exists" ); +} + +# Check that the witnesses have the right texts +foreach my $wit ( $t->witnesses ) { + my $origtext = join( ' ', @{$wit->text} ); + my $graphtext = $t->collation->path_text( $wit->sigil ); + is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil ); +} + =end testing =cut @@ -90,14 +108,28 @@ sub parse { my @witnesses; # Keep the ordered list of our witnesses my %ac_wits; # Track these for later removal foreach my $sigil ( map { $_->{'witness'} } @{$table->{'alignment'}} ) { - my $wit = $tradition->add_witness( 'sigil' => $sigil ); + my $wit = $tradition->add_witness( + 'sigil' => $sigil, 'sourcetype' => 'collation' ); $wit->path( [ $c->start ] ); push( @witnesses, $wit ); my $aclabel = $c->ac_label; if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) { - $ac_wits{$1} = $wit; + $ac_wits{$sigil} = $1; } } + + # Save the original witness text for consistency checking. We do this + # in a separate loop to make sure we have all base witnesses defined, + # and to make sure that our munging and comparing later doesn't affect + # the original text. + foreach my $intext ( @{$table->{'alignment'}} ) { + my $rs = $intext->{'witness'}; + my $is_layer = exists $ac_wits{$rs}; + my $wit = $tradition->witness( $is_layer ? $ac_wits{$rs} : $rs ); + my @tokens = grep { $_ && $_->{'t'} !~ /^\#.*\#$/ } @{$intext->{'tokens'}}; + my @words = map { _restore_punct( $_ ) } @tokens; + $is_layer ? $wit->layertext( \@words ) : $wit->text( \@words ); + } # Create the readings in each row my $length = exists $table->{'length'} @@ -138,9 +170,9 @@ sub parse { # Fold any a.c. witnesses into their main witness objects, and # delete the independent a.c. versions. foreach my $a ( keys %ac_wits ) { - my $main_wit = $tradition->witness( $a ); + my $ac_wit = $tradition->witness( $a ); + my $main_wit = $tradition->witness( $ac_wits{$a} ); next unless $main_wit; - my $ac_wit = $ac_wits{$a}; $main_wit->uncorrected_path( $ac_wit->path ); $tradition->del_witness( $ac_wit ); } @@ -151,6 +183,9 @@ sub parse { foreach my $rdg ( grep { $_->is_lacuna } $c->readings ) { $c->del_reading( $rdg ) unless $c->reading_witnesses( $rdg ); } + + # Note that our ranks and common readings are set. + $c->_graphcalc_done(1); } =head2 make_nodes( $collation, $index, @tokenlist ) @@ -164,29 +199,54 @@ sub make_nodes { my( $c, $idx, @tokens ) = @_; my %unique; my @readings; + my $commonctr = 0; foreach my $j ( 0 .. $#tokens ) { if( $tokens[$j] ) { - my $t = $tokens[$j]; + my $word = _restore_punct( $tokens[$j] ); my $rdg; - if( exists( $unique{$t->{'t'}} ) ) { - $rdg = $unique{$t->{'t'}}; + if( exists( $unique{$word} ) ) { + $rdg = $unique{$word}; } else { - my %args = ( 'id' => join( ',', $idx, $j+1 ), - 'json' => $t, + my %args = ( 'id' => 'r' . join( '.', $idx, $j+1 ), + 'rank' => $idx, + 'text' => $word, 'collation' => $c ); - $args{'is_lacuna'} = 1 if $t->{'t'} eq '#LACUNA#'; + if( $word eq '#LACUNA#' ) { + $args{'is_lacuna'} = 1 + } else { + $commonctr++; + } $rdg = Text::Tradition::Collation::Reading->new( %args ); - $unique{$t->{'t'}} = $rdg; + $unique{$word} = $rdg; } push( @readings, $rdg ); } else { + $commonctr++; push( @readings, undef ); } } + if( $commonctr == 1 ) { + # Whichever reading isn't a lacuna is a common node. + foreach my $rdg ( values %unique ) { + next if $rdg->is_lacuna; + $rdg->is_common( 1 ); + } + } map { $c->add_reading( $_ ) } values( %unique ); return @readings; } +# Utility function for parsing JSON from nCritic +sub _restore_punct { + my( $token ) = @_; + my $word = $token->{'t'}; + return $word unless exists $token->{'punctuation'}; + foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @{$token->{'punctuation'}} ) { + substr( $word, $p->{pos}, 0, $p->{char} ); + } + return $word; +} + 1; =head1 LICENSE