From: Tara L Andrews Date: Wed, 1 Feb 2012 09:46:23 +0000 (+0100) Subject: detect and mark common readings X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d4b75f4494467600ed48a036d3805d5cfb418394;p=scpubgit%2Fstemmatology.git detect and mark common readings --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 9f40e07..4370f78 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -543,9 +543,13 @@ sub as_dot { $used{$reading->id} = 1; # Need not output nodes without separate labels next if $reading->id eq $reading->text; + my $rattrs; my $label = $reading->text; $label =~ s/\"/\\\"/g; - $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label ); + $rattrs->{'label'} = $label; + # TODO make this an option? + # $rattrs->{'fillcolor'} = 'green' if $reading->is_common; + $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) ); } # Add the real edges @@ -1088,6 +1092,19 @@ sub _witnesses_of_label { return @answer; } +=head2 common_readings + +Returns the list of common readings in the graph (i.e. those readings that are +shared by all non-lacunose witnesses.) + +=cut + +sub common_readings { + my $self = shift; + my @common = grep { $_->is_common } $self->readings; + return @common; +} + =head2 path_text( $sigil, $mainsigil [, $start, $end ] ) Returns the text of a witness (plus its backup, if we are using a layer) @@ -1281,6 +1298,58 @@ sub flatten_ranks { } } +=head2 calculate_common_readings + +Goes through the graph identifying the readings that appear in every witness +(apart from those with lacunae at that spot.) Marks them as common and returns +the list. + +=begin testing + +use Text::Tradition; + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + +my @common = $c->calculate_common_readings(); +is( scalar @common, 8, "Found correct number of common readings" ); +my @marked = sort $c->common_readings(); +is( scalar @common, 8, "All common readings got marked as such" ); +my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /; +is_deeply( \@marked, \@expected, "Found correct list of common readings" ); + +=end testing + +=cut + +sub calculate_common_readings { + my $self = shift; + my @common; + my $table = $self->make_alignment_table( 1 ); + foreach my $idx ( 0 .. $table->{'length'} - 1 ) { + my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}}; + my %hash; + foreach my $r ( @row ) { + if( $r ) { + $hash{$r->id} = $r unless $r->is_meta; + } else { + $hash{'UNDEF'} = $r; + } + } + if( keys %hash == 1 && !exists $hash{'UNDEF'} ) { + my( $r ) = values %hash; + $r->is_common( 1 ); + push( @common, $r ); + } + } + return @common; +} + =head2 text_from_paths Calculate the text array for all witnesses from the path, for later consistency diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 746fd5f..1d700a6 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -105,6 +105,12 @@ has 'is_ph' => ( isa => 'Bool', default => undef, ); + +has 'is_common' => ( + is => 'rw', + isa => 'Bool', + default => undef, + ); has 'rank' => ( is => 'rw', diff --git a/t/text_tradition_collation.t b/t/text_tradition_collation.t index 322cc39..38b698b 100644 --- a/t/text_tradition_collation.t +++ b/t/text_tradition_collation.t @@ -56,6 +56,28 @@ my $t = Text::Tradition->new( ); my $c = $t->collation; +my @common = $c->calculate_common_readings(); +is( scalar @common, 8, "Found correct number of common readings" ); +my @marked = sort $c->common_readings(); +is( scalar @common, 8, "All common readings got marked as such" ); +my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /; +is_deeply( \@marked, \@expected, "Found correct list of common readings" ); +} + + + +# =begin testing +{ +use Text::Tradition; + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + is( $c->common_predecessor( 'n9', 'n23' )->id, 'n20', "Found correct common predecessor" ); is( $c->common_successor( 'n9', 'n23' )->id,