From: Tara L Andrews Date: Wed, 14 Mar 2012 14:36:16 +0000 (+0100) Subject: CollateX format for GraphML output changed; parser update X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=679f17e1a60a81370df8cbb49b94a2b5d19e3a98;p=scpubgit%2Fstemmatology.git CollateX format for GraphML output changed; parser update --- diff --git a/lib/Text/Tradition/Analysis.pm b/lib/Text/Tradition/Analysis.pm index 8bfaf1a..fda5137 100644 --- a/lib/Text/Tradition/Analysis.pm +++ b/lib/Text/Tradition/Analysis.pm @@ -56,6 +56,8 @@ is 0 (i.e. the first). =item * merge_types - Specify a list of relationship types, where related readings should be treated as identical for the purposes of analysis. +=item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants. + =back =begin testing @@ -150,7 +152,13 @@ sub run_analysis { my %lacunae; foreach my $rank ( @ranks ) { my $missing = [ @lacunose ]; - push( @groups, group_variants( $tradition, $rank, $missing, \@collapse ) ); + my $rankgroup = group_variants( $tradition, $rank, $missing, \@collapse ); + if( $opts{'exclude_type1'} ) { + # Check to see whether this is a "useful" group. + my( $rdgs, $grps ) = _useful_variant( $rankgroup ); + next unless @$rdgs; + } + push( @groups, $rankgroup ); $lacunae{$rank} = $missing; } $DB::single = 1; @@ -187,8 +195,8 @@ relationships in @merge_relationship_types as equivalent. $lacunose should be a reference to an array, to which the sigla of lacunose witnesses at this rank will be appended. -Returns two ordered lists $readings, $groups, where $readings->[$n] is attested -by the witnesses listed in $groups->[$n]. +Returns a hash $group_readings where $rdg is attested by the witnesses listed +in $group_readings->{$rdg}. 
=cut diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 7c22786..68b2adf 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -338,15 +338,15 @@ $c->flatten_ranks(); ok( $c->reading( 'n21p0' ), "New reading exists" ); is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" ); -# Combine n3 and n4 +# Combine n3 and n4 ( with his ) $c->merge_readings( 'n3', 'n4', 1 ); ok( !$c->reading('n4'), "Reading n4 is gone" ); is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" ); -# Collapse n25 and n26 -$c->merge_readings( 'n25', 'n26' ); -ok( !$c->reading('n26'), "Reading n26 is gone" ); -is( $c->reading('n25')->text, 'rood', "Reading n25 has an unchanged word" ); +# Collapse n9 and n10 ( rood / root ) +$c->merge_readings( 'n9', 'n10' ); +ok( !$c->reading('n10'), "Reading n10 is gone" ); +is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" ); # Combine n21 and n21p0 my $remaining = $c->reading('n21'); @@ -1407,7 +1407,7 @@ ok( $c->has_cached_table, "Alignment table was cached" ); is( $c->alignment_table, $table, "Cached table returned upon second call" ); $c->calculate_ranks; is( $c->alignment_table, $table, "Cached table retained with no rank change" ); -$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } ); +$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } ); isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" ); =end testing @@ -1585,7 +1585,7 @@ my @common = $c->calculate_common_readings(); is( scalar @common, 8, "Found correct number of common readings" ); my @marked = sort $c->common_readings(); is( scalar @common, 8, "All common readings got marked as such" ); -my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /; +my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /; is_deeply( \@marked, \@expected, "Found correct list of common readings" ); =end testing @@ -1672,14 +1672,14 @@ my $t = Text::Tradition->new( ); 
my $c = $t->collation; -is( $c->common_predecessor( 'n9', 'n23' )->id, +is( $c->common_predecessor( 'n24', 'n23' )->id, 'n20', "Found correct common predecessor" ); -is( $c->common_successor( 'n9', 'n23' )->id, +is( $c->common_successor( 'n24', 'n23' )->id, '#END#', "Found correct common successor" ); is( $c->common_predecessor( 'n19', 'n17' )->id, 'n16', "Found correct common predecessor for readings on same path" ); -is( $c->common_successor( 'n21', 'n26' )->id, +is( $c->common_successor( 'n21', 'n10' )->id, '#END#', "Found correct common successor for readings on same path" ); =end testing diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index f7d4f04..55d6943 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -41,12 +41,12 @@ my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } ); is( scalar @v1, 1, "Added a single relationship" ); is( $v1[0]->[0], 'n21', "Got correct node 1" ); is( $v1[0]->[1], 'n22', "Got correct node 2" ); -my @v2 = $c->add_relationship( 'n9', 'n23', +my @v2 = $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling', 'scope' => 'global' } ); is( scalar @v2, 2, "Added a global relationship with two instances" ); @v1 = $c->del_relationship( 'n22', 'n21' ); is( scalar @v1, 1, "Deleted first relationship" ); -@v2 = $c->del_relationship( 'n8', 'n13' ); +@v2 = $c->del_relationship( 'n12', 'n13' ); is( scalar @v2, 2, "Deleted second global relationship" ); my @v3 = $c->del_relationship( 'n1', 'n2' ); is( scalar @v3, 0, "Nothing deleted on non-existent relationship" ); diff --git a/lib/Text/Tradition/Parser/CollateX.pm b/lib/Text/Tradition/Parser/CollateX.pm index 7191f7e..68948d5 100644 --- a/lib/Text/Tradition/Parser/CollateX.pm +++ b/lib/Text/Tradition/Parser/CollateX.pm @@ -56,7 +56,7 @@ my $t = Text::Tradition->new( 'file' => $cxfile, ); -is( ref( $t ), 'Text::Tradition', "Parsed our own 
GraphML" ); +is( ref( $t ), 'Text::Tradition', "Parsed a CollateX input" ); if( $t ) { is( scalar $t->collation->readings, 26, "Collation has all readings" ); is( scalar $t->collation->paths, 32, "Collation has all paths" ); @@ -66,7 +66,7 @@ if( $t ) { my $transposed = $t->collation->reading( 'n15' ); my @related = $transposed->related_readings; is( scalar @related, 1, "Reading links to transposed version" ); - is( $related[0]->id, 'n17', "Correct transposition link" ); + is( $related[0]->id, 'n18', "Correct transposition link" ); } =end testing @@ -74,8 +74,9 @@ if( $t ) { =cut my $IDKEY = 'number'; -my $CONTENTKEY = 'token'; -my $TRANSKEY = 'identical'; +my $CONTENTKEY = 'tokens'; +my $EDGETYPEKEY = 'type'; +my $WITKEY = 'witnesses'; sub parse { my( $tradition, $opts ) = @_; @@ -83,78 +84,58 @@ sub parse { my $collation = $tradition->collation; # First add the readings to the graph. - my $extra_data = {}; # Keep track of info to be processed after all - # nodes have been created + ## Assume the start node has no text and id 0, and the end node has + ## no text and ID [number of nodes] - 1. + my $endnode = scalar @{$graph_data->{'nodes'}} - 1; foreach my $n ( @{$graph_data->{'nodes'}} ) { unless( defined $n->{$IDKEY} && defined $n->{$CONTENTKEY} ) { - warn "Did not find an ID or token for graph node, can't add it"; + if( defined $n->{$IDKEY} && $n->{$IDKEY} == 0 ) { + # It's the start node. + $n->{$IDKEY} = $collation->start->id; + } elsif ( defined $n->{$IDKEY} && $n->{$IDKEY} == $endnode ) { + # It's the end node. + $n->{$IDKEY} = $collation->end->id; + } else { + # Something is probably wrong. + warn "Did not find an ID or token for graph node, can't add it"; + } next; } - my %node_data = %$n; + # Node ID should be an XML name, so prepend an 'n' if necessary. + if( $n->{$IDKEY} =~ /^\d/ ) { + $n->{$IDKEY} = 'n' . $n->{$IDKEY}; + } + # Create the reading. 
my $gnode_args = { - 'id' => delete $node_data{$IDKEY}, - 'text' => delete $node_data{$CONTENTKEY}, + 'id' => $n->{$IDKEY}, + 'text' => $n->{$CONTENTKEY}, }; my $gnode = $collation->add_reading( $gnode_args ); - - # Whatever is left is extra info to be processed later, - # e.g. a transposition link. - if( keys %node_data ) { - $extra_data->{$gnode->id} = \%node_data; - } } # Now add the path edges. foreach my $e ( @{$graph_data->{'edges'}} ) { - my %edge_data = %$e; - my $from = delete $edge_data{'source'}; - my $to = delete $edge_data{'target'}; - - # In CollateX, we have a distinct witness data ID per witness, - # so that we can have multiple witnesses per edge. We want to - # translate this to one witness per edge in our own - # representation. - foreach my $ekey ( keys %edge_data ) { - my $wit = $edge_data{$ekey}; - # Create the witness object if it does not yet exist. - unless( $tradition->witness( $wit ) ) { - $tradition->add_witness( 'sigil' => $wit ); - } - $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit ); + my $from = $e->{'source'}; + my $to = $e->{'target'}; + + ## Edge data keys are ID (which we don't need), witnesses, and type. + ## Type can be 'path' or 'relationship'; + ## witnesses is a comma-separated list. + if( $e->{$EDGETYPEKEY} eq 'path' ) { + ## Add the path for each witness listed. + # Create the witness objects if they do not yet exist. + foreach my $wit ( split( /, /, $e->{$WITKEY} ) ) { + unless( $tradition->witness( $wit ) ) { + $tradition->add_witness( 'sigil' => $wit ); + } + $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit ); + } + } else { # type 'relationship' + $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, + { 'type' => 'transposition' } ); } } # Process the extra node data if it exists. 
- foreach my $nodeid ( keys %$extra_data ) { - my $ed = $extra_data->{$nodeid}; - if( exists $ed->{$TRANSKEY} ) { - my $tn_reading = $collation->reading( $nodeid ); - my $main_reading = $collation->reading( $ed->{$TRANSKEY} ); - if( $collation->linear ) { - $collation->add_relationship( $tn_reading, $main_reading, - { type => 'transposition' } ); - } else { - $collation->merge_readings( $main_reading, $tn_reading ); - } - } # else we don't have any other tags to process yet. - } - - # Find the beginning and end nodes of the graph. The beginning node - # has no incoming edges; the end node has no outgoing edges. - my( $begin_node, $end_node ); - my @starts = $collation->sequence->source_vertices(); - my @ends = $collation->sequence->sink_vertices(); - if( @starts != 1 ) { - warn "Found more or less than one start vertex: @starts"; - } else { - $collation->merge_readings( $collation->start, @starts ); - } - if( @ends != 1 ) { - warn "Found more or less than one end vertex: @ends"; - } else { - $collation->merge_readings( $collation->end, @ends ); - } - # Rank the readings. $collation->calculate_common_readings(); # will implicitly rank diff --git a/stemmaweb/lib/stemmaweb/Controller/Stexaminer.pm b/stemmaweb/lib/stemmaweb/Controller/Stexaminer.pm index 1449ad2..cb9990f 100644 --- a/stemmaweb/lib/stemmaweb/Controller/Stexaminer.pm +++ b/stemmaweb/lib/stemmaweb/Controller/Stexaminer.pm @@ -37,7 +37,7 @@ sub index :Path :Args(1) { $c->stash->{text_title} = $tradition->name; $c->stash->{template} = 'stexaminer.tt'; # TODO Run the analysis as AJAX from the loaded page. 
- my $t = run_analysis( $tradition ); + my $t = run_analysis( $tradition, 'exclude_type1' => 1 ); # Stringify the reading groups foreach my $loc ( @{$t->{'variants'}} ) { my $mst = wit_stringify( $loc->{'missing'} ); diff --git a/stemmaweb/root/css/relationship.css b/stemmaweb/root/css/relationship.css index 0d85213..e2e2f25 100644 --- a/stemmaweb/root/css/relationship.css +++ b/stemmaweb/root/css/relationship.css @@ -74,6 +74,15 @@ span.apimore { padding-top: 5px; padding-bottom: 5px; } +#loading_overlay { + display: none; + position: absolute; + height: 500px; + width: 89%; + z-index: 2; + opacity: 0.7; + background-color: #c5c5c5; +} #dialog_overlay { display: none; position: absolute; diff --git a/stemmaweb/root/js/relationship.js b/stemmaweb/root/js/relationship.js index 414b78c..64bd041 100644 --- a/stemmaweb/root/js/relationship.js +++ b/stemmaweb/root/js/relationship.js @@ -21,6 +21,8 @@ function getRelationshipURL() { } function svgEnlargementLoaded() { + //Give some visual evidence that we are working + $('#loading_overlay').show(); //Set viewbox widht and height to widht and height of $('#svgenlargement svg'). //This is essential to make sure zooming and panning works properly. $('#svgenlargement ellipse').attr( {stroke:'green', fill:'#b3f36d'} ); @@ -42,6 +44,7 @@ function svgEnlargementLoaded() { //used to calculate min and max zoom level: start_element_height = $("#svgenlargement .node title:contains('#START#')").siblings('ellipse')[0].getBBox().height; add_relations(); + // $('#loading_overlay').hide(); } function add_relations() { diff --git a/stemmaweb/root/src/relate.tt b/stemmaweb/root/src/relate.tt index 72ea8e1..07b9b0a 100644 --- a/stemmaweb/root/src/relate.tt +++ b/stemmaweb/root/src/relate.tt @@ -19,6 +19,7 @@ $(function() {
+
diff --git a/t/data/Collatex-16.xml b/t/data/Collatex-16.xml index 7554174..869d665 100644 --- a/t/data/Collatex-16.xml +++ b/t/data/Collatex-16.xml @@ -1,232 +1,294 @@ - - - - - - - - - + + + + + + + + - # - n0 + 0 + - when - n1 + 1 + when - april - n2 + 2 + april - with - n3 + 3 + with - his - n4 + 4 + his - showers - n5 + 5 + showers - sweet - n6 + 6 + sweet - with - n7 - - - teh - n8 - - - teh - n9 - + 7 + with + + + 8 + april + + + 11 + fruit + + + 12 + the + - april - n11 - n2 + 13 + teh - fruit - n12 + 14 + march - the - n13 + 15 + drought - drought - n14 + 16 + of - march - n15 - n17 + 17 + march - of - n16 + 18 + drought - march - n17 + 19 + has - drought - n18 - n14 + 20 + pierced - has - n19 + 21 + unto - pierced - n20 + 22 + to - unto - n21 + 23 + teh - to - n22 + 24 + the - the - n23 + 9 + rood - - rood - n25 - - - root - n26 + + 10 + root - - # - n27 + + 25 + - A - C - B + 0 + path + A, B, C - A + 1 + path + A - - A + + 2 + path + B, C - - A + + 3 + path + A - - A + + 4 + path + B, C - A - B - C + 5 + path + A, B, C + + + 6 + path + A + + + 7 + path + A, B + + + 8 + path + C + + + 9 + path + A, B, C + + + 10 + path + B, C + + + 11 + path + A - - A - B - C + + 12 + path + B - - C - B + + 13 + path + C - - C - - - C - - - C - B + + 14 + path + A - - A + + 15 + path + A - - C - B + + 16 + path + A, C - - A - B + + 17 + path + B - - A + + 18 + path + A - - B + + 19 + path + A, C - - A - C + + 20 + path + B - - B + + 22 + path + A, C - - A - C + + 23 + path + B - - B + + 24 + path + A, B, C - - A - C + + 25 + path + A - - B + + 26 + path + B - - A - C - B + + 27 + path + C - - A + + 28 + path + A - - B + + 29 + path + B - - A + + 30 + path + C - - B + + 31 + path + A, B - - C + + 32 + path + C - - C + + 33 + path + A, B - - A - B + + 36 + transposition - - A - B + + 37 + transposition - - C + + 38 + transposition - diff --git a/t/stemma.t b/t/stemma.t index 697d0d2..404b686 100644 --- a/t/stemma.t +++ b/t/stemma.t @@ -18,9 +18,9 @@ my $tradition = 
Text::Tradition->new( ); # Set up some relationships my $c = $tradition->collation; -$c->add_relationship( 'n25', 'n26', { 'type' => 'spelling' } ); -$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } ); -$c->add_relationship( 'n8', 'n13', { 'type' => 'spelling' } ); +$c->add_relationship( 'n23', 'n24', { 'type' => 'spelling' } ); +$c->add_relationship( 'n9', 'n10', { 'type' => 'spelling' } ); +$c->add_relationship( 'n12', 'n13', { 'type' => 'spelling' } ); $c->calculate_ranks(); my $stemma = $tradition->add_stemma( dotfile => 't/data/simple.dot' ); diff --git a/t/text_tradition_analysis.t b/t/text_tradition_analysis.t index 8340d4c..74428ef 100644 --- a/t/text_tradition_analysis.t +++ b/t/text_tradition_analysis.t @@ -55,8 +55,7 @@ foreach my $row ( @{$data->{'variants'}} ) { unless( exists $expected_genealogical{$row->{'id'}} ) { $expected_genealogical{$row->{'id'}} = 1; } - my $gen_bool = $row->{'genealogical'} ? 1 : 0; - is( $gen_bool, $expected_genealogical{$row->{'id'}}, + is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}}, "Got correct genealogical flag for row " . 
$row->{'id'} ); } is( $data->{'variant_count'}, 58, "Got right total variant number" ); diff --git a/t/text_tradition_collation.t b/t/text_tradition_collation.t index 0dcb081..8c29c02 100644 --- a/t/text_tradition_collation.t +++ b/t/text_tradition_collation.t @@ -30,15 +30,15 @@ $c->flatten_ranks(); ok( $c->reading( 'n21p0' ), "New reading exists" ); is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" ); -# Combine n3 and n4 +# Combine n3 and n4 ( with his ) $c->merge_readings( 'n3', 'n4', 1 ); ok( !$c->reading('n4'), "Reading n4 is gone" ); is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" ); -# Collapse n25 and n26 -$c->merge_readings( 'n25', 'n26' ); -ok( !$c->reading('n26'), "Reading n26 is gone" ); -is( $c->reading('n25')->text, 'rood', "Reading n25 has an unchanged word" ); +# Collapse n9 and n10 ( rood / root ) +$c->merge_readings( 'n9', 'n10' ); +ok( !$c->reading('n10'), "Reading n10 is gone" ); +is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" ); # Combine n21 and n21p0 my $remaining = $c->reading('n21'); @@ -106,7 +106,7 @@ ok( $c->has_cached_table, "Alignment table was cached" ); is( $c->alignment_table, $table, "Cached table returned upon second call" ); $c->calculate_ranks; is( $c->alignment_table, $table, "Cached table retained with no rank change" ); -$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } ); +$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } ); isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" ); } @@ -128,7 +128,7 @@ my @common = $c->calculate_common_readings(); is( scalar @common, 8, "Found correct number of common readings" ); my @marked = sort $c->common_readings(); is( scalar @common, 8, "All common readings got marked as such" ); -my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /; +my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /; is_deeply( \@marked, \@expected, "Found correct list of common readings" ); } @@ -146,14 
+146,14 @@ my $t = Text::Tradition->new( ); my $c = $t->collation; -is( $c->common_predecessor( 'n9', 'n23' )->id, +is( $c->common_predecessor( 'n24', 'n23' )->id, 'n20', "Found correct common predecessor" ); -is( $c->common_successor( 'n9', 'n23' )->id, +is( $c->common_successor( 'n24', 'n23' )->id, '#END#', "Found correct common successor" ); is( $c->common_predecessor( 'n19', 'n17' )->id, 'n16', "Found correct common predecessor for readings on same path" ); -is( $c->common_successor( 'n21', 'n26' )->id, +is( $c->common_successor( 'n21', 'n10' )->id, '#END#', "Found correct common successor for readings on same path" ); } diff --git a/t/text_tradition_collation_relationshipstore.t b/t/text_tradition_collation_relationshipstore.t index 9f9ed65..6b9db2b 100644 --- a/t/text_tradition_collation_relationshipstore.t +++ b/t/text_tradition_collation_relationshipstore.t @@ -27,12 +27,12 @@ my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } ); is( scalar @v1, 1, "Added a single relationship" ); is( $v1[0]->[0], 'n21', "Got correct node 1" ); is( $v1[0]->[1], 'n22', "Got correct node 2" ); -my @v2 = $c->add_relationship( 'n9', 'n23', +my @v2 = $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling', 'scope' => 'global' } ); is( scalar @v2, 2, "Added a global relationship with two instances" ); @v1 = $c->del_relationship( 'n22', 'n21' ); is( scalar @v1, 1, "Deleted first relationship" ); -@v2 = $c->del_relationship( 'n8', 'n13' ); +@v2 = $c->del_relationship( 'n12', 'n13' ); is( scalar @v2, 2, "Deleted second global relationship" ); my @v3 = $c->del_relationship( 'n1', 'n2' ); is( scalar @v3, 0, "Nothing deleted on non-existent relationship" ); diff --git a/t/text_tradition_parser_collatex.t b/t/text_tradition_parser_collatex.t index 733cf1b..3b95ecd 100644 --- a/t/text_tradition_parser_collatex.t +++ b/t/text_tradition_parser_collatex.t @@ -20,7 +20,7 @@ my $t = Text::Tradition->new( 'file' => $cxfile, ); -is( ref( $t ), 'Text::Tradition', "Parsed our 
own GraphML" ); +is( ref( $t ), 'Text::Tradition', "Parsed a CollateX input" ); if( $t ) { is( scalar $t->collation->readings, 26, "Collation has all readings" ); is( scalar $t->collation->paths, 32, "Collation has all paths" ); @@ -30,7 +30,7 @@ if( $t ) { my $transposed = $t->collation->reading( 'n15' ); my @related = $transposed->related_readings; is( scalar @related, 1, "Reading links to transposed version" ); - is( $related[0]->id, 'n17', "Correct transposition link" ); + is( $related[0]->id, 'n18', "Correct transposition link" ); } }