From: Tara L Andrews Date: Thu, 12 May 2011 13:51:41 +0000 (+0200) Subject: use transposition info from CollateX X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c557b2096054261ecf0530d75371fc45378e127a;p=scpubgit%2Fstemmatology.git use transposition info from CollateX --- diff --git a/lib/Text/Tradition/Graph.pm b/lib/Text/Tradition/Graph.pm index ee90761..1e0f02f 100644 --- a/lib/Text/Tradition/Graph.pm +++ b/lib/Text/Tradition/Graph.pm @@ -426,21 +426,35 @@ Tell the graph that these two nodes contain the same (transposed) reading. =cut sub set_identical_node { - my( $self, $node, $same_node ) = @_; + my( $self, $node, $main_node ) = @_; + + # The identical_nodes hash contains a key per node, and a value + # that is an arrayref to a list of nodes. Those nodes that are + # the same (transposed) node should be keys that point to the same + # arrayref. Each arrayref should contain the name of each node + # that points to it. So basically here we want to merge the + # arrays for the two nodes that are now identical. The 'main' + # node should always be first in the array. + my $pool = $self->{'identical_nodes'}->{ $node }; - my $same_pool = $self->{'identical_nodes'}->{ $same_node }; + my $main_pool = $self->{'identical_nodes'}->{ $main_node }; + my %poolhash; - foreach ( @$pool ) { + foreach ( @$main_pool ) { + # Note which nodes are already in the main pool so that we + # don't re-add them. $poolhash{$_} = 1; } - foreach( @$same_pool ) { - push( @$pool, $_ ) unless $poolhash{$_}; - } - $self->{'identical_nodes'}->{ $same_node } = $pool; + foreach( @$pool ) { + # Add the remaining nodes to the main pool... + push( @$main_pool, $_ ) unless $poolhash{$_}; + } + # ...and set this node to point to the enlarged pool. + $self->{'identical_nodes'}->{ $node } = $main_pool; } -=item B +=item B my @nodes = $graph->identical_nodes( $node ) @@ -488,6 +502,110 @@ sub as_svg { return $svg; } +=item B + +print $graph->as_graphml( $recalculate ) + +Returns a GraphML representation of the collation graph, with +transposition information and position information. Unless +$recalculate is passed (and is a true value), the method will return a +cached copy of the SVG after the first call to the method. + +=cut + +sub as_graphml { + my( $self, $recalc ) = @_; + return $self->{'graphml'} if( exists $self->{'graphml'} && !$recalc ); + + # Some namespaces + my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns'; + my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'; + my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' . + 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; + + # Create the document and root node + my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); + my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); + $graphml->setDocumentElement( $root ); + $root->setNamespace( $xsi_ns, 'xsi', 0 ); + $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); + + # Add the data keys for nodes + my @node_data = ( 'name', 'token', 'identical', 'position' ); + foreach my $ndi ( 0 .. $#node_data ) { + my $key = $root->addNewChild( $graphml_ns, 'key' ); + $key->setAttribute( 'attr.name', $node_data[$ndi] ); + $key->setAttribute( 'attr.type', 'string' ); + $key->setAttribute( 'for', 'node' ); + $key->setAttribute( 'id', 'd'.$ndi ); + } + + # Add the data keys for edges + my %wit_hash; + my $wit_ctr = 0; + foreach my $wit ( $self->getWitnessList ) { + my $wit_key = 'w' . $wit_ctr++; + $wit_hash{$wit} = $wit_key; + my $key = $root->addNewChild( $graphml_ns, 'key' ); + $key->setAttribute( 'attr.name', $wit ); + $key->setAttribute( 'attr.type', 'string' ); + $key->setAttribute( 'for', 'edge' ); + $key->setAttribute( 'id', $wit_key ); + } + + # Add the graph, its nodes, and its edges + my $graph = $root->addNewChild( $graphml_ns, 'graph' ); + $graph->setAttribute( 'edgedefault', 'directed' ); + $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful + $graph->setAttribute( 'parse.edgeids', 'canonical' ); + $graph->setAttribute( 'parse.edges', $self->edges() ); + $graph->setAttribute( 'parse.nodeids', 'canonical' ); + $graph->setAttribute( 'parse.nodes', $self->nodes() ); + $graph->setAttribute( 'parse.order', 'nodesfirst' ); + + my $node_ctr = 0; + my %node_hash; + foreach my $n ( $self->nodes() ) { + my %this_node_data = (); + foreach my $ndi ( 0 .. $#node_data ) { + my $value; + $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name'; + $this_node_data{'d'.$ndi} = $n->label + if $node_data[$ndi] eq 'token'; + $this_node_data{'d'.$ndi} = $self->primary_node( $n ) + if $node_data[$ndi] eq 'name'; + $this_node_data{'d'.$ndi} = + $self->{'positions'}->node_position( $n ) + if $node_data[$ndi] eq 'position'; + } + my $node_el = $graph->addNewChild( $graphml_ns, 'node' ); + my $node_xmlid = 'n' . $node_ctr++; + $node_hash{ $n->name } = $node_xmlid; + $node_el->setAttribute( 'id', $node_xmlid ); + + foreach my $dk ( keys %this_node_data ) { + my $d_el = $node_el->addNewChild( $graphml_ns, 'data' ); + $d_el->setAttribute( 'key', $dk ); + $d_el->appendTextChild( $this_node_data{$dk} ); + } + } + + foreach my $e ( $self->edges() ) { + my( $name, $from, $to ) = ( $e->name, + $node_hash{ $e->from()->name() }, + $node_hash{ $e->to()->name() } ); + my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); + $edge_el->setAttribute( 'source', $from ); + $edge_el->setAttribute( 'target', $to ); + $edge_el->setAttribute( 'id', $name ); + # TODO Got to add the witnesses + } + + # Return the thing + $self->{'graphml'} = $graphml; + return $graphml; +} + =back =head2 Lemmatization methods diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index 19b294c..260e938 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -97,11 +97,11 @@ sub parse { my %node_id = reverse %node_name; ## Record the nodes that are marked as transposed. - my $tr_xpath = '//g:node[g:data[@key="' . $nodedata{'identity'} . '"]]'; + my $tr_xpath = '//g:node[g:data[@key="' . $nodedata{'identical'} . '"]]'; my $transposition_nodes = $xpc->find( $tr_xpath ); foreach my $tn ( @$transposition_nodes ) { my $id_xpath = sprintf( './g:data[@key="%s"]/text()', - $nodedata{'identity'} ); + $nodedata{'identical'} ); $graph->set_identical_node( $node_name{ $tn->getAttribute( 'id' ) }, $node_name{ $xpc->findvalue( $id_xpath, $tn ) } ); @@ -124,8 +124,8 @@ sub parse { unless scalar @bn; $begin_node = $bn[0]; $graph->start( $gnode ); - $node_name{ 0 } = '#START#'; - $node_id{'#START#'} = 0; + $node_name{ $begin_node->getAttribute( 'id' ) } = '#START#'; + $node_id{'#START#'} = $begin_node->getAttribute( 'id' ); } unless( scalar @outgoing ) { warn "Already have an ending node" if $end_node; @@ -149,13 +149,14 @@ sub parse { my $node_id = $begin_node->getAttribute('id'); my @wit_path = ( $node_name{ $node_id } ); # TODO Detect loops at some point - while( $node_id != $end_node->getAttribute('id') ) { + while( $node_id ne $end_node->getAttribute('id') ) { # Find the node which is the target of the edge whose # source is $node_id and applies to this witness. my $xpath_expr = '//g:edge[child::g:data[@key="' . $wit . '"] and attribute::source="' . $node_id . '"]'; my $next_edge = $xpc->find( $xpath_expr, $graph_el )->[0]; + print STDERR " - at $wit / $node_id\n"; $node_id = $next_edge->getAttribute('target'); push( @wit_path, $node_name{ $node_id } ); } diff --git a/t/data/Collatex-16.xml b/t/data/Collatex-16.xml index 2a32a56..d8c4350 100644 --- a/t/data/Collatex-16.xml +++ b/t/data/Collatex-16.xml @@ -1,197 +1,224 @@ - + - + - - + + # - 0 + n0 - + when - 1 + n1 - + april - 2 - 9 - - - with his - 3 - - - showers sweet with - 8 - - + n2 + + + with + n3 + + + his + n4 + + + showers + n5 + + + sweet + n6 + + + with + n7 + + april - 9 - 2 + n11 + n2 - + fruit - 12 + n12 - + the - 13 + n13 - + drought - 14 - 18 + n14 - + march - 15 - 17 + n15 + n17 - + of - 16 + n16 - + march - 17 - 15 + n17 - + drought - 18 - 14 + n18 + n14 - + has - 19 + n19 - + pierced - 20 + n20 - + unto - 21 + n21 - + to - 22 + n22 - + the - 23 - - - root - 24 + n23 - + rood - 26 + n25 - + + root + n26 + + # - 27 + n27 - + A - B C + B - + A - + A - + A - + A - - B + + A C - - B + + + A C + B - + + C B + + C + B - + A - B + + C + B - + A C - - B - + A C - + B - + A C - + B - + A C - + B - + A - B C - - A + + B - + + A + C B - + A - + B - + A + + B - + C - + C - + + A + B + + A B - + C + diff --git a/t/data/collate.svg b/t/data/collate.svg index 55ba73d..a82f51e 100644 --- a/t/data/collate.svg +++ b/t/data/collate.svg @@ -1,254 +1,313 @@ - -]> - - - - -test - - -node_0 - -# - - -node_1 - -when - - -node_0->node_1 - - -A, B, C - - -node_2 - -april with his showers sweet with - - -node_1->node_2 - - -A - - -node_8 - -showers sweet with april - - -node_1->node_8 - - -B, C - - -node_12 - -fruit - - -node_2->node_12 - - -A - - -node_8->node_12 - - -B, C - - -node_13 - -the - - -node_12->node_13 - - -A, B, C - - -node_14 - -drought - - -node_13->node_14 - - -A, C - - -node_15 - -march - - -node_13->node_15 - - -B - - -node_16 - -of - - -node_14->node_16 - - -A, C - - -node_15->node_16 - - -B - - -node_17 - -march - - -node_16->node_17 - - -A, C - - -node_18 - -drought - - -node_16->node_18 - - -B - - -node_19 - -has - - -node_17->node_19 - - -A, C - - -node_18->node_19 - - -B - - -node_20 - -pierced - - -node_19->node_20 - - -A, B, C - - -node_21 - -unto - - -node_20->node_21 - - -A - - -node_22 - -to - - -node_20->node_22 - - -B - - -node_23 - -the - - -node_20->node_23 - - -C - - -node_21->node_23 - - -A - - -node_22->node_23 - - -B - - -node_24 - -root - - -node_23->node_24 - - -A, B - - -node_26 - -rood - - -node_23->node_26 - - -C - - -node_27 - -# - - -node_24->node_27 - - -A, B - - -node_26->node_27 - - -C + + + + + +GRAPH_0 + + +#START# + +# + + +n1 + +when + + +#START#->n1 + + +A, C, B + + +n2 + +april + + +n1->n2 + + +A + + +n5 + +showers + + +n1->n5 + + +C, B + + +n11 + +april + + +n12 + +fruit + + +n11->n12 + + +C, B + + +n13 + +the + + +n12->n13 + + +A, C, B + + +n14 + +drought + + +n13->n14 + + +A, C + + +n15 + +march + + +n13->n15 + + +B + + +n16 + +of + + +n14->n16 + + +A, C + + +n15->n16 + + +B + + +n17 + +march + + +n16->n17 + + +A, C + + +n18 + +drought + + +n16->n18 + + +B + + +n19 + +has + + +n17->n19 + + +A, C + + +n18->n19 + + +B + + +n20 + +pierced + + +n19->n20 + + +A, C, B + + +n3 + +with + + +n2->n3 + + +A + + +n21 + +unto + + +n20->n21 + + +A + + +n22 + +to + + +n20->n22 + + +B + + +n23 + +the + + +n20->n23 + + +C + + +n21->n23 + + +A + + +n22->n23 + + +B + + +n25 + +rood + + +n23->n25 + + +C + + +n26 + +root + + +n23->n26 + + +A, B + + +n27 + +# + + +n25->n27 + + +C + + +n26->n27 + + +A, B + + +n4 + +his + + +n3->n4 + + +A + + +n4->n5 + + +A + + +n6 + +sweet + + +n5->n6 + + +A, C, B + + +n7 + +with + + +n6->n7 + + +A, C, B + + +n7->n11 + + +C, B + + +n7->n12 + + +A diff --git a/t/graph.t b/t/graph.t index 032246a..f579a17 100644 --- a/t/graph.t +++ b/t/graph.t @@ -24,21 +24,22 @@ is( $svg->documentElement->nodeName(), 'svg', 'Got an svg document' ); my $svg_xpc = XML::LibXML::XPathContext->new( $svg->documentElement() ); $svg_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' ); my @svg_nodes = $svg_xpc->findnodes( '//svg:g[@class="node"]' ); -is( scalar @svg_nodes, 21, "Correct number of nodes in the graph" ); +is( scalar @svg_nodes, 24, "Correct number of nodes in the graph" ); # Test for the correct number of edges my @svg_edges = $svg_xpc->findnodes( '//svg:g[@class="edge"]' ); -is( scalar @svg_edges, 27, "Correct number of edges in the graph" ); +is( scalar @svg_edges, 30, "Correct number of edges in the graph" ); # Test for the correct common nodes -my @expected_nodes = map { [ $_, 1 ] } qw/#START# 1 8 12 13 16 19 20 23 27/; -foreach my $idx ( qw/2 3 5 8 10 13 15/ ) { +my @expected_nodes = map { [ $_, 1 ] } qw/ #START# n1 n5 n6 n7 n12 n13 + n16 n19 n20 n23 n27 /; +foreach my $idx ( qw/2 3 4 8 11 13 16 18/ ) { splice( @expected_nodes, $idx, 0, [ "node_null", undef ] ); } my @active_nodes = $graph->active_nodes(); # is_deeply( \@active_nodes, \@expected_nodes, "Initial common points" ); subtest 'Initial common points' => \&compare_active; -my $string = '# when ... ... showers sweet with ... fruit the ... of ... has pierced ... the ... #'; +my $string = '# when ... ... ... showers sweet with ... fruit the ... of ... has pierced ... the ... #'; is( make_text( @active_nodes ), $string, "Got the right starting text" ); sub compare_active { @@ -76,13 +77,14 @@ is( $graph->text_for_witness( "B" ), $wit_b, "Correct path for witness B" ); is( $graph->text_for_witness( "C" ), $wit_c, "Correct path for witness C" ); # Test the transposition identifiers -my $transposition_pools = [ [ 2, 9 ], [ 14, 18 ], [ 15, 17 ] ]; -my $transposed_nodes = { 2 => $transposition_pools->[0], - 9 => $transposition_pools->[0], - 14 => $transposition_pools->[1], - 15 => $transposition_pools->[2], - 17 => $transposition_pools->[2], - 18 => $transposition_pools->[1], +my $transposition_pools = [ [ 'n2', 'n11' ], [ 'n14', 'n18' ], + [ 'n17', 'n15' ] ]; +my $transposed_nodes = { 'n2' => $transposition_pools->[0], + 'n11' => $transposition_pools->[0], + 'n14' => $transposition_pools->[1], + 'n15' => $transposition_pools->[2], + 'n17' => $transposition_pools->[2], + 'n18' => $transposition_pools->[1], }; foreach my $n ( $graph->nodes() ) { $transposed_nodes->{ $n->name() } = [ $n->name() ] @@ -91,84 +93,84 @@ foreach my $n ( $graph->nodes() ) { is_deeply( $graph->{'identical_nodes'}, $transposed_nodes, "Found the right transpositions" ); # Test turning on a node -my @off = $graph->toggle_node( '24' ); -$expected_nodes[ 15 ] = [ "24", 1 ]; +my @off = $graph->toggle_node( 'n25' ); +$expected_nodes[ 18 ] = [ "n25", 1 ]; @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on node for new location' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the ... of ... has pierced ... the root #'; +$string = '# when ... ... ... showers sweet with ... fruit the ... of ... has pierced ... the rood #'; is( make_text( @active_nodes ), $string, "Got the right text" ); # Test the toggling effects of same-column -@off = $graph->toggle_node( '26' ); -splice( @expected_nodes, 15, 1, ( [ "24", 0 ], [ "26", 1 ] ) ); +@off = $graph->toggle_node( 'n26' ); +splice( @expected_nodes, 18, 1, ( [ "n25", 0 ], [ "n26", 1 ] ) ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on other node in that location' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the ... of ... has pierced ... the rood #'; +$string = '# when ... ... ... showers sweet with ... fruit the ... of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); # Test the toggling effects of transposition -@off = $graph->toggle_node( '14' ); +@off = $graph->toggle_node( 'n14' ); # Add the turned on node -$expected_nodes[ 8 ] = [ "14", 1 ]; +$expected_nodes[ 11 ] = [ "n14", 1 ]; # Remove the 'off' for the previous node -splice( @expected_nodes, 15, 1 ); +splice( @expected_nodes, 18, 1 ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on transposition node' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the drought of ... has pierced ... the rood #'; +$string = '# when ... ... ... showers sweet with ... fruit the drought of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '18' ); +@off = $graph->toggle_node( 'n18' ); # Toggle on the new node -$expected_nodes[ 10 ] = [ "18", 1 ]; +$expected_nodes[ 13 ] = [ "n18", 1 ]; # Toggle off the transposed node -$expected_nodes[ 8 ] = [ "14", undef ]; +$expected_nodes[ 11 ] = [ "n14", undef ]; @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on that node\'s partner' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the ... of drought has pierced ... the rood #'; +$string = '# when ... ... ... showers sweet with ... fruit the ... of drought has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '14' ); +@off = $graph->toggle_node( 'n14' ); # Toggle on the new node -$expected_nodes[ 8 ] = [ "14", 1 ]; +$expected_nodes[ 11 ] = [ "n14", 1 ]; # Toggle off the transposed node -$expected_nodes[ 10 ] = [ "18", undef ]; +$expected_nodes[ 13 ] = [ "n18", undef ]; @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on the original node' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the drought of ... has pierced ... the rood #'; +$string = '# when ... ... ... showers sweet with ... fruit the drought of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '15' ); +@off = $graph->toggle_node( 'n15' ); # Toggle on the new node, and off with the old -splice( @expected_nodes, 8, 1, [ "14", 0 ], [ "15", 1 ] ); +splice( @expected_nodes, 11, 1, [ "n14", 0 ], [ "n15", 1 ] ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on the colocated node' => \&compare_active; -$string = '# when ... ... showers sweet with ... fruit the march of ... has pierced ... the rood #'; +$string = '# when ... ... ... showers sweet with ... fruit the march of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '3' ); +@off = $graph->toggle_node( 'n3' ); # Toggle on the new node -splice( @expected_nodes, 3, 1, [ "3", 1 ] ); +splice( @expected_nodes, 3, 1, [ "n3", 1 ] ); # Remove the old toggle-off -splice( @expected_nodes, 8, 1 ); +splice( @expected_nodes, 11, 1 ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on a singleton node' => \&compare_active; -$string = '# when ... with his showers sweet with ... fruit the march of ... has pierced ... the rood #'; +$string = '# when ... with ... showers sweet with ... fruit the march of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '3' ); +@off = $graph->toggle_node( 'n3' ); # Toggle off this node -splice( @expected_nodes, 3, 1, [ "3", 0 ] ); +splice( @expected_nodes, 3, 1, [ "n3", 0 ] ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned off a singleton node' => \&compare_active; -$string = '# when ... showers sweet with ... fruit the march of ... has pierced ... the rood #'; +$string = '# when ... ... showers sweet with ... fruit the march of ... has pierced ... the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); -@off = $graph->toggle_node( '21' ); -splice( @expected_nodes, 13, 1, [ "21", 1 ] ); +@off = $graph->toggle_node( 'n21' ); +splice( @expected_nodes, 16, 1, [ "n21", 1 ] ); @active_nodes = $graph->active_nodes( @off ); subtest 'Turned on a new node after singleton switchoff' => \&compare_active; -$string = '# when ... showers sweet with ... fruit the march of ... has pierced unto the rood #'; +$string = '# when ... ... showers sweet with ... fruit the march of ... has pierced unto the root #'; is( make_text( @active_nodes ), $string, "Got the right text" ); done_testing();