From: Tara L Andrews Date: Mon, 30 May 2011 11:37:12 +0000 (+0200) Subject: make one-witness-per-edge graphml output; still need to parse it properly X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b5054ca9a01f0751d7b00cf1ed98e2fa0f8c73ab;p=scpubgit%2Fstemmatology.git make one-witness-per-edge graphml output; still need to parse it properly --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 03aed2a..dd60f0c 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -305,21 +305,13 @@ sub as_graphml { } # Add the data keys for edges, i.e. witnesses - my %wit_hash; my $wit_ctr = 0; - foreach my $wit ( @{$self->tradition->witnesses} ) { - my $wit_key = 'w' . $wit_ctr++; - $wit_hash{$wit->sigil} = $wit_key; + foreach my $wit_key( qw/ main ante_corr / ) { my $key = $root->addNewChild( $graphml_ns, 'key' ); - $key->setAttribute( 'attr.name', _make_xml_attr( $wit->sigil ) ); + $key->setAttribute( 'attr.name', "witness_$wit_key" ); $key->setAttribute( 'attr.type', 'string' ); $key->setAttribute( 'for', 'edge' ); - $key->setAttribute( 'id', $wit_key ); - my $ackey = $root->addNewChild( $graphml_ns, 'key' ); - $ackey->setAttribute( 'attr.name', _make_xml_attr( $wit->sigil ) . "_ante_corr" ); - $ackey->setAttribute( 'attr.type', 'string' ); - $ackey->setAttribute( 'for', 'edge' ); - $ackey->setAttribute( 'id', $wit_key . "a" ); + $key->setAttribute( 'id', 'w'.$wit_ctr++ ); } # Add the graph, its nodes, and its edges @@ -334,7 +326,7 @@ sub as_graphml { my $node_ctr = 0; my %node_hash; - foreach my $n ( $self->readings ) { + foreach my $n ( sort { $a->name cmp $b->name } $self->readings ) { my %this_node_data = (); foreach my $ndi ( 0 .. $#node_data ) { my $key = $node_data[$ndi]; @@ -361,25 +353,25 @@ sub as_graphml { } my $edge_ctr = 0; - foreach my $e ( $self->paths() ) { + foreach my $e ( sort { $a->from->name cmp $b->from->name } $self->paths() ) { my( $name, $from, $to ) = ( 'e'.$edge_ctr++, - $node_hash{ $e->from()->name() }, - $node_hash{ $e->to()->name() } ); + $node_hash{ $e->from->name() }, + $node_hash{ $e->to->name() } ); my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); $edge_el->setAttribute( 'source', $from ); $edge_el->setAttribute( 'target', $to ); $edge_el->setAttribute( 'id', $name ); # Add the witness my $base = $e->label; - my $ante_corr; + my $key = 'w0'; + # TODO kind of hacky if( $e->label =~ /^(.*?)\s+(\(a\.c\.\))$/ ) { - ( $base, $ante_corr ) = ( $1, $2 ); + $base = $1; + $key = 'w1'; } - my $key = $wit_hash{$base}; - $key .= "a" if $ante_corr; my $wit_el = $edge_el->addNewChild( $graphml_ns, 'data' ); $wit_el->setAttribute( 'key', $key ); - $wit_el->appendText( $e->label ); + $wit_el->appendText( $base ); } # Return the thing diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index 9842bdb..4fd47e3 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -29,10 +29,14 @@ graph. =cut -use vars qw/ $xpc %nodedata /; +use vars qw/ $xpc $nodedata /; + +map { $nodedata->{'CollateX'}->{$_} = undef } qw/ number token identical ranking /; +map { $nodedata->{'Text::Tradition'}->{$_} = undef } qw/ name reading identical position /; sub parse { - my( $tradition, $graphml_str ) = @_; + my( $tradition, $graphml_str, $generator ) = @_; + $generator = 'CollateX' unless $generator; my $collation = $tradition->collation; my $parser = XML::LibXML->new(); @@ -46,6 +50,8 @@ sub parse { foreach my $k ( $xpc->findnodes( '//g:key' ) ) { # Each key has a 'for' attribute; the edge keys are witnesses, and # the node keys contain an ID and string for each node. + my $keyid = $k->getAttribute( 'id' ); + my $keyname = $k->getAttribute( 'attr.name' ); if( $k->getAttribute( 'for' ) eq 'node' ) { # The node data keys we expect are: @@ -54,13 +60,15 @@ sub parse { # 'identical' -> the node of which this node is # a transposed version # 'position' -> a calculated position for the node - $nodedata{ $k->getAttribute( 'attr.name' ) } = $k->getAttribute( 'id' ); + warn( "No data key $keyname defined for $generator GraphML" ) + unless exists( $nodedata->{$generator}->{$keyname} ); + $nodedata->{$generator}->{$keyname} = $keyid; } else { - $witnesses{ $k->getAttribute( 'id' ) } = $k->getAttribute( 'attr.name' ); + $witnesses{ $keyid } = $keyname; } } - my $has_explicit_positions = defined $nodedata{'position'}; + my $has_explicit_positions = defined $nodedata->{$generator}->{'position'}; # Add the witnesses that we have found foreach my $wit ( values %witnesses ) { @@ -78,19 +86,21 @@ sub parse { my $extra_data = {}; my @nodes = $xpc->findnodes( '//g:node' ); foreach my $n ( @nodes ) { - my $id = _lookup_node_data( $n, 'number' ); - $id = _lookup_node_data( $n, 'name' ) unless $id; - my $token = _lookup_node_data( $n, 'token' ); - $token = _lookup_node_data( $n, 'reading' ) unless $token; + # Could use a better way of registering these + my $nodeid_key = $generator eq 'CollateX' ? 'number' : 'name'; + my $reading_key = $generator eq 'CollateX' ? 'token' : 'reading'; + my $id = _lookup_node_data( $n, $nodeid_key, $generator ); + my $token = _lookup_node_data( $n, $reading_key, $generator ); my $gnode = $collation->add_reading( $id ); $node_name{ $n->getAttribute('id') } = $id; $gnode->text( $token ); # Now get the rest of the data, i.e. not the ID or label my $extra = {}; - foreach my $k ( keys %nodedata ) { - next if $k =~ /^(number|token)$/; - $extra->{ $k } = _lookup_node_data( $n, $k ); + foreach my $k ( keys %{$nodedata->{$generator}} ) { + next if $k eq $nodeid_key || $k eq $reading_key; + next unless $nodedata->{$generator}->{$k}; + $extra->{ $k } = _lookup_node_data( $n, $k, $generator ); } $extra_data->{ $id } = $extra; } @@ -116,11 +126,11 @@ sub parse { my %node_id = reverse %node_name; ## Record the nodes that are marked as transposed. - my $tr_xpath = '//g:node[g:data[@key="' . $nodedata{'identical'} . '"]]'; + my $tr_xpath = '//g:node[g:data[@key="' . $nodedata->{$generator}->{'identical'} . '"]]'; my $transposition_nodes = $xpc->find( $tr_xpath ); foreach my $tn ( @$transposition_nodes ) { my $id_xpath = sprintf( './g:data[@key="%s"]/text()', - $nodedata{'identical'} ); + $nodedata->{$generator}->{'identical'} ); my $tn_reading = $collation->reading( $node_id{ $tn->getAttribute( 'id' ) } ); my $main_reading = $collation->reading( $node_name{ $xpc->findvalue( $id_xpath, $tn ) } ); if( $collation->linear ) { @@ -169,10 +179,10 @@ sub parse { } sub _lookup_node_data { - my( $xmlnode, $key ) = @_; - return undef unless exists $nodedata{$key}; + my( $xmlnode, $key, $generator ) = @_; + return undef unless exists $nodedata->{$generator}->{$key}; my $lookup_xpath = './g:data[@key="%s"]/child::text()'; - my $data = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{$key} ), + my $data = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata->{$generator}->{$key} ), $xmlnode ); return $data; }