X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FGraphML.pm;h=be3640796a80b4f7e16a35c3749a8ab70f4be581;hb=94c00c71ffabc3dc155d237364e76af4385dcb96;hp=4e191a45d9ed34d706f76c444fb378d735140cb0;hpb=f6066bac61bc5609c60d48df17aad924c8944177;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index 4e191a4..be36407 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -39,7 +39,7 @@ sub parse { my( $graphml_str ) = @_; my $graph_hash = { 'nodes' => [], - 'edges' => [] }; + 'edges' => [] }; my $parser = XML::LibXML->new(); my $doc = $parser->parse_string( $graphml_str ); @@ -49,57 +49,59 @@ sub parse { # First get the ID keys, for witnesses and for collation data foreach my $k ( $xpc->findnodes( '//g:key' ) ) { - # Each key has a 'for' attribute; the edge keys are witnesses, and - # the node keys contain an ID and string for each node. - my $keyid = $k->getAttribute( 'id' ); - my $keyname = $k->getAttribute( 'attr.name' ); - - if( $k->getAttribute( 'for' ) eq 'node' ) { - # Keep track of the XML identifiers for the data carried - # in each node element. - $nodedata->{$keyid} = $keyname - } else { - $witnesses->{$keyid} = $keyname; - } + # Each key has a 'for' attribute; the edge keys are witnesses, and + # the node keys contain an ID and string for each node. + my $keyid = $k->getAttribute( 'id' ); + my $keyname = $k->getAttribute( 'attr.name' ); + + if( $k->getAttribute( 'for' ) eq 'node' ) { + # Keep track of the XML identifiers for the data carried + # in each node element. + $nodedata->{$keyid} = $keyname + } else { + $witnesses->{$keyid} = $keyname; + } } my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0]; my $node_reg = {}; - # Add the nodes to the graph hash. + # Add the nodes to the graph hash. + print STDERR "Reading graphml nodes\n"; my @nodes = $xpc->findnodes( '//g:node' ); foreach my $n ( @nodes ) { - # Could use a better way of registering these - my $node_hash = {}; - foreach my $dkey ( keys %$nodedata ) { - my $keyname = $nodedata->{$dkey}; - my $keyvalue = _lookup_node_data( $n, $dkey ); - $node_hash->{$keyname} = $keyvalue if defined $keyvalue; - } - $node_reg->{$n->getAttribute( 'id' )} = $node_hash; - push( @{$graph_hash->{'nodes'}}, $node_hash ); + # Could use a better way of registering these + my $node_hash = {}; + foreach my $dkey ( keys %$nodedata ) { + my $keyname = $nodedata->{$dkey}; + my $keyvalue = _lookup_node_data( $n, $dkey ); + $node_hash->{$keyname} = $keyvalue if defined $keyvalue; + } + $node_reg->{$n->getAttribute( 'id' )} = $node_hash; + push( @{$graph_hash->{'nodes'}}, $node_hash ); } - + # Now add the edges, and cross-ref with the node objects. + print STDERR "Reading graphml edges\n"; my @edges = $xpc->findnodes( '//g:edge' ); foreach my $e ( @edges ) { - my $from = $e->getAttribute('source'); - my $to = $e->getAttribute('target'); - - # We don't know whether the edge data is one per witness - # or one per witness type, or something else. So we just - # save it and let our calling parser decide. - my $edge_hash = { - 'source' => $node_reg->{$from}, - 'target' => $node_reg->{$to}, - }; - foreach my $wkey( keys %$witnesses ) { - my $wname = $witnesses->{$wkey}; - my $wlabel = _lookup_node_data( $e, $wkey ); - $edge_hash->{$wname} = $wlabel if $wlabel; - } - push( @{$graph_hash->{'edges'}}, $edge_hash ); + my $from = $e->getAttribute('source'); + my $to = $e->getAttribute('target'); + + # We don't know whether the edge data is one per witness + # or one per witness type, or something else. So we just + # save it and let our calling parser decide. + my $edge_hash = { + 'source' => $node_reg->{$from}, + 'target' => $node_reg->{$to}, + }; + foreach my $wkey( keys %$witnesses ) { + my $wname = $witnesses->{$wkey}; + my $wlabel = _lookup_node_data( $e, $wkey ); + $edge_hash->{$wname} = $wlabel if $wlabel; + } + push( @{$graph_hash->{'edges'}}, $edge_hash ); } return $graph_hash; } @@ -107,7 +109,13 @@ sub parse { sub _lookup_node_data { my( $xmlnode, $key ) = @_; my $lookup_xpath = './g:data[@key="%s"]/child::text()'; - my $data = $xpc->findvalue( sprintf( $lookup_xpath, $key ), $xmlnode ); + my $data = $xpc->find( sprintf( $lookup_xpath, $key ), $xmlnode ); + # If we get back an empty nodelist, we return undef. + if( ref( $data ) ) { + return undef unless $data->size; + return $data->to_literal->value; + } + # Otherwise we got back a value. Return it. return $data; }