X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FGraphML.pm;h=a4d57177aeac71ac07ff0e398cba8b95c2131fe7;hb=7c293912d5aeee653131449d246a0f442dc8119f;hp=4e191a45d9ed34d706f76c444fb378d735140cb0;hpb=f6066bac61bc5609c60d48df17aad924c8944177;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index 4e191a4..a4d5717 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -2,9 +2,14 @@ package Text::Tradition::Parser::GraphML; use strict; use warnings; +use Exporter 'import'; +use vars qw/ @EXPORT_OK $xpc /; + use XML::LibXML; use XML::LibXML::XPathContext; +@EXPORT_OK = qw/ graphml_parse /; + =head1 NAME Text::Tradition::Parser::GraphML @@ -19,100 +24,128 @@ GraphML conventions of the source program. =head1 METHODS -=over - -=item B +=head2 B( $init_opts ) -parse( $graphml_string ); +parse( $init_opts ); -Takes a string containing the GraphML; returns a list of nodes, edges, +Takes a set of Tradition initialization options, among which should be either +'file' or 'string'; parses that file or string and returns a list of nodes, edges, and their associated data. =cut -use vars qw/ $xpc $nodedata $witnesses /; - # Return graph -> nodeid -> { key1/val1, key2/val2, key3/val3 ... } # -> edgeid -> { source, target, wit1/val1, wit2/val2 ...} -sub parse { - my( $graphml_str ) = @_; - - my $graph_hash = { 'nodes' => [], - 'edges' => [] }; +sub graphml_parse { + my( $opts ) = @_; my $parser = XML::LibXML->new(); - my $doc = $parser->parse_string( $graphml_str ); + my $doc; + if( exists $opts->{'string'} ) { + $doc = $parser->parse_string( $opts->{'string'} ); + } elsif ( exists $opts->{'file'} ) { + $doc = $parser->parse_file( $opts->{'file'} ); + } else { + warn "Could not find string or file option to parse"; + return; + } + + my( $graphattr, $nodedata, $edgedata ) = ( {}, {}, {} ); my $graphml = $doc->documentElement(); $xpc = XML::LibXML::XPathContext->new( $graphml ); $xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' ); - # First get the ID keys, for witnesses and for collation data + # First get the ID keys, for node/edge data and for collation data foreach my $k ( $xpc->findnodes( '//g:key' ) ) { - # Each key has a 'for' attribute; the edge keys are witnesses, and - # the node keys contain an ID and string for each node. - my $keyid = $k->getAttribute( 'id' ); - my $keyname = $k->getAttribute( 'attr.name' ); - - if( $k->getAttribute( 'for' ) eq 'node' ) { - # Keep track of the XML identifiers for the data carried - # in each node element. - $nodedata->{$keyid} = $keyname - } else { - $witnesses->{$keyid} = $keyname; - } + # Each key has a 'for' attribute to say whether it is for graph, + # node, or edge. + my $keyid = $k->getAttribute( 'id' ); + my $keyname = $k->getAttribute( 'attr.name' ); + + # Keep track of the XML identifiers for the data carried + # in each node element. + my $dtype = $k->getAttribute( 'for' ); + if( $dtype eq 'graph' ) { + $graphattr->{$keyid} = $keyname; + } elsif( $dtype eq 'node' ) { + $nodedata->{$keyid} = $keyname; + } else { + $edgedata->{$keyid} = $keyname; + } } - my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0]; - - my $node_reg = {}; - - # Add the nodes to the graph hash. - my @nodes = $xpc->findnodes( '//g:node' ); - foreach my $n ( @nodes ) { - # Could use a better way of registering these - my $node_hash = {}; - foreach my $dkey ( keys %$nodedata ) { - my $keyname = $nodedata->{$dkey}; - my $keyvalue = _lookup_node_data( $n, $dkey ); - $node_hash->{$keyname} = $keyvalue if defined $keyvalue; - } - $node_reg->{$n->getAttribute( 'id' )} = $node_hash; - push( @{$graph_hash->{'nodes'}}, $node_hash ); - } + my @returned_graphs; + foreach my $graph_el ( $xpc->findnodes( '/g:graphml/g:graph' ) ) { + my $graph_hash = { 'nodes' => [], + 'edges' => [], + 'name' => $graph_el->getAttribute( 'id' ) }; + + my $node_reg = {}; + + # Read in graph globals (if any). + # print STDERR "Reading graphml global data\n"; + foreach my $dkey ( keys %$graphattr ) { + my $keyname = $graphattr->{$dkey}; + my $keyvalue = _lookup_node_data( $graph_el, $dkey ); + $graph_hash->{'global'}->{$keyname} = $keyvalue if defined $keyvalue; + } + + # Add the nodes to the graph hash. + # print STDERR "Reading graphml nodes\n"; + my @nodes = $xpc->findnodes( './/g:node', $graph_el ); + foreach my $n ( @nodes ) { + # Could use a better way of registering these + my $node_hash = {}; + foreach my $dkey ( keys %$nodedata ) { + my $keyname = $nodedata->{$dkey}; + my $keyvalue = _lookup_node_data( $n, $dkey ); + $node_hash->{$keyname} = $keyvalue if defined $keyvalue; + } + $node_reg->{$n->getAttribute( 'id' )} = $node_hash; + push( @{$graph_hash->{'nodes'}}, $node_hash ); + } + + # Now add the edges, and cross-ref with the node objects. + # print STDERR "Reading graphml edges\n"; + my @edges = $xpc->findnodes( './/g:edge', $graph_el ); + foreach my $e ( @edges ) { + my $from = $e->getAttribute('source'); + my $to = $e->getAttribute('target'); - # Now add the edges, and cross-ref with the node objects. - my @edges = $xpc->findnodes( '//g:edge' ); - foreach my $e ( @edges ) { - my $from = $e->getAttribute('source'); - my $to = $e->getAttribute('target'); - - # We don't know whether the edge data is one per witness - # or one per witness type, or something else. So we just - # save it and let our calling parser decide. - my $edge_hash = { - 'source' => $node_reg->{$from}, - 'target' => $node_reg->{$to}, - }; - foreach my $wkey( keys %$witnesses ) { - my $wname = $witnesses->{$wkey}; - my $wlabel = _lookup_node_data( $e, $wkey ); - $edge_hash->{$wname} = $wlabel if $wlabel; - } - push( @{$graph_hash->{'edges'}}, $edge_hash ); + # We don't know whether the edge data is one per witness + # or one per witness type, or something else. So we just + # save it and let our calling parser decide. + my $edge_hash = { + 'source' => $node_reg->{$from}, + 'target' => $node_reg->{$to}, + }; + foreach my $wkey( keys %$edgedata ) { + my $wname = $edgedata->{$wkey}; + my $wlabel = _lookup_node_data( $e, $wkey ); + $edge_hash->{$wname} = $wlabel if $wlabel; + } + push( @{$graph_hash->{'edges'}}, $edge_hash ); + } + push( @returned_graphs, $graph_hash ); } - return $graph_hash; + return @returned_graphs; } + sub _lookup_node_data { my( $xmlnode, $key ) = @_; my $lookup_xpath = './g:data[@key="%s"]/child::text()'; - my $data = $xpc->findvalue( sprintf( $lookup_xpath, $key ), $xmlnode ); + my $data = $xpc->find( sprintf( $lookup_xpath, $key ), $xmlnode ); + # If we get back an empty nodelist, we return undef. + if( ref( $data ) ) { + return undef unless $data->size; + return $data->to_literal->value; + } + # Otherwise we got back a value. Return it. return $data; } -=back - =head1 LICENSE This package is free software and is provided "as is" without express