Commit | Line | Data |
e58153d6 |
1 | package Text::Tradition::Parser::GraphML; |
b49c4318 |
2 | |
3 | use strict; |
4 | use warnings; |
b49c4318 |
5 | use XML::LibXML; |
6 | use XML::LibXML::XPathContext; |
7 | |
2ceca8c3 |
8 | =head1 NAME |
b49c4318 |
9 | |
2ceca8c3 |
10 | Text::Tradition::Parser::GraphML |
11 | |
12 | =head1 DESCRIPTION |
13 | |
14 | Parser module for Text::Tradition, given a GraphML file that describes |
ec3f9144 |
15 | a collation graph. Returns the information about the graph that has |
16 | been parsed out from the GraphML. This module is meant to be used |
17 | with a module (e.g. CollateX or Self) that interprets the specific |
18 | GraphML conventions of the source program. |
2ceca8c3 |
19 | |
20 | =head1 METHODS |
21 | |
22 | =over |
23 | |
24 | =item B<parse> |
25 | |
ec3f9144 |
26 | parse( $graphml_string ); |
2ceca8c3 |
27 | |
ec3f9144 |
28 | Takes a string containing the GraphML; returns a list of nodes, edges, |
29 | and their associated data. |
2ceca8c3 |
30 | |
31 | =cut |
b49c4318 |
32 | |
e309421a |
33 | use vars qw/ $xpc $graphattr $nodedata $witnesses /; |
b5054ca9 |
34 | |
ec3f9144 |
35 | # Return graph -> nodeid -> { key1/val1, key2/val2, key3/val3 ... } |
36 | # -> edgeid -> { source, target, wit1/val1, wit2/val2 ...} |
4a8828f0 |
37 | |
b49c4318 |
38 | sub parse { |
ec3f9144 |
39 | my( $graphml_str ) = @_; |
40 | |
41 | my $graph_hash = { 'nodes' => [], |
910a0a6d |
42 | 'edges' => [] }; |
b49c4318 |
43 | |
44 | my $parser = XML::LibXML->new(); |
45 | my $doc = $parser->parse_string( $graphml_str ); |
8e1394aa |
46 | my $graphml = $doc->documentElement(); |
4a8828f0 |
47 | $xpc = XML::LibXML::XPathContext->new( $graphml ); |
b49c4318 |
48 | $xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' ); |
49 | |
50 | # First get the ID keys, for witnesses and for collation data |
b49c4318 |
51 | foreach my $k ( $xpc->findnodes( '//g:key' ) ) { |
910a0a6d |
52 | # Each key has a 'for' attribute; the edge keys are witnesses, and |
53 | # the node keys contain an ID and string for each node. |
54 | my $keyid = $k->getAttribute( 'id' ); |
55 | my $keyname = $k->getAttribute( 'attr.name' ); |
56 | |
e309421a |
57 | # Keep track of the XML identifiers for the data carried |
58 | # in each node element. |
59 | my $dtype = $k->getAttribute( 'for' ); |
60 | if( $dtype eq 'graph' ) { |
61 | $graphattr->{$keyid} = $keyname; |
62 | } elsif( $dtype eq 'node' ) { |
63 | $nodedata->{$keyid} = $keyname; |
910a0a6d |
64 | } else { |
65 | $witnesses->{$keyid} = $keyname; |
66 | } |
b49c4318 |
67 | } |
68 | |
69 | my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0]; |
70 | |
ec3f9144 |
71 | my $node_reg = {}; |
e309421a |
72 | |
73 | # Read in graph globals (if any). |
74 | print STDERR "Reading graphml global data\n"; |
75 | foreach my $dkey ( keys %$graphattr ) { |
76 | my $keyname = $graphattr->{$dkey}; |
77 | my $keyvalue = _lookup_node_data( $graph_el, $dkey ); |
78 | $graph_hash->{'global'}->{$keyname} = $keyvalue; |
79 | } |
ec3f9144 |
80 | |
94c00c71 |
81 | # Add the nodes to the graph hash. |
82 | print STDERR "Reading graphml nodes\n"; |
b49c4318 |
83 | my @nodes = $xpc->findnodes( '//g:node' ); |
84 | foreach my $n ( @nodes ) { |
910a0a6d |
85 | # Could use a better way of registering these |
86 | my $node_hash = {}; |
87 | foreach my $dkey ( keys %$nodedata ) { |
88 | my $keyname = $nodedata->{$dkey}; |
89 | my $keyvalue = _lookup_node_data( $n, $dkey ); |
90 | $node_hash->{$keyname} = $keyvalue if defined $keyvalue; |
91 | } |
92 | $node_reg->{$n->getAttribute( 'id' )} = $node_hash; |
93 | push( @{$graph_hash->{'nodes'}}, $node_hash ); |
b49c4318 |
94 | } |
910a0a6d |
95 | |
ec3f9144 |
96 | # Now add the edges, and cross-ref with the node objects. |
94c00c71 |
97 | print STDERR "Reading graphml edges\n"; |
b49c4318 |
98 | my @edges = $xpc->findnodes( '//g:edge' ); |
99 | foreach my $e ( @edges ) { |
910a0a6d |
100 | my $from = $e->getAttribute('source'); |
101 | my $to = $e->getAttribute('target'); |
102 | |
103 | # We don't know whether the edge data is one per witness |
104 | # or one per witness type, or something else. So we just |
105 | # save it and let our calling parser decide. |
106 | my $edge_hash = { |
107 | 'source' => $node_reg->{$from}, |
108 | 'target' => $node_reg->{$to}, |
109 | }; |
110 | foreach my $wkey( keys %$witnesses ) { |
111 | my $wname = $witnesses->{$wkey}; |
112 | my $wlabel = _lookup_node_data( $e, $wkey ); |
113 | $edge_hash->{$wname} = $wlabel if $wlabel; |
114 | } |
115 | push( @{$graph_hash->{'edges'}}, $edge_hash ); |
b49c4318 |
116 | } |
ec3f9144 |
117 | return $graph_hash; |
4a8828f0 |
118 | } |
b49c4318 |
119 | |
4a8828f0 |
120 | sub _lookup_node_data { |
ec3f9144 |
121 | my( $xmlnode, $key ) = @_; |
4a8828f0 |
122 | my $lookup_xpath = './g:data[@key="%s"]/child::text()'; |
94c00c71 |
123 | my $data = $xpc->find( sprintf( $lookup_xpath, $key ), $xmlnode ); |
124 | # If we get back an empty nodelist, we return undef. |
125 | if( ref( $data ) ) { |
126 | return undef unless $data->size; |
127 | return $data->to_literal->value; |
128 | } |
129 | # Otherwise we got back a value. Return it. |
4a8828f0 |
130 | return $data; |
b49c4318 |
131 | } |
132 | |
2ceca8c3 |
133 | =back |
134 | |
135 | =head1 LICENSE |
136 | |
137 | This package is free software and is provided "as is" without express |
138 | or implied warranty. You can redistribute it and/or modify it under |
139 | the same terms as Perl itself. |
140 | |
141 | =head1 AUTHOR |
142 | |
143 | Tara L Andrews, aurum@cpan.org |
144 | |
145 | =cut |
146 | |
b49c4318 |
147 | 1; |