Commit | Line | Data |
e58153d6 |
1 | package Text::Tradition::Parser::GraphML; |
b49c4318 |
2 | |
3 | use strict; |
4 | use warnings; |
e867486f |
5 | use Exporter 'import'; |
6 | use vars qw/ @EXPORT_OK $xpc /; |
00311328 |
7 | use Text::Tradition::Error; |
b49c4318 |
8 | use XML::LibXML; |
9 | use XML::LibXML::XPathContext; |
10 | |
1f7aa795 |
11 | @EXPORT_OK = qw/ graphml_parse /; |
e867486f |
12 | |
2ceca8c3 |
13 | =head1 NAME |
b49c4318 |
14 | |
2ceca8c3 |
15 | Text::Tradition::Parser::GraphML |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | Parser module for Text::Tradition, given a GraphML file that describes |
ec3f9144 |
20 | a collation graph. Returns the information about the graph that has |
21 | been parsed out from the GraphML. This module is meant to be used |
22 | with a module (e.g. CollateX or Self) that interprets the specific |
23 | GraphML conventions of the source program. |
2ceca8c3 |
24 | |
25 | =head1 METHODS |
26 | |
e867486f |
27 | =head2 B<graphml_parse>( $init_opts ) |
2ceca8c3 |
28 | |
dfc37e38 |
29 | parse( $init_opts ); |
2ceca8c3 |
30 | |
dfc37e38 |
31 | Takes a set of Tradition initialization options, among which should be either |
32 | 'file' or 'string'; parses that file or string and returns a list of nodes, edges, |
ec3f9144 |
33 | and their associated data. |
2ceca8c3 |
34 | |
35 | =cut |
b49c4318 |
36 | |
ec3f9144 |
37 | # Return graph -> nodeid -> { key1/val1, key2/val2, key3/val3 ... } |
38 | # -> edgeid -> { source, target, wit1/val1, wit2/val2 ...} |
4a8828f0 |
39 | |
e867486f |
40 | sub graphml_parse { |
dfc37e38 |
41 | my( $opts ) = @_; |
ec3f9144 |
42 | |
b49c4318 |
43 | my $parser = XML::LibXML->new(); |
dfc37e38 |
44 | my $doc; |
45 | if( exists $opts->{'string'} ) { |
46 | $doc = $parser->parse_string( $opts->{'string'} ); |
47 | } elsif ( exists $opts->{'file'} ) { |
48 | $doc = $parser->parse_file( $opts->{'file'} ); |
dead25ca |
49 | } elsif ( exists $opts->{'xmlobj'} ) { |
50 | $doc = $opts->{'xmlobj'}; |
dfc37e38 |
51 | } else { |
52 | warn "Could not find string or file option to parse"; |
53 | return; |
54 | } |
55 | |
2626f709 |
56 | my( $graphattr, $nodedata, $edgedata ) = ( {}, {}, {} ); |
8e1394aa |
57 | my $graphml = $doc->documentElement(); |
4a8828f0 |
58 | $xpc = XML::LibXML::XPathContext->new( $graphml ); |
b49c4318 |
59 | $xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' ); |
60 | |
2626f709 |
61 | # First get the ID keys, for node/edge data and for collation data |
b49c4318 |
62 | foreach my $k ( $xpc->findnodes( '//g:key' ) ) { |
2626f709 |
63 | # Each key has a 'for' attribute to say whether it is for graph, |
64 | # node, or edge. |
910a0a6d |
65 | my $keyid = $k->getAttribute( 'id' ); |
66 | my $keyname = $k->getAttribute( 'attr.name' ); |
67 | |
e309421a |
68 | # Keep track of the XML identifiers for the data carried |
69 | # in each node element. |
70 | my $dtype = $k->getAttribute( 'for' ); |
71 | if( $dtype eq 'graph' ) { |
72 | $graphattr->{$keyid} = $keyname; |
73 | } elsif( $dtype eq 'node' ) { |
74 | $nodedata->{$keyid} = $keyname; |
910a0a6d |
75 | } else { |
2626f709 |
76 | $edgedata->{$keyid} = $keyname; |
910a0a6d |
77 | } |
b49c4318 |
78 | } |
00311328 |
79 | |
80 | my @graph_elements = $xpc->findnodes( '/g:graphml/g:graph' ); |
81 | unless( @graph_elements ) { |
82 | throw( "No graph elements found in graph XML - is this really GraphML?" ); |
83 | } |
b49c4318 |
84 | |
2626f709 |
85 | my @returned_graphs; |
00311328 |
86 | foreach my $graph_el ( @graph_elements ) { |
2626f709 |
87 | my $graph_hash = { 'nodes' => [], |
88 | 'edges' => [], |
89 | 'name' => $graph_el->getAttribute( 'id' ) }; |
90 | |
91 | my $node_reg = {}; |
92 | |
93 | # Read in graph globals (if any). |
94 | # print STDERR "Reading graphml global data\n"; |
95 | foreach my $dkey ( keys %$graphattr ) { |
96 | my $keyname = $graphattr->{$dkey}; |
97 | my $keyvalue = _lookup_node_data( $graph_el, $dkey ); |
98 | $graph_hash->{'global'}->{$keyname} = $keyvalue if defined $keyvalue; |
99 | } |
100 | |
101 | # Add the nodes to the graph hash. |
102 | # print STDERR "Reading graphml nodes\n"; |
103 | my @nodes = $xpc->findnodes( './/g:node', $graph_el ); |
104 | foreach my $n ( @nodes ) { |
105 | # Could use a better way of registering these |
106 | my $node_hash = {}; |
107 | foreach my $dkey ( keys %$nodedata ) { |
108 | my $keyname = $nodedata->{$dkey}; |
109 | my $keyvalue = _lookup_node_data( $n, $dkey ); |
110 | $node_hash->{$keyname} = $keyvalue if defined $keyvalue; |
111 | } |
112 | $node_reg->{$n->getAttribute( 'id' )} = $node_hash; |
113 | push( @{$graph_hash->{'nodes'}}, $node_hash ); |
114 | } |
115 | |
116 | # Now add the edges, and cross-ref with the node objects. |
117 | # print STDERR "Reading graphml edges\n"; |
118 | my @edges = $xpc->findnodes( './/g:edge', $graph_el ); |
119 | foreach my $e ( @edges ) { |
120 | my $from = $e->getAttribute('source'); |
121 | my $to = $e->getAttribute('target'); |
122 | |
123 | # We don't know whether the edge data is one per witness |
124 | # or one per witness type, or something else. So we just |
125 | # save it and let our calling parser decide. |
126 | my $edge_hash = { |
127 | 'source' => $node_reg->{$from}, |
128 | 'target' => $node_reg->{$to}, |
129 | }; |
130 | foreach my $wkey( keys %$edgedata ) { |
131 | my $wname = $edgedata->{$wkey}; |
132 | my $wlabel = _lookup_node_data( $e, $wkey ); |
133 | $edge_hash->{$wname} = $wlabel if $wlabel; |
134 | } |
135 | push( @{$graph_hash->{'edges'}}, $edge_hash ); |
136 | } |
137 | push( @returned_graphs, $graph_hash ); |
b49c4318 |
138 | } |
2626f709 |
139 | return @returned_graphs; |
4a8828f0 |
140 | } |
b49c4318 |
141 | |
e867486f |
142 | |
4a8828f0 |
143 | sub _lookup_node_data { |
ec3f9144 |
144 | my( $xmlnode, $key ) = @_; |
4a8828f0 |
145 | my $lookup_xpath = './g:data[@key="%s"]/child::text()'; |
94c00c71 |
146 | my $data = $xpc->find( sprintf( $lookup_xpath, $key ), $xmlnode ); |
147 | # If we get back an empty nodelist, we return undef. |
148 | if( ref( $data ) ) { |
149 | return undef unless $data->size; |
150 | return $data->to_literal->value; |
151 | } |
152 | # Otherwise we got back a value. Return it. |
4a8828f0 |
153 | return $data; |
b49c4318 |
154 | } |
155 | |
00311328 |
156 | sub throw { |
157 | Text::Tradition::Error->throw( |
158 | 'ident' => 'Parser::GraphML error', |
159 | 'message' => $_[0], |
160 | ); |
161 | } |
162 | |
2ceca8c3 |
163 | =head1 LICENSE |
164 | |
165 | This package is free software and is provided "as is" without express |
166 | or implied warranty. You can redistribute it and/or modify it under |
167 | the same terms as Perl itself. |
168 | |
169 | =head1 AUTHOR |
170 | |
171 | Tara L Andrews, aurum@cpan.org |
172 | |
173 | =cut |
174 | |
b49c4318 |
175 | 1; |