Commit | Line | Data |
b49c4318 |
1 | package Traditions::Parser::GraphML; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Traditions::Graph; |
6 | use XML::LibXML; |
7 | use XML::LibXML::XPathContext; |
8 | |
9 | |
10 | # Takes a GraphML string; returns a Graph::Easy object. |
11 | |
12 | sub parse { |
13 | my( $graph, $graphml_str ) = @_; |
14 | |
15 | my $parser = XML::LibXML->new(); |
16 | my $doc = $parser->parse_string( $graphml_str ); |
17 | my $collation = $doc->documentElement(); |
18 | my $xpc = XML::LibXML::XPathContext->new( $collation ); |
19 | $xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' ); |
20 | |
21 | # First get the ID keys, for witnesses and for collation data |
22 | my %nodedata; |
23 | my %witnesses; |
24 | foreach my $k ( $xpc->findnodes( '//g:key' ) ) { |
25 | # Each key has a 'for' attribute; the edge keys are witnesses, and |
26 | # the node keys contain an ID and string for each node. |
27 | |
28 | if( $k->getAttribute( 'for' ) eq 'node' ) { |
29 | $nodedata{ $k->getAttribute( 'attr.name' ) } = $k->getAttribute( 'id' ); |
30 | } else { |
31 | $witnesses{ $k->getAttribute( 'id' ) } = $k->getAttribute( 'attr.name' ); |
32 | } |
33 | } |
34 | |
35 | my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0]; |
36 | |
37 | # Add the nodes to the graph. First delete the start node, because |
38 | # GraphML graphs will have their own start nodes. |
39 | $graph->del_node( $graph->start() ); |
40 | # Map from XML IDs to node name/identity |
41 | my %node_name; |
42 | # Keep track of whatever extra info we're passed |
43 | my $extra_data = {}; |
44 | my @nodes = $xpc->findnodes( '//g:node' ); |
45 | foreach my $n ( @nodes ) { |
46 | my $lookup_xpath = './g:data[@key="%s"]/child::text()'; |
47 | my $id = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{'number'} ), $n ); |
48 | my $label = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{'token'} ), $n ); |
49 | my $gnode = $graph->add_node( $id ); |
50 | $node_name{ $n->getAttribute('id') } = $id; |
51 | $gnode->set_attribute( 'label', $label ); |
52 | |
53 | # Now get the rest of the data |
54 | my $extra = {}; |
55 | my @keys = grep { $_ !~ /^(number|token)$/ } keys( %nodedata ); |
56 | foreach my $k ( @keys ) { |
57 | my $data = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{ $k } ), $n ); |
58 | $extra->{ $k } = $data; |
59 | } |
60 | $extra_data->{ $id } = $extra; |
61 | } |
62 | |
63 | # Now add the edges. |
64 | my @edges = $xpc->findnodes( '//g:edge' ); |
65 | foreach my $e ( @edges ) { |
66 | my $from = $node_name{ $e->getAttribute('source') }; |
67 | my $to = $node_name{ $e->getAttribute('target') }; |
68 | # Label according to the witnesses present. |
69 | my @wit_ids = $xpc->findnodes( './g:data/attribute::key', $e ); |
70 | my @wit_names = map { $witnesses{ $_->getValue() } } @wit_ids; |
71 | my $label = join( ', ', @wit_names ); |
72 | |
73 | $graph->add_edge( $from, $to, $label ); |
74 | } |
75 | |
76 | ## Reverse the node_name hash so that we have two-way lookup. |
77 | my %node_id = reverse %node_name; |
a25d4374 |
78 | |
79 | ## Record the nodes that are marked as transposed. |
80 | my $id_xpath = '//g:node[g:data[@key="' . $nodedata{'identity'} . '"]]'; |
81 | my $transposed_nodes = $xpc->find( $id_xpath ); |
82 | my $identical_nodes; |
83 | foreach my $tn ( @$transposed_nodes ) { |
84 | $identical_nodes->{ $node_name{ $tn->getAttribute('id') }} = |
85 | $node_name{ $xpc->findvalue( './g:data[@key="' |
86 | . $nodedata{'identity'} |
87 | . '"]/text()', $tn ) }; |
88 | } |
89 | $graph->set_identical_nodes( $identical_nodes ); |
90 | |
b49c4318 |
91 | |
92 | # Find the beginning and end nodes of the graph. The beginning node |
93 | # has no incoming edges; the end node has no outgoing edges. |
94 | my( $begin_node, $end_node ); |
95 | foreach my $gnode ( $graph->nodes() ) { |
96 | print STDERR "Checking node " . $gnode->name . "\n"; |
97 | my @outgoing = $gnode->outgoing(); |
98 | my @incoming = $gnode->incoming(); |
99 | |
100 | unless( scalar @incoming ) { |
101 | warn "Already have a beginning node" if $begin_node; |
102 | my $node_xml_id = $node_id{ $gnode->name() }; |
103 | my @bn = $xpc->findnodes( '//g:node[@id="' . $node_xml_id . '"]' ); |
104 | warn "XPath did not find a node for id $node_xml_id" |
105 | unless scalar @bn; |
106 | $begin_node = $bn[0]; |
107 | $graph->start( $gnode ); |
108 | $node_name{ 0 } = '#START#'; |
109 | $node_id{'#START#'} = 0; |
110 | } |
111 | unless( scalar @outgoing ) { |
112 | warn "Already have an ending node" if $end_node; |
113 | my $node_xml_id = $node_id{ $gnode->name() }; |
114 | my @bn = $xpc->findnodes( '//g:node[@id="' . $node_xml_id . '"]' ); |
115 | warn "XPath did not find a node for id $node_xml_id" |
116 | unless scalar @bn; |
117 | $end_node = $bn[0]; |
118 | } |
119 | } |
120 | |
121 | # Now for each witness, walk the path through the graph. |
122 | # Then we need to find the common nodes. |
123 | # TODO This method is going to fall down if we have a very gappy |
124 | # text in the collation. |
125 | # TODO think about whether it makes more sense to do this in the |
126 | # XML or in the graph. Right now it's the XML. |
127 | my $paths = {}; |
128 | my @common_nodes; |
129 | foreach my $wit ( keys %witnesses ) { |
130 | my $node_id = $begin_node->getAttribute('id'); |
131 | my @wit_path = ( $node_name{ $node_id } ); |
132 | # TODO Detect loops at some point |
133 | while( $node_id != $end_node->getAttribute('id') ) { |
134 | # Find the node which is the target of the edge whose |
135 | # source is $node_id and applies to this witness. |
136 | my $xpath_expr = '//g:edge[child::g:data[@key="' |
137 | . $wit . '"] and attribute::source="' |
138 | . $node_id . '"]'; |
139 | my $next_edge = $xpc->find( $xpath_expr, $graph_el )->[0]; |
140 | $node_id = $next_edge->getAttribute('target'); |
141 | push( @wit_path, $node_name{ $node_id } ); |
142 | } |
143 | $paths->{ $witnesses{ $wit }} = \@wit_path; |
144 | if( @common_nodes ) { |
145 | my @cn; |
146 | foreach my $n ( @wit_path) { |
147 | push( @cn, $n ) if grep { $_ eq $n } @common_nodes; |
148 | } |
149 | @common_nodes = (); |
150 | push( @common_nodes, @cn ); |
151 | } else { |
152 | push( @common_nodes, @wit_path ); |
153 | } |
154 | } |
155 | |
156 | # Mark all the nodes as either common or not. |
157 | foreach my $cn ( @common_nodes ) { |
158 | print STDERR "Setting $cn as common node\n"; |
159 | $graph->node( $cn )->set_attribute( 'class', 'common' ); |
160 | } |
161 | foreach my $n ( $graph->nodes() ) { |
162 | $n->set_attribute( 'class', 'variant' ) |
163 | unless $n->get_attribute( 'class' ) eq 'common'; |
164 | } |
165 | |
a25d4374 |
166 | # Now calculate graph positions. |
167 | $graph->make_positions( \@common_nodes, $paths ); |
168 | |
b49c4318 |
169 | } |
170 | |
171 | 1; |