Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition::Collation; |
d047cd52 |
2 | |
3 | use Graph::Easy; |
8e1394aa |
4 | use IPC::Run qw( run binary ); |
5 | use Module::Load; |
6 | use Text::Tradition::Collation::Reading; |
dd3b58b0 |
7 | use Moose; |
8 | |
9 | has 'graph' => ( |
d047cd52 |
10 | is => 'ro', |
11 | isa => 'Graph::Easy', |
12 | handles => { |
8e1394aa |
13 | add_reading => 'add_node', |
14 | del_reading => 'del_node', |
15 | add_path => 'add_edge', |
16 | del_path => 'del_edge', |
17 | reading => 'node', |
18 | path => 'edge', |
19 | readings => 'nodes', |
20 | paths => 'edges', |
d047cd52 |
21 | }, |
22 | default => sub { Graph::Easy->new( undirected => 0 ) }, |
23 | ); |
784877d9 |
24 | |
dd3b58b0 |
25 | |
dd3b58b0 |
26 | has 'tradition' => ( |
8e1394aa |
27 | is => 'rw', |
d047cd52 |
28 | isa => 'Text::Tradition', |
29 | ); |
dd3b58b0 |
30 | |
8e1394aa |
31 | has 'svg' => ( |
32 | is => 'ro', |
33 | isa => 'Str', |
34 | writer => '_save_svg', |
35 | predicate => 'has_svg', |
36 | ); |
37 | |
38 | has 'graphviz' => ( |
39 | is => 'ro', |
40 | isa => 'Str', |
41 | writer => '_save_graphviz', |
42 | predicate => 'has_graphviz', |
43 | ); |
44 | |
45 | has 'graphml' => ( |
46 | is => 'ro', |
47 | isa => 'XML::LibXML::Document', |
48 | writer => '_save_graphml', |
49 | predicate => 'has_graphml', |
50 | ); |
51 | |
dd3b58b0 |
52 | # The collation can be created two ways: |
53 | # 1. Collate a set of witnesses (with CollateX I guess) and process |
54 | # the results as in 2. |
55 | # 2. Read a pre-prepared collation in one of a variety of formats, |
56 | # and make the graph from that. |
57 | |
58 | # The graph itself will (for now) be immutable, and the positions |
59 | # within the graph will also be immutable. We need to calculate those |
60 | # positions upon graph construction. The equivalences between graph |
61 | # nodes will be mutable, entirely determined by the user (or possibly |
62 | # by some semantic pre-processing provided by the user.) So the |
63 | # constructor should just make an empty equivalences object. The |
64 | # constructor will also need to make the witness objects, if we didn't |
65 | # come through option 1. |
66 | |
d047cd52 |
67 | sub BUILD { |
68 | my( $self, $args ) = @_; |
69 | |
70 | # Call the appropriate parser on the given data |
71 | my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$args ); |
72 | my $format = shift( @formats ); |
73 | unless( $format ) { |
74 | warn "No data given to create a graph; will initialize an empty one"; |
75 | } |
8e1394aa |
76 | if( $format && $format =~ /^(CSV|CTE)$/ && !exists $args->{'base'} ) { |
d047cd52 |
77 | warn "Cannot make a graph from $format without a base text"; |
78 | return; |
79 | } |
80 | |
81 | # Initialize our graph object. |
8e1394aa |
82 | $self->graph->use_class('node', 'Text::Tradition::Collation::Reading'); |
d047cd52 |
83 | $self->graph->set_attribute( 'node', 'shape', 'ellipse' ); |
84 | # Starting point for all texts |
8e1394aa |
85 | my $last_node = $self->add_reading( '#START#' ); |
d047cd52 |
86 | |
87 | # Now do the parsing. |
88 | my @sigla; |
89 | if( $format ) { |
90 | my @parseargs; |
91 | if( $format =~ /^(CSV|CTE)$/ ) { |
92 | @parseargs = ( 'base' => $args->{'base'}, |
93 | 'data' => $args->{$format}, |
94 | 'format' => $format ); |
95 | $format = 'BaseText'; |
96 | } else { |
97 | @parseargs = ( $args->{ $format } ); |
98 | } |
99 | my $mod = "Text::Tradition::Parser::$format"; |
100 | load( $mod ); |
101 | # TODO parse needs to return witness IDs |
8e1394aa |
102 | @sigla = $mod->can('parse')->( $self, @parseargs ); |
d047cd52 |
103 | } |
104 | |
105 | # Do we need to initialize the witnesses? |
106 | unless( $args->{'have_witnesses'} ) { |
107 | # initialize Witness objects for all our witnesses |
108 | my @witnesses; |
109 | foreach my $sigil ( @sigla ) { |
110 | push( @witnesses, Text::Tradition::Witness->new( 'sigil' => $sigil ) ); |
111 | } |
112 | $self->tradition->witnesses( \@witnesses ); |
113 | } |
114 | } |
784877d9 |
115 | |
116 | # Wrappers around some methods |
117 | |
118 | sub merge_readings { |
119 | my $self = shift; |
120 | my $first_node = shift; |
121 | my $second_node = shift; |
122 | $first_node->merge_from( $second_node ); |
123 | unshift( @_, $first_node, $second_node ); |
124 | return $self->graph->merge_nodes( @_ ); |
125 | } |
126 | |
8e1394aa |
127 | =head2 Output method(s) |
128 | |
129 | =over |
130 | |
131 | =item B<as_svg> |
132 | |
133 | print $graph->as_svg( $recalculate ); |
134 | |
135 | Returns an SVG string that represents the graph. Uses GraphViz to do |
136 | this, because Graph::Easy doesn't cope well with long graphs. Unless |
137 | $recalculate is passed (and is a true value), the method will return a |
138 | cached copy of the SVG after the first call to the method. |
139 | |
140 | =cut |
141 | |
142 | sub as_svg { |
143 | my( $self, $recalc ) = @_; |
144 | return $self->svg if $self->has_svg; |
145 | |
146 | $self->_save_graphviz( $self->graph->as_graphviz() ) |
147 | unless( $self->has_graphviz && !$recalc ); |
148 | |
149 | my @cmd = qw/dot -Tsvg/; |
150 | my( $svg, $err ); |
151 | my $in = $self->graphviz; |
152 | run( \@cmd, \$in, ">", binary(), \$svg ); |
153 | $self->{'svg'} = $svg; |
154 | return $svg; |
155 | } |
156 | |
157 | =item B<as_graphml> |
158 | |
159 | print $graph->as_graphml( $recalculate ) |
160 | |
161 | Returns a GraphML representation of the collation graph, with |
162 | transposition information and position information. Unless |
163 | $recalculate is passed (and is a true value), the method will return a |
164 | cached copy of the SVG after the first call to the method. |
165 | |
166 | =cut |
167 | |
168 | sub as_graphml { |
169 | my( $self, $recalc ) = @_; |
170 | return $self->graphml if $self->has_graphml; |
171 | |
172 | # Some namespaces |
173 | my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns'; |
174 | my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'; |
175 | my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' . |
176 | 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; |
177 | |
178 | # Create the document and root node |
179 | my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); |
180 | my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); |
181 | $graphml->setDocumentElement( $root ); |
182 | $root->setNamespace( $xsi_ns, 'xsi', 0 ); |
183 | $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); |
184 | |
185 | # Add the data keys for nodes |
186 | my @node_data = ( 'name', 'token', 'identical', 'position' ); |
187 | foreach my $ndi ( 0 .. $#node_data ) { |
188 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
189 | $key->setAttribute( 'attr.name', $node_data[$ndi] ); |
190 | $key->setAttribute( 'attr.type', 'string' ); |
191 | $key->setAttribute( 'for', 'node' ); |
192 | $key->setAttribute( 'id', 'd'.$ndi ); |
193 | } |
194 | |
195 | # Add the data keys for edges |
196 | my %wit_hash; |
197 | my $wit_ctr = 0; |
198 | foreach my $wit ( $self->getWitnessList ) { |
199 | my $wit_key = 'w' . $wit_ctr++; |
200 | $wit_hash{$wit} = $wit_key; |
201 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
202 | $key->setAttribute( 'attr.name', $wit ); |
203 | $key->setAttribute( 'attr.type', 'string' ); |
204 | $key->setAttribute( 'for', 'edge' ); |
205 | $key->setAttribute( 'id', $wit_key ); |
206 | } |
207 | |
208 | # Add the graph, its nodes, and its edges |
209 | my $graph = $root->addNewChild( $graphml_ns, 'graph' ); |
210 | $graph->setAttribute( 'edgedefault', 'directed' ); |
211 | $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful |
212 | $graph->setAttribute( 'parse.edgeids', 'canonical' ); |
213 | $graph->setAttribute( 'parse.edges', $self->edges() ); |
214 | $graph->setAttribute( 'parse.nodeids', 'canonical' ); |
215 | $graph->setAttribute( 'parse.nodes', $self->nodes() ); |
216 | $graph->setAttribute( 'parse.order', 'nodesfirst' ); |
217 | |
218 | my $node_ctr = 0; |
219 | my %node_hash; |
220 | foreach my $n ( $self->readings ) { |
221 | my %this_node_data = (); |
222 | foreach my $ndi ( 0 .. $#node_data ) { |
223 | my $value; |
224 | $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name'; |
225 | $this_node_data{'d'.$ndi} = $n->label |
226 | if $node_data[$ndi] eq 'token'; |
227 | $this_node_data{'d'.$ndi} = $n->primary->name if $n->has_primary; |
228 | $this_node_data{'d'.$ndi} = |
229 | $self->{'positions'}->node_position( $n ) |
230 | if $node_data[$ndi] eq 'position'; |
231 | } |
232 | my $node_el = $graph->addNewChild( $graphml_ns, 'node' ); |
233 | my $node_xmlid = 'n' . $node_ctr++; |
234 | $node_hash{ $n->name } = $node_xmlid; |
235 | $node_el->setAttribute( 'id', $node_xmlid ); |
236 | |
237 | foreach my $dk ( keys %this_node_data ) { |
238 | my $d_el = $node_el->addNewChild( $graphml_ns, 'data' ); |
239 | $d_el->setAttribute( 'key', $dk ); |
240 | $d_el->appendTextChild( $this_node_data{$dk} ); |
241 | } |
242 | } |
243 | |
244 | foreach my $e ( $self->edges() ) { |
245 | my( $name, $from, $to ) = ( $e->name, |
246 | $node_hash{ $e->from()->name() }, |
247 | $node_hash{ $e->to()->name() } ); |
248 | my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); |
249 | $edge_el->setAttribute( 'source', $from ); |
250 | $edge_el->setAttribute( 'target', $to ); |
251 | $edge_el->setAttribute( 'id', $name ); |
252 | # TODO Got to add the witnesses |
253 | } |
254 | |
255 | # Return the thing |
256 | $self->_save_graphml( $graphml ); |
257 | return $graphml; |
258 | } |
259 | |
260 | =back |
261 | |
262 | =item B<start> |
263 | |
264 | my $beginning = $collation->start(); |
265 | |
266 | Returns the beginning of the collation, a meta-reading with label '#START#'. |
267 | |
268 | =cut |
269 | |
270 | sub start { |
271 | # Return the beginning node of the graph. |
272 | my $self = shift; |
273 | my( $new_start ) = @_; |
274 | if( $new_start ) { |
275 | $self->del_reading( '#START#' ); |
276 | $self->graph->rename_node( $new_start, '#START#' ); |
277 | } |
278 | return $self->reading('#START#'); |
279 | } |
280 | |
281 | =item B<next_word> |
282 | |
283 | my $next_node = $graph->next_word( $node, $path ); |
284 | |
285 | Returns the node that follows the given node along the given witness |
286 | path. TODO These are badly named. |
287 | |
288 | =cut |
289 | |
290 | sub next_word { |
291 | # Return the successor via the corresponding edge. |
292 | my $self = shift; |
293 | return $self->_find_linked_word( 'next', @_ ); |
294 | } |
295 | |
296 | =item B<prior_word> |
297 | |
298 | my $prior_node = $graph->prior_word( $node, $path ); |
299 | |
300 | Returns the node that precedes the given node along the given witness |
301 | path. TODO These are badly named. |
302 | |
303 | =cut |
304 | |
305 | sub prior_word { |
306 | # Return the predecessor via the corresponding edge. |
307 | my $self = shift; |
308 | return $self->_find_linked_word( 'prior', @_ ); |
309 | } |
310 | |
311 | sub _find_linked_word { |
312 | my( $self, $direction, $node, $edge ) = @_; |
313 | $edge = 'base text' unless $edge; |
314 | my @linked_edges = $direction eq 'next' |
315 | ? $node->outgoing() : $node->incoming(); |
316 | return undef unless scalar( @linked_edges ); |
317 | |
318 | # We have to find the linked edge that contains all of the |
319 | # witnesses supplied in $edge. |
320 | my @edge_wits = split( /, /, $edge ); |
321 | foreach my $le ( @linked_edges ) { |
322 | my @le_wits = split( /, /, $le->name() ); |
323 | if( _is_within( \@edge_wits, \@le_wits ) ) { |
324 | # This is the right edge. |
325 | return $direction eq 'next' ? $le->to() : $le->from(); |
326 | } |
327 | } |
328 | warn "Could not find $direction node from " . $node->label |
329 | . " along edge $edge"; |
330 | return undef; |
331 | } |
332 | |
333 | sub create_witnesses { |
334 | # TODO Given a new collation, make a bunch of Witness objects. |
335 | |
336 | return []; |
337 | } |
338 | |
dd3b58b0 |
339 | no Moose; |
340 | __PACKAGE__->meta->make_immutable; |