make the first couple of tests pass
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
3use Graph::Easy;
8e1394aa 4use IPC::Run qw( run binary );
5use Module::Load;
6use Text::Tradition::Collation::Reading;
dd3b58b0 7use Moose;
8
9has 'graph' => (
d047cd52 10 is => 'ro',
11 isa => 'Graph::Easy',
12 handles => {
8e1394aa 13 add_reading => 'add_node',
14 del_reading => 'del_node',
15 add_path => 'add_edge',
16 del_path => 'del_edge',
17 reading => 'node',
18 path => 'edge',
19 readings => 'nodes',
20 paths => 'edges',
d047cd52 21 },
22 default => sub { Graph::Easy->new( undirected => 0 ) },
23 );
784877d9 24
dd3b58b0 25
dd3b58b0 26has 'tradition' => (
8e1394aa 27 is => 'rw',
d047cd52 28 isa => 'Text::Tradition',
29 );
dd3b58b0 30
8e1394aa 31has 'svg' => (
32 is => 'ro',
33 isa => 'Str',
34 writer => '_save_svg',
35 predicate => 'has_svg',
36 );
37
38has 'graphviz' => (
39 is => 'ro',
40 isa => 'Str',
41 writer => '_save_graphviz',
42 predicate => 'has_graphviz',
43 );
44
45has 'graphml' => (
46 is => 'ro',
47 isa => 'XML::LibXML::Document',
48 writer => '_save_graphml',
49 predicate => 'has_graphml',
50 );
51
dd3b58b0 52# The collation can be created two ways:
53# 1. Collate a set of witnesses (with CollateX I guess) and process
54# the results as in 2.
55# 2. Read a pre-prepared collation in one of a variety of formats,
56# and make the graph from that.
57
58# The graph itself will (for now) be immutable, and the positions
59# within the graph will also be immutable. We need to calculate those
60# positions upon graph construction. The equivalences between graph
61# nodes will be mutable, entirely determined by the user (or possibly
62# by some semantic pre-processing provided by the user.) So the
63# constructor should just make an empty equivalences object. The
64# constructor will also need to make the witness objects, if we didn't
65# come through option 1.
66
d047cd52 67sub BUILD {
68 my( $self, $args ) = @_;
69
70 # Call the appropriate parser on the given data
71 my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$args );
72 my $format = shift( @formats );
73 unless( $format ) {
74 warn "No data given to create a graph; will initialize an empty one";
75 }
8e1394aa 76 if( $format && $format =~ /^(CSV|CTE)$/ && !exists $args->{'base'} ) {
d047cd52 77 warn "Cannot make a graph from $format without a base text";
78 return;
79 }
80
81 # Initialize our graph object.
8e1394aa 82 $self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
d047cd52 83 $self->graph->set_attribute( 'node', 'shape', 'ellipse' );
84 # Starting point for all texts
8e1394aa 85 my $last_node = $self->add_reading( '#START#' );
d047cd52 86
87 # Now do the parsing.
88 my @sigla;
89 if( $format ) {
90 my @parseargs;
91 if( $format =~ /^(CSV|CTE)$/ ) {
92 @parseargs = ( 'base' => $args->{'base'},
93 'data' => $args->{$format},
94 'format' => $format );
95 $format = 'BaseText';
96 } else {
97 @parseargs = ( $args->{ $format } );
98 }
99 my $mod = "Text::Tradition::Parser::$format";
100 load( $mod );
101 # TODO parse needs to return witness IDs
8e1394aa 102 @sigla = $mod->can('parse')->( $self, @parseargs );
d047cd52 103 }
104
105 # Do we need to initialize the witnesses?
106 unless( $args->{'have_witnesses'} ) {
107 # initialize Witness objects for all our witnesses
108 my @witnesses;
109 foreach my $sigil ( @sigla ) {
110 push( @witnesses, Text::Tradition::Witness->new( 'sigil' => $sigil ) );
111 }
112 $self->tradition->witnesses( \@witnesses );
113 }
114}
784877d9 115
116# Wrappers around some methods
117
118sub merge_readings {
119 my $self = shift;
120 my $first_node = shift;
121 my $second_node = shift;
122 $first_node->merge_from( $second_node );
123 unshift( @_, $first_node, $second_node );
124 return $self->graph->merge_nodes( @_ );
125}
126
8e1394aa 127=head2 Output method(s)
128
129=over
130
131=item B<as_svg>
132
133print $graph->as_svg( $recalculate );
134
135Returns an SVG string that represents the graph. Uses GraphViz to do
136this, because Graph::Easy doesn't cope well with long graphs. Unless
137$recalculate is passed (and is a true value), the method will return a
138cached copy of the SVG after the first call to the method.
139
140=cut
141
142sub as_svg {
143 my( $self, $recalc ) = @_;
144 return $self->svg if $self->has_svg;
145
146 $self->_save_graphviz( $self->graph->as_graphviz() )
147 unless( $self->has_graphviz && !$recalc );
148
149 my @cmd = qw/dot -Tsvg/;
150 my( $svg, $err );
151 my $in = $self->graphviz;
152 run( \@cmd, \$in, ">", binary(), \$svg );
153 $self->{'svg'} = $svg;
154 return $svg;
155}
156
157=item B<as_graphml>
158
159print $graph->as_graphml( $recalculate )
160
161Returns a GraphML representation of the collation graph, with
162transposition information and position information. Unless
163$recalculate is passed (and is a true value), the method will return a
164cached copy of the SVG after the first call to the method.
165
166=cut
167
168sub as_graphml {
169 my( $self, $recalc ) = @_;
170 return $self->graphml if $self->has_graphml;
171
172 # Some namespaces
173 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
174 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
175 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
176 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
177
178 # Create the document and root node
179 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
180 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
181 $graphml->setDocumentElement( $root );
182 $root->setNamespace( $xsi_ns, 'xsi', 0 );
183 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
184
185 # Add the data keys for nodes
186 my @node_data = ( 'name', 'token', 'identical', 'position' );
187 foreach my $ndi ( 0 .. $#node_data ) {
188 my $key = $root->addNewChild( $graphml_ns, 'key' );
189 $key->setAttribute( 'attr.name', $node_data[$ndi] );
190 $key->setAttribute( 'attr.type', 'string' );
191 $key->setAttribute( 'for', 'node' );
192 $key->setAttribute( 'id', 'd'.$ndi );
193 }
194
195 # Add the data keys for edges
196 my %wit_hash;
197 my $wit_ctr = 0;
198 foreach my $wit ( $self->getWitnessList ) {
199 my $wit_key = 'w' . $wit_ctr++;
200 $wit_hash{$wit} = $wit_key;
201 my $key = $root->addNewChild( $graphml_ns, 'key' );
202 $key->setAttribute( 'attr.name', $wit );
203 $key->setAttribute( 'attr.type', 'string' );
204 $key->setAttribute( 'for', 'edge' );
205 $key->setAttribute( 'id', $wit_key );
206 }
207
208 # Add the graph, its nodes, and its edges
209 my $graph = $root->addNewChild( $graphml_ns, 'graph' );
210 $graph->setAttribute( 'edgedefault', 'directed' );
211 $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
212 $graph->setAttribute( 'parse.edgeids', 'canonical' );
213 $graph->setAttribute( 'parse.edges', $self->edges() );
214 $graph->setAttribute( 'parse.nodeids', 'canonical' );
215 $graph->setAttribute( 'parse.nodes', $self->nodes() );
216 $graph->setAttribute( 'parse.order', 'nodesfirst' );
217
218 my $node_ctr = 0;
219 my %node_hash;
220 foreach my $n ( $self->readings ) {
221 my %this_node_data = ();
222 foreach my $ndi ( 0 .. $#node_data ) {
223 my $value;
224 $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name';
225 $this_node_data{'d'.$ndi} = $n->label
226 if $node_data[$ndi] eq 'token';
227 $this_node_data{'d'.$ndi} = $n->primary->name if $n->has_primary;
228 $this_node_data{'d'.$ndi} =
229 $self->{'positions'}->node_position( $n )
230 if $node_data[$ndi] eq 'position';
231 }
232 my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
233 my $node_xmlid = 'n' . $node_ctr++;
234 $node_hash{ $n->name } = $node_xmlid;
235 $node_el->setAttribute( 'id', $node_xmlid );
236
237 foreach my $dk ( keys %this_node_data ) {
238 my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
239 $d_el->setAttribute( 'key', $dk );
240 $d_el->appendTextChild( $this_node_data{$dk} );
241 }
242 }
243
244 foreach my $e ( $self->edges() ) {
245 my( $name, $from, $to ) = ( $e->name,
246 $node_hash{ $e->from()->name() },
247 $node_hash{ $e->to()->name() } );
248 my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
249 $edge_el->setAttribute( 'source', $from );
250 $edge_el->setAttribute( 'target', $to );
251 $edge_el->setAttribute( 'id', $name );
252 # TODO Got to add the witnesses
253 }
254
255 # Return the thing
256 $self->_save_graphml( $graphml );
257 return $graphml;
258}
259
260=back
261
262=item B<start>
263
264my $beginning = $collation->start();
265
266Returns the beginning of the collation, a meta-reading with label '#START#'.
267
268=cut
269
270sub start {
271 # Return the beginning node of the graph.
272 my $self = shift;
273 my( $new_start ) = @_;
274 if( $new_start ) {
275 $self->del_reading( '#START#' );
276 $self->graph->rename_node( $new_start, '#START#' );
277 }
278 return $self->reading('#START#');
279}
280
281=item B<next_word>
282
283my $next_node = $graph->next_word( $node, $path );
284
285Returns the node that follows the given node along the given witness
286path. TODO These are badly named.
287
288=cut
289
290sub next_word {
291 # Return the successor via the corresponding edge.
292 my $self = shift;
293 return $self->_find_linked_word( 'next', @_ );
294}
295
296=item B<prior_word>
297
298my $prior_node = $graph->prior_word( $node, $path );
299
300Returns the node that precedes the given node along the given witness
301path. TODO These are badly named.
302
303=cut
304
305sub prior_word {
306 # Return the predecessor via the corresponding edge.
307 my $self = shift;
308 return $self->_find_linked_word( 'prior', @_ );
309}
310
311sub _find_linked_word {
312 my( $self, $direction, $node, $edge ) = @_;
313 $edge = 'base text' unless $edge;
314 my @linked_edges = $direction eq 'next'
315 ? $node->outgoing() : $node->incoming();
316 return undef unless scalar( @linked_edges );
317
318 # We have to find the linked edge that contains all of the
319 # witnesses supplied in $edge.
320 my @edge_wits = split( /, /, $edge );
321 foreach my $le ( @linked_edges ) {
322 my @le_wits = split( /, /, $le->name() );
323 if( _is_within( \@edge_wits, \@le_wits ) ) {
324 # This is the right edge.
325 return $direction eq 'next' ? $le->to() : $le->from();
326 }
327 }
328 warn "Could not find $direction node from " . $node->label
329 . " along edge $edge";
330 return undef;
331}
332
333sub create_witnesses {
334 # TODO Given a new collation, make a bunch of Witness objects.
335
336 return [];
337}
338
dd3b58b0 339no Moose;
340__PACKAGE__->meta->make_immutable;