1 package Text::Tradition::Collation;
4 use IPC::Run qw( run binary );
6 use Text::Tradition::Collation::Reading;
13 add_reading => 'add_node',
14 del_reading => 'del_node',
15 add_path => 'add_edge',
16 del_path => 'del_edge',
22 default => sub { Graph::Easy->new( undirected => 0 ) },
28 isa => 'Text::Tradition',
34 writer => '_save_svg',
35 predicate => 'has_svg',
41 writer => '_save_graphviz',
42 predicate => 'has_graphviz',
47 isa => 'XML::LibXML::Document',
48 writer => '_save_graphml',
49 predicate => 'has_graphml',
52 # The collation can be created two ways:
53 # 1. Collate a set of witnesses (with CollateX I guess) and process
54 # the results as in 2.
55 # 2. Read a pre-prepared collation in one of a variety of formats,
56 # and make the graph from that.
58 # The graph itself will (for now) be immutable, and the positions
59 # within the graph will also be immutable. We need to calculate those
60 # positions upon graph construction. The equivalences between graph
61 # nodes will be mutable, entirely determined by the user (or possibly
62 # by some semantic pre-processing provided by the user.) So the
63 # constructor should just make an empty equivalences object. The
64 # constructor will also need to make the witness objects, if we didn't
65 # come through option 1.
68 my( $self, $args ) = @_;
70 # Call the appropriate parser on the given data
71 my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$args );
72 my $format = shift( @formats );
74 warn "No data given to create a graph; will initialize an empty one";
76 if( $format && $format =~ /^(CSV|CTE)$/ && !exists $args->{'base'} ) {
77 warn "Cannot make a graph from $format without a base text";
81 # Initialize our graph object.
82 $self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
83 $self->graph->set_attribute( 'node', 'shape', 'ellipse' );
84 # Starting point for all texts
85 my $last_node = $self->add_reading( '#START#' );
91 if( $format =~ /^(CSV|CTE)$/ ) {
92 @parseargs = ( 'base' => $args->{'base'},
93 'data' => $args->{$format},
94 'format' => $format );
97 @parseargs = ( $args->{ $format } );
99 my $mod = "Text::Tradition::Parser::$format";
101 # TODO parse needs to return witness IDs
102 @sigla = $mod->can('parse')->( $self, @parseargs );
105 # Do we need to initialize the witnesses?
106 unless( $args->{'have_witnesses'} ) {
107 # initialize Witness objects for all our witnesses
109 foreach my $sigil ( @sigla ) {
110 push( @witnesses, Text::Tradition::Witness->new( 'sigil' => $sigil ) );
112 $self->tradition->witnesses( \@witnesses );
116 # Wrappers around some methods
120 my $first_node = shift;
121 my $second_node = shift;
122 $first_node->merge_from( $second_node );
123 unshift( @_, $first_node, $second_node );
124 return $self->graph->merge_nodes( @_ );
127 =head2 Output method(s)
133 print $graph->as_svg( $recalculate );
135 Returns an SVG string that represents the graph. Uses GraphViz to do
136 this, because Graph::Easy doesn't cope well with long graphs. Unless
137 $recalculate is passed (and is a true value), the method will return a
138 cached copy of the SVG after the first call to the method.
143 my( $self, $recalc ) = @_;
144 return $self->svg if $self->has_svg;
146 $self->_save_graphviz( $self->graph->as_graphviz() )
147 unless( $self->has_graphviz && !$recalc );
149 my @cmd = qw/dot -Tsvg/;
151 my $in = $self->graphviz;
152 run( \@cmd, \$in, ">", binary(), \$svg );
153 $self->{'svg'} = $svg;
159 print $graph->as_graphml( $recalculate )
161 Returns a GraphML representation of the collation graph, with
162 transposition information and position information. Unless
163 $recalculate is passed (and is a true value), the method will return a
164 cached copy of the SVG after the first call to the method.
169 my( $self, $recalc ) = @_;
170 return $self->graphml if $self->has_graphml;
173 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
174 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
175 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
176 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
178 # Create the document and root node
179 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
180 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
181 $graphml->setDocumentElement( $root );
182 $root->setNamespace( $xsi_ns, 'xsi', 0 );
183 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
185 # Add the data keys for nodes
186 my @node_data = ( 'name', 'token', 'identical', 'position' );
187 foreach my $ndi ( 0 .. $#node_data ) {
188 my $key = $root->addNewChild( $graphml_ns, 'key' );
189 $key->setAttribute( 'attr.name', $node_data[$ndi] );
190 $key->setAttribute( 'attr.type', 'string' );
191 $key->setAttribute( 'for', 'node' );
192 $key->setAttribute( 'id', 'd'.$ndi );
195 # Add the data keys for edges
198 foreach my $wit ( $self->getWitnessList ) {
199 my $wit_key = 'w' . $wit_ctr++;
200 $wit_hash{$wit} = $wit_key;
201 my $key = $root->addNewChild( $graphml_ns, 'key' );
202 $key->setAttribute( 'attr.name', $wit );
203 $key->setAttribute( 'attr.type', 'string' );
204 $key->setAttribute( 'for', 'edge' );
205 $key->setAttribute( 'id', $wit_key );
208 # Add the graph, its nodes, and its edges
209 my $graph = $root->addNewChild( $graphml_ns, 'graph' );
210 $graph->setAttribute( 'edgedefault', 'directed' );
211 $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
212 $graph->setAttribute( 'parse.edgeids', 'canonical' );
213 $graph->setAttribute( 'parse.edges', $self->edges() );
214 $graph->setAttribute( 'parse.nodeids', 'canonical' );
215 $graph->setAttribute( 'parse.nodes', $self->nodes() );
216 $graph->setAttribute( 'parse.order', 'nodesfirst' );
220 foreach my $n ( $self->readings ) {
221 my %this_node_data = ();
222 foreach my $ndi ( 0 .. $#node_data ) {
224 $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name';
225 $this_node_data{'d'.$ndi} = $n->label
226 if $node_data[$ndi] eq 'token';
227 $this_node_data{'d'.$ndi} = $n->primary->name if $n->has_primary;
228 $this_node_data{'d'.$ndi} =
229 $self->{'positions'}->node_position( $n )
230 if $node_data[$ndi] eq 'position';
232 my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
233 my $node_xmlid = 'n' . $node_ctr++;
234 $node_hash{ $n->name } = $node_xmlid;
235 $node_el->setAttribute( 'id', $node_xmlid );
237 foreach my $dk ( keys %this_node_data ) {
238 my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
239 $d_el->setAttribute( 'key', $dk );
240 $d_el->appendTextChild( $this_node_data{$dk} );
244 foreach my $e ( $self->edges() ) {
245 my( $name, $from, $to ) = ( $e->name,
246 $node_hash{ $e->from()->name() },
247 $node_hash{ $e->to()->name() } );
248 my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
249 $edge_el->setAttribute( 'source', $from );
250 $edge_el->setAttribute( 'target', $to );
251 $edge_el->setAttribute( 'id', $name );
252 # TODO Got to add the witnesses
256 $self->_save_graphml( $graphml );
264 my $beginning = $collation->start();
266 Returns the beginning of the collation, a meta-reading with label '#START#'.
271 # Return the beginning node of the graph.
273 my( $new_start ) = @_;
275 $self->del_reading( '#START#' );
276 $self->graph->rename_node( $new_start, '#START#' );
278 return $self->reading('#START#');
283 my $next_node = $graph->next_word( $node, $path );
285 Returns the node that follows the given node along the given witness
286 path. TODO These are badly named.
291 # Return the successor via the corresponding edge.
293 return $self->_find_linked_word( 'next', @_ );
298 my $prior_node = $graph->prior_word( $node, $path );
300 Returns the node that precedes the given node along the given witness
301 path. TODO These are badly named.
306 # Return the predecessor via the corresponding edge.
308 return $self->_find_linked_word( 'prior', @_ );
311 sub _find_linked_word {
312 my( $self, $direction, $node, $edge ) = @_;
313 $edge = 'base text' unless $edge;
314 my @linked_edges = $direction eq 'next'
315 ? $node->outgoing() : $node->incoming();
316 return undef unless scalar( @linked_edges );
318 # We have to find the linked edge that contains all of the
319 # witnesses supplied in $edge.
320 my @edge_wits = split( /, /, $edge );
321 foreach my $le ( @linked_edges ) {
322 my @le_wits = split( /, /, $le->name() );
323 if( _is_within( \@edge_wits, \@le_wits ) ) {
324 # This is the right edge.
325 return $direction eq 'next' ? $le->to() : $le->from();
328 warn "Could not find $direction node from " . $node->label
329 . " along edge $edge";
333 sub create_witnesses {
334 # TODO Given a new collation, make a bunch of Witness objects.
340 __PACKAGE__->meta->make_immutable;