1 package Text::Tradition::Graph;
6 use IPC::Run qw( run binary );
8 use Text::Tradition::Graph::Position;
12 Text::Tradition::Graph
16 use Text::Tradition::Graph;
18 my $text = Text::Tradition::Graph->new( 'GraphML' => '/my/graphml/file.xml' );
19 my $text = Text::Tradition::Graph->new( 'TEI' => '/my/tei/file.xml' );
20 my $text = Text::Tradition::Graph->new( 'CSV' => '/my/csv/file.csv',
21 'base' => '/my/basefile.txt' );
22 my $text = Text::Tradition::Graph->new( 'CTE' => '/my/cte/file.txt',
23 'base' => '/my/basefile.txt' );
25 my $svg_string = $text->as_svg();
27 my $lemma_nodes = $text->active_nodes();
28 $text->toggle_node( 'some_word' );
32 A text tradition is the representation of our knowledge of a text that
33 has been passed down via manuscript copies from a time before printing
34 presses. Each text has a number of witnesses, that is, manuscripts
35 that bear a version of the text. The tradition is the aggregation of
36 these witnesses, which is to say, the collation of the text.
38 This module takes a text collation and represents it as a horizontal
39 directed graph, suitable for SVG rendering and for analysis of various
40 forms. Since this module was written by a medievalist, it also
41 provides a facility for making a critical text reconstruction by
42 choosing certain variants to be 'lemma' text - that is, text which
43 should be considered the 'standard' reading.
45 Although the graph is a very good way to render text collation, and is
46 visually very easy for a human to interpret, it doesn't have any
47 inherent information about which nodes 'go together' - that is, which
48 text readings appear in the same place as other readings. This module
49 therefore calculates 'positions' on the graph, thus holding some
50 information about which readings can and can't be substituted for
59 Constructor. Takes a source collation file from which to construct
60 the initial graph. This file can be TEI (parallel segmentation) XML,
61 CSV in a format yet to be documented, GraphML as documented by the
62 CollateX tool (L<http://gregor.middell.net/collatex/>), or a Classical
63 Text Editor apparatus. For CSV and Classical Text Editor files, the
64 user must also supply a base text to which the line numbering in the
65 collation file refers.
67 20/04/2011 Currently only CSV and GraphML are really supported.
73 my $class = ref( $proto ) || $proto;
74 my %opts = ( 'on_color' => 'yellow',
75 'off_color' => 'white',
79 # opts can be: GraphML, base+CSV, base+CTE, TEI. We need
81 my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts );
82 my $format = shift( @formats );
84 warn "No data given to create a graph: need GraphML, CSV, or TEI";
87 if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) {
88 warn "Cannot make a graph from $format without a base text";
92 # Make a graph object.
93 my $collation_graph = Graph::Easy->new();
94 $collation_graph->set_attribute( 'node', 'shape', 'ellipse' );
95 # Starting point for all texts
96 my $last_node = $collation_graph->add_node( '#START#' );
98 $self->{'graph'} = $collation_graph;
99 bless( $self, $class );
101 # Now do the parsing.
102 my $mod = "Text::Tradition::Parser::$format";
104 my @args = ( $opts{ $format } );
105 if( $format =~ /^(CSV|CTE)$/ ) {
106 push( @args, $opts{'base'} );
108 $mod->can('parse')->( $self, @args );
113 =item B<make_positions>
115 $graph->make_positions( $common_nodes, $paths )
117 Create an associated Graph::Positions object that records the position
118 of each node in the graph. This method call is probably in the wrong
124 my( $self, $common_nodes, $paths ) = @_;
125 my $positions = Text::Tradition::Graph::Position->new( $common_nodes, $paths );
126 $self->{'positions'} = $positions;
131 =head2 Graph::Easy object accessor methods
133 See the Graph::Easy documentation for descriptions of these functions.
143 return $self->{'graph'}->node( @_ );
152 return $self->{'graph'}->edge( @_ );
159 # Not only adds the node, but also initializes internal data
164 my $node = $self->{'graph'}->add_node( @_ );
165 $self->{'identical_nodes'}->{ $node->name() } = [ $node->name() ];
175 return $self->{'graph'}->add_edge( @_ );
186 # Delete this node out of any relevant transposition pool.
187 if( ref $node eq 'Graph::Easy::Node' ) {
188 $node = $node->name();
190 my @ident = $self->identical_nodes( $node );
193 my $pool = $self->{'identical_nodes'}->{ $ident[0] };
194 foreach my $i ( 0 .. scalar(@$pool)-1 ) {
195 if( $pool->[$i] eq $node ) {
196 splice( @$pool, $i, 1 );
201 delete $self->{'identical_nodes'}->{ $node };
203 # Now delete the node.
204 return $self->{'graph'}->del_node( @_ );
213 return $self->{'graph'}->del_edge( @_ );
222 return $self->{'graph'}->nodes( @_ );
231 return $self->{'graph'}->edges( @_ );
240 return $self->{'graph'}->merge_nodes( @_ );
243 ### Helper methods for navigating the tree
247 =head2 Graph navigation methods
253 my $node = $graph->start();
255 Returns the beginning node of the graph.
260 # Return the beginning node of the graph.
262 my( $new_start ) = @_;
264 # Fix the node transposition data
265 delete $self->{'identical_nodes'}->{ $new_start->name() };
266 $self->{'identical_nodes'}->{'#START#'} = [ '#START#' ];
267 $self->{'graph'}->rename_node( $new_start, '#START#' );
269 return $self->{'graph'}->node('#START#');
274 my $next_node = $graph->next_word( $node, $path );
276 Returns the node that follows the given node along the given witness
277 path. TODO These are badly named.
282 # Return the successor via the corresponding edge.
283 my( $self, $node, $edge ) = @_;
284 $edge = "base text" unless $edge;
285 my @next_edges = $node->outgoing();
286 return undef unless scalar( @next_edges );
288 foreach my $e ( @next_edges ) {
289 next unless $e->label() eq $edge;
293 warn "Could not find node connected to edge $edge";
299 my $prior_node = $graph->prior_word( $node, $path );
301 Returns the node that precedes the given node along the given witness
302 path. TODO These are badly named.
307 # Return the predecessor via the corresponding edge.
308 my( $self, $node, $edge ) = @_;
309 $edge = "base text" unless $edge;
310 my @prior_edges = $node->incoming();
311 return undef unless scalar( @prior_edges );
313 foreach my $e ( @prior_edges ) {
314 next unless $e->label() eq $edge;
318 warn "Could not find node connected from edge $edge";
322 =item B<node_sequence>
324 my @nodes = $graph->node_sequence( $first, $last, $path );
326 Returns the ordered list of nodes, starting with $first and ending
327 with $last, along the given witness path.
332 my( $self, $start, $end, $label ) = @_;
333 # TODO make label able to follow a single MS
334 unless( ref( $start ) eq 'Graph::Easy::Node'
335 && ref( $end ) eq 'Graph::Easy::Node' ) {
336 warn "Called node_sequence without two nodes!";
339 $label = 'base text' unless $label;
340 my @nodes = ( $start );
343 while( $n ne $end ) {
344 if( exists( $seen{$n->name()} ) ) {
345 warn "Detected loop at " . $n->name();
348 $seen{$n->name()} = 1;
350 my @edges = $n->outgoing();
351 my @relevant_edges = grep { $_->label =~ /^$label$/ } @edges;
352 warn "Did not find an edge $label from node " . $n->label
353 unless scalar @relevant_edges;
354 warn "Found more than one edge $label from node " . $n->label
355 unless scalar @relevant_edges == 1;
356 my $next = $relevant_edges[0]->to();
357 push( @nodes, $next );
360 # Check that the last node is our end node.
361 my $last = $nodes[$#nodes];
362 warn "Last node found from " . $start->label() .
363 " via path $label is not the end!"
364 unless $last eq $end;
369 =item B<string_lemma>
371 my $text = $graph->string_lemma( $first, $last, $path );
373 Returns the whitespace-separated text, starting with $first and ending
374 with $last, represented in the graph along the given path.
379 my( $self, $start, $end, $label ) = @_;
381 my @nodes = $self->node_sequence( $start, $end, $label );
382 my @words = map { $_->label() } @nodes;
383 return join( ' ', @words );
388 =head2 Transposition handling methods
390 These should really move to their own module. For use when the graph
391 has split transposed nodes in order to avoid edges that travel
396 =item B<set_identical_node>
398 $graph->set_identical_node( $node, $other_node )
400 Tell the graph that these two nodes contain the same (transposed) reading.
404 sub set_identical_node {
405 my( $self, $node, $same_node ) = @_;
406 my $pool = $self->{'identical_nodes'}->{ $node };
407 my $same_pool = $self->{'identical_nodes'}->{ $same_node };
412 foreach( @$same_pool ) {
413 push( @$pool, $_ ) unless $poolhash{$_};
416 $self->{'identical_nodes'}->{ $same_node } = $pool;
419 =item B<set_identical_node>
421 my @nodes = $graph->identical_nodes( $node )
423 Get a list of nodes that contain the same (transposed) reading as the
428 sub identical_nodes {
429 my( $self, $node ) = @_;
430 my @others = grep { $_ !~ /^$node$/ }
431 @{$self->{'identical_nodes'}->{ $node }};
437 =head2 Output method(s)
443 print $graph->as_svg( $recalculate );
445 Returns an SVG string that represents the graph. Uses GraphViz to do
446 this, because Graph::Easy doesn't cope well with long graphs. Unless
447 $recalculate is passed (and is a true value), the method will return a
448 cached copy of the SVG after the first call to the method.
453 my( $self, $recalc ) = @_;
454 return $self->{'svg'} if( exists $self->{'svg'} && !$recalc );
456 $self->{'graphviz'} = $self->{'graph'}->as_graphviz()
457 unless( exists $self->{'graphviz'} && !$recalc );
459 my @cmd = qw/dot -Tsvg/;
461 my $in = $self->{'graphviz'};
462 run( \@cmd, \$in, ">", binary(), \$svg );
463 $self->{'svg'} = $svg;
469 =head2 Lemmatization methods
473 =item B<init_lemmatizer>
477 sub init_lemmatizer {
479 # Initialize the 'lemma' hash, going through all the nodes and seeing
480 # which ones are common nodes. This should only be run once.
482 return if( $self->{'lemmatizer_initialized'} );
483 my @active_names = map { $_->name } grep { $self->is_common( $_ ) }
485 $self->{'positions'}->init_lemmatizer( @active_names );
486 $self->{'lemmatizer_initialized'} = 1;
490 # Takes a list of nodes that have just been turned off, and returns a
491 # set of tuples of the form ['node', 'state'] that indicates what
492 # changes need to be made to the graph.
493 # A state of 1 means 'turn on this node'
494 # A state of 0 means 'turn off this node'
495 # A state of undef means 'an ellipsis belongs in the text here because
496 # no decision has been made'
498 my( $self, @toggled_off_nodes ) = @_;
500 # In case this is the first run
501 $self->init_lemmatizer();
502 # First get the positions of those nodes which have been
504 my $positions_off = {};
505 map { $positions_off->{ $self->{'positions'}->node_position( $_ ) } = $_ }
509 # Now for each position, we have to see if a node is on, and we
510 # have to see if a node has been turned off.
512 foreach my $pos ( $self->{'positions'}->all() ) {
513 # Find the state of this position. If there is an active node,
514 # its name will be the state; otherwise the state will be 0
515 # (nothing at this position) or undef (ellipsis at this position)
516 my $active = $self->{'positions'}->state( $pos );
518 # Is there a formerly active node that was toggled off?
519 if( exists( $positions_off->{$pos} ) ) {
520 my $off_node = $positions_off->{$pos};
521 if( $active && $active ne $off_node) {
522 push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
524 push( @answer, [ $off_node, $active ] );
527 # No formerly active node, so we just see if there is a currently
530 # Push the active node, whatever it is.
531 push( @answer, [ $active, 1 ] );
533 # Push the state that is there. Arbitrarily use the first node
535 my @pos_nodes = $self->{'positions'}->nodes_at_position( $pos );
537 [ $pos_nodes[0], $self->{'positions'}->state( $pos ) ] );
544 # A couple of helpers. TODO These should be gathered in the same place
548 my( $self, $node ) = @_;
549 $node = $self->_nodeobj( $node );
550 return $node->get_attribute('class') eq 'common';
554 my( $self, $node ) = @_;
555 unless( ref $node eq 'Graph::Easy::Node' ) {
556 $node = $self->node( $node );
561 # toggle_node takes a node name, and either lemmatizes or de-lemmatizes it.
562 # Returns a list of nodes that are de-lemmatized as a result of the toggle.
565 my( $self, $node ) = @_;
567 # In case this is being called for the first time.
568 $self->init_lemmatizer();
570 if( $self->is_common( $node ) ) {
571 # Do nothing, it's a common node.
575 my $pos = $self->{'positions'}->node_position( $node );
576 my $old_state = $self->{'positions'}->state( $pos );
578 if( $old_state && $old_state eq $node ) {
579 # Turn off the node. We turn on no others by default.
580 push( @nodes_off, $node );
583 $self->{'positions'}->set_state( $pos, $node );
584 # Any other 'on' nodes in the same position should be off.
585 push( @nodes_off, $self->colocated_nodes( $node ) );
586 # Any node that is an identical transposed one should be off.
587 push( @nodes_off, $self->identical_nodes( $node ) )
588 if $self->identical_nodes( $node );
590 @nodes_off = unique_list( @nodes_off );
592 # Turn off the nodes that need to be turned off.
593 my @nodes_turned_off;
594 foreach my $n ( @nodes_off ) {
595 my $npos = $self->{'positions'}->node_position( $n );
596 my $state = $self->{'positions'}->state( $npos );
597 if( $state && $state eq $n ) {
598 # this node is still on
599 push( @nodes_turned_off, $n );
600 my $new_state = undef;
602 # This is the node that was clicked, so if there are no
603 # other nodes there, turn off the position. In all other
604 # cases, restore the ellipsis.
605 my @all_n = $self->{'positions'}->nodes_at_position( $pos );
606 $new_state = 0 if scalar( @all_n ) == 1;
608 $self->{'positions'}->set_state( $npos, $new_state );
609 } elsif( $old_state && $old_state eq $n ) {
610 # another node has already been turned on here
611 push( @nodes_turned_off, $n );
612 } # else some other node was on anyway, so pass.
614 return @nodes_turned_off;
617 sub colocated_nodes {
619 return $self->{'positions'}->colocated_nodes( @_ );
623 my( $self, $node_id ) = @_;
624 # This is the label of the given node.
625 return $self->node( $node_id )->label();
628 sub text_for_witness {
629 my( $self, $wit ) = @_;
631 my @nodes = $self->{'positions'}->witness_path( $wit );
632 my @words = map { $self->node( $_ )->label() } @nodes;
633 return join( ' ', @words );
639 map { $h{$_} = 1 } @list;
647 This package is free software and is provided "as is" without express
648 or implied warranty. You can redistribute it and/or modify it under
649 the same terms as Perl itself.
653 Tara L Andrews, aurum@cpan.org