1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
14 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
19 my $t = Text::Tradition->new(
20 'name' => 'this is a text',
22 'file' => '/path/to/tei_parallel_seg_file.xml' );
24 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
28 Text::Tradition is a library for representation and analysis of collated
29 texts, particularly medieval ones. The Stemma is a representation of the
30 copying relationships between the witnesses in a Tradition, modelled with
31 a connected rooted directed acyclic graph (CRDAG).
35 The easiest way to define a stemma is to use a special form of the 'dot'
38 Each stemma opens with the line
42 and continues with a list of all manuscript witnesses in the stemma, whether
43 extant witnesses or missing archetypes or hyparchetypes. Each of these is
44 listed by its sigil on its own line, e.g.:
46 alpha [ class=hypothetical ]
47 1 [ class=hypothetical,label=* ]
50 Extant witnesses are listed with class=extant; missing or postulated witnesses
51 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
52 unique name or number, but can be represented as anonymous with the addition
53 of 'label=*' to their lines. Greek letters or other special characters may be
54 used as names, but they must always be wrapped in double quotes.
56 Links between manuscripts are then listed with arrow notation, as below. These
57 lines show the direction of copying, one step at a time, for the entire stemma.
62 The final line in the definition should be the closing brace:
66 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
67 from the archetype O and C was copied from B, the definition would be:
70 O [ class=hypothetical]
83 The constructor. This should generally be called from Text::Tradition, but
84 if called directly it takes the following options:
88 =item * dot - A filehandle open to a DOT representation of the stemma graph.
90 =item * graph - If no DOT specification is given, you can pass a Graph object
91 instead. The vertices of the graph should have an attribute 'class' set to
92 either of the values 'extant' or 'hypothetical'.
94 =item * is_undirected - If the graph specification (or graph object) is for an
95 undirected graph (e.g. a phylogenetic tree), this should be set.
103 use_ok( 'Text::Tradition::Stemma' );
105 # Try to create a bad graph
107 local $TODO = "cannot use stdout redirection trick with FastCGI";
109 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
111 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
112 ok( 0, "Created broken stemma from dotfile with syntax error" );
113 } catch( Text::Tradition::Error $e ) {
114 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
118 # Create a good graph
120 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
121 binmode( $dotfh, ':utf8' );
122 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
123 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
124 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
125 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
126 my $found_unicode_sigil;
127 foreach my $h ( $stemma->hypotheticals ) {
128 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
130 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
132 # TODO Create stemma from graph, create stemma from undirected graph,
133 # create stemma from incompletely-specified graph
141 isa => 'Text::Tradition::Collation',
142 clearer => 'clear_collation', # interim measure to remove refs in DB
149 predicate => 'has_graph',
152 has is_undirected => (
156 writer => 'set_undirected',
160 my( $self, $args ) = @_;
161 # If we have been handed a dotfile, initialize it into a graph.
162 if( exists $args->{'dot'} ) {
163 $self->_graph_from_dot( $args->{'dot'} );
168 before 'graph' => sub {
171 # Make sure all unclassed graph nodes are marked extant.
173 throw( "Cannot set graph to a non-Graph object" )
174 unless ref( $g ) eq 'Graph';
175 foreach my $v ( $g->vertices ) {
176 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
177 $g->set_vertex_attribute( $v, 'class', 'extant' );
180 $self->set_undirected( $g->is_undirected );
184 sub _graph_from_dot {
185 my( $self, $dotfh ) = @_;
186 my $reader = Graph::Reader::Dot->new();
187 # Redirect STDOUT in order to trap any error messages - syntax errors
188 # are evidently not fatal.
189 # TODO This breaks under FastCGI/Apache; reconsider.
192 #open $saved_stderr, ">&STDOUT";
194 #open STDOUT, ">", \$reader_out;
195 my $graph = $reader->read_graph( $dotfh );
197 #open STDOUT, ">", \$saved_stderr;
198 if( $reader_out && $reader_out =~ /error/s ) {
199 throw( "Error trying to parse dot: $reader_out" );
201 throw( "Failed to create graph from dot" );
203 $self->graph( $graph );
208 =head2 as_dot( \%options )
210 Returns a normal dot representation of the stemma layout, suitable for rendering
211 with GraphViz. Options include:
215 =item * graph - A hashref of global graph options.
217 =item * node - A hashref of global node options.
219 =item * edge - A hashref of global edge options.
223 See the GraphViz documentation for the list of available options.
228 my( $self, $opts ) = @_;
230 ## See if we are including any a.c. witnesses in this graph.
231 my $graph = $self->graph;
232 if( exists $opts->{'layerwits'} ) {
234 map { $extant->{$_} = 1 } $self->witnesses;
235 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
238 # Get default and specified options
245 'fillcolor' => 'white',
247 'shape' => 'ellipse', # Shape for the extant nodes
250 'arrowhead' => 'none',
252 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
254 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
256 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
259 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
261 push( @dotlines, "$gdecl stemma {" );
262 ## Print out the global attributes
263 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
264 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
265 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
267 # Add each of the nodes.
268 foreach my $n ( $graph->vertices ) {
269 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
270 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
271 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
273 # Use the default display settings.
274 $n = _dotquote( $n );
275 push( @dotlines, " $n;" );
278 # Add each of our edges.
279 foreach my $e ( $graph->edges ) {
280 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
281 my $connector = $graph->is_directed ? '->' : '--';
282 push( @dotlines, " $from $connector $to;" );
284 push( @dotlines, '}' );
286 return join( "\n", @dotlines );
289 =head2 alter_graph( $dotstring )
291 Alters the graph of this stemma according to the definition specified
297 my( $self, $dotstring ) = @_;
299 open $dotfh, '<', \$dotstring;
300 binmode $dotfh, ':utf8';
301 $self->_graph_from_dot( $dotfh );
304 =head2 editable( $opts )
306 =head2 editable_graph( $graph, $opts )
308 Returns a version of the graph rendered in our definition format. The
309 output separates statements with a newline; set $opts->{'linesep'} to the
310 empty string or to a space if the result is to be sent via JSON.
312 If a situational version of the stemma is required, the arguments for
313 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
318 my( $self, $opts ) = @_;
319 my $graph = $self->graph;
320 ## See if we need an editable version of a situational graph.
321 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
322 my $extant = delete $opts->{'extant'} || {};
323 my $layerwits = delete $opts->{'layerwits'} || [];
324 $graph = $self->situation_graph( $extant, $layerwits );
326 return editable_graph( $graph, $opts );
330 my( $graph, $opts ) = @_;
333 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
334 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
336 push( @dotlines, "$gdecl stemma {" );
337 my @real; # A cheap sort
338 foreach my $n ( sort $graph->vertices ) {
339 my $c = $graph->get_vertex_attribute( $n, 'class' );
340 $c = 'extant' unless $c;
341 if( $c eq 'extant' ) {
344 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
347 # Now do the real ones
348 foreach my $n ( @real ) {
349 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
351 foreach my $e ( sort _by_vertex $graph->edges ) {
352 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
353 my $conn = $graph->is_undirected ? '--' : '->';
354 push( @dotlines, " $from $conn $to;" );
356 push( @dotlines, '}' );
357 return join( $join, @dotlines );
361 my( $obj, %attr ) = @_;
363 foreach my $k ( keys %attr ) {
364 my $v = _dotquote( $attr{$k} );
365 push( @pairs, "$k=$v" );
367 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
372 return $str if $str =~ /^[A-Za-z0-9]+$/;
374 $str = '"' . $str . '"';
379 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
382 =head2 situation_graph( $extant, $layered )
384 Returns a graph which is the original stemma graph with all witnesses not
385 in the %$extant hash marked as hypothetical, and witness layers added to
386 the graph according to the list in @$layered. A layered (a.c.) witness is
387 added as a parent of its main version, and additionally shares all other
388 parents and children with that version.
392 sub situation_graph {
393 my( $self, $extant, $layerwits, $layerlabel ) = @_;
395 my $graph = $self->graph->copy;
396 foreach my $vertex ( $graph->vertices ) {
397 # Set as extant any vertex that is extant in the stemma AND
398 # exists in the $extant hash.
399 my $class = 'hypothetical';
400 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
401 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
402 $graph->set_vertex_attribute( $vertex, 'class', $class );
405 # For each 'layered' witness in the layerwits array, add it to the graph
406 # as an ancestor of the 'main' witness, and otherwise with the same parent/
407 # child links as its main analogue.
408 # TOOD Handle case where B is copied from A but corrected from C
409 $layerlabel = ' (a.c.)' unless $layerlabel;
410 foreach my $lw ( @$layerwits ) {
411 # Add the layered witness and set it with the same attributes as
412 # its 'main' analogue
413 throw( "Cannot add a layer to a hypothetical witness $lw" )
414 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
415 my $lwac = $lw . $layerlabel;
416 $graph->add_vertex( $lwac );
417 $graph->set_vertex_attributes( $lwac,
418 $graph->get_vertex_attributes( $lw ) );
420 # Set it as ancestor to the main witness
421 $graph->add_edge( $lwac, $lw );
423 # Give it the same ancestors and descendants as the main witness has,
424 # bearing in mind that those ancestors and descendants might also just
425 # have had a layered witness defined.
426 foreach my $v ( $graph->predecessors( $lw ) ) {
427 next if $v eq $lwac; # Don't add a loop
428 $graph->add_edge( $v, $lwac );
429 $graph->add_edge( $v.$layerlabel, $lwac )
430 if $graph->has_vertex( $v.$layerlabel );
432 foreach my $v ( $graph->successors( $lw ) ) {
433 next if $v eq $lwac; # but this shouldn't occur
434 $graph->add_edge( $lwac, $v );
435 $graph->add_edge( $lwac, $v.$layerlabel )
436 if $graph->has_vertex( $v.$layerlabel );
444 Returns an SVG representation of the graph, calling as_dot first.
449 my( $self, $opts ) = @_;
450 my $dot = $self->as_dot( $opts );
451 my @cmd = ( '-Tsvg' );
452 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
454 my $dotfile = File::Temp->new();
456 # $dotfile->unlink_on_destroy(0);
457 binmode $dotfile, ':utf8';
460 push( @cmd, $dotfile->filename );
461 run( \@cmd, ">", binary(), \$svg );
462 # HACK: Parse the SVG and change the dimensions.
463 # Get rid of width and height attributes to allow scaling.
464 if( $opts->{'size'} ) {
466 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
469 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
471 throw( "Could not reparse SVG: $@" ) if $@;
472 my( $ew, $eh ) = @{$opts->{'size'}};
473 # If the graph is wider than it is tall, set width to ew and remove height.
474 # Otherwise set height to eh and remove width.
475 # TODO Also scale the viewbox
476 my $width = $svgdoc->documentElement->getAttribute('width');
477 my $height = $svgdoc->documentElement->getAttribute('height');
480 my( $remove, $keep, $val, $viewbox );
481 if( $width > $height ) {
485 my $vbheight = $width / $ew * $height;
486 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
491 my $vbwidth = $height / $eh * $width;
492 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
494 $svgdoc->documentElement->removeAttribute( $remove );
495 $svgdoc->documentElement->setAttribute( $keep, $val );
496 $svgdoc->documentElement->removeAttribute( 'viewBox' );
497 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
498 $svg = $svgdoc->toString();
501 return decode_utf8( $svg );
506 Returns a list of the extant witnesses represented in the stemma.
512 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
513 $self->graph->vertices;
519 Returns a list of the hypothetical witnesses represented in the stemma.
526 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
527 $self->graph->vertices;
531 =head2 root( $root_vertex ) {
533 If the stemma graph is undirected, make it directed with $root_vertex at the root.
534 If it is directed, re-root it.
539 my( $self, $rootvertex ) = @_;
541 if( $self->is_undirected ) {
542 $graph = $self->graph;
544 # Make an undirected version of this graph.
545 $graph = $self->graph->undirected_copy();
547 my $rooted = Graph->new();
548 $rooted->add_vertex( $rootvertex );
549 my @next = ( $rootvertex );
552 foreach my $v ( @next ) {
553 # Place its not-placed neighbors (ergo children) in the tree
555 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
556 $graph->neighbors( $v ) ) {
557 $rooted->add_vertex( $n );
558 $rooted->add_edge( $v, $n );
559 push( @children, $n );
564 # Set the vertex classes
565 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
566 $self->graph->hypotheticals;
567 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
568 $self->graph->witnesses;
574 Text::Tradition::Error->throw(
575 'ident' => 'Stemma error',
582 __PACKAGE__->meta->make_immutable;
588 This package is free software and is provided "as is" without express
589 or implied warranty. You can redistribute it and/or modify it under
590 the same terms as Perl itself.
594 Tara L Andrews E<lt>aurum@cpan.orgE<gt>