1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * collation - The collation with which the stemma is associated.
91 =item * dot - A filehandle open to a DOT representation of the stemma graph.
99 isa => 'Text::Tradition::Collation',
107 predicate => 'has_graph',
111 my( $self, $args ) = @_;
112 # If we have been handed a dotfile, initialize it into a graph.
113 if( exists $args->{'dot'} ) {
114 $self->_graph_from_dot( $args->{'dot'} );
118 sub _graph_from_dot {
119 my( $self, $dotfh ) = @_;
120 my $reader = Graph::Reader::Dot->new();
121 my $graph = $reader->read_graph( $dotfh );
123 $self->graph( $graph );
124 # Go through the nodes and set any non-hypothetical node to extant.
125 foreach my $v ( $self->graph->vertices ) {
126 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
127 unless $self->graph->has_vertex_attribute( $v, 'class' );
130 throw( "Failed to parse dot in $dotfh" );
136 =head2 as_dot( \%options )
138 Returns a normal dot representation of the stemma layout, suitable for rendering
139 with GraphViz. Options include:
143 =item * graph - A hashref of global graph options.
145 =item * node - A hashref of global node options.
147 =item * edge - A hashref of global edge options.
151 See the GraphViz documentation for the list of available options.
156 my( $self, $opts ) = @_;
158 ## See if we are including any a.c. witnesses in this graph.
159 my $graph = $self->graph;
160 if( exists $opts->{'layerwits'} ) {
162 map { $extant->{$_} = 1 } $self->witnesses;
163 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
166 # Get default and specified options
173 'fillcolor' => 'white',
175 'shape' => 'ellipse', # Shape for the extant nodes
178 'arrowhead' => 'none',
180 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
182 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
184 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
188 push( @dotlines, 'digraph stemma {' );
189 ## Print out the global attributes
190 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
191 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
192 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
194 # Add each of the nodes.
195 foreach my $n ( $graph->vertices ) {
196 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
197 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
198 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
200 # Use the default display settings.
201 $n = _dotquote( $n );
202 push( @dotlines, " $n;" );
205 # Add each of our edges.
206 foreach my $e ( $graph->edges ) {
207 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
208 push( @dotlines, " $from -> $to;" );
210 push( @dotlines, '}' );
212 return join( "\n", @dotlines );
215 =head2 alter_graph( $dotstring )
217 Alters the graph of this stemma according to the definition specified
223 my( $self, $dotstring ) = @_;
225 open $dotfh, '<', \$dotstring;
226 $self->_graph_from_dot( $dotfh );
229 =head2 editable( $opts )
231 =head2 editable_graph( $graph, $opts )
233 Returns a version of the graph rendered in our definition format. The
234 output separates statements with a newline; set $opts->{'linesep'} to the
235 empty string or to a space if the result is to be sent via JSON.
237 If a situational version of the stemma is required, the arguments for
238 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
243 my( $self, $opts ) = @_;
244 my $graph = $self->graph;
245 ## See if we need an editable version of a situational graph.
246 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
247 my $extant = delete $opts->{'extant'} || {};
248 my $layerwits = delete $opts->{'layerwits'} || [];
249 $graph = $self->situation_graph( $extant, $layerwits );
251 return editable_graph( $graph, $opts );
255 my( $graph, $opts ) = @_;
258 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
260 push( @dotlines, 'digraph stemma {' );
261 my @real; # A cheap sort
262 foreach my $n ( sort $graph->vertices ) {
263 my $c = $graph->get_vertex_attribute( $n, 'class' );
264 $c = 'extant' unless $c;
265 if( $c eq 'extant' ) {
268 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
271 # Now do the real ones
272 foreach my $n ( @real ) {
273 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
275 foreach my $e ( sort _by_vertex $graph->edges ) {
276 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
277 push( @dotlines, " $from -> $to;" );
279 push( @dotlines, '}' );
280 return join( $join, @dotlines );
284 my( $obj, %attr ) = @_;
286 foreach my $k ( keys %attr ) {
287 my $v = _dotquote( $attr{$k} );
288 push( @pairs, "$k=$v" );
290 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
295 return $str if $str =~ /^[A-Za-z0-9]+$/;
297 $str = '"' . $str . '"';
302 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
305 =head2 situation_graph( $extant, $layered )
307 Returns a graph which is the original stemma with all witnesses not in the
308 %$extant hash marked as hypothetical, and witness layers added to the graph
309 according to the list in @$layered. A layered (a.c.) witness is added as a
310 parent of its main version, and additionally shares all other parents and
311 children with that version.
315 sub situation_graph {
316 my( $self, $extant, $layerwits ) = @_;
318 my $graph = $self->graph->copy;
319 foreach my $vertex ( $graph->vertices ) {
320 # Set as extant any vertex that is extant in the stemma AND
321 # exists in the $extant hash.
322 my $class = 'hypothetical';
323 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
324 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
325 $graph->set_vertex_attribute( $vertex, 'class', $class );
328 # For each 'layered' witness in the layerwits array, add it to the graph
329 # as an ancestor of the 'main' witness, and otherwise with the same parent/
330 # child links as its main analogue.
331 # TOOD Handle case where B is copied from A but corrected from C
332 my $aclabel = $self->collation->ac_label;
333 foreach my $lw ( @$layerwits ) {
334 # Add the layered witness and set it with the same attributes as
335 # its 'main' analogue
336 throw( "Cannot add a layer to a hypothetical witness $lw" )
337 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
338 my $lwac = $lw . $aclabel;
339 $graph->add_vertex( $lwac );
340 $graph->set_vertex_attributes( $lwac,
341 $graph->get_vertex_attributes( $lw ) );
343 # Set it as ancestor to the main witness
344 $graph->add_edge( $lwac, $lw );
346 # Give it the same ancestors and descendants as the main witness has,
347 # bearing in mind that those ancestors and descendants might also just
348 # have had a layered witness defined.
349 foreach my $v ( $graph->predecessors( $lw ) ) {
350 next if $v eq $lwac; # Don't add a loop
351 $graph->add_edge( $v, $lwac );
352 $graph->add_edge( $v.$aclabel, $lwac )
353 if $graph->has_vertex( $v.$aclabel );
355 foreach my $v ( $graph->successors( $lw ) ) {
356 next if $v eq $lwac; # but this shouldn't occur
357 $graph->add_edge( $lwac, $v );
358 $graph->add_edge( $lwac, $v.$aclabel )
359 if $graph->has_vertex( $v.$aclabel );
367 Returns an SVG representation of the graph, calling as_dot first.
372 my( $self, $opts ) = @_;
373 my $dot = $self->as_dot( $opts );
374 my @cmd = qw/dot -Tsvg/;
376 my $dotfile = File::Temp->new();
378 # $dotfile->unlink_on_destroy(0);
379 binmode $dotfile, ':utf8';
381 push( @cmd, $dotfile->filename );
382 run( \@cmd, ">", binary(), \$svg );
383 # HACK: Parse the SVG and change the dimensions.
384 # Get rid of width and height attributes to allow scaling.
385 if( $opts->{'size'} ) {
387 my $parser = XML::LibXML->new();
388 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
389 my( $ew, $eh ) = @{$opts->{'size'}};
390 # If the graph is wider than it is tall, set width to ew and remove height.
391 # Otherwise set height to eh and remove width.
392 my $width = $svgdoc->documentElement->getAttribute('width');
393 my $height = $svgdoc->documentElement->getAttribute('height');
396 my( $remove, $keep, $val );
397 if( $width > $height ) {
406 $svgdoc->documentElement->removeAttribute( $remove );
407 $svgdoc->documentElement->setAttribute( $keep, $val );
408 $svg = $svgdoc->toString();
411 return decode_utf8( $svg );
416 Returns a list of the extant witnesses represented in the stemma.
422 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
423 $self->graph->vertices;
429 Returns a list of the hypothetical witnesses represented in the stemma.
436 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
437 $self->graph->vertices;
442 Text::Tradition::Error->throw(
443 'ident' => 'Stemma error',
450 __PACKAGE__->meta->make_immutable;
456 This package is free software and is provided "as is" without express
457 or implied warranty. You can redistribute it and/or modify it under
458 the same terms as Perl itself.
462 Tara L Andrews E<lt>aurum@cpan.orgE<gt>