1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * collation - The collation with which the stemma is associated.
91 =item * dot - A filehandle open to a DOT representation of the stemma graph.
99 isa => 'Text::Tradition::Collation',
107 predicate => 'has_graph',
111 my( $self, $args ) = @_;
112 # If we have been handed a dotfile, initialize it into a graph.
113 if( exists $args->{'dot'} ) {
114 $self->_graph_from_dot( $args->{'dot'} );
118 sub _graph_from_dot {
119 my( $self, $dotfh ) = @_;
120 my $reader = Graph::Reader::Dot->new();
121 my $graph = $reader->read_graph( $dotfh );
123 $self->graph( $graph );
124 # Go through the nodes and set any non-hypothetical node to extant.
125 foreach my $v ( $self->graph->vertices ) {
126 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
127 unless $self->graph->has_vertex_attribute( $v, 'class' );
130 throw( "Failed to parse dot in $dotfh" );
136 =head2 as_dot( \%options )
138 Returns a normal dot representation of the stemma layout, suitable for rendering
139 with GraphViz. Options include:
143 =item * graph - A hashref of global graph options.
145 =item * node - A hashref of global node options.
147 =item * edge - A hashref of global edge options.
151 See the GraphViz documentation for the list of available options.
156 my( $self, $opts ) = @_;
158 ## See if we are including any a.c. witnesses in this graph.
159 my $graph = $self->graph;
160 if( exists $opts->{'layerwits'} ) {
161 $graph = $self->extend_graph( $opts->{'layerwits'} );
164 # Get default and specified options
171 'fillcolor' => 'white',
173 'shape' => 'ellipse', # Shape for the extant nodes
176 'arrowhead' => 'none',
178 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
180 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
182 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
186 push( @dotlines, 'digraph stemma {' );
187 ## Print out the global attributes
188 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
189 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
190 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
192 # Add each of the nodes.
193 foreach my $n ( $graph->vertices ) {
194 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
195 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
196 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
198 # Use the default display settings.
199 $n = _dotquote( $n );
200 push( @dotlines, " $n;" );
203 # Add each of our edges.
204 foreach my $e ( $graph->edges ) {
205 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
206 push( @dotlines, " $from -> $to;" );
208 push( @dotlines, '}' );
210 return join( "\n", @dotlines );
213 =head2 editable( $opts )
215 Returns a version of the graph rendered in our definition format. The
216 output separates statements with a newline; set $opts->{'linesep'} to the
217 empty string or to a space if the result is to be sent via JSON.
219 Any layer witnesses to be included should be passed via $opts->{'layerwits'}.
224 my( $self, $opts ) = @_;
226 ## See if we are including any a.c. witnesses in this graph.
227 my $graph = $self->graph;
228 if( exists $opts->{'layerwits'} ) {
229 $graph = $self->extend_graph( $opts->{'layerwits'} );
233 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
235 push( @dotlines, 'digraph stemma {' );
236 my @real; # A cheap sort
237 foreach my $n ( sort $self->graph->vertices ) {
238 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
239 $c = 'extant' unless $c;
240 if( $c eq 'extant' ) {
243 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
246 # Now do the real ones
247 foreach my $n ( @real ) {
248 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
250 foreach my $e ( sort _by_vertex $self->graph->edges ) {
251 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
252 push( @dotlines, " $from -> $to;" );
254 push( @dotlines, '}' );
255 return join( $join, @dotlines );
259 my( $obj, %attr ) = @_;
261 foreach my $k ( keys %attr ) {
262 my $v = _dotquote( $attr{$k} );
263 push( @pairs, "$k=$v" );
265 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
270 return $str if $str =~ /^[A-Za-z0-9]+$/;
272 $str = '"' . $str . '"';
277 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
280 =head2 extend_graph( $layered_witnesses )
282 Returns a graph which is the original stemma with witness layers added for the
283 list in @$layered_witnesses. A layered (a.c.) witness is added as a parent
284 of its main version, and additionally shares all other parents and children with
290 my( $self, $layerwits ) = @_;
291 # For each 'layered' witness in the layerwits array, add it to the graph
292 # as an ancestor of the 'main' witness, and otherwise with the same parent/
293 # child links as its main analogue.
294 # TOOD Handle case where B is copied from A but corrected from C
296 # Iterate through, adding a.c. witnesses
297 my $actag = $self->collation->ac_label;
298 my $graph = $self->graph->deep_copy;
299 foreach my $lw ( @$layerwits ) {
300 # Add the layered witness and set it with the same attributes as
301 # its 'main' analogue
302 my $lwac = $lw . $self->collation->ac_label;
303 $graph->add_vertex( $lwac );
304 $graph->set_vertex_attributes( $lwac,
305 $graph->get_vertex_attributes( $lw ) );
307 # Set it as ancestor to the main witness
308 $graph->add_edge( $lwac, $lw );
310 # Give it the same ancestors and descendants as the main witness has,
311 # bearing in mind that those ancestors and descendants might also just
312 # have had a layered witness defined.
313 foreach my $v ( $graph->predecessors( $lw ) ) {
314 next if $v eq $lwac; # Don't add a loop
315 $graph->add_edge( $v, $lwac );
316 $graph->add_edge( $v.$self->collation->ac_label, $lwac )
317 if $graph->has_vertex( $v.$self->collation->ac_label );
319 foreach my $v ( $graph->successors( $lw ) ) {
320 next if $v eq $lwac; # but this shouldn't occur
321 $graph->add_edge( $lwac, $v );
322 $graph->add_edge( $lwac, $v.$self->collation->ac_label )
323 if $graph->has_vertex( $v.$self->collation->ac_label );
331 Returns an SVG representation of the graph, calling as_dot first.
336 my( $self, $opts ) = @_;
337 my $dot = $self->as_dot( $opts );
338 my @cmd = qw/dot -Tsvg/;
340 my $dotfile = File::Temp->new();
342 # $dotfile->unlink_on_destroy(0);
343 binmode $dotfile, ':utf8';
345 push( @cmd, $dotfile->filename );
346 run( \@cmd, ">", binary(), \$svg );
347 # HACK: Parse the SVG and change the dimensions.
348 # Get rid of width and height attributes to allow scaling.
349 if( $opts->{'size'} ) {
351 my $parser = XML::LibXML->new();
352 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
353 my( $ew, $eh ) = @{$opts->{'size'}};
354 # If the graph is wider than it is tall, set width to ew and remove height.
355 # Otherwise set height to eh and remove width.
356 my $width = $svgdoc->documentElement->getAttribute('width');
357 my $height = $svgdoc->documentElement->getAttribute('height');
360 my( $remove, $keep, $val );
361 if( $width > $height ) {
370 $svgdoc->documentElement->removeAttribute( $remove );
371 $svgdoc->documentElement->setAttribute( $keep, $val );
372 $svg = $svgdoc->toString();
375 return decode_utf8( $svg );
380 Returns a list of the extant witnesses represented in the stemma.
386 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
387 $self->graph->vertices;
393 Returns a list of the hypothetical witnesses represented in the stemma.
400 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
401 $self->graph->vertices;
406 Text::Tradition::Error->throw(
407 'ident' => 'Stemma error',
414 __PACKAGE__->meta->make_immutable;
420 This package is free software and is provided "as is" without express
421 or implied warranty. You can redistribute it and/or modify it under
422 the same terms as Perl itself.
426 Tara L Andrews E<lt>aurum@cpan.orgE<gt>