1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
16 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
21 my $t = Text::Tradition->new(
22 'name' => 'this is a text',
24 'file' => '/path/to/tei_parallel_seg_file.xml' );
26 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
30 Text::Tradition is a library for representation and analysis of collated
31 texts, particularly medieval ones. The Stemma is a representation of the
32 copying relationships between the witnesses in a Tradition, modelled with
33 a connected rooted directed acyclic graph (CRDAG).
37 The easiest way to define a stemma is to use a special form of the 'dot'
40 Each stemma opens with the line
44 and continues with a list of all manuscript witnesses in the stemma, whether
45 extant witnesses or missing archetypes or hyparchetypes. Each of these is
46 listed by its sigil on its own line, e.g.:
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
52 Extant witnesses are listed with class=extant; missing or postulated witnesses
53 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54 unique name or number, but can be represented as anonymous with the addition
55 of 'label=*' to their lines. Greek letters or other special characters may be
56 used as names, but they must always be wrapped in double quotes.
58 Links between manuscripts are then listed with arrow notation, as below. These
59 lines show the direction of copying, one step at a time, for the entire stemma.
64 The final line in the definition should be the closing brace:
68 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69 from the archetype O and C was copied from B, the definition would be:
72 O [ class=hypothetical]
85 The constructor. This should generally be called from Text::Tradition, but
86 if called directly it takes the following options:
90 =item * collation - The collation with which the stemma is associated.
92 =item * dot - A filehandle open to a DOT representation of the stemma graph.
100 isa => 'Text::Tradition::Collation',
108 predicate => 'has_graph',
112 my( $self, $args ) = @_;
113 # If we have been handed a dotfile, initialize it into a graph.
114 if( exists $args->{'dot'} ) {
115 $self->_graph_from_dot( $args->{'dot'} );
119 sub _graph_from_dot {
120 my( $self, $dotfh ) = @_;
121 my $reader = Graph::Reader::Dot->new();
122 my $graph = $reader->read_graph( $dotfh );
124 $self->graph( $graph );
125 # Go through the nodes and set any non-hypothetical node to extant.
126 foreach my $v ( $self->graph->vertices ) {
127 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
128 unless $self->graph->has_vertex_attribute( $v, 'class' );
131 throw( "Failed to parse dot in $dotfh" );
137 =head2 as_dot( \%options )
139 Returns a normal dot representation of the stemma layout, suitable for rendering
140 with GraphViz. Options include:
144 =item * graph - A hashref of global graph options.
146 =item * node - A hashref of global node options.
148 =item * edge - A hashref of global edge options.
152 See the GraphViz documentation for the list of available options.
157 my( $self, $opts ) = @_;
159 ## See if we are including any a.c. witnesses in this graph.
160 my $graph = $self->graph;
161 if( exists $opts->{'layerwits'} ) {
162 $graph = $self->extend_graph( $opts->{'layerwits'} );
165 # Get default and specified options
172 'fillcolor' => 'white',
174 'shape' => 'ellipse', # Shape for the extant nodes
177 'arrowhead' => 'none',
179 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
181 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
183 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
187 push( @dotlines, 'digraph stemma {' );
188 ## Print out the global attributes
189 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
190 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
191 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
193 # Add each of the nodes.
194 foreach my $n ( $graph->vertices ) {
195 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
196 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
197 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
199 # Use the default display settings.
200 $n = _dotquote( $n );
201 push( @dotlines, " $n;" );
204 # Add each of our edges.
205 foreach my $e ( $graph->edges ) {
206 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
207 push( @dotlines, " $from -> $to;" );
209 push( @dotlines, '}' );
211 return join( "\n", @dotlines );
214 =head2 editable( $opts )
216 Returns a version of the graph rendered in our definition format. The
217 output separates statements with a newline; set $opts->{'linesep'} to the
218 empty string or to a space if the result is to be sent via JSON.
220 Any layer witnesses to be included should be passed via $opts->{'layerwits'}.
225 my( $self, $opts ) = @_;
227 ## See if we are including any a.c. witnesses in this graph.
228 my $graph = $self->graph;
229 if( exists $opts->{'layerwits'} ) {
230 $graph = $self->extend_graph( $opts->{'layerwits'} );
234 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
236 push( @dotlines, 'digraph stemma {' );
237 my @real; # A cheap sort
238 foreach my $n ( sort $self->graph->vertices ) {
239 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
240 $c = 'extant' unless $c;
241 if( $c eq 'extant' ) {
244 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
247 # Now do the real ones
248 foreach my $n ( @real ) {
249 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
251 foreach my $e ( sort _by_vertex $self->graph->edges ) {
252 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
253 push( @dotlines, " $from -> $to;" );
255 push( @dotlines, '}' );
256 return join( $join, @dotlines );
260 my( $obj, %attr ) = @_;
262 foreach my $k ( keys %attr ) {
263 my $v = _dotquote( $attr{$k} );
264 push( @pairs, "$k=$v" );
266 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
271 return $str if $str =~ /^[A-Za-z0-9]+$/;
273 $str = '"' . $str . '"';
278 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
281 =head2 extend_graph( $layered_witnesses )
283 Returns a graph which is the original stemma with witness layers added for the
284 list in @$layered_witnesses. A layered (a.c.) witness is added as a parent
285 of its main version, and additionally shares all other parents and children with
291 my( $self, $layerwits ) = @_;
292 # For each 'layered' witness in the layerwits array, add it to the graph
293 # as an ancestor of the 'main' witness, and otherwise with the same parent/
294 # child links as its main analogue.
295 # TOOD Handle case where B is copied from A but corrected from C
297 # Iterate through, adding a.c. witnesses
298 my $actag = $self->collation->ac_label;
299 my $graph = $self->graph->copy;
300 foreach my $lw ( @$layerwits ) {
301 # Add the layered witness and set it with the same attributes as
302 # its 'main' analogue
303 my $lwac = $lw . $self->collation->ac_label;
304 $graph->add_vertex( $lwac );
305 $graph->set_vertex_attributes( $lwac,
306 $graph->get_vertex_attributes( $lw ) );
308 # Set it as ancestor to the main witness
309 $graph->add_edge( $lwac, $lw );
311 # Give it the same ancestors and descendants as the main witness has,
312 # bearing in mind that those ancestors and descendants might also just
313 # have had a layered witness defined.
314 foreach my $v ( $graph->predecessors( $lw ) ) {
315 next if $v eq $lwac; # Don't add a loop
316 $graph->add_edge( $v, $lwac );
317 $graph->add_edge( $v.$self->collation->ac_label, $lwac )
318 if $graph->has_vertex( $v.$self->collation->ac_label );
320 foreach my $v ( $graph->successors( $lw ) ) {
321 next if $v eq $lwac; # but this shouldn't occur
322 $graph->add_edge( $lwac, $v );
323 $graph->add_edge( $lwac, $v.$self->collation->ac_label )
324 if $graph->has_vertex( $v.$self->collation->ac_label );
332 Returns an SVG representation of the graph, calling as_dot first.
337 my( $self, $opts ) = @_;
338 my $dot = $self->as_dot( $opts );
339 my @cmd = qw/dot -Tsvg/;
341 my $dotfile = File::Temp->new();
343 # $dotfile->unlink_on_destroy(0);
344 binmode $dotfile, ':utf8';
346 push( @cmd, $dotfile->filename );
347 run( \@cmd, ">", binary(), \$svg );
348 # HACK: Parse the SVG and change the dimensions.
349 # Get rid of width and height attributes to allow scaling.
350 my $parser = XML::LibXML->new();
351 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
352 $svgdoc->documentElement->removeAttribute('width');
353 $svgdoc->documentElement->removeAttribute('height');
355 return decode_utf8( $svgdoc->toString );
360 Returns a list of the extant witnesses represented in the stemma.
366 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
367 $self->graph->vertices;
374 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
375 $self->graph->vertices;
380 Text::Tradition::Error->throw(
381 'ident' => 'Stemma error',
388 __PACKAGE__->meta->make_immutable;
394 This package is free software and is provided "as is" without express
395 or implied warranty. You can redistribute it and/or modify it under
396 the same terms as Perl itself.
400 Tara L Andrews E<lt>aurum@cpan.orgE<gt>