1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
16 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
21 my $t = Text::Tradition->new(
22 'name' => 'this is a text',
24 'file' => '/path/to/tei_parallel_seg_file.xml' );
26 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
30 Text::Tradition is a library for representation and analysis of collated
31 texts, particularly medieval ones. The Collation is the central feature of
32 a Tradition, where the text, its sequence of readings, and its relationships
33 between readings are actually kept.
37 The easiest way to define a stemma (which is a directed acyclic graph, denoting
38 the scholar's hypothesis concerning which text(s) were copied from which other(s))
39 is to use a special form of the 'dot' syntax of GraphViz.
41 Each stemma opens with the line
45 and continues with a list of all manuscript witnesses in the stemma, whether
46 extant witnesses or missing archetypes or hyparchetypes. Each of these is
47 listed by its sigil on its own line, e.g.:
49 alpha [ class=hypothetical ]
50 1 [ class=hypothetical,label=* ]
53 Extant witnesses are listed with class=extant; missing or postulated witnesses
54 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
55 unique name or number, but can be represented as anonymous with the addition
56 of 'label=*' to their lines. Greek letters or other special characters may be
57 used as names, but they must always be wrapped in double quotes.
59 Links between manuscripts are then listed with arrow notation, as below. These
60 lines show the direction of copying, one step at a time, for the entire stemma.
65 The final line in the definition should be the closing brace:
69 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
70 from the archetype O and C was copied from B, the definition would be:
73 O [ class=hypothetical]
86 The constructor. This should generally be called from Text::Tradition, but
87 if called directly it takes the following options:
91 =item * collation - The collation with which the stemma is associated.
93 =item * dot - A filehandle open to a DOT representation of the stemma graph.
101 isa => 'Text::Tradition::Collation',
109 predicate => 'has_graph',
113 my( $self, $args ) = @_;
114 # If we have been handed a dotfile, initialize it into a graph.
115 if( exists $args->{'dot'} ) {
116 $self->_graph_from_dot( $args->{'dot'} );
120 sub _graph_from_dot {
121 my( $self, $dotfh ) = @_;
122 my $reader = Graph::Reader::Dot->new();
123 my $graph = $reader->read_graph( $dotfh );
125 $self->graph( $graph );
126 # Go through the nodes and set any non-hypothetical node to extant.
127 foreach my $v ( $self->graph->vertices ) {
128 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
129 unless $self->graph->has_vertex_attribute( $v, 'class' );
132 throw( "Failed to parse dot in $dotfh" );
138 =head2 as_dot( \%options )
140 Returns a normal dot representation of the stemma layout, suitable for rendering
141 with GraphViz. Options include:
145 =item * graph - A hashref of global graph options.
147 =item * node - A hashref of global node options.
149 =item * edge - A hashref of global edge options.
153 See the GraphViz documentation for the list of available options.
158 my( $self, $opts ) = @_;
160 # Get default and specified options
167 'fillcolor' => 'white',
169 'shape' => 'ellipse', # Shape for the extant nodes
172 'arrowhead' => 'none',
174 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
176 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
178 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
182 push( @dotlines, 'digraph stemma {' );
183 ## Print out the global attributes
184 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
185 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
186 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
188 # Add each of the nodes.
189 foreach my $n ( $self->graph->vertices ) {
190 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
191 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
192 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
194 # Use the default display settings.
195 $n = _dotquote( $n );
196 push( @dotlines, " $n;" );
199 # Add each of our edges.
200 foreach my $e ( $self->graph->edges ) {
201 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
202 push( @dotlines, " $from -> $to;" );
204 push( @dotlines, '}' );
206 return join( "\n", @dotlines );
209 =head2 editable( $linesep )
211 Returns a version of the graph rendered in our definition format. The
212 $linesep argument defaults to newline; set it to the empty string or to
213 a space if the result is to be sent via JSON.
219 my $join = shift || "\n";
221 push( @dotlines, 'digraph stemma {' );
222 my @real; # A cheap sort
223 foreach my $n ( sort $self->graph->vertices ) {
224 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
225 $c = 'extant' unless $c;
226 if( $c eq 'extant' ) {
229 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
232 # Now do the real ones
233 foreach my $n ( @real ) {
234 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
236 foreach my $e ( sort _by_vertex $self->graph->edges ) {
237 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
238 push( @dotlines, " $from -> $to;" );
240 push( @dotlines, '}' );
241 return join( $join, @dotlines );
245 my( $obj, %attr ) = @_;
247 foreach my $k ( keys %attr ) {
248 my $v = _dotquote( $attr{$k} );
249 push( @pairs, "$k=$v" );
251 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
256 return $str if $str =~ /^[A-Za-z0-9]+$/;
258 $str = '"' . $str . '"';
263 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
268 Returns an SVG representation of the graph, calling as_dot first.
273 my( $self, $opts ) = @_;
274 my $dot = $self->as_dot( $opts );
275 my @cmd = qw/dot -Tsvg/;
277 my $dotfile = File::Temp->new();
279 # $dotfile->unlink_on_destroy(0);
280 binmode $dotfile, ':utf8';
282 push( @cmd, $dotfile->filename );
283 run( \@cmd, ">", binary(), \$svg );
284 # HACK: Parse the SVG and change the dimensions.
285 # Get rid of width and height attributes to allow scaling.
286 my $parser = XML::LibXML->new();
287 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
288 $svgdoc->documentElement->removeAttribute('width');
289 $svgdoc->documentElement->removeAttribute('height');
291 return decode_utf8( $svgdoc->toString );
296 Returns a list of the extant witnesses represented in the stemma.
302 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
303 $self->graph->vertices;
310 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
311 $self->graph->vertices;
316 Text::Tradition::Error->throw(
317 'ident' => 'Stemma error',
324 __PACKAGE__->meta->make_immutable;
330 This package is free software and is provided "as is" without express
331 or implied warranty. You can redistribute it and/or modify it under
332 the same terms as Perl itself.
336 Tara L Andrews E<lt>aurum@cpan.orgE<gt>