1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Collation is the central feature of
31 a Tradition, where the text, its sequence of readings, and its relationships
32 between readings are actually kept.
36 The easiest way to define a stemma (which is a directed acyclic graph, denoting
37 the scholar's hypothesis concerning which text(s) were copied from which other(s))
38 is to use a special form of the 'dot' syntax of GraphViz.
40 Each stemma opens with the line
44 and continues with a list of all manuscript witnesses in the stemma, whether
45 extant witnesses or missing archetypes or hyparchetypes. Each of these is
46 listed by its sigil on its own line, e.g.:
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
52 Extant witnesses are listed with class=extant; missing or postulated witnesses
53 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54 unique name or number, but can be represented as anonymous with the addition
55 of 'label=*' to their lines. Greek letters or other special characters may be
56 used as names, but they must always be wrapped in double quotes.
58 Links between manuscripts are then listed with arrow notation, as below. These
59 lines show the direction of copying, one step at a time, for the entire stemma.
64 The final line in the definition should be the closing brace:
68 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69 from the archetype O and C was copied from B, the definition would be:
72 O [ class=hypothetical]
85 The constructor. This should generally be called from Text::Tradition, but
86 if called directly it takes the following options:
90 =item * collation - The collation with which the stemma is associated.
92 =item * dot - A filehandle open to a DOT representation of the stemma graph.
100 isa => 'Text::Tradition::Collation',
108 predicate => 'has_graph',
111 has distance_trees => (
113 isa => 'ArrayRef[Graph]',
114 writer => '_save_distance_trees',
115 predicate => 'has_distance_trees',
118 has distance_program => (
125 my( $self, $args ) = @_;
126 # If we have been handed a dotfile, initialize it into a graph.
127 if( exists $args->{'dot'} ) {
128 $self->_graph_from_dot( $args->{'dot'} );
132 sub _graph_from_dot {
133 my( $self, $dotfh ) = @_;
134 my $reader = Graph::Reader::Dot->new();
135 my $graph = $reader->read_graph( $dotfh );
137 $self->graph( $graph );
138 # Go through the nodes and set any non-hypothetical node to extant.
139 foreach my $v ( $self->graph->vertices ) {
140 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
141 unless $self->graph->has_vertex_attribute( $v, 'class' );
144 throw( "Failed to parse dot in $dotfh" );
150 =head2 as_dot( \%options )
152 Returns a normal dot representation of the stemma layout, suitable for rendering
153 with GraphViz. Options include:
157 =item * graph - A hashref of global graph options.
159 =item * node - A hashref of global node options.
161 =item * edge - A hashref of global edge options.
165 See the GraphViz documentation for the list of available options.
170 my( $self, $opts ) = @_;
172 # Get default and specified options
179 'fillcolor' => 'white',
181 'shape' => 'ellipse', # Shape for the extant nodes
184 'arrowhead' => 'none',
186 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
188 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
190 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
194 push( @dotlines, 'digraph stemma {' );
195 ## Print out the global attributes
196 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
197 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
198 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
200 # Add each of the nodes.
201 foreach my $n ( $self->graph->vertices ) {
202 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
203 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
204 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
206 # Use the default display settings.
207 push( @dotlines, " $n;" );
210 # Add each of our edges.
211 foreach my $e ( $self->graph->edges ) {
212 my( $from, $to ) = @$e;
213 push( @dotlines, " $from -> $to;" );
215 push( @dotlines, '}' );
217 return join( "\n", @dotlines );
222 Returns a version of the graph rendered in our definition format.
229 push( @dotlines, 'digraph stemma {' );
230 my @real; # A cheap sort
231 foreach my $n ( sort $self->graph->vertices ) {
232 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
233 $c = 'extant' unless $c;
234 if( $c eq 'extant' ) {
237 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
240 # Now do the real ones
241 foreach my $n ( @real ) {
242 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
244 foreach my $e ( sort _by_vertex $self->graph->edges ) {
245 my( $from, $to ) = @$e;
246 push( @dotlines, " $from -> $to;" );
248 push( @dotlines, '}' );
249 return join( "\n", @dotlines );
253 my( $obj, %attr ) = @_;
255 foreach my $k ( keys %attr ) {
258 push( @pairs, "$k=\"$v\"" );
260 return sprintf( " %s [ %s ];", $obj, join( ', ', @pairs ) );
264 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
269 Returns an SVG representation of the graph, calling as_dot first.
274 my( $self, $opts ) = @_;
275 my $dot = $self->as_dot( $opts );
276 my @cmd = qw/dot -Tsvg/;
278 my $dotfile = File::Temp->new();
280 # $dotfile->unlink_on_destroy(0);
281 binmode $dotfile, ':utf8';
283 push( @cmd, $dotfile->filename );
284 run( \@cmd, ">", binary(), \$svg );
285 $svg = decode_utf8( $svg );
291 Returns a list of the extant witnesses represented in the stemma.
297 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
298 $self->graph->vertices;
302 =head2 distance_trees( program => $program )
304 Returns a set of undirected graphs, which are the result of running a distance
305 tree calculation program on the collation. Currently the only supported
306 program is phylip_pars.
310 #### Methods for calculating phylogenetic trees ####
312 before 'distance_trees' => sub {
315 'program' => 'phylip_pars',
317 # TODO allow specification of method for calculating distance tree
318 if( !$self->has_distance_trees
319 || $args{'program'} ne $self->distance_program ) {
320 # We need to make a tree before we can return it.
321 my $dsub = 'run_' . $args{'program'};
322 my $result = $self->$dsub(); # this might throw an error - catch it?
323 # Save the resulting trees
324 my $trees = parse_newick( $result );
325 $self->_save_distance_trees( $trees );
326 $self->distance_program( $args{'program'} );
330 =head2 run_phylip_pars
332 Runs Phylip Pars on the collation, returning the results in Newick format.
333 Used for the distance_trees calculation.
337 sub run_phylip_pars {
339 my $cdata = character_input( $self->collation->make_alignment_table() );
340 return phylip_pars( $cdata );
344 Text::Tradition::Error->throw(
345 'ident' => 'Stemma error',
352 __PACKAGE__->meta->make_immutable;
358 This package is free software and is provided "as is" without express
359 or implied warranty. You can redistribute it and/or modify it under
360 the same terms as Perl itself.
364 Tara L Andrews E<lt>aurum@cpan.orgE<gt>