properly display & scale stemma graphs
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
3bf5d6f1 11use XML::LibXML;
40f19742 12use Moose;
9463b0bf 13
027d819c 14=head1 NAME
15
16Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
17
18=head1 SYNOPSIS
19
20 use Text::Tradition;
21 my $t = Text::Tradition->new(
22 'name' => 'this is a text',
23 'input' => 'TEI',
24 'file' => '/path/to/tei_parallel_seg_file.xml' );
25
26 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27
28=head1 DESCRIPTION
29
30Text::Tradition is a library for representation and analysis of collated
31texts, particularly medieval ones. The Collation is the central feature of
32a Tradition, where the text, its sequence of readings, and its relationships
33between readings are actually kept.
34
35=head1 DOT SYNTAX
36
37The easiest way to define a stemma (which is a directed acyclic graph, denoting
38the scholar's hypothesis concerning which text(s) were copied from which other(s))
39is to use a special form of the 'dot' syntax of GraphViz.
40
41Each stemma opens with the line
42
43 digraph Stemma {
44
45and continues with a list of all manuscript witnesses in the stemma, whether
46extant witnesses or missing archetypes or hyparchetypes. Each of these is
47listed by its sigil on its own line, e.g.:
48
49 alpha [ class=hypothetical ]
50 1 [ class=hypothetical,label=* ]
51 Ms4 [ class=extant ]
52
53Extant witnesses are listed with class=extant; missing or postulated witnesses
54are listed with class=hypothetical. Anonymous hyparchetypes must be given a
55unique name or number, but can be represented as anonymous with the addition
56of 'label=*' to their lines. Greek letters or other special characters may be
57used as names, but they must always be wrapped in double quotes.
58
59Links between manuscripts are then listed with arrow notation, as below. These
60lines show the direction of copying, one step at a time, for the entire stemma.
61
62 alpha -> 1
63 1 -> Ms4
64
65The final line in the definition should be the closing brace:
66
67 }
68
69Thus for a set of extant manuscripts A, B, and C, where A and B were copied
70from the archetype O and C was copied from B, the definition would be:
71
72 digraph Stemma {
73 O [ class=hypothetical]
74 A [ class=extant ]
75 B [ class=extant ]
76 C [ class=extant ]
77 O -> A
78 O -> B
79 B -> C
80 }
81
82=head1 CONSTRUCTOR
83
84=head2 new
85
86The constructor. This should generally be called from Text::Tradition, but
87if called directly it takes the following options:
88
89=over
90
91=item * collation - The collation with which the stemma is associated.
92
93=item * dot - A filehandle open to a DOT representation of the stemma graph.
94
95=back
96
97=cut
98
9463b0bf 99has collation => (
100 is => 'ro',
101 isa => 'Text::Tradition::Collation',
102 required => 1,
8d9a1cd8 103 weak_ref => 1,
9463b0bf 104 );
105
e05997e2 106has graph => (
107 is => 'rw',
108 isa => 'Graph',
109 predicate => 'has_graph',
110 );
9457207b 111
e05997e2 112sub BUILD {
113 my( $self, $args ) = @_;
114 # If we have been handed a dotfile, initialize it into a graph.
115 if( exists $args->{'dot'} ) {
027d819c 116 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 117 }
c0ccdb62 118}
119
027d819c 120sub _graph_from_dot {
8d9a1cd8 121 my( $self, $dotfh ) = @_;
8d9a1cd8 122 my $reader = Graph::Reader::Dot->new();
123 my $graph = $reader->read_graph( $dotfh );
7a7c249c 124 if( $graph ) {
125 $self->graph( $graph );
126 # Go through the nodes and set any non-hypothetical node to extant.
127 foreach my $v ( $self->graph->vertices ) {
128 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
129 unless $self->graph->has_vertex_attribute( $v, 'class' );
130 }
131 } else {
63778331 132 throw( "Failed to parse dot in $dotfh" );
7a7c249c 133 }
8d9a1cd8 134}
135
027d819c 136=head1 METHODS
137
138=head2 as_dot( \%options )
139
140Returns a normal dot representation of the stemma layout, suitable for rendering
141with GraphViz. Options include:
142
143=over
144
145=item * graph - A hashref of global graph options.
146
147=item * node - A hashref of global node options.
148
149=item * edge - A hashref of global edge options.
150
151=back
152
153See the GraphViz documentation for the list of available options.
154
155=cut
156
8d9a1cd8 157sub as_dot {
e367f5c0 158 my( $self, $opts ) = @_;
7a7c249c 159
160 # Get default and specified options
e02340f3 161 my %graphopts = (
162 # 'ratio' => 1,
163 );
7a7c249c 164 my %nodeopts = (
165 'fontsize' => 11,
7a7c249c 166 'style' => 'filled',
167 'fillcolor' => 'white',
e02340f3 168 'color' => 'white',
7a7c249c 169 'shape' => 'ellipse', # Shape for the extant nodes
170 );
171 my %edgeopts = (
e02340f3 172 'arrowhead' => 'none',
7a7c249c 173 );
174 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
175 if $opts->{'graph'};
176 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
177 if $opts->{'node'};
178 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
179 if $opts->{'edge'};
180
181 my @dotlines;
182 push( @dotlines, 'digraph stemma {' );
183 ## Print out the global attributes
184 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
185 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 186 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
187
188 # Add each of the nodes.
189 foreach my $n ( $self->graph->vertices ) {
e02340f3 190 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
191 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
192 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 193 } else {
7a7c249c 194 # Use the default display settings.
195 push( @dotlines, " $n;" );
e79c23c7 196 }
197 }
7a7c249c 198 # Add each of our edges.
199 foreach my $e ( $self->graph->edges ) {
200 my( $from, $to ) = @$e;
201 push( @dotlines, " $from -> $to;" );
202 }
203 push( @dotlines, '}' );
e79c23c7 204
7a7c249c 205 return join( "\n", @dotlines );
206}
207
88a6bac5 208=head2 editable( $linesep )
027d819c 209
88a6bac5 210Returns a version of the graph rendered in our definition format. The
211$linesep argument defaults to newline; set it to the empty string or to
212a space if the result is to be sent via JSON.
027d819c 213
214=cut
7a7c249c 215
7a7c249c 216sub editable {
217 my $self = shift;
88a6bac5 218 my $join = shift || "\n";
7a7c249c 219 my @dotlines;
220 push( @dotlines, 'digraph stemma {' );
221 my @real; # A cheap sort
222 foreach my $n ( sort $self->graph->vertices ) {
223 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
224 $c = 'extant' unless $c;
225 if( $c eq 'extant' ) {
226 push( @real, $n );
227 } else {
228 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
229 }
e367f5c0 230 }
7a7c249c 231 # Now do the real ones
232 foreach my $n ( @real ) {
233 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
234 }
235 foreach my $e ( sort _by_vertex $self->graph->edges ) {
236 my( $from, $to ) = @$e;
237 push( @dotlines, " $from -> $to;" );
238 }
239 push( @dotlines, '}' );
88a6bac5 240 return join( $join, @dotlines );
7a7c249c 241}
242
243sub _make_dotline {
244 my( $obj, %attr ) = @_;
245 my @pairs;
246 foreach my $k ( keys %attr ) {
247 my $v = $attr{$k};
248 $v =~ s/\"/\\\"/g;
249 push( @pairs, "$k=\"$v\"" );
250 }
251 return sprintf( " %s [ %s ];", $obj, join( ', ', @pairs ) );
8d9a1cd8 252}
253
7a7c249c 254sub _by_vertex {
255 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
256}
8d9a1cd8 257
027d819c 258=head2 as_svg
259
260Returns an SVG representation of the graph, calling as_dot first.
261
262=cut
263
8d9a1cd8 264sub as_svg {
265 my( $self, $opts ) = @_;
266 my $dot = $self->as_dot( $opts );
e79c23c7 267 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 268 my $svg;
e79c23c7 269 my $dotfile = File::Temp->new();
270 ## TODO REMOVE
271 # $dotfile->unlink_on_destroy(0);
272 binmode $dotfile, ':utf8';
8d9a1cd8 273 print $dotfile $dot;
e79c23c7 274 push( @cmd, $dotfile->filename );
275 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 276 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 277 # Get rid of width and height attributes to allow scaling.
3bf5d6f1 278 my $parser = XML::LibXML->new();
279 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
5a7e26a9 280 $svgdoc->documentElement->removeAttribute('width');
3bf5d6f1 281 $svgdoc->documentElement->removeAttribute('height');
282 # Return the result
283 return decode_utf8( $svgdoc->toString );
e79c23c7 284}
285
027d819c 286=head2 witnesses
287
288Returns a list of the extant witnesses represented in the stemma.
289
290=cut
291
08e0fb85 292sub witnesses {
293 my $self = shift;
294 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
295 $self->graph->vertices;
296 return @wits;
297}
298
bebec0e9 299sub hypotheticals {
300 my $self = shift;
301 my @wits = grep
302 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
303 $self->graph->vertices;
304 return @wits;
305}
306
63778331 307sub throw {
308 Text::Tradition::Error->throw(
309 'ident' => 'Stemma error',
310 'message' => $_[0],
311 );
312}
313
314
9463b0bf 315no Moose;
316__PACKAGE__->meta->make_immutable;
317
3181;
027d819c 319
320=head1 LICENSE
321
322This package is free software and is provided "as is" without express
323or implied warranty. You can redistribute it and/or modify it under
324the same terms as Perl itself.
325
326=head1 AUTHOR
327
328Tara L Andrews E<lt>aurum@cpan.orgE<gt>