add OpenID logo and a little styling to the login forms
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
3bf5d6f1 11use XML::LibXML;
40f19742 12use Moose;
9463b0bf 13
027d819c 14=head1 NAME
15
16Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
17
18=head1 SYNOPSIS
19
20 use Text::Tradition;
21 my $t = Text::Tradition->new(
22 'name' => 'this is a text',
23 'input' => 'TEI',
24 'file' => '/path/to/tei_parallel_seg_file.xml' );
25
26 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27
28=head1 DESCRIPTION
29
30Text::Tradition is a library for representation and analysis of collated
335a62ef 31texts, particularly medieval ones. The Stemma is a representation of the
32copying relationships between the witnesses in a Tradition, modelled with
33a connected rooted directed acyclic graph (CRDAG).
027d819c 34
35=head1 DOT SYNTAX
36
335a62ef 37The easiest way to define a stemma is to use a special form of the 'dot'
38syntax of GraphViz.
027d819c 39
40Each stemma opens with the line
41
42 digraph Stemma {
43
44and continues with a list of all manuscript witnesses in the stemma, whether
45extant witnesses or missing archetypes or hyparchetypes. Each of these is
46listed by its sigil on its own line, e.g.:
47
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
50 Ms4 [ class=extant ]
51
52Extant witnesses are listed with class=extant; missing or postulated witnesses
53are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54unique name or number, but can be represented as anonymous with the addition
55of 'label=*' to their lines. Greek letters or other special characters may be
56used as names, but they must always be wrapped in double quotes.
57
58Links between manuscripts are then listed with arrow notation, as below. These
59lines show the direction of copying, one step at a time, for the entire stemma.
60
61 alpha -> 1
62 1 -> Ms4
63
64The final line in the definition should be the closing brace:
65
66 }
67
68Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69from the archetype O and C was copied from B, the definition would be:
70
71 digraph Stemma {
72 O [ class=hypothetical]
73 A [ class=extant ]
74 B [ class=extant ]
75 C [ class=extant ]
76 O -> A
77 O -> B
78 B -> C
79 }
80
81=head1 CONSTRUCTOR
82
83=head2 new
84
85The constructor. This should generally be called from Text::Tradition, but
86if called directly it takes the following options:
87
88=over
89
90=item * collation - The collation with which the stemma is associated.
91
92=item * dot - A filehandle open to a DOT representation of the stemma graph.
93
94=back
95
96=cut
97
9463b0bf 98has collation => (
99 is => 'ro',
100 isa => 'Text::Tradition::Collation',
101 required => 1,
8d9a1cd8 102 weak_ref => 1,
9463b0bf 103 );
104
e05997e2 105has graph => (
106 is => 'rw',
107 isa => 'Graph',
108 predicate => 'has_graph',
109 );
c57be097 110
e05997e2 111sub BUILD {
112 my( $self, $args ) = @_;
113 # If we have been handed a dotfile, initialize it into a graph.
114 if( exists $args->{'dot'} ) {
027d819c 115 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 116 }
c0ccdb62 117}
118
027d819c 119sub _graph_from_dot {
8d9a1cd8 120 my( $self, $dotfh ) = @_;
8d9a1cd8 121 my $reader = Graph::Reader::Dot->new();
122 my $graph = $reader->read_graph( $dotfh );
7a7c249c 123 if( $graph ) {
124 $self->graph( $graph );
125 # Go through the nodes and set any non-hypothetical node to extant.
126 foreach my $v ( $self->graph->vertices ) {
127 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
128 unless $self->graph->has_vertex_attribute( $v, 'class' );
129 }
130 } else {
63778331 131 throw( "Failed to parse dot in $dotfh" );
7a7c249c 132 }
8d9a1cd8 133}
134
027d819c 135=head1 METHODS
136
137=head2 as_dot( \%options )
138
139Returns a normal dot representation of the stemma layout, suitable for rendering
140with GraphViz. Options include:
141
142=over
143
144=item * graph - A hashref of global graph options.
145
146=item * node - A hashref of global node options.
147
148=item * edge - A hashref of global edge options.
149
150=back
151
152See the GraphViz documentation for the list of available options.
153
154=cut
155
8d9a1cd8 156sub as_dot {
e367f5c0 157 my( $self, $opts ) = @_;
7a7c249c 158
335a62ef 159 ## See if we are including any a.c. witnesses in this graph.
160 my $graph = $self->graph;
161 if( exists $opts->{'layerwits'} ) {
162 $graph = $self->extend_graph( $opts->{'layerwits'} );
163 }
164
7a7c249c 165 # Get default and specified options
e02340f3 166 my %graphopts = (
167 # 'ratio' => 1,
168 );
7a7c249c 169 my %nodeopts = (
170 'fontsize' => 11,
7a7c249c 171 'style' => 'filled',
172 'fillcolor' => 'white',
e02340f3 173 'color' => 'white',
7a7c249c 174 'shape' => 'ellipse', # Shape for the extant nodes
175 );
176 my %edgeopts = (
e02340f3 177 'arrowhead' => 'none',
7a7c249c 178 );
179 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
180 if $opts->{'graph'};
181 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
182 if $opts->{'node'};
183 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
184 if $opts->{'edge'};
335a62ef 185
7a7c249c 186 my @dotlines;
187 push( @dotlines, 'digraph stemma {' );
188 ## Print out the global attributes
189 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
190 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 191 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
192
193 # Add each of the nodes.
335a62ef 194 foreach my $n ( $graph->vertices ) {
195 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
196 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 197 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 198 } else {
7a7c249c 199 # Use the default display settings.
986bbd1b 200 $n = _dotquote( $n );
7a7c249c 201 push( @dotlines, " $n;" );
e79c23c7 202 }
203 }
7a7c249c 204 # Add each of our edges.
335a62ef 205 foreach my $e ( $graph->edges ) {
986bbd1b 206 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 207 push( @dotlines, " $from -> $to;" );
208 }
209 push( @dotlines, '}' );
e79c23c7 210
7a7c249c 211 return join( "\n", @dotlines );
212}
213
335a62ef 214=head2 editable( $opts )
027d819c 215
88a6bac5 216Returns a version of the graph rendered in our definition format. The
335a62ef 217output separates statements with a newline; set $opts->{'linesep'} to the
218empty string or to a space if the result is to be sent via JSON.
219
220Any layer witnesses to be included should be passed via $opts->{'layerwits'}.
027d819c 221
222=cut
7a7c249c 223
7a7c249c 224sub editable {
335a62ef 225 my( $self, $opts ) = @_;
226
227 ## See if we are including any a.c. witnesses in this graph.
228 my $graph = $self->graph;
229 if( exists $opts->{'layerwits'} ) {
230 $graph = $self->extend_graph( $opts->{'layerwits'} );
231 }
232
233 # Create the graph
234 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 235 my @dotlines;
236 push( @dotlines, 'digraph stemma {' );
237 my @real; # A cheap sort
238 foreach my $n ( sort $self->graph->vertices ) {
239 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
240 $c = 'extant' unless $c;
241 if( $c eq 'extant' ) {
242 push( @real, $n );
243 } else {
244 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
245 }
e367f5c0 246 }
7a7c249c 247 # Now do the real ones
248 foreach my $n ( @real ) {
249 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
250 }
251 foreach my $e ( sort _by_vertex $self->graph->edges ) {
986bbd1b 252 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 253 push( @dotlines, " $from -> $to;" );
254 }
255 push( @dotlines, '}' );
88a6bac5 256 return join( $join, @dotlines );
7a7c249c 257}
258
259sub _make_dotline {
260 my( $obj, %attr ) = @_;
261 my @pairs;
262 foreach my $k ( keys %attr ) {
986bbd1b 263 my $v = _dotquote( $attr{$k} );
264 push( @pairs, "$k=$v" );
7a7c249c 265 }
986bbd1b 266 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 267}
268
986bbd1b 269sub _dotquote {
270 my( $str ) = @_;
271 return $str if $str =~ /^[A-Za-z0-9]+$/;
272 $str =~ s/\"/\\\"/g;
273 $str = '"' . $str . '"';
274 return $str;
275}
276
7a7c249c 277sub _by_vertex {
278 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
279}
8d9a1cd8 280
335a62ef 281=head2 extend_graph( $layered_witnesses )
282
283Returns a graph which is the original stemma with witness layers added for the
284list in @$layered_witnesses. A layered (a.c.) witness is added as a parent
285of its main version, and additionally shares all other parents and children with
286that version.
287
288=cut
289
290sub extend_graph {
291 my( $self, $layerwits ) = @_;
292 # For each 'layered' witness in the layerwits array, add it to the graph
293 # as an ancestor of the 'main' witness, and otherwise with the same parent/
294 # child links as its main analogue.
295 # TOOD Handle case where B is copied from A but corrected from C
296
297 # Iterate through, adding a.c. witnesses
298 my $actag = $self->collation->ac_label;
94654e27 299 my $graph = $self->graph->deep_copy;
335a62ef 300 foreach my $lw ( @$layerwits ) {
301 # Add the layered witness and set it with the same attributes as
302 # its 'main' analogue
303 my $lwac = $lw . $self->collation->ac_label;
304 $graph->add_vertex( $lwac );
305 $graph->set_vertex_attributes( $lwac,
306 $graph->get_vertex_attributes( $lw ) );
307
308 # Set it as ancestor to the main witness
309 $graph->add_edge( $lwac, $lw );
310
311 # Give it the same ancestors and descendants as the main witness has,
312 # bearing in mind that those ancestors and descendants might also just
313 # have had a layered witness defined.
314 foreach my $v ( $graph->predecessors( $lw ) ) {
315 next if $v eq $lwac; # Don't add a loop
316 $graph->add_edge( $v, $lwac );
317 $graph->add_edge( $v.$self->collation->ac_label, $lwac )
318 if $graph->has_vertex( $v.$self->collation->ac_label );
319 }
320 foreach my $v ( $graph->successors( $lw ) ) {
321 next if $v eq $lwac; # but this shouldn't occur
322 $graph->add_edge( $lwac, $v );
323 $graph->add_edge( $lwac, $v.$self->collation->ac_label )
324 if $graph->has_vertex( $v.$self->collation->ac_label );
325 }
326 }
327 return $graph;
328}
329
027d819c 330=head2 as_svg
331
332Returns an SVG representation of the graph, calling as_dot first.
333
334=cut
335
8d9a1cd8 336sub as_svg {
337 my( $self, $opts ) = @_;
338 my $dot = $self->as_dot( $opts );
e79c23c7 339 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 340 my $svg;
e79c23c7 341 my $dotfile = File::Temp->new();
342 ## TODO REMOVE
343 # $dotfile->unlink_on_destroy(0);
344 binmode $dotfile, ':utf8';
8d9a1cd8 345 print $dotfile $dot;
e79c23c7 346 push( @cmd, $dotfile->filename );
347 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 348 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 349 # Get rid of width and height attributes to allow scaling.
3bf5d6f1 350 my $parser = XML::LibXML->new();
351 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
c57be097 352 if( $opts->{'size'} ) {
353 my( $ew, $eh ) = @{$opts->{'size'}};
354 # If the graph is wider than it is tall, set width to ew and remove height.
355 # Otherwise set height to eh and remove width.
356 my $width = $svgdoc->documentElement->getAttribute('width');
357 my $height = $svgdoc->documentElement->getAttribute('height');
358 $width =~ s/\D+//g;
359 $height =~ s/\D+//g;
360 my( $remove, $keep, $val );
361 if( $width > $height ) {
362 $remove = 'height';
363 $keep = 'width';
364 $val = $ew . 'px';
365 } else {
366 $remove = 'width';
367 $keep = 'height';
368 $val = $eh . 'px';
369 }
370 $svgdoc->documentElement->removeAttribute( $remove );
371 $svgdoc->documentElement->setAttribute( $keep, $val );
372 }
3bf5d6f1 373 # Return the result
374 return decode_utf8( $svgdoc->toString );
e79c23c7 375}
376
027d819c 377=head2 witnesses
378
379Returns a list of the extant witnesses represented in the stemma.
380
381=cut
382
08e0fb85 383sub witnesses {
384 my $self = shift;
385 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
386 $self->graph->vertices;
387 return @wits;
388}
389
06e7cbc7 390=head2 hypotheticals
391
392Returns a list of the hypothetical witnesses represented in the stemma.
393
394=cut
395
bebec0e9 396sub hypotheticals {
397 my $self = shift;
398 my @wits = grep
399 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
400 $self->graph->vertices;
401 return @wits;
402}
403
63778331 404sub throw {
405 Text::Tradition::Error->throw(
406 'ident' => 'Stemma error',
407 'message' => $_[0],
408 );
409}
410
411
9463b0bf 412no Moose;
413__PACKAGE__->meta->make_immutable;
414
4151;
027d819c 416
417=head1 LICENSE
418
419This package is free software and is provided "as is" without express
420or implied warranty. You can redistribute it and/or modify it under
421the same terms as Perl itself.
422
423=head1 AUTHOR
424
425Tara L Andrews E<lt>aurum@cpan.orgE<gt>