add stemma edit/add dialog, textinfo edit dialog, UI bugfixes mostly to error handlin...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
89=item * collation - The collation with which the stemma is associated.
90
91=item * dot - A filehandle open to a DOT representation of the stemma graph.
92
93=back
94
95=cut
96
9463b0bf 97has collation => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation',
100 required => 1,
8d9a1cd8 101 weak_ref => 1,
9463b0bf 102 );
103
e05997e2 104has graph => (
105 is => 'rw',
106 isa => 'Graph',
107 predicate => 'has_graph',
108 );
c57be097 109
e05997e2 110sub BUILD {
111 my( $self, $args ) = @_;
112 # If we have been handed a dotfile, initialize it into a graph.
113 if( exists $args->{'dot'} ) {
027d819c 114 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 115 }
c0ccdb62 116}
117
027d819c 118sub _graph_from_dot {
8d9a1cd8 119 my( $self, $dotfh ) = @_;
8d9a1cd8 120 my $reader = Graph::Reader::Dot->new();
121 my $graph = $reader->read_graph( $dotfh );
7a7c249c 122 if( $graph ) {
123 $self->graph( $graph );
124 # Go through the nodes and set any non-hypothetical node to extant.
125 foreach my $v ( $self->graph->vertices ) {
126 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
127 unless $self->graph->has_vertex_attribute( $v, 'class' );
128 }
129 } else {
63778331 130 throw( "Failed to parse dot in $dotfh" );
7a7c249c 131 }
8d9a1cd8 132}
133
027d819c 134=head1 METHODS
135
136=head2 as_dot( \%options )
137
138Returns a normal dot representation of the stemma layout, suitable for rendering
139with GraphViz. Options include:
140
141=over
142
143=item * graph - A hashref of global graph options.
144
145=item * node - A hashref of global node options.
146
147=item * edge - A hashref of global edge options.
148
149=back
150
151See the GraphViz documentation for the list of available options.
152
153=cut
154
8d9a1cd8 155sub as_dot {
e367f5c0 156 my( $self, $opts ) = @_;
7a7c249c 157
335a62ef 158 ## See if we are including any a.c. witnesses in this graph.
159 my $graph = $self->graph;
160 if( exists $opts->{'layerwits'} ) {
5c44c598 161 my $extant = {};
162 map { $extant->{$_} = 1 } $self->witnesses;
163 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 164 }
165
7a7c249c 166 # Get default and specified options
e02340f3 167 my %graphopts = (
168 # 'ratio' => 1,
169 );
7a7c249c 170 my %nodeopts = (
171 'fontsize' => 11,
7a7c249c 172 'style' => 'filled',
173 'fillcolor' => 'white',
e02340f3 174 'color' => 'white',
7a7c249c 175 'shape' => 'ellipse', # Shape for the extant nodes
176 );
177 my %edgeopts = (
e02340f3 178 'arrowhead' => 'none',
7a7c249c 179 );
180 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
181 if $opts->{'graph'};
182 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
183 if $opts->{'node'};
184 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
185 if $opts->{'edge'};
335a62ef 186
7a7c249c 187 my @dotlines;
188 push( @dotlines, 'digraph stemma {' );
189 ## Print out the global attributes
190 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
191 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 192 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
193
194 # Add each of the nodes.
335a62ef 195 foreach my $n ( $graph->vertices ) {
196 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
197 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 198 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 199 } else {
7a7c249c 200 # Use the default display settings.
986bbd1b 201 $n = _dotquote( $n );
7a7c249c 202 push( @dotlines, " $n;" );
e79c23c7 203 }
204 }
7a7c249c 205 # Add each of our edges.
335a62ef 206 foreach my $e ( $graph->edges ) {
986bbd1b 207 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 208 push( @dotlines, " $from -> $to;" );
209 }
210 push( @dotlines, '}' );
e79c23c7 211
7a7c249c 212 return join( "\n", @dotlines );
213}
214
0bded693 215=head2 alter_graph( $dotstring )
216
217Alters the graph of this stemma according to the definition specified
218in $dotstring.
219
220=cut
221
222sub alter_graph {
223 my( $self, $dotstring ) = @_;
224 my $dotfh;
225 open $dotfh, '<', \$dotstring;
226 $self->_graph_from_dot( $dotfh );
227}
228
335a62ef 229=head2 editable( $opts )
027d819c 230
5c44c598 231=head2 editable_graph( $graph, $opts )
232
88a6bac5 233Returns a version of the graph rendered in our definition format. The
335a62ef 234output separates statements with a newline; set $opts->{'linesep'} to the
235empty string or to a space if the result is to be sent via JSON.
236
5c44c598 237If a situational version of the stemma is required, the arguments for
238situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 239
240=cut
7a7c249c 241
7a7c249c 242sub editable {
5c44c598 243 my( $self, $opts ) = @_;
335a62ef 244 my $graph = $self->graph;
5c44c598 245 ## See if we need an editable version of a situational graph.
246 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
247 my $extant = delete $opts->{'extant'} || {};
248 my $layerwits = delete $opts->{'layerwits'} || [];
249 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 250 }
5c44c598 251 return editable_graph( $graph, $opts );
252}
253
254sub editable_graph {
255 my( $graph, $opts ) = @_;
335a62ef 256
257 # Create the graph
258 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 259 my @dotlines;
260 push( @dotlines, 'digraph stemma {' );
261 my @real; # A cheap sort
5c44c598 262 foreach my $n ( sort $graph->vertices ) {
263 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 264 $c = 'extant' unless $c;
265 if( $c eq 'extant' ) {
266 push( @real, $n );
267 } else {
268 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
269 }
e367f5c0 270 }
7a7c249c 271 # Now do the real ones
272 foreach my $n ( @real ) {
273 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
274 }
5c44c598 275 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 276 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 277 push( @dotlines, " $from -> $to;" );
278 }
279 push( @dotlines, '}' );
88a6bac5 280 return join( $join, @dotlines );
7a7c249c 281}
282
283sub _make_dotline {
284 my( $obj, %attr ) = @_;
285 my @pairs;
286 foreach my $k ( keys %attr ) {
986bbd1b 287 my $v = _dotquote( $attr{$k} );
288 push( @pairs, "$k=$v" );
7a7c249c 289 }
986bbd1b 290 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 291}
292
986bbd1b 293sub _dotquote {
294 my( $str ) = @_;
295 return $str if $str =~ /^[A-Za-z0-9]+$/;
296 $str =~ s/\"/\\\"/g;
297 $str = '"' . $str . '"';
298 return $str;
299}
300
7a7c249c 301sub _by_vertex {
302 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
303}
8d9a1cd8 304
5c44c598 305=head2 situation_graph( $extant, $layered )
335a62ef 306
5c44c598 307Returns a graph which is the original stemma with all witnesses not in the
308%$extant hash marked as hypothetical, and witness layers added to the graph
309according to the list in @$layered. A layered (a.c.) witness is added as a
310parent of its main version, and additionally shares all other parents and
311children with that version.
335a62ef 312
313=cut
314
5c44c598 315sub situation_graph {
316 my( $self, $extant, $layerwits ) = @_;
317
318 my $graph = $self->graph->copy;
319 foreach my $vertex ( $graph->vertices ) {
320 # Set as extant any vertex that is extant in the stemma AND
321 # exists in the $extant hash.
322 my $class = 'hypothetical';
323 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
324 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
325 $graph->set_vertex_attribute( $vertex, 'class', $class );
326 }
327
335a62ef 328 # For each 'layered' witness in the layerwits array, add it to the graph
329 # as an ancestor of the 'main' witness, and otherwise with the same parent/
330 # child links as its main analogue.
331 # TOOD Handle case where B is copied from A but corrected from C
5c44c598 332 my $aclabel = $self->collation->ac_label;
335a62ef 333 foreach my $lw ( @$layerwits ) {
334 # Add the layered witness and set it with the same attributes as
335 # its 'main' analogue
5c44c598 336 throw( "Cannot add a layer to a hypothetical witness $lw" )
337 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
338 my $lwac = $lw . $aclabel;
335a62ef 339 $graph->add_vertex( $lwac );
340 $graph->set_vertex_attributes( $lwac,
341 $graph->get_vertex_attributes( $lw ) );
342
343 # Set it as ancestor to the main witness
344 $graph->add_edge( $lwac, $lw );
345
346 # Give it the same ancestors and descendants as the main witness has,
347 # bearing in mind that those ancestors and descendants might also just
348 # have had a layered witness defined.
349 foreach my $v ( $graph->predecessors( $lw ) ) {
350 next if $v eq $lwac; # Don't add a loop
351 $graph->add_edge( $v, $lwac );
5c44c598 352 $graph->add_edge( $v.$aclabel, $lwac )
353 if $graph->has_vertex( $v.$aclabel );
335a62ef 354 }
355 foreach my $v ( $graph->successors( $lw ) ) {
356 next if $v eq $lwac; # but this shouldn't occur
357 $graph->add_edge( $lwac, $v );
5c44c598 358 $graph->add_edge( $lwac, $v.$aclabel )
359 if $graph->has_vertex( $v.$aclabel );
335a62ef 360 }
361 }
362 return $graph;
363}
364
027d819c 365=head2 as_svg
366
367Returns an SVG representation of the graph, calling as_dot first.
368
369=cut
370
8d9a1cd8 371sub as_svg {
372 my( $self, $opts ) = @_;
373 my $dot = $self->as_dot( $opts );
e79c23c7 374 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 375 my $svg;
e79c23c7 376 my $dotfile = File::Temp->new();
377 ## TODO REMOVE
378 # $dotfile->unlink_on_destroy(0);
379 binmode $dotfile, ':utf8';
8d9a1cd8 380 print $dotfile $dot;
e79c23c7 381 push( @cmd, $dotfile->filename );
382 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 383 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 384 # Get rid of width and height attributes to allow scaling.
c57be097 385 if( $opts->{'size'} ) {
428bcf0b 386 require XML::LibXML;
387 my $parser = XML::LibXML->new();
388 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
c57be097 389 my( $ew, $eh ) = @{$opts->{'size'}};
390 # If the graph is wider than it is tall, set width to ew and remove height.
391 # Otherwise set height to eh and remove width.
392 my $width = $svgdoc->documentElement->getAttribute('width');
393 my $height = $svgdoc->documentElement->getAttribute('height');
394 $width =~ s/\D+//g;
395 $height =~ s/\D+//g;
396 my( $remove, $keep, $val );
397 if( $width > $height ) {
398 $remove = 'height';
399 $keep = 'width';
400 $val = $ew . 'px';
401 } else {
402 $remove = 'width';
403 $keep = 'height';
404 $val = $eh . 'px';
405 }
406 $svgdoc->documentElement->removeAttribute( $remove );
407 $svgdoc->documentElement->setAttribute( $keep, $val );
428bcf0b 408 $svg = $svgdoc->toString();
c57be097 409 }
3bf5d6f1 410 # Return the result
428bcf0b 411 return decode_utf8( $svg );
e79c23c7 412}
413
027d819c 414=head2 witnesses
415
416Returns a list of the extant witnesses represented in the stemma.
417
418=cut
419
08e0fb85 420sub witnesses {
421 my $self = shift;
422 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
423 $self->graph->vertices;
424 return @wits;
425}
426
06e7cbc7 427=head2 hypotheticals
428
429Returns a list of the hypothetical witnesses represented in the stemma.
430
431=cut
432
bebec0e9 433sub hypotheticals {
434 my $self = shift;
435 my @wits = grep
436 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
437 $self->graph->vertices;
438 return @wits;
439}
440
63778331 441sub throw {
442 Text::Tradition::Error->throw(
443 'ident' => 'Stemma error',
444 'message' => $_[0],
445 );
446}
447
448
9463b0bf 449no Moose;
450__PACKAGE__->meta->make_immutable;
451
4521;
027d819c 453
454=head1 LICENSE
455
456This package is free software and is provided "as is" without express
457or implied warranty. You can redistribute it and/or modify it under
458the same terms as Perl itself.
459
460=head1 AUTHOR
461
462Tara L Andrews E<lt>aurum@cpan.orgE<gt>