overhaul of analysis with corresponding updates to stemma graph generation
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
89=item * collation - The collation with which the stemma is associated.
90
91=item * dot - A filehandle open to a DOT representation of the stemma graph.
92
93=back
94
95=cut
96
9463b0bf 97has collation => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation',
100 required => 1,
8d9a1cd8 101 weak_ref => 1,
9463b0bf 102 );
103
e05997e2 104has graph => (
105 is => 'rw',
106 isa => 'Graph',
107 predicate => 'has_graph',
108 );
c57be097 109
e05997e2 110sub BUILD {
111 my( $self, $args ) = @_;
112 # If we have been handed a dotfile, initialize it into a graph.
113 if( exists $args->{'dot'} ) {
027d819c 114 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 115 }
c0ccdb62 116}
117
027d819c 118sub _graph_from_dot {
8d9a1cd8 119 my( $self, $dotfh ) = @_;
8d9a1cd8 120 my $reader = Graph::Reader::Dot->new();
121 my $graph = $reader->read_graph( $dotfh );
7a7c249c 122 if( $graph ) {
123 $self->graph( $graph );
124 # Go through the nodes and set any non-hypothetical node to extant.
125 foreach my $v ( $self->graph->vertices ) {
126 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
127 unless $self->graph->has_vertex_attribute( $v, 'class' );
128 }
129 } else {
63778331 130 throw( "Failed to parse dot in $dotfh" );
7a7c249c 131 }
8d9a1cd8 132}
133
027d819c 134=head1 METHODS
135
136=head2 as_dot( \%options )
137
138Returns a normal dot representation of the stemma layout, suitable for rendering
139with GraphViz. Options include:
140
141=over
142
143=item * graph - A hashref of global graph options.
144
145=item * node - A hashref of global node options.
146
147=item * edge - A hashref of global edge options.
148
149=back
150
151See the GraphViz documentation for the list of available options.
152
153=cut
154
8d9a1cd8 155sub as_dot {
e367f5c0 156 my( $self, $opts ) = @_;
7a7c249c 157
335a62ef 158 ## See if we are including any a.c. witnesses in this graph.
159 my $graph = $self->graph;
160 if( exists $opts->{'layerwits'} ) {
5c44c598 161 my $extant = {};
162 map { $extant->{$_} = 1 } $self->witnesses;
163 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 164 }
165
7a7c249c 166 # Get default and specified options
e02340f3 167 my %graphopts = (
168 # 'ratio' => 1,
169 );
7a7c249c 170 my %nodeopts = (
171 'fontsize' => 11,
7a7c249c 172 'style' => 'filled',
173 'fillcolor' => 'white',
e02340f3 174 'color' => 'white',
7a7c249c 175 'shape' => 'ellipse', # Shape for the extant nodes
176 );
177 my %edgeopts = (
e02340f3 178 'arrowhead' => 'none',
7a7c249c 179 );
180 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
181 if $opts->{'graph'};
182 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
183 if $opts->{'node'};
184 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
185 if $opts->{'edge'};
335a62ef 186
7a7c249c 187 my @dotlines;
188 push( @dotlines, 'digraph stemma {' );
189 ## Print out the global attributes
190 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
191 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 192 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
193
194 # Add each of the nodes.
335a62ef 195 foreach my $n ( $graph->vertices ) {
196 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
197 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 198 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 199 } else {
7a7c249c 200 # Use the default display settings.
986bbd1b 201 $n = _dotquote( $n );
7a7c249c 202 push( @dotlines, " $n;" );
e79c23c7 203 }
204 }
7a7c249c 205 # Add each of our edges.
335a62ef 206 foreach my $e ( $graph->edges ) {
986bbd1b 207 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 208 push( @dotlines, " $from -> $to;" );
209 }
210 push( @dotlines, '}' );
e79c23c7 211
7a7c249c 212 return join( "\n", @dotlines );
213}
214
335a62ef 215=head2 editable( $opts )
027d819c 216
5c44c598 217=head2 editable_graph( $graph, $opts )
218
88a6bac5 219Returns a version of the graph rendered in our definition format. The
335a62ef 220output separates statements with a newline; set $opts->{'linesep'} to the
221empty string or to a space if the result is to be sent via JSON.
222
5c44c598 223If a situational version of the stemma is required, the arguments for
224situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 225
226=cut
7a7c249c 227
7a7c249c 228sub editable {
5c44c598 229 my( $self, $opts ) = @_;
335a62ef 230 my $graph = $self->graph;
5c44c598 231 ## See if we need an editable version of a situational graph.
232 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
233 my $extant = delete $opts->{'extant'} || {};
234 my $layerwits = delete $opts->{'layerwits'} || [];
235 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 236 }
5c44c598 237 return editable_graph( $graph, $opts );
238}
239
240sub editable_graph {
241 my( $graph, $opts ) = @_;
335a62ef 242
243 # Create the graph
244 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 245 my @dotlines;
246 push( @dotlines, 'digraph stemma {' );
247 my @real; # A cheap sort
5c44c598 248 foreach my $n ( sort $graph->vertices ) {
249 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 250 $c = 'extant' unless $c;
251 if( $c eq 'extant' ) {
252 push( @real, $n );
253 } else {
254 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
255 }
e367f5c0 256 }
7a7c249c 257 # Now do the real ones
258 foreach my $n ( @real ) {
259 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
260 }
5c44c598 261 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 262 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 263 push( @dotlines, " $from -> $to;" );
264 }
265 push( @dotlines, '}' );
88a6bac5 266 return join( $join, @dotlines );
7a7c249c 267}
268
269sub _make_dotline {
270 my( $obj, %attr ) = @_;
271 my @pairs;
272 foreach my $k ( keys %attr ) {
986bbd1b 273 my $v = _dotquote( $attr{$k} );
274 push( @pairs, "$k=$v" );
7a7c249c 275 }
986bbd1b 276 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 277}
278
986bbd1b 279sub _dotquote {
280 my( $str ) = @_;
281 return $str if $str =~ /^[A-Za-z0-9]+$/;
282 $str =~ s/\"/\\\"/g;
283 $str = '"' . $str . '"';
284 return $str;
285}
286
7a7c249c 287sub _by_vertex {
288 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
289}
8d9a1cd8 290
5c44c598 291=head2 situation_graph( $extant, $layered )
335a62ef 292
5c44c598 293Returns a graph which is the original stemma with all witnesses not in the
294%$extant hash marked as hypothetical, and witness layers added to the graph
295according to the list in @$layered. A layered (a.c.) witness is added as a
296parent of its main version, and additionally shares all other parents and
297children with that version.
335a62ef 298
299=cut
300
5c44c598 301sub situation_graph {
302 my( $self, $extant, $layerwits ) = @_;
303
304 my $graph = $self->graph->copy;
305 foreach my $vertex ( $graph->vertices ) {
306 # Set as extant any vertex that is extant in the stemma AND
307 # exists in the $extant hash.
308 my $class = 'hypothetical';
309 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
310 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
311 $graph->set_vertex_attribute( $vertex, 'class', $class );
312 }
313
335a62ef 314 # For each 'layered' witness in the layerwits array, add it to the graph
315 # as an ancestor of the 'main' witness, and otherwise with the same parent/
316 # child links as its main analogue.
317 # TOOD Handle case where B is copied from A but corrected from C
5c44c598 318 my $aclabel = $self->collation->ac_label;
335a62ef 319 foreach my $lw ( @$layerwits ) {
320 # Add the layered witness and set it with the same attributes as
321 # its 'main' analogue
5c44c598 322 throw( "Cannot add a layer to a hypothetical witness $lw" )
323 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
324 my $lwac = $lw . $aclabel;
335a62ef 325 $graph->add_vertex( $lwac );
326 $graph->set_vertex_attributes( $lwac,
327 $graph->get_vertex_attributes( $lw ) );
328
329 # Set it as ancestor to the main witness
330 $graph->add_edge( $lwac, $lw );
331
332 # Give it the same ancestors and descendants as the main witness has,
333 # bearing in mind that those ancestors and descendants might also just
334 # have had a layered witness defined.
335 foreach my $v ( $graph->predecessors( $lw ) ) {
336 next if $v eq $lwac; # Don't add a loop
337 $graph->add_edge( $v, $lwac );
5c44c598 338 $graph->add_edge( $v.$aclabel, $lwac )
339 if $graph->has_vertex( $v.$aclabel );
335a62ef 340 }
341 foreach my $v ( $graph->successors( $lw ) ) {
342 next if $v eq $lwac; # but this shouldn't occur
343 $graph->add_edge( $lwac, $v );
5c44c598 344 $graph->add_edge( $lwac, $v.$aclabel )
345 if $graph->has_vertex( $v.$aclabel );
335a62ef 346 }
347 }
348 return $graph;
349}
350
027d819c 351=head2 as_svg
352
353Returns an SVG representation of the graph, calling as_dot first.
354
355=cut
356
8d9a1cd8 357sub as_svg {
358 my( $self, $opts ) = @_;
359 my $dot = $self->as_dot( $opts );
e79c23c7 360 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 361 my $svg;
e79c23c7 362 my $dotfile = File::Temp->new();
363 ## TODO REMOVE
364 # $dotfile->unlink_on_destroy(0);
365 binmode $dotfile, ':utf8';
8d9a1cd8 366 print $dotfile $dot;
e79c23c7 367 push( @cmd, $dotfile->filename );
368 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 369 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 370 # Get rid of width and height attributes to allow scaling.
c57be097 371 if( $opts->{'size'} ) {
428bcf0b 372 require XML::LibXML;
373 my $parser = XML::LibXML->new();
374 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
c57be097 375 my( $ew, $eh ) = @{$opts->{'size'}};
376 # If the graph is wider than it is tall, set width to ew and remove height.
377 # Otherwise set height to eh and remove width.
378 my $width = $svgdoc->documentElement->getAttribute('width');
379 my $height = $svgdoc->documentElement->getAttribute('height');
380 $width =~ s/\D+//g;
381 $height =~ s/\D+//g;
382 my( $remove, $keep, $val );
383 if( $width > $height ) {
384 $remove = 'height';
385 $keep = 'width';
386 $val = $ew . 'px';
387 } else {
388 $remove = 'width';
389 $keep = 'height';
390 $val = $eh . 'px';
391 }
392 $svgdoc->documentElement->removeAttribute( $remove );
393 $svgdoc->documentElement->setAttribute( $keep, $val );
428bcf0b 394 $svg = $svgdoc->toString();
c57be097 395 }
3bf5d6f1 396 # Return the result
428bcf0b 397 return decode_utf8( $svg );
e79c23c7 398}
399
027d819c 400=head2 witnesses
401
402Returns a list of the extant witnesses represented in the stemma.
403
404=cut
405
08e0fb85 406sub witnesses {
407 my $self = shift;
408 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
409 $self->graph->vertices;
410 return @wits;
411}
412
06e7cbc7 413=head2 hypotheticals
414
415Returns a list of the hypothetical witnesses represented in the stemma.
416
417=cut
418
bebec0e9 419sub hypotheticals {
420 my $self = shift;
421 my @wits = grep
422 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
423 $self->graph->vertices;
424 return @wits;
425}
426
63778331 427sub throw {
428 Text::Tradition::Error->throw(
429 'ident' => 'Stemma error',
430 'message' => $_[0],
431 );
432}
433
434
9463b0bf 435no Moose;
436__PACKAGE__->meta->make_immutable;
437
4381;
027d819c 439
440=head1 LICENSE
441
442This package is free software and is provided "as is" without express
443or implied warranty. You can redistribute it and/or modify it under
444the same terms as Perl itself.
445
446=head1 AUTHOR
447
448Tara L Andrews E<lt>aurum@cpan.orgE<gt>