add support for named stemmata
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
f96630e3 39 digraph "Name of Stemma" {
027d819c 40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
f96630e3 68 digraph "Test stemma 1" {
027d819c 69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
ea45d2a6 105TODO: {
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
836e0546 107 my $baddotfh;
ea45d2a6 108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 try {
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 }
64a36834 115}
116
117# Create a good graph
118my $dotfh;
119open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120binmode( $dotfh, ':utf8' );
ace5fce5 121my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 122is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 125ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
126is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 127my $found_unicode_sigil;
128foreach my $h ( $stemma->hypotheticals ) {
129 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
130}
131ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
132
ea45d2a6 133# TODO Create stemma from graph, create stemma from undirected graph,
134# create stemma from incompletely-specified graph
135
64a36834 136=end testing
137
027d819c 138=cut
139
9463b0bf 140has collation => (
141 is => 'ro',
142 isa => 'Text::Tradition::Collation',
ea45d2a6 143 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 144 weak_ref => 1,
9463b0bf 145 );
146
e05997e2 147has graph => (
148 is => 'rw',
149 isa => 'Graph',
150 predicate => 'has_graph',
151 );
ea45d2a6 152
98f22390 153has identifier => (
ea45d2a6 154 is => 'ro',
98f22390 155 isa => 'Str',
156 writer => 'set_identifier',
157 predicate => 'has_identifier',
ea45d2a6 158 );
98f22390 159
e05997e2 160sub BUILD {
161 my( $self, $args ) = @_;
162 # If we have been handed a dotfile, initialize it into a graph.
163 if( exists $args->{'dot'} ) {
027d819c 164 $self->_graph_from_dot( $args->{'dot'} );
98f22390 165 }
c0ccdb62 166}
167
ea45d2a6 168before 'graph' => sub {
169 my $self = shift;
170 if( @_ ) {
171 # Make sure all unclassed graph nodes are marked extant.
172 my $g = $_[0];
173 throw( "Cannot set graph to a non-Graph object" )
174 unless ref( $g ) eq 'Graph';
175 foreach my $v ( $g->vertices ) {
176 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
177 $g->set_vertex_attribute( $v, 'class', 'extant' );
178 }
179 }
98f22390 180 }
181};
182
183after 'graph' => sub {
184 my $self = shift;
185 return unless @_;
186 unless( $self->has_identifier ) {
187 ## HORRIBLE HACK but there is no API access to graph attributes!
188 if( exists $_[0]->[4]->{'name'} ) {
189 $self->set_identifier( $_[0]->[4]->{'name'} );
190 }
ea45d2a6 191 }
192};
193
027d819c 194sub _graph_from_dot {
8d9a1cd8 195 my( $self, $dotfh ) = @_;
8d9a1cd8 196 my $reader = Graph::Reader::Dot->new();
64a36834 197 # Redirect STDOUT in order to trap any error messages - syntax errors
198 # are evidently not fatal.
d34fdf7b 199 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 200 my $reader_out;
d34fdf7b 201 #my $saved_stderr;
202 #open $saved_stderr, ">&STDOUT";
203 #close STDOUT;
204 #open STDOUT, ">", \$reader_out;
8d9a1cd8 205 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 206 #close STDOUT;
207 #open STDOUT, ">", \$saved_stderr;
64a36834 208 if( $reader_out && $reader_out =~ /error/s ) {
209 throw( "Error trying to parse dot: $reader_out" );
210 } elsif( !$graph ) {
211 throw( "Failed to create graph from dot" );
212 }
213 $self->graph( $graph );
8d9a1cd8 214}
215
98f22390 216sub is_undirected {
217 my( $self ) = @_;
218 return undef unless $self->has_graph;
219 return $self->graph->is_undirected;
220}
221
027d819c 222=head1 METHODS
223
224=head2 as_dot( \%options )
225
226Returns a normal dot representation of the stemma layout, suitable for rendering
227with GraphViz. Options include:
228
229=over
230
231=item * graph - A hashref of global graph options.
232
233=item * node - A hashref of global node options.
234
235=item * edge - A hashref of global edge options.
236
237=back
238
239See the GraphViz documentation for the list of available options.
240
241=cut
242
8d9a1cd8 243sub as_dot {
e367f5c0 244 my( $self, $opts ) = @_;
7a7c249c 245
335a62ef 246 ## See if we are including any a.c. witnesses in this graph.
247 my $graph = $self->graph;
248 if( exists $opts->{'layerwits'} ) {
5c44c598 249 my $extant = {};
250 map { $extant->{$_} = 1 } $self->witnesses;
251 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 252 }
253
7a7c249c 254 # Get default and specified options
e02340f3 255 my %graphopts = (
256 # 'ratio' => 1,
f05e891c 257 'bgcolor' => 'transparent',
e02340f3 258 );
7a7c249c 259 my %nodeopts = (
260 'fontsize' => 11,
7a7c249c 261 'style' => 'filled',
262 'fillcolor' => 'white',
e02340f3 263 'color' => 'white',
7a7c249c 264 'shape' => 'ellipse', # Shape for the extant nodes
265 );
266 my %edgeopts = (
e02340f3 267 'arrowhead' => 'none',
7a7c249c 268 );
269 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
270 if $opts->{'graph'};
271 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
272 if $opts->{'node'};
273 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
274 if $opts->{'edge'};
335a62ef 275
ea45d2a6 276 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
f96630e3 277 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
278 : 'stemma';
7a7c249c 279 my @dotlines;
f96630e3 280 push( @dotlines, "$gdecl $gname {" );
7a7c249c 281 ## Print out the global attributes
282 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
283 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 284 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
285
286 # Add each of the nodes.
335a62ef 287 foreach my $n ( $graph->vertices ) {
c753ea40 288 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 289 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 290 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 291 }
c753ea40 292 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 293 }
7a7c249c 294 # Add each of our edges.
335a62ef 295 foreach my $e ( $graph->edges ) {
986bbd1b 296 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 297 my $connector = $graph->is_directed ? '->' : '--';
298 push( @dotlines, " $from $connector $to;" );
7a7c249c 299 }
300 push( @dotlines, '}' );
e79c23c7 301
7a7c249c 302 return join( "\n", @dotlines );
303}
304
0bded693 305=head2 alter_graph( $dotstring )
306
307Alters the graph of this stemma according to the definition specified
308in $dotstring.
309
310=cut
311
312sub alter_graph {
313 my( $self, $dotstring ) = @_;
314 my $dotfh;
315 open $dotfh, '<', \$dotstring;
f90b2bde 316 binmode $dotfh, ':utf8';
0bded693 317 $self->_graph_from_dot( $dotfh );
318}
319
335a62ef 320=head2 editable( $opts )
027d819c 321
5c44c598 322=head2 editable_graph( $graph, $opts )
323
88a6bac5 324Returns a version of the graph rendered in our definition format. The
335a62ef 325output separates statements with a newline; set $opts->{'linesep'} to the
326empty string or to a space if the result is to be sent via JSON.
327
5c44c598 328If a situational version of the stemma is required, the arguments for
329situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 330
331=cut
7a7c249c 332
7a7c249c 333sub editable {
5c44c598 334 my( $self, $opts ) = @_;
335a62ef 335 my $graph = $self->graph;
f96630e3 336 if( $self->has_identifier ) {
337 $opts->{'name'} = $self->identifier;
338 }
5c44c598 339 ## See if we need an editable version of a situational graph.
340 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
341 my $extant = delete $opts->{'extant'} || {};
342 my $layerwits = delete $opts->{'layerwits'} || [];
343 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 344 }
5c44c598 345 return editable_graph( $graph, $opts );
346}
347
348sub editable_graph {
349 my( $graph, $opts ) = @_;
335a62ef 350
351 # Create the graph
352 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 353 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
f96630e3 354 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
355 : 'stemma';
7a7c249c 356 my @dotlines;
f96630e3 357 push( @dotlines, "$gdecl $gname {" );
7a7c249c 358 my @real; # A cheap sort
5c44c598 359 foreach my $n ( sort $graph->vertices ) {
360 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 361 $c = 'extant' unless $c;
362 if( $c eq 'extant' ) {
363 push( @real, $n );
364 } else {
365 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
366 }
e367f5c0 367 }
7a7c249c 368 # Now do the real ones
369 foreach my $n ( @real ) {
370 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
371 }
5c44c598 372 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 373 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 374 my $conn = $graph->is_undirected ? '--' : '->';
375 push( @dotlines, " $from $conn $to;" );
7a7c249c 376 }
377 push( @dotlines, '}' );
88a6bac5 378 return join( $join, @dotlines );
7a7c249c 379}
380
381sub _make_dotline {
382 my( $obj, %attr ) = @_;
383 my @pairs;
384 foreach my $k ( keys %attr ) {
986bbd1b 385 my $v = _dotquote( $attr{$k} );
386 push( @pairs, "$k=$v" );
7a7c249c 387 }
986bbd1b 388 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 389}
390
986bbd1b 391sub _dotquote {
392 my( $str ) = @_;
393 return $str if $str =~ /^[A-Za-z0-9]+$/;
394 $str =~ s/\"/\\\"/g;
395 $str = '"' . $str . '"';
396 return $str;
397}
398
7a7c249c 399sub _by_vertex {
400 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
401}
8d9a1cd8 402
5c44c598 403=head2 situation_graph( $extant, $layered )
335a62ef 404
ea45d2a6 405Returns a graph which is the original stemma graph with all witnesses not
406in the %$extant hash marked as hypothetical, and witness layers added to
407the graph according to the list in @$layered. A layered (a.c.) witness is
408added as a parent of its main version, and additionally shares all other
409parents and children with that version.
335a62ef 410
411=cut
412
5c44c598 413sub situation_graph {
ace5fce5 414 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 415
416 my $graph = $self->graph->copy;
417 foreach my $vertex ( $graph->vertices ) {
418 # Set as extant any vertex that is extant in the stemma AND
419 # exists in the $extant hash.
420 my $class = 'hypothetical';
421 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
422 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
423 $graph->set_vertex_attribute( $vertex, 'class', $class );
424 }
425
335a62ef 426 # For each 'layered' witness in the layerwits array, add it to the graph
427 # as an ancestor of the 'main' witness, and otherwise with the same parent/
428 # child links as its main analogue.
429 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 430 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 431 foreach my $lw ( @$layerwits ) {
432 # Add the layered witness and set it with the same attributes as
433 # its 'main' analogue
5c44c598 434 throw( "Cannot add a layer to a hypothetical witness $lw" )
435 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 436 my $lwac = $lw . $layerlabel;
335a62ef 437 $graph->add_vertex( $lwac );
438 $graph->set_vertex_attributes( $lwac,
439 $graph->get_vertex_attributes( $lw ) );
440
441 # Set it as ancestor to the main witness
442 $graph->add_edge( $lwac, $lw );
443
444 # Give it the same ancestors and descendants as the main witness has,
445 # bearing in mind that those ancestors and descendants might also just
446 # have had a layered witness defined.
447 foreach my $v ( $graph->predecessors( $lw ) ) {
448 next if $v eq $lwac; # Don't add a loop
449 $graph->add_edge( $v, $lwac );
ace5fce5 450 $graph->add_edge( $v.$layerlabel, $lwac )
451 if $graph->has_vertex( $v.$layerlabel );
335a62ef 452 }
453 foreach my $v ( $graph->successors( $lw ) ) {
454 next if $v eq $lwac; # but this shouldn't occur
455 $graph->add_edge( $lwac, $v );
ace5fce5 456 $graph->add_edge( $lwac, $v.$layerlabel )
457 if $graph->has_vertex( $v.$layerlabel );
335a62ef 458 }
459 }
460 return $graph;
461}
462
027d819c 463=head2 as_svg
464
465Returns an SVG representation of the graph, calling as_dot first.
466
467=cut
468
8d9a1cd8 469sub as_svg {
470 my( $self, $opts ) = @_;
471 my $dot = $self->as_dot( $opts );
ea45d2a6 472 my @cmd = ( '-Tsvg' );
473 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 474 my $svg;
e79c23c7 475 my $dotfile = File::Temp->new();
e79c23c7 476 binmode $dotfile, ':utf8';
8d9a1cd8 477 print $dotfile $dot;
459c39b3 478 close $dotfile;
e79c23c7 479 push( @cmd, $dotfile->filename );
480 run( \@cmd, ">", binary(), \$svg );
428bcf0b 481 return decode_utf8( $svg );
e79c23c7 482}
483
027d819c 484=head2 witnesses
485
486Returns a list of the extant witnesses represented in the stemma.
487
488=cut
489
08e0fb85 490sub witnesses {
491 my $self = shift;
492 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
493 $self->graph->vertices;
494 return @wits;
495}
496
06e7cbc7 497=head2 hypotheticals
498
499Returns a list of the hypothetical witnesses represented in the stemma.
500
501=cut
502
bebec0e9 503sub hypotheticals {
504 my $self = shift;
505 my @wits = grep
506 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
507 $self->graph->vertices;
508 return @wits;
509}
510
37bf09f4 511=head2 root_graph( $root_vertex )
ea45d2a6 512
513If the stemma graph is undirected, make it directed with $root_vertex at the root.
514If it is directed, re-root it.
515
516=cut
517
518sub root_graph {
519 my( $self, $rootvertex ) = @_;
520 my $graph;
521 if( $self->is_undirected ) {
522 $graph = $self->graph;
523 } else {
524 # Make an undirected version of this graph.
525 $graph = $self->graph->undirected_copy();
526 }
527 my $rooted = Graph->new();
528 $rooted->add_vertex( $rootvertex );
529 my @next = ( $rootvertex );
530 while( @next ) {
531 my @children;
532 foreach my $v ( @next ) {
533 # Place its not-placed neighbors (ergo children) in the tree
534 # and connect them
535 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
536 $graph->neighbors( $v ) ) {
537 $rooted->add_vertex( $n );
538 $rooted->add_edge( $v, $n );
539 push( @children, $n );
540 }
541 }
542 @next = @children;
543 }
544 # Set the vertex classes
545 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
546 $self->graph->hypotheticals;
547 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
548 $self->graph->witnesses;
549 return $rooted;
550}
551
552
63778331 553sub throw {
554 Text::Tradition::Error->throw(
555 'ident' => 'Stemma error',
556 'message' => $_[0],
557 );
558}
559
560
9463b0bf 561no Moose;
562__PACKAGE__->meta->make_immutable;
563
5641;
027d819c 565
566=head1 LICENSE
567
568This package is free software and is provided "as is" without express
569or implied warranty. You can redistribute it and/or modify it under
570the same terms as Perl itself.
571
572=head1 AUTHOR
573
574Tara L Andrews E<lt>aurum@cpan.orgE<gt>