notice and save changes to graph name / stemma identifier (tla/stemmaweb#28)
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
f96630e3 39 digraph "Name of Stemma" {
027d819c 40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
f96630e3 68 digraph "Test stemma 1" {
027d819c 69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
02b6340e 105my $baddotfh;
106open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
107try {
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110} catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 112}
113
114# Create a good graph
115my $dotfh;
116open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117binmode( $dotfh, ':utf8' );
ace5fce5 118my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 119is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 122ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 124my $found_unicode_sigil;
125foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127}
128ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129
cb741417 130# Create an undirected graph
131my $undirdotfh;
132open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
133binmode( $undirdotfh, ':utf8' );
134my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
135is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
136is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
137is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
138ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
ea45d2a6 139
64a36834 140=end testing
141
027d819c 142=cut
143
9463b0bf 144has collation => (
145 is => 'ro',
146 isa => 'Text::Tradition::Collation',
ea45d2a6 147 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 148 weak_ref => 1,
9463b0bf 149 );
150
e05997e2 151has graph => (
152 is => 'rw',
153 isa => 'Graph',
154 predicate => 'has_graph',
155 );
ea45d2a6 156
98f22390 157has identifier => (
ea45d2a6 158 is => 'ro',
98f22390 159 isa => 'Str',
160 writer => 'set_identifier',
161 predicate => 'has_identifier',
ea45d2a6 162 );
98f22390 163
e05997e2 164sub BUILD {
165 my( $self, $args ) = @_;
166 # If we have been handed a dotfile, initialize it into a graph.
167 if( exists $args->{'dot'} ) {
027d819c 168 $self->_graph_from_dot( $args->{'dot'} );
98f22390 169 }
c0ccdb62 170}
171
ea45d2a6 172before 'graph' => sub {
173 my $self = shift;
174 if( @_ ) {
175 # Make sure all unclassed graph nodes are marked extant.
176 my $g = $_[0];
177 throw( "Cannot set graph to a non-Graph object" )
cb741417 178 unless $g->isa( 'Graph' );
ea45d2a6 179 foreach my $v ( $g->vertices ) {
180 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
181 $g->set_vertex_attribute( $v, 'class', 'extant' );
182 }
183 }
98f22390 184 }
185};
186
187after 'graph' => sub {
188 my $self = shift;
189 return unless @_;
907f6671 190 ## HORRIBLE HACK but there is no API access to graph attributes!
191 my $graph_id = exists $_[0]->[4]->{'name'} ? $_[0]->[4]->{'name'} : '';
192 if( $graph_id && !( $self->has_identifier && $self->identifier eq $graph_id ) ) {
193 $self->set_identifier( $graph_id );
194 } elsif ( !$graph_id && $self->has_identifier ) {
195 $self->set_identifier( 'stemma' );
ea45d2a6 196 }
197};
198
027d819c 199sub _graph_from_dot {
8d9a1cd8 200 my( $self, $dotfh ) = @_;
8d9a1cd8 201 my $reader = Graph::Reader::Dot->new();
64a36834 202 # Redirect STDOUT in order to trap any error messages - syntax errors
203 # are evidently not fatal.
02b6340e 204 my $graph;
205 my $reader_out;
cb741417 206 my $reader_err;
02b6340e 207 {
208 local(*STDOUT);
209 open( STDOUT, ">", \$reader_out );
cb741417 210 local(*STDERR);
211 open( STDERR, ">", \$reader_err );
02b6340e 212 $graph = $reader->read_graph( $dotfh );
213 close STDOUT;
cb741417 214 close STDERR;
02b6340e 215 }
64a36834 216 if( $reader_out && $reader_out =~ /error/s ) {
217 throw( "Error trying to parse dot: $reader_out" );
218 } elsif( !$graph ) {
219 throw( "Failed to create graph from dot" );
220 }
cb741417 221 # Correct for implicit graph -> digraph quirk of reader
222 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
223 my $udgraph = $graph->undirected_copy;
224 foreach my $v ( $graph->vertices ) {
225 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
226 }
227 $graph = $udgraph;
228 }
64a36834 229 $self->graph( $graph );
8d9a1cd8 230}
231
98f22390 232sub is_undirected {
233 my( $self ) = @_;
234 return undef unless $self->has_graph;
235 return $self->graph->is_undirected;
236}
237
027d819c 238=head1 METHODS
239
240=head2 as_dot( \%options )
241
242Returns a normal dot representation of the stemma layout, suitable for rendering
243with GraphViz. Options include:
244
245=over
246
247=item * graph - A hashref of global graph options.
248
249=item * node - A hashref of global node options.
250
251=item * edge - A hashref of global edge options.
252
253=back
254
255See the GraphViz documentation for the list of available options.
256
257=cut
258
8d9a1cd8 259sub as_dot {
e367f5c0 260 my( $self, $opts ) = @_;
7a7c249c 261
335a62ef 262 ## See if we are including any a.c. witnesses in this graph.
263 my $graph = $self->graph;
264 if( exists $opts->{'layerwits'} ) {
5c44c598 265 my $extant = {};
266 map { $extant->{$_} = 1 } $self->witnesses;
267 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 268 }
269
7a7c249c 270 # Get default and specified options
e02340f3 271 my %graphopts = (
272 # 'ratio' => 1,
f05e891c 273 'bgcolor' => 'transparent',
e02340f3 274 );
7a7c249c 275 my %nodeopts = (
276 'fontsize' => 11,
7a7c249c 277 'style' => 'filled',
278 'fillcolor' => 'white',
e02340f3 279 'color' => 'white',
7a7c249c 280 'shape' => 'ellipse', # Shape for the extant nodes
281 );
282 my %edgeopts = (
e02340f3 283 'arrowhead' => 'none',
7a7c249c 284 );
285 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
286 if $opts->{'graph'};
287 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
288 if $opts->{'node'};
289 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
290 if $opts->{'edge'};
335a62ef 291
ea45d2a6 292 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
f96630e3 293 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
294 : 'stemma';
7a7c249c 295 my @dotlines;
f96630e3 296 push( @dotlines, "$gdecl $gname {" );
7a7c249c 297 ## Print out the global attributes
298 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
299 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 300 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
301
302 # Add each of the nodes.
335a62ef 303 foreach my $n ( $graph->vertices ) {
c753ea40 304 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 305 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 306 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 307 }
c753ea40 308 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 309 }
7a7c249c 310 # Add each of our edges.
335a62ef 311 foreach my $e ( $graph->edges ) {
986bbd1b 312 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 313 my $connector = $graph->is_directed ? '->' : '--';
314 push( @dotlines, " $from $connector $to;" );
7a7c249c 315 }
316 push( @dotlines, '}' );
e79c23c7 317
7a7c249c 318 return join( "\n", @dotlines );
319}
320
0bded693 321=head2 alter_graph( $dotstring )
322
323Alters the graph of this stemma according to the definition specified
324in $dotstring.
325
326=cut
327
328sub alter_graph {
329 my( $self, $dotstring ) = @_;
330 my $dotfh;
331 open $dotfh, '<', \$dotstring;
f90b2bde 332 binmode $dotfh, ':utf8';
0bded693 333 $self->_graph_from_dot( $dotfh );
334}
335
335a62ef 336=head2 editable( $opts )
027d819c 337
5c44c598 338=head2 editable_graph( $graph, $opts )
339
88a6bac5 340Returns a version of the graph rendered in our definition format. The
335a62ef 341output separates statements with a newline; set $opts->{'linesep'} to the
342empty string or to a space if the result is to be sent via JSON.
343
5c44c598 344If a situational version of the stemma is required, the arguments for
345situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 346
347=cut
7a7c249c 348
7a7c249c 349sub editable {
5c44c598 350 my( $self, $opts ) = @_;
335a62ef 351 my $graph = $self->graph;
f96630e3 352 if( $self->has_identifier ) {
353 $opts->{'name'} = $self->identifier;
354 }
5c44c598 355 ## See if we need an editable version of a situational graph.
356 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
357 my $extant = delete $opts->{'extant'} || {};
358 my $layerwits = delete $opts->{'layerwits'} || [];
359 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 360 }
5c44c598 361 return editable_graph( $graph, $opts );
362}
363
364sub editable_graph {
365 my( $graph, $opts ) = @_;
335a62ef 366
367 # Create the graph
368 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 369 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
f96630e3 370 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
371 : 'stemma';
7a7c249c 372 my @dotlines;
f96630e3 373 push( @dotlines, "$gdecl $gname {" );
7a7c249c 374 my @real; # A cheap sort
5c44c598 375 foreach my $n ( sort $graph->vertices ) {
376 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 377 $c = 'extant' unless $c;
378 if( $c eq 'extant' ) {
379 push( @real, $n );
380 } else {
381 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
382 }
e367f5c0 383 }
7a7c249c 384 # Now do the real ones
385 foreach my $n ( @real ) {
386 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
387 }
5c44c598 388 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 389 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 390 my $conn = $graph->is_undirected ? '--' : '->';
391 push( @dotlines, " $from $conn $to;" );
7a7c249c 392 }
393 push( @dotlines, '}' );
88a6bac5 394 return join( $join, @dotlines );
7a7c249c 395}
396
397sub _make_dotline {
398 my( $obj, %attr ) = @_;
399 my @pairs;
400 foreach my $k ( keys %attr ) {
986bbd1b 401 my $v = _dotquote( $attr{$k} );
402 push( @pairs, "$k=$v" );
7a7c249c 403 }
986bbd1b 404 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 405}
406
986bbd1b 407sub _dotquote {
408 my( $str ) = @_;
409 return $str if $str =~ /^[A-Za-z0-9]+$/;
410 $str =~ s/\"/\\\"/g;
411 $str = '"' . $str . '"';
412 return $str;
413}
414
7a7c249c 415sub _by_vertex {
416 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
417}
8d9a1cd8 418
5c44c598 419=head2 situation_graph( $extant, $layered )
335a62ef 420
ea45d2a6 421Returns a graph which is the original stemma graph with all witnesses not
422in the %$extant hash marked as hypothetical, and witness layers added to
423the graph according to the list in @$layered. A layered (a.c.) witness is
424added as a parent of its main version, and additionally shares all other
425parents and children with that version.
335a62ef 426
427=cut
428
5c44c598 429sub situation_graph {
ace5fce5 430 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 431
432 my $graph = $self->graph->copy;
433 foreach my $vertex ( $graph->vertices ) {
434 # Set as extant any vertex that is extant in the stemma AND
435 # exists in the $extant hash.
436 my $class = 'hypothetical';
437 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
438 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
439 $graph->set_vertex_attribute( $vertex, 'class', $class );
440 }
441
335a62ef 442 # For each 'layered' witness in the layerwits array, add it to the graph
443 # as an ancestor of the 'main' witness, and otherwise with the same parent/
444 # child links as its main analogue.
445 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 446 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 447 foreach my $lw ( @$layerwits ) {
448 # Add the layered witness and set it with the same attributes as
449 # its 'main' analogue
5c44c598 450 throw( "Cannot add a layer to a hypothetical witness $lw" )
451 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 452 my $lwac = $lw . $layerlabel;
335a62ef 453 $graph->add_vertex( $lwac );
454 $graph->set_vertex_attributes( $lwac,
455 $graph->get_vertex_attributes( $lw ) );
456
457 # Set it as ancestor to the main witness
458 $graph->add_edge( $lwac, $lw );
459
460 # Give it the same ancestors and descendants as the main witness has,
461 # bearing in mind that those ancestors and descendants might also just
462 # have had a layered witness defined.
463 foreach my $v ( $graph->predecessors( $lw ) ) {
464 next if $v eq $lwac; # Don't add a loop
465 $graph->add_edge( $v, $lwac );
ace5fce5 466 $graph->add_edge( $v.$layerlabel, $lwac )
467 if $graph->has_vertex( $v.$layerlabel );
335a62ef 468 }
469 foreach my $v ( $graph->successors( $lw ) ) {
470 next if $v eq $lwac; # but this shouldn't occur
471 $graph->add_edge( $lwac, $v );
ace5fce5 472 $graph->add_edge( $lwac, $v.$layerlabel )
473 if $graph->has_vertex( $v.$layerlabel );
335a62ef 474 }
475 }
476 return $graph;
477}
478
027d819c 479=head2 as_svg
480
481Returns an SVG representation of the graph, calling as_dot first.
482
483=cut
484
8d9a1cd8 485sub as_svg {
486 my( $self, $opts ) = @_;
487 my $dot = $self->as_dot( $opts );
ea45d2a6 488 my @cmd = ( '-Tsvg' );
489 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 490 my $svg;
e79c23c7 491 my $dotfile = File::Temp->new();
e79c23c7 492 binmode $dotfile, ':utf8';
8d9a1cd8 493 print $dotfile $dot;
459c39b3 494 close $dotfile;
e79c23c7 495 push( @cmd, $dotfile->filename );
496 run( \@cmd, ">", binary(), \$svg );
428bcf0b 497 return decode_utf8( $svg );
e79c23c7 498}
499
027d819c 500=head2 witnesses
501
502Returns a list of the extant witnesses represented in the stemma.
503
504=cut
505
08e0fb85 506sub witnesses {
507 my $self = shift;
508 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
509 $self->graph->vertices;
510 return @wits;
511}
512
06e7cbc7 513=head2 hypotheticals
514
515Returns a list of the hypothetical witnesses represented in the stemma.
516
517=cut
518
bebec0e9 519sub hypotheticals {
520 my $self = shift;
521 my @wits = grep
522 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
523 $self->graph->vertices;
524 return @wits;
525}
526
37bf09f4 527=head2 root_graph( $root_vertex )
ea45d2a6 528
529If the stemma graph is undirected, make it directed with $root_vertex at the root.
530If it is directed, re-root it.
531
532=cut
533
534sub root_graph {
535 my( $self, $rootvertex ) = @_;
536 my $graph;
907f6671 537 my $ident = $self->identifier; # will have to restore this at the end
ea45d2a6 538 if( $self->is_undirected ) {
539 $graph = $self->graph;
540 } else {
541 # Make an undirected version of this graph.
542 $graph = $self->graph->undirected_copy();
543 }
1cf6dd32 544 # First, ensure that the requested root is actually a vertex in the graph.
545 unless( $graph->has_vertex( $rootvertex ) ) {
546 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
547 }
548
549 # Now make a directed version of the graph.
ea45d2a6 550 my $rooted = Graph->new();
551 $rooted->add_vertex( $rootvertex );
552 my @next = ( $rootvertex );
553 while( @next ) {
554 my @children;
555 foreach my $v ( @next ) {
556 # Place its not-placed neighbors (ergo children) in the tree
557 # and connect them
558 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
559 $graph->neighbors( $v ) ) {
560 $rooted->add_vertex( $n );
561 $rooted->add_edge( $v, $n );
562 push( @children, $n );
563 }
564 }
565 @next = @children;
566 }
567 # Set the vertex classes
568 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
1cf6dd32 569 $self->hypotheticals;
570 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
571 $self->witnesses;
572 $self->graph( $rooted );
907f6671 573 $self->set_identifier( $ident );
ea45d2a6 574}
575
576
63778331 577sub throw {
578 Text::Tradition::Error->throw(
579 'ident' => 'Stemma error',
580 'message' => $_[0],
581 );
582}
583
584
9463b0bf 585no Moose;
586__PACKAGE__->meta->make_immutable;
587
5881;
027d819c 589
590=head1 LICENSE
591
592This package is free software and is provided "as is" without express
593or implied warranty. You can redistribute it and/or modify it under
594the same terms as Perl itself.
595
596=head1 AUTHOR
597
598Tara L Andrews E<lt>aurum@cpan.orgE<gt>