Add as_tsv routine for tab-sep export of collation. Fixes #7
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
f96630e3 39 digraph "Name of Stemma" {
027d819c 40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
f96630e3 68 digraph "Test stemma 1" {
027d819c 69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
02b6340e 105my $baddotfh;
106open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
107try {
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110} catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 112}
113
114# Create a good graph
115my $dotfh;
116open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117binmode( $dotfh, ':utf8' );
ace5fce5 118my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 119is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 122ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 124my $found_unicode_sigil;
125foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127}
128ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129
ea45d2a6 130# TODO Create stemma from graph, create stemma from undirected graph,
131# create stemma from incompletely-specified graph
132
64a36834 133=end testing
134
027d819c 135=cut
136
9463b0bf 137has collation => (
138 is => 'ro',
139 isa => 'Text::Tradition::Collation',
ea45d2a6 140 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 141 weak_ref => 1,
9463b0bf 142 );
143
e05997e2 144has graph => (
145 is => 'rw',
146 isa => 'Graph',
147 predicate => 'has_graph',
148 );
ea45d2a6 149
98f22390 150has identifier => (
ea45d2a6 151 is => 'ro',
98f22390 152 isa => 'Str',
153 writer => 'set_identifier',
154 predicate => 'has_identifier',
ea45d2a6 155 );
98f22390 156
e05997e2 157sub BUILD {
158 my( $self, $args ) = @_;
159 # If we have been handed a dotfile, initialize it into a graph.
160 if( exists $args->{'dot'} ) {
027d819c 161 $self->_graph_from_dot( $args->{'dot'} );
98f22390 162 }
c0ccdb62 163}
164
ea45d2a6 165before 'graph' => sub {
166 my $self = shift;
167 if( @_ ) {
168 # Make sure all unclassed graph nodes are marked extant.
169 my $g = $_[0];
170 throw( "Cannot set graph to a non-Graph object" )
171 unless ref( $g ) eq 'Graph';
172 foreach my $v ( $g->vertices ) {
173 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
174 $g->set_vertex_attribute( $v, 'class', 'extant' );
175 }
176 }
98f22390 177 }
178};
179
180after 'graph' => sub {
181 my $self = shift;
182 return unless @_;
183 unless( $self->has_identifier ) {
184 ## HORRIBLE HACK but there is no API access to graph attributes!
185 if( exists $_[0]->[4]->{'name'} ) {
186 $self->set_identifier( $_[0]->[4]->{'name'} );
187 }
ea45d2a6 188 }
189};
190
027d819c 191sub _graph_from_dot {
8d9a1cd8 192 my( $self, $dotfh ) = @_;
8d9a1cd8 193 my $reader = Graph::Reader::Dot->new();
64a36834 194 # Redirect STDOUT in order to trap any error messages - syntax errors
195 # are evidently not fatal.
02b6340e 196 my $graph;
197 my $reader_out;
198 {
199 local(*STDOUT);
200 open( STDOUT, ">", \$reader_out );
201 $graph = $reader->read_graph( $dotfh );
202 close STDOUT;
203 }
64a36834 204 if( $reader_out && $reader_out =~ /error/s ) {
205 throw( "Error trying to parse dot: $reader_out" );
206 } elsif( !$graph ) {
207 throw( "Failed to create graph from dot" );
208 }
209 $self->graph( $graph );
8d9a1cd8 210}
211
98f22390 212sub is_undirected {
213 my( $self ) = @_;
214 return undef unless $self->has_graph;
215 return $self->graph->is_undirected;
216}
217
027d819c 218=head1 METHODS
219
220=head2 as_dot( \%options )
221
222Returns a normal dot representation of the stemma layout, suitable for rendering
223with GraphViz. Options include:
224
225=over
226
227=item * graph - A hashref of global graph options.
228
229=item * node - A hashref of global node options.
230
231=item * edge - A hashref of global edge options.
232
233=back
234
235See the GraphViz documentation for the list of available options.
236
237=cut
238
8d9a1cd8 239sub as_dot {
e367f5c0 240 my( $self, $opts ) = @_;
7a7c249c 241
335a62ef 242 ## See if we are including any a.c. witnesses in this graph.
243 my $graph = $self->graph;
244 if( exists $opts->{'layerwits'} ) {
5c44c598 245 my $extant = {};
246 map { $extant->{$_} = 1 } $self->witnesses;
247 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 248 }
249
7a7c249c 250 # Get default and specified options
e02340f3 251 my %graphopts = (
252 # 'ratio' => 1,
f05e891c 253 'bgcolor' => 'transparent',
e02340f3 254 );
7a7c249c 255 my %nodeopts = (
256 'fontsize' => 11,
7a7c249c 257 'style' => 'filled',
258 'fillcolor' => 'white',
e02340f3 259 'color' => 'white',
7a7c249c 260 'shape' => 'ellipse', # Shape for the extant nodes
261 );
262 my %edgeopts = (
e02340f3 263 'arrowhead' => 'none',
7a7c249c 264 );
265 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
266 if $opts->{'graph'};
267 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
268 if $opts->{'node'};
269 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
270 if $opts->{'edge'};
335a62ef 271
ea45d2a6 272 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
f96630e3 273 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
274 : 'stemma';
7a7c249c 275 my @dotlines;
f96630e3 276 push( @dotlines, "$gdecl $gname {" );
7a7c249c 277 ## Print out the global attributes
278 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
279 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 280 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
281
282 # Add each of the nodes.
335a62ef 283 foreach my $n ( $graph->vertices ) {
c753ea40 284 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 285 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 286 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 287 }
c753ea40 288 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 289 }
7a7c249c 290 # Add each of our edges.
335a62ef 291 foreach my $e ( $graph->edges ) {
986bbd1b 292 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 293 my $connector = $graph->is_directed ? '->' : '--';
294 push( @dotlines, " $from $connector $to;" );
7a7c249c 295 }
296 push( @dotlines, '}' );
e79c23c7 297
7a7c249c 298 return join( "\n", @dotlines );
299}
300
0bded693 301=head2 alter_graph( $dotstring )
302
303Alters the graph of this stemma according to the definition specified
304in $dotstring.
305
306=cut
307
308sub alter_graph {
309 my( $self, $dotstring ) = @_;
310 my $dotfh;
311 open $dotfh, '<', \$dotstring;
f90b2bde 312 binmode $dotfh, ':utf8';
0bded693 313 $self->_graph_from_dot( $dotfh );
314}
315
335a62ef 316=head2 editable( $opts )
027d819c 317
5c44c598 318=head2 editable_graph( $graph, $opts )
319
88a6bac5 320Returns a version of the graph rendered in our definition format. The
335a62ef 321output separates statements with a newline; set $opts->{'linesep'} to the
322empty string or to a space if the result is to be sent via JSON.
323
5c44c598 324If a situational version of the stemma is required, the arguments for
325situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 326
327=cut
7a7c249c 328
7a7c249c 329sub editable {
5c44c598 330 my( $self, $opts ) = @_;
335a62ef 331 my $graph = $self->graph;
f96630e3 332 if( $self->has_identifier ) {
333 $opts->{'name'} = $self->identifier;
334 }
5c44c598 335 ## See if we need an editable version of a situational graph.
336 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
337 my $extant = delete $opts->{'extant'} || {};
338 my $layerwits = delete $opts->{'layerwits'} || [];
339 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 340 }
5c44c598 341 return editable_graph( $graph, $opts );
342}
343
344sub editable_graph {
345 my( $graph, $opts ) = @_;
335a62ef 346
347 # Create the graph
348 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 349 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
f96630e3 350 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
351 : 'stemma';
7a7c249c 352 my @dotlines;
f96630e3 353 push( @dotlines, "$gdecl $gname {" );
7a7c249c 354 my @real; # A cheap sort
5c44c598 355 foreach my $n ( sort $graph->vertices ) {
356 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 357 $c = 'extant' unless $c;
358 if( $c eq 'extant' ) {
359 push( @real, $n );
360 } else {
361 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
362 }
e367f5c0 363 }
7a7c249c 364 # Now do the real ones
365 foreach my $n ( @real ) {
366 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
367 }
5c44c598 368 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 369 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 370 my $conn = $graph->is_undirected ? '--' : '->';
371 push( @dotlines, " $from $conn $to;" );
7a7c249c 372 }
373 push( @dotlines, '}' );
88a6bac5 374 return join( $join, @dotlines );
7a7c249c 375}
376
377sub _make_dotline {
378 my( $obj, %attr ) = @_;
379 my @pairs;
380 foreach my $k ( keys %attr ) {
986bbd1b 381 my $v = _dotquote( $attr{$k} );
382 push( @pairs, "$k=$v" );
7a7c249c 383 }
986bbd1b 384 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 385}
386
986bbd1b 387sub _dotquote {
388 my( $str ) = @_;
389 return $str if $str =~ /^[A-Za-z0-9]+$/;
390 $str =~ s/\"/\\\"/g;
391 $str = '"' . $str . '"';
392 return $str;
393}
394
7a7c249c 395sub _by_vertex {
396 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
397}
8d9a1cd8 398
5c44c598 399=head2 situation_graph( $extant, $layered )
335a62ef 400
ea45d2a6 401Returns a graph which is the original stemma graph with all witnesses not
402in the %$extant hash marked as hypothetical, and witness layers added to
403the graph according to the list in @$layered. A layered (a.c.) witness is
404added as a parent of its main version, and additionally shares all other
405parents and children with that version.
335a62ef 406
407=cut
408
5c44c598 409sub situation_graph {
ace5fce5 410 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 411
412 my $graph = $self->graph->copy;
413 foreach my $vertex ( $graph->vertices ) {
414 # Set as extant any vertex that is extant in the stemma AND
415 # exists in the $extant hash.
416 my $class = 'hypothetical';
417 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
418 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
419 $graph->set_vertex_attribute( $vertex, 'class', $class );
420 }
421
335a62ef 422 # For each 'layered' witness in the layerwits array, add it to the graph
423 # as an ancestor of the 'main' witness, and otherwise with the same parent/
424 # child links as its main analogue.
425 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 426 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 427 foreach my $lw ( @$layerwits ) {
428 # Add the layered witness and set it with the same attributes as
429 # its 'main' analogue
5c44c598 430 throw( "Cannot add a layer to a hypothetical witness $lw" )
431 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 432 my $lwac = $lw . $layerlabel;
335a62ef 433 $graph->add_vertex( $lwac );
434 $graph->set_vertex_attributes( $lwac,
435 $graph->get_vertex_attributes( $lw ) );
436
437 # Set it as ancestor to the main witness
438 $graph->add_edge( $lwac, $lw );
439
440 # Give it the same ancestors and descendants as the main witness has,
441 # bearing in mind that those ancestors and descendants might also just
442 # have had a layered witness defined.
443 foreach my $v ( $graph->predecessors( $lw ) ) {
444 next if $v eq $lwac; # Don't add a loop
445 $graph->add_edge( $v, $lwac );
ace5fce5 446 $graph->add_edge( $v.$layerlabel, $lwac )
447 if $graph->has_vertex( $v.$layerlabel );
335a62ef 448 }
449 foreach my $v ( $graph->successors( $lw ) ) {
450 next if $v eq $lwac; # but this shouldn't occur
451 $graph->add_edge( $lwac, $v );
ace5fce5 452 $graph->add_edge( $lwac, $v.$layerlabel )
453 if $graph->has_vertex( $v.$layerlabel );
335a62ef 454 }
455 }
456 return $graph;
457}
458
027d819c 459=head2 as_svg
460
461Returns an SVG representation of the graph, calling as_dot first.
462
463=cut
464
8d9a1cd8 465sub as_svg {
466 my( $self, $opts ) = @_;
467 my $dot = $self->as_dot( $opts );
ea45d2a6 468 my @cmd = ( '-Tsvg' );
469 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 470 my $svg;
e79c23c7 471 my $dotfile = File::Temp->new();
e79c23c7 472 binmode $dotfile, ':utf8';
8d9a1cd8 473 print $dotfile $dot;
459c39b3 474 close $dotfile;
e79c23c7 475 push( @cmd, $dotfile->filename );
476 run( \@cmd, ">", binary(), \$svg );
428bcf0b 477 return decode_utf8( $svg );
e79c23c7 478}
479
027d819c 480=head2 witnesses
481
482Returns a list of the extant witnesses represented in the stemma.
483
484=cut
485
08e0fb85 486sub witnesses {
487 my $self = shift;
488 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
489 $self->graph->vertices;
490 return @wits;
491}
492
06e7cbc7 493=head2 hypotheticals
494
495Returns a list of the hypothetical witnesses represented in the stemma.
496
497=cut
498
bebec0e9 499sub hypotheticals {
500 my $self = shift;
501 my @wits = grep
502 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
503 $self->graph->vertices;
504 return @wits;
505}
506
37bf09f4 507=head2 root_graph( $root_vertex )
ea45d2a6 508
509If the stemma graph is undirected, make it directed with $root_vertex at the root.
510If it is directed, re-root it.
511
512=cut
513
514sub root_graph {
515 my( $self, $rootvertex ) = @_;
516 my $graph;
517 if( $self->is_undirected ) {
518 $graph = $self->graph;
519 } else {
520 # Make an undirected version of this graph.
521 $graph = $self->graph->undirected_copy();
522 }
523 my $rooted = Graph->new();
524 $rooted->add_vertex( $rootvertex );
525 my @next = ( $rootvertex );
526 while( @next ) {
527 my @children;
528 foreach my $v ( @next ) {
529 # Place its not-placed neighbors (ergo children) in the tree
530 # and connect them
531 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
532 $graph->neighbors( $v ) ) {
533 $rooted->add_vertex( $n );
534 $rooted->add_edge( $v, $n );
535 push( @children, $n );
536 }
537 }
538 @next = @children;
539 }
540 # Set the vertex classes
541 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
542 $self->graph->hypotheticals;
543 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
544 $self->graph->witnesses;
545 return $rooted;
546}
547
548
63778331 549sub throw {
550 Text::Tradition::Error->throw(
551 'ident' => 'Stemma error',
552 'message' => $_[0],
553 );
554}
555
556
9463b0bf 557no Moose;
558__PACKAGE__->meta->make_immutable;
559
5601;
027d819c 561
562=head1 LICENSE
563
564This package is free software and is provided "as is" without express
565or implied warranty. You can redistribute it and/or modify it under
566the same terms as Perl itself.
567
568=head1 AUTHOR
569
570Tara L Andrews E<lt>aurum@cpan.orgE<gt>