distribution housekeeping
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
39 digraph Stemma {
40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
68 digraph Stemma {
69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
ea45d2a6 105TODO: {
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
836e0546 107 my $baddotfh;
ea45d2a6 108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 try {
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 }
64a36834 115}
116
117# Create a good graph
118my $dotfh;
119open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120binmode( $dotfh, ':utf8' );
ace5fce5 121my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 122is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
125my $found_unicode_sigil;
126foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128}
129ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130
ea45d2a6 131# TODO Create stemma from graph, create stemma from undirected graph,
132# create stemma from incompletely-specified graph
133
64a36834 134=end testing
135
027d819c 136=cut
137
9463b0bf 138has collation => (
139 is => 'ro',
140 isa => 'Text::Tradition::Collation',
ea45d2a6 141 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 142 weak_ref => 1,
9463b0bf 143 );
144
e05997e2 145has graph => (
146 is => 'rw',
147 isa => 'Graph',
148 predicate => 'has_graph',
149 );
ea45d2a6 150
151has is_undirected => (
152 is => 'ro',
153 isa => 'Bool',
154 default => undef,
155 writer => 'set_undirected',
156 );
c57be097 157
e05997e2 158sub BUILD {
159 my( $self, $args ) = @_;
160 # If we have been handed a dotfile, initialize it into a graph.
161 if( exists $args->{'dot'} ) {
027d819c 162 $self->_graph_from_dot( $args->{'dot'} );
ea45d2a6 163 } else {
164 }
c0ccdb62 165}
166
ea45d2a6 167before 'graph' => sub {
168 my $self = shift;
169 if( @_ ) {
170 # Make sure all unclassed graph nodes are marked extant.
171 my $g = $_[0];
172 throw( "Cannot set graph to a non-Graph object" )
173 unless ref( $g ) eq 'Graph';
174 foreach my $v ( $g->vertices ) {
175 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
176 $g->set_vertex_attribute( $v, 'class', 'extant' );
177 }
178 }
179 $self->set_undirected( $g->is_undirected );
180 }
181};
182
027d819c 183sub _graph_from_dot {
8d9a1cd8 184 my( $self, $dotfh ) = @_;
8d9a1cd8 185 my $reader = Graph::Reader::Dot->new();
64a36834 186 # Redirect STDOUT in order to trap any error messages - syntax errors
187 # are evidently not fatal.
d34fdf7b 188 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 189 my $reader_out;
d34fdf7b 190 #my $saved_stderr;
191 #open $saved_stderr, ">&STDOUT";
192 #close STDOUT;
193 #open STDOUT, ">", \$reader_out;
8d9a1cd8 194 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 195 #close STDOUT;
196 #open STDOUT, ">", \$saved_stderr;
64a36834 197 if( $reader_out && $reader_out =~ /error/s ) {
198 throw( "Error trying to parse dot: $reader_out" );
199 } elsif( !$graph ) {
200 throw( "Failed to create graph from dot" );
201 }
202 $self->graph( $graph );
8d9a1cd8 203}
204
027d819c 205=head1 METHODS
206
207=head2 as_dot( \%options )
208
209Returns a normal dot representation of the stemma layout, suitable for rendering
210with GraphViz. Options include:
211
212=over
213
214=item * graph - A hashref of global graph options.
215
216=item * node - A hashref of global node options.
217
218=item * edge - A hashref of global edge options.
219
220=back
221
222See the GraphViz documentation for the list of available options.
223
224=cut
225
8d9a1cd8 226sub as_dot {
e367f5c0 227 my( $self, $opts ) = @_;
7a7c249c 228
335a62ef 229 ## See if we are including any a.c. witnesses in this graph.
230 my $graph = $self->graph;
231 if( exists $opts->{'layerwits'} ) {
5c44c598 232 my $extant = {};
233 map { $extant->{$_} = 1 } $self->witnesses;
234 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 235 }
236
7a7c249c 237 # Get default and specified options
e02340f3 238 my %graphopts = (
239 # 'ratio' => 1,
f05e891c 240 'bgcolor' => 'transparent',
e02340f3 241 );
7a7c249c 242 my %nodeopts = (
243 'fontsize' => 11,
7a7c249c 244 'style' => 'filled',
245 'fillcolor' => 'white',
e02340f3 246 'color' => 'white',
7a7c249c 247 'shape' => 'ellipse', # Shape for the extant nodes
248 );
249 my %edgeopts = (
e02340f3 250 'arrowhead' => 'none',
7a7c249c 251 );
252 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
253 if $opts->{'graph'};
254 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
255 if $opts->{'node'};
256 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
257 if $opts->{'edge'};
335a62ef 258
ea45d2a6 259 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
7a7c249c 260 my @dotlines;
ea45d2a6 261 push( @dotlines, "$gdecl stemma {" );
7a7c249c 262 ## Print out the global attributes
263 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
264 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 265 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
266
267 # Add each of the nodes.
335a62ef 268 foreach my $n ( $graph->vertices ) {
269 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
270 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 271 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 272 } else {
7a7c249c 273 # Use the default display settings.
986bbd1b 274 $n = _dotquote( $n );
7a7c249c 275 push( @dotlines, " $n;" );
e79c23c7 276 }
277 }
7a7c249c 278 # Add each of our edges.
335a62ef 279 foreach my $e ( $graph->edges ) {
986bbd1b 280 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 281 my $connector = $graph->is_directed ? '->' : '--';
282 push( @dotlines, " $from $connector $to;" );
7a7c249c 283 }
284 push( @dotlines, '}' );
e79c23c7 285
7a7c249c 286 return join( "\n", @dotlines );
287}
288
0bded693 289=head2 alter_graph( $dotstring )
290
291Alters the graph of this stemma according to the definition specified
292in $dotstring.
293
294=cut
295
296sub alter_graph {
297 my( $self, $dotstring ) = @_;
298 my $dotfh;
299 open $dotfh, '<', \$dotstring;
f90b2bde 300 binmode $dotfh, ':utf8';
0bded693 301 $self->_graph_from_dot( $dotfh );
302}
303
335a62ef 304=head2 editable( $opts )
027d819c 305
5c44c598 306=head2 editable_graph( $graph, $opts )
307
88a6bac5 308Returns a version of the graph rendered in our definition format. The
335a62ef 309output separates statements with a newline; set $opts->{'linesep'} to the
310empty string or to a space if the result is to be sent via JSON.
311
5c44c598 312If a situational version of the stemma is required, the arguments for
313situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 314
315=cut
7a7c249c 316
7a7c249c 317sub editable {
5c44c598 318 my( $self, $opts ) = @_;
335a62ef 319 my $graph = $self->graph;
5c44c598 320 ## See if we need an editable version of a situational graph.
321 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
322 my $extant = delete $opts->{'extant'} || {};
323 my $layerwits = delete $opts->{'layerwits'} || [];
324 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 325 }
5c44c598 326 return editable_graph( $graph, $opts );
327}
328
329sub editable_graph {
330 my( $graph, $opts ) = @_;
335a62ef 331
332 # Create the graph
333 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 334 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
7a7c249c 335 my @dotlines;
ea45d2a6 336 push( @dotlines, "$gdecl stemma {" );
7a7c249c 337 my @real; # A cheap sort
5c44c598 338 foreach my $n ( sort $graph->vertices ) {
339 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 340 $c = 'extant' unless $c;
341 if( $c eq 'extant' ) {
342 push( @real, $n );
343 } else {
344 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
345 }
e367f5c0 346 }
7a7c249c 347 # Now do the real ones
348 foreach my $n ( @real ) {
349 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
350 }
5c44c598 351 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 352 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 353 my $conn = $graph->is_undirected ? '--' : '->';
354 push( @dotlines, " $from $conn $to;" );
7a7c249c 355 }
356 push( @dotlines, '}' );
88a6bac5 357 return join( $join, @dotlines );
7a7c249c 358}
359
360sub _make_dotline {
361 my( $obj, %attr ) = @_;
362 my @pairs;
363 foreach my $k ( keys %attr ) {
986bbd1b 364 my $v = _dotquote( $attr{$k} );
365 push( @pairs, "$k=$v" );
7a7c249c 366 }
986bbd1b 367 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 368}
369
986bbd1b 370sub _dotquote {
371 my( $str ) = @_;
372 return $str if $str =~ /^[A-Za-z0-9]+$/;
373 $str =~ s/\"/\\\"/g;
374 $str = '"' . $str . '"';
375 return $str;
376}
377
7a7c249c 378sub _by_vertex {
379 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
380}
8d9a1cd8 381
5c44c598 382=head2 situation_graph( $extant, $layered )
335a62ef 383
ea45d2a6 384Returns a graph which is the original stemma graph with all witnesses not
385in the %$extant hash marked as hypothetical, and witness layers added to
386the graph according to the list in @$layered. A layered (a.c.) witness is
387added as a parent of its main version, and additionally shares all other
388parents and children with that version.
335a62ef 389
390=cut
391
5c44c598 392sub situation_graph {
ace5fce5 393 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 394
395 my $graph = $self->graph->copy;
396 foreach my $vertex ( $graph->vertices ) {
397 # Set as extant any vertex that is extant in the stemma AND
398 # exists in the $extant hash.
399 my $class = 'hypothetical';
400 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
401 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
402 $graph->set_vertex_attribute( $vertex, 'class', $class );
403 }
404
335a62ef 405 # For each 'layered' witness in the layerwits array, add it to the graph
406 # as an ancestor of the 'main' witness, and otherwise with the same parent/
407 # child links as its main analogue.
408 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 409 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 410 foreach my $lw ( @$layerwits ) {
411 # Add the layered witness and set it with the same attributes as
412 # its 'main' analogue
5c44c598 413 throw( "Cannot add a layer to a hypothetical witness $lw" )
414 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 415 my $lwac = $lw . $layerlabel;
335a62ef 416 $graph->add_vertex( $lwac );
417 $graph->set_vertex_attributes( $lwac,
418 $graph->get_vertex_attributes( $lw ) );
419
420 # Set it as ancestor to the main witness
421 $graph->add_edge( $lwac, $lw );
422
423 # Give it the same ancestors and descendants as the main witness has,
424 # bearing in mind that those ancestors and descendants might also just
425 # have had a layered witness defined.
426 foreach my $v ( $graph->predecessors( $lw ) ) {
427 next if $v eq $lwac; # Don't add a loop
428 $graph->add_edge( $v, $lwac );
ace5fce5 429 $graph->add_edge( $v.$layerlabel, $lwac )
430 if $graph->has_vertex( $v.$layerlabel );
335a62ef 431 }
432 foreach my $v ( $graph->successors( $lw ) ) {
433 next if $v eq $lwac; # but this shouldn't occur
434 $graph->add_edge( $lwac, $v );
ace5fce5 435 $graph->add_edge( $lwac, $v.$layerlabel )
436 if $graph->has_vertex( $v.$layerlabel );
335a62ef 437 }
438 }
439 return $graph;
440}
441
027d819c 442=head2 as_svg
443
444Returns an SVG representation of the graph, calling as_dot first.
445
446=cut
447
8d9a1cd8 448sub as_svg {
449 my( $self, $opts ) = @_;
450 my $dot = $self->as_dot( $opts );
ea45d2a6 451 my @cmd = ( '-Tsvg' );
452 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 453 my $svg;
e79c23c7 454 my $dotfile = File::Temp->new();
e79c23c7 455 binmode $dotfile, ':utf8';
8d9a1cd8 456 print $dotfile $dot;
459c39b3 457 close $dotfile;
e79c23c7 458 push( @cmd, $dotfile->filename );
459 run( \@cmd, ">", binary(), \$svg );
428bcf0b 460 return decode_utf8( $svg );
e79c23c7 461}
462
027d819c 463=head2 witnesses
464
465Returns a list of the extant witnesses represented in the stemma.
466
467=cut
468
08e0fb85 469sub witnesses {
470 my $self = shift;
471 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
472 $self->graph->vertices;
473 return @wits;
474}
475
06e7cbc7 476=head2 hypotheticals
477
478Returns a list of the hypothetical witnesses represented in the stemma.
479
480=cut
481
bebec0e9 482sub hypotheticals {
483 my $self = shift;
484 my @wits = grep
485 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
486 $self->graph->vertices;
487 return @wits;
488}
489
37bf09f4 490=head2 root_graph( $root_vertex )
ea45d2a6 491
492If the stemma graph is undirected, make it directed with $root_vertex at the root.
493If it is directed, re-root it.
494
495=cut
496
497sub root_graph {
498 my( $self, $rootvertex ) = @_;
499 my $graph;
500 if( $self->is_undirected ) {
501 $graph = $self->graph;
502 } else {
503 # Make an undirected version of this graph.
504 $graph = $self->graph->undirected_copy();
505 }
506 my $rooted = Graph->new();
507 $rooted->add_vertex( $rootvertex );
508 my @next = ( $rootvertex );
509 while( @next ) {
510 my @children;
511 foreach my $v ( @next ) {
512 # Place its not-placed neighbors (ergo children) in the tree
513 # and connect them
514 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
515 $graph->neighbors( $v ) ) {
516 $rooted->add_vertex( $n );
517 $rooted->add_edge( $v, $n );
518 push( @children, $n );
519 }
520 }
521 @next = @children;
522 }
523 # Set the vertex classes
524 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
525 $self->graph->hypotheticals;
526 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
527 $self->graph->witnesses;
528 return $rooted;
529}
530
531
63778331 532sub throw {
533 Text::Tradition::Error->throw(
534 'ident' => 'Stemma error',
535 'message' => $_[0],
536 );
537}
538
539
9463b0bf 540no Moose;
541__PACKAGE__->meta->make_immutable;
542
5431;
027d819c 544
545=head1 LICENSE
546
547This package is free software and is provided "as is" without express
548or implied warranty. You can redistribute it and/or modify it under
549the same terms as Perl itself.
550
551=head1 AUTHOR
552
553Tara L Andrews E<lt>aurum@cpan.orgE<gt>