pull out direct DBI connection horribleness into separate method
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
39 digraph Stemma {
40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
68 digraph Stemma {
69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
ea45d2a6 105TODO: {
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
836e0546 107 my $baddotfh;
ea45d2a6 108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 try {
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 }
64a36834 115}
116
117# Create a good graph
118my $dotfh;
119open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120binmode( $dotfh, ':utf8' );
ace5fce5 121my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 122is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
125my $found_unicode_sigil;
126foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128}
129ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130
ea45d2a6 131# TODO Create stemma from graph, create stemma from undirected graph,
132# create stemma from incompletely-specified graph
133
64a36834 134=end testing
135
027d819c 136=cut
137
9463b0bf 138has collation => (
139 is => 'ro',
140 isa => 'Text::Tradition::Collation',
ea45d2a6 141 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 142 weak_ref => 1,
9463b0bf 143 );
144
e05997e2 145has graph => (
146 is => 'rw',
147 isa => 'Graph',
148 predicate => 'has_graph',
149 );
ea45d2a6 150
151has is_undirected => (
152 is => 'ro',
153 isa => 'Bool',
154 default => undef,
155 writer => 'set_undirected',
156 );
c57be097 157
e05997e2 158sub BUILD {
159 my( $self, $args ) = @_;
160 # If we have been handed a dotfile, initialize it into a graph.
161 if( exists $args->{'dot'} ) {
027d819c 162 $self->_graph_from_dot( $args->{'dot'} );
ea45d2a6 163 } else {
164 }
c0ccdb62 165}
166
ea45d2a6 167before 'graph' => sub {
168 my $self = shift;
169 if( @_ ) {
170 # Make sure all unclassed graph nodes are marked extant.
171 my $g = $_[0];
172 throw( "Cannot set graph to a non-Graph object" )
173 unless ref( $g ) eq 'Graph';
174 foreach my $v ( $g->vertices ) {
175 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
176 $g->set_vertex_attribute( $v, 'class', 'extant' );
177 }
178 }
179 $self->set_undirected( $g->is_undirected );
180 }
181};
182
027d819c 183sub _graph_from_dot {
8d9a1cd8 184 my( $self, $dotfh ) = @_;
8d9a1cd8 185 my $reader = Graph::Reader::Dot->new();
64a36834 186 # Redirect STDOUT in order to trap any error messages - syntax errors
187 # are evidently not fatal.
d34fdf7b 188 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 189 my $reader_out;
d34fdf7b 190 #my $saved_stderr;
191 #open $saved_stderr, ">&STDOUT";
192 #close STDOUT;
193 #open STDOUT, ">", \$reader_out;
8d9a1cd8 194 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 195 #close STDOUT;
196 #open STDOUT, ">", \$saved_stderr;
64a36834 197 if( $reader_out && $reader_out =~ /error/s ) {
198 throw( "Error trying to parse dot: $reader_out" );
199 } elsif( !$graph ) {
200 throw( "Failed to create graph from dot" );
201 }
202 $self->graph( $graph );
8d9a1cd8 203}
204
027d819c 205=head1 METHODS
206
207=head2 as_dot( \%options )
208
209Returns a normal dot representation of the stemma layout, suitable for rendering
210with GraphViz. Options include:
211
212=over
213
214=item * graph - A hashref of global graph options.
215
216=item * node - A hashref of global node options.
217
218=item * edge - A hashref of global edge options.
219
220=back
221
222See the GraphViz documentation for the list of available options.
223
224=cut
225
8d9a1cd8 226sub as_dot {
e367f5c0 227 my( $self, $opts ) = @_;
7a7c249c 228
335a62ef 229 ## See if we are including any a.c. witnesses in this graph.
230 my $graph = $self->graph;
231 if( exists $opts->{'layerwits'} ) {
5c44c598 232 my $extant = {};
233 map { $extant->{$_} = 1 } $self->witnesses;
234 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 235 }
236
7a7c249c 237 # Get default and specified options
e02340f3 238 my %graphopts = (
239 # 'ratio' => 1,
f05e891c 240 'bgcolor' => 'transparent',
e02340f3 241 );
7a7c249c 242 my %nodeopts = (
243 'fontsize' => 11,
7a7c249c 244 'style' => 'filled',
245 'fillcolor' => 'white',
e02340f3 246 'color' => 'white',
7a7c249c 247 'shape' => 'ellipse', # Shape for the extant nodes
248 );
249 my %edgeopts = (
e02340f3 250 'arrowhead' => 'none',
7a7c249c 251 );
252 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
253 if $opts->{'graph'};
254 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
255 if $opts->{'node'};
256 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
257 if $opts->{'edge'};
335a62ef 258
ea45d2a6 259 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
7a7c249c 260 my @dotlines;
ea45d2a6 261 push( @dotlines, "$gdecl stemma {" );
7a7c249c 262 ## Print out the global attributes
263 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
264 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 265 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
266
267 # Add each of the nodes.
335a62ef 268 foreach my $n ( $graph->vertices ) {
c753ea40 269 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 270 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 271 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 272 }
c753ea40 273 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 274 }
7a7c249c 275 # Add each of our edges.
335a62ef 276 foreach my $e ( $graph->edges ) {
986bbd1b 277 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 278 my $connector = $graph->is_directed ? '->' : '--';
279 push( @dotlines, " $from $connector $to;" );
7a7c249c 280 }
281 push( @dotlines, '}' );
e79c23c7 282
7a7c249c 283 return join( "\n", @dotlines );
284}
285
0bded693 286=head2 alter_graph( $dotstring )
287
288Alters the graph of this stemma according to the definition specified
289in $dotstring.
290
291=cut
292
293sub alter_graph {
294 my( $self, $dotstring ) = @_;
295 my $dotfh;
296 open $dotfh, '<', \$dotstring;
f90b2bde 297 binmode $dotfh, ':utf8';
0bded693 298 $self->_graph_from_dot( $dotfh );
299}
300
335a62ef 301=head2 editable( $opts )
027d819c 302
5c44c598 303=head2 editable_graph( $graph, $opts )
304
88a6bac5 305Returns a version of the graph rendered in our definition format. The
335a62ef 306output separates statements with a newline; set $opts->{'linesep'} to the
307empty string or to a space if the result is to be sent via JSON.
308
5c44c598 309If a situational version of the stemma is required, the arguments for
310situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 311
312=cut
7a7c249c 313
7a7c249c 314sub editable {
5c44c598 315 my( $self, $opts ) = @_;
335a62ef 316 my $graph = $self->graph;
5c44c598 317 ## See if we need an editable version of a situational graph.
318 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
319 my $extant = delete $opts->{'extant'} || {};
320 my $layerwits = delete $opts->{'layerwits'} || [];
321 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 322 }
5c44c598 323 return editable_graph( $graph, $opts );
324}
325
326sub editable_graph {
327 my( $graph, $opts ) = @_;
335a62ef 328
329 # Create the graph
330 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 331 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
7a7c249c 332 my @dotlines;
ea45d2a6 333 push( @dotlines, "$gdecl stemma {" );
7a7c249c 334 my @real; # A cheap sort
5c44c598 335 foreach my $n ( sort $graph->vertices ) {
336 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 337 $c = 'extant' unless $c;
338 if( $c eq 'extant' ) {
339 push( @real, $n );
340 } else {
341 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
342 }
e367f5c0 343 }
7a7c249c 344 # Now do the real ones
345 foreach my $n ( @real ) {
346 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
347 }
5c44c598 348 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 349 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 350 my $conn = $graph->is_undirected ? '--' : '->';
351 push( @dotlines, " $from $conn $to;" );
7a7c249c 352 }
353 push( @dotlines, '}' );
88a6bac5 354 return join( $join, @dotlines );
7a7c249c 355}
356
357sub _make_dotline {
358 my( $obj, %attr ) = @_;
359 my @pairs;
360 foreach my $k ( keys %attr ) {
986bbd1b 361 my $v = _dotquote( $attr{$k} );
362 push( @pairs, "$k=$v" );
7a7c249c 363 }
986bbd1b 364 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 365}
366
986bbd1b 367sub _dotquote {
368 my( $str ) = @_;
369 return $str if $str =~ /^[A-Za-z0-9]+$/;
370 $str =~ s/\"/\\\"/g;
371 $str = '"' . $str . '"';
372 return $str;
373}
374
7a7c249c 375sub _by_vertex {
376 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
377}
8d9a1cd8 378
5c44c598 379=head2 situation_graph( $extant, $layered )
335a62ef 380
ea45d2a6 381Returns a graph which is the original stemma graph with all witnesses not
382in the %$extant hash marked as hypothetical, and witness layers added to
383the graph according to the list in @$layered. A layered (a.c.) witness is
384added as a parent of its main version, and additionally shares all other
385parents and children with that version.
335a62ef 386
387=cut
388
5c44c598 389sub situation_graph {
ace5fce5 390 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 391
392 my $graph = $self->graph->copy;
393 foreach my $vertex ( $graph->vertices ) {
394 # Set as extant any vertex that is extant in the stemma AND
395 # exists in the $extant hash.
396 my $class = 'hypothetical';
397 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
398 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
399 $graph->set_vertex_attribute( $vertex, 'class', $class );
400 }
401
335a62ef 402 # For each 'layered' witness in the layerwits array, add it to the graph
403 # as an ancestor of the 'main' witness, and otherwise with the same parent/
404 # child links as its main analogue.
405 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 406 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 407 foreach my $lw ( @$layerwits ) {
408 # Add the layered witness and set it with the same attributes as
409 # its 'main' analogue
5c44c598 410 throw( "Cannot add a layer to a hypothetical witness $lw" )
411 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 412 my $lwac = $lw . $layerlabel;
335a62ef 413 $graph->add_vertex( $lwac );
414 $graph->set_vertex_attributes( $lwac,
415 $graph->get_vertex_attributes( $lw ) );
416
417 # Set it as ancestor to the main witness
418 $graph->add_edge( $lwac, $lw );
419
420 # Give it the same ancestors and descendants as the main witness has,
421 # bearing in mind that those ancestors and descendants might also just
422 # have had a layered witness defined.
423 foreach my $v ( $graph->predecessors( $lw ) ) {
424 next if $v eq $lwac; # Don't add a loop
425 $graph->add_edge( $v, $lwac );
ace5fce5 426 $graph->add_edge( $v.$layerlabel, $lwac )
427 if $graph->has_vertex( $v.$layerlabel );
335a62ef 428 }
429 foreach my $v ( $graph->successors( $lw ) ) {
430 next if $v eq $lwac; # but this shouldn't occur
431 $graph->add_edge( $lwac, $v );
ace5fce5 432 $graph->add_edge( $lwac, $v.$layerlabel )
433 if $graph->has_vertex( $v.$layerlabel );
335a62ef 434 }
435 }
436 return $graph;
437}
438
027d819c 439=head2 as_svg
440
441Returns an SVG representation of the graph, calling as_dot first.
442
443=cut
444
8d9a1cd8 445sub as_svg {
446 my( $self, $opts ) = @_;
447 my $dot = $self->as_dot( $opts );
ea45d2a6 448 my @cmd = ( '-Tsvg' );
449 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 450 my $svg;
e79c23c7 451 my $dotfile = File::Temp->new();
e79c23c7 452 binmode $dotfile, ':utf8';
8d9a1cd8 453 print $dotfile $dot;
459c39b3 454 close $dotfile;
e79c23c7 455 push( @cmd, $dotfile->filename );
456 run( \@cmd, ">", binary(), \$svg );
428bcf0b 457 return decode_utf8( $svg );
e79c23c7 458}
459
027d819c 460=head2 witnesses
461
462Returns a list of the extant witnesses represented in the stemma.
463
464=cut
465
08e0fb85 466sub witnesses {
467 my $self = shift;
468 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
469 $self->graph->vertices;
470 return @wits;
471}
472
06e7cbc7 473=head2 hypotheticals
474
475Returns a list of the hypothetical witnesses represented in the stemma.
476
477=cut
478
bebec0e9 479sub hypotheticals {
480 my $self = shift;
481 my @wits = grep
482 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
483 $self->graph->vertices;
484 return @wits;
485}
486
37bf09f4 487=head2 root_graph( $root_vertex )
ea45d2a6 488
489If the stemma graph is undirected, make it directed with $root_vertex at the root.
490If it is directed, re-root it.
491
492=cut
493
494sub root_graph {
495 my( $self, $rootvertex ) = @_;
496 my $graph;
497 if( $self->is_undirected ) {
498 $graph = $self->graph;
499 } else {
500 # Make an undirected version of this graph.
501 $graph = $self->graph->undirected_copy();
502 }
503 my $rooted = Graph->new();
504 $rooted->add_vertex( $rootvertex );
505 my @next = ( $rootvertex );
506 while( @next ) {
507 my @children;
508 foreach my $v ( @next ) {
509 # Place its not-placed neighbors (ergo children) in the tree
510 # and connect them
511 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
512 $graph->neighbors( $v ) ) {
513 $rooted->add_vertex( $n );
514 $rooted->add_edge( $v, $n );
515 push( @children, $n );
516 }
517 }
518 @next = @children;
519 }
520 # Set the vertex classes
521 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
522 $self->graph->hypotheticals;
523 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
524 $self->graph->witnesses;
525 return $rooted;
526}
527
528
63778331 529sub throw {
530 Text::Tradition::Error->throw(
531 'ident' => 'Stemma error',
532 'message' => $_[0],
533 );
534}
535
536
9463b0bf 537no Moose;
538__PACKAGE__->meta->make_immutable;
539
5401;
027d819c 541
542=head1 LICENSE
543
544This package is free software and is provided "as is" without express
545or implied warranty. You can redistribute it and/or modify it under
546the same terms as Perl itself.
547
548=head1 AUTHOR
549
550Tara L Andrews E<lt>aurum@cpan.orgE<gt>