change worker script to using 'say'; timestamp the log
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
f96630e3 39 digraph "Name of Stemma" {
027d819c 40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
f96630e3 68 digraph "Test stemma 1" {
027d819c 69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
02b6340e 105my $baddotfh;
106open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
107try {
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110} catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 112}
113
114# Create a good graph
115my $dotfh;
116open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117binmode( $dotfh, ':utf8' );
ace5fce5 118my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 119is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 122ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 124my $found_unicode_sigil;
125foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127}
128ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129
cb741417 130# Create an undirected graph
131my $undirdotfh;
132open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
133binmode( $undirdotfh, ':utf8' );
134my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
135is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
136is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
137is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
138ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
6665a327 139is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
ea45d2a6 140
64a36834 141=end testing
142
027d819c 143=cut
144
9463b0bf 145has collation => (
146 is => 'ro',
147 isa => 'Text::Tradition::Collation',
ea45d2a6 148 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 149 weak_ref => 1,
9463b0bf 150 );
151
e05997e2 152has graph => (
153 is => 'rw',
154 isa => 'Graph',
155 predicate => 'has_graph',
156 );
ea45d2a6 157
98f22390 158has identifier => (
ea45d2a6 159 is => 'ro',
98f22390 160 isa => 'Str',
161 writer => 'set_identifier',
162 predicate => 'has_identifier',
ea45d2a6 163 );
738620c7 164
165has from_jobid => (
166 is => 'ro',
167 isa => 'Str',
168 predicate => 'came_from_jobid',
169 writer => '_set_from_jobid',
170 );
98f22390 171
e05997e2 172sub BUILD {
173 my( $self, $args ) = @_;
174 # If we have been handed a dotfile, initialize it into a graph.
175 if( exists $args->{'dot'} ) {
027d819c 176 $self->_graph_from_dot( $args->{'dot'} );
98f22390 177 }
c0ccdb62 178}
179
ea45d2a6 180before 'graph' => sub {
181 my $self = shift;
182 if( @_ ) {
183 # Make sure all unclassed graph nodes are marked extant.
184 my $g = $_[0];
185 throw( "Cannot set graph to a non-Graph object" )
cb741417 186 unless $g->isa( 'Graph' );
ea45d2a6 187 foreach my $v ( $g->vertices ) {
188 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
189 $g->set_vertex_attribute( $v, 'class', 'extant' );
190 }
191 }
98f22390 192 }
193};
194
027d819c 195sub _graph_from_dot {
8d9a1cd8 196 my( $self, $dotfh ) = @_;
8d9a1cd8 197 my $reader = Graph::Reader::Dot->new();
64a36834 198 # Redirect STDOUT in order to trap any error messages - syntax errors
199 # are evidently not fatal.
02b6340e 200 my $graph;
201 my $reader_out;
cb741417 202 my $reader_err;
02b6340e 203 {
204 local(*STDOUT);
205 open( STDOUT, ">", \$reader_out );
cb741417 206 local(*STDERR);
207 open( STDERR, ">", \$reader_err );
02b6340e 208 $graph = $reader->read_graph( $dotfh );
209 close STDOUT;
cb741417 210 close STDERR;
02b6340e 211 }
64a36834 212 if( $reader_out && $reader_out =~ /error/s ) {
213 throw( "Error trying to parse dot: $reader_out" );
214 } elsif( !$graph ) {
215 throw( "Failed to create graph from dot" );
216 }
6665a327 217 ## HORRIBLE HACK but there is no API access to graph attributes!
218 my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma';
cb741417 219 # Correct for implicit graph -> digraph quirk of reader
220 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
221 my $udgraph = $graph->undirected_copy;
222 foreach my $v ( $graph->vertices ) {
223 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
224 }
225 $graph = $udgraph;
226 }
64a36834 227 $self->graph( $graph );
6665a327 228 $self->set_identifier( $graph_id );
8d9a1cd8 229}
230
98f22390 231sub is_undirected {
232 my( $self ) = @_;
233 return undef unless $self->has_graph;
234 return $self->graph->is_undirected;
235}
236
027d819c 237=head1 METHODS
238
239=head2 as_dot( \%options )
240
241Returns a normal dot representation of the stemma layout, suitable for rendering
242with GraphViz. Options include:
243
244=over
245
246=item * graph - A hashref of global graph options.
247
248=item * node - A hashref of global node options.
249
250=item * edge - A hashref of global edge options.
251
252=back
253
254See the GraphViz documentation for the list of available options.
255
256=cut
257
8d9a1cd8 258sub as_dot {
e367f5c0 259 my( $self, $opts ) = @_;
7a7c249c 260
335a62ef 261 ## See if we are including any a.c. witnesses in this graph.
262 my $graph = $self->graph;
263 if( exists $opts->{'layerwits'} ) {
5c44c598 264 my $extant = {};
265 map { $extant->{$_} = 1 } $self->witnesses;
266 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 267 }
268
7a7c249c 269 # Get default and specified options
e02340f3 270 my %graphopts = (
271 # 'ratio' => 1,
f05e891c 272 'bgcolor' => 'transparent',
e02340f3 273 );
7a7c249c 274 my %nodeopts = (
275 'fontsize' => 11,
7a7c249c 276 'style' => 'filled',
277 'fillcolor' => 'white',
e02340f3 278 'color' => 'white',
7a7c249c 279 'shape' => 'ellipse', # Shape for the extant nodes
280 );
281 my %edgeopts = (
e02340f3 282 'arrowhead' => 'none',
7a7c249c 283 );
284 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
285 if $opts->{'graph'};
286 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
287 if $opts->{'node'};
288 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
289 if $opts->{'edge'};
335a62ef 290
ea45d2a6 291 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
f96630e3 292 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
293 : 'stemma';
7a7c249c 294 my @dotlines;
f96630e3 295 push( @dotlines, "$gdecl $gname {" );
7a7c249c 296 ## Print out the global attributes
297 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
298 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 299 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
300
301 # Add each of the nodes.
335a62ef 302 foreach my $n ( $graph->vertices ) {
c753ea40 303 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 304 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 305 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 306 }
c753ea40 307 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 308 }
7a7c249c 309 # Add each of our edges.
335a62ef 310 foreach my $e ( $graph->edges ) {
986bbd1b 311 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 312 my $connector = $graph->is_directed ? '->' : '--';
313 push( @dotlines, " $from $connector $to;" );
7a7c249c 314 }
315 push( @dotlines, '}' );
e79c23c7 316
7a7c249c 317 return join( "\n", @dotlines );
318}
319
0bded693 320=head2 alter_graph( $dotstring )
321
322Alters the graph of this stemma according to the definition specified
323in $dotstring.
324
325=cut
326
327sub alter_graph {
328 my( $self, $dotstring ) = @_;
329 my $dotfh;
330 open $dotfh, '<', \$dotstring;
f90b2bde 331 binmode $dotfh, ':utf8';
0bded693 332 $self->_graph_from_dot( $dotfh );
333}
334
335a62ef 335=head2 editable( $opts )
027d819c 336
5c44c598 337=head2 editable_graph( $graph, $opts )
338
88a6bac5 339Returns a version of the graph rendered in our definition format. The
335a62ef 340output separates statements with a newline; set $opts->{'linesep'} to the
341empty string or to a space if the result is to be sent via JSON.
342
5c44c598 343If a situational version of the stemma is required, the arguments for
344situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 345
346=cut
7a7c249c 347
7a7c249c 348sub editable {
5c44c598 349 my( $self, $opts ) = @_;
335a62ef 350 my $graph = $self->graph;
f96630e3 351 if( $self->has_identifier ) {
352 $opts->{'name'} = $self->identifier;
353 }
5c44c598 354 ## See if we need an editable version of a situational graph.
355 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
356 my $extant = delete $opts->{'extant'} || {};
357 my $layerwits = delete $opts->{'layerwits'} || [];
358 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 359 }
5c44c598 360 return editable_graph( $graph, $opts );
361}
362
363sub editable_graph {
364 my( $graph, $opts ) = @_;
335a62ef 365
366 # Create the graph
367 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 368 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
f96630e3 369 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
370 : 'stemma';
7a7c249c 371 my @dotlines;
f96630e3 372 push( @dotlines, "$gdecl $gname {" );
7a7c249c 373 my @real; # A cheap sort
5c44c598 374 foreach my $n ( sort $graph->vertices ) {
375 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 376 $c = 'extant' unless $c;
377 if( $c eq 'extant' ) {
378 push( @real, $n );
379 } else {
380 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
381 }
e367f5c0 382 }
7a7c249c 383 # Now do the real ones
384 foreach my $n ( @real ) {
385 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
386 }
5c44c598 387 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 388 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 389 my $conn = $graph->is_undirected ? '--' : '->';
390 push( @dotlines, " $from $conn $to;" );
7a7c249c 391 }
392 push( @dotlines, '}' );
88a6bac5 393 return join( $join, @dotlines );
7a7c249c 394}
395
396sub _make_dotline {
397 my( $obj, %attr ) = @_;
398 my @pairs;
399 foreach my $k ( keys %attr ) {
986bbd1b 400 my $v = _dotquote( $attr{$k} );
401 push( @pairs, "$k=$v" );
7a7c249c 402 }
986bbd1b 403 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 404}
405
986bbd1b 406sub _dotquote {
407 my( $str ) = @_;
408 return $str if $str =~ /^[A-Za-z0-9]+$/;
409 $str =~ s/\"/\\\"/g;
410 $str = '"' . $str . '"';
411 return $str;
412}
413
7a7c249c 414sub _by_vertex {
415 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
416}
8d9a1cd8 417
5c44c598 418=head2 situation_graph( $extant, $layered )
335a62ef 419
ea45d2a6 420Returns a graph which is the original stemma graph with all witnesses not
421in the %$extant hash marked as hypothetical, and witness layers added to
422the graph according to the list in @$layered. A layered (a.c.) witness is
423added as a parent of its main version, and additionally shares all other
424parents and children with that version.
335a62ef 425
426=cut
427
5c44c598 428sub situation_graph {
ace5fce5 429 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 430
431 my $graph = $self->graph->copy;
432 foreach my $vertex ( $graph->vertices ) {
433 # Set as extant any vertex that is extant in the stemma AND
434 # exists in the $extant hash.
435 my $class = 'hypothetical';
436 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
437 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
438 $graph->set_vertex_attribute( $vertex, 'class', $class );
439 }
440
335a62ef 441 # For each 'layered' witness in the layerwits array, add it to the graph
442 # as an ancestor of the 'main' witness, and otherwise with the same parent/
443 # child links as its main analogue.
444 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 445 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 446 foreach my $lw ( @$layerwits ) {
447 # Add the layered witness and set it with the same attributes as
448 # its 'main' analogue
5c44c598 449 throw( "Cannot add a layer to a hypothetical witness $lw" )
450 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 451 my $lwac = $lw . $layerlabel;
335a62ef 452 $graph->add_vertex( $lwac );
453 $graph->set_vertex_attributes( $lwac,
454 $graph->get_vertex_attributes( $lw ) );
455
456 # Set it as ancestor to the main witness
457 $graph->add_edge( $lwac, $lw );
458
459 # Give it the same ancestors and descendants as the main witness has,
460 # bearing in mind that those ancestors and descendants might also just
461 # have had a layered witness defined.
462 foreach my $v ( $graph->predecessors( $lw ) ) {
463 next if $v eq $lwac; # Don't add a loop
464 $graph->add_edge( $v, $lwac );
ace5fce5 465 $graph->add_edge( $v.$layerlabel, $lwac )
466 if $graph->has_vertex( $v.$layerlabel );
335a62ef 467 }
468 foreach my $v ( $graph->successors( $lw ) ) {
469 next if $v eq $lwac; # but this shouldn't occur
470 $graph->add_edge( $lwac, $v );
ace5fce5 471 $graph->add_edge( $lwac, $v.$layerlabel )
472 if $graph->has_vertex( $v.$layerlabel );
335a62ef 473 }
474 }
475 return $graph;
476}
477
027d819c 478=head2 as_svg
479
480Returns an SVG representation of the graph, calling as_dot first.
481
482=cut
483
8d9a1cd8 484sub as_svg {
485 my( $self, $opts ) = @_;
486 my $dot = $self->as_dot( $opts );
ea45d2a6 487 my @cmd = ( '-Tsvg' );
488 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 489 my $svg;
e79c23c7 490 my $dotfile = File::Temp->new();
e79c23c7 491 binmode $dotfile, ':utf8';
8d9a1cd8 492 print $dotfile $dot;
459c39b3 493 close $dotfile;
e79c23c7 494 push( @cmd, $dotfile->filename );
495 run( \@cmd, ">", binary(), \$svg );
428bcf0b 496 return decode_utf8( $svg );
e79c23c7 497}
498
027d819c 499=head2 witnesses
500
501Returns a list of the extant witnesses represented in the stemma.
502
503=cut
504
08e0fb85 505sub witnesses {
506 my $self = shift;
507 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
508 $self->graph->vertices;
509 return @wits;
510}
511
06e7cbc7 512=head2 hypotheticals
513
514Returns a list of the hypothetical witnesses represented in the stemma.
515
516=cut
517
bebec0e9 518sub hypotheticals {
519 my $self = shift;
520 my @wits = grep
521 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
522 $self->graph->vertices;
523 return @wits;
524}
525
37bf09f4 526=head2 root_graph( $root_vertex )
ea45d2a6 527
528If the stemma graph is undirected, make it directed with $root_vertex at the root.
529If it is directed, re-root it.
530
531=cut
532
533sub root_graph {
534 my( $self, $rootvertex ) = @_;
535 my $graph;
907f6671 536 my $ident = $self->identifier; # will have to restore this at the end
ea45d2a6 537 if( $self->is_undirected ) {
538 $graph = $self->graph;
539 } else {
540 # Make an undirected version of this graph.
541 $graph = $self->graph->undirected_copy();
542 }
1cf6dd32 543 # First, ensure that the requested root is actually a vertex in the graph.
544 unless( $graph->has_vertex( $rootvertex ) ) {
545 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
546 }
547
548 # Now make a directed version of the graph.
ea45d2a6 549 my $rooted = Graph->new();
550 $rooted->add_vertex( $rootvertex );
551 my @next = ( $rootvertex );
552 while( @next ) {
553 my @children;
554 foreach my $v ( @next ) {
555 # Place its not-placed neighbors (ergo children) in the tree
556 # and connect them
557 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
558 $graph->neighbors( $v ) ) {
559 $rooted->add_vertex( $n );
560 $rooted->add_edge( $v, $n );
561 push( @children, $n );
562 }
563 }
564 @next = @children;
565 }
566 # Set the vertex classes
567 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
1cf6dd32 568 $self->hypotheticals;
569 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
570 $self->witnesses;
571 $self->graph( $rooted );
907f6671 572 $self->set_identifier( $ident );
ea45d2a6 573}
574
575
63778331 576sub throw {
577 Text::Tradition::Error->throw(
578 'ident' => 'Stemma error',
579 'message' => $_[0],
580 );
581}
582
583
9463b0bf 584no Moose;
585__PACKAGE__->meta->make_immutable;
586
5871;
027d819c 588
589=head1 LICENSE
590
591This package is free software and is provided "as is" without express
592or implied warranty. You can redistribute it and/or modify it under
593the same terms as Perl itself.
594
595=head1 AUTHOR
596
597Tara L Andrews E<lt>aurum@cpan.orgE<gt>