initial hooks for Stemweb integration
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
39 digraph Stemma {
40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
68 digraph Stemma {
69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
ea45d2a6 105TODO: {
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
836e0546 107 my $baddotfh;
ea45d2a6 108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 try {
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 }
64a36834 115}
116
117# Create a good graph
118my $dotfh;
119open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120binmode( $dotfh, ':utf8' );
ace5fce5 121my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 122is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
125my $found_unicode_sigil;
126foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128}
129ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130
ea45d2a6 131# TODO Create stemma from graph, create stemma from undirected graph,
132# create stemma from incompletely-specified graph
133
64a36834 134=end testing
135
027d819c 136=cut
137
9463b0bf 138has collation => (
139 is => 'ro',
140 isa => 'Text::Tradition::Collation',
ea45d2a6 141 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 142 weak_ref => 1,
9463b0bf 143 );
144
e05997e2 145has graph => (
146 is => 'rw',
147 isa => 'Graph',
148 predicate => 'has_graph',
149 );
ea45d2a6 150
98f22390 151has identifier => (
ea45d2a6 152 is => 'ro',
98f22390 153 isa => 'Str',
154 writer => 'set_identifier',
155 predicate => 'has_identifier',
ea45d2a6 156 );
98f22390 157
e05997e2 158sub BUILD {
159 my( $self, $args ) = @_;
160 # If we have been handed a dotfile, initialize it into a graph.
161 if( exists $args->{'dot'} ) {
027d819c 162 $self->_graph_from_dot( $args->{'dot'} );
98f22390 163 }
c0ccdb62 164}
165
ea45d2a6 166before 'graph' => sub {
167 my $self = shift;
168 if( @_ ) {
169 # Make sure all unclassed graph nodes are marked extant.
170 my $g = $_[0];
171 throw( "Cannot set graph to a non-Graph object" )
172 unless ref( $g ) eq 'Graph';
173 foreach my $v ( $g->vertices ) {
174 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
175 $g->set_vertex_attribute( $v, 'class', 'extant' );
176 }
177 }
98f22390 178 }
179};
180
181after 'graph' => sub {
182 my $self = shift;
183 return unless @_;
184 unless( $self->has_identifier ) {
185 ## HORRIBLE HACK but there is no API access to graph attributes!
186 if( exists $_[0]->[4]->{'name'} ) {
187 $self->set_identifier( $_[0]->[4]->{'name'} );
188 }
ea45d2a6 189 }
190};
191
027d819c 192sub _graph_from_dot {
8d9a1cd8 193 my( $self, $dotfh ) = @_;
8d9a1cd8 194 my $reader = Graph::Reader::Dot->new();
64a36834 195 # Redirect STDOUT in order to trap any error messages - syntax errors
196 # are evidently not fatal.
d34fdf7b 197 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 198 my $reader_out;
d34fdf7b 199 #my $saved_stderr;
200 #open $saved_stderr, ">&STDOUT";
201 #close STDOUT;
202 #open STDOUT, ">", \$reader_out;
8d9a1cd8 203 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 204 #close STDOUT;
205 #open STDOUT, ">", \$saved_stderr;
64a36834 206 if( $reader_out && $reader_out =~ /error/s ) {
207 throw( "Error trying to parse dot: $reader_out" );
208 } elsif( !$graph ) {
209 throw( "Failed to create graph from dot" );
210 }
211 $self->graph( $graph );
8d9a1cd8 212}
213
98f22390 214sub is_undirected {
215 my( $self ) = @_;
216 return undef unless $self->has_graph;
217 return $self->graph->is_undirected;
218}
219
027d819c 220=head1 METHODS
221
222=head2 as_dot( \%options )
223
224Returns a normal dot representation of the stemma layout, suitable for rendering
225with GraphViz. Options include:
226
227=over
228
229=item * graph - A hashref of global graph options.
230
231=item * node - A hashref of global node options.
232
233=item * edge - A hashref of global edge options.
234
235=back
236
237See the GraphViz documentation for the list of available options.
238
239=cut
240
8d9a1cd8 241sub as_dot {
e367f5c0 242 my( $self, $opts ) = @_;
7a7c249c 243
335a62ef 244 ## See if we are including any a.c. witnesses in this graph.
245 my $graph = $self->graph;
246 if( exists $opts->{'layerwits'} ) {
5c44c598 247 my $extant = {};
248 map { $extant->{$_} = 1 } $self->witnesses;
249 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 250 }
251
7a7c249c 252 # Get default and specified options
e02340f3 253 my %graphopts = (
254 # 'ratio' => 1,
f05e891c 255 'bgcolor' => 'transparent',
e02340f3 256 );
7a7c249c 257 my %nodeopts = (
258 'fontsize' => 11,
7a7c249c 259 'style' => 'filled',
260 'fillcolor' => 'white',
e02340f3 261 'color' => 'white',
7a7c249c 262 'shape' => 'ellipse', # Shape for the extant nodes
263 );
264 my %edgeopts = (
e02340f3 265 'arrowhead' => 'none',
7a7c249c 266 );
267 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
268 if $opts->{'graph'};
269 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
270 if $opts->{'node'};
271 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
272 if $opts->{'edge'};
335a62ef 273
ea45d2a6 274 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
7a7c249c 275 my @dotlines;
ea45d2a6 276 push( @dotlines, "$gdecl stemma {" );
7a7c249c 277 ## Print out the global attributes
278 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
279 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 280 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
281
282 # Add each of the nodes.
335a62ef 283 foreach my $n ( $graph->vertices ) {
c753ea40 284 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 285 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 286 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 287 }
c753ea40 288 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 289 }
7a7c249c 290 # Add each of our edges.
335a62ef 291 foreach my $e ( $graph->edges ) {
986bbd1b 292 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 293 my $connector = $graph->is_directed ? '->' : '--';
294 push( @dotlines, " $from $connector $to;" );
7a7c249c 295 }
296 push( @dotlines, '}' );
e79c23c7 297
7a7c249c 298 return join( "\n", @dotlines );
299}
300
0bded693 301=head2 alter_graph( $dotstring )
302
303Alters the graph of this stemma according to the definition specified
304in $dotstring.
305
306=cut
307
308sub alter_graph {
309 my( $self, $dotstring ) = @_;
310 my $dotfh;
311 open $dotfh, '<', \$dotstring;
f90b2bde 312 binmode $dotfh, ':utf8';
0bded693 313 $self->_graph_from_dot( $dotfh );
314}
315
335a62ef 316=head2 editable( $opts )
027d819c 317
5c44c598 318=head2 editable_graph( $graph, $opts )
319
88a6bac5 320Returns a version of the graph rendered in our definition format. The
335a62ef 321output separates statements with a newline; set $opts->{'linesep'} to the
322empty string or to a space if the result is to be sent via JSON.
323
5c44c598 324If a situational version of the stemma is required, the arguments for
325situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 326
327=cut
7a7c249c 328
7a7c249c 329sub editable {
5c44c598 330 my( $self, $opts ) = @_;
335a62ef 331 my $graph = $self->graph;
5c44c598 332 ## See if we need an editable version of a situational graph.
333 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
334 my $extant = delete $opts->{'extant'} || {};
335 my $layerwits = delete $opts->{'layerwits'} || [];
336 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 337 }
5c44c598 338 return editable_graph( $graph, $opts );
339}
340
341sub editable_graph {
342 my( $graph, $opts ) = @_;
335a62ef 343
344 # Create the graph
345 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 346 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
7a7c249c 347 my @dotlines;
ea45d2a6 348 push( @dotlines, "$gdecl stemma {" );
7a7c249c 349 my @real; # A cheap sort
5c44c598 350 foreach my $n ( sort $graph->vertices ) {
351 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 352 $c = 'extant' unless $c;
353 if( $c eq 'extant' ) {
354 push( @real, $n );
355 } else {
356 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
357 }
e367f5c0 358 }
7a7c249c 359 # Now do the real ones
360 foreach my $n ( @real ) {
361 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
362 }
5c44c598 363 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 364 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 365 my $conn = $graph->is_undirected ? '--' : '->';
366 push( @dotlines, " $from $conn $to;" );
7a7c249c 367 }
368 push( @dotlines, '}' );
88a6bac5 369 return join( $join, @dotlines );
7a7c249c 370}
371
372sub _make_dotline {
373 my( $obj, %attr ) = @_;
374 my @pairs;
375 foreach my $k ( keys %attr ) {
986bbd1b 376 my $v = _dotquote( $attr{$k} );
377 push( @pairs, "$k=$v" );
7a7c249c 378 }
986bbd1b 379 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 380}
381
986bbd1b 382sub _dotquote {
383 my( $str ) = @_;
384 return $str if $str =~ /^[A-Za-z0-9]+$/;
385 $str =~ s/\"/\\\"/g;
386 $str = '"' . $str . '"';
387 return $str;
388}
389
7a7c249c 390sub _by_vertex {
391 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
392}
8d9a1cd8 393
5c44c598 394=head2 situation_graph( $extant, $layered )
335a62ef 395
ea45d2a6 396Returns a graph which is the original stemma graph with all witnesses not
397in the %$extant hash marked as hypothetical, and witness layers added to
398the graph according to the list in @$layered. A layered (a.c.) witness is
399added as a parent of its main version, and additionally shares all other
400parents and children with that version.
335a62ef 401
402=cut
403
5c44c598 404sub situation_graph {
ace5fce5 405 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 406
407 my $graph = $self->graph->copy;
408 foreach my $vertex ( $graph->vertices ) {
409 # Set as extant any vertex that is extant in the stemma AND
410 # exists in the $extant hash.
411 my $class = 'hypothetical';
412 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
413 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
414 $graph->set_vertex_attribute( $vertex, 'class', $class );
415 }
416
335a62ef 417 # For each 'layered' witness in the layerwits array, add it to the graph
418 # as an ancestor of the 'main' witness, and otherwise with the same parent/
419 # child links as its main analogue.
420 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 421 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 422 foreach my $lw ( @$layerwits ) {
423 # Add the layered witness and set it with the same attributes as
424 # its 'main' analogue
5c44c598 425 throw( "Cannot add a layer to a hypothetical witness $lw" )
426 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 427 my $lwac = $lw . $layerlabel;
335a62ef 428 $graph->add_vertex( $lwac );
429 $graph->set_vertex_attributes( $lwac,
430 $graph->get_vertex_attributes( $lw ) );
431
432 # Set it as ancestor to the main witness
433 $graph->add_edge( $lwac, $lw );
434
435 # Give it the same ancestors and descendants as the main witness has,
436 # bearing in mind that those ancestors and descendants might also just
437 # have had a layered witness defined.
438 foreach my $v ( $graph->predecessors( $lw ) ) {
439 next if $v eq $lwac; # Don't add a loop
440 $graph->add_edge( $v, $lwac );
ace5fce5 441 $graph->add_edge( $v.$layerlabel, $lwac )
442 if $graph->has_vertex( $v.$layerlabel );
335a62ef 443 }
444 foreach my $v ( $graph->successors( $lw ) ) {
445 next if $v eq $lwac; # but this shouldn't occur
446 $graph->add_edge( $lwac, $v );
ace5fce5 447 $graph->add_edge( $lwac, $v.$layerlabel )
448 if $graph->has_vertex( $v.$layerlabel );
335a62ef 449 }
450 }
451 return $graph;
452}
453
027d819c 454=head2 as_svg
455
456Returns an SVG representation of the graph, calling as_dot first.
457
458=cut
459
8d9a1cd8 460sub as_svg {
461 my( $self, $opts ) = @_;
462 my $dot = $self->as_dot( $opts );
ea45d2a6 463 my @cmd = ( '-Tsvg' );
464 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 465 my $svg;
e79c23c7 466 my $dotfile = File::Temp->new();
e79c23c7 467 binmode $dotfile, ':utf8';
8d9a1cd8 468 print $dotfile $dot;
459c39b3 469 close $dotfile;
e79c23c7 470 push( @cmd, $dotfile->filename );
471 run( \@cmd, ">", binary(), \$svg );
428bcf0b 472 return decode_utf8( $svg );
e79c23c7 473}
474
027d819c 475=head2 witnesses
476
477Returns a list of the extant witnesses represented in the stemma.
478
479=cut
480
08e0fb85 481sub witnesses {
482 my $self = shift;
483 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
484 $self->graph->vertices;
485 return @wits;
486}
487
06e7cbc7 488=head2 hypotheticals
489
490Returns a list of the hypothetical witnesses represented in the stemma.
491
492=cut
493
bebec0e9 494sub hypotheticals {
495 my $self = shift;
496 my @wits = grep
497 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
498 $self->graph->vertices;
499 return @wits;
500}
501
37bf09f4 502=head2 root_graph( $root_vertex )
ea45d2a6 503
504If the stemma graph is undirected, make it directed with $root_vertex at the root.
505If it is directed, re-root it.
506
507=cut
508
509sub root_graph {
510 my( $self, $rootvertex ) = @_;
511 my $graph;
512 if( $self->is_undirected ) {
513 $graph = $self->graph;
514 } else {
515 # Make an undirected version of this graph.
516 $graph = $self->graph->undirected_copy();
517 }
518 my $rooted = Graph->new();
519 $rooted->add_vertex( $rootvertex );
520 my @next = ( $rootvertex );
521 while( @next ) {
522 my @children;
523 foreach my $v ( @next ) {
524 # Place its not-placed neighbors (ergo children) in the tree
525 # and connect them
526 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
527 $graph->neighbors( $v ) ) {
528 $rooted->add_vertex( $n );
529 $rooted->add_edge( $v, $n );
530 push( @children, $n );
531 }
532 }
533 @next = @children;
534 }
535 # Set the vertex classes
536 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
537 $self->graph->hypotheticals;
538 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
539 $self->graph->witnesses;
540 return $rooted;
541}
542
543
63778331 544sub throw {
545 Text::Tradition::Error->throw(
546 'ident' => 'Stemma error',
547 'message' => $_[0],
548 );
549}
550
551
9463b0bf 552no Moose;
553__PACKAGE__->meta->make_immutable;
554
5551;
027d819c 556
557=head1 LICENSE
558
559This package is free software and is provided "as is" without express
560or implied warranty. You can redistribute it and/or modify it under
561the same terms as Perl itself.
562
563=head1 AUTHOR
564
565Tara L Andrews E<lt>aurum@cpan.orgE<gt>