reconfigure stexaminer layout
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
30texts, particularly medieval ones. The Collation is the central feature of
31a Tradition, where the text, its sequence of readings, and its relationships
32between readings are actually kept.
33
34=head1 DOT SYNTAX
35
36The easiest way to define a stemma (which is a directed acyclic graph, denoting
37the scholar's hypothesis concerning which text(s) were copied from which other(s))
38is to use a special form of the 'dot' syntax of GraphViz.
39
40Each stemma opens with the line
41
42 digraph Stemma {
43
44and continues with a list of all manuscript witnesses in the stemma, whether
45extant witnesses or missing archetypes or hyparchetypes. Each of these is
46listed by its sigil on its own line, e.g.:
47
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
50 Ms4 [ class=extant ]
51
52Extant witnesses are listed with class=extant; missing or postulated witnesses
53are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54unique name or number, but can be represented as anonymous with the addition
55of 'label=*' to their lines. Greek letters or other special characters may be
56used as names, but they must always be wrapped in double quotes.
57
58Links between manuscripts are then listed with arrow notation, as below. These
59lines show the direction of copying, one step at a time, for the entire stemma.
60
61 alpha -> 1
62 1 -> Ms4
63
64The final line in the definition should be the closing brace:
65
66 }
67
68Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69from the archetype O and C was copied from B, the definition would be:
70
71 digraph Stemma {
72 O [ class=hypothetical]
73 A [ class=extant ]
74 B [ class=extant ]
75 C [ class=extant ]
76 O -> A
77 O -> B
78 B -> C
79 }
80
81=head1 CONSTRUCTOR
82
83=head2 new
84
85The constructor. This should generally be called from Text::Tradition, but
86if called directly it takes the following options:
87
88=over
89
90=item * collation - The collation with which the stemma is associated.
91
92=item * dot - A filehandle open to a DOT representation of the stemma graph.
93
94=back
95
96=cut
97
9463b0bf 98has collation => (
99 is => 'ro',
100 isa => 'Text::Tradition::Collation',
101 required => 1,
8d9a1cd8 102 weak_ref => 1,
9463b0bf 103 );
104
e05997e2 105has graph => (
106 is => 'rw',
107 isa => 'Graph',
108 predicate => 'has_graph',
109 );
9457207b 110
e05997e2 111sub BUILD {
112 my( $self, $args ) = @_;
113 # If we have been handed a dotfile, initialize it into a graph.
114 if( exists $args->{'dot'} ) {
027d819c 115 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 116 }
c0ccdb62 117}
118
027d819c 119sub _graph_from_dot {
8d9a1cd8 120 my( $self, $dotfh ) = @_;
8d9a1cd8 121 my $reader = Graph::Reader::Dot->new();
122 my $graph = $reader->read_graph( $dotfh );
7a7c249c 123 if( $graph ) {
124 $self->graph( $graph );
125 # Go through the nodes and set any non-hypothetical node to extant.
126 foreach my $v ( $self->graph->vertices ) {
127 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
128 unless $self->graph->has_vertex_attribute( $v, 'class' );
129 }
130 } else {
63778331 131 throw( "Failed to parse dot in $dotfh" );
7a7c249c 132 }
8d9a1cd8 133}
134
027d819c 135=head1 METHODS
136
137=head2 as_dot( \%options )
138
139Returns a normal dot representation of the stemma layout, suitable for rendering
140with GraphViz. Options include:
141
142=over
143
144=item * graph - A hashref of global graph options.
145
146=item * node - A hashref of global node options.
147
148=item * edge - A hashref of global edge options.
149
150=back
151
152See the GraphViz documentation for the list of available options.
153
154=cut
155
8d9a1cd8 156sub as_dot {
e367f5c0 157 my( $self, $opts ) = @_;
7a7c249c 158
159 # Get default and specified options
e02340f3 160 my %graphopts = (
161 # 'ratio' => 1,
162 );
7a7c249c 163 my %nodeopts = (
164 'fontsize' => 11,
7a7c249c 165 'style' => 'filled',
166 'fillcolor' => 'white',
e02340f3 167 'color' => 'white',
7a7c249c 168 'shape' => 'ellipse', # Shape for the extant nodes
169 );
170 my %edgeopts = (
e02340f3 171 'arrowhead' => 'none',
7a7c249c 172 );
173 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
174 if $opts->{'graph'};
175 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
176 if $opts->{'node'};
177 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
178 if $opts->{'edge'};
179
180 my @dotlines;
181 push( @dotlines, 'digraph stemma {' );
182 ## Print out the global attributes
183 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
184 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 185 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
186
187 # Add each of the nodes.
188 foreach my $n ( $self->graph->vertices ) {
e02340f3 189 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
190 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
191 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 192 } else {
7a7c249c 193 # Use the default display settings.
194 push( @dotlines, " $n;" );
e79c23c7 195 }
196 }
7a7c249c 197 # Add each of our edges.
198 foreach my $e ( $self->graph->edges ) {
199 my( $from, $to ) = @$e;
200 push( @dotlines, " $from -> $to;" );
201 }
202 push( @dotlines, '}' );
e79c23c7 203
7a7c249c 204 return join( "\n", @dotlines );
205}
206
88a6bac5 207=head2 editable( $linesep )
027d819c 208
88a6bac5 209Returns a version of the graph rendered in our definition format. The
210$linesep argument defaults to newline; set it to the empty string or to
211a space if the result is to be sent via JSON.
027d819c 212
213=cut
7a7c249c 214
7a7c249c 215sub editable {
216 my $self = shift;
88a6bac5 217 my $join = shift || "\n";
7a7c249c 218 my @dotlines;
219 push( @dotlines, 'digraph stemma {' );
220 my @real; # A cheap sort
221 foreach my $n ( sort $self->graph->vertices ) {
222 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
223 $c = 'extant' unless $c;
224 if( $c eq 'extant' ) {
225 push( @real, $n );
226 } else {
227 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
228 }
e367f5c0 229 }
7a7c249c 230 # Now do the real ones
231 foreach my $n ( @real ) {
232 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
233 }
234 foreach my $e ( sort _by_vertex $self->graph->edges ) {
235 my( $from, $to ) = @$e;
236 push( @dotlines, " $from -> $to;" );
237 }
238 push( @dotlines, '}' );
88a6bac5 239 return join( $join, @dotlines );
7a7c249c 240}
241
242sub _make_dotline {
243 my( $obj, %attr ) = @_;
244 my @pairs;
245 foreach my $k ( keys %attr ) {
246 my $v = $attr{$k};
247 $v =~ s/\"/\\\"/g;
248 push( @pairs, "$k=\"$v\"" );
249 }
250 return sprintf( " %s [ %s ];", $obj, join( ', ', @pairs ) );
8d9a1cd8 251}
252
7a7c249c 253sub _by_vertex {
254 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
255}
8d9a1cd8 256
027d819c 257=head2 as_svg
258
259Returns an SVG representation of the graph, calling as_dot first.
260
261=cut
262
8d9a1cd8 263sub as_svg {
264 my( $self, $opts ) = @_;
265 my $dot = $self->as_dot( $opts );
e79c23c7 266 my @cmd = qw/dot -Tsvg/;
267 my( $svg, $err );
268 my $dotfile = File::Temp->new();
269 ## TODO REMOVE
270 # $dotfile->unlink_on_destroy(0);
271 binmode $dotfile, ':utf8';
8d9a1cd8 272 print $dotfile $dot;
e79c23c7 273 push( @cmd, $dotfile->filename );
274 run( \@cmd, ">", binary(), \$svg );
275 $svg = decode_utf8( $svg );
276 return $svg;
277}
278
027d819c 279=head2 witnesses
280
281Returns a list of the extant witnesses represented in the stemma.
282
283=cut
284
08e0fb85 285sub witnesses {
286 my $self = shift;
287 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
288 $self->graph->vertices;
289 return @wits;
290}
291
bebec0e9 292sub hypotheticals {
293 my $self = shift;
294 my @wits = grep
295 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
296 $self->graph->vertices;
297 return @wits;
298}
299
63778331 300sub throw {
301 Text::Tradition::Error->throw(
302 'ident' => 'Stemma error',
303 'message' => $_[0],
304 );
305}
306
307
9463b0bf 308no Moose;
309__PACKAGE__->meta->make_immutable;
310
3111;
027d819c 312
313=head1 LICENSE
314
315This package is free software and is provided "as is" without express
316or implied warranty. You can redistribute it and/or modify it under
317the same terms as Perl itself.
318
319=head1 AUTHOR
320
321Tara L Andrews E<lt>aurum@cpan.orgE<gt>