From: tla Date: Wed, 15 Jan 2014 14:21:09 +0000 (+0100) Subject: Collect all hacks for Graph::Reader::Dot into a single utility. Fixes #15 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=11954259de7d1fd546e79098b2ec091e6e8a8fda;p=scpubgit%2Fstemmatology.git Collect all hacks for Graph::Reader::Dot into a single utility. Fixes #15 --- diff --git a/analysis/lib/Text/Tradition/HasStemma.pm b/analysis/lib/Text/Tradition/HasStemma.pm index 18ecc67..aec811a 100644 --- a/analysis/lib/Text/Tradition/HasStemma.pm +++ b/analysis/lib/Text/Tradition/HasStemma.pm @@ -82,7 +82,9 @@ before 'set_stemweb_jobid' => sub { } }; -=head2 add_stemma( $dotfile ) +=head2 add_stemma( dotfile => $dotfile ) +=head2 add_stemma( dot => $dotstring ) +=head2 add_stemma( $stemma_obj ) Initializes a Text::Tradition::Stemma object from the given dotfile, and associates it with the tradition. @@ -109,19 +111,12 @@ is( $t->stemma(0), $s, "Tradition hands back the right stemma" ); sub add_stemma { my $self = shift; - my %opts = @_; - my $stemma_fh; - if( $opts{'dotfile'} ) { - open $stemma_fh, '<', $opts{'dotfile'} - or warn "Could not open file " . $opts{'dotfile'}; - } elsif( $opts{'dot'} ) { - my $str = $opts{'dot'}; - open $stemma_fh, '<', \$str; + my $stemma; + if( ref( @_ ) eq 'Text::Tradition::Stemma' ) { + $stemma = shift; + } else { + $stemma = Text::Tradition::Stemma->new( @_ ); } - # Assume utf-8 - binmode $stemma_fh, ':utf8'; - my $stemma = Text::Tradition::Stemma->new( - 'dot' => $stemma_fh ); $self->_add_stemma( $stemma ) if $stemma; return $stemma; } diff --git a/analysis/lib/Text/Tradition/Stemma.pm b/analysis/lib/Text/Tradition/Stemma.pm index 9a35feb..b9f84f5 100644 --- a/analysis/lib/Text/Tradition/Stemma.pm +++ b/analysis/lib/Text/Tradition/Stemma.pm @@ -6,7 +6,8 @@ use Graph; use Graph::Reader::Dot; use IPC::Run qw/ run binary /; use Text::Tradition::Error; -use Text::Tradition::StemmaUtil qw/ editable_graph display_graph parse_newick /; +use Text::Tradition::StemmaUtil qw/ read_graph editable_graph display_graph + parse_newick /; use Moose; =head1 NAME @@ -103,20 +104,15 @@ use TryCatch; use_ok( 'Text::Tradition::Stemma' ); # Try to create a bad graph -my $baddotfh; -open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile"; try { - my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh ); + my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_bad.dot' ); ok( 0, "Created broken stemma from dotfile with syntax error" ); } catch( Text::Tradition::Error $e ) { like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" ); } # Create a good graph -my $dotfh; -open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile"; -binmode( $dotfh, ':utf8' ); -my $stemma = Text::Tradition::Stemma->new( dot => $dotfh ); +my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/florilegium.dot' ); is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" ); is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" ); is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" ); @@ -129,10 +125,7 @@ foreach my $h ( $stemma->hypotheticals ) { ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" ); # Create an undirected graph -my $undirdotfh; -open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile"; -binmode( $undirdotfh, ':utf8' ); -my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh ); +my $udstemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_undirected.dot' ); is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" ); is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" ); is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" ); @@ -173,9 +166,20 @@ has from_jobid => ( sub BUILD { my( $self, $args ) = @_; # If we have been handed a dotfile, initialize it into a graph. + my $dotstring; if( exists $args->{'dot'} ) { - $self->_graph_from_dot( $args->{'dot'} ); - } + $dotstring = $args->{'dot'}; + } elsif( exists $args->{'dotfile'} ) { + # Read the file into a string. + my @dotlines; + open( DOTFH, $args->{'dotfile'} ) + or throw( "Could not read specified dot file " . $args->{'dotfile'} ); + binmode( DOTFH, ':encoding(UTF-8)' ); + @dotlines = ; + close DOTFH; + $dotstring = join( '', @dotlines ); + } + $self->_graph_from_dot( $dotstring ) if $dotstring; } before 'graph' => sub { @@ -194,37 +198,12 @@ before 'graph' => sub { }; sub _graph_from_dot { - my( $self, $dotfh ) = @_; - my $reader = Graph::Reader::Dot->new(); - # Redirect STDOUT in order to trap any error messages - syntax errors - # are evidently not fatal. - my $graph; - my $reader_out; - my $reader_err; - { - local(*STDOUT); - open( STDOUT, ">", \$reader_out ); - local(*STDERR); - open( STDERR, ">", \$reader_err ); - $graph = $reader->read_graph( $dotfh ); - close STDOUT; - close STDERR; - } - if( $reader_out && $reader_out =~ /error/s ) { - throw( "Error trying to parse dot: $reader_out" ); - } elsif( !$graph ) { - throw( "Failed to create graph from dot" ); - } + my( $self, $dotstring ) = @_; + my $graph = read_graph( $dotstring ); + ## HORRIBLE HACK but there is no API access to graph attributes! - my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma'; - # Correct for implicit graph -> digraph quirk of reader - if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) { - my $udgraph = $graph->undirected_copy; - foreach my $v ( $graph->vertices ) { - $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) ); - } - $graph = $udgraph; - } + my $graph_id = $graph->has_graph_attribute( 'name' ) + ? $graph->get_graph_attribute( 'name' ) : 'stemma'; $self->graph( $graph ); $self->set_identifier( $graph_id ); } @@ -298,10 +277,7 @@ in $dotstring. sub alter_graph { my( $self, $dotstring ) = @_; - my $dotfh; - open $dotfh, '<', \$dotstring; - binmode $dotfh, ':utf8'; - $self->_graph_from_dot( $dotfh ); + $self->_graph_from_dot( $dotstring ); } =head2 editable( $opts ) diff --git a/analysis/lib/Text/Tradition/StemmaUtil.pm b/analysis/lib/Text/Tradition/StemmaUtil.pm index 865b88d..2cde6c0 100644 --- a/analysis/lib/Text/Tradition/StemmaUtil.pm +++ b/analysis/lib/Text/Tradition/StemmaUtil.pm @@ -10,9 +10,10 @@ use File::chdir; use File::Temp; use File::Which; use Graph; +use Graph::Reader::Dot; use IPC::Run qw/ run binary /; use Text::Tradition::Error; -@EXPORT_OK = qw/ display_graph editable_graph +@EXPORT_OK = qw/ read_graph display_graph editable_graph character_input phylip_pars parse_newick newick_to_svg /; =head1 NAME @@ -27,6 +28,69 @@ running phylogenetic analysis on text collations. =head1 SUBROUTINES +=head2 read_graph( $dotstr) { + +Parses the graph specification on the filehandle in $dotstr and returns a Graph +object. This subroutine works around some deficiencies in Graph::Reader::Dot. + +=cut + +sub read_graph { + my $dotstr = shift; + # Graph::Reader::Dot does not handle bare non-ASCII Unicode word characters. + # We get around this by wrapping all words in double quotes, as long as they + # aren't already wrapped, and as long as they aren't the initial '(di)?graph'. + # Also need to deal correctly with the graph title. + if( $dotstr =~ /^\s*((di)?graph)\s+(.*?)\s*\{(.*)$/s ) { + my( $decl, $ident, $rest ) = ( $1, $3, $4 ); + unless( substr( $ident, 0, 1 ) eq '"' ) { + $ident = '"'.$ident.'"'; + } + $rest =~ s/(?new(); + # Redirect STDOUT in order to trap any error messages - syntax errors + # are evidently not fatal. + my $graph; + my $reader_out; + my $reader_err; + { + local(*STDOUT); + open( STDOUT, ">", \$reader_out ); + local(*STDERR); + open( STDERR, ">", \$reader_err ); + $graph = $reader->read_graph( $dotfh ); + close STDOUT; + close STDERR; + } + if( $reader_out && $reader_out =~ /error/s ) { + throw( "Error trying to parse dot: $reader_out" ); + } elsif( !$graph ) { + throw( "Failed to create graph from dot" ); + } + # Wrench the graph identifier out of the graph + ## HORRIBLE HACK but there is no API access to the graph identifier! + $graph->set_graph_attribute( 'name', $graph->[4]->{'name'} ); + + # Correct for implicit graph -> digraph quirk of reader + if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) { + my $udgraph = $graph->undirected_copy; + $udgraph->set_graph_attributes( $graph->get_graph_attributes ); + foreach my $v ( $graph->vertices ) { + $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) ); + } + $graph = $udgraph; + } + + return $graph; +} + =head2 display_graph( $graph, $opts ) Returns a dot specification intended for display, according to the logical diff --git a/analysis/t/data/florilegium.dot b/analysis/t/data/florilegium.dot index c63b0d8..e6aed66 100644 --- a/analysis/t/data/florilegium.dot +++ b/analysis/t/data/florilegium.dot @@ -20,19 +20,19 @@ digraph "Coislinianum lineage" { Q [ class=extant ]; S [ class=extant ]; T [ class=extant ]; - "α" -> A; - "α" -> T; - "α" -> "δ"; - "δ" -> 2; + α -> A; + α -> T; + α -> δ; + δ -> 2; 2 -> C; 2 -> B; B -> P; B -> S; - "δ" -> "γ"; - "γ" -> 3; + δ -> γ; + γ -> 3; 3 -> F; 3 -> H; - "γ" -> 4; + γ -> 4; 4 -> D; 4 -> 5; 5 -> Q; diff --git a/analysis/t/stemma.t b/analysis/t/stemma.t index 9d9526d..1bccc9e 100644 --- a/analysis/t/stemma.t +++ b/analysis/t/stemma.t @@ -72,7 +72,7 @@ like( $editable, qr/^digraph \"?Stemma/, "Got a dot edit graph" ); ok( $editable =~ /hypothetical/, "Graph contains an edit class" ); # Test changing the name of the Graph -$editable =~ s/^(digraph )\"?Stemma\"?/$1"Simple test stemma"/; +$editable =~ s/Stemma/Simple test stemma/; $stemma->alter_graph( $editable ); is( $stemma->identifier, "Simple test stemma", "Successfully changed name of graph" ); diff --git a/analysis/t/text_tradition_stemma.t b/analysis/t/text_tradition_stemma.t index da22e80..7995db4 100644 --- a/analysis/t/text_tradition_stemma.t +++ b/analysis/t/text_tradition_stemma.t @@ -13,20 +13,15 @@ use TryCatch; use_ok( 'Text::Tradition::Stemma' ); # Try to create a bad graph -my $baddotfh; -open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile"; try { - my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh ); + my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_bad.dot' ); ok( 0, "Created broken stemma from dotfile with syntax error" ); } catch( Text::Tradition::Error $e ) { like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" ); } # Create a good graph -my $dotfh; -open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile"; -binmode( $dotfh, ':utf8' ); -my $stemma = Text::Tradition::Stemma->new( dot => $dotfh ); +my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/florilegium.dot' ); is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" ); is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" ); is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" ); @@ -39,10 +34,7 @@ foreach my $h ( $stemma->hypotheticals ) { ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" ); # Create an undirected graph -my $undirdotfh; -open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile"; -binmode( $undirdotfh, ':utf8' ); -my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh ); +my $udstemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_undirected.dot' ); is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" ); is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" ); is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );