From: Tara L Andrews Date: Mon, 16 Jan 2012 20:06:01 +0000 (+0100) Subject: set up graphml output for relationships X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c84275ff42c4d3e6f7fbc13140101975c990101a;p=scpubgit%2Fstemmatology.git set up graphml output for relationships --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 85c2360..3cc85b8 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -65,6 +65,12 @@ has 'linear' => ( isa => 'Bool', default => 1, ); + +has 'collapse_punctuation' => ( + is => 'rw', + isa => 'Bool', + default => 1, + ); has 'ac_label' => ( is => 'rw', @@ -396,7 +402,7 @@ sub as_dot { if $endrank && $endrank == $reading->rank; # Need not output nodes without separate labels next if $reading->id eq $reading->text; - my $label = $reading->text; + my $label = $reading->punctuated_form; $label =~ s/\"/\\\"/g; $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label ); } @@ -528,7 +534,7 @@ sub as_graphml { witness => 'string', # ID/label for a path relationship => 'string', # ID/label for a relationship extra => 'boolean', # Path key - colocated => 'boolean', # Relationship key + scope => 'string', # Relationship key non_correctable => 'boolean', # Relationship key non_independent => 'boolean', # Relationship key ); @@ -568,6 +574,7 @@ sub as_graphml { $node_el->setAttribute( 'id', $node_xmlid ); foreach my $d ( keys %node_data ) { my $nval = $n->$d; + $nval = $n->punctuated_form if $d eq 'text'; _add_graphml_data( $node_el, $node_data_keys{$d}, $nval ) if defined $nval; } @@ -603,7 +610,7 @@ sub as_graphml { } # Add the relationship graph to the XML - $self->relations->as_graphml( $root ); + $self->relations->as_graphml( $root, $graphml_ns, \%node_hash, \%edge_data_keys ); # Save and return the thing my $result = decode_utf8( $graphml->toString(1) ); diff --git a/lib/Text/Tradition/Collation/Relationship.pm b/lib/Text/Tradition/Collation/Relationship.pm index f539601..11594b5 100644 --- a/lib/Text/Tradition/Collation/Relationship.pm +++ b/lib/Text/Tradition/Collation/Relationship.pm @@ -4,7 +4,7 @@ use Moose; use Moose::Util::TypeConstraints; enum 'RelationshipType' => qw( spelling orthographic grammatical meaning lexical - collation repetition transposition ); + collated repetition transposition ); enum 'RelationshipScope' => qw( local tradition global ); @@ -51,11 +51,13 @@ has 'scope' => ( has 'non_correctable' => ( is => 'ro', isa => 'Bool', + predicate => 'noncorr_set', ); has 'non_independent' => ( is => 'ro', isa => 'Bool', + predicate => 'nonind_set', ); # A read-only meta-Boolean attribute. diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index 2479d08..ba8b2de 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -183,7 +183,7 @@ sub add_relationship { foreach my $v ( @vectors ) { if( $self->graph->has_edge( @$v ) ) { # Is it locally scoped? - my $rel = $self->graph->get_edge_attribute( @$v ); + my $rel = $self->graph->get_edge_attribute( @$v, 'object' ); if( $rel->nonlocal ) { # TODO I think we should not be able to get here. warn "Found conflicting relationship at @$v"; @@ -262,20 +262,33 @@ sub related_readings { $reading = $reading->id; $return_object = 1; } - my @related = $self->graph->all_reachable( $reading ); + my @answer; if( $colocated ) { - my @colo; - foreach my $r ( @related ) { - my $obj = $self->graph->get_edge_attribute( $reading, $r, 'object' ); - push( @colo, $r ) if $obj->colocated; + my %found = ( $reading => 1 ); + my $check = [ $reading ]; + my $iter = 0; + while( @$check ) { + $DB::single = 1 if $iter++ > 100; + my $more = []; + foreach my $r ( @$check ) { + foreach my $nr ( $self->graph->neighbors( $r ) ) { + if( $self->graph->get_edge_attribute( $r, $nr, 'object' )->colocated ) { + push( @$more, $nr ) unless exists $found{$nr}; + $found{$nr} = 1; + } + } + } + $check = $more; } - @related = @colo; + @answer = keys %found; + } else { + @answer = $self->graph->all_reachable( $reading ); } if( $return_object ) { my $c = $self->collation; - return map { $c->reading( $_ ) } @related; + return map { $c->reading( $_ ) } @answer; } else { - return @related; + return @answer; } } @@ -311,8 +324,55 @@ sub merge_readings { $self->delete_reading( $deleted ); } -sub as_graphml { ## TODO - return; +sub as_graphml { + my( $self, $graphml_ns, $xmlroot, $node_hash, $edge_keys ) = @_; + + my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' ); + $rgraph->setAttribute( 'edgedefault', 'directed' ); + $rgraph->setAttribute( 'id', 'relationships', ); + $rgraph->setAttribute( 'parse.edgeids', 'canonical' ); + $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) ); + $rgraph->setAttribute( 'parse.nodeids', 'canonical' ); + $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) ); + $rgraph->setAttribute( 'parse.order', 'nodesfirst' ); + + # Add the vertices according to their XML IDs + foreach my $n ( sort _by_xmlid values( %$node_hash ) ) { + my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' ); + $n_el->setAttribute( 'id', $n ); + } + + # Add the relationship edges, with their object information + my $edge_ctr = 0; + foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) { + # Add an edge and fill in its relationship info. + my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' ); + $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} ); + $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} ); + $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); + + my $rel_obj = $self->graph->get_edge_attribute( @$e, 'object' ); + _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); + _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); + _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, + $rel_obj->non_correctable ) if $rel_obj->noncorr_set; + _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, + $rel_obj->non_independent ) if $rel_obj->nonind_set; + } +} + +sub _by_xmlid { + $a =~ s/\D//g; + $b =~ s/\D//g; + return $a <=> $b; +} + +sub _add_graphml_data { + my( $el, $key, $value ) = @_; + return unless defined $value; + my $data_el = $el->addNewChild( $el->namespaceURI, 'data' ); + $data_el->setAttribute( 'key', $key ); + $data_el->appendText( $value ); } no Moose; diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index 2fb91d2..65de99e 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -160,7 +160,6 @@ sub parse { } # See if we need to make an a.c. version of the witness. if( exists $app_ac->{$sig} ) { - $DB::single = 1; my @uncorrected; push( @uncorrected, @real_sequence ); foreach my $app ( keys %{$app_ac->{$sig}} ) {