}
# Add the relationship graph to the XML
- $self->relations->as_graphml( $root, $graphml_ns, \%node_hash, \%edge_data_keys );
+ $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
+ $node_data_keys{'id'}, \%edge_data_keys );
# Save and return the thing
my $result = decode_utf8( $graphml->toString(1) );
=head1 NAME
-Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
+Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
+between readings in a given collation
=head1 DESCRIPTION
if( $self->graph->has_edge( $source, $target ) ) {
$rel = $self->graph->get_edge_attribute( $source, $target, 'object' );
if( $rel->type ne $options->type ) {
- warn "Relationship of type " . $rel->type
+ warn "Another relationship of type " . $rel->type
. "already exists between $source and $target";
return;
} else {
}
sub as_graphml {
- my( $self, $graphml_ns, $xmlroot, $node_hash, $edge_keys ) = @_;
+ my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
$rgraph->setAttribute( 'edgedefault', 'directed' );
$rgraph->setAttribute( 'parse.order', 'nodesfirst' );
# Add the vertices according to their XML IDs
- foreach my $n ( sort _by_xmlid values( %$node_hash ) ) {
+ my %rdg_lookup = ( reverse %$node_hash );
+ foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
$n_el->setAttribute( 'id', $n );
+ _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
}
# Add the relationship edges, with their object information
}
sub _by_xmlid {
- $a =~ s/\D//g;
- $b =~ s/\D//g;
- return $a <=> $b;
+ my $tmp_a = $a;
+ my $tmp_b = $b;
+ $tmp_a =~ s/\D//g;
+ $tmp_b =~ s/\D//g;
+ return $tmp_a <=> $tmp_b;
}
sub _add_graphml_data {
sub parse {
my( $tradition, $opts ) = @_;
- my $graph_data = graphml_parse( $opts );
+ my( $graph_data ) = graphml_parse( $opts );
my $collation = $tradition->collation;
# First add the readings to the graph.
sub graphml_parse {
my( $opts ) = @_;
- my $graph_hash = { 'nodes' => [],
- 'edges' => [] };
-
my $parser = XML::LibXML->new();
my $doc;
if( exists $opts->{'string'} ) {
return;
}
- my( $graphattr, $nodedata, $witnesses ) = ( {}, {}, {} );
+ my( $graphattr, $nodedata, $edgedata ) = ( {}, {}, {} );
my $graphml = $doc->documentElement();
$xpc = XML::LibXML::XPathContext->new( $graphml );
$xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' );
- # First get the ID keys, for witnesses and for collation data
+ # First get the ID keys, for node/edge data and for collation data
foreach my $k ( $xpc->findnodes( '//g:key' ) ) {
- # Each key has a 'for' attribute; the edge keys are witnesses, and
- # the node keys contain an ID and string for each node.
+ # Each key has a 'for' attribute to say whether it is for graph,
+ # node, or edge.
my $keyid = $k->getAttribute( 'id' );
my $keyname = $k->getAttribute( 'attr.name' );
} elsif( $dtype eq 'node' ) {
$nodedata->{$keyid} = $keyname;
} else {
- $witnesses->{$keyid} = $keyname;
+ $edgedata->{$keyid} = $keyname;
}
}
- my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0];
- $graph_hash->{'name'} = $graph_el->getAttribute( 'id' );
-
- my $node_reg = {};
-
- # Read in graph globals (if any).
- # print STDERR "Reading graphml global data\n";
- foreach my $dkey ( keys %$graphattr ) {
- my $keyname = $graphattr->{$dkey};
- my $keyvalue = _lookup_node_data( $graph_el, $dkey );
- $graph_hash->{'global'}->{$keyname} = $keyvalue;
- }
-
- # Add the nodes to the graph hash.
- # print STDERR "Reading graphml nodes\n";
- my @nodes = $xpc->findnodes( '//g:node' );
- foreach my $n ( @nodes ) {
- # Could use a better way of registering these
- my $node_hash = {};
- foreach my $dkey ( keys %$nodedata ) {
- my $keyname = $nodedata->{$dkey};
- my $keyvalue = _lookup_node_data( $n, $dkey );
- $node_hash->{$keyname} = $keyvalue if defined $keyvalue;
- }
- $node_reg->{$n->getAttribute( 'id' )} = $node_hash;
- push( @{$graph_hash->{'nodes'}}, $node_hash );
- }
-
- # Now add the edges, and cross-ref with the node objects.
- # print STDERR "Reading graphml edges\n";
- my @edges = $xpc->findnodes( '//g:edge' );
- foreach my $e ( @edges ) {
- my $from = $e->getAttribute('source');
- my $to = $e->getAttribute('target');
-
- # We don't know whether the edge data is one per witness
- # or one per witness type, or something else. So we just
- # save it and let our calling parser decide.
- my $edge_hash = {
- 'source' => $node_reg->{$from},
- 'target' => $node_reg->{$to},
- };
- foreach my $wkey( keys %$witnesses ) {
- my $wname = $witnesses->{$wkey};
- my $wlabel = _lookup_node_data( $e, $wkey );
- $edge_hash->{$wname} = $wlabel if $wlabel;
- }
- push( @{$graph_hash->{'edges'}}, $edge_hash );
+ my @returned_graphs;
+ foreach my $graph_el ( $xpc->findnodes( '/g:graphml/g:graph' ) ) {
+ my $graph_hash = { 'nodes' => [],
+ 'edges' => [],
+ 'name' => $graph_el->getAttribute( 'id' ) };
+
+ my $node_reg = {};
+
+ # Read in graph globals (if any).
+ # print STDERR "Reading graphml global data\n";
+ foreach my $dkey ( keys %$graphattr ) {
+ my $keyname = $graphattr->{$dkey};
+ my $keyvalue = _lookup_node_data( $graph_el, $dkey );
+ $graph_hash->{'global'}->{$keyname} = $keyvalue if defined $keyvalue;
+ }
+
+ # Add the nodes to the graph hash.
+ # print STDERR "Reading graphml nodes\n";
+ my @nodes = $xpc->findnodes( './/g:node', $graph_el );
+ foreach my $n ( @nodes ) {
+ # Could use a better way of registering these
+ my $node_hash = {};
+ foreach my $dkey ( keys %$nodedata ) {
+ my $keyname = $nodedata->{$dkey};
+ my $keyvalue = _lookup_node_data( $n, $dkey );
+ $node_hash->{$keyname} = $keyvalue if defined $keyvalue;
+ }
+ $node_reg->{$n->getAttribute( 'id' )} = $node_hash;
+ push( @{$graph_hash->{'nodes'}}, $node_hash );
+ }
+
+ # Now add the edges, and cross-ref with the node objects.
+ # print STDERR "Reading graphml edges\n";
+ my @edges = $xpc->findnodes( './/g:edge', $graph_el );
+ foreach my $e ( @edges ) {
+ my $from = $e->getAttribute('source');
+ my $to = $e->getAttribute('target');
+
+ # We don't know whether the edge data is one per witness
+ # or one per witness type, or something else. So we just
+ # save it and let our calling parser decide.
+ my $edge_hash = {
+ 'source' => $node_reg->{$from},
+ 'target' => $node_reg->{$to},
+ };
+ foreach my $wkey( keys %$edgedata ) {
+ my $wname = $edgedata->{$wkey};
+ my $wlabel = _lookup_node_data( $e, $wkey );
+ $edge_hash->{$wname} = $wlabel if $wlabel;
+ }
+ push( @{$graph_hash->{'edges'}}, $edge_hash );
+ }
+ push( @returned_graphs, $graph_hash );
}
- return $graph_hash;
+ return @returned_graphs;
}
=cut
-my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
+my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY,
$START_KEY, $END_KEY, $LACUNA_KEY,
$SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY,
- $COLO_KEY, $CORRECT_KEY, $INDEP_KEY )
- = qw/ name reading identical rank class
+ $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY )
+ = qw/ id text identical rank
is_start is_end is_lacuna
source target witness extra relationship
- equal_rank non_correctable non_independent /;
+ scope non_correctable non_independent /;
sub parse {
my( $tradition, $opts ) = @_;
- my $graph_data = graphml_parse( $opts );
+
+ # Collation data is in the first graph; relationship-specific stuff
+ # is in the second.
+ my( $graph_data, $rel_data ) = graphml_parse( $opts );
my $collation = $tradition->collation;
my %witnesses;
- # Set up the graph-global attributes. They will appear in the
- # hash under their accessor names.
- my $use_version;
# print STDERR "Setting graph globals\n";
$tradition->name( $graph_data->{'name'} );
+ my $use_version;
foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
my $val = $graph_data->{'global'}->{$gkey};
if( $gkey eq 'version' ) {
$collation->$gkey( $val );
}
}
- if( $use_version ) {
- # Many of our tags changed.
- $IDKEY = 'id';
- $TOKENKEY = 'text';
- $COLO_KEY = 'colocated';
- }
# Add the nodes to the graph.
- my $extra_data = {}; # Keep track of data that needs to be processed
- # after the nodes & edges are created.
# print STDERR "Adding graph nodes\n";
- foreach my $n ( @{$graph_data->{'nodes'}} ) {
- unless( $use_version ) {
- # Backwards compat!
- $n->{$START_KEY} = 1 if $n->{$IDKEY} eq '#START#';
- $n->{$END_KEY} = 1 if $n->{$IDKEY} eq '#END#';
- }
-
+ foreach my $n ( @{$graph_data->{'nodes'}} ) {
# If it is the start or end node, we already have one, so
# grab the rank and go.
next if( defined $n->{$START_KEY} );
# First extract the data that we can use without reference to
# anything else.
- my %node_data = %$n; # Need $n itself untouched for edge processing
# Create the node.
my $reading_options = {
- 'id' => delete $node_data{$IDKEY},
- 'is_lacuna' => delete $node_data{$LACUNA_KEY},
+ 'id' => $n->{$IDKEY},
+ 'is_lacuna' => $n->{$LACUNA_KEY},
};
- my $rank = delete $node_data{$RANK_KEY};
+ my $rank = $n->{$RANK_KEY};
$reading_options->{'rank'} = $rank if $rank;
- my $text = delete $node_data{$TOKENKEY};
+ my $text = $n->{$TOKENKEY};
$reading_options->{'text'} = $text if $text;
- # This is a horrible hack for backwards compatibility.
- unless( $use_version ) {
- $reading_options->{'is_lacuna'} = 1
- if $reading_options->{'text'} =~ /^\#LACUNA/;
- }
-
- delete $node_data{$CLASS_KEY}; # Not actually used
my $gnode = $collation->add_reading( $reading_options );
-
- # Now save the data that we need for post-processing,
- # if it exists. TODO this is unneeded after conversion
- if ( keys %node_data ) {
- $extra_data->{$gnode->id} = \%node_data
- }
}
# Now add the edges.
foreach my $e ( @{$graph_data->{'edges'}} ) {
my $from = $e->{$SOURCE_KEY};
my $to = $e->{$TARGET_KEY};
- my $class = $e->{$CLASS_KEY} || 'path';
-
- # We may have more information depending on the class.
- if( $class eq 'path' ) {
- # We need the witness, and whether it is an 'extra' reading path.
- my $wit = $e->{$WITNESS_KEY};
- warn "No witness label on path edge!" unless $wit;
- my $extra = $e->{$EXTRA_KEY};
- my $label = $wit . ( $extra ? $collation->ac_label : '' );
- $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
- # Add the witness if we don't have it already.
- unless( $witnesses{$wit} ) {
- $tradition->add_witness( sigil => $wit );
- $witnesses{$wit} = 1;
- }
- $tradition->witness( $wit )->is_layered( 1 ) if $extra;
- } elsif( $class eq 'relationship' ) {
- # We need the metadata about the relationship.
- my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
- $opts->{$COLO_KEY} = $e->{$COLO_KEY}
- if exists $e->{$COLO_KEY};
- $opts->{$CORRECT_KEY} = $e->{$CORRECT_KEY}
- if exists $e->{$CORRECT_KEY};
- $opts->{$INDEP_KEY} = $e->{$INDEP_KEY}
- if exists $e->{$INDEP_KEY};
- warn "No relationship type for relationship edge!" unless $opts->{'type'};
- my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
- unless( $ok ) {
- my $relinfo = $opts->{'type'} . ' '
- . join( ' -> ', $from->{$IDKEY}, $to->{$IDKEY} );
- warn "Did not add relationship $relinfo: @result";
- }
- }
- }
- ## Deal with node information (transposition, relationships, etc.) that
- ## needs to be processed after all the nodes are created.
- ## TODO unneeded after conversion
- unless( $use_version ) {
- # print STDERR "Adding second-pass node data\n";
- foreach my $nkey ( keys %$extra_data ) {
- foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
- my $this_reading = $collation->reading( $nkey );
- if( $edkey eq $TRANSPOS_KEY ) {
- my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
- $this_reading->set_identical( $other_reading );
- } else {
- warn "Unfamiliar reading node data $edkey for $nkey";
- }
- }
+ # We need the witness, and whether it is an 'extra' reading path.
+ my $wit = $e->{$WITNESS_KEY};
+ warn "No witness label on path edge!" unless $wit;
+ my $extra = $e->{$EXTRA_KEY};
+ my $label = $wit . ( $extra ? $collation->ac_label : '' );
+ $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
+ # Add the witness if we don't have it already.
+ unless( $witnesses{$wit} ) {
+ $tradition->add_witness( sigil => $wit );
+ $witnesses{$wit} = 1;
}
+ $tradition->witness( $wit )->is_layered( 1 ) if $extra;
}
+
+ ## Done with the main graph, now look at the relationships.
+ # Nodes are added via the call to add_reading above. We only need
+ # add the relationships themselves.
+ # TODO check that scoping does trt
+ foreach my $e ( @{$rel_data->{'edges'}} ) {
+ my $from = $e->{$SOURCE_KEY};
+ my $to = $e->{$TARGET_KEY};
+ my $relationship_opts = {
+ 'type' => $e->{$RELATIONSHIP_KEY},
+ 'scope' => $e->{$SCOPE_KEY},
+ };
+ $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY}
+ if exists $e->{$CORRECT_KEY};
+ $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY}
+ if exists $e->{$INDEP_KEY};
+ $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
+ $relationship_opts );
+ }
}
1;