=cut
-my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY )
- = qw/ name reading identical position /;
+# Hash keys that parse() looks up in node and edge data. TODO share these with Collation.pm somehow
+my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
+ $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
+ = qw/ name reading identical rank class
+ source target witness extra relationship/;
sub parse {
my( $tradition, $graphml_str ) = @_;
+ # Load the GraphML in $graphml_str into $tradition's collation and witness list.
+ # TODO this is begging for stream parsing instead of multiple loops.
my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
my $collation = $tradition->collation;
my %witnesses;
-
+
+ # Set up the graph-global attributes. They will appear in the
+ # hash under their accessor names.
+ # TODO Consider simplifying this for nodes & edges as well.
+ print STDERR "Setting graph globals\n";
+ foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
+ my $val = $graph_data->{'attr'}->{$gkey};
+ $collation->$gkey( $val ); # NOTE(review): method name comes from the parsed data; assumes the collation has such an accessor - confirm
+ }
+
# Add the nodes to the graph.
+ # TODO Are we adding extra start/end nodes?
my $extra_data = {}; # Keep track of data that needs to be processed
# after the nodes & edges are created.
+ print STDERR "Adding graph nodes\n";
foreach my $n ( @{$graph_data->{'nodes'}} ) {
- # Could use a better way of registering these
- my %node_data = %$n;
- my $nodeid = delete $node_data{$IDKEY};
- my $reading = delete $node_data{$TOKENKEY};
- my $gnode = $collation->add_reading( $nodeid );
- $gnode->text( $reading );
-
- # Now save the rest of the data, i.e. not the ID or label,
- # if it exists.
- if ( keys %node_data ) {
- $extra_data->{$nodeid} = \%node_data;
- }
+ # First extract the data that we can use without reference to
+ # anything else.
+ my %node_data = %$n; # Need $n itself untouched for edge processing
+ my $nodeid = delete $node_data{$IDKEY};
+ my $reading = delete $node_data{$TOKENKEY};
+ my $class = delete $node_data{$CLASS_KEY} || '';
+ my $rank = delete $node_data{$RANK_KEY};
+
+ # Create the node. Current valid classes are common and meta.
+ # Everything else is a normal reading.
+ my $gnode = $collation->add_reading( $nodeid );
+ $gnode->text( $reading );
+ $gnode->make_common if $class eq 'common';
+ $gnode->is_meta( 1 ) if $class eq 'meta';
+ $gnode->rank( $rank ) if defined $rank;
+
+ # Now save the data that we need for post-processing,
+ # if it exists.
+ if ( keys %node_data ) {
+ $extra_data->{$nodeid} = \%node_data
+ }
}
-
+
# Now add the edges.
+ print STDERR "Adding graph edges\n";
foreach my $e ( @{$graph_data->{'edges'}} ) {
- my %edge_data = %$e;
- my $from = delete $edge_data{'source'};
- my $to = delete $edge_data{'target'};
-
- # Whatever is left tells us what kind of edge it is.
- foreach my $wkey ( keys %edge_data ) {
- if( $wkey =~ /^witness/ ) {
- my $wit = $edge_data{$wkey};
- unless( $witnesses{$wit} ) {
- $tradition->add_witness( sigil => $wit );
- $witnesses{$wit} = 1;
- }
- my $label = $wkey eq 'witness_ante_corr'
- ? $wit . $collation->ac_label : $wit;
- $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
- } else {
- my $rel = $edge_data{$wkey};
- # TODO handle global relationships
- $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} );
- }
- }
+ my $from = $e->{$SOURCE_KEY};
+ my $to = $e->{$TARGET_KEY};
+ my $class = $e->{$CLASS_KEY} || ''; # '' when absent, so the eq tests below never see undef
+
+ # We may have more information depending on the class.
+ if( $class eq 'path' ) {
+ # We need the witness, and whether it is an 'extra' reading path.
+ my $wit = $e->{$WITNESS_KEY};
+ unless( defined $wit && length $wit ) { warn "No witness label on path edge!"; next; }
+ my $extra = $e->{$EXTRA_KEY};
+ my $label = $wit . ( $extra ? $collation->ac_label : '' );
+ $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
+ # Add the witness if we don't have it already.
+ unless( $witnesses{$wit} ) {
+ $tradition->add_witness( sigil => $wit );
+ $witnesses{$wit} = 1;
+ }
+ $witnesses{$wit} = 2 if $extra; # 2 = this witness also needs its uncorrected path walked below
+ } elsif( $class eq 'relationship' ) {
+ # We need the metadata about the relationship.
+ my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
+ $opts->{'equal_rank'} = $e->{'equal_rank'}
+ if exists $e->{'equal_rank'};
+ $opts->{'non_correctable'} = $e->{'non_correctable'}
+ if exists $e->{'non_correctable'};
+ $opts->{'non_independent'} = $e->{'non_independent'}
+ if exists $e->{'non_independent'};
+ warn "No relationship type for relationship edge!" unless $opts->{'type'};
+ $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
+ }
}
## Deal with node information (transposition, relationships, etc.) that
## needs to be processed after all the nodes are created.
+ print STDERR "Adding second-pass node data\n";
foreach my $nkey ( keys %$extra_data ) {
- foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
- my $this_reading = $collation->reading( $nkey );
- if( $edkey eq $TRANSPOS_KEY ) {
- my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
- if( $collation->linear ) {
- $this_reading->set_identical( $other_reading );
- } else {
- $collation->merge_readings( $other_reading, $this_reading );
- }
- } elsif ( $edkey eq $POSITION_KEY ) {
- $this_reading->position( $extra_data->{$nkey}->{$edkey} );
- } else {
- warn "Unfamiliar reading node data $edkey for $nkey";
- }
- }
+ foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
+ my $this_reading = $collation->reading( $nkey );
+ if( $edkey eq $TRANSPOS_KEY ) {
+ my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
+ $this_reading->set_identical( $other_reading );
+ } else {
+ warn "Unfamiliar reading node data $edkey for $nkey";
+ }
+ }
+ }
+
+ # Set the $witness->path arrays for each wit.
+ print STDERR "Walking paths for witnesses\n";
+ foreach my $wit ( $tradition->witnesses ) {
+ my @path = $collation->reading_sequence( $collation->start, $collation->end,
+ $wit->sigil );
+ $wit->path( \@path );
+ if( ( $witnesses{$wit->sigil} || 0 ) == 2 ) {
+ # Get the uncorrected path too
+ my @uc = $collation->reading_sequence( $collation->start, $collation->end,
+ $wit->sigil . $collation->ac_label, $wit->sigil );
+ $wit->uncorrected_path( \@uc );
+ }
}
-
- # We know what the beginning and ending nodes are, no need to
- # search or reset.
- my $end_node = $collation->reading( '#END#' );
- $DB::single = 1;
- # Walk the paths and make reading sequences for our witnesses.
- $collation->walk_witness_paths( $end_node );
}
=back