From: Tara L Andrews Date: Sun, 2 Oct 2011 20:39:01 +0000 (+0200) Subject: more work on our own graphml format X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e309421ae7b04ee2dcb71cdae2f206ffc6b6c384;p=scpubgit%2Fstemmatology.git more work on our own graphml format --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 37efe67..e0c879a 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -367,7 +367,18 @@ sub as_graphml { $root->setNamespace( $xsi_ns, 'xsi', 0 ); $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); - # TODO Add some global graph data + # Add the data keys for the graph + my %graph_data_keys; + my $gdi = 0; + my @graph_attributes = qw/ wit_list_separator baselabel linear ac_label /; + foreach my $datum ( @graph_attributes ) { + $graph_data_keys{$datum} = 'dg'.$gdi++; + my $key = $root->addNewChild( $graphml_ns, 'key' ); + $key->setAttribute( 'attr.name', $datum ); + $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' ); + $key->setAttribute( 'for', 'graph' ); + $key->setAttribute( 'id', $graph_data_keys{$datum} ); + } # Add the data keys for nodes my %node_data_keys; @@ -412,6 +423,11 @@ sub as_graphml { $graph->setAttribute( 'parse.nodeids', 'canonical' ); $graph->setAttribute( 'parse.nodes', scalar($self->readings) ); $graph->setAttribute( 'parse.order', 'nodesfirst' ); + + # Collation attribute data + foreach my $datum ( @graph_attributes ) { + _add_graphml_data( $graph, $graph_data_keys{$datum}, $self->$datum ); + } my $node_ctr = 0; my %node_hash; @@ -516,6 +532,7 @@ sub make_alignment_table { } my $table; my @all_pos = sort { $a <=> $b } $self->possible_positions; + $DB::single = 1; foreach my $wit ( $self->tradition->witnesses ) { # print STDERR "Making witness row(s) for " . $wit->sigil . "\n"; my @row = _make_witness_row( $wit->path, \@all_pos, $noderefs ); diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index be36407..27af6af 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -30,7 +30,7 @@ and their associated data. =cut -use vars qw/ $xpc $nodedata $witnesses /; +use vars qw/ $xpc $graphattr $nodedata $witnesses /; # Return graph -> nodeid -> { key1/val1, key2/val2, key3/val3 ... } # -> edgeid -> { source, target, wit1/val1, wit2/val2 ...} @@ -54,10 +54,13 @@ sub parse { my $keyid = $k->getAttribute( 'id' ); my $keyname = $k->getAttribute( 'attr.name' ); - if( $k->getAttribute( 'for' ) eq 'node' ) { - # Keep track of the XML identifiers for the data carried - # in each node element. - $nodedata->{$keyid} = $keyname + # Keep track of the XML identifiers for the data carried + # in each node element. + my $dtype = $k->getAttribute( 'for' ); + if( $dtype eq 'graph' ) { + $graphattr->{$keyid} = $keyname; + } elsif( $dtype eq 'node' ) { + $nodedata->{$keyid} = $keyname; } else { $witnesses->{$keyid} = $keyname; } @@ -66,6 +69,14 @@ sub parse { my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0]; my $node_reg = {}; + + # Read in graph globals (if any). + print STDERR "Reading graphml global data\n"; + foreach my $dkey ( keys %$graphattr ) { + my $keyname = $graphattr->{$dkey}; + my $keyvalue = _lookup_node_data( $graph_el, $dkey ); + $graph_hash->{'global'}->{$keyname} = $keyvalue; + } # Add the nodes to the graph hash. print STDERR "Reading graphml nodes\n"; diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index 2e7a0f1..97c2e55 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -41,7 +41,16 @@ sub parse { my $collation = $tradition->collation; my %witnesses; - + + # Set up the graph-global attributes. They will appear in the + # hash under their accessor names. + # TODO Consider simplifying this for nodes & edges as well. + print STDERR "Setting graph globals\n"; + foreach my $gkey ( keys %{$graph_data->{'attr'}} ) { + my $val = $graph_data->{'attr'}->{$gkey}; + $collation->$gkey( $val ); + } + # Add the nodes to the graph. # TODO Are we adding extra start/end nodes? @@ -92,6 +101,7 @@ sub parse { $tradition->add_witness( sigil => $wit ); $witnesses{$wit} = 1; } + $witnesses{$wit} = 2 if $extra; } elsif( $class eq 'relationship' ) { # We need the metadata about the relationship. my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} }; @@ -109,22 +119,31 @@ sub parse { ## Deal with node information (transposition, relationships, etc.) that ## needs to be processed after all the nodes are created. print STDERR "Adding second-pass node data\n"; - my $linear = undef; foreach my $nkey ( keys %$extra_data ) { foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { my $this_reading = $collation->reading( $nkey ); if( $edkey eq $TRANSPOS_KEY ) { my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); - # We evidently have a linear graph. - $linear = 1; $this_reading->set_identical( $other_reading ); } else { warn "Unfamiliar reading node data $edkey for $nkey"; } } } - $collation->linear( $linear ); - # TODO We probably need to set the $witness->path arrays for each wit. + + # Set the $witness->path arrays for each wit. + print STDERR "Walking paths for witnesses\n"; + foreach my $wit ( $tradition->witnesses ) { + my @path = $collation->reading_sequence( $collation->start, $collation->end, + $wit->sigil ); + $wit->path( \@path ); + if( $witnesses{$wit->sigil} == 2 ) { + # Get the uncorrected path too + my @uc = $collation->reading_sequence( $collation->start, $collation->end, + $wit->sigil . $collation->ac_label, $wit->sigil ); + $wit->uncorrected_path( \@uc ); + } + } } =back diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index 16e2863..199490b 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -33,8 +33,6 @@ sub parse { my $csv = Text::CSV_XS->new( { binary => 1, # binary for UTF-8 sep_char => "\t" } ); my @lines = split( "\n", $tab_str ); - # Conveniently, we are basically receiving exactly the sort of alignment table - # we might want to produce later. May as well save it. my $alignment_table; foreach my $l ( @lines ) { my $status = $csv->parse( $l ); @@ -55,7 +53,6 @@ sub parse { # Now for the next rows, make nodes as necessary, assign their ranks, and # add them to the witness paths. - $DB::single = 1; foreach my $idx ( 1 .. $#{$alignment_table} ) { my $row = $alignment_table->[$idx]; my $nodes = make_nodes( $c, $row, $idx ); @@ -100,11 +97,6 @@ sub parse { # Join up the paths. $c->make_witness_paths; - - # Save the alignment table that was so handily provided to us. - # TODO if we support other delimiters, we will have to re-export this - # rather than saving the original string. - $c->_save_csv( $tab_str ); } sub make_nodes {