From: Tara L Andrews Date: Mon, 26 Dec 2011 21:04:24 +0000 (+0100) Subject: make Self module parse old and new graphml X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=255875b8b5d34bbb40eb4b101657e4ca0ac256d3;p=scpubgit%2Fstemmatology.git make Self module parse old and new graphml --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 996e7fe..8609c96 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -487,7 +487,7 @@ sub as_graphml { my $ndi = 0; my %node_data = ( id => 'string', - reading => 'string', + text => 'string', rank => 'string', is_start => 'boolean', is_end => 'boolean', @@ -547,10 +547,11 @@ sub as_graphml { my $node_xmlid = 'n' . $node_ctr++; $node_hash{ $n->id } = $node_xmlid; $node_el->setAttribute( 'id', $node_xmlid ); - _add_graphml_data( $node_el, $node_data_keys{'id'}, $n->id ); - _add_graphml_data( $node_el, $node_data_keys{'reading'}, $n->text ); - _add_graphml_data( $node_el, $node_data_keys{'rank'}, $n->rank ) - if $n->has_rank; + foreach my $d ( keys %node_data ) { + my $nval = $n->$d; + _add_graphml_data( $node_el, $node_data_keys{$d}, $nval ) + if defined $nval; + } } # Add the path edges diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index 7bc2c6d..62f9f1f 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -109,7 +109,7 @@ my $t = Text::Tradition->new( is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); - is( scalar $t->collation->paths, 2854, "Collation has all paths" ); + is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } @@ -118,9 +118,13 @@ if( $t ) { =cut my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY, - $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY ) - = qw/ name reading identical rank class - source target witness extra relationship/; + $START_KEY, $END_KEY, $LACUNA_KEY, + $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY, + $COLO_KEY, $CORRECT_KEY, $INDEP_KEY ) + = qw/ name reading identical rank class + is_start is_end is_lacuna + source target witness extra relationship + equal_rank non_correctable non_independent /; sub parse { my( $tradition, $opts ) = @_; @@ -131,11 +135,23 @@ sub parse { # Set up the graph-global attributes. They will appear in the # hash under their accessor names. + my $use_version; print STDERR "Setting graph globals\n"; $tradition->name( $graph_data->{'name'} ); - foreach my $gkey ( keys %{$graph_data->{'attr'}} ) { - my $val = $graph_data->{'attr'}->{$gkey}; - $collation->$gkey( $val ); + $DB::single = 1; + foreach my $gkey ( keys %{$graph_data->{'global'}} ) { + my $val = $graph_data->{'global'}->{$gkey}; + if( $gkey eq 'version' ) { + $use_version = $val; + } else { + $collation->$gkey( $val ); + } + } + if( $use_version ) { + # Many of our tags changed. + $IDKEY = 'id'; + $TOKENKEY = 'text'; + $COLO_KEY = 'colocated'; } # Add the nodes to the graph. @@ -144,28 +160,36 @@ sub parse { # after the nodes & edges are created. print STDERR "Adding graph nodes\n"; foreach my $n ( @{$graph_data->{'nodes'}} ) { + # If it is the start or end node, we already have one, so skip it. + next if defined $n->{$START_KEY} || defined $n->{$END_KEY}; + # First extract the data that we can use without reference to # anything else. my %node_data = %$n; # Need $n itself untouched for edge processing - my $nodeid = delete $node_data{$IDKEY}; - my $reading = delete $node_data{$TOKENKEY}; - my $class = delete $node_data{$CLASS_KEY} || ''; - my $rank = delete $node_data{$RANK_KEY}; - # Create the node. Current valid classes are common and meta. - # Everything else is a normal reading. - my $gnode = $collation->add_reading( $nodeid ); - $gnode->text( $reading ); - $gnode->make_common if $class eq 'common'; - $gnode->is_meta( 1 ) if $class eq 'meta'; - # This is a horrible hack. - $gnode->is_lacuna( $reading =~ /^\#LACUNA/ ); - $gnode->rank( $rank ) if defined $rank; + # Create the node. + my $reading_options = { + 'id' => delete $node_data{$IDKEY}, + 'is_lacuna' => delete $node_data{$LACUNA_KEY}, + }; + my $rank = delete $node_data{$RANK_KEY}; + $reading_options->{'rank'} = $rank if $rank; + my $text = delete $node_data{$TOKENKEY}; + $reading_options->{'text'} = $text if $text; + + # This is a horrible hack for backwards compatibility. + unless( $use_version ) { + $reading_options->{'is_lacuna'} = 1 + if $reading_options->{'text'} =~ /^\#LACUNA/; + } + + delete $node_data{$CLASS_KEY}; # Not actually used + my $gnode = $collation->add_reading( $reading_options ); # Now save the data that we need for post-processing, - # if it exists. + # if it exists. TODO this is unneeded after conversion if ( keys %node_data ) { - $extra_data->{$nodeid} = \%node_data + $extra_data->{$gnode->id} = \%node_data } } @@ -193,12 +217,12 @@ sub parse { } elsif( $class eq 'relationship' ) { # We need the metadata about the relationship. my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} }; - $opts->{'equal_rank'} = $e->{'equal_rank'} - if exists $e->{'equal_rank'}; - $opts->{'non_correctable'} = $e->{'non_correctable'} - if exists $e->{'non_correctable'}; - $opts->{'non_independent'} = $e->{'non_independent'} - if exists $e->{'non_independent'}; + $opts->{$COLO_KEY} = $e->{$COLO_KEY} + if exists $e->{$COLO_KEY}; + $opts->{$CORRECT_KEY} = $e->{$CORRECT_KEY} + if exists $e->{$CORRECT_KEY}; + $opts->{$INDEP_KEY} = $e->{$INDEP_KEY} + if exists $e->{$INDEP_KEY}; warn "No relationship type for relationship edge!" unless $opts->{'type'}; my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts ); unless( $ok ) { @@ -209,17 +233,20 @@ sub parse { ## Deal with node information (transposition, relationships, etc.) that ## needs to be processed after all the nodes are created. - print STDERR "Adding second-pass node data\n"; - foreach my $nkey ( keys %$extra_data ) { - foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { - my $this_reading = $collation->reading( $nkey ); - if( $edkey eq $TRANSPOS_KEY ) { - my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); - $this_reading->set_identical( $other_reading ); - } else { - warn "Unfamiliar reading node data $edkey for $nkey"; - } - } + ## TODO unneeded after conversion + unless( $use_version ) { + print STDERR "Adding second-pass node data\n"; + foreach my $nkey ( keys %$extra_data ) { + foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { + my $this_reading = $collation->reading( $nkey ); + if( $edkey eq $TRANSPOS_KEY ) { + my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); + $this_reading->set_identical( $other_reading ); + } else { + warn "Unfamiliar reading node data $edkey for $nkey"; + } + } + } } } diff --git a/t/text_tradition_parser_self.t b/t/text_tradition_parser_self.t index c8a9bc0..997b5ec 100644 --- a/t/text_tradition_parser_self.t +++ b/t/text_tradition_parser_self.t @@ -23,7 +23,7 @@ my $t = Text::Tradition->new( is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); - is( scalar $t->collation->paths, 2854, "Collation has all paths" ); + is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } }