From: Tara L Andrews Date: Sun, 4 Mar 2012 13:37:57 +0000 (+0100) Subject: refactor GraphML write/parse to use Moose introspection X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2Fstemmatology.git;a=commitdiff_plain;h=bbd064a9a0899c1ca82b094ecd81a11b312a3ed9 refactor GraphML write/parse to use Moose introspection --- diff --git a/lib/Text/Tradition.pm b/lib/Text/Tradition.pm index 20f33e1..0bbc76b 100644 --- a/lib/Text/Tradition.pm +++ b/lib/Text/Tradition.pm @@ -35,7 +35,7 @@ has 'name' => ( ); has 'language' => ( - is => 'ro', + is => 'rw', isa => 'Str', ); diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 8344288..42a3d3b 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -864,61 +864,90 @@ sub as_graphml { $graphml->setDocumentElement( $root ); $root->setNamespace( $xsi_ns, 'xsi', 0 ); $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); + + # List of attribute types to save on our objects and their corresponding + # GraphML types + my %save_types = ( + 'Str' => 'string', + 'Int' => 'int', + 'Bool' => 'boolean', + 'RelationshipType' => 'string', + 'RelationshipScope' => 'string', + ); + + # List of attribute names *not* to save on our objects. + # We will also not save any attribute beginning with _. + my %skipsave; + map { $skipsave{$_} = 1 } qw/ cached_svg /; - # Add the data keys for the graph + # Add the data keys for the graph. Include an extra key 'version' for the + # GraphML output version. my %graph_data_keys; my $gdi = 0; - my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /; - foreach my $datum ( @graph_attributes ) { + my %graph_attributes = ( 'version' => 'string' ); + # Graph attributes include those of Tradition and those of Collation. + my %gattr_from; + my $tmeta = $self->tradition->meta; + my $cmeta = $self->meta; + map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes; + map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes; + foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) { + next if $attr->name =~ /^_/; + next if $skipsave{$attr->name}; + next unless $save_types{$attr->type_constraint->name}; + $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; + } + + foreach my $datum ( sort keys %graph_attributes ) { $graph_data_keys{$datum} = 'dg'.$gdi++; my $key = $root->addNewChild( $graphml_ns, 'key' ); $key->setAttribute( 'attr.name', $datum ); - $key->setAttribute( 'attr.type', $datum eq 'linear' ? 'boolean' : 'string' ); + $key->setAttribute( 'attr.type', $graph_attributes{$datum} ); $key->setAttribute( 'for', 'graph' ); $key->setAttribute( 'id', $graph_data_keys{$datum} ); } - # Add the data keys for nodes + # Add the data keys for reading nodes + my %reading_attributes; + my $rmeta = Text::Tradition::Collation::Reading->meta; + foreach my $attr( $rmeta->get_all_attributes ) { + next if $attr->name =~ /^_/; + next if $skipsave{$attr->name}; + next unless $save_types{$attr->type_constraint->name}; + $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; + } my %node_data_keys; my $ndi = 0; - my %node_data = ( - id => 'string', - text => 'string', - rank => 'string', - is_start => 'boolean', - is_end => 'boolean', - is_lacuna => 'boolean', - is_common => 'boolean', - join_prior => 'boolean', - join_next => 'boolean', - ); - foreach my $datum ( keys %node_data ) { + foreach my $datum ( sort keys %reading_attributes ) { $node_data_keys{$datum} = 'dn'.$ndi++; my $key = $root->addNewChild( $graphml_ns, 'key' ); $key->setAttribute( 'attr.name', $datum ); - $key->setAttribute( 'attr.type', $node_data{$datum} ); + $key->setAttribute( 'attr.type', $reading_attributes{$datum} ); $key->setAttribute( 'for', 'node' ); $key->setAttribute( 'id', $node_data_keys{$datum} ); } - # Add the data keys for edges, i.e. witnesses + # Add the data keys for edges, that is, paths and relationships. Path + # data does not come from a Moose class so is here manually. my $edi = 0; my %edge_data_keys; - my %edge_data = ( - class => 'string', # Class, deprecated soon + my %edge_attributes = ( witness => 'string', # ID/label for a path - relationship => 'string', # ID/label for a relationship extra => 'boolean', # Path key - scope => 'string', # Relationship key - annotation => 'string', # Relationship key - non_correctable => 'boolean', # Relationship key - non_independent => 'boolean', # Relationship key ); - foreach my $datum ( keys %edge_data ) { + my @path_attributes = keys %edge_attributes; # track our manual additions + my $pmeta = Text::Tradition::Collation::Relationship->meta; + foreach my $attr( $pmeta->get_all_attributes ) { + next if $attr->name =~ /^_/; + next if $skipsave{$attr->name}; + next unless $save_types{$attr->type_constraint->name}; + $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; + } + foreach my $datum ( sort keys %edge_attributes ) { $edge_data_keys{$datum} = 'de'.$edi++; my $key = $root->addNewChild( $graphml_ns, 'key' ); $key->setAttribute( 'attr.name', $datum ); - $key->setAttribute( 'attr.type', $edge_data{$datum} ); + $key->setAttribute( 'attr.type', $edge_attributes{$datum} ); $key->setAttribute( 'for', 'edge' ); $key->setAttribute( 'id', $edge_data_keys{$datum} ); } @@ -934,8 +963,15 @@ sub as_graphml { $sgraph->setAttribute( 'parse.order', 'nodesfirst' ); # Collation attribute data - foreach my $datum ( @graph_attributes ) { - my $value = $datum eq 'version' ? '3.0' : $self->$datum; + foreach my $datum ( keys %graph_attributes ) { + my $value; + if( $datum eq 'version' ) { + $value = '3.1'; + } elsif( $gattr_from{$datum} eq 'Tradition' ) { + $value = $self->tradition->$datum; + } else { + $value = $self->$datum; + } _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value ); } @@ -948,7 +984,7 @@ sub as_graphml { my $node_xmlid = 'n' . $node_ctr++; $node_hash{ $n->id } = $node_xmlid; $node_el->setAttribute( 'id', $node_xmlid ); - foreach my $d ( keys %node_data ) { + foreach my $d ( keys %reading_attributes ) { my $nval = $n->$d; _add_graphml_data( $node_el, $node_data_keys{$d}, $nval ) if defined $nval; @@ -980,11 +1016,11 @@ sub as_graphml { _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel ); } _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base ); - _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' ); } } # Add the relationship graph to the XML + map { delete $edge_data_keys{$_} } @path_attributes; $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash, $node_data_keys{'id'}, \%edge_data_keys ); diff --git a/lib/Text/Tradition/Collation/Relationship.pm b/lib/Text/Tradition/Collation/Relationship.pm index 06748dc..0e20f8d 100644 --- a/lib/Text/Tradition/Collation/Relationship.pm +++ b/lib/Text/Tradition/Collation/Relationship.pm @@ -102,13 +102,11 @@ has 'annotation' => ( has 'non_correctable' => ( is => 'ro', isa => 'Bool', - predicate => 'noncorr_set', ); has 'non_independent' => ( is => 'ro', isa => 'Bool', - predicate => 'nonind_set', ); # A read-only meta-Boolean attribute. diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index d8b06e2..c9136b3 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -500,13 +500,11 @@ sub _as_graphml { $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); my $rel_obj = $self->get_relationship( @$e ); - _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); - _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); - _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation ); - _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, - $rel_obj->non_correctable ) if $rel_obj->noncorr_set; - _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, - $rel_obj->non_independent ) if $rel_obj->nonind_set; + foreach my $key ( keys %$edge_keys ) { + my $value = $rel_obj->$key; + _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) + if defined $value; + } } } diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index e3d6c00..bdadce2 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -106,26 +106,40 @@ my $t = Text::Tradition->new( 'file' => $tradition, ); -is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); +is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } +# TODO add a relationship, write graphml, reparse it, check that the rel +# is still there +$t->language('Greek'); +$t->collation->add_relationship( 'w12', 'w13', + { 'type' => 'grammatical', 'scope' => 'global', + 'annotation' => 'This is some note' } ); +ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" ); +my $graphml_str = $t->collation->as_graphml; + +my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); +is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" ); +if( $newt ) { + is( scalar $newt->collation->readings, 319, "Collation has all readings" ); + is( scalar $newt->collation->paths, 376, "Collation has all paths" ); + is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); + is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); + is( $newt->language, 'Greek', "Tradition has correct language setting" ); + my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); + ok( $rel, "Found set relationship" ); + is( $rel->annotation, 'This is some note', "Relationship has its properties" ); +} + + =end testing =cut -my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, - $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY, - $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY, - $SCOPE_KEY, $ANNOTATION_KEY, $CORRECT_KEY, $INDEP_KEY ) - = qw/ id text identical rank - is_start is_end is_lacuna is_common - source target witness extra relationship - scope annotation non_correctable non_independent /; - sub parse { my( $tradition, $opts ) = @_; @@ -139,10 +153,14 @@ sub parse { # print STDERR "Setting graph globals\n"; $tradition->name( $graph_data->{'name'} ); my $use_version; + my $tmeta = $tradition->meta; + my $cmeta = $collation->meta; foreach my $gkey ( keys %{$graph_data->{'global'}} ) { my $val = $graph_data->{'global'}->{$gkey}; if( $gkey eq 'version' ) { $use_version = $val; + } elsif( $tmeta->has_attribute( $gkey ) ) { + $tradition->$gkey( $val ); } else { $collation->$gkey( $val ); } @@ -150,51 +168,34 @@ sub parse { # Add the nodes to the graph. - # print STDERR "Adding graph nodes\n"; + # print STDERR "Adding collation readings\n"; foreach my $n ( @{$graph_data->{'nodes'}} ) { # If it is the start or end node, we already have one, so # grab the rank and go. - next if( defined $n->{$START_KEY} ); - if( defined $n->{$END_KEY} ) { - $collation->end->rank( $n->{$RANK_KEY} ); + next if( defined $n->{'is_start'} ); + if( defined $n->{'is_end'} ) { + $collation->end->rank( $n->{'rank'} ); next; } - - # First extract the data that we can use without reference to - # anything else. - - # Create the node. - my $reading_options = { - 'id' => $n->{$IDKEY}, - 'is_lacuna' => $n->{$LACUNA_KEY}, - 'is_common' => $n->{$COMMON_KEY}, - }; - my $rank = $n->{$RANK_KEY}; - $reading_options->{'rank'} = $rank if $rank; - my $text = $n->{$TOKENKEY}; - $reading_options->{'text'} = $text if $text; - - my $gnode = $collation->add_reading( $reading_options ); + my $gnode = $collation->add_reading( $n ); } # Now add the edges. - # print STDERR "Adding graph edges\n"; + # print STDERR "Adding collation path edges\n"; foreach my $e ( @{$graph_data->{'edges'}} ) { - my $from = $e->{$SOURCE_KEY}; - my $to = $e->{$TARGET_KEY}; - - # We need the witness, and whether it is an 'extra' reading path. - my $wit = $e->{$WITNESS_KEY}; - warn "No witness label on path edge!" unless $wit; - my $extra = $e->{$EXTRA_KEY}; - my $label = $wit . ( $extra ? $collation->ac_label : '' ); - $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); + my $from = $collation->reading( $e->{'source'}->{'id'} ); + my $to = $collation->reading( $e->{'target'}->{'id'} ); + + warn "No witness label on path edge!" unless $e->{'witness'}; + my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' ); + $collation->add_path( $from, $to, $label ); + # Add the witness if we don't have it already. - unless( $witnesses{$wit} ) { - $tradition->add_witness( sigil => $wit ); - $witnesses{$wit} = 1; + unless( $witnesses{$e->{'witness'}} ) { + $tradition->add_witness( sigil => $e->{'witness'} ); + $witnesses{$e->{'witness'}} = 1; } - $tradition->witness( $wit )->is_layered( 1 ) if $extra; + $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'}; } ## Done with the main graph, now look at the relationships. @@ -202,35 +203,31 @@ sub parse { # add the relationships themselves. # TODO check that scoping does trt foreach my $e ( @{$rel_data->{'edges'}} ) { - my $from = $e->{$SOURCE_KEY}; - my $to = $e->{$TARGET_KEY}; - my $relationship_opts = { - 'type' => $e->{$RELATIONSHIP_KEY}, - 'scope' => $e->{$SCOPE_KEY}, - }; - $relationship_opts->{'annotation'} = $e->{$ANNOTATION_KEY} - if exists $e->{$ANNOTATION_KEY}; - $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY} - if exists $e->{$CORRECT_KEY}; - $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY} - if exists $e->{$INDEP_KEY}; - # TODO unless relationship is scoped and that scoped relationship exists... + my $from = $collation->reading( $e->{'source'}->{'id'} ); + my $to = $collation->reading( $e->{'target'}->{'id'} ); + delete $e->{'source'}; + delete $e->{'target'}; + # The remaining keys are relationship attributes. + # Backward compatibility... + if( $use_version eq '2.0' || $use_version eq '3.0' ) { + delete $e->{'class'}; + $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'}; + } + # Add the specified relationship unless we already have done. my $rel_exists; - if( $relationship_opts->{'scope'} ne 'local' ) { - my $relobj = $collation->get_relationship( $from->{$IDKEY}, $to->{$IDKEY} ); - if( $relobj && $relobj->{'scope'} eq $relationship_opts->{'scope'} - && $relobj->{'type'} eq $relationship_opts->{'type'} ) { + if( $e->{'scope'} ne 'local' ) { + my $relobj = $collation->get_relationship( $from, $to ); + if( $relobj && $relobj->scope eq $e->{'scope'} + && $relobj->type eq $e->{'type'} ) { $rel_exists = 1; } } - $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, - $relationship_opts ) unless $rel_exists; + $collation->add_relationship( $from, $to, $e ) unless $rel_exists; } # Save the text for each witness so that we can ensure consistency # later on - $tradition->collation->text_from_paths(); - + $collation->text_from_paths(); } 1; diff --git a/t/text_tradition_parser_self.t b/t/text_tradition_parser_self.t index 997b5ec..612cf1e 100644 --- a/t/text_tradition_parser_self.t +++ b/t/text_tradition_parser_self.t @@ -20,12 +20,34 @@ my $t = Text::Tradition->new( 'file' => $tradition, ); -is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); +is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } + +# TODO add a relationship, write graphml, reparse it, check that the rel +# is still there +$t->language('Greek'); +$t->collation->add_relationship( 'w12', 'w13', + { 'type' => 'grammatical', 'scope' => 'global', + 'annotation' => 'This is some note' } ); +ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" ); +my $graphml_str = $t->collation->as_graphml; + +my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); +is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" ); +if( $newt ) { + is( scalar $newt->collation->readings, 319, "Collation has all readings" ); + is( scalar $newt->collation->paths, 376, "Collation has all paths" ); + is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); + is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); + is( $newt->language, 'Greek', "Tradition has correct language setting" ); + my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); + ok( $rel, "Found set relationship" ); + is( $rel->annotation, 'This is some note', "Relationship has its properties" ); +} }