From: Tara L Andrews Date: Mon, 26 Dec 2011 20:37:50 +0000 (+0100) Subject: make tabular parse test work X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1d3104950074a3d7470f01ef0ec8e9046d95b124;p=scpubgit%2Fstemmatology.git make tabular parse test work --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 3214cdc..996e7fe 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -275,7 +275,7 @@ sub add_relationship { if( $self->relations->has_edge( $source, $target ) ) { return ( undef, "Relationship already exists between these readings" ); } - if( $options->{'colocated'} && !$self->relationship_valid( $source, $target ) ) { + if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) { return ( undef, 'Relationship creates witness loop' ); } @@ -289,25 +289,49 @@ sub add_relationship { } sub relationship_valid { - my( $self, $source, $target ) = @_; - # Check that linking the source and target in a relationship won't lead - # to a path loop for any witness. First make a lookup table of all the - # readings related to either the source or the target. - my @proposed_related = ( $source, $target ); - push( @proposed_related, $source->related_readings( 'colocated' ) ); - push( @proposed_related, $target->related_readings( 'colocated' ) ); - my %pr_ids; - map { $pr_ids{ $_->id } = 1 } @proposed_related; - - # None of these proposed related readings should have a neighbor that - # is also in proposed_related. - foreach my $pr ( keys %pr_ids ) { - foreach my $neighbor( $self->sequence->neighbors( $pr ) ) { - return 0 if exists $pr_ids{$neighbor}; - } - } - - return 1; + my( $self, $source, $target, $rel ) = @_; + if( $rel eq 'repetition' ) { + return 1; + } elsif ( $rel eq 'transposition' ) { + # Check that the two readings do not appear in the same witness. + my %seen_wits; + map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source ); + foreach my $w ( $self->reading_witnesses( $target ) ) { + return 0 if $seen_wits{$w}; + } + return 1; + } else { + # Check that linking the source and target in a relationship won't lead + # to a path loop for any witness. First make a lookup table of all the + # readings related to either the source or the target. + my @proposed_related = ( $source, $target ); + push( @proposed_related, $source->related_readings( 'colocated' ) ); + push( @proposed_related, $target->related_readings( 'colocated' ) ); + my %pr_ids; + map { $pr_ids{ $_->id } = 1 } @proposed_related; + + # None of these proposed related readings should have a neighbor that + # is also in proposed_related. + foreach my $pr ( keys %pr_ids ) { + foreach my $neighbor( $self->sequence->neighbors( $pr ) ) { + return 0 if exists $pr_ids{$neighbor}; + } + } + return 1; + } +} + +# Return a list of the witnesses in which the reading appears. +sub reading_witnesses { + my( $self, $reading ) = @_; + # We need only check either the incoming or the outgoing edges; I have + # arbitrarily chosen "incoming". + my %all_witnesses; + foreach my $e ( $self->sequence->edges_to( $reading ) ) { + my $wits = $self->sequence->get_edge_attributes( @$e ); + @all_witnesses{ keys %$wits } = 1; + } + return keys %all_witnesses; } sub related_readings { @@ -448,7 +472,7 @@ sub as_graphml { # Add the data keys for the graph my %graph_data_keys; my $gdi = 0; - my @graph_attributes = qw/ wit_list_separator baselabel linear ac_label /; + my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /; foreach my $datum ( @graph_attributes ) { $graph_data_keys{$datum} = 'dg'.$gdi++; my $key = $root->addNewChild( $graphml_ns, 'key' ); @@ -511,7 +535,8 @@ sub as_graphml { # Collation attribute data foreach my $datum ( @graph_attributes ) { - _add_graphml_data( $graph, $graph_data_keys{$datum}, $self->$datum ); + my $value = $datum eq 'version' ? '2.0' : $self->$datum; + _add_graphml_data( $graph, $graph_data_keys{$datum}, $value ); } my $node_ctr = 0; diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index b77649a..fa9ce8b 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -118,7 +118,7 @@ around BUILDARGS => sub { # If one of our special booleans is set, we change the text and the # ID to match. - if( exists $args->{'is_lacuna'} ) { + if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) { $args->{'text'} = sprintf( "#LACUNA_%s#", $args->{'id'} ); } elsif( exists $args->{'is_start'} ) { $args->{'id'} = '#START#'; # Change the ID to ensure we have only one @@ -151,6 +151,12 @@ sub related_readings { return $self->collation->related_readings( $self, @_ ); } +sub set_identical { + my( $self, $other ) = @_; + return $self->collation->add_relationship( $self, $other, + { 'type' => 'transposition' } ); +} + sub _stringify { my $self = shift; return $self->id; diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index 543100e..4c1e511 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -68,8 +68,8 @@ is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" ); ### TODO Check these figures if( $t ) { - is( scalar $t->collation->readings, 313, "Collation has all readings" ); - is( scalar $t->collation->paths, 2877, "Collation has all paths" ); + is( scalar $t->collation->readings, 312, "Collation has all readings" ); + is( scalar $t->collation->paths, 363, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } @@ -138,7 +138,6 @@ sub parse { } } - # Collapse our lacunae into a single node and # push the end node onto all paths. $c->end->rank( scalar @$alignment_table ); @@ -147,15 +146,22 @@ sub parse { my $last_rdg = shift @$p; my $new_p = [ $last_rdg ]; foreach my $rdg ( @$p ) { + $DB::single = 1 if $rdg->id eq '228,1'; if( $rdg->text eq '#LACUNA#' ) { # If we are in a lacuna already, drop this node. # Otherwise make a lacuna node and drop this node. unless( $last_rdg->is_lacuna ) { - my $l = $c->add_reading( { - 'collation' => $c, - 'id' => $rdg->name, - 'is_lacuna' => 1, - } ); + my $l_id = 'l' . $rdg->id; + my $l; + if( $c->has_reading( $l_id ) ) { + $l = $c->reading( $l_id ); + } else { + $l = $c->add_reading( { + 'collation' => $c, + 'id' => $l_id, + 'is_lacuna' => 1, + } ); + } push( @$new_p, $l ); $last_rdg = $l; } diff --git a/t/data/florilegium.csv b/t/data/florilegium.csv index 4278da2..9598e80 100644 --- a/t/data/florilegium.csv +++ b/t/data/florilegium.csv @@ -1,5 +1,4 @@ A,B,C,D,E,"E (a.c.)",F,G,H,K,P,"P (a.c.)",Q,"Q (a.c.)",S,T,"T (a.c.)" -#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START#,#START# "Μαξίμου",#LACUNA#,,,,,"Μαξίμου",#LACUNA#,"Μαξίμου","Μαξίμου","Μαξίμου","Μαξίμου",,,"Μαξίμου",, ,#LACUNA#,,,,,"ἁγίου",#LACUNA#,"ἁγίου",,,,,,,, Ἡ,#LACUNA#,,Ἡ,Ἡ,Ἡ,Ἡ,#LACUNA#,Ἡ,Ἡ,,,Ἡ,Ἡ,Ἡ,Ἡ,Ἡ @@ -280,4 +279,4 @@ A,B,C,D,E,"E (a.c.)",F,G,H,K,P,"P (a.c.)",Q,"Q (a.c.)",S,T,"T (a.c.)" "δικαστήριον","δικαστήριον","δικαστήριον",#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,"δικαστήριον","δικαστήριον",#LACUNA#,#LACUNA#,"δικαστήριον",#LACUNA#,#LACUNA# "ἕλκωσιν,","ἕλκωσιν,","ἕλκωσιν,",#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,"ἕλκωσιν,","ἕλκωσιν,",#LACUNA#,#LACUNA#,"ἕλκωσιν,",#LACUNA#,#LACUNA# "ἀκολούθησον.","ἀκολούθησον.","ἀκολούθησον.",#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,#LACUNA#,"ἀκολούθησον.","ἀκολούθησον.",#LACUNA#,#LACUNA#,"ἀκολούθησον.",#LACUNA#,#LACUNA# -#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END# \ No newline at end of file +#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END#,#END# diff --git a/t/text_tradition_parser_tabular.t b/t/text_tradition_parser_tabular.t index 2696b52..d7f450c 100644 --- a/t/text_tradition_parser_tabular.t +++ b/t/text_tradition_parser_tabular.t @@ -25,8 +25,8 @@ is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" ); ### TODO Check these figures if( $t ) { - is( scalar $t->collation->readings, 313, "Collation has all readings" ); - is( scalar $t->collation->paths, 2877, "Collation has all paths" ); + is( scalar $t->collation->readings, 312, "Collation has all readings" ); + is( scalar $t->collation->paths, 363, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } }