From: Tara L Andrews Date: Tue, 6 Mar 2012 13:36:10 +0000 (+0100) Subject: allow case-insensitive relationships on all but orthography X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=bf6e338dd676742fbd0c6d88c98795adae40429f;p=scpubgit%2Fstemmatology.git allow case-insensitive relationships on all but orthography --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 42a3d3b..7176134 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -792,14 +792,24 @@ sub _path_display_label { } } -=head2 witnesses_at_rank +=head2 readings_at_rank( $rank ) -Returns a list of witnesses that are not lacunose, for a given rank. +Returns a list of readings at a given rank, taken from the alignment table. =cut -sub witnesses_at_rank { +sub readings_at_rank { my( $self, $rank ) = @_; + my $table = $self->alignment_table; + # Table rank is real rank - 1. + my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}}; + my %readings; + foreach my $e ( @elements ) { + next unless ref( $e ) eq 'HASH'; + next unless exists $e->{'t'}; + $readings{$e->{'t'}->id} = $e->{'t'}; + } + return values %readings; } =head2 as_graphml @@ -1530,18 +1540,23 @@ with the same text at the same rank, and merges any that are found. sub flatten_ranks { my $self = shift; my %unique_rank_rdg; + my $changed; foreach my $rdg ( $self->readings ) { next unless $rdg->has_rank; my $key = $rdg->rank . "||" . $rdg->text; if( exists $unique_rank_rdg{$key} ) { # Combine! # print STDERR "Combining readings at same rank: $key\n"; + $changed = 1; $self->merge_readings( $unique_rank_rdg{$key}, $rdg ); # TODO see if this now makes a common point. } else { $unique_rank_rdg{$key} = $rdg; } } + # If we merged readings, the ranks are still fine but the alignment + # table is wrong. Wipe it. + $self->wipe_table() if $changed; } diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index c9136b3..a0b7ff3 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -216,6 +216,7 @@ sub add_relationship { } else { # Check the options $options->{'scope'} = 'local' unless $options->{'scope'}; + $options->{'scope'} = 'local' if $options->{'type'} eq 'collated'; my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, $options->{'type'} ); @@ -243,28 +244,11 @@ sub add_relationship { # Find all the pairs for which we need to set the relationship. - my @vectors = ( [ $source, $target ] ); + my @vectors = [ $source, $target ]; if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) { - my $c = $self->collation; - # Set the same relationship everywhere we can, throughout the graph. - my @identical_readings = grep { $_->text eq $relationship->reading_a } - $c->readings; - foreach my $ir ( @identical_readings ) { - next if $ir->id eq $source; - # Check to see if there is a target reading with the same text at - # the same rank. - my @itarget = grep - { $_->rank == $ir->rank && $_->text eq $relationship->reading_b } - $c->readings; - if( @itarget ) { - # We found a hit. - warn "More than one reading with text " . $target_rdg->text - . " at rank " . $ir->rank . "!" if @itarget > 1; - push( @vectors, [ $ir->id, $itarget[0]->id ] ); - } - } - } - + push( @vectors, $self->_find_applicable( $relationship ) ); + } + # Now set the relationship(s). my @pairs_set; foreach my $v ( @vectors ) { @@ -284,6 +268,47 @@ sub add_relationship { return @pairs_set; } +sub _find_applicable { + my( $self, $rel ) = @_; + my $c = $self->collation; + # TODO Someday we might use a case sensitive language. + my $lang = $c->tradition->language; + my @vectors; + my @identical_readings; + if( $rel->type eq 'orthographic' ) { + @identical_readings = grep { $_->text eq $rel->reading_a } + $c->readings; + } else { + @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) } + $c->readings; + } + foreach my $ir ( @identical_readings ) { + my @itarget; + if( $rel->type eq 'orthographic' ) { + @itarget = grep { $_->rank == $ir->rank + && $_->text eq $rel->reading_b } $c->readings; + } else { + @itarget = grep { $_->rank == $ir->rank + && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings; + } + if( @itarget ) { + # Warn if there is more than one hit with no orth link between them. + my $itmain = shift @itarget; + if( @itarget ) { + my %all_targets; + map { $all_targets{$_} = 1 } @itarget; + map { delete $all_targets{$_} } + $self->related_readings( $itmain, + sub { $_[0]->type eq 'orthographic' } ); + warn "More than one unrelated reading with text " . $itmain->text + . " at rank " . $ir->rank . "!" if keys %all_targets; + } + push( @vectors, [ $ir->id, $itmain->id ] ); + } + } + return @vectors; +} + =head2 del_relationship( $source, $target ) Removes the relationship between the given readings. If the relationship is diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index bdadce2..5499fc7 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -202,7 +202,8 @@ sub parse { # Nodes are added via the call to add_reading above. We only need # add the relationships themselves. # TODO check that scoping does trt - foreach my $e ( @{$rel_data->{'edges'}} ) { + $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels + foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) { my $from = $collation->reading( $e->{'source'}->{'id'} ); my $to = $collation->reading( $e->{'target'}->{'id'} ); delete $e->{'source'}; @@ -230,6 +231,21 @@ sub parse { $collation->text_from_paths(); } +## Return the relationship that comes first in priority. +my %LAYERS = ( + 'collated' => 1, + 'orthographic' => 2, + 'spelling' => 3, + ); + +sub _layersort_rel { + my( $a, $b ) = @_; + my $key = exists $a->{'type'} ? 'type' : 'relationship'; + my $at = $LAYERS{$a->{$key}} || 99; + my $bt = $LAYERS{$b->{$key}} || 99; + return $at <=> $bt; +} + 1; =head1 BUGS / TODO diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index a561dde..87fca3f 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -270,6 +270,24 @@ sub _make_nodes { $unique{$w} = $r; $ctr++; } + # Collate this sequence of readings via a single 'collation' relationship. + my @rankrdgs = values %unique; + my $collation_rel; + while( @rankrdgs ) { + my $r = shift @rankrdgs; + next if $r->is_meta; + foreach my $nr ( @rankrdgs ) { + if( $collation_rel ) { + $collation->add_relationship( $r, $nr, $collation_rel ); + } else { + $collation->add_relationship( $r, $nr, + { 'type' => 'collated', + 'annotation' => "Parsed together for rank $index" } ); + $collation_rel = $collation->get_relationship( $r, $nr ); + } + } + } + return \%unique; }