From: Tara L Andrews <tla@mit.edu>
Date: Tue, 6 Mar 2012 13:36:10 +0000 (+0100)
Subject: allow case-insensitive relationships on all but orthography
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2Fstemmatology.git;a=commitdiff_plain;h=bf6e338dd676742fbd0c6d88c98795adae40429f

allow case-insensitive relationships on all but orthography
---

diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm
index 42a3d3b..7176134 100644
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -792,14 +792,24 @@ sub _path_display_label {
 	}
 }
 
-=head2 witnesses_at_rank
+=head2 readings_at_rank( $rank )
 
-Returns a list of witnesses that are not lacunose, for a given rank.
+Returns a list of readings at a given rank, taken from the alignment table.
 
 =cut
 
-sub witnesses_at_rank {
+sub readings_at_rank {
 	my( $self, $rank ) = @_;
+	my $table = $self->alignment_table;
+	# Table rank is real rank - 1.
+	my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
+	my %readings;
+	foreach my $e ( @elements ) {
+		next unless ref( $e ) eq 'HASH';
+		next unless exists $e->{'t'};
+		$readings{$e->{'t'}->id} = $e->{'t'};
+	}
+	return values %readings;
 }		
 
 =head2 as_graphml
@@ -1530,18 +1540,23 @@ with the same text at the same rank, and merges any that are found.
 sub flatten_ranks {
     my $self = shift;
     my %unique_rank_rdg;
+    my $changed;
     foreach my $rdg ( $self->readings ) {
         next unless $rdg->has_rank;
         my $key = $rdg->rank . "||" . $rdg->text;
         if( exists $unique_rank_rdg{$key} ) {
             # Combine!
         	# print STDERR "Combining readings at same rank: $key\n";
+        	$changed = 1;
             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
             # TODO see if this now makes a common point.
         } else {
             $unique_rank_rdg{$key} = $rdg;
         }
     }
+    # If we merged readings, the ranks are still fine but the alignment
+    # table is wrong. Wipe it.
+    $self->wipe_table() if $changed;
 }
 	
 
diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm
index c9136b3..a0b7ff3 100644
--- a/lib/Text/Tradition/Collation/RelationshipStore.pm
+++ b/lib/Text/Tradition/Collation/RelationshipStore.pm
@@ -216,6 +216,7 @@ sub add_relationship {
 	} else {
 		# Check the options
 		$options->{'scope'} = 'local' unless $options->{'scope'};
+		$options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
 		
 		my( $is_valid, $reason ) = 
 			$self->relationship_valid( $source, $target, $options->{'type'} );
@@ -243,28 +244,11 @@ sub add_relationship {
 
 
 	# Find all the pairs for which we need to set the relationship.
-	my @vectors = ( [ $source, $target ] );	
+	my @vectors = [ $source, $target ];
     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
-    	my $c = $self->collation;
-    	# Set the same relationship everywhere we can, throughout the graph.
-    	my @identical_readings = grep { $_->text eq $relationship->reading_a }
-    		$c->readings;
-    	foreach my $ir ( @identical_readings ) {
-    		next if $ir->id eq $source;
-    		# Check to see if there is a target reading with the same text at
-    		# the same rank.
-    		my @itarget = grep 
-    			{ $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
-    			$c->readings;
-    		if( @itarget ) {
-    			# We found a hit.
-    			warn "More than one reading with text " . $target_rdg->text
-    				. " at rank " . $ir->rank . "!" if @itarget > 1;
-    			push( @vectors, [ $ir->id, $itarget[0]->id ] );
-    		}
-    	}	
-    }
-    
+    	push( @vectors, $self->_find_applicable( $relationship ) );
+    } 
+        
     # Now set the relationship(s).
     my @pairs_set;
     foreach my $v ( @vectors ) {
@@ -284,6 +268,47 @@ sub add_relationship {
     return @pairs_set;
 }
 
+sub _find_applicable {
+	my( $self, $rel ) = @_;
+	my $c = $self->collation;
+	# TODO Someday we might use a case sensitive language.
+	my $lang = $c->tradition->language;
+	my @vectors;
+	my @identical_readings;
+	if( $rel->type eq 'orthographic' ) {
+		@identical_readings = grep { $_->text eq $rel->reading_a } 
+			$c->readings;
+	} else {
+		@identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
+			$c->readings;
+	}
+	foreach my $ir ( @identical_readings ) {
+		my @itarget;
+		if( $rel->type eq 'orthographic' ) {
+			@itarget = grep { $_->rank == $ir->rank 
+							  && $_->text eq $rel->reading_b } $c->readings;
+		} else {
+			@itarget = grep { $_->rank == $ir->rank 
+							  && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
+		}
+		if( @itarget ) {
+			# Warn if there is more than one hit with no orth link between them.
+			my $itmain = shift @itarget;
+			if( @itarget ) {
+				my %all_targets;
+				map { $all_targets{$_} = 1 } @itarget;
+				map { delete $all_targets{$_} } 
+					$self->related_readings( $itmain, 
+						sub { $_[0]->type eq 'orthographic' } );
+    			warn "More than one unrelated reading with text " . $itmain->text
+    				. " at rank " . $ir->rank . "!" if keys %all_targets;
+			}
+			push( @vectors, [ $ir->id, $itmain->id ] );
+		}
+	}
+	return @vectors;
+}
+
 =head2 del_relationship( $source, $target )
 
 Removes the relationship between the given readings. If the relationship is
diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm
index bdadce2..5499fc7 100644
--- a/lib/Text/Tradition/Parser/Self.pm
+++ b/lib/Text/Tradition/Parser/Self.pm
@@ -202,7 +202,8 @@ sub parse {
 	# Nodes are added via the call to add_reading above.  We only need
 	# add the relationships themselves.
 	# TODO check that scoping does trt
-	foreach my $e ( @{$rel_data->{'edges'}} ) {
+	$rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
+	foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
 		my $from = $collation->reading( $e->{'source'}->{'id'} );
 		my $to = $collation->reading( $e->{'target'}->{'id'} );
 		delete $e->{'source'};
@@ -230,6 +231,21 @@ sub parse {
 	$collation->text_from_paths();	
 }
 
+## Return the relationship that comes first in priority.
+my %LAYERS = (
+	'collated' => 1,
+	'orthographic' => 2,
+	'spelling' => 3,
+	);
+
+sub _layersort_rel {
+	my( $a, $b ) = @_;
+	my $key = exists $a->{'type'} ? 'type' : 'relationship';
+	my $at = $LAYERS{$a->{$key}} || 99;
+	my $bt = $LAYERS{$b->{$key}} || 99;
+	return $at <=> $bt;
+}
+
 1;
 
 =head1 BUGS / TODO
diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm
index a561dde..87fca3f 100644
--- a/lib/Text/Tradition/Parser/Tabular.pm
+++ b/lib/Text/Tradition/Parser/Tabular.pm
@@ -270,6 +270,24 @@ sub _make_nodes {
         $unique{$w} = $r;
         $ctr++;
     }
+    # Collate this sequence of readings via a single 'collation' relationship.
+    my @rankrdgs = values %unique;
+    my $collation_rel;
+    while( @rankrdgs ) {
+    	my $r = shift @rankrdgs;
+    	next if $r->is_meta;
+    	foreach my $nr ( @rankrdgs ) {
+    		if( $collation_rel ) {
+    			$collation->add_relationship( $r, $nr, $collation_rel );
+    		} else {
+    			$collation->add_relationship( $r, $nr, 
+    				{ 'type' => 'collated', 
+    				  'annotation' => "Parsed together for rank $index" } );
+    			$collation_rel = $collation->get_relationship( $r, $nr );
+    		}
+    	}
+    }
+    
     return \%unique;
 }