remove some debugging statements

[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Util.pm
diff --git a/lib/Text/Tradition/Parser/Util.pm b/lib/Text/Tradition/Parser/Util.pm

index 6d9dab1..de18a6f 100644 (file)
--- a/lib/Text/Tradition/Parser/Util.pm
+++ b/lib/Text/Tradition/Parser/Util.pm
@@ -7,9 +7,18 @@ use Exporter 'import';
 use vars qw/ @EXPORT_OK /;
 @EXPORT_OK = qw/ add_hash_entry check_for_repeated cmp_str collate_variants is_monotonic /;
 
-=item B<collate_variants>
+=head1 NAME
 
-collate_variants( $collation, @reading_ranges )
+Text::Tradition::Parser::Util
+
+=head1 DESCRIPTION
+
+A collection of utilities used by multiple Text::Tradition parsers.  
+Probably not of external interest.
+
+=head1 METHODS
+
+=head2 B<collate_variants>( $collation, @reading_ranges )
 
 Given a set of readings in the form 
 ( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
@@ -18,22 +27,28 @@ collation is a Text::Tradition::Collation object; the elements of
 @readings are Text::Tradition::Collation::Reading objects that appear
 on the collation graph.
 
-TODO: Handle collapsed and non-collapsed transpositions.
-
 =cut
 
 sub collate_variants {
     my( $collation, @reading_sets ) = @_;
+    
+    # Make sure the reading sets are unique, but retain their ordering.
+    my %unique_sets;
+    my @sets;
+    foreach( @reading_sets ) {
+       push( @sets, $_ ) unless $unique_sets{$_};
+       $unique_sets{$_} = $_;
+    }
 
     # Two different ways to do this, depending on whether we want
     # transposed reading nodes to be merged into one (producing a
     # nonlinear, bidirectional graph) or not (producing a relatively
     # linear, unidirectional graph.)
-    return $collation->linear ? collate_linearly( @_ )
-        : collate_nonlinearly( @_ );
+    return $collation->linear ? _collate_linearly( $collation, @sets )
+        : _collate_nonlinearly( $collation, @sets );
 }
 
-sub collate_linearly {
+sub _collate_linearly {
     my( $collation, $lemma_set, @variant_sets ) = @_;
 
     my @unique;
@@ -52,15 +67,16 @@ sub collate_linearly {
                 my @l = $diff->Items( 1 );
                 my @v = $diff->Items( 2 );
                 foreach my $i ( 0 .. $#l ) {
-                    if( !$merged{$l[$i]->name} ) {
-                        print STDERR sprintf( "Merging %s into %s\n", 
-                                             $v[$i]->name,
-                                             $l[$i]->name );
+                    if( !$merged{$l[$i]->id} ) {
+                        next if $v[$i] eq $l[$i];
+#                         print STDERR sprintf( "Merging %s into %s\n", 
+#                                              $v[$i]->id,
+#                                              $l[$i]->id );
                         $collation->merge_readings( $l[$i], $v[$i] );
-                        $merged{$l[$i]->name} = 1;
-                        $substitutions->{$v[$i]->name} = $l[$i];
+                        $merged{$l[$i]->id} = 1;
+                        $substitutions->{$v[$i]->id} = $l[$i];
                     } else {
-                        print STDERR "Would have double merged " . $l[$i]->name . "\n";
+                        print STDERR "Would have double merged " . $l[$i]->id . "\n";
                     }
                 }
                 # splice the lemma nodes into the variant set
@@ -79,7 +95,7 @@ sub collate_linearly {
     return $substitutions;
 }
 
-sub collate_nonlinearly {
+sub _collate_nonlinearly {
     my( $collation, $lemma_set, @variant_sets ) = @_;
     
     my @unique;
@@ -94,19 +110,19 @@ sub collate_nonlinearly {
         my %merged;
         foreach my $idx ( 0 .. $#{$variant_set} ) {
             my $vw = $variant_set->[$idx];
-            my @same = grep { cmp_str( $_ ) eq $vw->label } @unique;
+            my @same = grep { cmp_str( $_ ) eq $vw->text } @unique;
             my $matched;
             if( @same ) {
                 foreach my $i ( 0 .. $#same ) {
-                    unless( $merged{$same[$i]->name} ) {
+                    unless( $merged{$same[$i]->id} ) {
                         #print STDERR sprintf( "Merging %s into %s\n", 
-                        #                     $vw->name,
-                        #                     $same[$i]->name );
+                        #                     $vw->id,
+                        #                     $same[$i]->id );
                         $collation->merge_readings( $same[$i], $vw );
-                        $merged{$same[$i]->name} = 1;
+                        $merged{$same[$i]->id} = 1;
                         $matched = $i;
                         $variant_set->[$idx] = $same[$i];
-                        $substitutions->{$vw->name} = $same[$i];
+                        $substitutions->{$vw->id} = $same[$i];
                     }
                 }
             }
@@ -124,15 +140,16 @@ sub _collation_hash {
     return cmp_str( $node );
 }
 
-=item B<cmp_str>
+=head2 B<cmp_str>
 
-Pretend you never saw this method.  Really it needs to not be hardcoded.
+Don't use this. Really.
 
 =cut
 
 sub cmp_str {
     my( $reading ) = @_;
-    my $word = $reading->label();
+    my $word = $reading->text();
+    return $word unless $reading->collation->tradition->name =~ /158/;
     $word = lc( $word );
     $word =~ s/\W//g;
     $word =~ s/v/u/g;
@@ -143,9 +160,7 @@ sub cmp_str {
     return $word;
 }
 
-=item B<collate_variants>
-
-my @rep = check_for_repeated( @readings )
+=head2 B<check_for_repeated>( @readings )
 
 Given an array of items, returns any items that appear in the array more
 than once.
@@ -157,15 +172,21 @@ sub check_for_repeated {
     my %unique;
     my @repeated;
     foreach ( @seq ) {
-        if( exists $unique{$_->name} ) {
-            push( @repeated, $_->name );
+        if( exists $unique{$_->id} ) {
+            push( @repeated, $_->id );
         } else {
-            $unique{$_->name} = 1;
+            $unique{$_->id} = 1;
         }
     }
     return @repeated;
 }
 
+=head2 B<add_hash_entry>( $hash, $key, $entry )
+
+Very simple utility for adding $entry to the list at $hash->{$key}.
+
+=cut
+
 sub add_hash_entry {
     my( $hash, $key, $entry ) = @_;
     if( exists $hash->{$key} ) {
@@ -175,20 +196,22 @@ sub add_hash_entry {
     }
 }
 
-sub is_monotonic {
-    my( @readings ) = @_;
-    my( $common, $min, $max ) = ( -1, -1, -1 );
-    foreach my $rdg ( @readings ) {
-#         print STDERR "Checking reading " . $rdg->name . "/" . $rdg->text . " - " 
-#         . $rdg->position->reference ."\n";
-        return 0 if $rdg->position->common < $common;
-        if( $rdg->position->common == $common ) {
-            return 0 if $rdg->position->min <= $min;
-            return 0 if $rdg->position->max <= $max;
-        }
-        $common = $rdg->position->common;
-        $min = $rdg->position->min;
-        $max = $rdg->position->max;
-    }
-    return 1;
-}
\ No newline at end of file
+1;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Get rid of abomination that is cmp_str.
+
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty.  You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews E<lt>aurum@cpan.orgE<gt>