use vars qw/ @EXPORT_OK /;
@EXPORT_OK = qw/ add_hash_entry check_for_repeated cmp_str collate_variants is_monotonic /;
-=item B<collate_variants>
+=head1 NAME
-collate_variants( $collation, @reading_ranges )
+Text::Tradition::Parser::Util
+
+=head1 DESCRIPTION
+
+A collection of utilities used by multiple Text::Tradition parsers.
+Probably not of external interest.
+
+=head1 METHODS
+
+=head2 B<collate_variants>( $collation, @reading_ranges )
Given a set of readings in the form
( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
@readings are Text::Tradition::Collation::Reading objects that appear
on the collation graph.
-TODO: Handle collapsed and non-collapsed transpositions.
-
=cut
sub collate_variants {
my( $collation, @reading_sets ) = @_;
+
+ # Make sure the reading sets are unique, but retain their ordering.
+ my %unique_sets;
+ my @sets;
+ foreach( @reading_sets ) {
+ push( @sets, $_ ) unless $unique_sets{$_};
+ $unique_sets{$_} = $_;
+ }
# Two different ways to do this, depending on whether we want
# transposed reading nodes to be merged into one (producing a
# nonlinear, bidirectional graph) or not (producing a relatively
# linear, unidirectional graph.)
- return $collation->linear ? collate_linearly( @_ )
- : collate_nonlinearly( @_ );
+ return $collation->linear ? _collate_linearly( $collation, @sets )
+ : _collate_nonlinearly( $collation, @sets );
}
-sub collate_linearly {
+sub _collate_linearly {
my( $collation, $lemma_set, @variant_sets ) = @_;
my @unique;
my @l = $diff->Items( 1 );
my @v = $diff->Items( 2 );
foreach my $i ( 0 .. $#l ) {
- if( !$merged{$l[$i]->name} ) {
- print STDERR sprintf( "Merging %s into %s\n",
- $v[$i]->name,
- $l[$i]->name );
+ if( !$merged{$l[$i]->id} ) {
+ next if $v[$i] eq $l[$i];
+# print STDERR sprintf( "Merging %s into %s\n",
+# $v[$i]->id,
+# $l[$i]->id );
$collation->merge_readings( $l[$i], $v[$i] );
- $merged{$l[$i]->name} = 1;
- $substitutions->{$v[$i]->name} = $l[$i];
+ $merged{$l[$i]->id} = 1;
+ $substitutions->{$v[$i]->id} = $l[$i];
} else {
- print STDERR "Would have double merged " . $l[$i]->name . "\n";
+ print STDERR "Would have double merged " . $l[$i]->id . "\n";
}
}
# splice the lemma nodes into the variant set
return $substitutions;
}
-sub collate_nonlinearly {
+sub _collate_nonlinearly {
my( $collation, $lemma_set, @variant_sets ) = @_;
my @unique;
my %merged;
foreach my $idx ( 0 .. $#{$variant_set} ) {
my $vw = $variant_set->[$idx];
- my @same = grep { cmp_str( $_ ) eq $vw->label } @unique;
+ my @same = grep { cmp_str( $_ ) eq $vw->text } @unique;
my $matched;
if( @same ) {
foreach my $i ( 0 .. $#same ) {
- unless( $merged{$same[$i]->name} ) {
+ unless( $merged{$same[$i]->id} ) {
#print STDERR sprintf( "Merging %s into %s\n",
- # $vw->name,
- # $same[$i]->name );
+ # $vw->id,
+ # $same[$i]->id );
$collation->merge_readings( $same[$i], $vw );
- $merged{$same[$i]->name} = 1;
+ $merged{$same[$i]->id} = 1;
$matched = $i;
$variant_set->[$idx] = $same[$i];
- $substitutions->{$vw->name} = $same[$i];
+ $substitutions->{$vw->id} = $same[$i];
}
}
}
return cmp_str( $node );
}
-=item B<cmp_str>
+=head2 B<cmp_str>
-Pretend you never saw this method. Really it needs to not be hardcoded.
+Don't use this. Really.
=cut
sub cmp_str {
my( $reading ) = @_;
- my $word = $reading->label();
+ my $word = $reading->text();
+ return $word unless $reading->collation->tradition->name =~ /158/;
$word = lc( $word );
$word =~ s/\W//g;
$word =~ s/v/u/g;
return $word;
}
-=item B<collate_variants>
-
-my @rep = check_for_repeated( @readings )
+=head2 B<check_for_repeated>( @readings )
Given an array of items, returns any items that appear in the array more
than once.
my %unique;
my @repeated;
foreach ( @seq ) {
- if( exists $unique{$_->name} ) {
- push( @repeated, $_->name );
+ if( exists $unique{$_->id} ) {
+ push( @repeated, $_->id );
} else {
- $unique{$_->name} = 1;
+ $unique{$_->id} = 1;
}
}
return @repeated;
}
+=head2 B<add_hash_entry>( $hash, $key, $entry )
+
+Very simple utility for adding $entry to the list at $hash->{$key}.
+
+=cut
+
sub add_hash_entry {
my( $hash, $key, $entry ) = @_;
if( exists $hash->{$key} ) {
}
}
-sub is_monotonic {
- my( @readings ) = @_;
- my( $common, $min, $max ) = ( -1, -1, -1 );
- foreach my $rdg ( @readings ) {
-# print STDERR "Checking reading " . $rdg->name . "/" . $rdg->text . " - "
-# . $rdg->position->reference ."\n";
- return 0 if $rdg->position->common < $common;
- if( $rdg->position->common == $common ) {
- return 0 if $rdg->position->min <= $min;
- return 0 if $rdg->position->max <= $max;
- }
- $common = $rdg->position->common;
- $min = $rdg->position->min;
- $max = $rdg->position->max;
- }
- return 1;
-}
\ No newline at end of file
+1;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Get rid of abomination that is cmp_str.
+
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews E<lt>aurum@cpan.orgE<gt>