1 package Text::Tradition::Parser::Util;
7 use vars qw/ @EXPORT_OK /;
8 @EXPORT_OK = qw/ add_hash_entry check_for_repeated cmp_str collate_variants is_monotonic /;
12 Text::Tradition::Parser::Util
16 A collection of utilities used by multiple Text::Tradition parsers.
17 Probably not of external interest.
21 =head2 B<collate_variants>( $collation, @reading_ranges )
23 Given a set of readings in the form
24 ( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
25 walks through each to identify those readings that are identical. The
26 collation is a Text::Tradition::Collation object; the elements of
27 @readings are Text::Tradition::Collation::Reading objects that appear
28 on the collation graph.
32 sub collate_variants {
33 my( $collation, @reading_sets ) = @_;
35 # Two different ways to do this, depending on whether we want
36 # transposed reading nodes to be merged into one (producing a
37 # nonlinear, bidirectional graph) or not (producing a relatively
38 # linear, unidirectional graph.)
39 return $collation->linear ? collate_linearly( @_ )
40 : collate_nonlinearly( @_ );
43 sub collate_linearly {
44 my( $collation, $lemma_set, @variant_sets ) = @_;
47 my $substitutions = {};
48 push( @unique, @$lemma_set );
49 while( @variant_sets ) {
50 my $variant_set = shift @variant_sets;
51 # Use diff to do this job
52 my $diff = Algorithm::Diff->new( \@unique, $variant_set,
53 {'keyGen' => \&_collation_hash} );
56 while( $diff->Next ) {
59 my @l = $diff->Items( 1 );
60 my @v = $diff->Items( 2 );
61 foreach my $i ( 0 .. $#l ) {
62 if( !$merged{$l[$i]->name} ) {
63 print STDERR sprintf( "Merging %s into %s\n",
66 $collation->merge_readings( $l[$i], $v[$i] );
67 $merged{$l[$i]->name} = 1;
68 $substitutions->{$v[$i]->name} = $l[$i];
70 print STDERR "Would have double merged " . $l[$i]->name . "\n";
73 # splice the lemma nodes into the variant set
74 my( $offset ) = $diff->Get( 'min2' );
75 splice( @$variant_set, $offset, scalar( @l ), @l );
76 push( @new_unique, @l );
78 # Keep the old unique readings
79 push( @new_unique, $diff->Items( 1 ) ) if $diff->Items( 1 );
80 # Add the new readings to the 'unique' list
81 push( @new_unique, $diff->Items( 2 ) ) if $diff->Items( 2 );
84 @unique = @new_unique;
86 return $substitutions;
89 sub collate_nonlinearly {
90 my( $collation, $lemma_set, @variant_sets ) = @_;
93 my $substitutions = {};
94 push( @unique, @$lemma_set );
95 while( @variant_sets ) {
96 my $variant_set = shift @variant_sets;
97 # Simply match the first reading that carries the same word, so
98 # long as that reading has not yet been used to match another
99 # word in this variant. That way lies loopy madness.
102 foreach my $idx ( 0 .. $#{$variant_set} ) {
103 my $vw = $variant_set->[$idx];
104 my @same = grep { cmp_str( $_ ) eq $vw->label } @unique;
107 foreach my $i ( 0 .. $#same ) {
108 unless( $merged{$same[$i]->name} ) {
109 #print STDERR sprintf( "Merging %s into %s\n",
112 $collation->merge_readings( $same[$i], $vw );
113 $merged{$same[$i]->name} = 1;
115 $variant_set->[$idx] = $same[$i];
116 $substitutions->{$vw->name} = $same[$i];
120 unless( @same && defined($matched) ) {
121 push( @distinct, $vw );
124 push( @unique, @distinct );
126 return $substitutions;
129 sub _collation_hash {
131 return cmp_str( $node );
136 my $word = $reading->label();
142 $word =~ s/quatuor/quattuor/g;
143 $word =~ s/ioannes/iohannes/g;
147 =head2 B<check_for_repeated>( @readings )
149 Given an array of items, returns any items that appear in the array more
154 sub check_for_repeated {
159 if( exists $unique{$_->name} ) {
160 push( @repeated, $_->name );
162 $unique{$_->name} = 1;
169 my( $hash, $key, $entry ) = @_;
170 if( exists $hash->{$key} ) {
171 push( @{$hash->{$key}}, $entry );
173 $hash->{$key} = [ $entry ];
178 my( @readings ) = @_;
179 my( $common, $min, $max ) = ( -1, -1, -1 );
180 foreach my $rdg ( @readings ) {
181 # print STDERR "Checking reading " . $rdg->name . "/" . $rdg->text . " - "
182 # . $rdg->position->reference ."\n";
183 return 0 if $rdg->position->common < $common;
184 if( $rdg->position->common == $common ) {
185 return 0 if $rdg->position->min <= $min;
186 return 0 if $rdg->position->max <= $max;
188 $common = $rdg->position->common;
189 $min = $rdg->position->min;
190 $max = $rdg->position->max;
201 =item * Get rid of abomination that is cmp_str.
207 This package is free software and is provided "as is" without express
208 or implied warranty. You can redistribute it and/or modify it under
209 the same terms as Perl itself.
213 Tara L Andrews E<lt>aurum@cpan.orgE<gt>