1 package Text::Tradition::Parser::Util;
7 use vars qw/ @EXPORT_OK /;
8 @EXPORT_OK = qw/ add_hash_entry check_for_repeated cmp_str collate_variants is_monotonic /;
12 Text::Tradition::Parser::Util
16 A collection of utilities used by multiple Text::Tradition parsers.
17 Probably not of external interest.
21 =head2 B<collate_variants>( $collation, @reading_ranges )
23 Given a set of readings in the form
24 ( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
25 walks through each to identify those readings that are identical. The
26 collation is a Text::Tradition::Collation object; the elements of
27 @readings are Text::Tradition::Collation::Reading objects that appear
28 on the collation graph.
32 sub collate_variants {
33 my( $collation, @reading_sets ) = @_;
35 # Make sure the reading sets are unique, but retain their ordering.
38 foreach( @reading_sets ) {
39 push( @sets, $_ ) unless $unique_sets{$_};
40 $unique_sets{$_} = $_;
43 # Two different ways to do this, depending on whether we want
44 # transposed reading nodes to be merged into one (producing a
45 # nonlinear, bidirectional graph) or not (producing a relatively
46 # linear, unidirectional graph.)
47 return $collation->linear ? _collate_linearly( $collation, @sets )
48 : _collate_nonlinearly( $collation, @sets );
51 sub _collate_linearly {
52 my( $collation, $lemma_set, @variant_sets ) = @_;
55 my $substitutions = {};
56 push( @unique, @$lemma_set );
57 while( @variant_sets ) {
58 my $variant_set = shift @variant_sets;
59 # Use diff to do this job
60 my $diff = Algorithm::Diff->new( \@unique, $variant_set,
61 {'keyGen' => \&_collation_hash} );
64 while( $diff->Next ) {
67 my @l = $diff->Items( 1 );
68 my @v = $diff->Items( 2 );
69 foreach my $i ( 0 .. $#l ) {
70 if( !$merged{$l[$i]->id} ) {
71 next if $v[$i] eq $l[$i];
72 # print STDERR sprintf( "Merging %s into %s\n",
75 $collation->merge_readings( $l[$i], $v[$i] );
76 $merged{$l[$i]->id} = 1;
77 $substitutions->{$v[$i]->id} = $l[$i];
79 print STDERR "Would have double merged " . $l[$i]->id . "\n";
82 # splice the lemma nodes into the variant set
83 my( $offset ) = $diff->Get( 'min2' );
84 splice( @$variant_set, $offset, scalar( @l ), @l );
85 push( @new_unique, @l );
87 # Keep the old unique readings
88 push( @new_unique, $diff->Items( 1 ) ) if $diff->Items( 1 );
89 # Add the new readings to the 'unique' list
90 push( @new_unique, $diff->Items( 2 ) ) if $diff->Items( 2 );
93 @unique = @new_unique;
95 return $substitutions;
98 sub _collate_nonlinearly {
99 my( $collation, $lemma_set, @variant_sets ) = @_;
102 my $substitutions = {};
103 push( @unique, @$lemma_set );
104 while( @variant_sets ) {
105 my $variant_set = shift @variant_sets;
106 # Simply match the first reading that carries the same word, so
107 # long as that reading has not yet been used to match another
108 # word in this variant. That way lies loopy madness.
111 foreach my $idx ( 0 .. $#{$variant_set} ) {
112 my $vw = $variant_set->[$idx];
113 my @same = grep { cmp_str( $_ ) eq $vw->text } @unique;
116 foreach my $i ( 0 .. $#same ) {
117 unless( $merged{$same[$i]->id} ) {
118 #print STDERR sprintf( "Merging %s into %s\n",
121 $collation->merge_readings( $same[$i], $vw );
122 $merged{$same[$i]->id} = 1;
124 $variant_set->[$idx] = $same[$i];
125 $substitutions->{$vw->id} = $same[$i];
129 unless( @same && defined($matched) ) {
130 push( @distinct, $vw );
133 push( @unique, @distinct );
135 return $substitutions;
138 sub _collation_hash {
140 return cmp_str( $node );
145 Don't use this. Really.
151 my $word = $reading->text();
152 return $word unless $reading->collation->tradition->name =~ /158/;
158 $word =~ s/quatuor/quattuor/g;
159 $word =~ s/ioannes/iohannes/g;
163 =head2 B<check_for_repeated>( @readings )
165 Given an array of items, returns any items that appear in the array more
170 sub check_for_repeated {
175 if( exists $unique{$_->id} ) {
176 push( @repeated, $_->id );
184 =head2 B<add_hash_entry>( $hash, $key, $entry )
186 Very simple utility for adding $entry to the list at $hash->{$key}.
191 my( $hash, $key, $entry ) = @_;
192 if( exists $hash->{$key} ) {
193 push( @{$hash->{$key}}, $entry );
195 $hash->{$key} = [ $entry ];
205 =item * Get rid of abomination that is cmp_str.
211 This package is free software and is provided "as is" without express
212 or implied warranty. You can redistribute it and/or modify it under
213 the same terms as Perl itself.
217 Tara L Andrews E<lt>aurum@cpan.orgE<gt>