[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / BaseText.pm

package Text::Tradition::Parser::BaseText;

use strict;
use warnings;
use Exporter 'import';
use vars qw( @EXPORT_OK );
@EXPORT_OK = qw( merge_base );

=head1 NAME

Text::Tradition::Parser::BaseText

=head1 SYNOPSIS

  use Text::Tradition::Parser::BaseText qw( merge_base );
  merge_base( $graph, 'reference.txt', @apparatus_entries );

=head1 DESCRIPTION

For an overview of the package, see the documentation for the
Text::Tradition::Graph module.

This module is meant for use with certain of the other Parser classes
- whenever a list of variants is given with reference to a base text,
these must be joined into a single collation.  The parser should
therefore make a list of variants and their locations, and BaseText
will join those listed variants onto the reference text.  

=head1 SUBROUTINES

=over

=item B<merge_base>

merge_base( $graph, 'reference.txt', @apparatus_entries )

Takes three arguments: a newly-initialized Text::Tradition::Graph
object, a text file containing the reference text, and a list of
variants (apparatus entries).  Adds the base text to the graph, and
joins the variants to that.

The list of variants is an array of hash references; each hash takes
the form
 { '_id' => line reference,
   'rdg_0' => lemma reading,
   'rdg_1' => first variant,
   ...  # and so on until all distinct readings are listed
   'WitnessA' => 'rdg_0',
   'WitnessB' => 'rdg_1',
   ...  # and so on until all witnesses are listed with their readings
 }

Any hash key that is not of the form /^rdg_\d+$/ and that does not
begin with an underscore is assumed to be a witness name.  Any 'meta'
information to be passed must be passed in a key with a leading
underscore in its name.

=cut

sub merge_base {
    # Build the collation graph: load the base text into $graph, then
    # attach every apparatus entry to it.
    #
    #   $graph       - graph object to populate; this sub calls start (via
    #                  read_base), next_word, prior_word, node_sequence,
    #                  add_node, add_edge, edges and merge_nodes on it.
    #   $base_file   - path of the reference text, handed to read_base.
    #   @app_entries - hash references, one per apparatus entry, holding
    #                  '_id' ("line.num"), 'rdg_N' readings and one key
    #                  per witness whose value names its reading.
    #
    # Entries whose line or lemma cannot be located in the base text are
    # reported with warn() and skipped.
    my( $graph, $base_file, @app_entries ) = @_;
    my @base_line_starts = read_base( $base_file, $graph );

    # DEBUG for problematic entries: set this to an entry id such as
    # "21.8" to trace the lemma matching for that entry on STDERR.  The
    # variable has to be declared (it used to exist only in a comment)
    # or the file does not compile under 'use strict'.
    my $scrutinize = '';

    foreach my $app ( @app_entries ) {
        my( $line, $num ) = split( /\./, $app->{_id} );
        # DEBUG with a short graph
        # last if $line > 2;
        my $first_line_node = $base_line_starts[ $line ];
        my $too_far = $base_line_starts[ $line+1 ];
        # A line reference outside the base text would otherwise end in
        # a method call on undef; report it and skip the entry instead.
        unless( defined $first_line_node && defined $too_far ) {
            warn "No base text line $line for entry $line.$num";
            next;
        }

        my $lemma = $app->{rdg_0};
        my $seq = 1;
        # Is this the Nth occurrence of this reading in the line?
        # A trailing digit (optionally preceded by '_') gives N.
        if( $lemma =~ s/(_)?(\d)$// ) {
            $seq = $2;
        }
        my @lemma_words = split( /\s+/, $lemma );

        # Now search for the lemma words within this line.
        my $lemma_start = $first_line_node;
        my $lemma_end;
        my %seen;
        while( defined $lemma_start && $lemma_start ne $too_far ) {
            # Loop detection
            if( $seen{ $lemma_start->name() } ) {
                warn "Detected loop at " . $lemma_start->name() .
                    ", ref $line,$num";
                last;
            }
            $seen{ $lemma_start->name() } = 1;

            # Try to match the lemma.
            my $unmatch = 0;
            print STDERR "Matching " . cmp_str( $lemma_start) . " against " .
                $lemma_words[0] . "...\n"
                if "$line.$num" eq $scrutinize;
            if( cmp_str( $lemma_start ) eq $lemma_words[0] ) {
                # Skip it if we need a match that is not the first.
                if( --$seq < 1 ) {
                    # Now we have to compare the rest of the words here.
                    if( scalar( @lemma_words ) > 1 ) {
                        my $next_node = $graph->next_word( $lemma_start );
                        foreach my $w ( @lemma_words[1..$#lemma_words] ) {
                            printf STDERR "Now matching %s against %s\n",
                                    cmp_str($next_node), $w
                                if "$line.$num" eq $scrutinize;
                            # Running off the end of the graph counts
                            # as a failed match.
                            if( !defined( $next_node )
                                || $w ne cmp_str($next_node) ) {
                                $unmatch = 1;
                                last;
                            } else {
                                $lemma_end = $next_node;
                                $next_node = $graph->next_word( $lemma_end );
                            }
                        }
                    } else {
                        $lemma_end = $lemma_start;
                    }
                } else {
                    $unmatch = 1;
                }
            }
            # Stop as soon as a complete match has been found; otherwise
            # forget any partial match and move on to the next word.
            last unless ( $unmatch || !defined( $lemma_end ) );
            $lemma_end = undef;
            $lemma_start = $graph->next_word( $lemma_start );
        }

        unless( $lemma_end ) {
            warn "No match found for @lemma_words at $line.$num";
            next;
        }

        # These are no longer common nodes; unmark them as such.
        foreach my $lemma_node ( $graph->node_sequence( $lemma_start,
                                                        $lemma_end ) ) {
            $lemma_node->set_attribute( 'class', 'lemma' );
        }

        # Now we have our lemma nodes; we add the variant nodes to the graph.

        # For each reading that is not rdg_0, we make a chain of nodes
        # and connect them to the anchor.  Edges are named after the mss
        # that are relevant.  Readings and witnesses are visited in
        # sorted order so that the result does not depend on hash order.
        foreach my $k ( sort { $a cmp $b } grep { /^rdg/ } keys( %$app ) ) {
            next if $k eq 'rdg_0'; # that's the lemma.
            my @variant = split( /\s+/, $app->{$k} );
            @variant = () if $app->{$k} eq '/'; # This is an omission.
            # Witnesses are the keys that are neither readings (rdg_N)
            # nor meta information (leading underscore).
            my @mss = sort { $a cmp $b }
                grep { $_ !~ /^rdg_\d+$/ && $_ !~ /^_/
                       && defined( $app->{$_} ) && $app->{$_} eq $k }
                keys( %$app );

            unless( @mss ) {
                print STDERR "Skipping '@variant' at $line.$num: no mss\n";
                next;
            }

            # Determine the label name for the edges here.
            my $edge_name = join(', ', @mss );

            # Make the variant into a set of nodes.
            my $ctr = 0;
            my $last_node = $graph->prior_word( $lemma_start );
            my $var_start;
            foreach my $vw ( @variant ) {
                my $vwname = "$k/$line.$num.$ctr"; $ctr++;
                my $vwnode = $graph->add_node( $vwname );
                $vwnode->set_attribute( 'label', $vw );
                $vwnode->set_attribute( 'class', 'variant' );
                $graph->add_edge( $last_node, $vwnode, $edge_name );
                $var_start = $vwnode unless $var_start;
                $last_node = $vwnode;
            }
            # Now hook it up at the end.  For an omission this links the
            # word before the lemma directly to the word after it.
            $graph->add_edge( $last_node, $graph->next_word( $lemma_end ),
                              $edge_name );

            # Now collate and collapse the identical nodes within the graph.
            collate_variant( $graph, $lemma_start, $lemma_end,
                             $var_start, $last_node );
        }
    }

    ## Now in theory I have a graph.  I want to make it a little easier to
    ## read.  So I collapse nodes that have only one edge in and one edge
    ## out, and I do this by looking at the edges.
    # NOTE(review): the edge list is fetched once and nodes are merged
    # while walking it; confirm that the graph class tolerates this.

    foreach my $edge ( $graph->edges() ) {
        my @out_edges = $edge->from()->outgoing();
        my @in_edges = $edge->to()->incoming();

        next unless scalar( @out_edges ) == 1;
        next unless scalar( @in_edges ) == 1;
        next unless $out_edges[0] eq $in_edges[0];
        # In theory if we've got this far, we're safe, but just to
        # double-check...
        next unless $out_edges[0] eq $edge;

        $graph->merge_nodes( $edge->from(), $edge->to(), ' ' );
    }
}

=item B<read_base>

my @line_beginnings = read_base( 'reference.txt', $graph );

Takes a text file and a (presumed empty) graph object, adds the words
as simple linear nodes to the graph, and returns a list of nodes that
represent the beginning of lines. This graph is now the starting point
for application of apparatus entries in merge_base, e.g. from a CSV
file or a Classical Text Editor file.

=cut

sub read_base {
    # Read the reference text and add it to $graph as one linear chain
    # of word nodes joined by "base text" edges.
    #
    #   $base_file - path of the text file to read.
    #   $graph     - graph object; start, add_node and add_edge are
    #                called on it.
    #
    # Returns a list whose element 0 is the graph's start node, whose
    # element N is the first word node of the Nth non-empty line, and
    # whose last element is the '#END#' node.  Dies if the file cannot
    # be opened.
    my( $base_file, $graph ) = @_;

    # This array gives the first node for each line.  We put the
    # common starting point in line zero.
    my $last_node = $graph->start();
    my $lineref_array = [ $last_node ]; # There is no line zero.

    # Three-argument open on a lexical handle: the file name is never
    # interpreted as a mode, and no global bareword handle is involved.
    open( my $base_fh, '<', $base_file )
        or die "Could not open file $base_file: $!";
    while( my $text_line = <$base_fh> ) {
        # Make the nodes, and connect them up for the base, but also
        # save the first node of each line in an array for the purpose.
        chomp $text_line;
        my @words = split( ' ', $text_line );
        my $started = 0;
        my $wordref = 0;
        # Lines are numbered by how many line starts are recorded so
        # far, so a line without words does not use up a number.
        my $lineref = scalar @$lineref_array;
        foreach my $w ( @words ) {
            my $noderef = join( ',', $lineref, ++$wordref );
            my $node = $graph->add_node( $noderef );
            $node->set_attribute( 'label', $w );
            $node->set_attribute( 'class', 'common' );
            unless( $started ) {
                push( @$lineref_array, $node );
                $started = 1;
            }
            if( $last_node ) {
                $graph->add_edge( $last_node, $node, "base text" );
                $last_node = $node;
            } # TODO there should be no else here...
        }
    }
    close $base_fh;
    # Ending point for all texts
    my $endpoint = $graph->add_node( '#END#' );
    $graph->add_edge( $last_node, $endpoint, "base text" );
    push( @$lineref_array, $endpoint );

    return( @$lineref_array );
}

=item B<collate_variant>

collate_variant( $graph, $lemma_start, $lemma_end, $var_start, $var_end );

Given a lemma and a variant as start- and endpoints on the graph,
walks through each to identify those nodes that are identical.  The
graph is a Text::Tradition::Graph object; the other arguments are
Graph::Easy::Node objects that appear on the graph.

TODO: Handle collapsed and non-collapsed transpositions.

=cut

sub collate_variant {
    # Mini-collation of one variant against its lemma: every variant
    # node whose label equals the normalized form (cmp_str) of a lemma
    # node is merged into that lemma node.
    #
    #   $graph                    - graph object; next_word, prior_word
    #                               and merge_nodes are called on it.
    #   $lemma_start, $lemma_end  - first and last node of the lemma.
    #   $var_start, $var_end      - first and last node of the variant;
    #                               $var_start is undef for an omission.
    my( $graph, $lemma_start, $lemma_end, $var_start, $var_end ) = @_;
    # If var_start is undef, then the variant is an omission and
    # there's nothing to collate. Return.
    return unless $var_start;

    # I want to look at the nodes in the variant and lemma, and
    # collapse nodes that are the same word.  This is mini-collation.
    # Lemma nodes that have already absorbed a variant node are recorded
    # by node name, so each lemma node is used at most once even when
    # the same word occurs several times in the lemma.
    my %collapsed = ();
    # There will only be one outgoing edge at first, so this is safe.
    my @out = $var_start->outgoing();
    my $var_label = $out[0]->label();

    my @lemma_nodes;
    while( $lemma_start ne $lemma_end ) {
        push( @lemma_nodes, $lemma_start );
        $lemma_start = $graph->next_word( $lemma_start );
    }
    push( @lemma_nodes, $lemma_end );

    # The variant chain is followed along the edge label it was created
    # with.
    my @variant_nodes;
    while( $var_start ne $var_end ) {
        push( @variant_nodes, $var_start );
        $var_start = $graph->next_word( $var_start, $var_label );
    }
    push( @variant_nodes, $var_end );

    # Go through the variant nodes, and if we find a lemma node that
    # hasn't yet been collapsed with a node, equate them.
    # NOTE(review): the variant label is compared as-is against the
    # normalized lemma word, so variant words are presumably expected
    # to arrive already normalized - confirm against the parsers.

    foreach my $var_node ( @variant_nodes ) {
        my $word = $var_node->label();
        foreach my $lemma_node ( @lemma_nodes ) {
            next unless $word eq cmp_str( $lemma_node );
            next if $collapsed{ $lemma_node->name };
            # Collapse the nodes.
            printf STDERR "Merging nodes %s/%s and %s/%s\n",
                $lemma_node->name, $lemma_node->label,
                $var_node->name, $var_node->label;
            $graph->merge_nodes( $lemma_node, $var_node );
            $collapsed{ $lemma_node->name } = 1;
            # Now collapse any multiple edges to and from the node.
            # Rely on the presence of the 'base text' edge.
            remove_duplicate_edges( $graph,
                                    $graph->prior_word( $lemma_node ),
                                    $lemma_node );
            remove_duplicate_edges( $graph, $lemma_node,
                                    $graph->next_word( $lemma_node ) );
            # This variant node has now been merged away; it must not
            # be merged into a second lemma node as well.
            last;
        }
    }
}

=item B<remove_duplicate_edges>

remove_duplicate_edges( $graph, $from, $to );

Given two nodes, reduce the number of edges between those nodes to
one.  If one of the edges is a base text edge, that edge is the one
kept; otherwise the labels of all the edges are combined on the
surviving edge.

=cut

sub remove_duplicate_edges {
    # Leave at most one edge running from $from to $to.
    #
    #   $graph - graph object; del_edge is called on it.
    #   $from  - node whose edges_to( $to ) lists the parallel edges.
    #   $to    - node at the other end of those edges.
    my( $graph, $from, $to ) = @_;
    my @parallel = $from->edges_to( $to );
    if( @parallel > 1 ) {
        my @base_edges = grep { $_->label eq 'base text' } @parallel;
        if( @base_edges ) {
            # A base text edge exists: keep the first such edge and
            # drop everything else.
            my $keeper = $base_edges[0];
            foreach my $edge ( grep { $_ ne $keeper } @parallel ) {
                $graph->del_edge( $edge );
            }
        } else {
            # No base text edge: the first edge survives and carries
            # the labels of all of them.
            my $joined = join( ', ', map { $_->label() } @parallel );
            my( $survivor, @extras ) = @parallel;
            $survivor->set_attribute( 'label', $joined );
            $graph->del_edge( $_ ) foreach @extras;
        }
    }
}

=item B<cmp_str>

Pretend you never saw this method.  Really it needs to not be hardcoded.

=cut

sub cmp_str {
    # Return the comparison form of a node's label: lower-cased, with
    # non-word characters removed, v/j folded to u/i, and a few
    # hard-coded spelling variants brought to one form.
    #
    #   $node - object providing a label() method.
    my( $node ) = @_;
    my $word = lc( $node->label() );
    $word =~ s/\W//g;
    # v -> u and j -> i in one pass.
    $word =~ tr/vj/ui/;
    # Spelling equivalences, applied in this order.
    foreach my $rule ( [ 'cha',     'ca' ],
                       [ 'quatuor', 'quattuor' ],
                       [ 'ioannes', 'iohannes' ] ) {
        my( $from, $to ) = @$rule;
        $word =~ s/$from/$to/g;
    }
    return $word;
}

=back

=head1 LICENSE

This package is free software and is provided "as is" without express
or implied warranty.  You can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 AUTHOR

Tara L Andrews, aurum@cpan.org

=cut

1;
Commit	Line	Data
e58153d6	1	package Text::Tradition::Parser::BaseText;
b49c4318	2
	3	use strict;
	4	use warnings;
	5	use Exporter 'import';
	6	use vars qw( @EXPORT_OK );
	7	@EXPORT_OK = qw( merge_base );
	8
2ceca8c3	9	=head1 NAME
	10
	11	Text::Tradition::Parser::BaseText
	12
	13	=head1 SYNOPSIS
	14
	15	use Text::Tradition::Parser::BaseText qw( merge_base );
	16	merge_base( $graph, 'reference.txt', @apparatus_entries )
	17
	18	=head1 DESCRIPTION
	19
	20	For an overview of the package, see the documentation for the
	21	Text::Tradition::Graph module.
	22
	23	This module is meant for use with certain of the other Parser classes
	24	- whenever a list of variants is given with reference to a base text,
	25	these must be joined into a single collation. The parser should
	26	therefore make a list of variants and their locations, and BaseText
	27	will join those listed variants onto the reference text.
	28
	29	=head1 SUBROUTINES
	30
	31	=over
	32
	33	=item B<merge_base>
	34
	35	merge_base( $graph, 'reference.txt', @apparatus_entries )
	36
	37	Takes three arguments: a newly-initialized Text::Tradition::Graph
	38	object, a text file containing the reference text, and a list of
	39	variants (apparatus entries). Adds the base text to the graph, and
	40	joins the variants to that.
	41
	42	The list of variants is an array of hash references; each hash takes
	43	the form
	44	{ '_id' => line reference,
	45	'rdg_0' => lemma reading,
	46	'rdg_1' => first variant,
	47	... # and so on until all distinct readings are listed
	48	'WitnessA' => 'rdg_0',
	49	'WitnessB' => 'rdg_1',
	50	... # and so on until all witnesses are listed with their readings
	51	}
	52
	53	Any hash key that is not of the form /^rdg_\d+$/ and that does not
	54	begin with an underscore is assumed to be a witness name. Any 'meta'
	55	information to be passed must be passed in a key with a leading
	56	underscore in its name.
	57
	58	=cut
	59
b49c4318	60	sub merge_base {
	61	my( $graph, $base_file, @app_entries ) = @_;
	62	my @base_line_starts = read_base( $base_file, $graph );
	63
	64	foreach my $app ( @app_entries ) {
	65	my( $line, $num ) = split( /\./, $app->{_id} );
	66	# DEBUG with a short graph
	67	# last if $line > 2;
2ceca8c3	68	# DEBUG for problematic entries
2ceca8c3	69	# my $scrutinize = "21.8";
b49c4318	70	my $first_line_node = $base_line_starts[ $line ];
	71	my $too_far = $base_line_starts[ $line+1 ];
	72
	73	my $lemma = $app->{rdg_0};
	74	my $seq = 1;
	75	# Is this the Nth occurrence of this reading in the line?
	76	if( $lemma =~ s/(_)?(\d)$// ) {
	77	$seq = $2;
	78	}
	79	my @lemma_words = split( /\s+/, $lemma );
	80
	81	# Now search for the lemma words within this line.
	82	my $lemma_start = $first_line_node;
	83	my $lemma_end;
	84	my %seen;
	85	while( $lemma_start ne $too_far ) {
	86	# Loop detection
	87	if( $seen{ $lemma_start->name() } ) {
	88	warn "Detected loop at " . $lemma_start->name() .
	89	", ref $line,$num";
	90	last;
	91	}
	92	$seen{ $lemma_start->name() } = 1;
	93
	94	# Try to match the lemma.
	95	my $unmatch = 0;
	96	print STDERR "Matching " . cmp_str( $lemma_start) . " against " .
	97	$lemma_words[0] . "...\n"
	98	if "$line.$num" eq $scrutinize;
	99	if( cmp_str( $lemma_start ) eq $lemma_words[0] ) {
	100	# Skip it if we need a match that is not the first.
	101	if( --$seq < 1 ) {
	102	# Now we have to compare the rest of the words here.
	103	if( scalar( @lemma_words ) > 1 ) {
	104	my $next_node = $graph->next_word( $lemma_start );
	105	foreach my $w ( @lemma_words[1..$#lemma_words] ) {
	106	printf STDERR "Now matching %s against %s\n",
	107	cmp_str($next_node), $w
	108	if "$line.$num" eq $scrutinize;
	109	if( $w ne cmp_str($next_node) ) {
	110	$unmatch = 1;
	111	last;
	112	} else {
	113	$lemma_end = $next_node;
	114	$next_node = $graph->next_word( $lemma_end );
	115	}
	116	}
	117	} else {
	118	$lemma_end = $lemma_start;
	119	}
	120	} else {
	121	$unmatch = 1;
	122	}
	123	}
	124	last unless ( $unmatch \|\| !defined( $lemma_end ) );
	125	$lemma_end = undef;
	126	$lemma_start = $graph->next_word( $lemma_start );
	127	}
	128
	129	unless( $lemma_end ) {
	130	warn "No match found for @lemma_words at $line.$num";
	131	next;
	132	} else {
	133	# These are no longer common nodes; unmark them as such.
134	my @lemma_nodes = $graph->node_sequence( $lemma_start,
135	$lemma_end );
136	map { $_->set_attribute( 'class', 'lemma' ) } @lemma_nodes;
137	}
138
139	# Now we have our lemma nodes; we add the variant nodes to the graph.
140
141	# For each reading that is not rdg_0, we make a chain of nodes
142	# and connect them to the anchor. Edges are named after the mss
143	# that are relevant.
144	foreach my $k ( grep { /^rdg/ } keys( %$app ) ) {
145	next if $k eq 'rdg_0'; # that's the lemma.
146	my @variant = split( /\s+/, $app->{$k} );
147	@variant = () if $app->{$k} eq '/'; # This is an omission.
148	my @mss = grep { $app->{$_} eq $k } keys( %$app );
149
150	unless( @mss ) {
151	print STDERR "Skipping '@variant' at $line.$num: no mss\n";
152	next;
153	}
154
155	# Determine the label name for the edges here.
156	my $edge_name = join(', ', @mss );
157
158	# Make the variant into a set of nodes.
159	my $ctr = 0;
160	my $last_node = $graph->prior_word( $lemma_start );
161	my $var_start;
162	foreach my $vw ( @variant ) {
163	my $vwname = "$k/$line.$num.$ctr"; $ctr++;
164	my $vwnode = $graph->add_node( $vwname );
165	$vwnode->set_attribute( 'label', $vw );
166	$vwnode->set_attribute( 'class', 'variant' );
167	$graph->add_edge( $last_node, $vwnode, $edge_name );
168	$var_start = $vwnode unless $var_start;
169	$last_node = $vwnode;
170	}
171	# Now hook it up at the end.
172	$graph->add_edge( $last_node, $graph->next_word( $lemma_end ),
173	$edge_name );
174
175	# Now collate and collapse the identical nodes within the graph.
176	collate_variant( $graph, $lemma_start, $lemma_end,
177	$var_start, $last_node );
178
179	}
180	}
181
182	## Now in theory I have a graph. I want to make it a little easier to
183	## read. So I collapse nodes that have only one edge in and one edge
184	## out, and I do this by looking at the edges.
185
186	foreach my $edge ( $graph->edges() ) {
187	my @out_edges = $edge->from()->outgoing();
188	my @in_edges = $edge->to()->incoming();
189
190	next unless scalar( @out_edges ) == 1;
191	next unless scalar( @in_edges ) == 1;
192	next unless $out_edges[0] eq $in_edges[0];
193	# In theory if we've got this far, we're safe, but just to
194	# double-check...
195	next unless $out_edges[0] eq $edge;
196
197	$graph->merge_nodes( $edge->from(), $edge->to(), ' ' );
198	}
199	}
200
2ceca8c3	201	=item B<read_base>
	202
	203	my @line_beginnings = read_base( 'reference.txt', $graph );
	204
	205	Takes a text file and a (presumed empty) graph object, adds the words
	206	as simple linear nodes to the graph, and returns a list of nodes that
	207	represent the beginning of lines. This graph is now the starting point
	208	for application of apparatus entries in merge_base, e.g. from a CSV
	209	file or a Classical Text Editor file.
	210
	211	=cut
b49c4318	212
	213	sub read_base {
	214	my( $base_file, $graph ) = @_;
	215
	216	# This array gives the first node for each line. We put the
	217	# common starting point in line zero.
	218	my $last_node = $graph->start();
	219	my $lineref_array = [ $last_node ]; # There is no line zero.
	220
	221	open( BASE, $base_file ) or die "Could not open file $base_file: $!";
	222	while(<BASE>) {
	223	# Make the nodes, and connect them up for the base, but also
	224	# save the first node of each line in an array for the purpose.
	225	chomp;
	226	my @words = split;
	227	my $started = 0;
	228	my $wordref = 0;
	229	my $lineref = scalar @$lineref_array;
	230	foreach my $w ( @words ) {
	231	my $noderef = join( ',', $lineref, ++$wordref );
	232	my $node = $graph->add_node( $noderef );
	233	$node->set_attribute( 'label', $w );
	234	$node->set_attribute( 'class', 'common' );
	235	unless( $started ) {
	236	push( @$lineref_array, $node );
	237	$started = 1;
	238	}
	239	if( $last_node ) {
	240	$graph->add_edge( $last_node, $node, "base text" );
	241	$last_node = $node;
	242	} # TODO there should be no else here...
	243	}
	244	}
	245	close BASE;
	246	# Ending point for all texts
	247	my $endpoint = $graph->add_node( '#END#' );
	248	$graph->add_edge( $last_node, $endpoint, "base text" );
	249	push( @$lineref_array, $endpoint );
	250
	251	return( @$lineref_array );
	252	}
	253
2ceca8c3	254	=item B<collate_variant>
	255
	256	collate_variant( $graph, $lemma_start, $lemma_end, $var_start, $var_end );
	257
	258	Given a lemma and a variant as start- and endpoints on the graph,
	259	walks through each to identify those nodes that are identical. The
	260	graph is a Text::Tradition::Graph object; the other arguments are
	261	Graph::Easy::Node objects that appear on the graph.
b49c4318	262
2ceca8c3	263	TODO: Handle collapsed and non-collapsed transpositions.
	264
	265	=cut
b49c4318	266
	267	sub collate_variant {
	268	my( $graph, $lemma_start, $lemma_end, $var_start, $var_end ) = @_;
	269	# If var_start is undef, then the variant is an omission and
	270	# there's nothing to collate. Return.
	271	return unless $var_start;
	272
	273	# I want to look at the nodes in the variant and lemma, and
	274	# collapse nodes that are the same word. This is mini-collation.
	275	my %collapsed = ();
	276	# There will only be one outgoing edge at first, so this is safe.
	277	my @out = $var_start->outgoing();
	278	my $var_label = $out[0]->label();
	279
	280	my @lemma_nodes;
	281	while( $lemma_start ne $lemma_end ) {
	282	push( @lemma_nodes, $lemma_start );
	283	$lemma_start = $graph->next_word( $lemma_start );
	284	}
	285	push( @lemma_nodes, $lemma_end );
	286
	287	my @variant_nodes;
	288	while( $var_start ne $var_end ) {
	289	push( @variant_nodes, $var_start );
	290	$var_start = $graph->next_word( $var_start, $var_label );
	291	}
	292	push( @variant_nodes, $var_end );
	293
	294	# Go through the variant nodes, and if we find a lemma node that
	295	# hasn't yet been collapsed with a node, equate them.
	296
	297	foreach my $w ( @variant_nodes ) {
	298	my $word = $w->label();
	299	foreach my $l ( @lemma_nodes ) {
	300	if( $word eq cmp_str( $l ) ) {
	301	next if exists( $collapsed{ $l->label } )
	302	&& $collapsed{ $l->label } eq $l;
	303	# Collapse the nodes.
	304	printf STDERR "Merging nodes %s/%s and %s/%s\n",
	305	$l->name, $l->label, $w->name, $w->label;
	306	$graph->merge_nodes( $l, $w );
	307	$collapsed{ $l->label } = $l;
	308	# Now collapse any multiple edges to and from the node.
	309	# Rely on the presence of the 'base text' edge.
	310	remove_duplicate_edges( $graph, $graph->prior_word( $l ), $l );
	311	remove_duplicate_edges( $graph, $l, $graph->next_word( $l ) );
	312	}
	313	}
	314	}
	315	}
	316
2ceca8c3	317	=item B<remove_duplicate_edges>
	318
	319	remove_duplicate_edges( $graph, $from, $to );
	320
	321	Given two nodes, reduce the number of edges between those nodes to
	322	one. If neither edge represents a base text, combine their labels.
	323
	324	=cut
	325
b49c4318	326	sub remove_duplicate_edges {
	327	my( $graph, $from, $to ) = @_;
	328	my @edges = $from->edges_to( $to );
	329	if( scalar @edges > 1 ) {
	330	my @base = grep { $_->label eq 'base text' } @edges;
	331	if ( scalar @base ) {
	332	# Remove the edges that are not base.
	333	foreach my $e ( @edges ) {
	334	$graph->del_edge( $e )
	335	unless $e eq $base[0];
	336	}
	337	} else {
	338	# Combine the edges into one.
	339	my $new_edge_name = join( ', ', map { $_->label() } @edges );
	340	my $new_edge = shift @edges;
	341	$new_edge->set_attribute( 'label', $new_edge_name );
	342	foreach my $e ( @edges ) {
	343	$graph->del_edge( $e );
	344	}
	345	}
	346	}
	347	}
	348
2ceca8c3	349	=item B<cmp_str>
	350
	351	Pretend you never saw this method. Really it needs to not be hardcoded.
	352
	353	=cut
	354
b49c4318	355	sub cmp_str {
	356	my( $node ) = @_;
	357	my $word = $node->label();
	358	$word = lc( $word );
	359	$word =~ s/\W//g;
	360	$word =~ s/v/u/g;
	361	$word =~ s/j/i/g;
	362	$word =~ s/cha/ca/g;
	363	$word =~ s/quatuor/quattuor/g;
	364	$word =~ s/ioannes/iohannes/g;
	365	return $word;
	366	}
	367
2ceca8c3	368	=back
	369
	370	=head1 LICENSE
	371
	372	This package is free software and is provided "as is" without express
	373	or implied warranty. You can redistribute it and/or modify it under
	374	the same terms as Perl itself.
	375
	376	=head1 AUTHOR
	377
	378	Tara L Andrews, aurum@cpan.org
	379
	380	=cut
	381
b49c4318	382	1;