lib/Text/Tradition/Parser/BaseText.pm

   1 package Text::Tradition::Parser::BaseText;
   2
   3 use strict;
   4 use warnings;
   5 use Module::Load;
   6 use Algorithm::Diff;
   7
   8 =head1 NAME
   9
  10 Text::Tradition::Parser::BaseText
  11
  12 =head1 SYNOPSIS
  13
  14 use Text::Tradition::Parser::BaseText qw( merge_base );
  15 merge_base( $graph, 'reference.txt', @apparatus_entries )
  16
  17 =head1 DESCRIPTION
  18
  19 For an overview of the package, see the documentation for the
  20 Text::Tradition::Graph module.
  21
  22 This module is meant for use with certain of the other Parser classes
  23 - whenever a list of variants is given with reference to a base text,
  24 these must be joined into a single collation.  The parser should
  25 therefore make a list of variants and their locations, and BaseText
  26 will join those listed variants onto the reference text.
  27
  28 =head1 SUBROUTINES
  29
  30 =over
  31
  32 =item B<parse>
  33
  34 parse( $graph, %opts );
  35
  36 Takes an initialized graph and a set of options, which must include:
  37 - 'base' - the base text referenced by the variants
  38 - 'format' - the format of the variant list
  39 - 'data' - the variants, in the given format.
  40
  41 =cut
  42
  43 sub parse {
  44     my( $tradition, %opts ) = @_;
  45
  46     my $format_mod = 'Text::Tradition::Parser::' . $opts{'format'};
  47     load( $format_mod );
  48     my @apparatus_entries = $format_mod->can('read')->( $opts{'data'} );
  49     merge_base( $tradition->collation, $opts{'base'}, @apparatus_entries );
  50 }
  51
  52 =item B<merge_base>
  53
  54 merge_base( $graph, 'reference.txt', @apparatus_entries )
  55
  56 Takes three arguments: a newly-initialized Text::Tradition::Graph
  57 object, a text file containing the reference text, and a list of
  58 variants (apparatus entries).  Adds the base text to the graph, and
  59 joins the variants to that.
  60
  61 The list of variants is an array of hash references; each hash takes
  62 the form
  63  { '_id' => line reference,
  64    'rdg_0' => lemma reading,
  65    'rdg_1' => first variant,
  66    ...  # and so on until all distinct readings are listed
  67    'WitnessA' => 'rdg_0',
  68    'WitnessB' => 'rdg_1',
  69    ...  # and so on until all witnesses are listed with their readings
  70  }
  71
  72 Any hash key that is not of the form /^rdg_\d+$/ and that does not
  73 begin with an underscore is assumed to be a witness name.  Any 'meta'
  74 information to be passed must be passed in a key with a leading
  75 underscore in its name.
  76
  77 =cut
  78
  79 my $SHORTEND = 20; # Debug var - set this to limit the number of lines parsed
  80
  81 my %base_text_index;
  82 my $edits_required = {};
  83
  84 # edits_required -> wit -> [ { start_idx, end_idx, items } ]
  85
  86 sub merge_base {
  87     my( $collation, $base_file, @app_entries ) = @_;
  88     my @base_line_starts = read_base( $base_file, $collation );
  89
  90     my %all_witnesses;
  91     my @unwitnessed_lemma_nodes;
  92     foreach my $app ( @app_entries ) {
  93         my( $line, $num ) = split( /\./, $app->{_id} );
  94         # DEBUG with a short graph
  95         last if $SHORTEND && $line > $SHORTEND;
  96         # DEBUG for problematic entries
  97         my $scrutinize = '';
  98         my $first_line_reading = $base_line_starts[ $line ];
  99         my $too_far = $base_line_starts[ $line+1 ];
 100
 101         my $lemma = $app->{rdg_0};
 102         my $seq = 1;
 103         # Is this the Nth occurrence of this reading in the line?
 104         if( $lemma =~ s/(_)?(\d)$// ) {
 105             $seq = $2;
 106         }
 107         my @lemma_words = split( /\s+/, $lemma );
 108
 109         # Now search for the lemma words within this line.
 110         my $lemma_start = $first_line_reading;
 111         my $lemma_end;
 112         my %seen;
 113         while( $lemma_start ne $too_far ) {
 114             # Loop detection
 115             if( $seen{ $lemma_start->name() } ) {
 116                 warn "Detected loop at " . $lemma_start->name() .
 117                     ", ref $line,$num";
 118                 last;
 119             }
 120             $seen{ $lemma_start->name() } = 1;
 121
 122             # Try to match the lemma.
 123             my $unmatch = 0;
 124             print STDERR "Matching " . cmp_str( $lemma_start) . " against " .
 125                 $lemma_words[0] . "...\n"
 126                 if "$line.$num" eq $scrutinize;
 127             if( cmp_str( $lemma_start ) eq $lemma_words[0] ) {
 128                 # Skip it if we need a match that is not the first.
 129                 if( --$seq < 1 ) {
 130                     # Now we have to compare the rest of the words here.
 131                     if( scalar( @lemma_words ) > 1 ) {
 132                         my $next_reading =
 133                             $collation->next_reading( $lemma_start );
 134                         foreach my $w ( @lemma_words[1..$#lemma_words] ) {
 135                             printf STDERR "Now matching %s against %s\n",
 136                                     cmp_str($next_reading), $w
 137                                 if "$line.$num" eq $scrutinize;
 138                             if( $w ne cmp_str($next_reading) ) {
 139                                 $unmatch = 1;
 140                                 last;
 141                             } else {
 142                                 $lemma_end = $next_reading;
 143                                 $next_reading =
 144                                     $collation->next_reading( $lemma_end );
 145                             }
 146                         }
 147                     } else {
 148                         $lemma_end = $lemma_start;
 149                     }
 150                 } else {
 151                     $unmatch = 1;
 152                 }
 153             }
 154             last unless ( $unmatch || !defined( $lemma_end ) );
 155             $lemma_end = undef;
 156             $lemma_start = $collation->next_reading( $lemma_start );
 157         }
 158
 159         unless( $lemma_end ) {
 160             warn "No match found for @lemma_words at $line.$num";
 161             next;
 162         }
 163
 164         # Now we have found the lemma; we will record an 'edit', in
 165         # terms of a splice operation, for each subsequent reading.
 166         # We also note which witnesses take the given edit.
 167
 168         my @lemma_set = $collation->reading_sequence( $lemma_start,
 169                                                       $lemma_end );
 170         my @reading_sets = [ @lemma_set ];
 171
 172         # For each reading that is not rdg_0, we create the variant
 173         # reading nodes, and store the range as an edit operation on
 174         # the base text.
 175         my $variant_objects;
 176         my %pc_seen; # Keep track of mss with explicit post-corr data
 177         foreach my $k ( grep { /^rdg/ } keys( %$app ) ) {
 178             my @mss = grep { $app->{$_} eq $k } keys( %$app );
 179
 180             # Keep track of lemma nodes that don't actually appear in
 181             # any MSS; we will want to remove them from the collation.
 182             push( @unwitnessed_lemma_nodes, @lemma_set )
 183                 if !@mss && $k eq 'rdg_0';
 184
 185             # Keep track of what witnesses we have seen.
 186             @all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
 187             # Keep track of which witnesses bear corrected readings here.
 188             foreach my $m ( @mss ) {
 189                 my $base = _is_post_corr( $m );
 190                 next unless $base;
 191                 $pc_seen{$base} = 1;
 192             }
 193             next if $k eq 'rdg_0';
 194
 195             # Parse the variant into reading tokens.
 196             # TODO don't hardcode the reading split operation
 197             my @variant = split( /\s+/, $app->{$k} );
 198             @variant = () if $app->{$k} eq '/'; # This is an omission.
 199
 200             my @variant_readings;
 201             my $ctr = 0;
 202             foreach my $vw ( @variant ) {
 203                 my $vwname = "$k/$line.$num.$ctr"; $ctr++;
 204                 my $vwreading = $collation->add_reading( $vwname );
 205                 $vwreading->text( $vw );
 206                 push( @variant_readings, $vwreading );
 207             }
 208
 209             $variant_objects->{$k} = { 'mss' => \@mss,
 210                                        'reading' => \@variant_readings,
 211             };
 212             push( @reading_sets, \@variant_readings );
 213         }
 214
 215         # Now collate and collapse the identical readings within the
 216         # collated sets.  Modifies the reading sets that were passed.
 217         collate_variants( $collation, @reading_sets );
 218
 219         # Record any stated relationships between the nodes and the lemma.
 220         set_relationships( $collation, $app, \@lemma_set, $variant_objects );
 221
 222         # Now create the splice-edit objects that will be used
 223         # to reconstruct each witness.
 224
 225         foreach my $rkey ( keys %$variant_objects ) {
 226             # Object is argument list for splice, so:
 227             # offset, length, replacements
 228             my $edit_object = [ $lemma_start->name,
 229                                 scalar( @lemma_set ),
 230                                 $variant_objects->{$rkey}->{reading} ];
 231             foreach my $ms ( @{$variant_objects->{$rkey}->{mss}} ) {
 232                 # Is this a p.c. entry?
 233                 my $base = _is_post_corr( $ms );
 234                 if( $base ) { # this is a post-corr witness
 235                     my $pc_key = $base . "_post";
 236                     _add_hash_entry( $edits_required, $pc_key, $edit_object );
 237                 } else { # this is an ante-corr witness
 238                     my $pc_key = $ms . "_post";
 239                     _add_hash_entry( $edits_required, $ms, $edit_object );
 240                     unless( $pc_seen{$ms} ) {
 241                         # If this witness carries no correction, add this
 242                         # same object to its post-corrected state.
 243                         _add_hash_entry( $edits_required, $pc_key,
 244                                          $edit_object );
 245                     }
 246                 }
 247             }
 248         }
 249     } # Finished going through the apparatus entries
 250
 251     # Now make the witness objects, and create their text sequences
 252     foreach my $w ( grep { $_ !~ /_post$/ } keys %$edits_required ) {
 253         print STDERR "Creating witness $w\n";
 254         my $witness_obj = $collation->tradition->add_witness( sigil => $w );
 255         my $debug = undef; # $w eq 'Vb10';
 256         my ( $text_seq, $ac ) = apply_edits( $collation,
 257                                              $edits_required->{$w},
 258                                              $edits_required->{$w."_post"},
 259                                              $debug );
 260
 261         my @repeated = _check_for_repeated( @$text_seq );
 262         warn "Repeated elements @repeated in $w"
 263             if @repeated;
 264         # Now save these paths in my witness object
 265         $witness_obj->path( $text_seq );
 266         if( $ac ) {
 267             $witness_obj->uncorrected( $ac );
 268         }
 269     }
 270
 271     # Now remove our 'base text' edges, which is to say, the only
 272     # ones we have created so far.  Also remove any unwitnessed
 273     # lemma nodes (TODO unless we are treating base as witness)
 274     foreach ( $collation->paths() ) {
 275         $collation->del_path( $_ );
 276     }
 277     foreach( @unwitnessed_lemma_nodes ) {
 278         $collation->del_reading( $_ );
 279     }
 280
 281     # Now walk paths and calculate positions.
 282     my @common_readings =
 283         $collation->make_witness_paths();
 284     $collation->calculate_positions( @common_readings );
 285 }
 286
 287 sub _check_for_repeated {
 288     my @seq = @_;
 289     my %unique;
 290     my @repeated;
 291     foreach ( @seq ) {
 292         if( exists $unique{$_->name} ) {
 293             push( @repeated, $_->name );
 294         } else {
 295             $unique{$_->name} = 1;
 296         }
 297     }
 298     return @repeated;
 299 }
 300
 301 =item B<read_base>
 302
 303 my @line_beginnings = read_base( 'reference.txt', $collation );
 304
 305 Takes a text file and a (presumed empty) collation object, adds the
 306 words as simple linear readings to the collation, and returns a
 307 list of readings that represent the beginning of lines. This collation
 308 is now the starting point for application of apparatus entries in
 309 merge_base, e.g. from a CSV file or a Classical Text Editor file.
 310
 311 =cut
 312
 313 sub read_base {
 314     my( $base_file, $collation ) = @_;
 315
 316     # This array gives the first reading for each line.  We put the
 317     # common starting point in line zero.
 318     my $last_reading = $collation->start();
 319     $base_text_index{$last_reading->name} = 0;
 320     my $lineref_array = [ $last_reading ]; # There is no line zero.
 321
 322     open( BASE, $base_file ) or die "Could not open file $base_file: $!";
 323     my $i = 1;
 324     while(<BASE>) {
 325         # Make the readings, and connect them up for the base, but
 326         # also save the first reading of each line in an array for the
 327         # purpose.
 328         # TODO use configurable reading separator
 329         chomp;
 330         my @words = split;
 331         my $started = 0;
 332         my $wordref = 0;
 333         my $lineref = scalar @$lineref_array;
 334         last if $SHORTEND && $lineref > $SHORTEND;
 335         foreach my $w ( @words ) {
 336             my $readingref = join( ',', $lineref, ++$wordref );
 337             my $reading = $collation->add_reading( $readingref );
 338             $reading->text( $w );
 339             unless( $started ) {
 340                 push( @$lineref_array, $reading );
 341                 $started = 1;
 342             }
 343             # Add edge paths in the graph, for easier tracking when
 344             # we start applying corrections.  These paths will be
 345             # removed when we're done.
 346             my $path = $collation->add_path( $last_reading, $reading,
 347                                              $collation->baselabel );
 348             $last_reading = $reading;
 349
 350             # Note an array index for the reading, for later correction splices.
 351             $base_text_index{$readingref} = $i++;
 352         }
 353     }
 354     close BASE;
 355     # Ending point for all texts
 356     my $endpoint = $collation->add_reading( '#END#' );
 357     $collation->add_path( $last_reading, $endpoint, $collation->baselabel );
 358     push( @$lineref_array, $endpoint );
 359     $base_text_index{$endpoint->name} = $i;
 360
 361     return( @$lineref_array );
 362 }
 363
 364 =item B<collate_variants>
 365
 366 collate_variants( $collation, @reading_ranges )
 367
 368 Given a set of readings in the form
 369 ( lemma_start, lemma_end, rdg1_start, rdg1_end, ... )
 370 walks through each to identify those readings that are identical.  The
 371 collation is a Text::Tradition::Collation object; the elements of
 372 @readings are Text::Tradition::Collation::Reading objects that appear
 373 on the collation graph.
 374
 375 TODO: Handle collapsed and non-collapsed transpositions.
 376
 377 =cut
 378
 379 sub collate_variants {
 380     my( $collation, @reading_sets ) = @_;
 381
 382     # Two different ways to do this, depending on whether we want
 383     # transposed reading nodes to be merged into one (producing a
 384     # nonlinear, bidirectional graph) or not (producing a relatively
 385     # linear, unidirectional graph.)
 386     return $collation->linear ? collate_linearly( @_ )
 387         : collate_nonlinearly( @_ );
 388 }
 389
 390 sub collate_linearly {
 391     my( $collation, $lemma_set, @variant_sets ) = @_;
 392
 393     my @unique;
 394     push( @unique, @$lemma_set );
 395     while( @variant_sets ) {
 396         my $variant_set = shift @variant_sets;
 397         # Use diff to do this job
 398         my $diff = Algorithm::Diff->new( \@unique, $variant_set,
 399                                          {'keyGen' => \&_collation_hash} );
 400         my @new_unique;
 401         my %merged;
 402         while( $diff->Next ) {
 403             if( $diff->Same ) {
 404                 # merge the nodes
 405                 my @l = $diff->Items( 1 );
 406                 my @v = $diff->Items( 2 );
 407                 foreach my $i ( 0 .. $#l ) {
 408                     if( !$merged{$l[$i]->name} ) {
 409                         $collation->merge_readings( $l[$i], $v[$i] );
 410                         $merged{$l[$i]->name} = 1;
 411                     } else {
 412                         print STDERR "Would have double merged " . $l[$i]->name . "\n";
 413                     }
 414                 }
 415                 # splice the lemma nodes into the variant set
 416                 my( $offset ) = $diff->Get( 'min2' );
 417                 splice( @$variant_set, $offset, scalar( @l ), @l );
 418                 push( @new_unique, @l );
 419             } else {
 420                 # Keep the old unique readings
 421                 push( @new_unique, $diff->Items( 1 ) ) if $diff->Items( 1 );
 422                 # Add the new readings to the 'unique' list
 423                 push( @new_unique, $diff->Items( 2 ) ) if $diff->Items( 2 );
 424             }
 425         }
 426         @unique = @new_unique;
 427     }
 428 }
 429
 430 sub collate_nonlinearly {
 431     my( $collation, $lemma_set, @variant_sets ) = @_;
 432
 433     my @unique;
 434     push( @unique, @$lemma_set );
 435     while( @variant_sets ) {
 436         my $variant_set = shift @variant_sets;
 437         # Simply match the first reading that carries the same word, so
 438         # long as that reading has not yet been used to match another
 439         # word in this variant. That way lies loopy madness.
 440         my @distinct;
 441         my %merged;
 442         foreach my $idx ( 0 .. $#{$variant_set} ) {
 443             my $vw = $variant_set->[$idx];
 444             my @same = grep { cmp_str( $_ ) eq $vw->label } @unique;
 445             my $matched;
 446             if( @same ) {
 447                 foreach my $i ( 0 .. $#same ) {
 448                     unless( $merged{$same[$i]->name} ) {
 449                         print STDERR sprintf( "Merging %s into %s\n",
 450                                               $vw->name,
 451                                               $same[$i]->name );
 452                         $collation->merge_readings( $same[$i], $vw );
 453                         $merged{$same[$i]->name} = 1;
 454                         $matched = $i;
 455                         $variant_set->[$idx] = $same[$i];
 456                     }
 457                 }
 458             }
 459             unless( @same && defined($matched) ) {
 460                 push( @distinct, $vw );
 461             }
 462         }
 463         push( @unique, @distinct );
 464     }
 465 }
 466
 467
 468
 469 sub _collation_hash {
 470     my $node = shift;
 471     return cmp_str( $node );
 472 }
 473
 474 sub set_relationships {
 475     my( $collation, $app, $lemma, $variants ) = @_;
 476     foreach my $rkey ( keys %$variants ) {
 477         my $var = $variants->{$rkey}->{'reading'};
 478         my $typekey = sprintf( "_%s_type", $rkey );
 479         my $type = $app->{$typekey};
 480
 481         if( $type =~ /^(inv|tr|rep)$/i ) {
 482             # Transposition or repetition: look for nodes with the
 483             # same label but different IDs and mark them.
 484             $type = 'repetition' if $type =~ /^rep/i;
 485             my %labels;
 486             foreach my $r ( @$lemma ) {
 487                 $labels{cmp_str( $r )} = $r;
 488             }
 489             foreach my $r( @$var ) {
 490                 if( exists $labels{$r->label} &&
 491                     $r->name ne $labels{$r->label}->name ) {
 492                     if( $type eq 'repetition' ) {
 493                         # Repetition
 494                         $collation->add_relationship( $type, $r, $labels{$r->label} );
 495                     } else {
 496                         # Transposition
 497                         $r->set_identical( $labels{$r->label} );
 498                     }
 499                 }
 500             }
 501         } elsif( $type =~ /^(gr|sp(el)?)$/i ) {
 502             # Grammar/spelling: this can be a one-to-one or one-to-many
 503             # mapping.  We should think about merging readings if it is
 504             # one-to-many.
 505             $type = 'grammatical' if $type =~ /gr/i;
 506             $type = 'spelling' if $type =~ /sp/i;
 507             $type = 'repetition' if $type =~ /rep/i;
 508             if( @$lemma == @$var ) {
 509                 foreach my $i ( 0 .. $#{$lemma} ) {
 510                     $collation->add_relationship( $type, $var->[$i],
 511                                                   $lemma->[$i] );
 512                 }
 513             } elsif ( @$lemma > @$var && @$var == 1 ) {
 514                 # Merge the lemma readings into one
 515                 ## TODO This is a bad solution. We need a real one-to-many
 516                 ##  mapping.
 517                 my $ln1 = shift @$lemma;
 518                 foreach my $ln ( @$lemma ) {
 519                     $collation->merge_readings( $ln1, $ln, ' ' );
 520                 }
 521                 $lemma = [ $ln1 ];
 522                 $collation->add_relationship( $type, $var->[0], $lemma->[0] );
 523             } elsif ( @$lemma < @$var && @$lemma == 1 ) {
 524                 my $vn1 = shift @$var;
 525                 foreach my $vn ( @$var ) {
 526                     $collation->merge_readings( $vn1, $vn, ' ' );
 527                 }
 528                 $var = [ $vn1 ];
 529                 $collation->add_relationship( $type, $var->[0], $lemma->[0] );
 530             } else {
 531                 warn "Cannot set $type relationship on a many-to-many variant";
 532             }
 533         } elsif( $type !~ /^(lex|add|om)$/i ) {
 534             warn "Unrecognized type $type";
 535         }
 536     }
 537 }
 538
 539
 540
 541 sub apply_edits {
 542     my( $collation, $edit_sequence, $corrected_edit_sequence, $debug ) = @_;
 543
 544     # Index the ante- and post-correctione edits that we have, so that
 545     # for each spot in the text we can apply the original witness
 546     # state and then apply its corrected state, if applicable.
 547     my $all_edits = {};
 548     foreach my $c ( @$edit_sequence ) {
 549         my $lemma_index = $base_text_index{$c->[0]};
 550         $all_edits->{$lemma_index}->{'ac'} = $c;
 551         # If the text carries no corrections, pc == ac.
 552         $all_edits->{$lemma_index}->{'pc'} = $c
 553             unless $corrected_edit_sequence;
 554     }
 555     foreach my $c ( @$corrected_edit_sequence ) {
 556         my $lemma_index = $base_text_index{$c->[0]};
 557         $all_edits->{$lemma_index}->{'pc'} = $c;
 558     }
 559
 560     my @lemma_text = $collation->reading_sequence( $collation->start,
 561                                            $collation->reading( '#END#' ) );
 562     my $drift = 0;
 563     my @ac_sequence;
 564     foreach my $lemma_index ( sort keys %$all_edits ) {
 565         my $ac = $all_edits->{$lemma_index}->{'ac'};
 566         my $pc = $all_edits->{$lemma_index}->{'pc'};
 567         my $realoffset = $lemma_index + $drift;
 568         if( $ac && $pc && $ac eq $pc ) {
 569             # No correction, just apply the edit
 570             my( $lemma_start, $length, $items ) = @$pc;
 571             splice( @lemma_text, $realoffset, $length, @$items );
 572             $drift += @$items + $length;
 573         } elsif ( !$pc ) {
 574             # Lemma text is unaltered, save a.c. as an 'uncorrection'
 575             my( $lemma_start, $length, $items ) = @$ac;
 576             push( @ac_sequence, [ $realoffset, $length, $items ] );
 577         } elsif ( !$ac ) {
 578             # Apply the edit, save lemma text as an 'uncorrection'
 579             my( $lemma_start, $length, $items ) = @$pc;
 580             my @old = splice( @lemma_text, $realoffset, $length, @$items );
 581             $drift += @$items + $length;
 582             push( @ac_sequence, [ $realoffset, scalar( @$items ), \@old ] );
 583         } else {
 584             # Apply the p.c. edit, then save the a.c. edit as an
 585             # 'uncorrection' on the p.c. text
 586             my( $lemma_start, $length, $items ) = @$pc;
 587             my @old = splice( @lemma_text, $realoffset, $length, @$items );
 588             $drift += @$items + $length;
 589             push( @ac_sequence, [ $realoffset, scalar( @$items ), \@old ] );
 590         }
 591     }
 592     return( \@lemma_text, \@ac_sequence );
 593 }
 594
 595 # sub _apply_sequence_splice {
 596 #     my( $collation, $sequence, $correction
 597
 598
 599 # Helper function. Given a witness sigil, if it is a post-correctione
 600 # sigil,return the base witness.  If not, return a false value.
 601 sub _is_post_corr {
 602     my( $sigil ) = @_;
 603     if( $sigil =~ /^(.*?)(\s*\(?p\.\s*c\.\)?)$/ ) {
 604         return $1;
 605     }
 606     return undef;
 607 }
 608
 609 sub _add_hash_entry {
 610     my( $hash, $key, $entry ) = @_;
 611     if( exists $hash->{$key} ) {
 612         push( @{$hash->{$key}}, $entry );
 613     } else {
 614         $hash->{$key} = [ $entry ];
 615     }
 616 }
 617
 618
 619 =item B<cmp_str>
 620
 621 Pretend you never saw this method.  Really it needs to not be hardcoded.
 622
 623 =cut
 624
 625 sub cmp_str {
 626     my( $reading ) = @_;
 627     my $word = $reading->label();
 628     $word = lc( $word );
 629     $word =~ s/\W//g;
 630     $word =~ s/v/u/g;
 631     $word =~ s/j/i/g;
 632     $word =~ s/cha/ca/g;
 633     $word =~ s/quatuor/quattuor/g;
 634     $word =~ s/ioannes/iohannes/g;
 635     return $word;
 636 }
 637
 638 =back
 639
 640 =head1 LICENSE
 641
 642 This package is free software and is provided "as is" without express
 643 or implied warranty.  You can redistribute it and/or modify it under
 644 the same terms as Perl itself.
 645
 646 =head1 AUTHOR
 647
 648 Tara L Andrews, aurum@cpan.org
 649
 650 =cut
 651
 652 1;