use strict;
use warnings;
use Module::Load;
-use Text::Tradition::Parser::Util qw( collate_variants cmp_str check_for_repeated add_hash_entry );
+use TryCatch;
+use Text::Tradition::Parser::Util qw( collate_variants cmp_str
+ check_for_repeated add_hash_entry );
=head1 NAME
=head1 DESCRIPTION
For an overview of the package, see the documentation for the
-Text::Tradition::Graph module.
+Text::Tradition module.
This module is meant for use with certain of the other Parser classes
- whenever a list of variants is given with reference to a base text,
=item B<parse>
-parse( $graph, %opts );
+parse( $graph, $opts );
-Takes an initialized graph and a set of options, which must include:
+Takes an initialized graph and a hashref of options, which must include:
- 'base' - the base text referenced by the variants
- 'format' - the format of the variant list
- 'data' - the variants, in the given format.
my $format_mod = 'Text::Tradition::Parser::' . $opts->{'input'};
load( $format_mod );
- my @apparatus_entries = $format_mod->can('read')->( $opts->{'file'} );
- merge_base( $tradition->collation, $opts->{'base'}, @apparatus_entries );
+ # TODO Handle a string someday if we ever have a format other than KUL
+ my @apparatus_entries = $format_mod->can('read')->( $opts );
+ merge_base( $tradition->collation, $opts, @apparatus_entries );
}
=item B<merge_base>
# edits_required -> wit -> [ { start_idx, end_idx, items } ]
sub merge_base {
- my( $collation, $base_file, @app_entries ) = @_;
- my @base_line_starts = read_base( $base_file, $collation );
+ my( $collation, $opts, @app_entries ) = @_;
+ my @base_line_starts = read_base( $opts->{'base'}, $collation );
my %all_witnesses;
- my @unwitnessed_lemma_nodes;
foreach my $app ( @app_entries ) {
my( $line, $num ) = split( /\./, $app->{_id} );
# DEBUG with a short graph
my %seen;
while( $lemma_start ne $too_far ) {
# Loop detection
- if( $seen{ $lemma_start->name() } ) {
- warn "Detected loop at " . $lemma_start->name() .
+ if( $seen{ $lemma_start->id() } ) {
+ warn "Detected loop at " . $lemma_start->id() .
", ref $line,$num";
last;
}
- $seen{ $lemma_start->name() } = 1;
+ $seen{ $lemma_start->id() } = 1;
# Try to match the lemma.
my $unmatch = 0;
my @lemma_set = $collation->reading_sequence( $lemma_start,
$lemma_end );
my @reading_sets = [ @lemma_set ];
-
+
# For each reading that is not rdg_0, we create the variant
# reading nodes, and store the range as an edit operation on
# the base text.
foreach my $k ( grep { /^rdg/ } keys( %$app ) ) {
my @mss = grep { $app->{$_} eq $k } keys( %$app );
- # Keep track of lemma nodes that don't actually appear in
- # any MSS; we will want to remove them from the collation.
- push( @unwitnessed_lemma_nodes, @lemma_set )
- if !@mss && $k eq 'rdg_0';
-
# Keep track of what witnesses we have seen.
@all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
# Keep track of which witnesses bear corrected readings here.
my $ctr = 0;
foreach my $vw ( @variant ) {
my $vwname = "$k/$line.$num.$ctr"; $ctr++;
- my $vwreading = $collation->add_reading( $vwname );
- $vwreading->text( $vw );
+ my $vwreading = $collation->add_reading( {
+ 'id' => $vwname,
+ 'text' => $vw } );
push( @variant_readings, $vwreading );
}
foreach my $rkey ( keys %$variant_objects ) {
# Object is argument list for splice, so:
# offset, length, replacements
- my $edit_object = [ $lemma_start->name,
+ my $edit_object = [ $lemma_start->id,
scalar( @lemma_set ),
$variant_objects->{$rkey}->{reading} ];
foreach my $ms ( @{$variant_objects->{$rkey}->{mss}} ) {
# Now make the witness objects, and create their text sequences
foreach my $w ( grep { $_ !~ /_post$/ } keys %$edits_required ) {
print STDERR "Creating witness $w\n";
- my $witness_obj = $collation->tradition->add_witness( sigil => $w );
+ my $witness_obj = $collation->tradition->add_witness(
+ sigil => $w, sourcetype => 'collation' );
my $debug; # = $w eq 'Vb11';
my @ante_corr_seq = apply_edits( $collation, $edits_required->{$w}, $debug );
my @post_corr_seq = apply_edits( $collation, $edits_required->{$w."_post"}, $debug )
# ones we have created so far. Also remove any unwitnessed
# lemma nodes (TODO unless we are treating base as witness)
foreach ( $collation->paths() ) {
- $collation->del_path( $_ );
- }
- foreach( @unwitnessed_lemma_nodes ) {
- $collation->del_reading( $_ );
- # TODO do we need to delete any relationship paths here?
+ $collation->del_path( $_, $collation->baselabel );
}
### HACKY HACKY Do some one-off path corrections here.
- if( $collation->linear ) {
- my $c = $collation;
- my $end = $SHORTEND ? $SHORTEND : 155;
- # Vb11
- my $path;
- if( $end > 16 ) {
- $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') );
- $path = $c->tradition->witness('Vb11')->path;
- splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( 'rdg_1/16.2.2' ) );
- $path = $c->tradition->witness('Vb11')->uncorrected_path;
- splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( 'rdg_1/16.2.2' ) );
- }
- # What else?
- # Vb26:
- $path = $c->tradition->witness('Vb26')->path;
- splice( @$path, 618, 0, $c->reading('rdg_1/46.1.1') ) if $end > 46;
- # Vb13:
- $path = $c->tradition->witness('Vb13')->path;
- splice( @$path, 782, 0, $c->reading( '58,5' ) ) if $end > 58;
- $path = $c->tradition->witness('Vb13')->uncorrected_path;
- splice( @$path, 758, 0, $c->reading( '58,5' ) ) if $end > 58;
- # Vb20 a.c.:
- $path = $c->tradition->witness('Vb20')->uncorrected_path;
- splice( @$path, 1251, 1, $c->reading( '94,4' ) ) if $end > 94;
- # Vb5:
- $path = $c->tradition->witness('Vb5')->path;
- splice( @$path, 1436, 0, $c->reading('rdg_1/106.5.1') ) if $end > 106;
- # extraneous:
- $c->del_reading( 'rdg_2/147.6.13' );
- $c->del_reading( 'rdg_2/147.6.14' );
- $c->del_reading( 'rdg_2/147.6.15' );
-
- } else {
- my $c = $collation;
- my $end = $SHORTEND ? $SHORTEND : 155;
- # Vb5:
- my $path = $c->tradition->witness('Vb5')->path;
- splice( @$path, 1436, 0, $c->reading('106,14') ) if $end > 106;
- # Vb11:
- $path = $c->tradition->witness('Vb11')->path;
- if( $end > 16 ) {
- $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') );
- splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( '16,1' ) );
- }
- # Vb13:
- $path = $c->tradition->witness('Vb13')->path;
- splice( @$path, 782, 0, $c->reading( '58,5' ) ) if $end > 58;
- $path = $c->tradition->witness('Vb13')->uncorrected_path;
- splice( @$path, 758, 0, $c->reading( '58,5' ) ) if $end > 58;
- # Vb20 a.c.:
- $path = $c->tradition->witness('Vb20')->uncorrected_path;
- splice( @$path, 1251, 1, $c->reading( '94,4' ) ) if $end > 94;
- # Vb26:
- $path = $c->tradition->witness('Vb26')->path;
- splice( @$path, 618, 0, $c->reading('46,2') ) if $end > 46;
- }
-
+ if( $opts->{'input'} eq 'KUL' ) {
+ require 'data/boodts/s158.HACK';
+ KUL::HACK::pre_path_hack( $collation );
+ }
+
# Now walk paths and calculate positional rank.
$collation->make_witness_paths();
+ # Now delete any orphaned readings.
+ foreach my $r ( $collation->sequence->isolated_vertices ) {
+ print STDERR "Deleting unconnected reading $r / " .
+ $collation->reading( $r )->text . "\n";
+ $collation->del_reading( $r );
+ }
+
+ KUL::HACK::post_path_hack( $collation ) if $opts->{'input'} eq 'KUL';
# Have to check relationship validity at this point, because before that
# we had no paths.
# foreach my $rel ( $collation->relationships ) {
# next unless $rel->equal_rank;
# unless( Text::Tradition::Collation::relationship_valid( $rel->from, $rel->to ) ) {
# warn sprintf( "Relationship type %s between %s and %s is invalid, deleting",
-# $rel->type, $rel->from->name, $rel->to->name );
+# $rel->type, $rel->from->id, $rel->to->id );
# }
# }
- $collation->calculate_ranks();
+ $collation->calculate_common_readings(); # will implicitly rank
}
=item B<read_base>
# This array gives the first reading for each line. We put the
# common starting point in line zero.
- my $last_reading = $collation->start();
- $base_text_index{$last_reading->name} = 0;
+ my $last_reading = $collation->start;
+ $base_text_index{$last_reading->id} = 0;
my $lineref_array = [ $last_reading ]; # There is no line zero.
open( BASE, $base_file ) or die "Could not open file $base_file: $!";
last if $SHORTEND && $lineref > $SHORTEND;
foreach my $w ( @words ) {
my $readingref = join( ',', $lineref, ++$wordref );
- my $reading = $collation->add_reading( $readingref );
- $reading->text( $w );
+ my $reading = $collation->add_reading( { id => $readingref, text => $w } );
unless( $started ) {
push( @$lineref_array, $reading );
$started = 1;
# Ending point for all texts
$collation->add_path( $last_reading, $collation->end, $collation->baselabel );
push( @$lineref_array, $collation->end );
- $base_text_index{$collation->end->name} = $i;
+ $base_text_index{$collation->end->id} = $i;
return( @$lineref_array );
}
$labels{cmp_str( $r )} = $r;
}
foreach my $r( @$var ) {
- if( exists $labels{$r->label} &&
- $r->name ne $labels{$r->label}->name ) {
+ if( exists $labels{$r->text} &&
+ $r->id ne $labels{$r->text}->id ) {
if( $type eq 'repetition' ) {
# Repetition
- $collation->add_relationship( $r, $labels{$r->label}, \%rel_options );
+ try {
+ $collation->add_relationship( $r, $labels{$r->text}, \%rel_options );
+ } catch( Text::Tradition::Error $e ) {
+ warn "Could not set repetition relationship $r -> "
+ . $labels{$r->text} . ": " . $e->message;
+ }
} else {
# Transposition
- $r->set_identical( $labels{$r->label} );
+ try {
+ $r->set_identical( $labels{$r->text} );
+ } catch( Text::Tradition::Error $e ) {
+ warn "Could not set transposition relationship $r -> "
+ . $labels{$r->text} . ": " . $e->message;
+ }
}
}
}
$rel_options{'equal_rank'} = 1;
if( @$lemma == @$var ) {
foreach my $i ( 0 .. $#{$lemma} ) {
- $collation->add_relationship( $var->[$i], $lemma->[$i],
- \%rel_options );
+ try {
+ $collation->add_relationship( $var->[$i], $lemma->[$i],
+ \%rel_options );
+ } catch( Text::Tradition::Error $e ) {
+ warn "Could not set $type relationship " . $var->[$i] . " -> "
+ . $lemma->[$i] . ": " . $e->message;
+ }
}
} else {
# An uneven many-to-many mapping. Skip for now.
# my $lemseg = @$lemma > 1 ? $collation->add_segment( @$lemma ) : $lemma->[0];
# my $varseg = @$var > 1 ? $collation->add_segment( @$var ) : $var->[0];
# $collation->add_relationship( $varseg, $lemseg, \%rel_options );
- if( @$lemma == 1 && @$var == 1 ) {
- $collation->add_relationship( $lemma->[0], $var->[0], \%rel_options );
- }
+ # if( @$lemma == 1 && @$var == 1 ) {
+ # $collation->add_relationship( $lemma->[0], $var->[0], \%rel_options );
+ # }
}
} elsif( $type !~ /^(add|om|lex)$/i ) {
warn "Unrecognized type $type";
sub apply_edits {
my( $collation, $edit_sequence, $debug ) = @_;
- my @lemma_text = $collation->reading_sequence( $collation->start,
- $collation->reading( '#END#' ) );
+ my @lemma_text = $collation->reading_sequence(
+ $collation->start, $collation->end );
my $drift = 0;
foreach my $correction ( @$edit_sequence ) {
my( $lemma_start, $length, $items ) = @$correction;
my $offset = $base_text_index{$lemma_start};
my $realoffset = $offset + $drift;
if( $debug ||
- $lemma_text[$realoffset]->name ne $lemma_start ) {
+ $lemma_text[$realoffset]->id ne $lemma_start ) {
my @this_phrase = @lemma_text[$realoffset..$realoffset+$length-1];
my @base_phrase;
my $i = $realoffset;
print STDERR sprintf( "Trying to replace %s (%s) starting at %d " .
"with %s (%s) with drift %d\n",
- join( ' ', map {$_->label} @base_phrase ),
- join( ' ', map {$_->name} @base_phrase ),
+ join( ' ', map {$_->text} @base_phrase ),
+ join( ' ', map {$_->id} @base_phrase ),
$realoffset,
- join( ' ', map {$_->label} @$items ),
- join( ' ', map {$_->name} @$items ),
+ join( ' ', map {$_->text} @$items ),
+ join( ' ', map {$_->id} @$items ),
$drift,
) if $debug;
- if( $lemma_text[$realoffset]->name ne $lemma_start ) {
+ if( $lemma_text[$realoffset]->id ne $lemma_start ) {
warn( sprintf( "Should be replacing %s (%s) with %s (%s) " .
"but %s (%s) is there instead",
- join( ' ', map {$_->label} @base_phrase ),
- join( ' ', map {$_->name} @base_phrase ),
- join( ' ', map {$_->label} @$items ),
- join( ' ', map {$_->name} @$items ),
- join( ' ', map {$_->label} @this_phrase ),
- join( ' ', map {$_->name} @this_phrase ),
+ join( ' ', map {$_->text} @base_phrase ),
+ join( ' ', map {$_->id} @base_phrase ),
+ join( ' ', map {$_->text} @$items ),
+ join( ' ', map {$_->id} @$items ),
+ join( ' ', map {$_->text} @this_phrase ),
+ join( ' ', map {$_->id} @this_phrase ),
) );
# next;
}