package Text::Tradition::Collation;
+use feature 'say';
use Encode qw( decode_utf8 );
use File::Temp;
use File::Which;
sub BUILD {
my $self = shift;
$self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
- $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
- $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
+ $self->_set_start( $self->add_reading(
+ { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
+ $self->_set_end( $self->add_reading(
+ { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
}
### Reading construct/destruct functions
my( $self, $reading ) = @_;
unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
my %args = %$reading;
- if( $self->tradition->has_language && !exists $args{'language'} ) {
+ if( $args{'init'} ) {
+ # If we are initializing an empty collation, don't assume that we
+ # have set a tradition.
+ delete $args{'init'};
+ } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
$args{'language'} = $self->tradition->language;
}
$reading = Text::Tradition::Collation::Reading->new(
$self->_add_reading( $reading->id => $reading );
# Once the reading has been added, put it in both graphs.
$self->sequence->add_vertex( $reading->id );
- # All meta readings save 'start' and 'end' get disregarded for relationships.
- unless( $reading->is_nonrel ) {
- $self->relations->add_reading( $reading->id );
- }
+ $self->relations->add_reading( $reading->id );
return $reading;
};
my $self = shift;
my $arg = shift;
- unless( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
- $arg = $self->reading( $arg )
+ if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
+ $arg = $arg->id;
}
- my $argid = $arg->id;
# Remove the reading from the graphs.
$self->_graphcalc_done(0);
$self->_clear_cache; # Explicitly clear caches to GC the reading
- $self->sequence->delete_vertex( $argid );
- $self->relations->delete_reading( $argid )
- unless $arg->is_nonrel;
+ $self->sequence->delete_vertex( $arg );
+ $self->relations->delete_reading( $arg );
# Carry on.
- $self->$orig( $argid );
+ $self->$orig( $arg );
};
=begin testing
@wits{keys %$fwits} = values %$fwits;
$self->sequence->set_edge_attributes( @vector, \%wits );
}
- $self->relations->merge_readings( $kept, $deleted, $combine )
- unless $mergemeta;
+ $self->relations->merge_readings( $kept, $deleted, $combine );
# Do the deletion deed.
if( $combine ) {
+ # Combine the text of the readings
my $joinstr = $combine_char;
unless( defined $joinstr ) {
$joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
$joinstr = $self->wordsep unless defined $joinstr;
}
$kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
+ $kept_obj->normal_form(
+ join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
+ # Combine the lexemes present in the readings
+ if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
+ $kept_obj->add_lexeme( $del_obj->lexemes );
+ }
}
$self->del_reading( $deleted );
}
+=head2 compress_readings
+
+Where possible in the graph, compresses plain sequences of readings into a
+single reading. The sequences must consist of readings with no
+relationships to other readings, with only a single witness path between
+them and no other witness paths from either that would skip the other. The
+readings must also not be marked as nonsense or bad grammar.
+
+WARNING: This operation cannot be undone.
+
+=cut
+
+sub compress_readings {
+ my $self = shift;
+ # Anywhere in the graph that there is a reading that joins only to a single
+ # successor, and neither of these have any relationships, just join the two
+ # readings.
+ my %gobbled;
+ foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
+ next if $rdg->is_meta;
+ next if $gobbled{$rdg->id};
+ next if $rdg->grammar_invalid || $rdg->is_nonsense;
+ next if $rdg->related_readings();
+ my %seen;
+ while( $self->sequence->successors( $rdg ) == 1 ) {
+ my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
+ throw( "Infinite loop" ) if $seen{$next->id};
+ $seen{$next->id} = 1;
+ last if $self->sequence->predecessors( $next ) > 1;
+ last if $next->is_meta;
+ last if $next->grammar_invalid || $next->is_nonsense;
+ last if $next->related_readings();
+ say "Joining readings $rdg and $next";
+ $self->merge_readings( $rdg, $next, 1 );
+ }
+ }
+ # Make sure we haven't screwed anything up
+ foreach my $wit ( $self->tradition->witnesses ) {
+ my $pathtext = $self->path_text( $wit->sigil );
+ my $origtext = join( ' ', @{$wit->text} );
+ throw( "Text differs for witness " . $wit->sigil )
+ unless $pathtext eq $origtext;
+ if( $wit->is_layered ) {
+ $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
+ $origtext = join( ' ', @{$wit->layertext} );
+ throw( "Ante-corr text differs for witness " . $wit->sigil )
+ unless $pathtext eq $origtext;
+ }
+ }
+
+ $self->relations->rebuild_equivalence();
+ $self->calculate_ranks();
+}
# Helper function for manipulating the graph.
sub _stringify_args {
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $source, $target, $wit ) = $self->_objectify_args( @_ );
+ my( $source, $target, $wit ) = $self->_stringify_args( @_ );
$self->_graphcalc_done(0);
# Connect the readings
$dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
$edge->[0], $edge->[1], $varopts );
} elsif( $used{$edge->[0]} ) {
- $subend{$edge->[0]} = 1;
+ $subend{$edge->[0]} = $edge->[1];
} elsif( $used{$edge->[1]} ) {
- $substart{$edge->[1]} = 1;
+ $substart{$edge->[1]} = $edge->[0];
}
}
# Add substitute start and end edges if necessary
foreach my $node ( keys %substart ) {
- my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
+ my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
my $variables = { %edge_attrs, 'label' => $witstr };
+ my $nrdg = $self->reading( $node );
+ if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
+ # Substart is actually one lower than $startrank
+ $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
+ }
my $varopts = _dot_attr_string( $variables );
- $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
+ $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
}
foreach my $node ( keys %subend ) {
- my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
+ my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $varopts = _dot_attr_string( $variables );
- $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
+ $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
}
# HACK part 2
if( $STRAIGHTENHACK ) {
my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
my @all_pos = ( 1 .. $self->end->rank - 1 );
foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
- # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
+ # say STDERR "Making witness row(s) for " . $wit->sigil;
my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
my @row = _make_witness_row( \@wit_path, \@all_pos );
push( @{$table->{'alignment'}},
foreach my $rdg ( @$path ) {
my $rtext = $rdg->text;
$rtext = '#LACUNA#' if $rdg->is_lacuna;
- print STDERR "rank " . $rdg->rank . "\n" if $debug;
- # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
+ say STDERR "rank " . $rdg->rank if $debug;
+ # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
$char_hash{$rdg->rank} = { 't' => $rdg };
}
my @row = map { $char_hash{$_} } @$positions;
sub make_witness_paths {
my( $self ) = @_;
foreach my $wit ( $self->tradition->witnesses ) {
- # print STDERR "Making path for " . $wit->sigil . "\n";
+ # say STDERR "Making path for " . $wit->sigil;
$self->make_witness_path( $wit );
}
}
# Transfer our rankings from the topological graph to the real one.
foreach my $r ( $self->readings ) {
- if( $r->is_nonrel ) {
- # These are not in the equivalence graph. Grab the rank of the highest
- # predecessor + 1.
- my @preds = $self->sequence->predecessors( $r );
- my $mrank = 0;
- map { my $rk = $node_ranks->{$self->equivalence( $_ )} + 1;
- $mrank = $rk > $mrank ? $rk : $mrank; }
- $self->sequence->predecessors( $r );
- throw( "All predecessors of $r unranked!" ) unless $mrank;
- $r->rank( $mrank );
- } elsif( defined $node_ranks->{$self->equivalence( $r->id )} ) {
+ if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
$r->rank( $node_ranks->{$self->equivalence( $r->id )} );
} else {
# Die. Find the last rank we calculated.
next;
}
# Combine!
- print STDERR "Combining readings at same rank: $key\n";
+ #say STDERR "Combining readings at same rank: $key";
$changed = 1;
$self->merge_readings( $unique_rank_rdg{$key}, $rdg );
# TODO see if this now makes a common point.
my @last_r2 = ( $r2 );
# my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
my %all_seen;
- # print STDERR "Finding common $dir for $r1, $r2\n";
+ # say STDERR "Finding common $dir for $r1, $r2";
while( !@candidates ) {
last unless $iter--; # Avoid looping infinitely
# Iterate separately down the graph from r1 and r2
foreach my $lc ( @last_r1 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r1';
foreach my $lc ( @last_r2 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r2';