my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
my $variables = { %edge_attrs, 'label' => $label };
# Account for the rank gap if necessary
- my $rankgap = $self->reading( $edge->[1] )->rank
+ if( $self->reading( $edge->[1] )->has_rank
+ && $self->reading( $edge->[0] )->has_rank
+ && $self->reading( $edge->[1] )->rank
+ - $self->reading( $edge->[0] )->rank > 1 ) {
+ $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
- $self->reading( $edge->[0] )->rank;
- $variables->{'minlen'} = $rankgap if $rankgap > 1;
+ }
my $varopts = _dot_attr_string( $variables );
$dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
$edge->[0], $edge->[1], $varopts );
=cut
# TODO Think about returning some lazy-eval iterator.
+# TODO Get rid of backup; we should know from what witness is whether we need it.
sub reading_sequence {
my( $self, $start, $end, $witness, $backup ) = @_;
my $regex = $self->wit_list_separator;
my @answer = split( /\Q$regex\E/, $label );
return @answer;
-}
+}
+
+=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
+
+Returns the text of a witness (plus its backup, if we are using a layer)
+as stored in the collation. The text is returned as a string, where the
+individual readings are joined with spaces and the meta-readings (e.g.
+lacunae) are omitted. Optional specification of $start and $end allows
+the generation of a subset of the witness text.
+=cut
+
+sub path_text {
+ my( $self, $wit, $backup, $start, $end ) = @_;
+ $start = $self->start unless $start;
+ $end = $self->end unless $end;
+ my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit, $backup );
+ return join( ' ', map { $_->text } @path );
+}
=head1 INITIALIZATION METHODS
} else {
$args = { @_ };
}
-
- # Did we get a JSON token to parse into a reading? If so, massage it.
- if( exists $args->{'json'} ) {
- my $j = delete $args->{'json'};
-
- # If we have separated punctuation, restore it.
- if( exists $j->{'punctuation'} ) {
- $args->{'text'} = _restore_punct( $j->{'t'}, $j->{'punctuation'} );
- } else {
- $args->{'text'} = $j->{'t'};
- # we don't use comparison or canonical forms yet
- }
- }
-
+
# If one of our special booleans is set, we change the text and the
# ID to match.
if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
$class->$orig( $args );
};
-# Utility function for parsing JSON from nCritic
-sub _restore_punct {
- my( $word, @punct ) = @_;
- foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @punct ) {
- substr( $word, $p->{pos}, 0, $p->{char} );
- }
- return $word;
-}
-
=head2 is_meta
A meta attribute (ha ha), which should be true if any of our 'special'
load( $format_mod );
# TODO Handle a string someday if we ever have a format other than KUL
my @apparatus_entries = $format_mod->can('read')->( $opts );
- merge_base( $tradition->collation, $opts->{'base'}, @apparatus_entries );
+ merge_base( $tradition->collation, $opts, @apparatus_entries );
}
=item B<merge_base>
# edits_required -> wit -> [ { start_idx, end_idx, items } ]
sub merge_base {
- my( $collation, $base_file, @app_entries ) = @_;
- my @base_line_starts = read_base( $base_file, $collation );
+ my( $collation, $opts, @app_entries ) = @_;
+ my @base_line_starts = read_base( $opts->{'base'}, $collation );
my %all_witnesses;
foreach my $app ( @app_entries ) {
my @lemma_set = $collation->reading_sequence( $lemma_start,
$lemma_end );
my @reading_sets = [ @lemma_set ];
-
+
# For each reading that is not rdg_0, we create the variant
# reading nodes, and store the range as an edit operation on
# the base text.
}
### HACKY HACKY Do some one-off path corrections here.
- require( 'data/boodts/s158.HACK' );
- KUL::HACK::pre_path_hack( $collation );
-
+ if( $opts->{'input'} eq 'KUL' ) {
+ require 'data/boodts/s158.HACK';
+ KUL::HACK::pre_path_hack( $collation );
+ }
+
# Now walk paths and calculate positional rank.
$collation->make_witness_paths();
# Now delete any orphaned readings.
$collation->del_reading( $r );
}
- KUL::HACK::post_path_hack( $collation );
+ KUL::HACK::post_path_hack( $collation ) if $opts->{'input'} eq 'KUL';
# Have to check relationship validity at this point, because before that
# we had no paths.
# foreach my $rel ( $collation->relationships ) {
# $rel->type, $rel->from->id, $rel->to->id );
# }
# }
- $collation->calculate_ranks();
+ # $collation->calculate_ranks();
}
=item B<read_base>
is( scalar $t->witnesses, 3, "Collation has all witnesses" );
}
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C /;
+# Check that we have the right witnesses
+foreach my $wit ( $t->witnesses ) {
+ $seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 3, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+ ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+ my $origtext = join( ' ', @{$wit->text} );
+ my $graphtext = $t->collation->path_text( $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
+
=end testing
=cut
push( @witnesses, $wit );
my $aclabel = $c->ac_label;
if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) {
- $ac_wits{$1} = $wit;
+ $ac_wits{$sigil} = $1;
}
}
+
+ # Save the original witness text for consistency checking. We do this
+ # in a separate loop to make sure we have all base witnesses defined,
+ # and to make sure that our munging and comparing later doesn't affect
+ # the original text.
+ foreach my $intext ( @{$table->{'alignment'}} ) {
+ my $rs = $intext->{'witness'};
+ my $is_layer = exists $ac_wits{$rs};
+ my $wit = $tradition->witness( $is_layer ? $ac_wits{$rs} : $rs );
+ my @tokens = grep { $_ && $_->{'t'} !~ /^\#.*\#$/ } @{$intext->{'tokens'}};
+ my @words = map { _restore_punct( $_ ) } @tokens;
+ $is_layer ? $wit->layertext( \@words ) : $wit->text( \@words );
+ }
# Create the readings in each row
my $length = exists $table->{'length'}
# Fold any a.c. witnesses into their main witness objects, and
# delete the independent a.c. versions.
foreach my $a ( keys %ac_wits ) {
- my $main_wit = $tradition->witness( $a );
+ my $ac_wit = $tradition->witness( $a );
+ my $main_wit = $tradition->witness( $ac_wits{$a} );
next unless $main_wit;
- my $ac_wit = $ac_wits{$a};
$main_wit->uncorrected_path( $ac_wit->path );
$tradition->del_witness( $ac_wit );
}
my @readings;
foreach my $j ( 0 .. $#tokens ) {
if( $tokens[$j] ) {
- my $t = $tokens[$j];
+ my $word = _restore_punct( $tokens[$j] );
my $rdg;
- if( exists( $unique{$t->{'t'}} ) ) {
- $rdg = $unique{$t->{'t'}};
+ if( exists( $unique{$word} ) ) {
+ $rdg = $unique{$word};
} else {
my %args = ( 'id' => join( ',', $idx, $j+1 ),
- 'json' => $t,
+ 'text' => $word,
'collation' => $c );
- $args{'is_lacuna'} = 1 if $t->{'t'} eq '#LACUNA#';
+ $args{'is_lacuna'} = 1 if $word eq '#LACUNA#';
$rdg = Text::Tradition::Collation::Reading->new( %args );
- $unique{$t->{'t'}} = $rdg;
+ $unique{$word} = $rdg;
}
push( @readings, $rdg );
} else {
return @readings;
}
+# Utility function for parsing JSON from nCritic
+sub _restore_punct {
+ my( $token ) = @_;
+ my $word = $token->{'t'};
+ return $word unless exists $token->{'punctuation'};
+ foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @{$token->{'punctuation'}} ) {
+ substr( $word, $p->{pos}, 0, $p->{char} );
+ }
+ return $word;
+}
+
1;
=head1 LICENSE
is( scalar $t->witnesses, 13, "Collation has all witnesses" );
}
+# Check that we have the right witnesses
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C D E F G H K P Q S T /;
+foreach my $wit ( $t->witnesses ) {
+ $seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 13, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+ ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+ my $origtext = join( ' ', @{$wit->text} );
+ my $graphtext = $t->collation->path_text( $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
+
+# Check that the a.c. witnesses have the right text
+map { $seen_wits{$_} = 0 } qw/ A B C D F G H K S /;
+foreach my $k ( keys %seen_wits ) {
+ my $wit = $t->witness( $k );
+ if( $seen_wits{$k} ) {
+ ok( $wit->is_layered, "Witness $k got marked as layered" );
+ ok( $wit->has_layertext, "Witness $k has an a.c. version" );
+ my $origtext = join( ' ', @{$wit->layertext} );
+ my $acsig = $wit->sigil . $t->collation->ac_label;
+ my $graphtext = $t->collation->path_text( $acsig, $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original a.c. for witness $k" );
+ } else {
+ ok( !$wit->is_layered, "Witness $k not marked as layered" );
+ ok( !$wit->has_layertext, "Witness $k has no a.c. version" );
+ }
+}
+
=end testing
=cut
# Set up the witnesses we find in the first line
my @witnesses;
- my %ac_wits; # Track these for later removal
+ my %ac_wits; # Track layered witness -> main witness mapping
foreach my $sigil ( @{$alignment_table->[0]} ) {
my $wit = $tradition->add_witness( 'sigil' => $sigil );
$wit->path( [ $c->start ] );
push( @witnesses, $wit );
my $aclabel = $c->ac_label;
if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) {
- $ac_wits{$1} = $wit;
+ $ac_wits{$sigil} = $1;
}
}
+ # Save the original witness text sequences. Have to loop back through
+ # the witness columns after we have identified all the a.c. witnesses.
+ foreach my $idx ( 0 .. $#{$alignment_table->[0]} ) {
+ my @sequence = map { $_->[$idx] } @{$alignment_table};
+ my $sigil = shift @sequence;
+ my $is_layer = exists( $ac_wits{$sigil} );
+ my $wit = $tradition->witness( $is_layer ? $ac_wits{$sigil} : $sigil );
+ # Now get rid of gaps and meta-readings like #LACUNA#
+ my @words = grep { $_ && $_ !~ /^\#.*\#$/ } @sequence;
+ $is_layer ? $wit->layertext( \@words ) : $wit->text( \@words );
+ }
+
# Now for the next rows, make nodes as necessary, assign their ranks, and
# add them to the witness paths.
foreach my $idx ( 1 .. $#{$alignment_table} ) {
# Fold any a.c. witnesses into their main witness objects, and
# delete the independent a.c. versions.
foreach my $a ( keys %ac_wits ) {
- my $main_wit = $tradition->witness( $a );
+ my $ac_wit = $tradition->witness( $a );
+ my $main_wit = $tradition->witness( $ac_wits{$a} );
next unless $main_wit;
- my $ac_wit = $ac_wits{$a};
$main_wit->uncorrected_path( $ac_wit->path );
$tradition->del_witness( $ac_wit );
}
isa => 'ArrayRef[Str]',
predicate => 'has_text',
);
+
+has 'layertext' => (
+ is => 'rw',
+ isa => 'ArrayRef[Str]',
+ predicate => 'has_layertext',
+ );
# Source. This is where we read in the witness, if not from a
# pre-prepared collation. It is probably a filename.
--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use Test::More 'no_plan';
+$| = 1;
+
+
+
+# =begin testing
+{
+use Text::Tradition;
+
+use_ok( 'Text::Tradition::Collation::RelationshipStore' );
+}
+
+
+
+
+1;
is( scalar $t->collation->paths, 32, "Collation has all paths" );
is( scalar $t->witnesses, 3, "Collation has all witnesses" );
}
+
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C /;
+# Check that we have the right witnesses
+foreach my $wit ( $t->witnesses ) {
+ $seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 3, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+ ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+ my $origtext = join( ' ', @{$wit->text} );
+ my $graphtext = $t->collation->path_text( $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
}
is( scalar $t->collation->paths, 361, "Collation has all paths" );
is( scalar $t->witnesses, 13, "Collation has all witnesses" );
}
+
+# Check that we have the right witnesses
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C D E F G H K P Q S T /;
+foreach my $wit ( $t->witnesses ) {
+ $seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 13, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+ ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+ my $origtext = join( ' ', @{$wit->text} );
+ my $graphtext = $t->collation->path_text( $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
+
+# Check that the a.c. witnesses have the right text
+map { $seen_wits{$_} = 0 } qw/ A B C D F G H K S /;
+foreach my $k ( keys %seen_wits ) {
+ my $wit = $t->witness( $k );
+ if( $seen_wits{$k} ) {
+ ok( $wit->is_layered, "Witness $k got marked as layered" );
+ ok( $wit->has_layertext, "Witness $k has an a.c. version" );
+ my $origtext = join( ' ', @{$wit->layertext} );
+ my $acsig = $wit->sigil . $t->collation->ac_label;
+ my $graphtext = $t->collation->path_text( $acsig, $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original a.c. for witness $k" );
+ } else {
+ ok( !$wit->is_layered, "Witness $k not marked as layered" );
+ ok( !$wit->has_layertext, "Witness $k has no a.c. version" );
+ }
+}
}