### TODO Check these figures
if( $t ) {
- is( scalar $t->collation->readings, 312, "Collation has all readings" );
- is( scalar $t->collation->paths, 363, "Collation has all paths" );
+ is( scalar $t->collation->readings, 311, "Collation has all readings" );
+ is( scalar $t->collation->paths, 361, "Collation has all paths" );
is( scalar $t->witnesses, 13, "Collation has all witnesses" );
}
+# Check that we have the right witnesses
+my %seen_wits;
+map { $seen_wits{$_} = 0 } qw/ A B C D E F G H K P Q S T /;
+foreach my $wit ( $t->witnesses ) {
+ $seen_wits{$wit->sigil} = 1;
+}
+is( scalar keys %seen_wits, 13, "No extra witnesses were made" );
+foreach my $k ( keys %seen_wits ) {
+ ok( $seen_wits{$k}, "Witness $k still exists" );
+}
+
+# Check that the witnesses have the right texts
+foreach my $wit ( $t->witnesses ) {
+ my $origtext = join( ' ', @{$wit->text} );
+ my $graphtext = $t->collation->path_text( $wit->sigil );
+ is( $graphtext, $origtext, "Collation matches original for witness " . $wit->sigil );
+}
+
+# Check that the a.c. witnesses have the right text
+map { $seen_wits{$_} = 0 } qw/ A B C D F G H K S /;
+foreach my $k ( keys %seen_wits ) {
+ my $wit = $t->witness( $k );
+ if( $seen_wits{$k} ) {
+ ok( $wit->is_layered, "Witness $k got marked as layered" );
+ ok( $wit->has_layertext, "Witness $k has an a.c. version" );
+ my $origtext = join( ' ', @{$wit->layertext} );
+ my $acsig = $wit->sigil . $t->collation->ac_label;
+ my $graphtext = $t->collation->path_text( $acsig );
+ is( $graphtext, $origtext, "Collation matches original a.c. for witness $k" );
+ } else {
+ ok( !$wit->is_layered, "Witness $k not marked as layered" );
+ ok( !$wit->has_layertext, "Witness $k has no a.c. version" );
+ }
+}
+
=end testing
=cut
sub parse {
my( $tradition, $opts ) = @_;
my $c = $tradition->collation; # shorthand
- my $csv = Text::CSV_XS->new( {
- binary => 1, # binary for UTF-8
- sep_char => exists $opts->{'sep_char'} ? $opts->{'sep_char'} : "\t" }
- );
+ my $csv_options = { 'binary' => 1 };
+ $csv_options->{'sep_char'} = $opts->{'sep_char'} || "\t";
+ if( $csv_options->{'sep_char'} eq "\t" ) {
+ # If it is really tab separated, nothing is an escape char.
+ $csv_options->{'quote_char'} = undef;
+ $csv_options->{'escape_char'} = undef;
+ }
+ my $csv = Text::CSV_XS->new( $csv_options );
my $alignment_table;
if( exists $opts->{'string' } ) {
# Set up the witnesses we find in the first line
my @witnesses;
- my %ac_wits; # Track these for later removal
+ my %ac_wits; # Track layered witness -> main witness mapping
foreach my $sigil ( @{$alignment_table->[0]} ) {
my $wit = $tradition->add_witness( 'sigil' => $sigil );
$wit->path( [ $c->start ] );
push( @witnesses, $wit );
my $aclabel = $c->ac_label;
if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) {
- $ac_wits{$1} = $wit;
+ $ac_wits{$sigil} = $1;
}
}
+ # Save the original witness text sequences. Have to loop back through
+ # the witness columns after we have identified all the a.c. witnesses.
+ foreach my $idx ( 0 .. $#{$alignment_table->[0]} ) {
+ my @sequence = map { $_->[$idx] } @{$alignment_table};
+ my $sigil = shift @sequence;
+ my $is_layer = exists( $ac_wits{$sigil} );
+ my $wit = $tradition->witness( $is_layer ? $ac_wits{$sigil} : $sigil );
+ # Now get rid of gaps and meta-readings like #LACUNA#
+ my @words = grep { $_ && $_ !~ /^\#.*\#$/ } @sequence;
+ $is_layer ? $wit->layertext( \@words ) : $wit->text( \@words );
+ }
+
# Now for the next rows, make nodes as necessary, assign their ranks, and
# add them to the witness paths.
foreach my $idx ( 1 .. $#{$alignment_table} ) {
my $row = $alignment_table->[$idx];
- my $nodes = make_nodes( $c, $row, $idx );
+ my $nodes = _make_nodes( $c, $row, $idx );
foreach my $w ( 0 .. $#{$row} ) {
# push the appropriate node onto the appropriate witness path
my $word = $row->[$w];
# Fold any a.c. witnesses into their main witness objects, and
# delete the independent a.c. versions.
foreach my $a ( keys %ac_wits ) {
- my $main_wit = $tradition->witness( $a );
+ my $ac_wit = $tradition->witness( $a );
+ my $main_wit = $tradition->witness( $ac_wits{$a} );
next unless $main_wit;
- my $ac_wit = $ac_wits{$a};
+ $main_wit->is_layered(1);
$main_wit->uncorrected_path( $ac_wit->path );
$tradition->del_witness( $ac_wit );
}
foreach my $rdg ( grep { $_->is_lacuna } $c->readings ) {
$c->del_reading( $rdg ) unless $c->reading_witnesses( $rdg );
}
+
+ # Do a consistency check.
+ foreach my $wit ( $tradition->witnesses ) {
+ my $pathtext = $c->path_text( $wit->sigil );
+ my $origtext = join( ' ', @{$wit->text} );
+ warn "Text differs for witness " . $wit->sigil
+ unless $pathtext eq $origtext;
+ if( $wit->is_layered ) {
+ $pathtext = $c->path_text( $wit->sigil.$c->ac_label );
+ $origtext = join( ' ', @{$wit->layertext} );
+ warn "Ante-corr text differs for witness " . $wit->sigil
+ unless $pathtext eq $origtext;
+ } else {
+ warn "Text " . $wit->sigil . " has a layered text but is not marked as layered"
+ if $wit->has_layertext;
+ }
+ }
+
+ # Note that our ranks and common readings are set.
+ $c->_graphcalc_done(1);
}
-sub make_nodes {
+sub _make_nodes {
my( $collation, $row, $index ) = @_;
my %unique;
+ my $commonctr = 0; # Holds the number of unique readings + gaps, ex. lacunae.
foreach my $w ( @$row ) {
$unique{$w} = 1 if $w;
+ $commonctr +=1 unless ( $w && $w eq '#LACUNA#' );
}
my $ctr = 1;
foreach my $w ( keys %unique ) {
'rank' => $index,
'text' => $w,
};
- $rargs->{'is_lacuna'} = 1 if $w eq '#LACUNA#';
+ if( $w eq '#LACUNA#' ) {
+ $rargs->{'is_lacuna'} = 1;
+ } elsif( $commonctr == 1 ) {
+ $rargs->{'is_common'} = 1;
+ }
my $r = $collation->add_reading( $rargs );
$unique{$w} = $r;
$ctr++;
}
+ # Collate this sequence of readings via a single 'collation' relationship.
+ my @rankrdgs = values %unique;
+ my $collation_rel;
+ while( @rankrdgs ) {
+ my $r = shift @rankrdgs;
+ next if $r->is_meta;
+ foreach my $nr ( @rankrdgs ) {
+ if( $collation_rel ) {
+ $collation->add_relationship( $r, $nr, $collation_rel );
+ } else {
+ $collation->add_relationship( $r, $nr,
+ { 'type' => 'collated',
+ 'annotation' => "Parsed together for rank $index" } );
+ $collation_rel = $collation->get_relationship( $r, $nr );
+ }
+ }
+ }
+
return \%unique;
}