use Graph::Easy;
use IPC::Run qw( run binary );
-use Text::Tradition::Collation::Reading;
use Text::Tradition::Collation::Path;
+use Text::Tradition::Collation::Reading;
+use Text::Tradition::Collation::Relationship;
+use Text::Tradition::Collation::Segment;
use XML::LibXML;
use Moose;
reading => 'node',
path => 'edge',
readings => 'nodes',
+ segments => 'nodes',
paths => 'edges',
relationships => 'edges',
},
return;
}
# Make sure the proposed path does not yet exist
+ # NOTE 'reading' will currently return readings and segments
my( $source, $target, $wit ) = @_;
$source = $self->reading( $source )
unless ref( $source ) eq 'Text::Tradition::Collation::Reading';
my $orig = shift;
my $self = shift;
- my @result = grep { $_->class eq 'edge.path' } $self->$orig( @_ );
+ my @result = grep { $_->sub_class eq 'path' } $self->$orig( @_ );
return @result;
};
around relationships => sub {
my $orig = shift;
my $self = shift;
- my @result = grep { $_->class eq 'edge.relationship' } $self->$orig( @_ );
+ my @result = grep { $_->sub_class eq 'relationship' } $self->$orig( @_ );
+ return @result;
+};
+
+around readings => sub {
+ my $orig = shift;
+ my $self = shift;
+ my @result = grep { $_->sub_class ne 'segment' } $self->$orig( @_ );
+ return @result;
+};
+
+around segments => sub {
+ my $orig = shift;
+ my $self = shift;
+ my @result = grep { $_->sub_class eq 'segment' } $self->$orig( @_ );
return @result;
};
my( $self, $source, $target, $label ) = @_;
my @paths = $source->edges_to( $target );
my @relevant = grep { $_->label eq $label } @paths;
- return scalar @paths;
+ return scalar @relevant;
+}
+
+## Dealing with groups of readings, i.e. segments.
+
+sub add_segment {
+ my( $self, @items ) = @_;
+ my $segment = Text::Tradition::Collation::Segment->new( 'members' => \@items );
+ return $segment;
}
## Dealing with relationships between readings. This is a different
## sort of graph edge.
sub add_relationship {
- my( $self, $type, $source, $target, $global ) = @_;
+ my( $self, $source, $target, $options ) = @_;
# Make sure there is not another relationship between these two
- # readings already
+ # readings or segments already
$source = $self->reading( $source )
- unless ref( $source ) eq 'Text::Tradition::Collation::Reading';
+ unless ref( $source ) && $source->isa( 'Graph::Easy::Node' );
$target = $self->reading( $target )
- unless ref( $target ) eq 'Text::Tradition::Collation::Reading';
+ unless ref( $target ) && $target->isa( 'Graph::Easy::Node' );
foreach my $rel ( $source->edges_to( $target ) ) {
- if( $rel->label eq $type && $rel->class eq 'edge.relationship' ) {
+ if( $rel->label eq $options->{'type'} && $rel->class eq 'edge.relationship' ) {
return;
}
}
+ $options->{'orig_relation'} = [ $source, $target ];
- my $rel = Text::Tradition::Collation::Relationship->new(
- 'sort' => $type,
- 'global' => $global,
- 'orig_relation' => [ $source, $target ],
- );
- print STDERR sprintf( "Setting relationship %s between readings %s (%s)"
- . " and %s (%s)\n", $type,
- $source->label, $source->name,
- $target->label, $target->name );
+ my $rel = Text::Tradition::Collation::Relationship->new( %$options );
$self->graph->add_edge( $source, $target, $rel );
- if( $global ) {
+ if( $options->{'global'} ) {
# Look for all readings with the source label, and if there are
# colocated readings with the target label, join them too.
foreach my $r ( $self->readings() ) {
if( @colocated ) {
warn "Multiple readings with same label at same position!"
if @colocated > 1;
- my $dup_rel = Text::Tradition::Collation::Relationship->new(
- 'sort' => $type,
- 'global' => $global,
- 'orig_relation' => [ $source, $target ],
- );
+ my $dup_rel = Text::Tradition::Collation::Relationship->new( %$options );
$self->graph->add_edge( $r, $colocated[0], $dup_rel );
}
}
11, "white", "filled", $self->graph->get_attribute( 'node', 'shape' ) );
foreach my $reading ( $self->readings ) {
+ # Need not output nodes without separate labels
next if $reading->name eq $reading->label;
+ # TODO output readings or segments, but not both
+ next if $reading->class eq 'node.segment';
$dot .= sprintf( "\t\"%s\" [ label=\"%s\" ]\n", $reading->name, $reading->label );
}
$root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
# Add the data keys for nodes
- my @node_data = ( 'name', 'reading', 'identical', 'position' );
- # HACKY HACKY HACK Relationship data
my %node_data_keys;
my $ndi = 0;
- foreach my $datum ( @node_data ) {
+ foreach my $datum ( qw/ name reading identical position class / ) {
$node_data_keys{$datum} = 'dn'.$ndi++;
my $key = $root->addNewChild( $graphml_ns, 'key' );
$key->setAttribute( 'attr.name', $datum );
# Add the data keys for edges, i.e. witnesses
my $edi = 0;
my %edge_data_keys;
- foreach my $edge_key( qw/ witness_main witness_ante_corr relationship / ) {
+ foreach my $edge_key( qw/ witness_main witness_ante_corr relationship class / ) {
$edge_data_keys{$edge_key} = 'de'.$edi++;
my $key = $root->addNewChild( $graphml_ns, 'key' );
$key->setAttribute( 'attr.name', $edge_key );
my $node_ctr = 0;
my %node_hash;
+ # Add our readings to the graph
foreach my $n ( sort { $a->name cmp $b->name } $self->readings ) {
- my %this_node_data = ();
- foreach my $datum ( @node_data ) {
- my $key = $node_data_keys{$datum};
- if( $datum eq 'name' ) {
- $this_node_data{$key} = $n->name;
- } elsif( $datum eq 'reading' ) {
- $this_node_data{$key} = $n->label;
- } elsif( $datum eq 'identical' && $n->has_primary ) {
- $this_node_data{$key} = $n->primary->name;
- } elsif( $datum eq 'position' ) {
- $this_node_data{$key} = $n->position;
- }
- }
my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
my $node_xmlid = 'n' . $node_ctr++;
$node_hash{ $n->name } = $node_xmlid;
$node_el->setAttribute( 'id', $node_xmlid );
-
- foreach my $dk ( keys %this_node_data ) {
- my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
- $d_el->setAttribute( 'key', $dk );
- $d_el->appendText( $this_node_data{$dk} );
- }
+ _add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
+ _add_graphml_data( $node_el, $node_data_keys{'reading'}, $n->label );
+ _add_graphml_data( $node_el, $node_data_keys{'position'}, $n->position );
+ _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
+ _add_graphml_data( $node_el, $node_data_keys{'identical'}, $n->primary->name )
+ if $n->has_primary;
}
- # Add the path edges
+ # Add any segments we have
+ foreach my $n ( sort { $a->name cmp $b->name } $self->segments ) {
+ my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
+ my $node_xmlid = 'n' . $node_ctr++;
+ $node_hash{ $n->name } = $node_xmlid;
+ $node_el->setAttribute( 'id', $node_xmlid );
+ _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
+ _add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
+ }
+
+ # Add the path, relationship, and segment edges
my $edge_ctr = 0;
foreach my $e ( sort { $a->from->name cmp $b->from->name } $self->graph->edges() ) {
my( $name, $from, $to ) = ( 'e'.$edge_ctr++,
$edge_el->setAttribute( 'source', $from );
$edge_el->setAttribute( 'target', $to );
$edge_el->setAttribute( 'id', $name );
- if( $e->class() eq 'edge.path' ) {
+ # Add the edge class
+ _add_graphml_data( $edge_el, $edge_data_keys{'class'}, $e->sub_class );
+ if( $e->sub_class eq 'path' ) {
# It's a witness path, so add the witness
my $base = $e->label;
my $key = $edge_data_keys{'witness_main'};
$base = $1;
$key = $edge_data_keys{'witness_ante_corr'};
}
- my $wit_el = $edge_el->addNewChild( $graphml_ns, 'data' );
- $wit_el->setAttribute( 'key', $key );
- $wit_el->appendText( $base );
- } else {
+ _add_graphml_data( $edge_el, $key, $base );
+ } elsif( $e->sub_class eq 'relationship' ) {
# It's a relationship
- my $rel_el = $edge_el->addNewChild( $graphml_ns, 'data' );
- $rel_el->setAttribute( 'key', $edge_data_keys{'relationship'} );
- $rel_el->appendText( $e->label() );
- }
+ _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $e->label );
+ } # else a segment, nothing to record but source, target, class
}
# Return the thing
return $graphml->toString(1);
}
-sub _make_xml_attr {
- my $str = shift;
- $str =~ s/\s/_/g;
- $str =~ s/\W//g;
- $str =~ "a$str" if $str =~ /^\d/;
- return $str;
+sub _add_graphml_data {
+ my( $el, $key, $value ) = @_;
+ my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
+ return unless defined $value;
+ $data_el->setAttribute( 'key', $key );
+ $data_el->appendText( $value );
}
sub collapse_graph_paths {
my $majority = int( scalar( @{$self->tradition->witnesses} ) / 2 ) + 1;
# But don't compress if there are only a few witnesses.
$majority = 4 if $majority < 4;
- foreach my $node( $self->readings ) {
+ foreach my $node ( $self->readings ) {
my $newlabels = {};
# We will visit each node, so we only look ahead.
foreach my $edge ( $node->outgoing() ) {
@common_readings = _find_common( \@common_readings, $wit->path );
@common_readings = _find_common( \@common_readings, $wit->uncorrected_path );
}
+ map { $_->make_common } @common_readings;
return @common_readings;
}
foreach my $idx( 0 .. $#chain-1 ) {
my $source = $chain[$idx];
my $target = $chain[$idx+1];
- $self->add_path( $source, $target, "$sig (a.c.)" )
+ $self->add_path( $source, $target, $sig.$self->ac_label )
unless $self->has_path( $source, $target, $sig );
}
}
print STDERR "Calculating positions in " . $wit->sigil . "\n";
_update_positions_from_path( $wit->path, @ordered_common );
_update_positions_from_path( $wit->uncorrected_path, @ordered_common )
- if $wit->has_uncorrected;
+ if $wit->has_ante_corr;
}
# DEBUG
subtype 'RelationshipVector',
=> as 'ArrayRef',
=> where { @$_ == 2
- && $_->[0]->isa( 'Text::Tradition::Collation::Reading' )
- && $_->[1]->isa( 'Text::Tradition::Collation::Reading' )
+ && $_->[0]->isa( 'Graph::Easy::Node' )
+ && $_->[1]->isa( 'Graph::Easy::Node' )
},
message { 'Argument should be [ SourceReading, TargetReading ]' };
default => 0,
);
+has 'non_correctable' => (
+ is => 'rw',
+ isa => 'Bool',
+ );
+
+has 'non_independent' => (
+ is => 'rw',
+ isa => 'Bool',
+ );
+
sub FOREIGNBUILDARGS {
my $class = shift;
my %args = @_;
--- /dev/null
+package Text::Tradition::Collation::Segment;
+
+use Moose;
+use MooseX::NonMoose;
+
+extends 'Graph::Easy::Node';
+
+# A segment is a special 'invisible' node that is a set of Readings.
+# We should never display these, but it is useful to have them
+# available for many-to-many relationship mappings.
+
+has 'members' => (
+ is => 'rw',
+ isa => 'ArrayRef[Text::Tradition::Collation::Reading]',
+ required => 1,
+);
+
+sub FOREIGNBUILDARGS {
+ my $class = shift;
+ my %args = @_;
+
+ # Name the segment after its member elements.
+ my $nodename = join( ' ', map { $_->name } @{$args{'members'}} );
+ return ( 'name', $nodename );
+}
+
+sub BUILD {
+ my( $self, $args ) = @_;
+ $self->set_attribute( 'class', 'segment' );
+
+ foreach my $r ( @{$self->members} ) {
+ my $seg_edge = $r->parent->add_edge( $r, $self, 'segment' );
+ $seg_edge->set_attribute( 'class', 'segment' );
+ }
+}
+
+# For now, a segment has no position in the graph. Eventually it might
+# have the position of its first member.
+sub has_position {
+ return undef;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
+######################################################
+## copied from Graph::Easy::Parser docs
+######################################################
+# when overriding nodes, we also need ::Anon
+
+package Text::Tradition::Collation::Segment::Anon;
+use Moose;
+use MooseX::NonMoose;
+extends 'Text::Tradition::Collation::Segment';
+extends 'Graph::Easy::Node::Anon';
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+# use base qw/Text::Tradition::Collation::Segment/;
+# use base qw/Graph::Easy::Node::Anon/;
+
+######################################################
+# and :::Empty
+
+package Text::Tradition::Collation::Segment::Empty;
+use Moose;
+use MooseX::NonMoose;
+extends 'Graph::Easy::Node::Empty';
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+# use base qw/Text::Tradition::Collation::Segment/;
+
+######################################################
=cut
-my $SHORTEND = 20; # Debug var - set this to limit the number of lines parsed
+my $SHORTEND = ''; # Debug var - set this to limit the number of lines parsed
my %base_text_index;
my $edits_required = {};
foreach my $w ( grep { $_ !~ /_post$/ } keys %$edits_required ) {
print STDERR "Creating witness $w\n";
my $witness_obj = $collation->tradition->add_witness( sigil => $w );
- my $debug = undef; # $w eq 'Vb10';
- my ( $text_seq, $ac ) = apply_edits( $collation,
- $edits_required->{$w},
- $edits_required->{$w."_post"},
- $debug );
-
- my @repeated = _check_for_repeated( @$text_seq );
- warn "Repeated elements @repeated in $w"
+ my $debug; # = $w eq 'Vb11';
+ my @ante_corr_seq = apply_edits( $collation, $edits_required->{$w}, $debug );
+ my @post_corr_seq = apply_edits( $collation, $edits_required->{$w."_post"}, $debug )
+ if exists( $edits_required->{$w."_post"} );
+
+ my @repeated = _check_for_repeated( @ante_corr_seq );
+ warn "Repeated elements @repeated in $w a.c."
+ if @repeated;
+ @repeated = _check_for_repeated( @post_corr_seq );
+ warn "Repeated elements @repeated in $w p.c."
if @repeated;
+
# Now save these paths in my witness object
- $witness_obj->path( $text_seq );
- if( $ac ) {
- $witness_obj->uncorrected( $ac );
+ if( @post_corr_seq ) {
+ $witness_obj->path( \@post_corr_seq );
+ $witness_obj->uncorrected_path( \@ante_corr_seq );
+ } else {
+ $witness_obj->path( \@ante_corr_seq );
}
}
$collation->del_reading( $_ );
}
+ ### HACKY HACKY Do some one-off path corrections here.
+ if( $collation->linear ) {
+ # What?
+ } else {
+ my $c = $collation;
+ # Vb5:
+ my $path = $c->tradition->witness('Vb5')->path;
+ splice( @$path, 1436, 0, $c->reading('106,14') );
+ # Vb11:
+ $path = $c->tradition->witness('Vb11')->path;
+ $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') );
+ splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( '16,1' ) );
+ # Vb12 a.c.:
+ $path = $c->tradition->witness('Vb12')->uncorrected_path;
+ splice( @$path, 1828, 1, $c->reading('rdg_2/137.5.0') );
+ # Vb13:
+ $path = $c->tradition->witness('Vb13')->path;
+ splice( @$path, 782, 0, $c->reading( '58,5' ) );
+ # Vb20 a.c.:
+ $path = $c->tradition->witness('Vb20')->uncorrected_path;
+ splice( @$path, 1251, 1, $c->reading( '94,6' ) );
+ # Vb26:
+ $path = $c->tradition->witness('Vb26')->path;
+ splice( @$path, 618, 0, $c->reading('46,2') )
+ }
+
# Now walk paths and calculate positions.
my @common_readings =
$collation->make_witness_paths();
if( @same ) {
foreach my $i ( 0 .. $#same ) {
unless( $merged{$same[$i]->name} ) {
- print STDERR sprintf( "Merging %s into %s\n",
- $vw->name,
- $same[$i]->name );
+ #print STDERR sprintf( "Merging %s into %s\n",
+ # $vw->name,
+ # $same[$i]->name );
$collation->merge_readings( $same[$i], $vw );
$merged{$same[$i]->name} = 1;
$matched = $i;
my( $collation, $app, $lemma, $variants ) = @_;
foreach my $rkey ( keys %$variants ) {
my $var = $variants->{$rkey}->{'reading'};
- my $typekey = sprintf( "_%s_type", $rkey );
- my $type = $app->{$typekey};
+ my $type = $app->{sprintf( "_%s_type", $rkey )};
+ my $noncorr = $app->{sprintf( "_%s_non_corr", $rkey )};
+ my $nonindep = $app->{sprintf( "_%s_non_indep", $rkey )};
+
+ my %rel_options = ();
+ $rel_options{'non_correctable'} = $noncorr if $noncorr && $noncorr =~ /^\d$/;
+ $rel_options{'non_indep'} = $nonindep if $nonindep && $nonindep =~ /^\d$/;
if( $type =~ /^(inv|tr|rep)$/i ) {
# Transposition or repetition: look for nodes with the
# same label but different IDs and mark them.
$type = 'repetition' if $type =~ /^rep/i;
+ $rel_options{'sort'} = $type;
my %labels;
foreach my $r ( @$lemma ) {
$labels{cmp_str( $r )} = $r;
$r->name ne $labels{$r->label}->name ) {
if( $type eq 'repetition' ) {
# Repetition
- $collation->add_relationship( $type, $r, $labels{$r->label} );
+ $collation->add_relationship( $r, $labels{$r->label}, \%rel_options );
} else {
# Transposition
$r->set_identical( $labels{$r->label} );
}
}
}
- } elsif( $type =~ /^(gr|sp(el)?)$/i ) {
- # Grammar/spelling: this can be a one-to-one or one-to-many
- # mapping. We should think about merging readings if it is
- # one-to-many.
+ } elsif( $type =~ /^(gr|lex|sp(el)?)$/i ) {
+
+ # Grammar/spelling/lexical: this can be a one-to-one or
+ # one-to-many mapping. We should think about merging
+ # readings if it is one-to-many.
+
$type = 'grammatical' if $type =~ /gr/i;
$type = 'spelling' if $type =~ /sp/i;
$type = 'repetition' if $type =~ /rep/i;
+ $type = 'lexical' if $type =~ /lex/i;
+ $rel_options{'sort'} = $type;
if( @$lemma == @$var ) {
foreach my $i ( 0 .. $#{$lemma} ) {
- $collation->add_relationship( $type, $var->[$i],
- $lemma->[$i] );
- }
- } elsif ( @$lemma > @$var && @$var == 1 ) {
- # Merge the lemma readings into one
- ## TODO This is a bad solution. We need a real one-to-many
- ## mapping.
- my $ln1 = shift @$lemma;
- foreach my $ln ( @$lemma ) {
- $collation->merge_readings( $ln1, $ln, ' ' );
- }
- $lemma = [ $ln1 ];
- $collation->add_relationship( $type, $var->[0], $lemma->[0] );
- } elsif ( @$lemma < @$var && @$lemma == 1 ) {
- my $vn1 = shift @$var;
- foreach my $vn ( @$var ) {
- $collation->merge_readings( $vn1, $vn, ' ' );
- }
- $var = [ $vn1 ];
- $collation->add_relationship( $type, $var->[0], $lemma->[0] );
+ $collation->add_relationship( $var->[$i], $lemma->[$i],
+ \%rel_options );
+ }
} else {
- warn "Cannot set $type relationship on a many-to-many variant";
+ # An uneven many-to-many mapping. Make a segment out of
+ # whatever we have.
+ my $lemseg = @$lemma > 1 ? $collation->add_segment( @$lemma ) : $lemma->[0];
+ my $varseg = @$var > 1 ? $collation->add_segment( @$var ) : $var->[0];
+ $collation->add_relationship( $varseg, $lemseg, \%rel_options );
}
- } elsif( $type !~ /^(lex|add|om)$/i ) {
+ } elsif( $type !~ /^(add|om)$/i ) {
warn "Unrecognized type $type";
}
}
sub apply_edits {
- my( $collation, $edit_sequence, $corrected_edit_sequence, $debug ) = @_;
-
- # Index the ante- and post-correctione edits that we have, so that
- # for each spot in the text we can apply the original witness
- # state and then apply its corrected state, if applicable.
- my $all_edits = {};
- foreach my $c ( @$edit_sequence ) {
- my $lemma_index = $base_text_index{$c->[0]};
- $all_edits->{$lemma_index}->{'ac'} = $c;
- # If the text carries no corrections, pc == ac.
- $all_edits->{$lemma_index}->{'pc'} = $c
- unless $corrected_edit_sequence;
- }
- foreach my $c ( @$corrected_edit_sequence ) {
- my $lemma_index = $base_text_index{$c->[0]};
- $all_edits->{$lemma_index}->{'pc'} = $c;
- }
-
+ my( $collation, $edit_sequence, $debug ) = @_;
my @lemma_text = $collation->reading_sequence( $collation->start,
$collation->reading( '#END#' ) );
my $drift = 0;
- my @ac_sequence;
- foreach my $lemma_index ( sort keys %$all_edits ) {
- my $ac = $all_edits->{$lemma_index}->{'ac'};
- my $pc = $all_edits->{$lemma_index}->{'pc'};
- my $realoffset = $lemma_index + $drift;
- if( $ac && $pc && $ac eq $pc ) {
- # No correction, just apply the edit
- my( $lemma_start, $length, $items ) = @$pc;
- splice( @lemma_text, $realoffset, $length, @$items );
- $drift += @$items + $length;
- } elsif ( !$pc ) {
- # Lemma text is unaltered, save a.c. as an 'uncorrection'
- my( $lemma_start, $length, $items ) = @$ac;
- push( @ac_sequence, [ $realoffset, $length, $items ] );
- } elsif ( !$ac ) {
- # Apply the edit, save lemma text as an 'uncorrection'
- my( $lemma_start, $length, $items ) = @$pc;
- my @old = splice( @lemma_text, $realoffset, $length, @$items );
- $drift += @$items + $length;
- push( @ac_sequence, [ $realoffset, scalar( @$items ), \@old ] );
- } else {
- # Apply the p.c. edit, then save the a.c. edit as an
- # 'uncorrection' on the p.c. text
- my( $lemma_start, $length, $items ) = @$pc;
- my @old = splice( @lemma_text, $realoffset, $length, @$items );
- $drift += @$items + $length;
- push( @ac_sequence, [ $realoffset, scalar( @$items ), \@old ] );
+ foreach my $correction ( @$edit_sequence ) {
+ my( $lemma_start, $length, $items ) = @$correction;
+ my $offset = $base_text_index{$lemma_start};
+ my $realoffset = $offset + $drift;
+ if( $debug ||
+ $lemma_text[$realoffset]->name ne $lemma_start ) {
+ my @this_phrase = @lemma_text[$realoffset..$realoffset+$length-1];
+ my @base_phrase;
+ my $i = $realoffset;
+ my $l = $collation->reading( $lemma_start );
+ while( $i < $realoffset+$length ) {
+ push( @base_phrase, $l );
+ $l = $collation->next_reading( $l );
+ $i++;
+ }
+
+ print STDERR sprintf( "Trying to replace %s (%s) starting at %d " .
+ "with %s (%s) with drift %d\n",
+ join( ' ', map {$_->label} @base_phrase ),
+ join( ' ', map {$_->name} @base_phrase ),
+ $realoffset,
+ join( ' ', map {$_->label} @$items ),
+ join( ' ', map {$_->name} @$items ),
+ $drift,
+ ) if $debug;
+
+ if( $lemma_text[$realoffset]->name ne $lemma_start ) {
+ warn( sprintf( "Should be replacing %s (%s) with %s (%s) " .
+ "but %s (%s) is there instead",
+ join( ' ', map {$_->label} @base_phrase ),
+ join( ' ', map {$_->name} @base_phrase ),
+ join( ' ', map {$_->label} @$items ),
+ join( ' ', map {$_->name} @$items ),
+ join( ' ', map {$_->label} @this_phrase ),
+ join( ' ', map {$_->name} @this_phrase ),
+ ) );
+ # next;
+ }
}
+ splice( @lemma_text, $realoffset, $length, @$items );
+ $drift += @$items - $length;
}
- return( \@lemma_text, \@ac_sequence );
+ return @lemma_text;
}
-
-# sub _apply_sequence_splice {
-# my( $collation, $sequence, $correction
-
+
# Helper function. Given a witness sigil, if it is a post-correctione
# sigil,return the base witness. If not, return a false value.
$apparatus->{'rdg_0'} = $linehash{'text'} if $linehash{'text'};
$apparatus->{'rdg_' . ++$rdg_ctr} = $linehash{'variant'};
foreach my $attr ( @fields[3..8] ) {
- $apparatus->{"_rdg_${rdg_ctr}_$attr"} = $linehash{$attr} if $linehash{$attr};
+ $apparatus->{"_rdg_${rdg_ctr}_$attr"} = $linehash{$attr} if defined $linehash{$attr};
}
foreach my $k ( @fields[10..$#fields] ) {
$apparatus->{$k} = 'rdg_0'
unless exists $apparatus->{$k};
} elsif ( $variant_rdg =~ /^1/ ) {
+ warn sprintf( "Already found variant reading %s for %s at %s!",
+ $apparatus->{$k}, $k, $apparatus->{_id} )
+ if exists $apparatus->{$k} && $apparatus->{$k} ne 'rdg_0';
$apparatus->{$k} = 'rdg_' . $rdg_ctr;
} else { # else for $, we don't list the MS
warn "Unparsed variant indicator $variant_rdg for $k in " .
use Moose;
use Moose::Util::TypeConstraints;
-subtype 'Correction',
- => as 'ArrayRef',
- => where { return 0 unless @$_ == 3;
- return 0 unless $_->[0] =~ /^\d+$/;
- return 0 unless $_->[1] =~ /^\d+$/;
- foreach my $x ( @{$_->[2]} ) {
- return $0 unless $x->isa( 'Text::Tradition::Collation::Reading' );
- }
- return 1;
- },
- => message { "Correction must be ref of [ offset, length, replacement_list ]" };
-
-
# Sigil. Required identifier for a witness.
has 'sigil' => (
is => 'ro',
predicate => 'has_path',
);
-# Uncorrection. This is an array of sets of reading nodes that show
-# where the witness was corrected.
-has 'uncorrected' => (
+has 'uncorrected_path' => (
is => 'rw',
- isa => 'ArrayRef[Correction]',
- predicate => 'has_uncorrected',
+ isa => 'ArrayRef[Text::Tradition::Collation::Reading]',
+ predicate => 'has_ante_corr',
);
$self->$orig( @_ );
};
-sub uncorrected_path {
- my $self = shift;
- my @path;
- push( @path, @{$self->path} );
- foreach my $corr ( @{$self->uncorrected} ) {
- splice( @path, $corr->[0], $corr->[1], @{$corr->[2]} );
- }
- return \@path;
-}
-
no Moose;
__PACKAGE__->meta->make_immutable;
is( scalar @svg_edges, 32, "Correct number of edges in the graph" );
# Test for the correct common nodes
-my @expected_nodes = map { [ $_, 1 ] } qw/ #START# n1 n5 n6 n7 n12
- n16 n19 n20 n27 /;
+my @common_nodes = ( '#START#' );
+push( @common_nodes, qw/ n1 n5 n6 n7 n12 n16 n19 n20 n27 / );
+my @expected_nodes = map { [ $_, 1 ] } @common_nodes;
foreach my $idx ( qw/2 3 4 8 10 11 13 16 17 18/ ) {
splice( @expected_nodes, $idx, 0, [ "node_null", undef ] );
}
return join( ' ', @words );
}
+# Test that the common nodes are marked common
+foreach my $cn ( @common_nodes ) {
+ ok( $collation->reading( $cn )->is_common, "Node $cn is marked common" );
+}
+
# Test the manuscript paths
my $wit_a = '# when april with his showers sweet with fruit the drought of march has pierced unto the root #';
my $wit_b = '# when showers sweet with april fruit the march of drought has pierced to the root #';
$string = '# when ... ... showers sweet with ... fruit ... march of ... has pierced unto ... ... #';
is( make_text( @active_nodes ), $string, "Got the right text" );
+# Now start testing some position identifiers
+# 2. 'april with his' have no colocated
+# 3. 'april' 2 has no colocated
+# 4. 'teh' and 'the'
+# 5. 'drought' & 'march'
+# 6. 'march' & 'drought'
+# 7. 'unto' 'the' 'root'...
+# 'unto can match 'to' or 'teh'
+# 'the' can match 'teh' or 'rood'
+# 'root' can mach 'rood'
+
+foreach my $cn ( @common_nodes ) {
+ my $cnr = $collation->reading( $cn );
+ is( scalar( $collation->same_position_as( $cnr ) ), 0, "Node $cn has no colocations" );
+}
+
+my %expected_colocations = (
+ 'n2' => [], # april
+ 'n3' => [], # with
+ 'n4' => [], # his
+ 'n11' => [], # april
+ 'n8' => [ 'n13' ], # teh -> the
+ 'n13' => [ 'n8' ], # the -> teh
+ 'n14' => [ 'n15' ], # drought -> march
+ 'n18' => [ 'n17' ], # drought -> march
+ 'n17' => [ 'n18' ], # march -> drought
+ 'n15' => [ 'n14' ], # march -> drought
+ 'n21' => [ 'n9', 'n22' ], # unto -> to, teh
+ 'n22' => [ 'n9', 'n21' ], # to -> unto, teh
+ 'n9' => [ 'n21', 'n22', 'n23' ], # teh -> unto, to, the
+ 'n23' => [ 'n9', 'n25' ], # the -> teh, rood
+ 'n25' => [ 'n9', 'n26' ], # rood -> the, root
+ 'n26' => [ 'n25' ], # root -> rood
+);
+
+foreach my $n ( keys %expected_colocations ) {
+ my $nr = $collation->reading( $n );
+ my @colocated = sort( map { $_->name } $collation->same_position_as( $nr ) );
+ is_deeply( \@colocated, $expected_colocations{$n}, "Colocated nodes for $n correct" );
+}
+
done_testing();