# Output substitute start/end readings if necessary
if( $startrank ) {
- $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
+ $dot .= "\t\"#SUBSTART#\" [ label=\"...\",id=\"__SUBSTART__\" ];\n";
}
if( $endrank ) {
- $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
+ $dot .= "\t\"#SUBEND#\" [ label=\"...\",id=\"__SUBEND__\" ];\n";
}
if( $STRAIGHTENHACK ) {
## HACK part 1
$label = "-$label" if $reading->join_prior;
$label =~ s/\"/\\\"/g;
$rattrs->{'label'} = $label;
+ $rattrs->{'id'} = $reading->id;
$rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
$dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
}
'Str' => 'string',
'Int' => 'int',
'Bool' => 'boolean',
+ 'ReadingID' => 'string',
'RelationshipType' => 'string',
'RelationshipScope' => 'string',
);
is( $c->common_predecessor( 'n24', 'n23' )->id,
'n20', "Found correct common predecessor" );
is( $c->common_successor( 'n24', 'n23' )->id,
- '#END#', "Found correct common successor" );
+ '__END__', "Found correct common successor" );
is( $c->common_predecessor( 'n19', 'n17' )->id,
'n16', "Found correct common predecessor for readings on same path" );
is( $c->common_successor( 'n21', 'n10' )->id,
- '#END#', "Found correct common successor for readings on same path" );
+ '__END__', "Found correct common successor for readings on same path" );
=end testing
package Text::Tradition::Collation::Reading;
use Moose;
+use Moose::Util::TypeConstraints;
use JSON qw/ from_json /;
use Module::Load;
use Text::Tradition::Error;
+use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx );
use YAML::XS;
use overload '""' => \&_stringify, 'fallback' => 1;
+subtype 'ReadingID',
+ as 'Str',
+ where { $_ =~ /\A$xml10_name_rx\z/ },
+ message { 'Reading ID must be a valid XML attribute string' };
+
+no Moose::Util::TypeConstraints;
+
=head1 NAME
Text::Tradition::Collation::Reading - represents a reading (usually a word)
has 'id' => (
is => 'ro',
- isa => 'Str',
+ isa => 'ReadingID',
required => 1,
);
if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
$args->{'text'} = '#LACUNA#';
} elsif( exists $args->{'is_start'} ) {
- $args->{'id'} = '#START#'; # Change the ID to ensure we have only one
+ $args->{'id'} = '__START__'; # Change the ID to ensure we have only one
$args->{'text'} = '#START#';
$args->{'rank'} = 0;
} elsif( exists $args->{'is_end'} ) {
- $args->{'id'} = '#END#'; # Change the ID to ensure we have only one
+ $args->{'id'} = '__END__'; # Change the ID to ensure we have only one
$args->{'text'} = '#END#';
} elsif( exists $args->{'is_ph'} ) {
$args->{'text'} = $args->{'id'};
}
+ # Backwards compatibility for non-XMLname IDs
+ my $rid = $args->{'id'};
+ $rid =~ s/\#/__/g;
+ $rid =~ s/[\/,]/./g;
+ if( $rid !~ /^$xml10_namestartchar_rx/ ) {
+ $rid = 'r'.$rid;
+ }
+ $args->{'id'} = $rid;
+
$class->$orig( $args );
};
# Test 1.1: try to equate nodes that are prevented with an intermediate collation
ok( $t1, "Parsed test fragment file" );
my $c1 = $t1->collation;
-my $trel = $c1->get_relationship( '9,2', '9,3' );
+my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
"Troublesome relationship exists" );
is( $trel->type, 'collated', "Troublesome relationship is a collation" );
# Try to make the link we want
try {
- $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+ $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
ok( 1, "Added cross-collation relationship as expected" );
} catch( Text::Tradition::Error $e ) {
ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
# Test 1.2: attempt merge of an identical reading
try {
- $c1->merge_readings( '9,3', '11,5' );
+ $c1->merge_readings( 'r9.3', 'r11.5' );
ok( 1, "Successfully merged reading 'pontifex'" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Merge of mergeable readings failed: $e->message" );
# Test 1.3: attempt relationship with a meta reading (should fail)
try {
- $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
+ $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
ok( 0, "Allowed a meta-reading to be used in a relationship" );
} catch ( Text::Tradition::Error $e ) {
is( $e->message, 'Cannot set relationship on a meta reading',
# equivalence
my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
my $c2 = $t2->collation;
-$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
-my $trel2 = $c2->get_relationship( '9,2', '9,3' );
+$c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
+my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
"Created blocking relationship" );
is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
# This time the link ought to fail
try {
- $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+ $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
ok( 0, "Added cross-equivalent bad relationship" );
} catch ( Text::Tradition::Error $e ) {
like( $e->message, qr/witness loop/,
# Test 1: try to equate nodes that are prevented with an intermediate collation
my $c3 = $t3->collation;
try {
- $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition: " . $e->message );
}
try {
- $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition complement" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition complement: " . $e->message );
# Test 3.2: try to make a transposition that could be a parallel.
try {
- $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
ok( 0, "Added bad colocated transposition" );
} catch ( Text::Tradition::Error $e ) {
like( $e->message, qr/Readings appear to be colocated/,
# Test 3.3: make the parallel, and then make the transposition again.
try {
- $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
+ $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
ok( 1, "Equated identical readings for transposition" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to equate identical readings: " . $e->message );
}
try {
- $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition complement" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition complement: " . $e->message );
my $group = $self->equivalence( $node );
my $nodelist = $self->eqreadings( $group );
if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
- print STDERR "Removing equivalence $group for $node\n" if $node eq '451,2';
$self->remove_eqreadings( $group );
} elsif( @$nodelist == 1 ) {
warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
" in group that should have only $node";
} else {
- print STDERR "Removing $node from equivalence $group\n" if $node eq '451,2';
- my @newlist = grep { $_ ne $node } @$nodelist;
+ my @newlist = grep { $_ ne $node } @$nodelist;
$self->set_eqreadings( $group, \@newlist );
$self->remove_equivalence( $node );
}
my( $self, $source, $target ) = @_;
my $seq = $self->equivalence( $source );
my $teq = $self->equivalence( $target );
- print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n"
- if grep { $_ eq '451,2' } @_;
$self->equivalence_graph->delete_edge( $seq, $teq );
}
my $teq = $self->equivalence( $target );
# Nothing to do if they are already equivalent...
return if $seq eq $teq;
- print STDERR "Making equivalence for $source -> $target\n"
- if grep { $_ eq '451,2' } @_;
my $sourcepool = $self->eqreadings( $seq );
# and add them to the target readings.
- print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n"
- if grep { $_ eq '451,2' } @_;
push( @{$self->eqreadings( $teq )}, @$sourcepool );
map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
# Then merge the nodes in the equivalence graph.
map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
# If these groups intersect, they are still connected; do nothing.
foreach my $el ( keys %tng ) {
- if( exists $sng{$el} ) {
- print STDERR "Equivalence break $source / $target is a noop\n"
- if grep { $_ eq '451,2' } @_;
- return;
- }
+ return if( exists $sng{$el} );
}
- print STDERR "Breaking equivalence $source / $target\n"
- if grep { $_ eq '451,2' } @_;
# If they don't intersect, then we split the nodes in the graph and in
# the hashes. First figure out which group has which name
my $oldgroup = $self->equivalence( $source ); # same as $target
$r = $c->add_reading( { id => 'n'.$counter++,
text => $item->{'content'} } );
} elsif ( $item->{'type'} eq 'anchor' ) {
- $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#',
+ $r = $c->add_reading( { id => '__ANCHOR_' . $item->{'content'} . '__',
is_ph => 1 } );
} elsif ( $item->{'type'} eq 'app' ) {
- my $tag = '#APP_' . $counter++ . '#';
+ my $tag = '__APP_' . $counter++ . '__';
$r = $c->add_reading( { id => $tag, is_ph => 1 } );
$apps{$tag} = $item->{'content'};
}
# Get the lemma, which is all the readings between app and anchor,
# excluding other apps or anchors.
my @lemma = _return_lemma( $c, $app_id, $anchor );
- my $lemma_str = join( ' ', grep { $_ !~ /^\#/ } map { $_->text } @lemma );
+ my $lemma_str = join( ' ', grep { $_ !~ /^__/ } map { $_->text } @lemma );
# For each reading, send its text to 'interpret' along with the lemma,
# and then save the list of witnesses that these tokens belong to.
my %wit_rdgs; # Maps from witnesses to the variant text
my $ctr = 0;
my $tag = $app_id;
- $tag =~ s/^\#APP_(.*)\#$/$1/;
+ $tag =~ s/^\__APP_(.*)\__$/$1/;
foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
my @text;
my @rdg_nodes;
if( $interpreted eq '#LACUNA#' ) {
- push( @rdg_nodes, $c->add_reading( { id => $tag . "/" . $ctr++,
+ push( @rdg_nodes, $c->add_reading( { id => 'r'.$tag.".".$ctr++,
is_lacuna => 1 } ) );
} else {
foreach my $w ( split( /\s+/, $interpreted ) ) {
- my $r = $c->add_reading( { id => $tag . "/" . $ctr++,
+ my $r = $c->add_reading( { id => 'r'.$tag.".".$ctr++,
text => $w } );
push( @rdg_nodes, $r );
}
sub _anchor_name {
my $xmlid = shift;
$xmlid =~ s/^\#//;
- return sprintf( "#ANCHOR_%s#", $xmlid );
+ return sprintf( "__ANCHOR_%s__", $xmlid );
}
sub _return_lemma {
my( $c, $app, $anchor ) = @_;
- my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ }
+ my @nodes = grep { $_->id !~ /^__A(PP|NCHOR)/ }
$c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ),
$c->baselabel );
return @nodes;
if( exists( $unique{$word} ) ) {
$rdg = $unique{$word};
} else {
- my %args = ( 'id' => join( ',', $idx, $j+1 ),
+ my %args = ( 'id' => 'r' . join( '.', $idx, $j+1 ),
'rank' => $idx,
'text' => $word,
'collation' => $c );
}
}
- # Add the nodes to the graph.
+ # Add the nodes to the graph.
+ # Note any reading IDs that were changed in order to comply with XML
+ # name restrictions; we have to hardcode start & end.
+ my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
# print STDERR "Adding collation readings\n";
foreach my $n ( @{$graph_data->{'nodes'}} ) {
next;
}
my $gnode = $collation->add_reading( $n );
+ if( $gnode->id ne $n->{'id'} ) {
+ $namechange{$n->{'id'}} = $gnode->id;
+ }
}
# Now add the edges.
# print STDERR "Adding collation path edges\n";
foreach my $e ( @{$graph_data->{'edges'}} ) {
- my $from = $collation->reading( $e->{'source'}->{'id'} );
- my $to = $collation->reading( $e->{'target'}->{'id'} );
+ my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
+ ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
+ my $targetid = exists $namechange{$e->{'target'}->{'id'}}
+ ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
+ my $from = $collation->reading( $sourceid );
+ my $to = $collation->reading( $targetid );
warn "No witness label on path edge!" unless $e->{'witness'};
my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
# TODO check that scoping does trt
$rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
- my $from = $collation->reading( $e->{'source'}->{'id'} );
- my $to = $collation->reading( $e->{'target'}->{'id'} );
+ my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
+ ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
+ my $targetid = exists $namechange{$e->{'target'}->{'id'}}
+ ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
+ my $from = $collation->reading( $sourceid );
+ my $to = $collation->reading( $targetid );
delete $e->{'source'};
delete $e->{'target'};
# The remaining keys are relationship attributes.
my $ctr = 1;
foreach my $w ( keys %unique ) {
my $rargs = {
- 'id' => "$index,$ctr",
+ 'id' => "r$index.$ctr",
'rank' => $index,
'text' => $w,
};
is( $c->common_predecessor( 'n24', 'n23' )->id,
'n20', "Found correct common predecessor" );
is( $c->common_successor( 'n24', 'n23' )->id,
- '#END#', "Found correct common successor" );
+ '__END__', "Found correct common successor" );
is( $c->common_predecessor( 'n19', 'n17' )->id,
'n16', "Found correct common predecessor for readings on same path" );
is( $c->common_successor( 'n21', 'n10' )->id,
- '#END#', "Found correct common successor for readings on same path" );
+ '__END__', "Found correct common successor for readings on same path" );
}
# Test 1.1: try to equate nodes that are prevented with an intermediate collation
ok( $t1, "Parsed test fragment file" );
my $c1 = $t1->collation;
-my $trel = $c1->get_relationship( '9,2', '9,3' );
+my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
"Troublesome relationship exists" );
is( $trel->type, 'collated', "Troublesome relationship is a collation" );
# Try to make the link we want
try {
- $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+ $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
ok( 1, "Added cross-collation relationship as expected" );
} catch( Text::Tradition::Error $e ) {
ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
# Test 1.2: attempt merge of an identical reading
try {
- $c1->merge_readings( '9,3', '11,5' );
+ $c1->merge_readings( 'r9.3', 'r11.5' );
ok( 1, "Successfully merged reading 'pontifex'" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Merge of mergeable readings failed: $e->message" );
# Test 1.3: attempt relationship with a meta reading (should fail)
try {
- $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
+ $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
ok( 0, "Allowed a meta-reading to be used in a relationship" );
} catch ( Text::Tradition::Error $e ) {
is( $e->message, 'Cannot set relationship on a meta reading',
# equivalence
my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
my $c2 = $t2->collation;
-$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
-my $trel2 = $c2->get_relationship( '9,2', '9,3' );
+$c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
+my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
"Created blocking relationship" );
is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
# This time the link ought to fail
try {
- $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+ $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
ok( 0, "Added cross-equivalent bad relationship" );
} catch ( Text::Tradition::Error $e ) {
like( $e->message, qr/witness loop/,
# Test 1: try to equate nodes that are prevented with an intermediate collation
my $c3 = $t3->collation;
try {
- $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition: " . $e->message );
}
try {
- $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition complement" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition complement: " . $e->message );
# Test 3.2: try to make a transposition that could be a parallel.
try {
- $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
ok( 0, "Added bad colocated transposition" );
} catch ( Text::Tradition::Error $e ) {
like( $e->message, qr/Readings appear to be colocated/,
# Test 3.3: make the parallel, and then make the transposition again.
try {
- $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
+ $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
ok( 1, "Equated identical readings for transposition" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to equate identical readings: " . $e->message );
}
try {
- $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
+ $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
ok( 1, "Added straightforward transposition complement" );
} catch ( Text::Tradition::Error $e ) {
ok( 0, "Failed to add normal transposition complement: " . $e->message );