my @sig_parts = $xpc->findnodes( 'descendant::text()', $wit_el );
my $sig = _stringify_sigil( @sig_parts );
print STDERR "Adding witness $sig\n";
- $tradition->add_witness( sigil => $sig, source => $wit_el->toString() );
+ $tradition->add_witness( sigil => $sig, sourcetype => 'collation' );
$sigil_for{'#'.$id} = $sig; # Make life easy by keying on the ID ref syntax
}
$r = $c->add_reading( { id => 'n'.$counter++,
text => $item->{'content'} } );
} elsif ( $item->{'type'} eq 'anchor' ) {
- $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#',
+ $r = $c->add_reading( { id => '__ANCHOR_' . $item->{'content'} . '__',
is_ph => 1 } );
} elsif ( $item->{'type'} eq 'app' ) {
- my $tag = '#APP_' . $counter++ . '#';
+ my $tag = '__APP_' . $counter++ . '__';
$r = $c->add_reading( { id => $tag, is_ph => 1 } );
$apps{$tag} = $item->{'content'};
}
# Build a witness sigil string from a list of nodes.
#
# Arguments: a list of node objects; ->data is called on each one.
# Returns:   a single string made by concatenating, with no separator,
#            every ->data value that contains at least one word character.
#
# This block is shown as a diff: the line marked '-' is the old behavior
# (return the joined string as-is) and the lines marked '+' are the new
# behavior (strip every non-word character from the joined string before
# returning it).
# NOTE(review): the stripping presumably keeps the sigil safe to use as an
# identifier elsewhere - confirm against the callers of add_witness.
sub _stringify_sigil {
my( @nodes ) = @_;
# Discard fragments that contain no word characters at all (for example
# whitespace-only or punctuation-only pieces).
my @parts = grep { /\w/ } map { $_->data } @nodes;
- return join( '', @parts );
+ my $whole = join( '', @parts );
+ $whole =~ s/\W//g;
+ return $whole;
}
# Get rid of all the formatting elements that get in the way of tokenization.
# Get the lemma, which is all the readings between app and anchor,
# excluding other apps or anchors.
my @lemma = _return_lemma( $c, $app_id, $anchor );
- my $lemma_str = join( ' ', grep { $_ !~ /^\#/ } map { $_->text } @lemma );
+ my $lemma_str = join( ' ', grep { $_ !~ /^__/ } map { $_->text } @lemma );
# For each reading, send its text to 'interpret' along with the lemma,
# and then save the list of witnesses that these tokens belong to.
my %wit_rdgs; # Maps from witnesses to the variant text
my $ctr = 0;
my $tag = $app_id;
- $tag =~ s/^\#APP_(.*)\#$/$1/;
+ $tag =~ s/^\__APP_(.*)\__$/$1/;
foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
my @text;
my @rdg_nodes;
if( $interpreted eq '#LACUNA#' ) {
- push( @rdg_nodes, $c->add_reading( { id => $tag . "/" . $ctr++,
+ push( @rdg_nodes, $c->add_reading( { id => 'r'.$tag.".".$ctr++,
is_lacuna => 1 } ) );
} else {
foreach my $w ( split( /\s+/, $interpreted ) ) {
- my $r = $c->add_reading( { id => $tag . "/" . $ctr++,
+ my $r = $c->add_reading( { id => 'r'.$tag.".".$ctr++,
text => $w } );
push( @rdg_nodes, $r );
}
}
# Now collate the variant readings, since it is not done for us.
- $DB::single = 1 if @lemma > 10;
collate_variants( $c, \@lemma, values %wit_rdgs );
# Now add the witness paths for each reading.
# Turn an XML id reference into the reading id used for an anchor
# placeholder.
#
# Arguments: $xmlid - the id string; a single leading '#' is removed if
#            present.
# Returns:   the id wrapped in the anchor placeholder naming pattern.
#
# This block is shown as a diff: the '-' line is the old pattern
# ("#ANCHOR_<id>#") and the '+' line is the new one ("__ANCHOR_<id>__"),
# which matches the '__ANCHOR_' ... '__' ids given to add_reading for
# items of type 'anchor'.
sub _anchor_name {
my $xmlid = shift;
# Strip the leading '#' of the reference form so only the bare id remains.
$xmlid =~ s/^\#//;
- return sprintf( "#ANCHOR_%s#", $xmlid );
+ return sprintf( "__ANCHOR_%s__", $xmlid );
}
sub _return_lemma {
my( $c, $app, $anchor ) = @_;
- my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ }
+ my @nodes = grep { $_->id !~ /^__A(PP|NCHOR)/ }
$c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ),
$c->baselabel );
return @nodes;