foreach my $item ( @base_text ) {
my $r;
if( $item->{'type'} eq 'token' ) {
- $r = $c->add_reading( 'n'.$counter++ );
- $r->text( $item->{'content'} );
+ $r = $c->add_reading( { id => 'n'.$counter++,
+ text => $item->{'content'} } );
} elsif ( $item->{'type'} eq 'anchor' ) {
- $r = $c->add_reading( '#ANCHOR_' . $item->{'content'} . '#' );
- $r->is_meta(1);
+ $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#',
+ is_ph => 1 } );
} elsif ( $item->{'type'} eq 'app' ) {
my $tag = '#APP_' . $counter++ . '#';
- $r = $c->add_reading( $tag );
- $r->is_meta(1);
+ $r = $c->add_reading( { id => $tag, is_ph => 1 } );
$apps{$tag} = $item->{'content'};
}
$c->add_path( $last, $r, $c->baselabel );
# Finally, add explicit witness paths, remove the base paths, and remove
# the app/anchor tags.
expand_all_paths( $c );
+
+ # Save the text for each witness so that we can ensure consistency
+ # later on
+ $tradition->collation->text_from_paths();
}
sub _stringify_sigil {
my $str = $xn->data;
$str =~ s/^\s+//;
foreach my $w ( split( /\s+/, $str ) ) {
- # HACK to cope with mismatched doublequotes
- $w =~ s/\"//g;
push( @readings, { 'type' => 'token', 'content' => $w } );
}
} elsif( $xn->nodeName eq 'hi' ) {
my $ctr = 0;
my $tag = $app_id;
$tag =~ s/^\#APP_(.*)\#$/$1/;
- $DB::single = 1 if $tag < 2;
foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
my @text;
foreach ( $rdg->childNodes ) {
push( @text, _get_base( $_ ) );
}
- my $interpreted = @text
- ? interpret( join( ' ', map { $_->{'content'} } @text ), $lemma_str )
- : '';
- my @rdg_nodes;
- foreach my $w ( split( /\s+/, $interpreted ) ) {
- my $r = $c->add_reading( $tag . "/" . $ctr++ );
- $r->text( $w );
- push( @rdg_nodes, $r );
+ my( $interpreted, $flag ) = ( '', undef );
+ if( @text ) {
+ ( $interpreted, $flag ) = interpret(
+ join( ' ', map { $_->{'content'} } @text ), $lemma_str );
}
+ next if( $interpreted eq $lemma_str ) && !$flag; # Reading is lemma.
+ my @rdg_nodes;
+ if( $interpreted eq '#LACUNA#' ) {
+ push( @rdg_nodes, $c->add_reading( { id => $tag . "/" . $ctr++,
+ is_lacuna => 1 } ) );
+ } else {
+ foreach my $w ( split( /\s+/, $interpreted ) ) {
+ my $r = $c->add_reading( { id => $tag . "/" . $ctr++,
+ text => $w } );
+ push( @rdg_nodes, $r );
+ }
+ }
# For each listed wit, save the reading.
foreach my $wit ( split( /\s+/, $rdg->getAttribute( 'wit' ) ) ) {
+ $wit .= $flag if $flag;
$wit_rdgs{$wit} = \@rdg_nodes;
}
+
# Does the reading have an ID? If so it probably has a witDetail
# attached, and we need to read it.
if( $rdg->hasAttribute( 'xml:id' ) ) {
+ warn "Witdetail on meta reading" if $flag; # this could get complicated.
my $rid = $rdg->getAttribute( 'xml:id' );
my $xpc = XML::LibXML::XPathContext->new( $xn );
my @details = $xpc->findnodes( './witDetail[@target="'.$rid.'"]' );
}
# Now collate the variant readings, since it is not done for us.
- collate_variants( $c, \@lemma, values %wit_rdgs );
-
+ collate_variants( $c, \@lemma, values %wit_rdgs );
+
# Now add the witness paths for each reading.
foreach my $wit_id ( keys %wit_rdgs ) {
my $witstr = get_sigil( $wit_id, $c );
sub _return_lemma {
my( $c, $app, $anchor ) = @_;
- my $app_node = $c->graph->node( $app );
- my $anchor_node = $c->graph->node( $anchor );
- my @nodes = grep { $_->name !~ /^\#A(PP|NCHOR)/ }
- $c->reading_sequence( $app_node, $anchor_node, $c->baselabel );
+ my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ }
+ $c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ),
+ $c->baselabel );
return @nodes;
}
my $oldreading = $reading;
# $lemma =~ s/\s+[[:punct:]]+$//;
# $reading =~ s/\s*\(?sic([\s\w.]+)?\)?$//;
+ my $flag; # In case of p.c. indications
my @words = split( /\s+/, $lemma );
if( $reading =~ /^(.*) praem.$/ ) {
$reading = "$1 $lemma";
} elsif( $reading =~ /^(.*) add.$/ ) {
$reading = "$lemma $1";
- } elsif( $reading eq 'om.'
- || $reading =~ /locus [uv]acuus/
- || $reading =~ /inscriptionem compegi e/ # TODO huh?
- || $reading eq 'def.' # TODO huh?
+ } elsif( $reading =~ /add. alia manu/
+ || $reading =~ /inscriptionem compegi e/ # TODO huh?
+ || $reading eq 'inc.' # TODO huh?
+ ) {
+ # Ignore it.
+ $reading = $lemma;
+ } elsif( $reading =~ /locus [uv]acuus/
+ || $reading eq 'def.'
) {
+ $reading = '#LACUNA#';
+ } elsif( $reading eq 'om.' ) {
$reading = '';
- } elsif( $reading eq 'inv.' ) {
+ } elsif( $reading =~ /^in[uv]\.$/ ) {
# Hope it is two words.
print STDERR "WARNING: want to invert a lemma that is not two words\n"
unless scalar( @words ) == 2;
$reading = join( ' ', reverse( @words ) );
- } elsif( $reading eq 'iter.' ) {
+ } elsif( $reading =~ /^iter(\.|at)$/ ) {
# Repeat the lemma
$reading = "$lemma $lemma";
- } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
+ } elsif( $reading eq 'in marg.' ) {
+ # There was nothing before a correction.
+ $reading = '';
+ $flag = '_ac';
+ } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
# The first and last N words captured should replace the first and
# last N words of the lemma.
my @begin = split( /\s+/, $1 );
$reading = join( ' ', @words );
}
}
- print STDERR "Interpreted $oldreading as $reading given $lemma\n";
- return $reading;
+ if( $oldreading ne $reading || $flag || $oldreading =~ /\./ ) {
+ my $int = $reading;
+ $int .= " ($flag)" if $flag;
+ print STDERR "Interpreted $oldreading as $int given $lemma\n";
+ }
+ return( $reading, $flag );
}
sub _parse_wit_detail {
# Walk the collation and fish out the paths for each witness
foreach my $wit ( $c->tradition->witnesses ) {
my $sig = $wit->sigil;
- my @path = grep { $_->name !~ /(APP|ANCHOR)/ }
+ my @path = grep { !$_->is_ph }
$c->reading_sequence( $c->start, $c->end, $sig );
$wit->path( \@path );
if( $has_ac{$sig} ) {
- my @ac_path = grep { $_->name !~ /(APP|ANCHOR)/ }
- $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label, $sig );
+ my @ac_path = grep { !$_->is_ph }
+ $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label );
$wit->uncorrected_path( \@ac_path );
}
}
# Delete the anchors
- foreach my $anchor ( grep { $_->name =~ /(APP|ANCHOR)/ } $c->readings ) {
+ foreach my $anchor ( grep { $_->is_ph } $c->readings ) {
$c->del_reading( $anchor );
}
- # Delete all edges
- map { $c->del_path( $_ ) } $c->paths;
+ # Delete the base edges
+ map { $c->del_path( $_, $c->baselabel ) } $c->paths;
# Make the path edges
$c->make_witness_paths();