my( $tradition, $opts ) = @_;
my $c = $tradition->collation; # Some shorthand
+ ## DEBUG/TEST
+ $opts->{interpret_transposition} = 1;
+
# First, parse the XML.
my( $tei, $xpc ) = _remove_formatting( $opts );
return unless $tei; # we have already warned.
# Now we can parse the apparatus entries, and add the variant readings
# to the graph.
-
foreach my $app_id ( keys %apps ) {
- _add_readings( $c, $app_id );
+ _add_readings( $c, $app_id, $opts );
}
# Finally, add explicit witness paths, remove the base paths, and remove
}
sub _add_readings {
- my( $c, $app_id ) = @_;
+ my( $c, $app_id, $opts ) = @_;
my $xn = $apps{$app_id};
# If the app is of type a1, it is an apparatus criticus.
# If it is of type a2, it is an apparatus codicum and might not
$tag =~ s/^\__APP_(.*)\__$/$1/;
foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) {
+ my @witlist = split( /\s+/, $rdg->getAttribute( 'wit' ) );
my @text;
foreach ( $rdg->childNodes ) {
push( @text, _get_base( $_ ) );
my( $interpreted, $flag ) = ( '', undef );
if( @text ) {
( $interpreted, $flag ) = interpret(
- join( ' ', map { $_->{'content'} } @text ), $lemma_str );
+ join( ' ', map { $_->{'content'} } @text ), $lemma_str, $anchor, $opts );
}
- next if( $interpreted eq $lemma_str ) && !$flag; # Reading is lemma.
+ next if( $interpreted eq $lemma_str ) && !keys %$flag; # Reading is lemma.
my @rdg_nodes;
+ my @transp_nodes;
if( $interpreted eq '#LACUNA#' ) {
push( @rdg_nodes, $c->add_reading( { id => 'r'.$tag.".".$ctr++,
is_lacuna => 1 } ) );
+ } elsif( $flag->{'TR'} ) {
+ # Our reading is transposed to after the given string. Look
+ # down the collation base text and try to find it.
+ # The @rdg_nodes should remain blank here, so that the correct
+ # omission goes into the graph.
+ foreach my $w ( split( /\s+/, $interpreted ) ) {
+ my $r = $c->add_reading( { id => 'r'.$tag.".".$ctr++,
+ text => $w } );
+ push( @transp_nodes, $r );
+ }
+ if( $anchor && @lemma ) {
+ _attach_transposition( $c, \@lemma, $anchor, \@transp_nodes,
+ \@witlist, $flag->{'TR'} );
+ }
} else {
- if ( $flag && $flag eq 'START'
+ if ( $flag->{'START'}
&& $c->prior_reading( $app_id, $c->baselabel ) ne $c->start ) {
# Add a lacuna for the witness start.
push( @rdg_nodes, $c->add_reading( { id => 'r'.$tag.".".$ctr++,
is_lacuna => 1 } ) );
- $flag = '';
}
foreach my $w ( split( /\s+/, $interpreted ) ) {
my $r = $c->add_reading( { id => 'r'.$tag.".".$ctr++,
text => $w } );
push( @rdg_nodes, $r );
}
- if( $flag && $flag eq 'END'
+ if( $flag->{'END'}
&& $c->next_reading( $app_id, $c->baselabel ) ne $c->end ) {
# Add a lacuna for the witness end.
push( @rdg_nodes, $c->add_reading( { id => 'r'.$tag.".".$ctr++,
is_lacuna => 1 } ) );
- $flag = '';
}
}
# For each listed wit, save the reading.
- foreach my $wit ( split( /\s+/, $rdg->getAttribute( 'wit' ) ) ) {
- $wit .= $flag if $flag;
+ foreach my $wit ( @witlist ) {
+ $wit .= '_ac' if $flag->{'AC'};
$wit_rdgs{$wit} = \@rdg_nodes;
}
return @nodes;
}
+# Make a best-effort attempt to attach a transposition farther down the line.
+# $lemmaseq contains the Reading objects of the lemma
+# $anchor contains the point at which we should start scanning for a match
+# $rdgseq contains the Reading objects of the transposed reading
+# (should be identical to the lemma)
+# $witlist contains the list of applicable witnesses
+# $reftxt contains the text to match, after which the $rdgseq should go.
+sub _attach_transposition {
+ my( $c, $lemmaseq, $anchor, $rdgseq, $witlist, $reftxt ) = @_;
+ my @refwords = split( /\s+/, $reftxt );
+ my $checked = $c->reading( $anchor );
+ my $found;
+ while( $checked ne $c->end && !$found ) {
+ my $next = $c->next_reading( $checked, $c->baselabel );
+ if( $next->text eq $refwords[0] ) {
+ # See if the entire sequence of words matches.
+ $found = $next;
+ foreach my $w ( 1..$#refwords ) {
+ $found = $c->next_reading( $next, $c->baselabel );
+ unless( $found->text eq $refwords[$w] ) {
+ $found = undef;
+ last;
+ }
+ }
+ }
+ $checked = $next;
+ }
+ if( $found ) {
+ # The $found variable should now contain the reading after which we
+ # should stick the transposition.
+ my $fnext = $c->next_reading( $found, $c->baselabel );
+ my $aclabel = $c->ac_label;
+ foreach my $wit_id ( @$witlist ) {
+ my $witstr = _get_sigil( $wit_id, $aclabel );
+ _add_wit_path( $c, $rdgseq, $found->id, $fnext->id, $witstr );
+ }
+ # ...and add the transposition relationship between lemma and rdgseq.
+ if( @$lemmaseq == @$rdgseq ) {
+ foreach my $i ( 0..$#{$lemmaseq} ) {
+ $c->add_relationship( $lemmaseq->[$i], $rdgseq->[$i],
+ { type => 'transposition', annotation => 'Detected by CTE' } );
+ }
+ } else {
+ say STDERR "ERROR: lemma and transposed sequence different lengths?!"
+ }
+ } else {
+ say STDERR "WARNING: Unable to find $reftxt in base text for transposition";
+ map { $c->del_reading( $_ ) } @$rdgseq;
+ }
+}
+
=head2 interpret( $reading, $lemma )
Given a string in $reading and a corresponding lemma in $lemma, interpret what
sub interpret {
# A utility function to change apparatus-ese into a full variant.
- my( $reading, $lemma ) = @_;
+ my( $reading, $lemma, $anchor, $opts ) = @_;
return $reading if $reading eq $lemma;
my $oldreading = $reading;
# $lemma =~ s/\s+[[:punct:]]+$//;
- my $flag; # In case of p.c. indications
+ my $flag = {}; # To pass back extra info about the interpretation
my @words = split( /\s+/, $lemma );
$reading =~ s/[[:punct:]]?\bsic\b([[:punct:]]+)?//g;
if( $reading =~ /^(.*) praem.$/ ) {
} elsif( $reading eq 'in marg.' ) {
# There was nothing before a correction.
$reading = '';
- $flag = '_ac';
+ $flag->{'AC'} = 1;
+ } elsif( $reading =~ /^(.*?)\s*\(?sic([\s\w!.]+)?\)?$/ ) {
+ # Discard any 'sic' notation; indeed, indeed.
+ $reading = $1;
+ if( $reading =~ /^(\W+)$/ ) {
+ # Nothing left but punctuation, so effectively it's the lemma.
+ $reading = $lemma;
+ }
} elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) {
# The first and last N words captured should replace the first and
# last N words of the lemma.
splice( @words, -(scalar @end), scalar @end, @end );
$reading = join( ' ', @words );
}
+ } elsif( $opts->{interpret_transposition} &&
+ $reading =~ /^\s*post\s*(.*?)\s*(tr(ans(p)?)?)?\.?\s*$/ ) {
+ # Try to deal with transposed readings
+ ## DEBUG
+ say STDERR "Will attempt transposition: $reading at $anchor";
+ $reading = $lemma;
+ $flag->{'TR'} = $1;
+ # Look for processed witStart and witEnd tags
} elsif( $reading =~ /^\#START\#\s*(.*)$/ ) {
$reading = $1;
- $flag = 'START';
+ $flag->{'START'} = 1;
} elsif( $reading =~ /^(.*?)\s*\#END\#$/ ) {
$reading = $1;
- $flag = 'END';
- }
- if( $oldreading ne $reading || $flag || $oldreading =~ /\./ ) {
- my $int = $reading;
- $int .= " ($flag)" if $flag;
- # say STDERR "Interpreted $oldreading as $int given $lemma";
+ $flag->{'END'} = 1;
}
return( $reading, $flag );
}