my $substitutions = {}; # Keep track of merged readings
my $app_anchors = {}; # Track apparatus references
my $app_ac = {}; # Save a.c. readings
+my $app_count; # Total number of app elements counted in parse(); compared with the running count of apps seen to detect the last app
# Create the package variables for tag names.
# Parse the TEI file.
sub parse {
- my( $tradition, $xml_str ) = @_;
+ my( $tradition, $opts ) = @_;
# First, parse the XML.
my $parser = XML::LibXML->new();
- my $doc = $parser->parse_string( $xml_str );
+ my $doc;
+ if( exists $opts->{'string'} ) {
+ $doc = $parser->parse_string( $opts->{'string'} );
+ } elsif ( exists $opts->{'file'} ) {
+ $doc = $parser->parse_file( $opts->{'file'} );
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
+ }
my $tei = $doc->documentElement();
my $xpc = XML::LibXML::XPathContext->new( $tei );
my $ns;
my $source = $wit_el->toString();
$tradition->add_witness( sigil => $sig, source => $source );
}
-
map { $text->{$_->sigil} = [] } $tradition->witnesses;
+
# Look for all word/seg node IDs and note their pre-existence.
my @attrs = $xpc->findnodes( "//$W|$SEG/attribute::xml:id" );
save_preexisting_nodeids( @attrs );
+ # Count up how many apps we have.
+ my @apps = $xpc->findnodes( "//$APP" );
+ $app_count = scalar( @apps );
+
# Now go through the children of the text element and pull out the
# actual text.
foreach my $xml_el ( $xpc->findnodes( "//$TEXT" ) ) {
}
$source = $rdg;
}
+ print STDERR "Adding a.c. version for witness $sig\n";
$tradition->witness( $sig )->uncorrected_path( \@uncorrected );
}
}
$tradition->collation->del_reading( $tradition->collation->reading( $_ ) );
}
$tradition->collation->calculate_ranks();
+
+ # Now that we have ranks, see if we have distinct nodes with identical
+ # text and identical rank that can be merged.
+ $tradition->collation->flatten_ranks();
}
sub _clean_sequence {
if( $rdg =~ /^PH-(.*)$/ ) {
# It is a placeholder. Keep it only if we need it.
my $app_id = $1;
- if( exists $app_ac->{$wit}->{$app_id} ) {
+ if( exists $app_ac->{$wit} &&
+ exists $app_ac->{$wit}->{$app_id} ) {
print STDERR "Retaining empty placeholder for $app_id\n";
push( @clean_sequence, $rdg );
}
## Returns the list of readings, if any, created on the run.
{
- my @active_wits;
+ my %active_wits;
my $current_app;
+ my $seen_apps;
sub _get_readings {
my( $tradition, $xn, $in_var, $ac, @cur_wits ) = @_;
- @cur_wits = @active_wits unless $in_var;
+ @cur_wits = grep { $active_wits{$_} } keys %active_wits unless $in_var;
my @new_readings;
if( $xn->nodeType == XML_TEXT_NODE ) {
#print STDERR "Handling text node " . $str . "\n";
# Check that all the witnesses we have are active.
foreach my $c ( @cur_wits ) {
- warn "Could not find $c in active wits"
- unless grep { $c eq $_ } @active_wits;
+ warn "$c is not among active wits" unless $active_wits{$c};
}
$str =~ s/^\s+//;
my $final = $str =~ s/\s+$//;
#print STDERR "Handling word " . $xn->toString . "\n";
# Check that all the witnesses we have are active.
foreach my $c ( @cur_wits ) {
- warn "Could not find $c in active wits"
- unless grep { $c eq $_ } @active_wits;
+ warn "$c is not among active wits" unless $active_wits{$c};
}
my $xml_id = $xn->getAttribute( 'xml:id' );
my $rdg = make_reading( $tradition->collation, $xn->textContent, $xml_id );
push( @{$text->{$_}}, $rdg ) unless $ac;
}
} elsif ( $xn->nodeName eq 'app' ) {
+ $seen_apps++;
$current_app = $xn->getAttribute( 'xml:id' );
# print STDERR "Handling app $current_app\n";
# Keep the reading sets in this app.
} elsif ( $xn->nodeName eq 'lem' || $xn->nodeName eq 'rdg' ) {
# Alter the current witnesses and recurse.
#print STDERR "Handling reading for " . $xn->getAttribute( 'wit' ) . "\n";
+ # TODO handle p.c. and s.l. designations too
$ac = $xn->getAttribute( 'type' ) && $xn->getAttribute( 'type' ) eq 'a.c.';
my @rdg_wits = get_sigla( $xn );
@rdg_wits = ( 'base' ) unless @rdg_wits; # Allow for editorially-supplied readings
} elsif( $xn->nodeName eq 'witStart' ) {
# Add the relevant wit(s) to the active list.
#print STDERR "Handling witStart\n";
- push( @active_wits, @cur_wits );
+ map { $active_wits{$_} = 1 } @cur_wits;
+ # Record a lacuna in all non-active witnesses if this is
+ # the first app. Get the full list from $text.
+ if( $seen_apps == 1 ) {
+ my $i = 0;
+ foreach my $sig ( keys %$text ) {
+ next if $active_wits{$sig};
+ my $l = $tradition->collation->add_lacuna( $current_app . "_$i" );
+ $i++;
+ push( @{$text->{$sig}}, $l );
+ }
+ }
} elsif( $xn->nodeName eq 'witEnd' ) {
# Take the relevant wit(s) out of the list.
#print STDERR "Handling witEnd\n";
- my $regexp = '^(' . join( '|', @cur_wits ) . ')$';
- @active_wits = grep { $_ !~ /$regexp/ } @active_wits;
+ map { $active_wits{$_} = undef } @cur_wits;
+ # Record a lacuna, unless this is the last app.
+ unless( $seen_apps == $app_count ) {
+ foreach my $i ( 0 .. $#cur_wits ) {
+ my $w = $cur_wits[$i];
+ my $l = $tradition->collation->add_lacuna( $current_app . "_$i" );
+ push( @{$text->{$w}}, $l );
+ }
+ }
} elsif( $xn->nodeName eq 'witDetail' ) {
# Ignore these for now.
return;