X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FTEI.pm;h=6eb554139f78cbaf4f2c00a63671da150d90cd45;hb=1f7aa795ef1c5a8567cf241e59c496ea56576ede;hp=a86adf799b0507b85411d815789fd1638a59b94a;hpb=e867486f69f12dc06304594022c298935d1c7fb9;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index a86adf7..6eb5541 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -129,7 +129,7 @@ sub parse { map { $text->{$_->sigil} = [] } $tradition->witnesses; # Look for all word/seg node IDs and note their pre-existence. - my @attrs = $xpc->findnodes( "//$W|$SEG/attribute::xml:id" ); + my @attrs = $xpc->findnodes( "//$W/attribute::xml:id" ); _save_preexisting_nodeids( @attrs ); # Count up how many apps we have. @@ -147,7 +147,6 @@ sub parse { # Join them up. my $c = $tradition->collation; foreach my $sig ( keys %$text ) { - next if $sig eq 'base'; # Skip base text readings with no witnesses. # Determine the list of readings for my $sequence = $text->{$sig}; my @real_sequence = ( $c->start ); @@ -159,7 +158,6 @@ sub parse { $c->add_path( $source, $rdg, $sig ); $source = $rdg; } - $tradition->witness( $sig )->path( \@real_sequence ); # See if we need to make an a.c. version of the witness. if( exists $app_ac->{$sig} ) { my @uncorrected; @@ -181,12 +179,12 @@ sub parse { $source = $rdg; } print STDERR "Adding a.c. version for witness $sig\n"; - $tradition->witness( $sig )->uncorrected_path( \@uncorrected ); + $tradition->witness( $sig )->is_layered( 1 ); } } # Calculate the ranks for the nodes. - $tradition->collation->calculate_ranks(); + $tradition->collation->calculate_ranks(); # Now that we have ranks, see if we have distinct nodes with identical # text and identical rank that can be merged. @@ -335,7 +333,7 @@ sub _return_rdg { # TODO handle p.c. and s.l. designations too $ac = $xn->getAttribute( 'type' ) && $xn->getAttribute( 'type' ) eq 'a.c.'; my @rdg_wits = _get_sigla( $xn ); - @rdg_wits = ( 'base' ) unless @rdg_wits; # Allow for editorially-supplied readings + return unless @rdg_wits; # Skip readings that appear in no witnesses my @words; foreach ( $xn->childNodes ) { my @rdg_set = _get_readings( $tradition, $_, 1, $ac, @rdg_wits ); @@ -388,7 +386,8 @@ sub _return_rdg { push( @{$text->{$w}}, $l ); } } - } elsif( $xn->nodeName eq 'witDetail' ) { + } elsif( $xn->nodeName eq 'witDetail' + || $xn->nodeName eq 'note' ) { # Ignore these for now. return; } else { @@ -433,6 +432,7 @@ sub _get_sigla { my @wits; if( ref( $rdg ) eq 'XML::LibXML::Element' ) { my $witstr = $rdg->getAttribute( 'wit' ); + return () unless $witstr; $witstr =~ s/^\s+//; $witstr =~ s/\s+$//; @wits = split( /\s+/, $witstr );