is( ref( $trad->witness( 'MsBJ' ) ), 'Text::Tradition::Witness',
"Found second JSON witness" );
-# # Test an XML witness via file
-# my $xmlwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
-# 'file' => 't/data/witnesses/teiwit.xml' );
-# is( ref( $xmlwit ), 'Text::Tradition::Witness', "Created witness from XML file" );
-# if( $xmlwit ) {
-# is( $xmlwit->sigil, 'V887', "XML witness has correct sigil" );
-# ok( $xmlwit->is_layered, "Picked up correction layer" );
-# is( @{$xmlwit->path}, 185, "Got correct text length" );
-# is( @{$xmlwit->uncorrected_path}, 185, "Got correct a.c. text length" );
-# }
+# Test an XML witness via file
+my $xmlwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
+ 'file' => 't/data/witnesses/teiwit.xml' );
+is( ref( $xmlwit ), 'Text::Tradition::Witness', "Created witness from XML file" );
+if( $xmlwit ) {
+ is( $xmlwit->sigil, 'V887', "XML witness has correct sigil" );
+ ok( $xmlwit->is_layered, "Picked up correction layer" );
+ is( @{$xmlwit->text}, 182, "Got correct text length" );
+ is( @{$xmlwit->layertext}, 182, "Got correct a.c. text length" );
+}
+my @allwitwords = grep { $_->id =~ /^V887/ } $c->readings;
+is( @allwitwords, 184, "Reused appropriate readings" );
## Test use_text: build a witness from a single text of a TEI group file, selected by XPath
+my $xpwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
+ 'file' => 't/data/witnesses/group.xml',
+ 'use_text' => '//tei:group/tei:text[2]' );
+is( ref( $xpwit ), 'Text::Tradition::Witness', "Created witness from XML group" );
+if( $xpwit ) {
+ is( $xpwit->sigil, 'G', "XML part witness has correct sigil" );
+ ok( !$xpwit->is_layered, "Picked up no correction layer" );
+ is( @{$xpwit->text}, 157, "Got correct text length" );
+}
+
=end testing
writer => '_set_sigil',
);
+has 'language' => ( # language setting for this witness; read-only string
+ is => 'ro',
+ isa => 'Str',
+ default => 'Default', # literal string 'Default' when no language is supplied
+ );
+
# Other identifying information
has 'identifier' => ( # free-form string identifier for the witness
is => 'rw', # writable after construction; NOTE(review): presumably filled in from the TEI msDesc during parsing — confirm
isa => 'Str',
);
+# Source. Can be XML obj, JSON data struct, or string.
+# Not used if the witness is created by parsing a collation.
has 'sourcetype' => ( # kind of source the witness is built from (the tests pass 'xmldesc')
is => 'ro',
isa => 'SourceType', # type constraint declared outside this view
required => 1, # must always be passed to the constructor
);
-has 'language' => (
- is => 'ro',
- isa => 'Str',
- default => 'Default',
- );
-
-# Source. Can be XML obj, JSON data struct, or string.
-# Not used if the witness is created by parsing a collation.
has 'file' => ( # path of the source file, e.g. 't/data/witnesses/teiwit.xml' in the tests
is => 'ro',
isa => 'Str',
isa => 'Str', # NOTE(review): repeats the key above — probably two diff hunks joined here rather than a real duplicate; confirm against the full file
);
-has 'msdesc' => ( # if we started with a TEI doc
- is => 'ro',
- isa => 'XML::LibXML::Element',
- predicate => 'has_msdesc',
- writer => '_save_msdesc',
- );
-
# Text. This is an array of strings (i.e. word tokens).
# TODO Think about how to handle this for the case of pre-prepared
# collations, where the tokens are in the graph already.
clearer => 'clear_path',
);
+## TODO: rename the 'uncorrected_path' attribute declared just below
has 'uncorrected_path' => (
is => 'rw',
isa => 'ArrayRef[Text::Tradition::Collation::Reading]',
# Get the identifier
if( my $desc = $xpc->find( $tags{msDesc} ) ) {
my $descnode = $desc->get_node(1);
- $self->_save_msdesc( $descnode );
# First try to use settlement/repository/idno.
my( $setNode, $reposNode, $idNode ) =
( $xpc->find( $tags{settlement}, $descnode )->get_node(1),
message => "No text element in document '" . $self->{'identifier'} . "!" );
}
+ my @text = map { $_->text } @words;
+ my @layertext = map { $_->text } @layerwords;
$self->path( \@words );
- my $a = join( ' ', map { $_->text } @words );
- my $b = join( ' ', map { $_->text } @layerwords );
- if( $a ne $b ) {
+ $self->text( \@text );
+ if( join( ' ', @text ) ne join( ' ', @layertext ) ) {
$self->uncorrected_path( \@layerwords );
+ $self->layertext( \@layertext );
}
- # TODO set self->text
}
sub _tokenize_text {
# Hunt down each wrapped word/seg, and make an object (or two objects)
# of it, if necessary.
foreach my $c ( $xpc->findnodes( $xpexpr, $pg ) ) {
- my( $text, $uncorr ) = _get_word_object( $c );
+ my( $text, $uncorr ) = _get_word_strings( $c );
# try {
# ( $text, $uncorr ) = _get_word_object( $c );
# } catch( Text::Tradition::Error $e
my $word_excluded = 0;
my $xpc = _xpc_for_el( $node );
# TODO This does not cope with nested add/dels.
- my @addition = $xpc->findnodes( 'ancestor::' . $tags{add} );
- my @deletion = $xpc->findnodes( 'ancestor::' . $tags{del} );
+ my @addition = $xpc->findnodes( 'ancestor::' . substr( $tags{add}, 2 ) );
+ my @deletion = $xpc->findnodes( 'ancestor::' . substr( $tags{del}, 2 ) );
foreach my $c ($node->childNodes() ) {
if( $c->nodeName eq 'num'
&& defined $c->getAttribute( 'value' ) ) {
$word_excluded = 1 if $c->nodeName =~ /^(fw|sic)$/;
next;
} elsif( $c->nodeName eq 'add' ) {
- my( $use, $discard ) = _get_text_from_node( $c );
+ my( $use, $discard ) = _get_word_strings( $c );
$text .= $use;
} elsif( $c->nodeName eq 'del' ) {
- my( $discard, $use ) = _get_text_from_node( $c );
+ my( $discard, $use ) = _get_word_strings( $c );
$uncorrtext .= $use;
} else {
- my $tagtxt;
+ my ( $tagtxt, $taguncorr );
if( ref( $c ) eq 'XML::LibXML::Text' ) {
# A text node.
$tagtxt = $c->textContent;
+ $taguncorr = $c->textContent;
} else {
- $tagtxt = _get_text_from_node( $c );
+ ( $tagtxt, $taguncorr ) = _get_word_strings( $c );
}
if( $strip_leading_space ) {
$tagtxt =~ s/^[\s\n]+//s;
+ $taguncorr =~ s/^[\s\n]+//s;
# Unset the flag as soon as we see non-whitespace.
$strip_leading_space = 0 if $tagtxt;
}
$text .= $tagtxt;
- $uncorrtext .= $tagtxt;
+ $uncorrtext .= $taguncorr;
}
}
throw( ident => "text not found",
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<TEI xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns="http://www.tei-c.org/ns/1.0">
+ <teiHeader>
+ <fileDesc>
+ <titleStmt>
+ <title>Lorem ipsum</title>
+ <author>Cero</author>
+ <respStmt xml:id="tla">
+ <resp>Transcription by</resp>
+ <name> Tara L Andrews</name>
+ </respStmt>
+ </titleStmt>
+ <publicationStmt>
+ <p>Unpublished use case</p>
+ </publicationStmt>
+ <sourceDesc>
+ <msDesc xml:id="G">
+ <msIdentifier>
+ <msName>Lorem Ipsum Test</msName>
+ </msIdentifier>
+ </msDesc>
+ </sourceDesc>
+ </fileDesc>
+ </teiHeader>
+ <text>
+ <front>
+ <head>A selection of pseudo-Latin texts</head>
+ </front>
+ <group>
+ <text>
+ <body>
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec nec mi et felis gravida
+ hendrerit ornare eget lorem. Proin nisi sem, aliquam eget aliquet ut, lacinia sed erat.
+ Morbi posuere euismod turpis et volutpat. Mauris suscipit, nisi eget dignissim con
+sectetur, tortor felis tristique lorem, lacinia feugiat leo ante vel diam. Duis ac
+ mauris libero, at suscipit risus. Curabitur orci nunc, commodo sed ornare sed, dictum et
+ nunc. Nulla facilisi. Suspendisse vestibulum dignissim turpis, ut pellentesque orci
+ convallis aliquet. Mauris metus purus, ullamcorper ut imperdiet et, tristique fermentum
+ arcu. Donec congue blandit aliquet. Nunc semper mollis mollis. Nulla tempus, augue vitae
+ iaculis vulputate, neque diam placerat risus, lacinia luctus purus mauris in ligula.
+ Fusce vehicula eleifend pharetra. Cras nec libero diam, at semper lacus. Nulla
+ tristique, ligula id lobortis volutpat, eros metus condimentum orci, in interdum lorem
+ nisi ut justo. </p>
+ </body>
+ </text>
+ <text>
+ <body>
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec nec mi et felis gravida
+ hendrerit ornare eget lorem. Proin nisi sem, aliquam eget aliquet ut, lacinia sed erat.
+ Morbi posuere euismod turpis et volutpat. Mauris suscipit, nisi eget dignissim
+ consectetur, tortor felis tristique lorem, lacinia feugiat leo ante vel diam. Duis ac
+ mauris libero, at suscipit risus. Curabitur orci nunc, commodo sed ornare sed, dictum et
+ nunc. Nulla facilisi. Suspendisse vestibulum dignissim turpis, ut pellentesque orci
+ convallis aliquet. Mauris metus purus, ullamcorper ut imperdiet et, tristique fermentum
+ arcu. Donec congue blandit aliquet. Nunc semper mollis mollis. Nulla tempus, augue vitae
+ iaculis vulputate, neque diam placerat risus, lacinia luctus purus mauris in ligula.
+ Fusce vehicula eleifend pharetra. Cras nec libero diam, at semper lacus. Nulla
+ tristique, ligula id lobortis volutpat, eros metus condimentum orci, in interdum lorem
+ nisi ut justo. Fusce felis ante, vestibulum condimentum aliquet eget, lobortis quis
+ nibh. Quisque eget malesuada sem. Integer congue luctus rhoncus. </p>
+ </body>
+ </text>
+ </group>
+ </text>
+</TEI>
+
is( ref( $trad->witness( 'MsBJ' ) ), 'Text::Tradition::Witness',
"Found second JSON witness" );
-# # Test an XML witness via file
-# my $xmlwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
-# 'file' => 't/data/witnesses/teiwit.xml' );
-# is( ref( $xmlwit ), 'Text::Tradition::Witness', "Created witness from XML file" );
-# if( $xmlwit ) {
-# is( $xmlwit->sigil, 'V887', "XML witness has correct sigil" );
-# ok( $xmlwit->is_layered, "Picked up correction layer" );
-# is( @{$xmlwit->path}, 185, "Got correct text length" );
-# is( @{$xmlwit->uncorrected_path}, 185, "Got correct a.c. text length" );
-# }
+# Test an XML witness via file
+my $xmlwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
+ 'file' => 't/data/witnesses/teiwit.xml' );
+is( ref( $xmlwit ), 'Text::Tradition::Witness', "Created witness from XML file" );
+if( $xmlwit ) {
+ is( $xmlwit->sigil, 'V887', "XML witness has correct sigil" );
+ ok( $xmlwit->is_layered, "Picked up correction layer" );
+ is( @{$xmlwit->text}, 182, "Got correct text length" );
+ is( @{$xmlwit->layertext}, 182, "Got correct a.c. text length" );
+}
+my @allwitwords = grep { $_->id =~ /^V887/ } $c->readings;
+is( @allwitwords, 184, "Reused appropriate readings" );
## Test use_text: build a witness from a single text of a TEI group file, selected by XPath
+my $xpwit = $trad->add_witness( 'sourcetype' => 'xmldesc',
+ 'file' => 't/data/witnesses/group.xml',
+ 'use_text' => '//tei:group/tei:text[2]' );
+is( ref( $xpwit ), 'Text::Tradition::Witness', "Created witness from XML group" );
+if( $xpwit ) {
+ is( $xpwit->sigil, 'G', "XML part witness has correct sigil" );
+ ok( !$xpwit->is_layered, "Picked up no correction layer" );
+ is( @{$xpwit->text}, 157, "Got correct text length" );
+}
}