From: Tara L Andrews Date: Thu, 7 Jun 2012 00:56:43 +0000 (+0200) Subject: add test for lexeme serialization choke X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2Fstemmatology.git;a=commitdiff_plain;h=6ba69acdba350d403384d150614d0294d17828a1 add test for lexeme serialization choke --- diff --git a/t/data/lexformat.xml b/t/data/lexformat.xml new file mode 100644 index 0000000..1f10b4f --- /dev/null +++ b/t/data/lexformat.xml @@ -0,0 +1,1757 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (a.c.) + base text + Latin + 1 + 3.2 + Sermo Augustini 170 + digraph stemma { l [ class=hypothetical ]; v [ class=hypothetical ]; "λ" [ class=hypothetical ]; F [ class=extant ]; "F (a.c.)" [ class=extant ]; L1 [ class=extant ]; L2 [ class=extant ]; L3 [ class=extant ]; "L3 (a.c.)" [ class=extant ]; L4 [ class=extant ]; "L4 (a.c.)" [ class=extant ]; L5 [ class=extant ]; "L5 (a.c.)" [ class=extant ]; L6 [ class=extant ]; "L6 (a.c.)" [ class=extant ]; L7 [ class=extant ]; "L7 (a.c.)" [ class=extant ]; L8 [ class=extant ]; "L8 (a.c.)" [ class=extant ]; L9 [ class=extant ]; "L9 (a.c.)" [ class=extant ]; M [ class=extant ]; "M (a.c.)" [ class=extant ]; V10 [ class=extant ]; "V10 (a.c.)" [ class=extant ]; V11 [ class=extant ]; "V11 (a.c.)" [ class=extant ]; W [ class=extant ]; "W (a.c.)" [ class=extant ]; maur [ class=extant ]; "F (a.c.)" -> F; "F (a.c.)" -> v; "F (a.c.)" -> "λ"; F -> v; F -> "λ"; L1 -> L2; "L3 (a.c.)" -> L3; "L3 (a.c.)" -> maur; L3 -> maur; "L4 (a.c.)" -> L4; "L5 (a.c.)" -> L5; "L6 (a.c.)" -> L6; "L7 (a.c.)" -> L7; "L8 (a.c.)" -> L8; "L8 (a.c.)" -> maur; L8 -> maur; "L9 (a.c.)" -> L9; "M (a.c.)" -> M; "V10 (a.c.)" -> L3; "V10 (a.c.)" -> "L3 (a.c.)"; "V10 (a.c.)" -> V10; V10 -> L3; V10 -> "L3 (a.c.)"; "V11 (a.c.)" -> V11; "W (a.c.)" -> W; l -> L1; l -> L3; l -> "L3 (a.c.)"; l -> L4; l -> "L4 (a.c.)"; l -> L5; l -> "L5 (a.c.)"; l -> L6; l -> "L6 (a.c.)"; l -> L7; l -> "L7 (a.c.)"; l -> L8; l -> "L8 (a.c.)"; l -> L9; l -> "L9 (a.c.)"; v -> L7; v -> "L7 (a.c.)"; v -> V10; v -> "V10 (a.c.)"; v -> V11; v -> "V11 (a.c.)"; "λ" -> M; "λ" -> "M (a.c.)"; "λ" -> W; "λ" -> "W (a.c.)"; "λ" -> l;} + , + + + Default + #END# + 1 + __END__ + 27 + + + Default + #START# + 1 + __START__ + 0 + + + Latin + [{"wordform_matchlist":["Latin // qui // cat@conj","Latin // qui // cat@adv type@int","Latin // qui // cat@adv type@rel","Latin // qui // cat@pron type@rel gender@masc num@sing case@nom","Latin // qui // cat@pron type@rel gender@masc num@pl case@nom"],"language":"Latin","string":"qui"}] + qui + n1007 + 1 + 1 + + + Latin + [{"wordform_matchlist":["Latin // diligo // cat@verb mode@ind tense@pres pers@3 num@sing voice@act"],"language":"Latin","form":"Latin // diligo // cat@verb mode@ind tense@pres pers@3 num@sing voice@act","is_disambiguated":"1","string":"diligit"}] + diligit + n1008 + 2 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@noun gender@neut num@sing case@acc","Latin // mundus // cat@adj gender@neut num@sing case@voc","Latin // mundus // cat@noun gender@masc num@sing case@acc","Latin // mundus // cat@noun gender@neut num@pl case@gen","Latin // mundus // cat@noun gender@masc num@pl case@gen","Latin // mundus // cat@adj gender@neut num@sing case@acc","Latin // mundus // cat@adj gender@neut num@pl case@gen","Latin // mundus // cat@adj gender@neut num@sing case@nom","Latin // mundus // cat@adj gender@masc num@sing case@acc","Latin // mundus // cat@noun gender@neut num@sing case@voc","Latin // mundus // cat@adj gender@masc num@pl case@gen","Latin // mundus // cat@noun gender@neut num@sing case@nom"],"language":"Latin","string":"mundum"}] + mundum + n1009 + 3 + 1 + + + Latin + [{"wordform_matchlist":["Latin // in // cat@prep"],"language":"Latin","form":"Latin // in // cat@prep","is_disambiguated":"1","string":"in"}] + in + n1010 + 4 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@adj gender@neut num@sing case@dat","Latin // mundus // cat@adj gender@neut num@sing case@abl","Latin // mundus // cat@noun gender@masc num@sing case@dat","Latin // mundus // cat@noun gender@neut num@sing case@dat","Latin // mundus // cat@adj gender@masc num@sing case@dat","Latin // mundus // cat@noun gender@masc num@sing case@abl","Latin // mundus // cat@noun gender@neut num@sing case@abl","Latin // mundus // cat@adj gender@masc num@sing case@abl"],"language":"Latin","string":"mundo"}] + mundo + n1011 + 5 + 1 + + + Latin + [{"wordform_matchlist":["Latin // habito // cat@verb mode@ind tense@pres pers@3 num@sing voice@act"],"language":"Latin","form":"Latin // habito // cat@verb mode@ind tense@pres pers@3 num@sing voice@act","is_disambiguated":"1","string":"habitat"}] + habitat + n1013 + 6 + 0 + + + Latin + [{"wordform_matchlist":["Latin // cum // cat@prep","Latin // cum // cat@conj"],"language":"Latin","string":"cum"}] + cum + n1014 + 7 + 1 + + + Latin + [{"wordform_matchlist":["Latin // princeps // cat@adj gender@masc num@sing case@abl","Latin // princeps // cat@noun gender@masc num@sing case@abl","Latin // princeps // cat@adj gender@fem num@sing case@abl","Latin // princeps // cat@adj gender@neut num@sing case@abl"],"language":"Latin","string":"principe"}] + principe + n1015 + 8 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@noun gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@pl case@voc","Latin // mundus // cat@adj gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@pl case@nom","Latin // mundus // cat@adj gender@masc num@pl case@nom","Latin // mundus // cat@adj gender@masc num@sing case@gen","Latin // mundus // cat@noun gender@masc num@sing case@gen","Latin // mundus // cat@adj gender@masc num@pl case@voc"],"language":"Latin","string":"mundi"}] + mundi + n1017 + 10 + 0 + + + Latin + [{"wordform_matchlist":["Latin // omnis // cat@adj gender@masc num@pl case@acc","Latin // omnis // cat@adj gender@masc num@pl case@nom","Latin // omnis // cat@adj gender@masc num@pl case@voc","Latin // omnis // cat@adj gender@fem num@pl case@nom","Latin // omnis // cat@adj gender@fem num@pl case@acc","Latin // omnis // cat@adj gender@fem num@pl case@voc"],"language":"Latin","string":"Omnes"}] + Omnes + n1018 + 11 + 1 + + + Latin + [{"wordform_matchlist":["Latin // ergo // cat@adv"],"language":"Latin","form":"Latin // ergo // cat@adv","is_disambiguated":"1","string":"ergo"}] + ergo + n1020 + 12 + 0 + + + Latin + [{"language":"Latin","string":"dilectores"}] + dilectores + n1021 + 13 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@adj gender@masc num@pl case@nom","Latin // mundus // cat@noun gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@pl case@voc","Latin // mundus // cat@adj gender@masc num@sing case@gen","Latin // mundus // cat@adj gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@sing case@gen","Latin // mundus // cat@adj gender@masc num@pl case@voc","Latin // mundus // cat@noun gender@masc num@pl case@nom"],"language":"Latin","string":"mundi"}] + mundi + n1022 + 14 + 1 + + + Latin + [{"wordform_matchlist":["Latin // ipse // cat@det gender@masc num@pl case@nom","Latin // ipse // cat@det gender@masc num@sing case@dat","Latin // ipse // cat@det gender@neut num@sing case@dat","Latin // ipse // cat@det gender@masc num@pl case@voc","Latin // ipse // cat@det gender@fem num@sing case@dat"],"language":"Latin","string":"ipsi"}] + ipsi + n1023 + 15 + 1 + + + Latin + [{"wordform_matchlist":["Latin // sum // cat@verb mode@ind tense@pres pers@3 num@pl voice@act"],"language":"Latin","form":"Latin // sum // cat@verb mode@ind tense@pres pers@3 num@pl voice@act","is_disambiguated":"1","string":"sunt"}] + sunt + n1024 + 16 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@adj gender@masc num@sing case@nom","Latin // mundus // cat@noun gender@masc num@sing case@nom"],"language":"Latin","string":"mundus"}] + mundus + n1025 + 17 + 1 + + + Latin + [{"wordform_matchlist":["Latin // habitator // cat@noun gender@masc num@pl case@acc","Latin // habitator // cat@noun gender@masc num@pl case@nom","Latin // habitator // cat@noun gender@masc num@pl case@voc"],"language":"Latin","string":"habitatores"}] + habitatores + n1026 + 18 + 1 + + + Latin + [{"wordform_matchlist":["Latin // mundus // cat@adj gender@masc num@pl case@nom","Latin // mundus // cat@noun gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@pl case@voc","Latin // mundus // cat@adj gender@masc num@sing case@gen","Latin // mundus // cat@adj gender@neut num@sing case@gen","Latin // mundus // cat@noun gender@masc num@sing case@gen","Latin // mundus // cat@adj gender@masc num@pl case@voc","Latin // mundus // cat@noun gender@masc num@pl case@nom"],"language":"Latin","string":"mundi"}] + mundi + n1027 + 19 + 1 + + + Latin + [{"wordform_matchlist":["Latin // non // cat@adv"],"language":"Latin","form":"Latin // non // cat@adv","is_disambiguated":"1","string":"non"}] + non + n1028 + 20 + 1 + + + Latin + [{"wordform_matchlist":["Latin // carnis // cat@noun gender@fem num@sing case@abl"],"language":"Latin","form":"Latin // carnis // cat@noun gender@fem num@sing case@abl","is_disambiguated":"1","string":"carne"}] + carne + n1029 + 21 + 1 + + + Latin + [{"wordform_matchlist":["Latin // qui // cat@pron type@rel gender@neut num@sing case@acc","Latin // qui // cat@pron type@rel gender@neut num@sing case@nom"],"language":"Latin","string":"quod"}] + quod + n1030 + 22 + 1 + + + Latin + [{"wordform_matchlist":["Latin // omnis // cat@adj gender@masc num@pl case@acc","Latin // omnis // cat@adj gender@masc num@pl case@nom","Latin // omnis // cat@adj gender@masc num@pl case@voc","Latin // omnis // cat@adj gender@fem num@pl case@nom","Latin // omnis // cat@adj gender@fem num@pl case@acc","Latin // omnis // cat@adj gender@fem num@pl case@voc"],"language":"Latin","string":"omnes"}] + omnes + n1031 + 23 + 1 + + + Latin + [{"wordform_matchlist":["Latin // iustus // cat@adj gender@masc num@pl case@nom","Latin // iustus // cat@adj gender@masc num@sing case@gen","Latin // iustus // cat@adj gender@neut num@sing case@gen","Latin // iustus // cat@adj gender@masc num@pl case@voc"],"language":"Latin","string":"iusti"}] + iusti + n1032 + 24 + 1 + + + Latin + [{"wordform_matchlist":["Latin // sed // cat@conj"],"language":"Latin","form":"Latin // sed // cat@conj","is_disambiguated":"1","string":"sed"}] + sed + n1034 + 25 + 0 + + + Latin + [{"wordform_matchlist":["Latin // animus // cat@noun gender@masc num@sing case@dat","Latin // animus // cat@noun gender@masc num@sing case@abl"],"language":"Latin","string":"animo"}] + animo + n1035 + 26 + 1 + + + Latin + [{"wordform_matchlist":["Latin // habito // cat@verb mode@ind tense@pres pers@3 num@sing voice@act"],"language":"Latin","form":"Latin // habito // cat@verb mode@ind tense@pres pers@3 num@sing voice@act","is_disambiguated":"1","string":"habitat"}] + habitat + r1012.2 + 9 + 0 + + + Latin + [{"wordform_matchlist":["Latin // ita // cat@conj"],"language":"Latin","form":"Latin // ita // cat@conj","is_disambiguated":"1","string":"itaque"}] + itaque + r1019.0 + 12 + 0 + + + F + + + L1 + + + L2 + + + L3 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + V10 + + + V11 + + + W + + + flor + + + M + + + am + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + V10 + + + V11 + + + W + + + flor + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + M + + + am + + + maur + + + ulim + + + F + + + L3 + + + L7 + + + V10 + + + V11 + + + W + + + flor + + + L1 + + + L2 + + + (a.c.) + L3 + + + L4 + + + L5 + + + L6 + + + L8 + + + L9 + + + F + + + L3 + + + L7 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + am + + + maur + + + ulim + + + F + + + M + + + V10 + + + V11 + + + W + + + flor + + + F + + + M + + + V10 + + + V11 + + + W + + + flor + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + L4 + + + F + + + L1 + + + L2 + + + L3 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + F + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + M + + + V10 + + + V11 + + + W + + + am + + + flor + + + maur + + + ulim + + + L4 + + + M + + + am + + + maur + + + ulim + + + L1 + + + L2 + + + L3 + + + L4 + + + L5 + + + L6 + + + L7 + + + L8 + + + L9 + + + am + + + maur + + + ulim + + + + + __END__ + + + __START__ + + + n1017 + + + n1018 + + + n1020 + + + n1021 + + + n1022 + + + n1023 + + + n1024 + + + n1025 + + + n1026 + + + n1027 + + + n1007 + + + n1028 + + + n1029 + + + n1030 + + + n1031 + + + n1032 + + + n1034 + + + n1035 + + + r1012.2 + + + r1019.0 + + + n1008 + + + n1009 + + + n1010 + + + n1011 + + + n1013 + + + n1014 + + + n1015 + + + habitat + habitat + transposition + local + + + diff --git a/t/lexeme_serialize.t b/t/lexeme_serialize.t index 6bb714a..7db1bae 100644 --- a/t/lexeme_serialize.t +++ b/t/lexeme_serialize.t @@ -37,4 +37,11 @@ foreach my $r ( $tf->collation->readings ) { } } +# Test a snippet of tradition with possibly-problematic saved lexemes +my $tf3 = Text::Tradition->new( + 'input' => 'Self', + 'file' => 't/data/lexformat.xml' ); +is( ref $tf3, 'Text::Tradition', + "Successfully parsed tradition with incomplete lexemes" ); + done_testing();