add witness role for collation regularization
[scpubgit/stemmatology.git] / morphology / lib / Text / Tradition / Language / Latin.pm
CommitLineData
5271a011 1package Text::Tradition::Language::Latin;
2
3use strict;
4use warnings;
5use Module::Load;
0ce8c0cf 6use parent qw/ Text::Tradition::Language::Perseus /;
5271a011 7
8=head1 NAME
9
10Text::Tradition::Language::Latin - language-specific module for Latin
11
12=head1 DESCRIPTION
13
0ce8c0cf 14Implements morphology lookup for Latin words in context. This module
f8862b58 15depends on the Lingua::Morph::Perseus module for access to PhiloLogic database data.
5271a011 16
17=head1 SUBROUTINES
18
19=head2 lemmatize( $text )
20
0ce8c0cf 21Evaluates the string using Treetagger and Perseus, and returns the results.
5271a011 22
307d8db9 23=head2 reading_lookup( $word )
24
25Returns a single-word morphological lookup of the given word using Perseus.
26
5271a011 27=begin testing
28
29use Text::Tradition;
30use_ok( 'Text::Tradition::Language::Latin' );
31
307d8db9 32my $trad = Text::Tradition->new(
33 'language' => 'Latin',
34 'file' => 't/data/legendfrag.xml',
35 'input' => 'Self' );
36
f8862b58 37eval "use Lingua::Morph::Perseus";
5271a011 38my $err = $@;
5271a011 39SKIP: {
f8862b58 40 skip "Package Lingua::Morph::Perseus not found" if $err;
5271a011 41
5271a011 42 $trad->lemmatize();
43 my $ambig = 0;
44 foreach my $r ( $trad->collation->readings ) {
45 next if $r->is_meta;
46 ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
47 my @lex = $r->lexemes;
48 my $lexstr = join( '', map { $_->string } @lex );
49 my $textstr = $r->text;
50 $textstr =~ s/\s+//g;
51 is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
52 foreach my $l ( @lex ) {
fe77efe0 53 next unless $l->matches;
5271a011 54 next if $l->is_disambiguated;
55 printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
56 join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
57 $ambig++;
58 }
59 }
f8862b58 60 is( $ambig, 4, "Found 4 ambiguous forms as expected" );
5271a011 61}
307d8db9 62
63# Try exporting some witnesses
64my $e_v = 'in suetia uenerabilis pontifex beatus henricus in anglia oriundus';
65my $struct_v = $trad->witness('V')->export_as_json;
66my $g_v = join( ' ', map { $_->{'n'} } @{$struct_v->{'tokens'}} );
67is( $g_v, $e_v, "Got expected regularization of witness V" );
68my $e_n = 'in suetia beatus henricus uenerabilis pontifex de anglia oriundus';
69my $struct_n = $trad->witness('N')->export_as_json;
70my $g_n = join( ' ', map { $_->{'n'} } @{$struct_n->{'tokens'}} );
71is( $g_n, $e_n, "Got expected regularization of witness N" );
72
5271a011 73
74=end testing
75
76=cut
77
0ce8c0cf 78our $dbhandle;
79
5271a011 80sub lemmatize {
0ce8c0cf 81 return __PACKAGE__->perseus_lemmatize( @_ );
5271a011 82}
83
5271a011 84sub reading_lookup {
0ce8c0cf 85 return __PACKAGE__->perseus_reading_lookup( @_ );
75ae2b25 86}
87
307d8db9 88=head2 regularize( $text )
89
90Returns a regularized form of the reading for the purposes of collation.
91
92=cut
93
94sub regularize {
95 my( $word ) = @_;
96 $word = lc( $word );
97 $word =~ s/v/u/g;
98 $word =~ s/w/u/g;
99 $word =~ s/j/i/g;
100 $word =~ s/ci/ti/g;
101 $word =~ s/cha/ca/g;
102 return $word;
103}
104
227d4a11 1051;
e92d4229 106