add witness role for collation regularization
[scpubgit/stemmatology.git] / morphology / lib / Text / Tradition / Language / Greek.pm
CommitLineData
0ce8c0cf 1package Text::Tradition::Language::Greek;
2
3use strict;
4use warnings;
5use Module::Load;
6use parent qw/ Text::Tradition::Language::Perseus /;
307d8db9 7use Text::Tradition::Language::Base qw/ unicode_regularize /;
0ce8c0cf 8
9=head1 NAME
10
11Text::Tradition::Language::Greek - language-specific module for Greek
12
13=head1 DESCRIPTION
14
15Implements morphology lookup for Greek words in context. This module
16depends on the Lingua::Morph::Perseus module for access to PhiloLogic database data.
17
18=head1 SUBROUTINES
19
20=head2 lemmatize( $text )
21
22Evaluates the string using Treetagger and Perseus, and returns the results.
23
307d8db9 24=head2 reading_lookup( $word )
25
26Returns a single-word morphological lookup of the given word using Perseus.
27
0ce8c0cf 28=begin testing
29
30use Text::Tradition;
31use_ok( 'Text::Tradition::Language::Greek' );
32
33eval "use Lingua::Morph::Perseus";
34my $err = $@;
35
36SKIP: {
37 skip "Greek linguistic data not read yet";
38
39 my $trad = Text::Tradition->new(
40 'language' => 'Greek',
41 'file' => 't/data/florilegium_graphml.xml',
42 'input' => 'Self' );
43 $trad->lemmatize();
44 my $ambig = 0;
45 foreach my $r ( $trad->collation->readings ) {
46 next if $r->is_meta;
47 ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
48 my @lex = $r->lexemes;
49 my $lexstr = join( '', map { $_->string } @lex );
50 my $textstr = $r->text;
51 $textstr =~ s/\s+//g;
52 is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
53 foreach my $l ( @lex ) {
54 next unless $l->matches;
55 next if $l->is_disambiguated;
56 printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
57 join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
58 $ambig++;
59 }
60 }
61 is( $ambig, 4, "Found 4 ambiguous forms as expected" );
62}
63
64=end testing
65
66=cut
67
68our $dbhandle;
69
70sub lemmatize {
71 return __PACKAGE__->perseus_lemmatize( @_ );
72}
73
74sub reading_lookup {
75 return __PACKAGE__->perseus_reading_lookup( @_ );
76}
77
307d8db9 78=head2 regularize( $text )
79
80Returns a regularized form of the reading for the purposes of collation.
81
82=cut
83
84sub regularize {
85 return unicode_regularize( @_ );
86}
87
0ce8c0cf 881;
e92d4229 89