1 package Text::Tradition::Language::Latin;
6 use Text::Tradition::Language::Base qw/ lemmatize_treetagger lfs_morph_tags /;
11 Text::Tradition::Language::Latin - language-specific module for Latin
15 Implements morphology lookup for French words in context. This module
16 depends on the Lingua::Morph::Perseus module for access to PhiloLogic database data.
20 =head2 lemmatize( $text )
22 Evaluates the string using the Flemm package, and returns the results.
27 use_ok( 'Text::Tradition::Language::Latin' );
29 eval "use Lingua::Morph::Perseus";
33 skip "Package Lingua::Morph::Perseus not found" if $err;
35 my $trad = Text::Tradition->new(
36 'language' => 'Latin',
37 'file' => 't/data/legendfrag.xml',
41 foreach my $r ( $trad->collation->readings ) {
43 ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
44 my @lex = $r->lexemes;
45 my $lexstr = join( '', map { $_->string } @lex );
46 my $textstr = $r->text;
48 is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
49 foreach my $l ( @lex ) {
50 next unless $l->matches;
51 next if $l->is_disambiguated;
52 printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
53 join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
57 is( $ambig, 4, "Found 4 ambiguous forms as expected" );
65 my $tradition = shift;
67 'language' => 'Latin',
68 'callback' => sub { _perseus_lookup_tt( @_ ) }
70 return lemmatize_treetagger( $tradition, %opts );
73 =head2 reading_lookup( $rdg[, $rdg, ...] )
75 Looks up one or more readings using the Perseus package, and returns the
76 possible results. This skips the tree tagger / tokenizer, returning any
77 match for the word string(s) in the morphology DB.
83 return map { _perseus_lookup_str( $_ ) } @words;
86 =head2 morphology_tags
88 Return a data structure describing the available parts of speech and their attributes.
93 return lfs_morph_tags();
103 load 'Lingua::Morph::Perseus';
104 $morph = Lingua::Morph::Perseus->connect( 'Latin' );
106 warn "Cannot do Latin word lemmatization without Lingua::Morph::Perseus: @_";
112 sub _perseus_lookup_tt {
113 my( $orig, $pos, $lemma ) = split( /\t/, $_[0] );
115 return unless $morph;
116 # Discard results that don't match the lemma, unless lemma is unknown
118 unless( $lemma eq '<unknown>' || $lemma =~ /^\W+$/ ) {
120 map { $lems{$_} = 1; $lems{lc($_)} = 1 } split( /\|/, $lemma );
121 $lookupopts->{'lemma'} = [ keys %lems ];
123 $lookupopts->{'ttpos'} = $pos if $pos;
125 my $result = $morph->lexicon_lookup( $orig, $lookupopts );
126 # unless( !keys( %$lookupopts ) || $result->{'filtered'} ) {
127 # warn "Filter on $pos / $lemma returned no results; using all results";
129 my @ret = @{$result->{'objects'}};
130 my %unique_wordforms;
131 foreach my $obj ( @ret ) {
132 my $wf = _wordform_from_row( $obj );
133 $unique_wordforms{$wf->to_string} = $wf;
135 return values( %unique_wordforms );
138 sub _perseus_lookup_str {
141 return unless $morph;
142 # Simple morph DB lookup, and return the results.
143 my $result = $morph->lexicon_lookup( $orig );
144 return map { _wordform_from_row( $_ ) } @{$result->{'objects'}};
149 sub _wordform_from_row {
151 my $lemma = $rowobj->lemma;
152 $lemma =~ s/^(\D+)\d*$/$1/;
153 my $wf = Text::Tradition::Collation::Reading::WordForm->new(
154 'language' => 'Latin',
156 'morphology' => $rowobj->morphology,