morphology/lib/Text/Tradition/Language/Latin.pm

   1 package Text::Tradition::Language::Latin;
   2
   3 use strict;
   4 use warnings;
   5 use Module::Load;
   6 use parent qw/ Text::Tradition::Language::Perseus /;
   7
   8 =head1 NAME
   9
  10 Text::Tradition::Language::Latin - language-specific module for Latin
  11
  12 =head1 DESCRIPTION
  13
  14 Implements morphology lookup for Latin words in context.  This module
  15 depends on the Lingua::Morph::Perseus module for access to PhiloLogic database data.
  16
  17 =head1 SUBROUTINES
  18
  19 =head2 lemmatize( $text )
  20
  21 Evaluates the string using Treetagger and Perseus, and returns the results.
  22
  23 =head2 reading_lookup( $word )
  24
  25 Returns a single-word morphological lookup of the given word using Perseus.
  26
  27 =begin testing
  28
  29 use Text::Tradition;
  30 use_ok( 'Text::Tradition::Language::Latin' );
  31
  32 my $trad = Text::Tradition->new(
  33         'language' => 'Latin',
  34         'file' => 't/data/legendfrag.xml',
  35         'input' => 'Self' );
  36
  37 eval "use Lingua::Morph::Perseus";
  38 my $err = $@;
  39 SKIP: {
  40         skip "Package Lingua::Morph::Perseus not found" if $err;
  41
  42         $trad->lemmatize();
  43         my $ambig = 0;
  44         foreach my $r ( $trad->collation->readings ) {
  45                 next if $r->is_meta;
  46                 ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
  47                 my @lex = $r->lexemes;
  48                 my $lexstr = join( '', map { $_->string } @lex );
  49                 my $textstr = $r->text;
  50                 $textstr =~ s/\s+//g;
  51                 is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
  52                 foreach my $l ( @lex ) {
  53                         next unless $l->matches;
  54                         next if $l->is_disambiguated;
  55                         printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
  56                                 join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
  57                         $ambig++;
  58                 }
  59         }
  60         is( $ambig, 4, "Found 4 ambiguous forms as expected" );
  61 }
  62
  63 # Try exporting some witnesses
  64 my $e_v = 'in suetia uenerabilis pontifex beatus henricus in anglia oriundus';
  65 my $struct_v = $trad->witness('V')->export_as_json;
  66 my $g_v = join( ' ', map { $_->{'n'} } @{$struct_v->{'tokens'}} );
  67 is( $g_v, $e_v, "Got expected regularization of witness V" );
  68 my $e_n = 'in suetia beatus henricus uenerabilis pontifex de anglia oriundus';
  69 my $struct_n = $trad->witness('N')->export_as_json;
  70 my $g_n = join( ' ', map { $_->{'n'} } @{$struct_n->{'tokens'}} );
  71 is( $g_n, $e_n, "Got expected regularization of witness N" );
  72
  73
  74 =end testing
  75
  76 =cut
  77
  78 our $dbhandle;
  79
  80 sub lemmatize {
  81         return __PACKAGE__->perseus_lemmatize( @_ );
  82 }
  83
  84 sub reading_lookup {
  85         return __PACKAGE__->perseus_reading_lookup( @_ );
  86 }
  87
  88 =head2 regularize( $text )
  89
  90 Returns a regularized form of the reading for the purposes of collation.
  91
  92 =cut
  93
  94 sub regularize {
  95         my( $word ) = @_;
  96         $word = lc( $word );
  97         $word =~ s/v/u/g;
  98         $word =~ s/w/u/g;
  99         $word =~ s/j/i/g;
 100         $word =~ s/ci/ti/g;
 101         $word =~ s/cha/ca/g;
 102         return $word;
 103 }
 104
 105 1;
 106