refactor English/French shared TT logic into Base.pm
[scpubgit/stemmatology.git] / lib / Text / Tradition / Language / English.pm
CommitLineData
f4b6b4d0 1package Text::Tradition::Language::English;
2
3use strict;
4use warnings;
e0f6836a 5use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger /;
f4b6b4d0 6use TryCatch;
7
f4b6b4d0 8=head1 NAME
9
10Text::Tradition::Language::English - language-specific module for English
11
12=head1 DESCRIPTION
13
14Implements morphology lookup for English words in context. This module
15depends on the TreeTagger software
16(L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is
17(for now) expected to be installed in $MORPHDIR/TreeTagger.
18
19=head1 SUBROUTINES
20
21=head2 lemmatize( $tradition )
22
23Evaluates the string using the TreeTagger, and returns the results.
24
25=begin testing
26
27binmode STDOUT, ':utf8';
28use Text::Tradition;
29use_ok( 'Text::Tradition::Language::English' );
30
31=end testing
32
33=cut
34
# Lemmatize a tradition with the TreeTagger.
#
# Takes the tradition as its only argument and hands it to
# lemmatize_treetagger (imported from Text::Tradition::Language::Base),
# together with the language name and the callback that turns each tagger
# result into a word form. Returns whatever lemmatize_treetagger returns.
sub lemmatize {
    my $tradition = shift;
    my %opts = (
        # This is the English module: the language passed to the tagger must
        # agree with the 'English' word forms built by _parse_wordform.
        'language' => 'English',
        'callback' => \&_parse_wordform,
    );
    return lemmatize_treetagger( $tradition, %opts );
}
43
44=head2 reading_lookup( $rdg[, $rdg, ...] )
45
46Looks up one or more readings using the TreeTagger, and returns the
47possible results. This uses the same logic as L<lemmatize> above for the
48entire tradition, but can also be used to (re-)analyze individual readings.
49
50=cut
51
# Look up one or more readings with the TreeTagger.
#
# All arguments are collected as the reading path and passed by reference
# under the 'path' option to reading_lookup_treetagger (imported from
# Text::Tradition::Language::Base), along with the language name and the
# word-form callback. Returns whatever reading_lookup_treetagger returns.
sub reading_lookup {
    my( @path ) = @_;
    my %opts = (
        # This is the English module: the language passed to the tagger must
        # agree with the 'English' word forms built by _parse_wordform.
        'language' => 'English',
        'callback' => \&_parse_wordform,
        'path'     => \@path,
    );
    return reading_lookup_treetagger( %opts );
}
61
# Utility function to turn a TreeTagger result into a WordForm.
#
# Takes one tagger result line, which is split on tabs into the original
# word, its tag and its lemma. The tag is converted to a morphology
# structure via Lingua::TagSet::TreeTagger->tag2structure.
#
# Returns a new Text::Tradition::Collation::Reading::WordForm when the tag
# could be converted; otherwise emits a warning naming the word and returns
# nothing (undef in scalar context, the empty list in list context).
sub _parse_wordform {
    my $tagresult = shift;
    my( $orig, $tag, $lemma ) = split( /\t/, $tagresult );
    my $morphobj = Lingua::TagSet::TreeTagger->tag2structure( $tag );

    unless( $morphobj ) {
        # Report the word that was actually parsed; $_ is never set by this
        # sub, so interpolating it would print whatever the caller left there.
        warn "No morphology found for word: $orig";
        # Explicit empty return, so that warn's true return value is not
        # mistaken by the caller for a word form.
        return;
    }

    return Text::Tradition::Collation::Reading::WordForm->new(
        'language'   => 'English',
        'lemma'      => $lemma,
        'morphology' => $morphobj,
    );
}
77
781;
79
80=head2 TODO
81
82=over
83
e0f6836a 84=item * Tests!
f4b6b4d0 85
86=back
87
88=head1 LICENSE
89
90This package is free software and is provided "as is" without express
91or implied warranty. You can redistribute it and/or modify it under
92the same terms as Perl itself.
93
94=head1 AUTHOR
95
96Tara L Andrews E<lt>aurum@cpan.orgE<gt>