Commit | Line | Data |
f4b6b4d0 |
1 | package Text::Tradition::Language::English; |
2 | |
3 | use strict; |
4 | use warnings; |
e0f6836a |
5 | use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger /; |
f4b6b4d0 |
6 | use TryCatch; |
7 | |
f4b6b4d0 |
8 | =head1 NAME |
9 | |
10 | Text::Tradition::Language::English - language-specific module for English |
11 | |
12 | =head1 DESCRIPTION |
13 | |
14 | Implements morphology lookup for English words in context. This module |
15 | depends on the TreeTagger software |
16 | (L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is |
17 | (for now) expected to be installed in $MORPHDIR/TreeTagger. |
18 | |
19 | =head1 SUBROUTINES |
20 | |
21 | =head2 lemmatize( $tradition ) |
22 | |
23 | Evaluates the text of the given tradition using the TreeTagger, and returns the results. |
24 | |
25 | =begin testing |
26 | |
27 | binmode STDOUT, ':utf8'; |
28 | use Text::Tradition; |
29 | use_ok( 'Text::Tradition::Language::English' ); |
30 | |
31 | =end testing |
32 | |
33 | =cut |
34 | |
# Lemmatize a whole tradition with the English TreeTagger settings.
#
# Takes the tradition as its only argument and hands it on to
# lemmatize_treetagger, together with the language name and a callback
# that converts each tagger result through _parse_wordform.
# Returns whatever lemmatize_treetagger returns.
sub lemmatize {
    my( $tradition ) = @_;

    # Wrap the private parser so the tagger helper can call it per result.
    my $wordform_cb = sub { _parse_wordform( @_ ) };

    return lemmatize_treetagger(
        $tradition,
        'language' => 'English',
        'callback' => $wordform_cb,
    );
}
43 | |
44 | =head2 reading_lookup( $rdg[, $rdg, ...] ) |
45 | |
46 | Looks up one or more readings using the TreeTagger, and returns the |
47 | possible results. This uses the same logic as L</lemmatize> above for the |
48 | entire tradition, but can also be used to (re-)analyze individual readings. |
49 | |
50 | =cut |
51 | |
# Look up one or more readings with the English TreeTagger settings.
#
# All arguments are collected as the reading path and passed by reference
# under the 'path' option to reading_lookup_treetagger, together with the
# language name and a callback that converts each tagger result through
# _parse_wordform. Returns whatever reading_lookup_treetagger returns.
sub reading_lookup {
    my( @path ) = @_;
    my %opts = (
        # This is the English module: request the English tagger settings,
        # matching lemmatize() and the WordForm language set below.
        'language' => 'English',
        'callback' => sub { _parse_wordform( @_ ) },
        'path'     => \@path,
    );
    return reading_lookup_treetagger( %opts );
}
61 | |
# Utility function to turn a TreeTagger result into a WordForm.
#
# Expects one tab-separated result line holding, in order, the original
# word, its tag, and its lemma. The tag is converted with
# Lingua::TagSet::TreeTagger->tag2structure; on success a new English
# WordForm carrying the lemma and that morphology structure is returned.
# If the tag yields no structure, a warning naming the word is issued and
# nothing is returned (empty list / undef).
sub _parse_wordform {
    my( $tagresult ) = @_;
    my( $orig, $tag, $lemma ) = split( /\t/, $tagresult );
    my $morphobj = Lingua::TagSet::TreeTagger->tag2structure( $tag );

    unless( $morphobj ) {
        # Report the word that was actually parsed; $_ is never set by
        # this sub, so interpolating it would print unrelated data.
        my $word = defined $orig ? $orig : '';
        warn "No morphology found for word: $word";
        # Explicit bare return, so that the true value of warn() is not
        # handed back to the caller in place of a WordForm.
        return;
    }

    return Text::Tradition::Collation::Reading::WordForm->new(
        'language'   => 'English',
        'lemma'      => $lemma,
        'morphology' => $morphobj,
    );
}
77 | |
78 | 1; |
79 | |
80 | =head2 TODO |
81 | |
82 | =over |
83 | |
e0f6836a |
84 | =item * Tests! |
f4b6b4d0 |
85 | |
86 | =back |
87 | |
88 | =head1 LICENSE |
89 | |
90 | This package is free software and is provided "as is" without express |
91 | or implied warranty. You can redistribute it and/or modify it under |
92 | the same terms as Perl itself. |
93 | |
94 | =head1 AUTHOR |
95 | |
96 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |