1 package Text::Tradition::Language::English;
5 use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger
11 Text::Tradition::Language::English - language-specific module for English
15 Implements morphology lookup for English words in context. This module
16 depends on the TreeTagger software
17 (L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is
18 (for now) expected to be installed in $MORPHDIR/TreeTagger.
22 =head2 lemmatize( $text )
24 Evaluates the string using the TreeTagger, and returns the results.
28 binmode STDOUT, ':utf8';
30 use_ok( 'Text::Tradition::Language::English' );
37 my $tradition = shift;
39 'language' => 'English',
40 'callback' => sub { _parse_wordform( @_ ) }
42 return lemmatize_treetagger( $tradition, %opts );
45 =head2 reading_lookup( $rdg[, $rdg, ...] )
47 Looks up one or more readings using the TreeTagger, and returns the
48 possible results. This uses the same logic as L<lemmatize|/"lemmatize( $text )"> above for the
49 entire tradition, but can also be used to (re-)analyze individual readings.
56 'language' => 'French',
57 'callback' => sub { _parse_wordform( @_ ) },
60 return reading_lookup_treetagger( %opts );
63 =head2 morphology_tags
65 Return a data structure describing the available parts of speech and their attributes.
70 return lfs_morph_tags();
73 # Utility function to turn a TreeTagger result into a WordForm
75 my $tagresult = shift;
76 my( $orig, $tag, $lemma ) = split( /\t/, $tagresult );
77 my $morphobj = Lingua::TagSet::TreeTagger->tag2structure( $tag );
79 return Text::Tradition::Collation::Reading::WordForm->new(
80 'language' => 'English',
82 'morphology' => $morphobj,
85 warn "No morphology found for word: $_";
101 This package is free software and is provided "as is" without express
102 or implied warranty. You can redistribute it and/or modify it under
103 the same terms as Perl itself.
107 Tara L Andrews E<lt>aurum@cpan.orgE<gt>