introspect for morphology values; include these in help; make sure Perseus results...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Language / English.pm
CommitLineData
f4b6b4d0 1package Text::Tradition::Language::English;
2
3use strict;
4use warnings;
75ae2b25 5use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger
6 lfs_morph_tags /;
f4b6b4d0 7use TryCatch;
8
f4b6b4d0 9=head1 NAME
10
11Text::Tradition::Language::English - language-specific module for English
12
13=head1 DESCRIPTION
14
15Implements morphology lookup for English words in context. This module
16depends on the TreeTagger software
17(L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is
18(for now) expected to be installed in $MORPHDIR/TreeTagger.
19
20=head1 SUBROUTINES
21
22=head2 lemmatize( $text )
23
24Evaluates the string using the TreeTagger, and returns the results.
25
26=begin testing
27
28binmode STDOUT, ':utf8';
29use Text::Tradition;
30use_ok( 'Text::Tradition::Language::English' );
31
32=end testing
33
34=cut
35
# Run the TreeTagger-based lemmatizer over a whole tradition.
#
# Takes the tradition as its first (and only used) argument and hands it to
# lemmatize_treetagger together with the language name and a callback that
# converts each tagger result into a WordForm. Returns whatever
# lemmatize_treetagger returns.
sub lemmatize {
    my( $tradition ) = @_;
    return lemmatize_treetagger(
        $tradition,
        'language' => 'English',
        # Each raw tagger result is converted by _parse_wordform.
        'callback' => sub { _parse_wordform( @_ ) },
    );
}
44
45=head2 reading_lookup( $rdg[, $rdg, ...] )
46
Looks up one or more readings using the TreeTagger software, and returns the
possible results. This uses the same logic as L<lemmatize> above for the
entire tradition, but can also be used to (re-)analyze individual readings.
50
51=cut
52
# Look up morphology for one or more individual readings.
#
# Takes the list of readings to analyze; the list is forwarded to
# reading_lookup_treetagger as the 'path' option, along with the language
# name and the callback that converts each tagger result into a WordForm.
# Returns whatever reading_lookup_treetagger returns.
sub reading_lookup {
    my @path = @_;
    my %opts = (
        # This is the English module: request English analysis, consistent
        # with lemmatize() and with the WordForms built by _parse_wordform.
        'language' => 'English',
        'callback' => sub { _parse_wordform( @_ ) },
        'path'     => \@path,
    );
    return reading_lookup_treetagger( %opts );
}
62
75ae2b25 63=head2 morphology_tags
64
65Return a data structure describing the available parts of speech and their attributes.
66
67=cut
68
# Describe the morphological tags available for this language.
#
# Takes no arguments and simply delegates to lfs_morph_tags (imported from
# Text::Tradition::Language::Base), returning its result unchanged.
sub morphology_tags {
    # Call directly in the return statement so the caller's context
    # (scalar or list) is passed straight through to lfs_morph_tags.
    return lfs_morph_tags();
}
72
# Utility function to turn a single TreeTagger result into a WordForm.
#
# Takes one tab-separated result string whose fields are, in order, the
# original word, the TreeTagger tag, and the lemma.
#
# Returns a Text::Tradition::Collation::Reading::WordForm (language
# 'English') when the tag can be converted into a morphology structure.
# Otherwise emits a warning naming the word and returns nothing (undef in
# scalar context, the empty list in list context).
#
# NOTE(review): Lingua::TagSet::TreeTagger and the WordForm class are not
# loaded in the visible part of this file -- presumably
# Text::Tradition::Language::Base pulls them in; confirm.
sub _parse_wordform {
    my $tagresult = shift;
    my( $orig, $tag, $lemma ) = split( /\t/, $tagresult );
    my $morphobj = Lingua::TagSet::TreeTagger->tag2structure( $tag );

    unless( $morphobj ) {
        # Name the word from the tagger output; $_ is never set in this sub,
        # so interpolating it would print a stale or undefined value.
        warn "No morphology found for word: $orig";
        # Return explicitly so the caller does not receive warn's true
        # return value in place of a WordForm.
        return;
    }

    return Text::Tradition::Collation::Reading::WordForm->new(
        'language'   => 'English',
        'lemma'      => $lemma,
        'morphology' => $morphobj,
    );
}
88
891;
90
91=head2 TODO
92
93=over
94
e0f6836a 95=item * Tests!
f4b6b4d0 96
97=back
98
99=head1 LICENSE
100
101This package is free software and is provided "as is" without express
102or implied warranty. You can redistribute it and/or modify it under
103the same terms as Perl itself.
104
105=head1 AUTHOR
106
107Tara L Andrews E<lt>aurum@cpan.orgE<gt>