X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2Fstemmatology.git;a=blobdiff_plain;f=lib%2FText%2FTradition%2FLanguage%2FEnglish.pm;h=a2dc236fed9e8c3b7fcf06bf398e545a3142ea02;hp=ea38a7f245067e3aabd4f60d477564de77ff9bab;hb=f8862b584dcc04728d3bff48ea7c19cb9a078772;hpb=fedee8dac79426f8a1f7ae70d95478c6fcd5d69a diff --git a/lib/Text/Tradition/Language/English.pm b/lib/Text/Tradition/Language/English.pm index ea38a7f..a2dc236 100644 --- a/lib/Text/Tradition/Language/English.pm +++ b/lib/Text/Tradition/Language/English.pm @@ -2,6 +2,7 @@ package Text::Tradition::Language::English; use strict; use warnings; +use Lingua::TagSet::TreeTagger::English; use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger lfs_morph_tags /; use TryCatch; @@ -74,15 +75,17 @@ sub morphology_tags { sub _parse_wordform { my $tagresult = shift; my( $orig, $tag, $lemma ) = split( /\t/, $tagresult ); - my $morphobj = Lingua::TagSet::TreeTagger->tag2structure( $tag ); + return () unless $tag =~ /\w/; # skip punct-only "tags" + my $morphobj = Lingua::TagSet::TreeTagger::English->tag2structure( $tag ); if( $morphobj ) { - return Text::Tradition::Collation::Reading::WordForm->new( + return ( Text::Tradition::Collation::Reading::WordForm->new( 'language' => 'English', 'lemma' => $lemma, 'morphology' => $morphobj, - ); + ) ); } else { - warn "No morphology found for word: $_"; + warn "No morphology found for word: $tagresult"; + return (); } }