X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FLanguage%2FBase.pm;h=954c30ca2160a6e1024a7305e44911c07cbdda7d;hb=75ae2b25a9925f075714d1471b211db3c30ffb10;hp=98d262510746e16860f142adc61925b34cf376a6;hpb=a3ef385df58ad17a298859b21256b253eab584de;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Language/Base.pm b/lib/Text/Tradition/Language/Base.pm index 98d2625..954c30c 100644 --- a/lib/Text/Tradition/Language/Base.pm +++ b/lib/Text/Tradition/Language/Base.pm @@ -13,7 +13,8 @@ use Text::Tradition::Collation::Reading::Lexeme; use Text::Tradition::Collation::Reading::WordForm; use TryCatch; -@EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger /; +@EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct + multext_struct lfs_morph_tags /; =head1 NAME @@ -244,6 +245,52 @@ sub _treetag_string { return $tagresult->as_text(); } +## HACK function to correct for TagSet::TreeTagger brokenness +sub treetagger_struct { + my $pos = shift; + $pos =~ s/PREP/PRP/; + return Lingua::TagSet::TreeTagger->tag2structure( $pos ); +} + +sub multext_struct { + my $pos = shift; + # No known hacks needed + return Lingua::TagSet::Multext->tag2structure( $pos ); +} + +=head2 lfs_morph_tags + +Return a data structure describing the available parts of speech and their attributes +from the Lingua::Features::Structure class currently defined. + +=cut + +sub lfs_morph_tags { + load('Lingua::Features::StructureType'); + my $tagset = { 'structures' => [], 'features' => {} }; + foreach my $lfs ( sort { _by_structid( $a->id, $b->id ) } Lingua::Features::StructureType->types() ) { + my $tsstruct = { 'id' => $lfs->id, 'use_features' => [] }; + foreach my $ftid ( Lingua::Features::StructureType->type($lfs->id)->features ) { + my $ftype = $lfs->feature_type( $ftid ); + my $tfstruct = { 'id' => $ftid, 'values' => [] }; + foreach my $fval( $ftype->values ) { + push( @{$tfstruct->{'values'}}, + { 'short' => $fval, 'long' => $ftype->value_name( $fval ) } ); + } + push( @{$tsstruct->{'use_features'}}, $ftid ); + $tagset->{'features'}->{$ftid} = $tfstruct; + } + push( @{$tagset->{'structures'}}, $tsstruct ); + } + return $tagset; +} + +sub _by_structid { + my( $a, $b ) = @_; + return -1 if $a eq 'cat'; + return 1 if $b eq 'cat'; + return $a cmp $b; +} 1;