X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FLanguage%2FBase.pm;h=954c30ca2160a6e1024a7305e44911c07cbdda7d;hb=75ae2b25a9925f075714d1471b211db3c30ffb10;hp=05f81c0d8a94ad00b91009da83dcb98bc4c21aea;hpb=5271a01157263fd46b8d504e0ddb0b349a3fb9c4;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Language/Base.pm b/lib/Text/Tradition/Language/Base.pm index 05f81c0..954c30c 100644 --- a/lib/Text/Tradition/Language/Base.pm +++ b/lib/Text/Tradition/Language/Base.pm @@ -14,7 +14,7 @@ use Text::Tradition::Collation::Reading::WordForm; use TryCatch; @EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct - multext_struct /; + multext_struct lfs_morph_tags /; =head1 NAME @@ -258,6 +258,40 @@ sub multext_struct { return Lingua::TagSet::Multext->tag2structure( $pos ); } +=head2 lfs_morph_tags + +Return a data structure describing the available parts of speech and their attributes +from the Lingua::Features::Structure class currently defined. + +=cut + +sub lfs_morph_tags { + load('Lingua::Features::StructureType'); + my $tagset = { 'structures' => [], 'features' => {} }; + foreach my $lfs ( sort { _by_structid( $a->id, $b->id ) } Lingua::Features::StructureType->types() ) { + my $tsstruct = { 'id' => $lfs->id, 'use_features' => [] }; + foreach my $ftid ( Lingua::Features::StructureType->type($lfs->id)->features ) { + my $ftype = $lfs->feature_type( $ftid ); + my $tfstruct = { 'id' => $ftid, 'values' => [] }; + foreach my $fval( $ftype->values ) { + push( @{$tfstruct->{'values'}}, + { 'short' => $fval, 'long' => $ftype->value_name( $fval ) } ); + } + push( @{$tsstruct->{'use_features'}}, $ftid ); + $tagset->{'features'}->{$ftid} = $tfstruct; + } + push( @{$tagset->{'structures'}}, $tsstruct ); + } + return $tagset; +} + +sub _by_structid { + my( $a, $b ) = @_; + return -1 if $a eq 'cat'; + return 1 if $b eq 'cat'; + return $a cmp $b; +} + 1; =head2 TODO