use improvements in Lingua packages throughout our lexeme tagging
[scpubgit/stemmatology.git] / lib / Text / Tradition / Language / Base.pm
index 954c30c..dab12f2 100644 (file)
@@ -6,15 +6,12 @@ use Encode qw/ encode_utf8 decode_utf8 /;
 use Exporter 'import';
 use vars qw/ @EXPORT_OK /;
 use IPC::Run qw/ run /;
-use Lingua::TagSet::Multext;
-use Lingua::TagSet::TreeTagger;
 use Module::Load;
 use Text::Tradition::Collation::Reading::Lexeme;
 use Text::Tradition::Collation::Reading::WordForm;
 use TryCatch;
 
-@EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct
-       multext_struct lfs_morph_tags /;
+@EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger lfs_morph_tags /;
 
 =head1 NAME
 
@@ -245,19 +242,6 @@ sub _treetag_string {
        return $tagresult->as_text();
 }
 
-## HACK function to correct for TagSet::TreeTagger brokenness
-sub treetagger_struct {
-       my $pos = shift;
-       $pos =~ s/PREP/PRP/;
-       return Lingua::TagSet::TreeTagger->tag2structure( $pos );
-}
-
-sub multext_struct {
-       my $pos = shift;
-       # No known hacks needed
-       return Lingua::TagSet::Multext->tag2structure( $pos );
-}
-
 =head2 lfs_morph_tags
 
 Return a data structure describing the available parts of speech and their attributes
@@ -269,16 +253,25 @@ sub lfs_morph_tags {
        load('Lingua::Features::StructureType');
        my $tagset = { 'structures' => [], 'features' => {} };
        foreach my $lfs ( sort { _by_structid( $a->id, $b->id ) } Lingua::Features::StructureType->types() ) {
-               my $tsstruct = { 'id' => $lfs->id, 'use_features' => [] };
+               my $tsstruct = { 'id' => $lfs->id, 'desc' => $lfs->desc, 'use_features' => [] };
                foreach my $ftid ( Lingua::Features::StructureType->type($lfs->id)->features ) {
                        my $ftype = $lfs->feature_type( $ftid );
-                       my $tfstruct = { 'id' => $ftid, 'values' => [] };
-                       foreach my $fval( $ftype->values ) {
-                               push( @{$tfstruct->{'values'}}, 
-                                       { 'short' => $fval, 'long' => $ftype->value_name( $fval ) } );
+                       if( !$ftype && $lfs->base ) {
+                               $ftype = $lfs->base->feature_type( $ftid );
+                       }
+                       if( $ftype ) {
+                               push( @{$tsstruct->{'use_features'}}, $ftid );
+                               if( $ftid eq 'type' ) {
+                                       # Type values change according to category
+                                       $ftid .= " (" . $lfs->id . ")";
+                               }
+                               my $tfstruct = { 'id' => $ftid, 'values' => [] };
+                               foreach my $fval( $ftype->values ) {
+                                       push( @{$tfstruct->{'values'}}, 
+                                               { 'short' => $fval, 'long' => $ftype->value_name( $fval ) } );
+                               }
+                               $tagset->{'features'}->{$ftid} = $tfstruct;
                        }
-                       push( @{$tsstruct->{'use_features'}}, $ftid );
-                       $tagset->{'features'}->{$ftid} = $tfstruct;
                }
                push( @{$tagset->{'structures'}}, $tsstruct );
        }