From: Tara L Andrews Date: Sat, 2 Jun 2012 14:54:15 +0000 (+0200) Subject: introspect for morphology values; include these in help; make sure Perseus results... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=75ae2b25a9925f075714d1471b211db3c30ffb10;p=scpubgit%2Fstemmatology.git introspect for morphology values; include these in help; make sure Perseus results are unique --- diff --git a/lib/Text/Tradition/Collation/Reading/WordForm.pm b/lib/Text/Tradition/Collation/Reading/WordForm.pm index 4335857..ebfa445 100644 --- a/lib/Text/Tradition/Collation/Reading/WordForm.pm +++ b/lib/Text/Tradition/Collation/Reading/WordForm.pm @@ -68,9 +68,8 @@ around BUILDARGS => sub { my $class = shift; my $args = @_ == 1 ? $_[0] : { @_ }; if( exists $args->{'JSON'} ) { - $DB::single = 1; my @data = split( / \/\/ /, $args->{'JSON'} ); - print STDERR "Attempting to parse " . $data[2] . " into structure"; + # print STDERR "Attempting to parse " . $data[2] . " into structure"; my $morph = Lingua::Features::Structure->from_string( $data[2] ); $args = { 'language' => $data[0], 'lemma' => $data[1], 'morphology' => $morph }; diff --git a/lib/Text/Tradition/Language/Base.pm b/lib/Text/Tradition/Language/Base.pm index 05f81c0..954c30c 100644 --- a/lib/Text/Tradition/Language/Base.pm +++ b/lib/Text/Tradition/Language/Base.pm @@ -14,7 +14,7 @@ use Text::Tradition::Collation::Reading::WordForm; use TryCatch; @EXPORT_OK = qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct - multext_struct /; + multext_struct lfs_morph_tags /; =head1 NAME @@ -258,6 +258,40 @@ sub multext_struct { return Lingua::TagSet::Multext->tag2structure( $pos ); } +=head2 lfs_morph_tags + +Return a data structure describing the available parts of speech and their attributes +from the Lingua::Features::Structure class currently defined. 
+ +=cut + +sub lfs_morph_tags { + load('Lingua::Features::StructureType'); + my $tagset = { 'structures' => [], 'features' => {} }; + foreach my $lfs ( sort { _by_structid( $a->id, $b->id ) } Lingua::Features::StructureType->types() ) { + my $tsstruct = { 'id' => $lfs->id, 'use_features' => [] }; + foreach my $ftid ( Lingua::Features::StructureType->type($lfs->id)->features ) { + my $ftype = $lfs->feature_type( $ftid ); + my $tfstruct = { 'id' => $ftid, 'values' => [] }; + foreach my $fval( $ftype->values ) { + push( @{$tfstruct->{'values'}}, + { 'short' => $fval, 'long' => $ftype->value_name( $fval ) } ); + } + push( @{$tsstruct->{'use_features'}}, $ftid ); + $tagset->{'features'}->{$ftid} = $tfstruct; + } + push( @{$tagset->{'structures'}}, $tsstruct ); + } + return $tagset; +} + +sub _by_structid { + my( $a, $b ) = @_; + return -1 if $a eq 'cat'; + return 1 if $b eq 'cat'; + return $a cmp $b; +} + 1; =head2 TODO diff --git a/lib/Text/Tradition/Language/English.pm b/lib/Text/Tradition/Language/English.pm index f477759..ea38a7f 100644 --- a/lib/Text/Tradition/Language/English.pm +++ b/lib/Text/Tradition/Language/English.pm @@ -2,7 +2,8 @@ package Text::Tradition::Language::English; use strict; use warnings; -use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger /; +use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger + lfs_morph_tags /; use TryCatch; =head1 NAME @@ -59,6 +60,16 @@ sub reading_lookup { return reading_lookup_treetagger( %opts ); } +=head2 morphology_tags + +Return a data structure describing the available parts of speech and their attributes. 
+ +=cut + +sub morphology_tags { + return lfs_morph_tags(); +} + # Utility function to turn a TreeTagger result into a WordForm sub _parse_wordform { my $tagresult = shift; diff --git a/lib/Text/Tradition/Language/French.pm b/lib/Text/Tradition/Language/French.pm index f820586..fa884ff 100644 --- a/lib/Text/Tradition/Language/French.pm +++ b/lib/Text/Tradition/Language/French.pm @@ -3,7 +3,7 @@ package Text::Tradition::Language::French; use strict; use warnings; use Module::Load qw/ load /; -use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct multext_struct /; +use Text::Tradition::Language::Base qw/ lemmatize_treetagger reading_lookup_treetagger treetagger_struct multext_struct lfs_morph_tags /; use TryCatch; =head1 NAME @@ -103,6 +103,16 @@ sub reading_lookup { return reading_lookup_treetagger( %opts ); } +=head2 morphology_tags + +Return a data structure describing the available parts of speech and their attributes. + +=cut + +sub morphology_tags { + return lfs_morph_tags(); +} + # Closure and utility function for the package lemmatizer { my $lemmatizer; diff --git a/lib/Text/Tradition/Language/Latin.pm b/lib/Text/Tradition/Language/Latin.pm index 9f11767..935547a 100644 --- a/lib/Text/Tradition/Language/Latin.pm +++ b/lib/Text/Tradition/Language/Latin.pm @@ -3,7 +3,9 @@ package Text::Tradition::Language::Latin; use strict; use warnings; use Module::Load; -use Text::Tradition::Language::Base qw/ lemmatize_treetagger treetagger_struct /; +use Morph::Perseus::Structure; +use Text::Tradition::Language::Base qw/ lemmatize_treetagger treetagger_struct + lfs_morph_tags /; use TryCatch; =head1 NAME @@ -86,6 +88,22 @@ sub reading_lookup { return map { _perseus_lookup_str( $_ ) } @words; } +=head2 morphology_tags + +Return a data structure describing the available parts of speech and their attributes. 
+ +=cut + +sub morphology_tags { + try { + load 'Morph::Perseus::Structure'; + } catch { + warn "Not using Perseus Latin tags"; + } + return lfs_morph_tags(); +} + + { my $morph; @@ -172,9 +190,10 @@ sub reading_lookup { } @ret = @orig unless @ret; - my @wordforms; + my %unique_wordforms; foreach my $obj ( @ret ) { - push( @wordforms, _wordform_from_row( $obj ) ); + my $wf = _wordform_from_row( $obj ); + $unique_wordforms{$wf->to_string} = $wf; } ## TODO Use TreeTagger info - requires serious hacking of Lingua::TagSet # Discard results that don't match the given TreeTagger POS, unless @@ -186,7 +205,7 @@ sub reading_lookup { # @ttmatch = @wordforms; # } # return @ttmatch; - return @wordforms; + return values( %unique_wordforms ); } sub _perseus_lookup_str { @@ -208,7 +227,6 @@ sub _wordform_from_row { warn "Could not create morphology structure from " . $rowobj->code . ": $!"; } - $DB::single = 1 unless $mpstruct; my $lemma = $rowobj->lemma; $lemma =~ s/^(\D+)\d*$/$1/; my $wf = Text::Tradition::Collation::Reading::WordForm->new( diff --git a/stemmaweb/lib/stemmaweb/Controller/Relation.pm b/stemmaweb/lib/stemmaweb/Controller/Relation.pm index 38cae7e..8ec7673 100644 --- a/stemmaweb/lib/stemmaweb/Controller/Relation.pm +++ b/stemmaweb/lib/stemmaweb/Controller/Relation.pm @@ -1,5 +1,6 @@ package stemmaweb::Controller::Relation; use Moose; +use Module::Load; use namespace::autoclean; use TryCatch; @@ -29,19 +30,6 @@ sub index :Path :Args(0) { $c->stash->{'template'} = 'relate.tt'; } -=head2 help - - GET relation/help - -Returns the help window HTML. 
- -=cut - -sub help :Local :Args(0) { - my( $self, $c ) = @_; - $c->stash->{'template'} = 'relatehelp.tt'; -} - =head2 definitions GET relation/definitions @@ -134,9 +122,38 @@ sub main :Chained('text') :PathPart('') :Args(0) { $c->stash->{'startseg'} = $startseg if defined $startseg; $c->stash->{'svg_string'} = $svg_str; $c->stash->{'text_title'} = $tradition->name; + $c->stash->{'text_lang'} = $tradition->language; $c->stash->{'template'} = 'relate.tt'; } +=head2 help + + GET relation/help/$language + +Returns the help window HTML. + +=cut + +sub help :Local :Args(1) { + my( $self, $c, $lang ) = @_; + # Display the morphological help for the language if it is defined. + if( $lang && $lang ne 'Default' ) { + my $mod = 'Text::Tradition::Language::' . $lang; + try { + load( $mod ); + } catch { + $c->log->debug("Warning: could not load $mod"); + } + my $has_mod = $mod->can('morphology_tags'); + $DB::single = 1; + if( $has_mod ) { + my $tagset = &$has_mod; + $c->stash->{'tagset'} = $tagset; + } + } + $c->stash->{'template'} = 'relatehelp.tt'; +} + =head2 relationships GET relation/$textid/relationships diff --git a/stemmaweb/root/src/relate.tt b/stemmaweb/root/src/relate.tt index 2aa7d3a..3e35dc7 100644 --- a/stemmaweb/root/src/relate.tt +++ b/stemmaweb/root/src/relate.tt @@ -13,7 +13,7 @@ $(document).ready(function () {

Relationship mapper

[% text_title %]

diff --git a/stemmaweb/root/src/relatehelp.tt b/stemmaweb/root/src/relatehelp.tt index 607f22d..9995d28 100644 --- a/stemmaweb/root/src/relatehelp.tt +++ b/stemmaweb/root/src/relatehelp.tt @@ -11,7 +11,7 @@

The premise of the tool is that, once a set of texts has been collated, there will be a need to chart the relationships between the variants—are they substantially the same word? Different words meaning the same thing? Is one an orthographic variant of the other that should be excluded from any eventual apparatus?

-

Instructions for use

+

Making relationships between words

The tool itself is an interface for allowing these relationships to be declared. The collation is presented as a variant graph running from left to right. In a variant graph, each node is a reading, and each witness takes a single path through the readings from beginning to end. When readings appear vertically aligned with each other, it is an indication that they are variant readings, occurring at the same point in the text. @@ -35,6 +35,41 @@

The relationships are displayed as colored paths between readings; while in 'edit' mode, clicking on a relationship path will display the information associated with it, and give the user an option to delete it. Deletion of a 'global' relationship will remove that relationship throughout the graph. When you are ready to move elsewhere in the graph, click the 'hand' icon to return to select mode.

-

Please note that this tool is known to work with recent versions of Firefox (e.g. 8, 9, 10); it is known not to work with Safari and Chrome.

+[% IF language != 'NONE' %] +

Adding [% language %] morphological information to readings

+

It is also possible to add morphological information to the readings in this text (that is, lemma and morphological tagging). Double-click on any reading to bring up the morphology info. The options therein are:

+ + + +

If initial lemmatization has been performed on the text, a number of readings may appear in yellow rather than green; this means that there are multiple possible morphologies for the reading in question. Double-click on the reading to select and save the correct morphology.

+[% END -%] [% PROCESS footer.tt %] \ No newline at end of file