From: Tara L Andrews Date: Fri, 14 Sep 2012 12:41:05 +0000 (+0200) Subject: split tradition language into morphology module; add license blurb to morphology... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e92d422902558330248353c72c5297be253b9ddd;p=scpubgit%2Fstemmatology.git split tradition language into morphology module; add license blurb to morphology packages --- diff --git a/base/lib/Text/Tradition.pm b/base/lib/Text/Tradition.pm index fb66a03..fda7b3c 100644 --- a/base/lib/Text/Tradition.pm +++ b/base/lib/Text/Tradition.pm @@ -18,6 +18,7 @@ eval { with 'Text::Tradition::HasStemma'; }; if( $@ ) { warn "Text::Tradition::Analysis not found. Disabling stemma analysis functionality"; }; +eval { with 'Text::Tradition::Language'; }; has 'collation' => ( is => 'ro', @@ -44,12 +45,6 @@ has 'name' => ( default => 'Tradition', ); -has 'language' => ( - is => 'rw', - isa => 'Str', - predicate => 'has_language', - ); - has '_initialized' => ( is => 'ro', isa => 'Bool', @@ -80,8 +75,6 @@ around 'add_witness' => sub { # TODO allow add of a Witness object? my %args = @_ == 1 ? %{$_[0]} : @_; $args{'tradition'} = $self; - $args{'language'} = $self->language - if( $self->language && !exists $args{'language'} ); my $new_wit = Text::Tradition::Witness->new( %args ); $self->$orig( $new_wit->sigil => $new_wit ); return $new_wit; @@ -326,25 +319,6 @@ sub add_json_witnesses { } } -=head2 lemmatize - -Calls the appropriate lemmatization function for the language of the -tradition. Will throw an error if the Morphology package is not installed. - -=cut - -# TODO find a better way to hook this -sub lemmatize { - my $self = shift; - unless( $self->has_language ) { - warn "Please set a language to lemmatize a tradition"; - return; - } - my $mod = "Text::Tradition::Language::" . $self->language; - load( $mod ); - $mod->can( 'lemmatize' )->( $self ); -} - sub throw { Text::Tradition::Error->throw( 'ident' => 'Tradition error', diff --git a/base/lib/Text/Tradition/Collation.pm b/base/lib/Text/Tradition/Collation.pm index 216e731..7c7335f 100644 --- a/base/lib/Text/Tradition/Collation.pm +++ b/base/lib/Text/Tradition/Collation.pm @@ -243,7 +243,7 @@ sub add_reading { # If we are initializing an empty collation, don't assume that we # have set a tradition. delete $args{'init'}; - } elsif( $self->tradition->has_language && !exists $args{'language'} ) { + } elsif( $self->tradition->can('language') && !exists $args{'language'} ) { $args{'language'} = $self->tradition->language; } $reading = Text::Tradition::Collation::Reading->new( diff --git a/base/lib/Text/Tradition/Collation/RelationshipStore.pm b/base/lib/Text/Tradition/Collation/RelationshipStore.pm index 4342bd9..117f55a 100644 --- a/base/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/base/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -260,9 +260,9 @@ use Text::Tradition; use TryCatch; my $t1; -warning_is { +warnings_exist { $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); -} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading', +} [qr/Cannot set relationship on a meta reading/], "Got expected relationship drop warning on parse"; # Test 1.1: try to equate nodes that are prevented with an intermediate collation @@ -319,9 +319,9 @@ try { # Test 2.1: try to equate nodes that are prevented with a real intermediate # equivalence my $t2; -warning_is { +warnings_exist { $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); -} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading', +} [qr/Cannot set relationship on a meta reading/], "Got expected relationship drop warning on parse"; my $c2 = $t2->collation; $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } ); @@ -529,7 +529,6 @@ sub _find_applicable { my( $self, $rel ) = @_; my $c = $self->collation; # TODO Someday we might use a case sensitive language. - my $lang = $c->tradition->language; my @vectors; my @identical_readings; if( $rel->type eq 'orthographic' ) { diff --git a/base/lib/Text/Tradition/Parser/Self.pm b/base/lib/Text/Tradition/Parser/Self.pm index 5d0858a..4ca6e63 100644 --- a/base/lib/Text/Tradition/Parser/Self.pm +++ b/base/lib/Text/Tradition/Parser/Self.pm @@ -121,7 +121,10 @@ if( $t ) { # TODO add a relationship, add a stemma, write graphml, reparse it, check that # the new data is there -$t->language('Greek'); +my $language_enabled = $t->can('language'); +if( $language_enabled ) { + $t->language('Greek'); +} my $stemma_enabled = $t->can('add_stemma'); if( $stemma_enabled ) { $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); @@ -139,7 +142,9 @@ if( $newt ) { is( scalar $newt->collation->paths, 376, "Collation has all paths" ); is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); - is( $newt->language, 'Greek', "Tradition has correct language setting" ); + if( $language_enabled ) { + is( $newt->language, 'Greek', "Tradition has correct language setting" ); + } my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); ok( $rel, "Found set relationship" ); is( $rel->annotation, 'This is some note', "Relationship has its properties" ); @@ -214,6 +219,12 @@ sub parse { } else { warn "Analysis module not installed; DROPPING stemmata"; } + } elsif( $gkey eq 'language' ) { + if( $tradition->can('language') ) { + $tradition->language( $val ); + } else { + warn "Morphology module not installed; DROPPING language"; + } } elsif( $gkey eq 'user' ) { # Assign the tradition to the user if we can if( exists $opts->{'userstore'} ) { diff --git a/base/lib/Text/Tradition/Witness.pm b/base/lib/Text/Tradition/Witness.pm index ea87cfc..5c32b36 100644 --- a/base/lib/Text/Tradition/Witness.pm +++ b/base/lib/Text/Tradition/Witness.pm @@ -53,10 +53,6 @@ behavior is to use the first defined text. If this is not desired, use_text should be set to an XPath expression that will select the correct text. -=item * language - The name of the applicable L -module for language handling. Usually inherited from the language set in -the L object, and defaults to Default. - =item * identifier - The recognized name of the manuscript, e.g. a library identifier. Taken from the msDesc element for a TEI file. @@ -107,10 +103,6 @@ text, if any, of the manuscript. This should not change after the witness has been instantiated, and the path through the collation should always match it. -=head2 language - -Accessor method to get the witness language. - =head2 identifier Accessor method for the witness identifier. @@ -216,12 +208,6 @@ has 'sigil' => ( writer => '_set_sigil', ); -has 'language' => ( - is => 'ro', - isa => 'Str', - default => 'Default', - ); - # Other identifying information has 'identifier' => ( is => 'rw', @@ -605,7 +591,7 @@ sub _split_words { my @words; foreach my $w ( @raw_words ) { my $id = $self->sigil . 'r'. $c++; - my %opts = ( 'text' => $w, 'id' => $id, 'language' => $self->language ); + my %opts = ( 'text' => $w, 'id' => $id ); my $w_obj = $self->tradition->collation->add_reading( \%opts ); # Skip any words that have been canonized out of existence. next if( length( $w_obj->text ) == 0 ); diff --git a/base/t/text_tradition_collation_relationshipstore.t b/base/t/text_tradition_collation_relationshipstore.t index 4f666e0..6fe65da 100644 --- a/base/t/text_tradition_collation_relationshipstore.t +++ b/base/t/text_tradition_collation_relationshipstore.t @@ -47,9 +47,9 @@ use Text::Tradition; use TryCatch; my $t1; -warning_is { +warnings_exist { $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); -} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading', +} [qr/Cannot set relationship on a meta reading/], "Got expected relationship drop warning on parse"; # Test 1.1: try to equate nodes that are prevented with an intermediate collation @@ -106,9 +106,9 @@ try { # Test 2.1: try to equate nodes that are prevented with a real intermediate # equivalence my $t2; -warning_is { +warnings_exist { $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); -} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading', +} [qr/Cannot set relationship on a meta reading/], "Got expected relationship drop warning on parse"; my $c2 = $t2->collation; $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } ); diff --git a/base/t/text_tradition_parser_self.t b/base/t/text_tradition_parser_self.t index f53b16f..ed921aa 100644 --- a/base/t/text_tradition_parser_self.t +++ b/base/t/text_tradition_parser_self.t @@ -34,7 +34,10 @@ if( $t ) { # TODO add a relationship, add a stemma, write graphml, reparse it, check that # the new data is there -$t->language('Greek'); +my $language_enabled = $t->can('language'); +if( $language_enabled ) { + $t->language('Greek'); +} my $stemma_enabled = $t->can('add_stemma'); if( $stemma_enabled ) { $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); @@ -52,7 +55,9 @@ if( $newt ) { is( scalar $newt->collation->paths, 376, "Collation has all paths" ); is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); - is( $newt->language, 'Greek', "Tradition has correct language setting" ); + if( $language_enabled ) { + is( $newt->language, 'Greek', "Tradition has correct language setting" ); + } my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); ok( $rel, "Found set relationship" ); is( $rel->annotation, 'This is some note', "Relationship has its properties" ); diff --git a/morphology/lib/Text/Tradition/Language.pm b/morphology/lib/Text/Tradition/Language.pm new file mode 100644 index 0000000..bf221aa --- /dev/null +++ b/morphology/lib/Text/Tradition/Language.pm @@ -0,0 +1,67 @@ +package Text::Tradition::Language; + +use strict; +use warnings; +use Moose::Role; + +=head1 NAME + +Text::Tradition::Language - add-on role to enable language awareness and +morphology functions to a Text::Tradition object. See also +L for individual reading morphologies. + +=head1 METHODS + +=head2 language + +Accessor for the primary language of the tradition. Must correspond to one +of the Text::Tradition::Language::* modules in this package. + +=cut + +has 'language' => ( + is => 'rw', + isa => 'Str', + predicate => 'has_language', + ); + +before 'language' => sub { + my $self = shift; + if( @_ && $_[0] ne 'Default' ) { + # We are trying to set the language; check that the corresponding + # module exists. + eval "require Text::Tradition::Language::".$_[0]; + if( $@ ) { + throw( "Cannot load language module for @_: $@" ); + } + } +}; + +=head2 lemmatize + +Calls the appropriate lemmatization function for the language of the +tradition. + +=cut + +sub lemmatize { + my $self = shift; + unless( $self->has_language ) { + throw( "Please set a language to lemmatize a tradition" ); + } + my $mod = "Text::Tradition::Language::" . $self->language; + load( $mod ); + $mod->can( 'lemmatize' )->( $self ); +} + +1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE diff --git a/morphology/lib/Text/Tradition/Language/Armenian.pm b/morphology/lib/Text/Tradition/Language/Armenian.pm index 8d09353..f6b8aa9 100644 --- a/morphology/lib/Text/Tradition/Language/Armenian.pm +++ b/morphology/lib/Text/Tradition/Language/Armenian.pm @@ -71,3 +71,13 @@ sub reading_lookup { } 1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE diff --git a/morphology/lib/Text/Tradition/Language/Greek.pm b/morphology/lib/Text/Tradition/Language/Greek.pm index 28d69cf..d857417 100644 --- a/morphology/lib/Text/Tradition/Language/Greek.pm +++ b/morphology/lib/Text/Tradition/Language/Greek.pm @@ -71,3 +71,13 @@ sub reading_lookup { } 1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE diff --git a/morphology/lib/Text/Tradition/Language/Latin.pm b/morphology/lib/Text/Tradition/Language/Latin.pm index 2f4a42a..9ba62da 100644 --- a/morphology/lib/Text/Tradition/Language/Latin.pm +++ b/morphology/lib/Text/Tradition/Language/Latin.pm @@ -71,3 +71,13 @@ sub reading_lookup { } 1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE diff --git a/morphology/lib/Text/Tradition/Language/Perseus.pm b/morphology/lib/Text/Tradition/Language/Perseus.pm index ebbcd54..4939a2c 100644 --- a/morphology/lib/Text/Tradition/Language/Perseus.pm +++ b/morphology/lib/Text/Tradition/Language/Perseus.pm @@ -134,3 +134,13 @@ sub _wordform_from_row { } 1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE diff --git a/morphology/lib/Text/Tradition/Morphology.pm b/morphology/lib/Text/Tradition/Morphology.pm index 77c98ad..1c3b913 100644 --- a/morphology/lib/Text/Tradition/Morphology.pm +++ b/morphology/lib/Text/Tradition/Morphology.pm @@ -194,4 +194,14 @@ after '_combine' => sub { } }; -1; \ No newline at end of file +1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE