morphology/lib/Text/Tradition/Morphology.pm

   1 package Text::Tradition::Morphology;
   2
   3 use strict;
   4 use warnings;
   5 use JSON qw/ from_json /;
   6 use Moose::Role;
   7 use Module::Load;
   8 use Text::Tradition::Collation::Reading::Lexeme;
   9
  10 use vars qw/ $VERSION /;
  11 $VERSION = "0.1";
  12
  13 =head1 NAME
  14
  15 Text::Tradition::Morphology - morphology plugin for Text::Tradition
  16
  17 =head1 DESCRIPTION
  18
  19 The Text::Tradition::Morphology package enables lemma and part-of-speech
  20 information for traditions and their Reading objects. This distribution
  21 includes the L<Text::Tradition::Language> role for Traditions, the
  22 L<Text::Tradition::Morphology> role (this package) for Readings, and a set
  23 of Language::* modules for language-specific lemmatization.
  24
  25 See L<Text::Tradition::Collation::Reading::Lexeme> for more about the
  26 morphology object structure.
  27
  28 =cut
  29
  30 requires 'is_identical', 'is_combinable', '_combine';
  31
  32 has 'grammar_invalid' => (
  33         is => 'rw',
  34         isa => 'Bool',
  35         default => undef,
  36         );
  37
  38 has 'is_nonsense' => (
  39         is => 'rw',
  40         isa => 'Bool',
  41         default => undef,
  42         );
  43
  44 has 'normal_form' => (
  45         is => 'rw',
  46         isa => 'Str',
  47         predicate => '_has_normal_form',
  48         clearer => '_clear_normal_form',
  49         );
  50
  51 # Holds the lexemes for the reading.
  52 has 'reading_lexemes' => (
  53         traits => ['Array'],
  54         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
  55         handles => {
  56                 lexeme => 'get',
  57                 lexemes => 'elements',
  58                 has_lexemes => 'count',
  59                 clear_lexemes => 'clear',
  60                 add_lexeme => 'push',
  61                 },
  62         default => sub { [] },
  63         );
  64
  65
  66
  67 # Make normal_form default to text, transparently.
  68 around 'normal_form' => sub {
  69         my $orig = shift;
  70         my $self = shift;
  71         my( $arg ) = @_;
  72         if( $arg && $arg eq $self->text ) {
  73                 $self->_clear_normal_form;
  74                 return $arg;
  75         } elsif( !$arg && !$self->_has_normal_form ) {
  76                 return $self->text;
  77         } else {
  78                 $self->$orig( @_ );
  79         }
  80 };
  81
  82 =head1 READING METHODS
  83
  84 Methods for the morphological information (if any) attached to readings.
  85 A reading may be made up of multiple lexemes; the concatenated lexeme
  86 strings ought to match the reading's normalized form.
  87
  88 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
  89 on Lexeme objects and their attributes.
  90
  91 =head2 has_lexemes
  92
  93 Returns a true value if the reading has any attached lexemes.
  94
  95 =head2 lexemes
  96
  97 Returns the Lexeme objects (if any) attached to the reading.
  98
  99 =head2 clear_lexemes
 100
 101 Wipes any associated Lexeme objects out of the reading.
 102
 103 =head2 add_lexeme( $lexobj )
 104
 105 Adds the Lexeme in $lexobj to the list of lexemes.
 106
 107 =head2 lemmatize
 108
 109 If the language of the reading is set, this method will use the appropriate
 110 Language model to determine the lexemes that belong to this reading.  See
 111 L<Text::Tradition::Language::lemmatize> if you wish to lemmatize an entire tradition.
 112
 113 =cut
 114
 115 sub lemmatize {
 116         my $self = shift;
 117         unless( $self->has_language ) {
 118                 warn "Please set a language to lemmatize a tradition";
 119                 return;
 120         }
 121         my $mod = "Text::Tradition::Language::" . $self->language;
 122         load( $mod );
 123         $mod->can( 'reading_lookup' )->( $self );
 124
 125 }
 126
 127 # For graph serialization. Return a JSON representation of the associated
 128 # reading lexemes.
 129 sub _serialize_lexemes {
 130         my $self = shift;
 131         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 132         return $json->encode( [ $self->lexemes ] );
 133 }
 134
 135 # Given a JSON representation of the lexemes, instantiate them and add
 136 # them to the reading.
 137 sub _deserialize_lexemes {
 138         my( $self, $json ) = @_;
 139         my $data = from_json( $json );
 140         return unless @$data;
 141
 142         my @lexemes;
 143         foreach my $lexhash ( @$data ) {
 144                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 145                         'JSON' => $lexhash ) );
 146         }
 147         $self->clear_lexemes;
 148         $self->add_lexeme( @lexemes );
 149 }
 150
 151 sub disambiguated {
 152         my $self = shift;
 153         return 0 unless $self->has_lexemes;
 154         return !grep { !$_->is_disambiguated } $self->lexemes;
 155 }
 156
 157 sub filter_lexemes {
 158         my $self = shift;
 159         # While we are here, get rid of any extra wordforms from a disambiguated
 160         # reading.
 161         if( $self->disambiguated ) {
 162                 foreach my $lex ( $self->lexemes ) {
 163                         $lex->clear_matching_forms();
 164                         $lex->add_matching_form( $lex->form );
 165                 }
 166         }
 167 }
 168
 169 around 'is_identical' => sub {
 170         my $orig = shift;
 171         my $self = shift;
 172         my $other = shift;
 173         # If the base class returns true, do an extra check to make sure the
 174         # lexemes also match.
 175         my $answer = $self->$orig( $other );
 176         if( $answer ) {
 177                 if( $self->disambiguated && $other->disambiguated ) {
 178                         my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
 179                         my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
 180                         $answer = undef unless $rform eq $uform;
 181                 } elsif( $self->disambiguated xor $other->disambiguated ) {
 182                         $answer = undef;
 183                 }
 184         }
 185         return $answer;
 186 };
 187
 188 around 'is_combinable' => sub {
 189         my $orig = shift;
 190         my $self = shift;
 191         # If the reading is marked with invalid grammar or as a nonsense reading,
 192         # it is no longer combinable.
 193         return undef if $self->grammar_invalid || $self->is_nonsense;
 194         return $self->$orig();
 195 };
 196
 197 after '_combine' => sub {
 198         my $self = shift;
 199         my $other = shift;
 200         my $joinstr = shift;
 201         $self->normal_form(
 202                 join( $joinstr, $self->normal_form, $other->normal_form ) );
 203         # Combine the lexemes present in the readings
 204         if( $self->has_lexemes && $other->has_lexemes ) {
 205                 $self->add_lexeme( $other->lexemes );
 206         }
 207 };
 208
 209 1;
 210
 211 =head1 LICENSE
 212
 213 This package is free software and is provided "as is" without express
 214 or implied warranty.  You can redistribute it and/or modify it under
 215 the same terms as Perl itself.
 216
 217 =head1 AUTHOR
 218
 219 Tara L Andrews E<lt>aurum@cpan.orgE<gt>