morphology/lib/Text/Tradition/Morphology.pm

   1 package Text::Tradition::Morphology;
   2
   3 use strict;
   4 use warnings;
   5 use JSON qw/ from_json /;
   6 use Moose::Role;
   7 use Module::Load;
   8 use Text::Tradition::Collation::Reading::Lexeme;
   9
  10 use vars qw/ $VERSION /;
  11 $VERSION = "0.1";
  12
  13 =head1 NAME
  14
  15 Text::Tradition::Morphology - add-on to associate lemma and part-of-speech
  16 information to Text::Tradition::Collation::Reading objects
  17
  18 =cut
  19
  20 has 'grammar_invalid' => (
  21         is => 'rw',
  22         isa => 'Bool',
  23         default => undef,
  24         );
  25
  26 has 'is_nonsense' => (
  27         is => 'rw',
  28         isa => 'Bool',
  29         default => undef,
  30         );
  31
  32 has 'normal_form' => (
  33         is => 'rw',
  34         isa => 'Str',
  35         predicate => '_has_normal_form',
  36         clearer => '_clear_normal_form',
  37         );
  38
  39 # Holds the lexemes for the reading.
  40 has 'reading_lexemes' => (
  41         traits => ['Array'],
  42         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
  43         handles => {
  44                 lexeme => 'get',
  45                 lexemes => 'elements',
  46                 has_lexemes => 'count',
  47                 clear_lexemes => 'clear',
  48                 add_lexeme => 'push',
  49                 },
  50         default => sub { [] },
  51         );
  52
  53
  54
  55 # Make normal_form default to text, transparently.
  56 around 'normal_form' => sub {
  57         my $orig = shift;
  58         my $self = shift;
  59         my( $arg ) = @_;
  60         if( $arg && $arg eq $self->text ) {
  61                 $self->_clear_normal_form;
  62                 return $arg;
  63         } elsif( !$arg && !$self->_has_normal_form ) {
  64                 return $self->text;
  65         } else {
  66                 $self->$orig( @_ );
  67         }
  68 };
  69
  70 =head1 METHODS
  71
  72 Methods for the morphological information (if any) attached to readings.
  73 A reading may be made up of multiple lexemes; the concatenated lexeme
  74 strings ought to match the reading's normalized form.
  75
  76 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
  77 on Lexeme objects and their attributes.
  78
  79 =head2 has_lexemes
  80
  81 Returns a true value if the reading has any attached lexemes.
  82
  83 =head2 lexemes
  84
  85 Returns the Lexeme objects (if any) attached to the reading.
  86
  87 =head2 clear_lexemes
  88
  89 Wipes any associated Lexeme objects out of the reading.
  90
  91 =head2 add_lexeme( $lexobj )
  92
  93 Adds the Lexeme in $lexobj to the list of lexemes.
  94
  95 =head2 lemmatize
  96
  97 If the language of the reading is set, this method will use the appropriate
  98 Language model to determine the lexemes that belong to this reading.  See
  99 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
 100
 101 =cut
 102
 103 sub lemmatize {
 104         my $self = shift;
 105         unless( $self->has_language ) {
 106                 warn "Please set a language to lemmatize a tradition";
 107                 return;
 108         }
 109         my $mod = "Text::Tradition::Language::" . $self->language;
 110         load( $mod );
 111         $mod->can( 'reading_lookup' )->( $self );
 112
 113 }
 114
 115 # For graph serialization. Return a JSON representation of the associated
 116 # reading lexemes.
 117 sub _serialize_lexemes {
 118         my $self = shift;
 119         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 120         return $json->encode( [ $self->lexemes ] );
 121 }
 122
 123 # Given a JSON representation of the lexemes, instantiate them and add
 124 # them to the reading.
 125 sub _deserialize_lexemes {
 126         my( $self, $json ) = @_;
 127         my $data = from_json( $json );
 128         return unless @$data;
 129
 130         my @lexemes;
 131         foreach my $lexhash ( @$data ) {
 132                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 133                         'JSON' => $lexhash ) );
 134         }
 135         $self->clear_lexemes;
 136         $self->add_lexeme( @lexemes );
 137 }
 138
 139 sub disambiguated {
 140         my $self = shift;
 141         return 0 unless $self->has_lexemes;
 142         return !grep { !$_->is_disambiguated } $self->lexemes;
 143 }
 144
 145 sub filter_lexemes {
 146         my $self = shift;
 147         # While we are here, get rid of any extra wordforms from a disambiguated
 148         # reading.
 149         if( $self->disambiguated ) {
 150                 foreach my $lex ( $self->lexemes ) {
 151                         $lex->clear_matching_forms();
 152                         $lex->add_matching_form( $lex->form );
 153                 }
 154         }
 155 }
 156
 157 around 'is_identical' => sub {
 158         my $orig = shift;
 159         my $self = shift;
 160         my $other = shift;
 161         # If the base class returns true, do an extra check to make sure the
 162         # lexemes also match.
 163         my $answer = $self->$orig( $other );
 164         if( $answer ) {
 165                 if( $self->disambiguated && $other->disambiguated ) {
 166                         my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
 167                         my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
 168                         $answer = undef unless $rform eq $uform;
 169                 } elsif( $self->disambiguated xor $other->disambiguated ) {
 170                         $answer = undef;
 171                 }
 172         }
 173         return $answer;
 174 };
 175
 176 around 'is_combinable' => sub {
 177         my $orig = shift;
 178         my $self = shift;
 179         # If the reading is marked with invalid grammar or as a nonsense reading,
 180         # it is no longer combinable.
 181         return undef if $self->grammar_invalid || $self->is_nonsense;
 182         return $self->$orig();
 183 };
 184
 185 after '_combine' => sub {
 186         my $self = shift;
 187         my $other = shift;
 188         my $joinstr = shift;
 189         $self->normal_form(
 190                 join( $joinstr, $self->normal_form, $other->normal_form ) );
 191         # Combine the lexemes present in the readings
 192         if( $self->has_lexemes && $other->has_lexemes ) {
 193                 $self->add_lexeme( $other->lexemes );
 194         }
 195 };
 196
 197 1;