morphology/lib/Text/Tradition/Morphology.pm

   1 package Text::Tradition::Morphology;
   2
   3 use strict;
   4 use warnings;
   5 use JSON qw/ from_json /;
   6 use Moose::Role;
   7 use Module::Load;
   8 use Text::Tradition::Collation::Reading::Lexeme;
   9
  10 =head1 NAME
  11
  12 Text::Tradition::Morphology - add-on to associate lemma and part-of-speech
  13 information to Text::Tradition::Collation::Reading objects
  14
  15 =cut
  16
  17 has 'grammar_invalid' => (
  18         is => 'rw',
  19         isa => 'Bool',
  20         default => undef,
  21         );
  22
  23 has 'is_nonsense' => (
  24         is => 'rw',
  25         isa => 'Bool',
  26         default => undef,
  27         );
  28
  29 has 'normal_form' => (
  30         is => 'rw',
  31         isa => 'Str',
  32         predicate => '_has_normal_form',
  33         clearer => '_clear_normal_form',
  34         );
  35
  36 # Holds the lexemes for the reading.
  37 has 'reading_lexemes' => (
  38         traits => ['Array'],
  39         isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
  40         handles => {
  41                 lexeme => 'get',
  42                 lexemes => 'elements',
  43                 has_lexemes => 'count',
  44                 clear_lexemes => 'clear',
  45                 add_lexeme => 'push',
  46                 },
  47         default => sub { [] },
  48         );
  49
  50
  51
  52 # Make normal_form default to text, transparently.
  53 around 'normal_form' => sub {
  54         my $orig = shift;
  55         my $self = shift;
  56         my( $arg ) = @_;
  57         if( $arg && $arg eq $self->text ) {
  58                 $self->_clear_normal_form;
  59                 return $arg;
  60         } elsif( !$arg && !$self->_has_normal_form ) {
  61                 return $self->text;
  62         } else {
  63                 $self->$orig( @_ );
  64         }
  65 };
  66
  67 =head1 METHODS
  68
  69 Methods for the morphological information (if any) attached to readings.
  70 A reading may be made up of multiple lexemes; the concatenated lexeme
  71 strings ought to match the reading's normalized form.
  72
  73 See L<Text::Tradition::Collation::Reading::Lexeme> for more information
  74 on Lexeme objects and their attributes.
  75
  76 =head2 has_lexemes
  77
  78 Returns a true value if the reading has any attached lexemes.
  79
  80 =head2 lexemes
  81
  82 Returns the Lexeme objects (if any) attached to the reading.
  83
  84 =head2 clear_lexemes
  85
  86 Wipes any associated Lexeme objects out of the reading.
  87
  88 =head2 add_lexeme( $lexobj )
  89
  90 Adds the Lexeme in $lexobj to the list of lexemes.
  91
  92 =head2 lemmatize
  93
  94 If the language of the reading is set, this method will use the appropriate
  95 Language model to determine the lexemes that belong to this reading.  See
  96 L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
  97
  98 =cut
  99
 100 sub lemmatize {
 101         my $self = shift;
 102         unless( $self->has_language ) {
 103                 warn "Please set a language to lemmatize a tradition";
 104                 return;
 105         }
 106         my $mod = "Text::Tradition::Language::" . $self->language;
 107         load( $mod );
 108         $mod->can( 'reading_lookup' )->( $self );
 109
 110 }
 111
 112 # For graph serialization. Return a JSON representation of the associated
 113 # reading lexemes.
 114 sub _serialize_lexemes {
 115         my $self = shift;
 116         my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
 117         return $json->encode( [ $self->lexemes ] );
 118 }
 119
 120 # Given a JSON representation of the lexemes, instantiate them and add
 121 # them to the reading.
 122 sub _deserialize_lexemes {
 123         my( $self, $json ) = @_;
 124         my $data = from_json( $json );
 125         return unless @$data;
 126
 127         my @lexemes;
 128         foreach my $lexhash ( @$data ) {
 129                 push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
 130                         'JSON' => $lexhash ) );
 131         }
 132         $self->clear_lexemes;
 133         $self->add_lexeme( @lexemes );
 134 }
 135
 136 sub disambiguated {
 137         my $self = shift;
 138         return 0 unless $self->has_lexemes;
 139         return !grep { !$_->is_disambiguated } $self->lexemes;
 140 }
 141
 142 sub filter_lexemes {
 143         my $self = shift;
 144         # While we are here, get rid of any extra wordforms from a disambiguated
 145         # reading.
 146         if( $self->disambiguated ) {
 147                 foreach my $lex ( $self->lexemes ) {
 148                         $lex->clear_matching_forms();
 149                         $lex->add_matching_form( $lex->form );
 150                 }
 151         }
 152 }
 153
 154 around 'is_identical' => sub {
 155         my $orig = shift;
 156         my $self = shift;
 157         my $other = shift;
 158         # If the base class returns true, do an extra check to make sure the
 159         # lexemes also match.
 160         my $answer = $self->$orig( $other );
 161         if( $answer ) {
 162                 if( $self->disambiguated && $other->disambiguated ) {
 163                         my $rform = join( '//', map { $_->form->to_string } $self->lexemes );
 164                         my $uform = join( '//', map { $_->form->to_string } $other->lexemes );
 165                         $answer = undef unless $rform eq $uform;
 166                 } elsif( $self->disambiguated xor $other->disambiguated ) {
 167                         $answer = undef;
 168                 }
 169         }
 170         return $answer;
 171 };
 172
 173 around 'is_combinable' => sub {
 174         my $orig = shift;
 175         my $self = shift;
 176         # If the reading is marked with invalid grammar or as a nonsense reading,
 177         # it is no longer combinable.
 178         return undef if $self->grammar_invalid || $self->is_nonsense;
 179         return $self->$orig();
 180 };
 181
 182 after '_combine' => sub {
 183         my $self = shift;
 184         my $other = shift;
 185         my $joinstr = shift;
 186         $self->normal_form(
 187                 join( $joinstr, $self->normal_form, $other->normal_form ) );
 188         # Combine the lexemes present in the readings
 189         if( $self->has_lexemes && $other->has_lexemes ) {
 190                 $self->add_lexeme( $other->lexemes );
 191         }
 192 };
 193
 194 1;