Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::WordForm; |
2 | |
70745e70 |
3 | use Lingua::Features::Structure; |
7604424b |
4 | use JSON (); |
5 | use Moose; |
a7f4020a |
6 | use Text::Tradition::Error; |
7 | use TryCatch; |
cca4f996 |
8 | |
9 | =head1 NAME |
10 | |
11 | Text::Tradition::Collation::Reading::WordForm - represents a |
12 | language/lemma/morphology triplet that can be associated with a Reading. |
13 | |
14 | =head1 DESCRIPTION |
15 | |
16 | Text::Tradition is a library for representation and analysis of collated |
17 | texts, particularly medieval ones. A word form is used for the analysis of |
18 | Reading objects; it consists of a lemma, a language, and a code to |
19 | represent its part of speech. In general the word forms for a particular |
20 | language should be read from / written to some morphological database. |
21 | |
22 | =head1 METHODS |
23 | |
24 | =head2 new |
25 | |
26 | Creates a new word form from the passed options. |
27 | |
28 | =head2 language |
29 | |
30 | Returns the language to which this word form belongs. |
31 | |
32 | =head2 lemma |
33 | |
34 | Returns the lemma for the word form. |
35 | |
36 | =head2 morphology |
37 | |
38 | Returns an array representing this word's morphology. The contents of the |
39 | array depend on the language being used. |
40 | |
41 | =cut |
42 | |
43 | has 'language' => ( |
44 | is => 'ro', |
45 | isa => 'Str', |
46 | required => 1, |
47 | ); |
48 | |
cca4f996 |
49 | has 'lemma' => ( |
50 | is => 'ro', |
51 | isa => 'Str', |
52 | required => 1, |
53 | ); |
54 | |
896fe649 |
55 | has 'morphstr' => ( |
cca4f996 |
56 | is => 'ro', |
896fe649 |
57 | isa => 'Str', |
cca4f996 |
58 | required => 1, |
59 | ); |
60 | |
70745e70 |
61 | around BUILDARGS => sub { |
62 | my $orig = shift; |
63 | my $class = shift; |
64 | my $args = @_ == 1 ? $_[0] : { @_ }; |
7604424b |
65 | if( exists $args->{'JSON'} ) { |
da83693e |
66 | my @data = split( / \/\/ /, $args->{'JSON'} ); |
75ae2b25 |
67 | # print STDERR "Attempting to parse " . $data[2] . " into structure"; |
da83693e |
68 | $args = { 'language' => $data[0], 'lemma' => $data[1], |
896fe649 |
69 | 'morphstr' => $data[2] }; |
70 | } elsif( exists $args->{'morphology'} ) { |
71 | # Backwards compat |
72 | my $mobj = delete $args->{'morphology'}; |
73 | $args->{'morphstr'} = $mobj->to_string() |
74 | if ref $mobj; |
70745e70 |
75 | } |
76 | $class->$orig( $args ); |
77 | }; |
896fe649 |
78 | |
79 | =head2 morphology |
80 | |
81 | Returns a Lingua::Features::Structure object that corresponds to morphstr. |
82 | |
83 | =cut |
84 | |
85 | sub morphology { |
86 | my $self = shift; |
87 | return unless $self->morphstr; |
88 | my $struct; |
89 | try { |
90 | $struct = Lingua::Features::Structure->from_string( $self->morphstr ); |
91 | } catch { |
92 | throw( "Morphology string " . $self->morphstr . " does not parse" ); |
93 | } |
94 | return $struct; |
95 | } |
70745e70 |
96 | |
6ad2ce78 |
97 | =head2 to_string |
98 | |
99 | Returns a string combination of language/lemma/morphology that can be used |
100 | in equivalence testing. |
101 | |
102 | =cut |
103 | |
104 | sub to_string { |
cca4f996 |
105 | my $self = shift; |
7604424b |
106 | return JSON->new->convert_blessed(1)->encode( $self ); |
107 | } |
108 | |
da83693e |
109 | # Rather than spitting it out as a JSON hash, encode it as a string so that |
110 | # the XML serialization doesn't become insane. |
7604424b |
111 | sub TO_JSON { |
112 | my $self = shift; |
da83693e |
113 | return sprintf( "%s // %s // %s", $self->language, $self->lemma, |
896fe649 |
114 | $self->morphstr ); |
cca4f996 |
115 | } |
6ad2ce78 |
116 | |
a7f4020a |
117 | sub throw { |
118 | Text::Tradition::Error->throw( |
119 | 'ident' => 'Wordform error', |
120 | 'message' => $_[0], |
121 | ); |
122 | } |
123 | |
cca4f996 |
124 | no Moose; |
125 | __PACKAGE__->meta->make_immutable; |
126 | |
127 | 1; |
128 | |