Commit | Line | Data |
cca4f996 |
1 | package Text::Tradition::Collation::Reading::WordForm; |
2 | |
70745e70 |
3 | use Lingua::Features::Structure; |
7604424b |
4 | use JSON (); |
5 | use Moose; |
a7f4020a |
6 | use Text::Tradition::Error; |
7 | use TryCatch; |
cca4f996 |
8 | |
9 | =head1 NAME |
10 | |
11 | Text::Tradition::Collation::Reading::WordForm - represents a |
12 | language/lemma/morphology triplet that can be associated with a Reading. |
13 | |
14 | =head1 DESCRIPTION |
15 | |
16 | Text::Tradition is a library for representation and analysis of collated |
17 | texts, particularly medieval ones. A word form is used for the analysis of |
18 | Reading objects; it consists of a lemma, a language, and a code to |
19 | represent its part of speech. In general the word forms for a particular |
20 | language should be read from / written to some morphological database. |
21 | |
22 | =head1 METHODS |
23 | |
24 | =head2 new |
25 | |
26 | Creates a new word form from the passed options. |
27 | |
28 | =head2 language |
29 | |
30 | Returns the language to which this word form belongs. |
31 | |
32 | =head2 lemma |
33 | |
34 | Returns the lemma for the word form. |
35 | |
36 | =head2 morphology |
37 | |
38 | Returns a L<Lingua::Features::Structure> object representing this word's |
39 | morphology. Its contents depend on the language being used. |
40 | |
41 | =cut |
42 | |
# Language to which this word form belongs. Read-only, mandatory.
has language => ( is => 'ro', isa => 'Str', required => 1 );

# TODO do we need this?
# Presumably the form of the word as it appears in the text -- confirm
# against callers. Read-only; deliberately left optional for now.
has form => (
    is  => 'ro',
    isa => 'Str',
    # required => 1,
);

# Lemma of the word form. Read-only, mandatory.
has lemma => ( is => 'ro', isa => 'Str', required => 1 );

# Morphological description, held as a Lingua::Features::Structure object.
# Read-only, mandatory.
has morphology => (
    is       => 'ro',
    isa      => 'Lingua::Features::Structure',
    required => 1,
);
67 | |
70745e70 |
# Normalise the constructor arguments before Moose sees them.
#
# Accepts either the usual hash / hashref of attribute values, or a single
# 'JSON' option holding a "language // lemma // morphology" string (the
# format that TO_JSON emits). When 'JSON' is present, the attribute values
# are taken from that string and every other passed option is ignored.
#
# Throws a Text::Tradition::Error (via throw) if the string does not have
# three fields, or if its morphology field cannot be parsed.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;
    my $args  = @_ == 1 ? $_[0] : {@_};

    if ( exists $args->{'JSON'} ) {
        my $serialized = $args->{'JSON'};

        # Split into at most three fields: anything after the second
        # separator belongs to the morphology string and must reach the
        # parser intact rather than being silently cut off.
        my @data
            = defined $serialized
            ? split( / \/\/ /, $serialized, 3 )
            : ();

        # Refuse malformed input up front instead of handing an undefined
        # morphology string to the parser and to the error message below.
        unless ( @data == 3 ) {
            throw(    "Could not parse string "
                    . ( defined $serialized ? $serialized : '(undef)' )
                    . " into language // lemma // morphology" );
        }

        # print STDERR "Attempting to parse " . $data[2] . " into structure";
        my $morph;
        try {
            $morph = Lingua::Features::Structure->from_string( $data[2] );
        } catch {
            throw("Could not parse string " . $data[2] . " into morphological structure");
        }

        $args = {
            'language'   => $data[0],
            'lemma'      => $data[1],
            'morphology' => $morph,
        };
    }

    $class->$orig($args);
};
86 | |
6ad2ce78 |
87 | =head2 to_string |
88 | |
89 | Returns a string combination of language/lemma/morphology that can be used |
90 | in equivalence testing. |
91 | |
92 | =cut |
93 | |
# Encode this word form with a JSON encoder that has convert_blessed
# switched on, so that the object is serialized through its TO_JSON method.
# Returns the encoder's output.
sub to_string {
    my ($wordform) = @_;
    my $encoder = JSON->new->convert_blessed(1);
    return $encoder->encode($wordform);
}
98 | |
da83693e |
# Serialization hook: represent the word form as one
# "language // lemma // morphology" string instead of a JSON hash, which
# keeps the XML serialization manageable. This is the same layout that the
# 'JSON' constructor option splits apart again.
sub TO_JSON {
    my ($wordform) = @_;
    my @fields = (
        $wordform->language,
        $wordform->lemma,
        $wordform->morphology->to_string(),
    );
    return sprintf( "%s // %s // %s", @fields );
}
6ad2ce78 |
106 | |
a7f4020a |
# Report a word form problem: passes the given message, together with the
# fixed identifier 'Wordform error', to Text::Tradition::Error->throw.
sub throw {
    my ($message) = @_;
    my @details = (
        'ident'   => 'Wordform error',
        'message' => $message,
    );
    return Text::Tradition::Error->throw(@details);
}
113 | |
cca4f996 |
114 | no Moose; |
115 | __PACKAGE__->meta->make_immutable; |
116 | |
117 | 1; |
118 | |
119 | =head1 LICENSE |
120 | |
121 | This package is free software and is provided "as is" without express |
122 | or implied warranty. You can redistribute it and/or modify it under |
123 | the same terms as Perl itself. |
124 | |
125 | =head1 AUTHOR |
126 | |
127 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |