Commit | Line | Data |
784877d9 |
1 | package Text::Tradition::Collation::Reading; |
2 | |
8e1394aa |
3 | use Moose; |
6ad2ce78 |
4 | use Module::Load; |
e4b0f464 |
5 | use overload '""' => \&_stringify, 'fallback' => 1; |
784877d9 |
6 | |
3a2ebbf4 |
7 | =head1 NAME |
784877d9 |
8 | |
4aea6e9b |
9 | Text::Tradition::Collation::Reading - represents a reading (usually a word) |
10 | in a collation. |
11 | |
3a2ebbf4 |
12 | =head1 DESCRIPTION |
784877d9 |
13 | |
3a2ebbf4 |
14 | Text::Tradition is a library for representation and analysis of collated |
15 | texts, particularly medieval ones. A 'reading' refers to a unit of text, |
16 | usually a word, that appears in one or more witnesses (manuscripts) of the |
17 | tradition; the text of a given witness is composed of a set of readings in |
18 | a particular sequence. |
784877d9 |
19 | |
3a2ebbf4 |
20 | =head1 METHODS |
1ca1163d |
21 | |
3a2ebbf4 |
22 | =head2 new |
8e1394aa |
23 | |
4aea6e9b |
24 | Creates a new reading in the given collation with the given attributes. |
3a2ebbf4 |
25 | Options include: |
94c00c71 |
26 | |
3a2ebbf4 |
27 | =over 4 |
784877d9 |
28 | |
4aea6e9b |
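29 | =item collation - The Text::Tradition::Collation object to which this |
30 | reading belongs. The constructor does not currently enforce this, but the methods that delegate to the collation cannot work without it. |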
e2902068 |
31 | |
3a2ebbf4 |
32 | =item id - A unique identifier for this reading. Required. |
910a0a6d |
33 | |
3a2ebbf4 |
34 | =item text - The word or other text of the reading. |
784877d9 |
35 | |
3a2ebbf4 |
36 | =item is_start - The reading is the starting point for the collation. |
3265b0ce |
37 | |
3a2ebbf4 |
38 | =item is_end - The reading is the ending point for the collation. |
784877d9 |
39 | |
3a2ebbf4 |
40 | =item is_lacuna - The 'reading' represents a known gap in the text. |
de51424a |
41 | |
4aea6e9b |
42 | =item is_ph - A temporary placeholder for apparatus parsing purposes. Do |
43 | not use unless you know what you are doing. |
12720144 |
44 | |
4aea6e9b |
45 | =item rank - The sequence number of the reading. This should probably not |
46 | be set manually. |
d047cd52 |
47 | |
3a2ebbf4 |
48 | =back |
8e1394aa |
49 | |
3a2ebbf4 |
50 | One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required. |
8e1394aa |
51 | |
3a2ebbf4 |
52 | =head2 collation |
94c00c71 |
53 | |
3a2ebbf4 |
54 | =head2 id |
94c00c71 |
55 | |
3a2ebbf4 |
56 | =head2 text |
4cdd82f1 |
57 | |
3a2ebbf4 |
58 | =head2 is_start |
4cdd82f1 |
59 | |
3a2ebbf4 |
60 | =head2 is_end |
4a8828f0 |
61 | |
3a2ebbf4 |
62 | =head2 is_lacuna |
4a8828f0 |
63 | |
3a2ebbf4 |
64 | =head2 rank |
4a8828f0 |
65 | |
3a2ebbf4 |
66 | Accessor methods for the given attributes. |
d047cd52 |
67 | |
3a2ebbf4 |
68 | =cut |
d047cd52 |
69 | |
3a2ebbf4 |
# The collation this reading is attached to.  The stored reference is
# weakened (weak_ref), so a reading does not keep its collation alive.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    weak_ref => 1,
    # required => 1,    # currently disabled
);

# Identifier of the reading; must be supplied to the constructor.
has id => ( isa => 'Str', is => 'ro', required => 1 );

# The text of the reading.  The reader is 'text'; the value is changed
# through the separate 'alter_text' writer.
has text => (
    isa      => 'Str',
    is       => 'ro',
    writer   => 'alter_text',
    required => 1,
);

# Optional language name; lemmatize() appends it to
# "Text::Tradition::Language::" to find the module to load.
has language => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_language',
);
95 | |
3a2ebbf4 |
# Marker flags for the 'special' readings.  All four are read-only Bools
# that default to undef, so they are declared together.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);

# Read-write Bool, undef by default.
# NOTE(review): presumably marks a reading shared by all witnesses --
# confirm against the collation code that sets it.
has is_common => (
    isa     => 'Bool',
    is      => 'rw',
    default => undef,
);
3a2ebbf4 |
125 | |
# Integer rank of the reading.  It has no default: has_rank reports whether
# a value is set and clear_rank removes it again.
has rank => (
    isa       => 'Int',
    is        => 'rw',
    clearer   => 'clear_rank',
    predicate => 'has_rank',
);
fd602649 |
132 | |
## For morphological analysis

# Optional normalised form of the reading text.
has normal_form => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_normal_form',
);

# Holds the word form. If is_disambiguated is true, the form at index zero
# is the correct one.
# The list itself gets no accessor; it is reached only through the
# delegated array methods named in 'handles'.
has reading_lexemes => (
    isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
    traits  => ['Array'],
    default => sub { return [] },
    handles => {
        add_lexeme    => 'push',
        clear_lexemes => 'clear',
        has_lexemes   => 'count',
        lexemes       => 'elements',
    },
);
154 | |
629e27b0 |
## For prefix/suffix readings

# Two read-only Bool flags with identical options, undef by default.
has [ qw( join_prior join_next ) ] => (
    isa     => 'Bool',
    is      => 'ro',
    default => undef,
);
168 | |
3a2ebbf4 |
169 | |
# Normalise the constructor arguments before Moose sees them.
#
# Accepts either a single hashref or a flat key/value list.  When one of the
# 'special' flags is set to a true value, the text (and for start/end also
# the ID) is filled in to match:
#   is_lacuna - text becomes '#LACUNA#', unless a text was given
#   is_start  - id and text become '#START#', rank becomes 0
#   is_end    - id and text become '#END#'
#   is_ph     - text becomes a copy of the id
# Only the first matching flag, in the order above, is applied.
#
# Note that a hashref passed by the caller is modified in place.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    my $args = @_ == 1 ? $_[0] : +{ @_ };

    # Test the value of each flag, not merely its presence: an explicit
    # false flag (e.g. is_start => 0) must leave the reading untouched.
    if ( $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    }
    elsif ( $args->{'is_start'} ) {
        $args->{'id'}   = '#START#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    }
    elsif ( $args->{'is_end'} ) {
        $args->{'id'}   = '#END#';      # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    }
    elsif ( $args->{'is_ph'} ) {
        $args->{'text'} = $args->{'id'};
    }

    return $class->$orig($args);
};
197 | |
198 | =head2 is_meta |
199 | |
200 | A meta attribute (ha ha), which should be true if any of our 'special' |
201 | booleans are true. Implies that the reading does not represent a bit |
202 | of text found in a witness. |
203 | |
204 | =cut |
205 | |
# Return the first true value among is_start, is_end, is_lacuna and is_ph
# (checked in that order); if none is true, return the value of is_ph.
sub is_meta {
    my ($reading) = @_;

    for my $flag ( qw( is_start is_end is_lacuna ) ) {
        my $value = $reading->$flag;
        return $value if $value;
    }
    return $reading->is_ph;
}
210 | |
027d819c |
211 | =head1 Convenience methods |
212 | |
213 | =head2 related_readings |
214 | |
215 | Calls Collation's related_readings with $self as the first argument. |
216 | |
217 | =cut |
218 | |
3a2ebbf4 |
# Delegate to the collation's related_readings, passing this reading first
# and any further arguments after it.
sub related_readings {
    my ( $reading, @options ) = @_;
    my $collation = $reading->collation;
    return $collation->related_readings( $reading, @options );
}
223 | |
7f52eac8 |
224 | =head2 witnesses |
225 | |
226 | Calls Collation's reading_witnesses with $self as the first argument. |
227 | |
228 | =cut |
229 | |
# Delegate to the collation's reading_witnesses, passing this reading first
# and any further arguments after it.
sub witnesses {
    my ( $reading, @options ) = @_;
    my $collation = $reading->collation;
    return $collation->reading_witnesses( $reading, @options );
}
234 | |
027d819c |
235 | =head2 predecessors |
236 | |
237 | Returns a list of Reading objects that immediately precede $self in the collation. |
238 | |
239 | =cut |
240 | |
22222af9 |
# Ask the collation's sequence graph for the IDs that precede this reading's
# ID, then resolve each ID through the collation's reading() method.
sub predecessors {
    my ($reading) = @_;
    my $collation = $reading->collation;
    my @prior_ids = $collation->sequence->predecessors( $reading->id );
    return map { $collation->reading($_) } @prior_ids;
}
246 | |
027d819c |
247 | =head2 successors |
248 | |
249 | Returns a list of Reading objects that immediately follow $self in the collation. |
250 | |
251 | =cut |
252 | |
22222af9 |
# Ask the collation's sequence graph for the IDs that follow this reading's
# ID, then resolve each ID through the collation's reading() method.
sub successors {
    my ($reading) = @_;
    my $collation = $reading->collation;
    my @next_ids  = $collation->sequence->successors( $reading->id );
    return map { $collation->reading($_) } @next_ids;
}
258 | |
027d819c |
259 | =head2 set_identical( $other_reading) |
260 | |
261 | Backwards compatibility method, to add a transposition relationship |
262 | between $self and $other_reading. Don't use this. |
263 | |
264 | =cut |
265 | |
1d310495 |
# Register a relationship of type 'transposition' between this reading and
# $other through the collation's add_relationship.
sub set_identical {
    my $self  = shift;
    my $other = shift;

    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
271 | |
3a2ebbf4 |
# Handler for the overloaded '""' operator: a reading stringifies to its ID.
sub _stringify {
    my ($reading) = @_;
    return $reading->id;
}
d047cd52 |
276 | |
4d9593df |
277 | =head1 MORPHOLOGY |
278 | |
279 | A few methods to try to tack on morphological information. |
280 | |
6ad2ce78 |
281 | =head2 lexemes |
06e7cbc7 |
282 | |
6ad2ce78 |
283 | =head2 has_lexemes |
284 | |
285 | =head2 clear_lexemes |
286 | |
287 | =head2 add_lexeme |
288 | |
289 | =head2 lemmatize |
06e7cbc7 |
290 | |
4d9593df |
291 | =cut |
292 | |
6ad2ce78 |
# Run the language-specific lookup for this reading.
#
# Loads the module "Text::Tradition::Language::<language>" and calls its
# reading_lookup function with this reading as the only argument.
#
# Returns whatever reading_lookup returns.  If no language is set, warns and
# returns nothing.
# Dies if the module cannot be loaded, or if it loads but provides no
# reading_lookup.
sub lemmatize {
    my $self = shift;

    unless ( $self->has_language ) {
        warn "Please set a language to lemmatize a tradition";
        return;
    }

    my $mod = "Text::Tradition::Language::" . $self->language;
    load($mod);

    # can() returns undef when the module lacks the function; calling that
    # directly would fail with an unhelpful "undefined value as a
    # subroutine reference" error.
    my $lookup = $mod->can('reading_lookup')
        or die "Language module $mod does not implement reading_lookup";

    return $lookup->($self);
}
4d9593df |
304 | |
305 | ## Utility methods |
306 | |
2acf0892 |
# Returns the reading's text; nothing else about the reading is included.
sub TO_JSON {
    my ($reading) = @_;
    return $reading->text;
}
311 | |
4d9593df |
312 | ## TODO will need a throw() here |
313 | |
# Remove the keywords Moose exported into this package (has, around, ...),
# then make the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file has to end with a true value.
1;