Commit | Line | Data |
784877d9 |
1 | package Text::Tradition::Collation::Reading; |
2 | |
8e1394aa |
3 | use Moose; |
a445ce40 |
4 | use Moose::Util qw/ does_role apply_all_roles /; |
10e4b1ac |
5 | use Moose::Util::TypeConstraints; |
70745e70 |
6 | use Text::Tradition::Error; |
10e4b1ac |
7 | use XML::Easy::Syntax qw( $xml10_name_rx $xml10_namestartchar_rx ); |
e4b0f464 |
8 | use overload '""' => \&_stringify, 'fallback' => 1; |
784877d9 |
9 | |
10e4b1ac |
10 | subtype 'ReadingID', |
11 | as 'Str', |
12 | where { $_ =~ /\A$xml10_name_rx\z/ }, |
13 | message { 'Reading ID must be a valid XML attribute string' }; |
14 | |
15 | no Moose::Util::TypeConstraints; |
16 | |
37bf09f4 |
17 | # Enable plugin(s) if available |
18 | eval { with 'Text::Tradition::Morphology'; }; |
19 | # Morphology package is not on CPAN, so don't warn of its absence |
20 | # if( $@ ) { |
21 | # warn "Text::Tradition::Morphology not found: $@. Disabling lexeme functionality"; |
22 | # }; |
23 | |
3a2ebbf4 |
24 | =head1 NAME |
784877d9 |
25 | |
4aea6e9b |
26 | Text::Tradition::Collation::Reading - represents a reading (usually a word) |
27 | in a collation. |
28 | |
3a2ebbf4 |
29 | =head1 DESCRIPTION |
784877d9 |
30 | |
3a2ebbf4 |
31 | Text::Tradition is a library for representation and analysis of collated |
32 | texts, particularly medieval ones. A 'reading' refers to a unit of text, |
33 | usually a word, that appears in one or more witnesses (manuscripts) of the |
34 | tradition; the text of a given witness is composed of a set of readings in |
35 | a particular sequence |
784877d9 |
36 | |
3a2ebbf4 |
37 | =head1 METHODS |
1ca1163d |
38 | |
3a2ebbf4 |
39 | =head2 new |
8e1394aa |
40 | |
4aea6e9b |
41 | Creates a new reading in the given collation with the given attributes. |
3a2ebbf4 |
42 | Options include: |
94c00c71 |
43 | |
3a2ebbf4 |
44 | =over 4 |
784877d9 |
45 | |
4aea6e9b |
46 | =item collation - The Text::Tradition::Collation object to which this |
47 | reading belongs. Required. |
e2902068 |
48 | |
3a2ebbf4 |
49 | =item id - A unique identifier for this reading. Required. |
910a0a6d |
50 | |
3a2ebbf4 |
51 | =item text - The word or other text of the reading. |
784877d9 |
52 | |
3a2ebbf4 |
53 | =item is_start - The reading is the starting point for the collation. |
3265b0ce |
54 | |
3a2ebbf4 |
55 | =item is_end - The reading is the ending point for the collation. |
784877d9 |
56 | |
3a2ebbf4 |
57 | =item is_lacuna - The 'reading' represents a known gap in the text. |
de51424a |
58 | |
4aea6e9b |
59 | =item is_ph - A temporary placeholder for apparatus parsing purposes. Do |
60 | not use unless you know what you are doing. |
12720144 |
61 | |
4aea6e9b |
62 | =item rank - The sequence number of the reading. This should probably not |
63 | be set manually. |
d047cd52 |
64 | |
3a2ebbf4 |
65 | =back |
8e1394aa |
66 | |
3a2ebbf4 |
67 | One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required. |
8e1394aa |
68 | |
3a2ebbf4 |
69 | =head2 collation |
94c00c71 |
70 | |
3a2ebbf4 |
71 | =head2 id |
94c00c71 |
72 | |
3a2ebbf4 |
73 | =head2 text |
4cdd82f1 |
74 | |
3a2ebbf4 |
75 | =head2 is_start |
4cdd82f1 |
76 | |
3a2ebbf4 |
77 | =head2 is_end |
4a8828f0 |
78 | |
3a2ebbf4 |
79 | =head2 is_lacuna |
4a8828f0 |
80 | |
3a2ebbf4 |
81 | =head2 rank |
4a8828f0 |
82 | |
3a2ebbf4 |
83 | Accessor methods for the given attributes. |
d047cd52 |
84 | |
3a2ebbf4 |
85 | =cut |
d047cd52 |
86 | |
3a2ebbf4 |
87 | has 'collation' => ( |
88 | is => 'ro', |
89 | isa => 'Text::Tradition::Collation', |
90 | # required => 1, |
91 | weak_ref => 1, |
92 | ); |
d047cd52 |
93 | |
3a2ebbf4 |
94 | has 'id' => ( |
95 | is => 'ro', |
10e4b1ac |
96 | isa => 'ReadingID', |
3a2ebbf4 |
97 | required => 1, |
98 | ); |
d047cd52 |
99 | |
3a2ebbf4 |
100 | has 'text' => ( |
101 | is => 'ro', |
102 | isa => 'Str', |
103 | required => 1, |
49d4f2ac |
104 | writer => 'alter_text', |
3a2ebbf4 |
105 | ); |
0e47f4f6 |
106 | |
fae52efd |
107 | has 'language' => ( |
108 | is => 'ro', |
109 | isa => 'Str', |
6ad2ce78 |
110 | predicate => 'has_language', |
fae52efd |
111 | ); |
112 | |
3a2ebbf4 |
113 | has 'is_start' => ( |
114 | is => 'ro', |
115 | isa => 'Bool', |
116 | default => undef, |
117 | ); |
118 | |
119 | has 'is_end' => ( |
120 | is => 'ro', |
121 | isa => 'Bool', |
122 | default => undef, |
123 | ); |
124 | |
125 | has 'is_lacuna' => ( |
126 | is => 'ro', |
127 | isa => 'Bool', |
128 | default => undef, |
129 | ); |
12720144 |
130 | |
131 | has 'is_ph' => ( |
132 | is => 'ro', |
133 | isa => 'Bool', |
134 | default => undef, |
135 | ); |
d4b75f44 |
136 | |
137 | has 'is_common' => ( |
138 | is => 'rw', |
139 | isa => 'Bool', |
140 | default => undef, |
141 | ); |
3a2ebbf4 |
142 | |
143 | has 'rank' => ( |
144 | is => 'rw', |
145 | isa => 'Int', |
146 | predicate => 'has_rank', |
ca6e6095 |
147 | clearer => 'clear_rank', |
3a2ebbf4 |
148 | ); |
fd602649 |
149 | |
629e27b0 |
150 | ## For prefix/suffix readings |
151 | |
152 | has 'join_prior' => ( |
153 | is => 'ro', |
154 | isa => 'Bool', |
155 | default => undef, |
339786dd |
156 | writer => '_set_join_prior', |
629e27b0 |
157 | ); |
158 | |
159 | has 'join_next' => ( |
160 | is => 'ro', |
161 | isa => 'Bool', |
162 | default => undef, |
339786dd |
163 | writer => '_set_join_next', |
629e27b0 |
164 | ); |
165 | |
3a2ebbf4 |
166 | |
167 | around BUILDARGS => sub { |
168 | my $orig = shift; |
169 | my $class = shift; |
170 | my $args; |
171 | if( @_ == 1 ) { |
172 | $args = shift; |
173 | } else { |
174 | $args = { @_ }; |
175 | } |
b0b4421a |
176 | |
3a2ebbf4 |
177 | # If one of our special booleans is set, we change the text and the |
178 | # ID to match. |
44924224 |
179 | if( exists $args->{'is_lacuna'} && $args->{'is_lacuna'} && !exists $args->{'text'} ) { |
56eefa04 |
180 | $args->{'text'} = '#LACUNA#'; |
44924224 |
181 | } elsif( exists $args->{'is_start'} && $args->{'is_start'} ) { |
10e4b1ac |
182 | $args->{'id'} = '__START__'; # Change the ID to ensure we have only one |
3a2ebbf4 |
183 | $args->{'text'} = '#START#'; |
184 | $args->{'rank'} = 0; |
44924224 |
185 | } elsif( exists $args->{'is_end'} && $args->{'is_end'} ) { |
10e4b1ac |
186 | $args->{'id'} = '__END__'; # Change the ID to ensure we have only one |
3a2ebbf4 |
187 | $args->{'text'} = '#END#'; |
44924224 |
188 | } elsif( exists $args->{'is_ph'} && $args->{'is_ph'} ) { |
12720144 |
189 | $args->{'text'} = $args->{'id'}; |
3a2ebbf4 |
190 | } |
191 | |
10e4b1ac |
192 | # Backwards compatibility for non-XMLname IDs |
193 | my $rid = $args->{'id'}; |
194 | $rid =~ s/\#/__/g; |
195 | $rid =~ s/[\/,]/./g; |
196 | if( $rid !~ /^$xml10_namestartchar_rx/ ) { |
197 | $rid = 'r'.$rid; |
198 | } |
199 | $args->{'id'} = $rid; |
200 | |
3a2ebbf4 |
201 | $class->$orig( $args ); |
202 | }; |
203 | |
a445ce40 |
204 | # Look for a lexeme-string argument in the build args; if there, pull in the |
205 | # morphology role if possible. |
70745e70 |
206 | sub BUILD { |
207 | my( $self, $args ) = @_; |
208 | if( exists $args->{'lexemes'} ) { |
37bf09f4 |
209 | unless( $self->can( '_deserialize_lexemes' ) ) { |
210 | warn "No morphology package installed; DROPPING lexemes"; |
211 | return; |
a445ce40 |
212 | } |
70745e70 |
213 | $self->_deserialize_lexemes( $args->{'lexemes'} ); |
214 | } |
215 | } |
216 | |
3a2ebbf4 |
217 | =head2 is_meta |
218 | |
219 | A meta attribute (ha ha), which should be true if any of our 'special' |
220 | booleans are true. Implies that the reading does not represent a bit |
221 | of text found in a witness. |
222 | |
223 | =cut |
224 | |
225 | sub is_meta { |
226 | my $self = shift; |
12720144 |
227 | return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph; |
3a2ebbf4 |
228 | } |
229 | |
a445ce40 |
230 | =head2 is_identical( $other_reading ) |
231 | |
232 | Returns true if the reading is identical to the other reading. The basic test |
233 | is equality of ->text attributes, but this may be wrapped or overridden by |
234 | extensions. |
235 | |
236 | =cut |
237 | |
238 | sub is_identical { |
239 | my( $self, $other ) = @_; |
240 | return $self->text eq $other->text; |
241 | } |
242 | |
243 | =head2 is_combinable |
244 | |
245 | Returns true if the reading may in theory be combined into a multi-reading |
246 | segment within the collation graph. The reading must not be a meta reading, |
247 | and it must not have any relationships in its own right with any others. |
248 | This test may be wrapped or overridden by extensions. |
249 | |
250 | =cut |
251 | |
252 | sub is_combinable { |
253 | my $self = shift; |
254 | return undef if $self->is_meta; |
255 | return !$self->related_readings(); |
256 | } |
257 | |
258 | # Not really meant for public consumption. Adopt the text of the other reading |
259 | # into this reading. |
260 | sub _combine { |
261 | my( $self, $other, $joinstr ) = @_; |
262 | $self->alter_text( join( $joinstr, $self->text, $other->text ) ); |
263 | # Change this reading to a joining one if necessary |
264 | $self->_set_join_next( $other->join_next ); |
265 | } |
266 | |
027d819c |
267 | =head1 Convenience methods |
268 | |
269 | =head2 related_readings |
270 | |
271 | Calls Collation's related_readings with $self as the first argument. |
272 | |
273 | =cut |
274 | |
3a2ebbf4 |
275 | sub related_readings { |
276 | my $self = shift; |
277 | return $self->collation->related_readings( $self, @_ ); |
278 | } |
279 | |
7f52eac8 |
280 | =head2 witnesses |
281 | |
282 | Calls Collation's reading_witnesses with $self as the first argument. |
283 | |
284 | =cut |
285 | |
286 | sub witnesses { |
287 | my $self = shift; |
288 | return $self->collation->reading_witnesses( $self, @_ ); |
289 | } |
290 | |
027d819c |
291 | =head2 predecessors |
292 | |
293 | Returns a list of Reading objects that immediately precede $self in the collation. |
294 | |
295 | =cut |
296 | |
22222af9 |
297 | sub predecessors { |
298 | my $self = shift; |
299 | my @pred = $self->collation->sequence->predecessors( $self->id ); |
300 | return map { $self->collation->reading( $_ ) } @pred; |
301 | } |
302 | |
027d819c |
303 | =head2 successors |
304 | |
305 | Returns a list of Reading objects that immediately follow $self in the collation. |
306 | |
307 | =cut |
308 | |
22222af9 |
309 | sub successors { |
310 | my $self = shift; |
311 | my @succ = $self->collation->sequence->successors( $self->id ); |
312 | return map { $self->collation->reading( $_ ) } @succ; |
313 | } |
314 | |
a445ce40 |
315 | ## Utility methods |
1d310495 |
316 | |
3a2ebbf4 |
317 | sub _stringify { |
318 | my $self = shift; |
319 | return $self->id; |
320 | } |
d047cd52 |
321 | |
2acf0892 |
322 | sub TO_JSON { |
323 | my $self = shift; |
324 | return $self->text; |
325 | } |
326 | |
70745e70 |
327 | sub throw { |
328 | Text::Tradition::Error->throw( |
329 | 'ident' => 'Reading error', |
330 | 'message' => $_[0], |
331 | ); |
332 | } |
4d9593df |
333 | |
334 | no Moose; |
335 | __PACKAGE__->meta->make_immutable; |
336 | |
021bdbac |
337 | 1; |