Commit | Line | Data |
784877d9 |
1 | package Text::Tradition::Collation::Reading; |
2 | |
8e1394aa |
3 | use Moose; |
e4b0f464 |
4 | use overload '""' => \&_stringify, 'fallback' => 1; |
784877d9 |
5 | |
3a2ebbf4 |
6 | =head1 NAME |
784877d9 |
7 | |
3a2ebbf4 |
8 | Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation. |
eca16057 |
9 | |
3a2ebbf4 |
10 | =head1 DESCRIPTION |
784877d9 |
11 | |
3a2ebbf4 |
12 | Text::Tradition is a library for representation and analysis of collated |
13 | texts, particularly medieval ones. A 'reading' refers to a unit of text, |
14 | usually a word, that appears in one or more witnesses (manuscripts) of the |
15 | tradition; the text of a given witness is composed of a set of readings in |
16 | a particular sequence. |
784877d9 |
17 | |
3a2ebbf4 |
18 | =head1 METHODS |
1ca1163d |
19 | |
3a2ebbf4 |
20 | =head2 new |
8e1394aa |
21 | |
3a2ebbf4 |
22 | Creates a new reading in the given collation with the given attributes. |
23 | Options include: |
94c00c71 |
24 | |
3a2ebbf4 |
25 | =over 4 |
784877d9 |
26 | |
3a2ebbf4 |
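27 | =item collation - The Text::Tradition::Collation object to which this reading belongs. Not enforced by the constructor, but needed by methods such as related_readings, predecessors and successors. |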
e2902068 |
28 | |
3a2ebbf4 |
29 | =item id - A unique identifier for this reading. Required. |
910a0a6d |
30 | |
3a2ebbf4 |
31 | =item text - The word or other text of the reading. |
784877d9 |
32 | |
3a2ebbf4 |
33 | =item is_start - The reading is the starting point for the collation. |
3265b0ce |
34 | |
3a2ebbf4 |
35 | =item is_end - The reading is the ending point for the collation. |
784877d9 |
36 | |
3a2ebbf4 |
37 | =item is_lacuna - The 'reading' represents a known gap in the text. |
de51424a |
38 | |
12720144 |
39 | =item is_ph - A temporary placeholder for apparatus parsing purposes. Do not use unless you know what you are doing. |
40 | |
3a2ebbf4 |
41 | =item rank - The sequence number of the reading. This should probably not be set manually. |
d047cd52 |
42 | |
3a2ebbf4 |
43 | =back |
8e1394aa |
44 | |
3a2ebbf4 |
45 | One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required; for the special flags the text is filled in automatically. |
8e1394aa |
46 | |
3a2ebbf4 |
47 | =head2 collation |
94c00c71 |
48 | |
3a2ebbf4 |
49 | =head2 id |
94c00c71 |
50 | |
3a2ebbf4 |
51 | =head2 text |
4cdd82f1 |
52 | |
3a2ebbf4 |
53 | =head2 is_start |
4cdd82f1 |
54 | |
3a2ebbf4 |
55 | =head2 is_end |
4a8828f0 |
56 | |
3a2ebbf4 |
57 | =head2 is_lacuna |
4a8828f0 |
58 | |
3a2ebbf4 |
59 | =head2 rank |
4a8828f0 |
60 | |
3a2ebbf4 |
61 | Accessor methods for the given attributes. |
d047cd52 |
62 | |
3a2ebbf4 |
63 | =cut |
d047cd52 |
64 | |
3a2ebbf4 |
# The collation this reading belongs to. Stored as a weak reference; the
# 'required' flag is commented out in this declaration.
has collation => (
    is       => 'ro',
    isa      => 'Text::Tradition::Collation',
#   required => 1,
    weak_ref => 1,
);

# Identifier of the reading; must be supplied to the constructor
# (BUILDARGS overrides it for start and end readings).
has id => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# The text of the reading. Read-only through 'text'; 'alter_text' is the
# writer used by BUILD after punctuation has been stripped.
has text => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
    writer   => 'alter_text',
);

# Punctuation records collected by BUILD, each a hash with 'char' and 'pos'
# keys. 'punctuation' returns the list, 'add_punctuation' appends to it.
has punctuation => (
    traits  => ['Array'],
    isa     => 'ArrayRef[HashRef[Str]]',
    default => sub { [] },
    handles => {
        punctuation     => 'elements',
        add_punctuation => 'push',
    },
);

# Whether BUILD should split punctuation out of the text (on by default).
has separate_punctuation => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# 'Special' boolean flags; see is_meta.
has is_start => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

has is_end => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

has is_lacuna => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

has is_ph => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

# Sequence number of the reading; 'has_rank' tells whether one has been set.
has rank => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_rank',
);
130 | |
131 | |
# Normalise the constructor arguments before Moose processes them.
#
# Accepts either a single hashref or a flat key/value list. When one of the
# 'special' booleans is passed with a TRUE value, the text (and, for start
# and end readings, the ID) is filled in to match:
#
#   is_lacuna - text becomes '#LACUNA#' unless a text was supplied
#   is_start  - id and text become '#START#', rank becomes 0
#   is_end    - id and text become '#END#'
#   is_ph     - text is set to the given id
#
# A flag that is present but false (e.g. is_start => 0) is ignored, so it
# neither rewrites the id/text nor hides a later flag in the chain. A hashref
# argument is shallow-copied, so the caller's hash is never modified.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    my $args;
    if( @_ == 1 ) {
        # Anything but a plain hashref is passed on untouched so that Moose
        # can report the bad argument itself.
        return $class->$orig( @_ ) unless ref( $_[0] ) eq 'HASH';
        $args = { %{ $_[0] } };
    } else {
        $args = { @_ };
    }

    # If one of our special booleans is set, we change the text and the
    # ID to match.
    if( $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    } elsif( $args->{'is_start'} ) {
        $args->{'id'}   = '#START#';  # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    } elsif( $args->{'is_end'} ) {
        $args->{'id'}   = '#END#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    } elsif( $args->{'is_ph'} ) {
        $args->{'text'} = $args->{'id'};
    }

    $class->$orig( $args );
};
160 | |
0e47f4f6 |
# Post-construction hook: unless punctuation separation is switched off or
# the reading is a meta reading, move every punctuation character out of the
# text. Each removed character is recorded via add_punctuation together with
# its index in the original text, and the remaining characters become the
# new text.
sub BUILD {
    my( $reading ) = @_;
    return unless $reading->separate_punctuation;
    return if $reading->is_meta;

    my @chars = split( //, $reading->text );
    my @kept;    # the word sans punctuation, one character per element
    foreach my $idx ( 0 .. $#chars ) {
        my $c = $chars[$idx];
        if( $c =~ /^[[:punct:]]$/ ) {
            $reading->add_punctuation( { 'char' => $c, 'pos' => $idx } );
            next;
        }
        push( @kept, $c );
    }
    $reading->alter_text( join( '', @kept ) );
}
178 | |
# Returns the reading's text with the recorded punctuation put back.
# The punctuation records are applied in ascending 'pos' order, each 'char'
# being inserted at its 'pos' offset in the string built so far.
sub punctuated_form {
    my( $reading ) = @_;
    my $restored = $reading->text;
    my @marks = sort { $a->{pos} <=> $b->{pos} } $reading->punctuation;
    foreach my $mark ( @marks ) {
        # Zero-length replacement, i.e. a pure insertion.
        substr( $restored, $mark->{pos}, 0, $mark->{char} );
    }
    return $restored;
}
187 | |
3a2ebbf4 |
188 | =head2 is_meta |
189 | |
190 | A meta attribute (ha ha), which should be true if any of our 'special' |
191 | booleans are true. Implies that the reading does not represent a bit |
192 | of text found in a witness. |
193 | |
194 | =cut |
195 | |
# True if any of is_start, is_end, is_lacuna or is_ph is true. The flags are
# consulted in that order and the first true value is returned; if none of
# the first three is true, the value of is_ph is returned as-is.
sub is_meta {
    my( $reading ) = @_;
    foreach my $flag ( qw/ is_start is_end is_lacuna / ) {
        my $value = $reading->$flag;
        return $value if $value;
    }
    return $reading->is_ph;
}
200 | |
# Some syntactic sugar: asks the collation for the readings related to this
# one, passing any extra arguments straight through.
sub related_readings {
    my( $reading, @options ) = @_;
    my $collation = $reading->collation;
    return $collation->related_readings( $reading, @options );
}
206 | |
22222af9 |
# Returns the reading objects for the predecessors of this reading: the
# collation's sequence is asked for the predecessors of this reading's ID,
# and each result is looked up through the collation's 'reading' method.
sub predecessors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    my @ids = $collation->sequence->predecessors( $reading->id );
    my @readings;
    foreach my $id ( @ids ) {
        push( @readings, $collation->reading( $id ) );
    }
    return @readings;
}
212 | |
# Returns the reading objects for the successors of this reading: the
# collation's sequence is asked for the successors of this reading's ID,
# and each result is looked up through the collation's 'reading' method.
sub successors {
    my( $reading ) = @_;
    my $collation = $reading->collation;
    my @ids = $collation->sequence->successors( $reading->id );
    my @readings;
    foreach my $id ( @ids ) {
        push( @readings, $collation->reading( $id ) );
    }
    return @readings;
}
218 | |
1d310495 |
# Registers a relationship between this reading and $other through the
# collation's add_relationship method and returns whatever that returns.
# NOTE(review): despite the method name, the relationship is recorded with
# type 'transposition' -- confirm that this is the intended type.
sub set_identical {
    my $self  = shift;
    my $other = shift;
    my %relationship = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%relationship );
}
224 | |
3a2ebbf4 |
# Handler for the '""' overload: a reading stringifies to its ID.
sub _stringify {
    my( $reading ) = @_;
    my $id = $reading->id;
    return $id;
}
d047cd52 |
229 | |
021bdbac |
# Remove the Moose sugar from this namespace, then make the class immutable.
no Moose;
__PACKAGE__->meta()->make_immutable();

# A module file has to end with a true value.
1;
d047cd52 |
234 | |