add JSON alignment table parsing
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm
CommitLineData
784877d9 1package Text::Tradition::Collation::Reading;
2
8e1394aa 3use Moose;
e4b0f464 4use overload '""' => \&_stringify, 'fallback' => 1;
784877d9 5
=head1 NAME

Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.

=head1 DESCRIPTION

Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  A 'reading' refers to a unit of text,
usually a word, that appears in one or more witnesses (manuscripts) of the
tradition; the text of a given witness is composed of a set of readings in
a particular sequence.

=head1 METHODS

=head2 new

Creates a new reading in the given collation with the given attributes.
Options include:

=over 4

=item collation - The Text::Tradition::Collation object to which this reading belongs.  Required.

=item id - A unique identifier for this reading.  Required.

=item text - The word or other text of the reading.

=item is_start - The reading is the starting point for the collation.

=item is_end - The reading is the ending point for the collation.

=item is_lacuna - The 'reading' represents a known gap in the text.

=item is_ph - A temporary placeholder for apparatus parsing purposes.  Do not use unless you know what you are doing.

=item rank - The sequence number of the reading.  This should probably not be set manually.

=back

One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.

=head2 collation

=head2 id

=head2 text

=head2 is_start

=head2 is_end

=head2 is_lacuna

=head2 is_ph

=head2 rank

Accessor methods for the given attributes.

=cut
d047cd52 64
# The collation that owns this reading.  Stored as a weak reference; the
# 'required' constraint is deliberately left switched off.
has 'collation' => (
    is       => 'ro',
    isa      => 'Text::Tradition::Collation',
    weak_ref => 1,
    # required => 1,
);

# Identifier of the reading.
has 'id' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# The text of the reading.  Read-only through 'text'; 'alter_text' is the
# writer used internally when punctuation is stripped.
has 'text' => (
    is       => 'ro',
    isa      => 'Str',
    writer   => 'alter_text',
    required => 1,
);

# Punctuation records, each a hash of strings (the code in this class uses
# the keys 'char' and 'pos').  There is no plain accessor: 'punctuation'
# returns the elements and 'add_punctuation' appends to them.
has 'punctuation' => (
    traits  => ['Array'],
    isa     => 'ArrayRef[HashRef[Str]]',
    default => sub { [] },
    handles => {
        punctuation     => 'elements',
        add_punctuation => 'push',
    },
);

# Whether punctuation is kept apart from the text.  On by default.
has 'separate_punctuation' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# The 'special' flags: start marker, end marker, lacuna, and apparatus
# placeholder.  All four are declared identically and default to undef.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
    is      => 'ro',
    isa     => 'Bool',
    default => undef,
);

# Sequence number of the reading; 'has_rank' tells whether one was set.
has 'rank' => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_rank',
);
130
131
# Massage the constructor arguments before Moose builds the object.
#
# Accepts either a single hashref or a flat key/value list.  Two kinds of
# rewriting happen here:
#
#  * A 'json' argument (a token hash with the text under 't' and,
#    optionally, a 'punctuation' arrayref) is removed and converted into
#    'text' / 'punctuation' arguments.
#  * The special-reading flags supply a 'text' (and, for start/end, a fixed
#    'id') so that the required 'text' attribute is populated.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    # Work on a private shallow copy: the rewriting below deletes and
    # overwrites keys, and that must not leak into a hashref owned by the
    # caller.  Anything that is not a plain hashref is passed through as is.
    my $args;
    if( @_ == 1 ) {
        my $given = shift;
        $args = ref( $given ) eq 'HASH' ? { %$given } : $given;
    } else {
        $args = { @_ };
    }

    # Did we get a JSON token to parse into a reading?  If so, massage it.
    if( exists $args->{'json'} ) {
        my $j = delete $args->{'json'};

        # If we have separated punctuation and don't want it, restore it.
        if( exists $j->{'punctuation'}
            && exists $args->{'separate_punctuation'}
            && !$args->{'separate_punctuation'} ) {
            # _restore_punct works on a flat list of punctuation records,
            # so the arrayref from the token has to be dereferenced.
            $args->{'text'} = _restore_punct( $j->{'t'},
                @{ $j->{'punctuation'} || [] } );

        # In all other cases, keep text and punct as they are.
        } else {
            $args->{'text'} = $j->{'t'};
            # we don't use comparison or canonical forms here
            $args->{'punctuation'} = $j->{'punctuation'}
                if exists $j->{'punctuation'};
        }
    }

    # If one of our special booleans is set, we change the text and the
    # ID to match.
    # NOTE: these checks test for the presence of the key, not for a true
    # value, so passing e.g. is_start => 0 still takes the is_start branch.
    if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
        $args->{'text'} = '#LACUNA#';
    } elsif( exists $args->{'is_start'} ) {
        $args->{'id'}   = '#START#';  # Change the ID to ensure we have only one
        $args->{'text'} = '#START#';
        $args->{'rank'} = 0;
    } elsif( exists $args->{'is_end'} ) {
        $args->{'id'}   = '#END#';    # Change the ID to ensure we have only one
        $args->{'text'} = '#END#';
    } elsif( exists $args->{'is_ph'} ) {
        # A placeholder simply shows its own ID as text.
        $args->{'text'} = $args->{'id'};
    }

    return $class->$orig( $args );
};
178
# Post-construction hook: split punctuation off the reading's text.
#
# Nothing happens when punctuation separation is switched off, when the
# reading is a meta reading, or when punctuation records are already
# present.  Otherwise every punctuation character is recorded together
# with its offset in the original text, and the text is replaced by the
# remaining characters.
sub BUILD {
    my( $reading ) = @_;

    return unless $reading->separate_punctuation;
    return if $reading->is_meta;
    return if $reading->punctuation;

    my $original = $reading->text;
    my $bare     = '';    # the word with its punctuation removed
    foreach my $offset ( 0 .. length( $original ) - 1 ) {
        my $char = substr( $original, $offset, 1 );
        if( $char =~ /^[[:punct:]]$/ ) {
            $reading->add_punctuation(
                { 'char' => $char, 'pos' => $offset } );
        } else {
            $bare .= $char;
        }
    }
    $reading->alter_text( $bare );
    return;
}
197
# Return the reading's text with its recorded punctuation put back in.
sub punctuated_form {
    my( $reading ) = @_;
    my $bare  = $reading->text;
    my @marks = $reading->punctuation;
    return _restore_punct( $bare, @marks );
}
202
# Re-insert punctuation into a word.  Plain function, not a method.
#
# Arguments: the bare word, followed by the punctuation records.  The
# records may be given either as a flat list of hashrefs or as a single
# arrayref of hashrefs (the form in which they arrive in a JSON token).
# Each record carries the character under 'char' and its offset in the
# punctuated string under 'pos'.
#
# Returns the word with every character inserted at its offset.  Records
# are applied in ascending 'pos' order, so that each offset is valid by the
# time its character is inserted.
sub _restore_punct {
    my( $word, @punct ) = @_;

    # Accept a lone arrayref of records as well as a flat list.
    if( @punct == 1 && ref( $punct[0] ) eq 'ARRAY' ) {
        @punct = @{ $punct[0] };
    }

    foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @punct ) {
        substr( $word, $p->{pos}, 0, $p->{char} );
    }
    return $word;
}
0e47f4f6 210
=head2 is_meta

A meta attribute (ha ha), which should be true if any of our 'special'
booleans are true.  Implies that the reading does not represent a bit
of text found in a witness.

=cut

sub is_meta {
    my( $reading ) = @_;

    # The first flag that is set decides the answer ...
    foreach my $flag ( qw( is_start is_end is_lacuna ) ) {
        my $value = $reading->$flag;
        return $value if $value;
    }
    # ... and if none of those is, the placeholder flag has the last word.
    return $reading->is_ph;
}
223
# Some syntactic sugar: the methods from here on simply forward to the
# collation that owns this reading.

# Ask the collation for the readings related to this one.  Any further
# arguments are handed on to the collation unchanged.
sub related_readings {
    my( $self, @options ) = @_;
    my $collation = $self->collation;
    return $collation->related_readings( $self, @options );
}
229
# Return the readings that come directly before this one: the collation's
# sequence is asked for the predecessor IDs of our ID, and each ID is then
# looked up as a reading in the collation.
sub predecessors {
    my( $self ) = @_;
    my $collation = $self->collation;

    my @readings;
    foreach my $id ( $collation->sequence->predecessors( $self->id ) ) {
        push @readings, $collation->reading( $id );
    }
    return @readings;
}
235
# Return the readings that come directly after this one: the collation's
# sequence is asked for the successor IDs of our ID, and each ID is then
# looked up as a reading in the collation.
sub successors {
    my( $self ) = @_;
    my $collation = $self->collation;

    my @readings;
    foreach my $id ( $collation->sequence->successors( $self->id ) ) {
        push @readings, $collation->reading( $id );
    }
    return @readings;
}
241
# Link this reading to another one through the collation.
# Note that, despite the method's name, the relationship that is added is
# of type 'transposition'.
# Returns whatever the collation's add_relationship returns.
sub set_identical {
    my $self  = shift;
    my $other = shift;

    my %options = ( 'type' => 'transposition' );
    return $self->collation->add_relationship( $self, $other, \%options );
}
247
# Handler for the overloaded '""' operator: a reading used as a string
# yields its ID.
sub _stringify {
    my( $reading ) = @_;
    return $reading->id;
}
d047cd52 252
021bdbac 253no Moose;
254__PACKAGE__->meta->make_immutable;
d047cd52 255
021bdbac 2561;
d047cd52 257