[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm

package Text::Tradition::Collation::Reading;

use Moose;
use JSON qw/ from_json /;
use Module::Load;
use Text::Tradition::Error;
use YAML::XS;
use overload '""' => \&_stringify, 'fallback' => 1;

=head1 NAME

Text::Tradition::Collation::Reading - represents a reading (usually a word)
in a collation.

=head1 DESCRIPTION

Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  A 'reading' refers to a unit of text,
usually a word, that appears in one or more witnesses (manuscripts) of the
tradition; the text of a given witness is composed of a set of readings in
a particular sequence.

=head1 METHODS

=head2 new

Creates a new reading in the given collation with the given attributes.
Options include:

=over 4

=item collation - The Text::Tradition::Collation object to which this
reading belongs.  Required for the convenience methods, although the
constructor does not enforce it.

=item id - A unique identifier for this reading. Required.

=item text - The word or other text of the reading.

=item is_start - The reading is the starting point for the collation.

=item is_end - The reading is the ending point for the collation.

=item is_lacuna - The 'reading' represents a known gap in the text.

=item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
not use unless you know what you are doing.

=item rank - The sequence number of the reading. This should probably not
be set manually.

=back

One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.

=head2 collation

=head2 id

=head2 text

=head2 is_start

=head2 is_end

=head2 is_lacuna

=head2 rank

Accessor methods for the given attributes.

=cut

# The collation that owns this reading.  Kept as a weak reference, and not
# insisted upon by the constructor.
has 'collation' => (
	isa      => 'Text::Tradition::Collation',
	is       => 'ro',
	weak_ref => 1,
	# required => 1,
);

# Identifier of the reading; the constructor always needs one.
has 'id' => (
	isa      => 'Str',
	is       => 'ro',
	required => 1,
);

# The text of the reading.  Read through text(); changed, when it must be,
# through alter_text().
has 'text' => (
	isa      => 'Str',
	is       => 'ro',
	writer   => 'alter_text',
	required => 1,
);

# Optional language name; has_language() reports whether one was given.
has 'language' => (
	isa       => 'Str',
	is        => 'ro',
	predicate => 'has_language',
);
	
# Marker flags.  Each one is a read-only boolean that stays undefined
# unless the constructor is told otherwise.
has [ qw/ is_start is_end is_lacuna is_ph / ] => (
	isa     => 'Bool',
	is      => 'ro',
	default => undef,
);

# Unlike the marker flags, is_common may be changed after construction.
has 'is_common' => (
	isa     => 'Bool',
	is      => 'rw',
	default => undef,
);

# Sequence number of the reading.  It may be absent (see has_rank) and can
# be removed again with clear_rank.
has 'rank' => (
	isa       => 'Int',
	is        => 'rw',
	clearer   => 'clear_rank',
	predicate => 'has_rank',
);
    
## Attributes used for morphological analysis

# Normal form of the reading, if one has been recorded (see
# has_normal_form).
has 'normal_form' => (
	isa       => 'Str',
	is        => 'rw',
	predicate => 'has_normal_form',
);

# The Lexeme objects attached to the reading.  The attribute has no
# accessor of its own; the list is reached only through the delegated
# methods named under 'handles'.
has 'reading_lexemes' => (
	isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
	traits  => ['Array'],
	default => sub { [] },
	handles => {
		add_lexeme    => 'push',
		clear_lexemes => 'clear',
		has_lexemes   => 'count',
		lexemes       => 'elements',
	},
);
	
## Attributes for prefix/suffix readings

# Read-only booleans, undefined unless given to the constructor.
# NOTE(review): presumably these mark a reading that is written joined to
# the reading before or after it -- confirm against the callers.
has [ qw/ join_prior join_next / ] => (
	isa     => 'Bool',
	is      => 'ro',
	default => undef,
);


# Normalize the constructor arguments before Moose processes them.
#
# Accepts either a single hash reference or a flat key/value list.  When
# one of the 'special' booleans is present, the id, text and rank are
# filled in to match:
#   is_lacuna (and no text) - text becomes '#LACUNA#'
#   is_start                - id and text become '#START#', rank becomes 0
#   is_end                  - id and text become '#END#'
#   is_ph                   - text is copied from the id
# Only the first matching case is applied, and the test is for the
# presence of the key, not the truth of its value.
around BUILDARGS => sub {
	my $orig = shift;
	my $class = shift;
	my $args;
	if( @_ == 1 ) {
		# A lone argument that is not a plain hashref is passed on as it
		# is, so that the standard constructor error is reported.
		return $class->$orig( @_ ) unless ref( $_[0] ) eq 'HASH';
		# Work on a shallow copy; the changes below must not leak back
		# into the hash that the caller handed over.
		$args = { %{ $_[0] } };
	} else {
		$args = { @_ };
	}

	# If one of our special booleans is set, we change the text and the
	# ID to match.
	if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
		$args->{'text'} = '#LACUNA#';
	} elsif( exists $args->{'is_start'} ) {
		$args->{'id'} = '#START#';  # Change the ID to ensure we have only one
		$args->{'text'} = '#START#';
		$args->{'rank'} = 0;
	} elsif( exists $args->{'is_end'} ) {
		$args->{'id'} = '#END#';	# Change the ID to ensure we have only one
		$args->{'text'} = '#END#';
	} elsif( exists $args->{'is_ph'} ) {
		$args->{'text'} = $args->{'id'};
	}

	return $class->$orig( $args );
};

# Post-construction hook.  If the constructor was given a 'lexemes'
# argument, its value is handed to _deserialize_lexemes.
sub BUILD {
	my $self = shift;
	my $params = shift;
	$self->_deserialize_lexemes( $params->{'lexemes'} )
		if exists $params->{'lexemes'};
}

=head2 is_meta

A meta attribute (ha ha), which should be true if any of our 'special'
booleans are true.  Implies that the reading does not represent a bit 
of text found in a witness.

=cut

sub is_meta {
	my $self = shift;
	# Hand back the first flag that is set, exactly as a chain of ||
	# would; if none of the first three is, the answer is is_ph itself.
	foreach my $flag ( qw/ is_start is_end is_lacuna / ) {
		my $value = $self->$flag;
		return $value if $value;
	}
	return $self->is_ph;
}

=head1 Convenience methods

=head2 related_readings

Calls Collation's related_readings with $self as the first argument.

=cut

sub related_readings {
	my $self = shift;
	# Any further arguments are forwarded unchanged, after $self.
	my $collation = $self->collation;
	return $collation->related_readings( $self, @_ );
}

=head2 witnesses 

Calls Collation's reading_witnesses with $self as the first argument.

=cut

sub witnesses {
	my $self = shift;
	# Any further arguments are forwarded unchanged, after $self.
	my $collation = $self->collation;
	return $collation->reading_witnesses( $self, @_ );
}

=head2 predecessors

Returns a list of Reading objects that immediately precede $self in the collation.

=cut

sub predecessors {
	my $self = shift;
	# Ask the sequence graph for the preceding ids, then turn each id into
	# a reading through the collation.
	my @readings;
	foreach my $rid ( $self->collation->sequence->predecessors( $self->id ) ) {
		push( @readings, $self->collation->reading( $rid ) );
	}
	return @readings;
}

=head2 successors

Returns a list of Reading objects that immediately follow $self in the collation.

=cut

sub successors {
	my $self = shift;
	# Ask the sequence graph for the following ids, then turn each id into
	# a reading through the collation.
	my @readings;
	foreach my $rid ( $self->collation->sequence->successors( $self->id ) ) {
		push( @readings, $self->collation->reading( $rid ) );
	}
	return @readings;
}

=head2 set_identical( $other_reading)

Backwards compatibility method, to add a transposition relationship
between $self and $other_reading.  Don't use this.

=cut

sub set_identical {
	my $self = shift;
	my $other = shift;
	# The relationship is always recorded with type 'transposition'.
	my %options = ( 'type' => 'transposition' );
	return $self->collation->add_relationship( $self, $other, \%options );
}

# Handler for the '""' overload: a reading used as a string yields its id.
sub _stringify {
	my( $self ) = @_;
	return $self->id;
}

=head1 MORPHOLOGY

Methods for the morphological information (if any) attached to readings.
A reading may be made up of multiple lexemes; the concatenated lexeme
strings ought to match the reading's normalized form.
 
See L<Text::Tradition::Collation::Reading::Lexeme> for more information
on Lexeme objects and their attributes.

=head2 has_lexemes

Returns a true value if the reading has any attached lexemes.

=head2 lexemes

Returns the Lexeme objects (if any) attached to the reading.

=head2 clear_lexemes

Wipes any associated Lexeme objects out of the reading.

=head2 add_lexeme( $lexobj )

Adds the Lexeme in $lexobj to the list of lexemes.

=head2 lemmatize

If the language of the reading is set, this method will use the appropriate
Language model to determine the lexemes that belong to this reading.  See
L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.

=cut

# Look up the lexemes for this reading with the language module that
# matches its language.
#
# Warns and returns nothing if no language is set.  Otherwise loads
# Text::Tradition::Language::<language> and returns whatever that module's
# reading_lookup function returns when called with the reading.  Throws a
# Reading error if the language is not usable as a module name or the
# module has no reading_lookup; a module that cannot be loaded makes
# load() itself die.
sub lemmatize {
	my $self = shift;
	unless( $self->has_language ) {
		warn "Please set a language to lemmatize a tradition";
		return;
	}

	# The language becomes part of a module name.  Module::Load treats a
	# string that does not look like a module name as a file path, so only
	# names made of word characters and '::' are let through.
	my $language = $self->language;
	unless( $language =~ /\A\w+(?:::\w+)*\z/ ) {
		throw( "Cannot lemmatize: '$language' is not a usable language name" );
	}
	my $mod = "Text::Tradition::Language::" . $language;
	load( $mod );

	# Report a missing lookup function clearly, instead of failing on an
	# attempt to call an undefined value.
	my $lookup = $mod->can( 'reading_lookup' );
	unless( $lookup ) {
		throw( "Language module $mod does not provide reading_lookup" );
	}
	return $lookup->( $self );
}

# Used for graph serialization: encode the lexemes attached to the reading
# as a JSON array.  The encoder is set up with allow_blessed and
# convert_blessed so that the Lexeme objects can be encoded.
sub _serialize_lexemes {
	my $self = shift;
	my @lexemes = $self->lexemes;
	my $encoder = JSON->new;
	$encoder->allow_blessed( 1 );
	$encoder->convert_blessed( 1 );
	return $encoder->encode( \@lexemes );
}

# Given a JSON representation of the lexemes (a JSON array with one entry
# per lexeme), instantiate them and make them the lexemes of the reading,
# replacing any that were there.  An empty array leaves the reading as it
# is.  Throws a Reading error if the data is not an array or if the Lexeme
# class cannot be loaded.
sub _deserialize_lexemes {
	my( $self, $json ) = @_;
	my $data = from_json( $json );
	unless( ref( $data ) eq 'ARRAY' ) {
		throw( "Lexeme data must be a JSON array" );
	}
	return unless @$data;

	# Need to have the lexeme module in order to have lexemes.  This must
	# be a run-time 'require': a 'use' statement is carried out while the
	# file is compiled, so an eval wrapped around it can never catch a
	# failure to load.
	unless( eval { require Text::Tradition::Collation::Reading::Lexeme; 1 } ) {
		throw( $@ );
	}

	# Good to go - add the lexemes.
	my @lexemes = map {
		Text::Tradition::Collation::Reading::Lexeme->new( 'JSON' => $_ )
	} @$data;
	$self->clear_lexemes;
	return $self->add_lexeme( @lexemes );
}

## Utility methods

# TO_JSON is the method the JSON module calls on a blessed object when
# convert_blessed is enabled; a reading is represented by its text.
sub TO_JSON {
	my( $self ) = @_;
	my $text = $self->text;
	return $text;
}

# Raise a Text::Tradition::Error identified as a 'Reading error', with the
# first argument as its message.  Called as a plain function.
sub throw {
	my( $message ) = @_;
	my %details = (
		'ident' => 'Reading error',
		'message' => $message,
	);
	Text::Tradition::Error->throw( %details );
}

# Remove the Moose keywords (has, around, ...) from the package now that
# the declarations are done, then make the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable;

1;
Commit	Line	Data
784877d9	1	package Text::Tradition::Collation::Reading;
784877d9	2
8e1394aa	3	use Moose;
7604424b	4	use JSON qw/ from_json /;
6ad2ce78	5	use Module::Load;
70745e70	6	use Text::Tradition::Error;
7cd9f181	7	use YAML::XS;
e4b0f464	8	use overload '""' => \&_stringify, 'fallback' => 1;
784877d9	9
3a2ebbf4	10	=head1 NAME
784877d9	11
4aea6e9b	12	Text::Tradition::Collation::Reading - represents a reading (usually a word)
	13	in a collation.
	14
3a2ebbf4	15	=head1 DESCRIPTION
784877d9	16
3a2ebbf4	17	Text::Tradition is a library for representation and analysis of collated
	18	texts, particularly medieval ones. A 'reading' refers to a unit of text,
	19	usually a word, that appears in one or more witnesses (manuscripts) of the
	20	tradition; the text of a given witness is composed of a set of readings in
	21	a particular sequence
784877d9	22
3a2ebbf4	23	=head1 METHODS
1ca1163d	24
3a2ebbf4	25	=head2 new
8e1394aa	26
4aea6e9b	27	Creates a new reading in the given collation with the given attributes.
3a2ebbf4	28	Options include:
94c00c71	29
3a2ebbf4	30	=over 4
784877d9	31
4aea6e9b	32	=item collation - The Text::Tradition::Collation object to which this
4aea6e9b	33	reading belongs. Required.
e2902068	34
3a2ebbf4	35	=item id - A unique identifier for this reading. Required.
910a0a6d	36
3a2ebbf4	37	=item text - The word or other text of the reading.
784877d9	38
3a2ebbf4	39	=item is_start - The reading is the starting point for the collation.
3265b0ce	40
3a2ebbf4	41	=item is_end - The reading is the ending point for the collation.
784877d9	42
3a2ebbf4	43	=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a	44
4aea6e9b	45	=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
4aea6e9b	46	not use unless you know what you are doing.
12720144	47
4aea6e9b	48	=item rank - The sequence number of the reading. This should probably not
4aea6e9b	49	be set manually.
d047cd52	50
3a2ebbf4	51	=back
8e1394aa	52
3a2ebbf4	53	One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa	54
3a2ebbf4	55	=head2 collation
94c00c71	56
3a2ebbf4	57	=head2 id
94c00c71	58
3a2ebbf4	59	=head2 text
4cdd82f1	60
3a2ebbf4	61	=head2 is_start
4cdd82f1	62
3a2ebbf4	63	=head2 is_end
4a8828f0	64
3a2ebbf4	65	=head2 is_lacuna
4a8828f0	66
3a2ebbf4	67	=head2 rank
4a8828f0	68
3a2ebbf4	69	Accessor methods for the given attributes.
d047cd52	70
3a2ebbf4	71	=cut
d047cd52	72
3a2ebbf4	73	has 'collation' => (
	74	is => 'ro',
	75	isa => 'Text::Tradition::Collation',
	76	# required => 1,
	77	weak_ref => 1,
	78	);
d047cd52	79
3a2ebbf4	80	has 'id' => (
	81	is => 'ro',
	82	isa => 'Str',
	83	required => 1,
	84	);
d047cd52	85
3a2ebbf4	86	has 'text' => (
	87	is => 'ro',
	88	isa => 'Str',
	89	required => 1,
49d4f2ac	90	writer => 'alter_text',
3a2ebbf4	91	);
0e47f4f6	92
fae52efd	93	has 'language' => (
	94	is => 'ro',
	95	isa => 'Str',
6ad2ce78	96	predicate => 'has_language',
fae52efd	97	);
fae52efd	98
3a2ebbf4	99	has 'is_start' => (
	100	is => 'ro',
	101	isa => 'Bool',
	102	default => undef,
	103	);
	104
	105	has 'is_end' => (
	106	is => 'ro',
	107	isa => 'Bool',
	108	default => undef,
	109	);
	110
	111	has 'is_lacuna' => (
	112	is => 'ro',
	113	isa => 'Bool',
	114	default => undef,
	115	);
12720144	116
	117	has 'is_ph' => (
	118	is => 'ro',
	119	isa => 'Bool',
	120	default => undef,
	121	);
d4b75f44	122
	123	has 'is_common' => (
	124	is => 'rw',
	125	isa => 'Bool',
	126	default => undef,
	127	);
3a2ebbf4	128
	129	has 'rank' => (
	130	is => 'rw',
	131	isa => 'Int',
	132	predicate => 'has_rank',
ca6e6095	133	clearer => 'clear_rank',
3a2ebbf4	134	);
fd602649	135
	136	## For morphological analysis
	137
	138	has 'normal_form' => (
	139	is => 'rw',
	140	isa => 'Str',
	141	predicate => 'has_normal_form',
	142	);
	143
7cd9f181	144	# Holds the lexemes for the reading.
d3e7842a	145	has 'reading_lexemes' => (
4d9593df	146	traits => ['Array'],
d3e7842a	147	isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
4d9593df	148	handles => {
4d9593df	149	lexemes => 'elements',
cca4f996	150	has_lexemes => 'count',
d3e7842a	151	clear_lexemes => 'clear',
d3e7842a	152	add_lexeme => 'push',
4d9593df	153	},
d3e7842a	154	default => sub { [] },
fd602649	155	);
fd602649	156
629e27b0	157	## For prefix/suffix readings
	158
	159	has 'join_prior' => (
	160	is => 'ro',
	161	isa => 'Bool',
	162	default => undef,
	163	);
	164
	165	has 'join_next' => (
	166	is => 'ro',
	167	isa => 'Bool',
	168	default => undef,
	169	);
	170
3a2ebbf4	171
	172	around BUILDARGS => sub {
	173	my $orig = shift;
	174	my $class = shift;
	175	my $args;
	176	if( @_ == 1 ) {
	177	$args = shift;
	178	} else {
	179	$args = { @_ };
	180	}
b0b4421a	181
3a2ebbf4	182	# If one of our special booleans is set, we change the text and the
3a2ebbf4	183	# ID to match.
1d310495	184	if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04	185	$args->{'text'} = '#LACUNA#';
3a2ebbf4	186	} elsif( exists $args->{'is_start'} ) {
	187	$args->{'id'} = '#START#'; # Change the ID to ensure we have only one
	188	$args->{'text'} = '#START#';
	189	$args->{'rank'} = 0;
	190	} elsif( exists $args->{'is_end'} ) {
	191	$args->{'id'} = '#END#'; # Change the ID to ensure we have only one
	192	$args->{'text'} = '#END#';
12720144	193	} elsif( exists $args->{'is_ph'} ) {
12720144	194	$args->{'text'} = $args->{'id'};
3a2ebbf4	195	}
	196
	197	$class->$orig( $args );
	198	};
	199
70745e70	200	# Look for a lexeme-string argument in the build args.
	201	sub BUILD {
	202	my( $self, $args ) = @_;
	203	if( exists $args->{'lexemes'} ) {
	204	$self->_deserialize_lexemes( $args->{'lexemes'} );
	205	}
	206	}
	207
3a2ebbf4	208	=head2 is_meta
	209
	210	A meta attribute (ha ha), which should be true if any of our 'special'
	211	booleans are true. Implies that the reading does not represent a bit
	212	of text found in a witness.
	213
	214	=cut
	215
	216	sub is_meta {
	217	my $self = shift;
12720144	218	return $self->is_start \|\| $self->is_end \|\| $self->is_lacuna \|\| $self->is_ph;
3a2ebbf4	219	}
3a2ebbf4	220
027d819c	221	=head1 Convenience methods
	222
	223	=head2 related_readings
	224
	225	Calls Collation's related_readings with $self as the first argument.
	226
	227	=cut
	228
3a2ebbf4	229	sub related_readings {
	230	my $self = shift;
	231	return $self->collation->related_readings( $self, @_ );
	232	}
	233
7f52eac8	234	=head2 witnesses
	235
	236	Calls Collation's reading_witnesses with $self as the first argument.
	237
	238	=cut
	239
	240	sub witnesses {
	241	my $self = shift;
	242	return $self->collation->reading_witnesses( $self, @_ );
	243	}
	244
027d819c	245	=head2 predecessors
	246
	247	Returns a list of Reading objects that immediately precede $self in the collation.
	248
	249	=cut
	250
22222af9	251	sub predecessors {
	252	my $self = shift;
	253	my @pred = $self->collation->sequence->predecessors( $self->id );
	254	return map { $self->collation->reading( $_ ) } @pred;
	255	}
	256
027d819c	257	=head2 successors
	258
	259	Returns a list of Reading objects that immediately follow $self in the collation.
	260
	261	=cut
	262
22222af9	263	sub successors {
	264	my $self = shift;
	265	my @succ = $self->collation->sequence->successors( $self->id );
	266	return map { $self->collation->reading( $_ ) } @succ;
	267	}
	268
027d819c	269	=head2 set_identical( $other_reading)
	270
	271	Backwards compatibility method, to add a transposition relationship
	272	between $self and $other_reading. Don't use this.
	273
	274	=cut
	275
1d310495	276	sub set_identical {
	277	my( $self, $other ) = @_;
	278	return $self->collation->add_relationship( $self, $other,
	279	{ 'type' => 'transposition' } );
	280	}
	281
3a2ebbf4	282	sub _stringify {
	283	my $self = shift;
	284	return $self->id;
	285	}
d047cd52	286
4d9593df	287	=head1 MORPHOLOGY
4d9593df	288
7cd9f181	289	Methods for the morphological information (if any) attached to readings.
	290	A reading may be made up of multiple lexemes; the concatenated lexeme
	291	strings ought to match the reading's normalized form.
	292
	293	See L<Text::Tradition::Collation::Reading::Lexeme> for more information
	294	on Lexeme objects and their attributes.
	295
	296	=head2 has_lexemes
	297
	298	Returns a true value if the reading has any attached lexemes.
4d9593df	299
6ad2ce78	300	=head2 lexemes
06e7cbc7	301
7cd9f181	302	Returns the Lexeme objects (if any) attached to the reading.
6ad2ce78	303
	304	=head2 clear_lexemes
	305
7cd9f181	306	Wipes any associated Lexeme objects out of the reading.
	307
	308	=head2 add_lexeme( $lexobj )
6ad2ce78	309
7cd9f181	310	Adds the Lexeme in $lexobj to the list of lexemes.
	311
	312	=head2 lemmatize
	313
	314	If the language of the reading is set, this method will use the appropriate
	315	Language model to determine the lexemes that belong to this reading. See
	316	L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
06e7cbc7	317
4d9593df	318	=cut
4d9593df	319
6ad2ce78	320	sub lemmatize {
	321	my $self = shift;
	322	unless( $self->has_language ) {
	323	warn "Please set a language to lemmatize a tradition";
	324	return;
	325	}
	326	my $mod = "Text::Tradition::Language::" . $self->language;
	327	load( $mod );
	328	$mod->can( 'reading_lookup' )->( $self );
	329
	330	}
4d9593df	331
7604424b	332	# For graph serialization. Return a JSON representation of the associated
7cd9f181	333	# reading lexemes.
	334	sub _serialize_lexemes {
	335	my $self = shift;
7604424b	336	my $json = JSON->new->allow_blessed(1)->convert_blessed(1);
7604424b	337	return $json->encode( [ $self->lexemes ] );
7cd9f181	338	}
70745e70	339
7604424b	340	# Given a JSON representation of the lexemes, instantiate them and add
7604424b	341	# them to the reading.
70745e70	342	sub _deserialize_lexemes {
7604424b	343	my( $self, $json ) = @_;
	344	my $data = from_json( $json );
	345	return unless @$data;
70745e70	346
7604424b	347	# Need to have the lexeme module in order to have lexemes.
7604424b	348	eval { use Text::Tradition::Collation::Reading::Lexeme; };
70745e70	349	throw( $@ ) if $@;
	350
	351	# Good to go - add the lexemes.
	352	my @lexemes;
7604424b	353	foreach my $lexhash ( @$data ) {
	354	push( @lexemes, Text::Tradition::Collation::Reading::Lexeme->new(
	355	'JSON' => $lexhash ) );
70745e70	356	}
	357	$self->clear_lexemes;
	358	$self->add_lexeme( @lexemes );
	359	}
7cd9f181	360
4d9593df	361	## Utility methods
4d9593df	362
2acf0892	363	sub TO_JSON {
	364	my $self = shift;
	365	return $self->text;
	366	}
	367
70745e70	368	sub throw {
	369	Text::Tradition::Error->throw(
	370	'ident' => 'Reading error',
	371	'message' => $_[0],
	372	);
	373	}
4d9593df	374
	375	no Moose;
	376	__PACKAGE__->meta->make_immutable;
	377
021bdbac	378	1;