[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / Reading.pm

package Text::Tradition::Collation::Reading;

use Moose;
use Module::Load;
use YAML::XS;
use overload '""' => \&_stringify, 'fallback' => 1;

=head1 NAME

Text::Tradition::Collation::Reading - represents a reading (usually a word)
in a collation.

=head1 DESCRIPTION

Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  A 'reading' refers to a unit of text,
usually a word, that appears in one or more witnesses (manuscripts) of the
tradition; the text of a given witness is composed of a set of readings in
a particular sequence.

=head1 METHODS

=head2 new

Creates a new reading in the given collation with the given attributes.
Options include:

=over 4

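=item collation - The Text::Tradition::Collation object to which this
reading belongs.  The constructor does not enforce it, but the convenience
methods that consult the collation (related_readings, witnesses,
predecessors, successors, set_identical) cannot work without it.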

=item id - A unique identifier for this reading. Required.

=item text - The word or other text of the reading.

=item is_start - The reading is the starting point for the collation.

=item is_end - The reading is the ending point for the collation.

=item is_lacuna - The 'reading' represents a known gap in the text.

=item is_ph - A temporary placeholder for apparatus parsing purposes.  Do
not use unless you know what you are doing.

=item rank - The sequence number of the reading. This should probably not
be set manually.

=back

One of 'text', 'is_start', 'is_end', 'is_lacuna', or 'is_ph' is required.

=head2 collation

=head2 id

=head2 text

=head2 is_start

=head2 is_end

=head2 is_lacuna

=head2 rank

Accessor methods for the given attributes.

=cut

# The collation this reading is part of.  Stored as a weak reference, and
# not demanded by the constructor.
has collation => (
	isa      => 'Text::Tradition::Collation',
	is       => 'ro',
	weak_ref => 1,
	# required => 1,
);

# Identifier of the reading; this is also what the object stringifies to.
has id => (
	isa      => 'Str',
	is       => 'ro',
	required => 1,
);

# The reading's text: read through text(), replaced through alter_text().
has text => (
	isa      => 'Str',
	is       => 'ro',
	writer   => 'alter_text',
	required => 1,
);

# Optional language name; has_language() reports whether one was given.
has language => (
	isa       => 'Str',
	is        => 'ro',
	predicate => 'has_language',
);

# Read-only flags marking the 'special' readings (see is_meta).  They are
# declared together, in their original order, because their options are
# identical.
has [ qw( is_start is_end is_lacuna is_ph ) ] => (
	isa     => 'Bool',
	is      => 'ro',
	default => undef,
);

# Unlike the flags above, this one may be changed after construction.
has is_common => (
	isa     => 'Bool',
	is      => 'rw',
	default => undef,
);

# Sequence number of the reading; may be set, tested for with has_rank(),
# and removed with clear_rank().
has rank => (
	isa       => 'Int',
	is        => 'rw',
	clearer   => 'clear_rank',
	predicate => 'has_rank',
);
    
## For morphological analysis

# Normalized form of the reading; has_normal_form() tells whether it is set.
has normal_form => (
	isa       => 'Str',
	is        => 'rw',
	predicate => 'has_normal_form',
);

# Holds the lexemes for the reading.  No accessor is generated for the
# array itself; it is only reachable through the delegated methods.
has reading_lexemes => (
	isa     => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
	traits  => [ 'Array' ],
	default => sub { return [] },
	handles => {
		add_lexeme    => 'push',
		clear_lexemes => 'clear',
		has_lexemes   => 'count',     # number of lexemes, so 0 when empty
		lexemes       => 'elements',
	},
);
	
## For prefix/suffix readings

# Two read-only flags with identical options, declared in one statement
# (join_prior first, then join_next).
has [ qw( join_prior join_next ) ] => (
	isa     => 'Bool',
	is      => 'ro',
	default => undef,
);


# Constructor argument munging.  Accepts either a single hashref or a flat
# key/value list, fills in the id/text/rank that the 'special' readings
# imply, and hands the result on to the default BUILDARGS as a hashref.
around BUILDARGS => sub {
	my $orig = shift;
	my $class = shift;

	# Always work on a private copy: when the caller passes a hashref, the
	# values filled in below must not leak back into the caller's hash.
	my %args = @_ == 1 ? %{ $_[0] } : @_;

	# If one of our special booleans is set, we change the text and the
	# ID to match.  The flags are tested for truth, not mere presence, so
	# that an explicit false value (e.g. is_start => 0) leaves an ordinary
	# reading alone instead of turning it into the start node.
	if( $args{'is_lacuna'} && !exists $args{'text'} ) {
		$args{'text'} = '#LACUNA#';
	} elsif( $args{'is_start'} ) {
		$args{'id'} = '#START#';  # Change the ID to ensure we have only one
		$args{'text'} = '#START#';
		$args{'rank'} = 0;
	} elsif( $args{'is_end'} ) {
		$args{'id'} = '#END#';	# Change the ID to ensure we have only one
		$args{'text'} = '#END#';
	} elsif( $args{'is_ph'} ) {
		# A placeholder shows its own ID as its text
		$args{'text'} = $args{'id'};
	}

	return $class->$orig( \%args );
};

=head2 is_meta

A meta attribute (ha ha), which should be true if any of our 'special'
booleans are true.  Implies that the reading does not represent a bit 
of text found in a witness.

=cut

sub is_meta {
	my( $self ) = @_;
	# Return the value of the first flag that is true; if none of the
	# first three is set, the answer is whatever is_ph holds.
	foreach my $flag ( qw( is_start is_end is_lacuna ) ) {
		my $value = $self->$flag;
		return $value if $value;
	}
	return $self->is_ph;
}

=head1 Convenience methods

=head2 related_readings

Calls Collation's related_readings with $self as the first argument.

=cut

sub related_readings {
	my( $self, @options ) = @_;
	# Pure delegation: the collation does the work, we only put ourselves
	# at the front of the argument list.
	my $collation = $self->collation;
	return $collation->related_readings( $self, @options );
}

=head2 witnesses 

Calls Collation's reading_witnesses with $self as the first argument.

=cut

sub witnesses {
	my( $self, @options ) = @_;
	# Pure delegation to the collation's reading_witnesses, with this
	# reading as the leading argument.
	my $collation = $self->collation;
	return $collation->reading_witnesses( $self, @options );
}

=head2 predecessors

Returns a list of Reading objects that immediately precede $self in the collation.

=cut

sub predecessors {
	my( $self ) = @_;
	# The sequence graph is queried by reading ID and answers with IDs...
	my @prior_ids = $self->collation->sequence->predecessors( $self->id );
	# ...each of which is then looked up in the collation.
	return map { $self->collation->reading( $_ ) } @prior_ids;
}

=head2 successors

Returns a list of Reading objects that immediately follow $self in the collation.

=cut

sub successors {
	my( $self ) = @_;
	# The sequence graph is queried by reading ID and answers with IDs...
	my @next_ids = $self->collation->sequence->successors( $self->id );
	# ...each of which is then looked up in the collation.
	return map { $self->collation->reading( $_ ) } @next_ids;
}

=head2 set_identical( $other_reading)

Backwards compatibility method, to add a transposition relationship
between $self and $other_reading.  Don't use this.

=cut

sub set_identical {
	my $self = shift;
	my $other = shift;
	# Despite the method's name, the relationship recorded is a
	# transposition.
	my %options = ( 'type' => 'transposition' );
	return $self->collation->add_relationship( $self, $other, \%options );
}

# Handler for the '""' overload: a reading used as a string yields its ID.
sub _stringify {
	my( $reading ) = @_;
	return $reading->id;
}

=head1 MORPHOLOGY

Methods for the morphological information (if any) attached to readings.
A reading may be made up of multiple lexemes; the concatenated lexeme
strings ought to match the reading's normalized form.
 
See L<Text::Tradition::Collation::Reading::Lexeme> for more information
on Lexeme objects and their attributes.

=head2 has_lexemes

Returns a true value if the reading has any attached lexemes.

=head2 lexemes

Returns the Lexeme objects (if any) attached to the reading.

=head2 clear_lexemes

Wipes any associated Lexeme objects out of the reading.

=head2 add_lexeme( $lexobj )

Adds the Lexeme in $lexobj to the list of lexemes.

=head2 lemmatize

If the language of the reading is set, this method will use the appropriate
Language model to determine the lexemes that belong to this reading.  See
L<Text::Tradition/lemmatize> if you wish to lemmatize an entire tradition.

=cut

# Loads Text::Tradition::Language::<language> and calls its reading_lookup
# function with this reading as the only argument.  Warns and returns
# nothing when no language is set; dies if the module cannot be loaded or
# does not define reading_lookup.  Otherwise returns whatever
# reading_lookup returns.
sub lemmatize {
	my $self = shift;
	unless( $self->has_language ) {
		warn "Please set a language to lemmatize a tradition";
		return;
	}
	my $mod = "Text::Tradition::Language::" . $self->language;
	load( $mod );

	# A module that loads fine but has no reading_lookup used to fail with
	# "Can't use an undefined value as a subroutine reference"; say what
	# is actually wrong instead.
	my $lookup = $mod->can( 'reading_lookup' )
		or die "Language module $mod does not provide reading_lookup";
	return $lookup->( $self );
}

# For graph serialization. Return a string representation of the associated
# reading lexemes.
# Flattens the attached lexemes into one string.  Lexemes are separated by
# '|R|'; within a lexeme the fields language, string, form and matching
# forms are separated by '|L|'; the matching forms themselves by '|M|'.
# A lexeme without a form contributes an empty form field.
sub _serialize_lexemes {
	my( $self ) = @_;
	my @serialized = map {
		my $lexeme = $_;
		my $matches = join( '|M|',
			map { $_->to_string } $lexeme->matching_forms );
		my $form = '';
		$form = $lexeme->form->to_string if $lexeme->form;
		join( '|L|', $lexeme->language, $lexeme->string, $form, $matches );
	} $self->lexemes;
	return join( '|R|', @serialized );
}
		

## Utility methods

# The value this object offers for JSON output: just the reading's text.
sub TO_JSON {
	my( $reading ) = @_;
	return $reading->text;
}

## TODO will need a throw() here

no Moose;
__PACKAGE__->meta->make_immutable;

1;
Commit	Line	Data
784877d9	1	package Text::Tradition::Collation::Reading;
784877d9	2
8e1394aa	3	use Moose;
6ad2ce78	4	use Module::Load;
7cd9f181	5	use YAML::XS;
e4b0f464	6	use overload '""' => \&_stringify, 'fallback' => 1;
784877d9	7
3a2ebbf4	8	=head1 NAME
784877d9	9
4aea6e9b	10	Text::Tradition::Collation::Reading - represents a reading (usually a word)
	11	in a collation.
	12
3a2ebbf4	13	=head1 DESCRIPTION
784877d9	14
3a2ebbf4	15	Text::Tradition is a library for representation and analysis of collated
	16	texts, particularly medieval ones. A 'reading' refers to a unit of text,
	17	usually a word, that appears in one or more witnesses (manuscripts) of the
	18	tradition; the text of a given witness is composed of a set of readings in
	19	a particular sequence
784877d9	20
3a2ebbf4	21	=head1 METHODS
1ca1163d	22
3a2ebbf4	23	=head2 new
8e1394aa	24
4aea6e9b	25	Creates a new reading in the given collation with the given attributes.
3a2ebbf4	26	Options include:
94c00c71	27
3a2ebbf4	28	=over 4
784877d9	29
4aea6e9b	30	=item collation - The Text::Tradition::Collation object to which this
4aea6e9b	31	reading belongs. Required.
e2902068	32
3a2ebbf4	33	=item id - A unique identifier for this reading. Required.
910a0a6d	34
3a2ebbf4	35	=item text - The word or other text of the reading.
784877d9	36
3a2ebbf4	37	=item is_start - The reading is the starting point for the collation.
3265b0ce	38
3a2ebbf4	39	=item is_end - The reading is the ending point for the collation.
784877d9	40
3a2ebbf4	41	=item is_lacuna - The 'reading' represents a known gap in the text.
de51424a	42
4aea6e9b	43	=item is_ph - A temporary placeholder for apparatus parsing purposes. Do
4aea6e9b	44	not use unless you know what you are doing.
12720144	45
4aea6e9b	46	=item rank - The sequence number of the reading. This should probably not
4aea6e9b	47	be set manually.
d047cd52	48
3a2ebbf4	49	=back
8e1394aa	50
3a2ebbf4	51	One of 'text', 'is_start', 'is_end', or 'is_lacuna' is required.
8e1394aa	52
3a2ebbf4	53	=head2 collation
94c00c71	54
3a2ebbf4	55	=head2 id
94c00c71	56
3a2ebbf4	57	=head2 text
4cdd82f1	58
3a2ebbf4	59	=head2 is_start
4cdd82f1	60
3a2ebbf4	61	=head2 is_end
4a8828f0	62
3a2ebbf4	63	=head2 is_lacuna
4a8828f0	64
3a2ebbf4	65	=head2 rank
4a8828f0	66
3a2ebbf4	67	Accessor methods for the given attributes.
d047cd52	68
3a2ebbf4	69	=cut
d047cd52	70
3a2ebbf4	71	has 'collation' => (
	72	is => 'ro',
	73	isa => 'Text::Tradition::Collation',
	74	# required => 1,
	75	weak_ref => 1,
	76	);
d047cd52	77
3a2ebbf4	78	has 'id' => (
	79	is => 'ro',
	80	isa => 'Str',
	81	required => 1,
	82	);
d047cd52	83
3a2ebbf4	84	has 'text' => (
	85	is => 'ro',
	86	isa => 'Str',
	87	required => 1,
49d4f2ac	88	writer => 'alter_text',
3a2ebbf4	89	);
0e47f4f6	90
fae52efd	91	has 'language' => (
	92	is => 'ro',
	93	isa => 'Str',
6ad2ce78	94	predicate => 'has_language',
fae52efd	95	);
fae52efd	96
3a2ebbf4	97	has 'is_start' => (
	98	is => 'ro',
	99	isa => 'Bool',
	100	default => undef,
	101	);
	102
	103	has 'is_end' => (
	104	is => 'ro',
	105	isa => 'Bool',
	106	default => undef,
	107	);
	108
	109	has 'is_lacuna' => (
	110	is => 'ro',
	111	isa => 'Bool',
	112	default => undef,
	113	);
12720144	114
	115	has 'is_ph' => (
	116	is => 'ro',
	117	isa => 'Bool',
	118	default => undef,
	119	);
d4b75f44	120
	121	has 'is_common' => (
	122	is => 'rw',
	123	isa => 'Bool',
	124	default => undef,
	125	);
3a2ebbf4	126
	127	has 'rank' => (
	128	is => 'rw',
	129	isa => 'Int',
	130	predicate => 'has_rank',
ca6e6095	131	clearer => 'clear_rank',
3a2ebbf4	132	);
fd602649	133
	134	## For morphological analysis
	135
	136	has 'normal_form' => (
	137	is => 'rw',
	138	isa => 'Str',
	139	predicate => 'has_normal_form',
	140	);
	141
7cd9f181	142	# Holds the lexemes for the reading.
d3e7842a	143	has 'reading_lexemes' => (
4d9593df	144	traits => ['Array'],
d3e7842a	145	isa => 'ArrayRef[Text::Tradition::Collation::Reading::Lexeme]',
4d9593df	146	handles => {
4d9593df	147	lexemes => 'elements',
cca4f996	148	has_lexemes => 'count',
d3e7842a	149	clear_lexemes => 'clear',
d3e7842a	150	add_lexeme => 'push',
4d9593df	151	},
d3e7842a	152	default => sub { [] },
fd602649	153	);
fd602649	154
629e27b0	155	## For prefix/suffix readings
	156
	157	has 'join_prior' => (
	158	is => 'ro',
	159	isa => 'Bool',
	160	default => undef,
	161	);
	162
	163	has 'join_next' => (
	164	is => 'ro',
	165	isa => 'Bool',
	166	default => undef,
	167	);
	168
3a2ebbf4	169
	170	around BUILDARGS => sub {
	171	my $orig = shift;
	172	my $class = shift;
	173	my $args;
	174	if( @_ == 1 ) {
	175	$args = shift;
	176	} else {
	177	$args = { @_ };
	178	}
b0b4421a	179
3a2ebbf4	180	# If one of our special booleans is set, we change the text and the
3a2ebbf4	181	# ID to match.
1d310495	182	if( exists $args->{'is_lacuna'} && !exists $args->{'text'} ) {
56eefa04	183	$args->{'text'} = '#LACUNA#';
3a2ebbf4	184	} elsif( exists $args->{'is_start'} ) {
	185	$args->{'id'} = '#START#'; # Change the ID to ensure we have only one
	186	$args->{'text'} = '#START#';
	187	$args->{'rank'} = 0;
	188	} elsif( exists $args->{'is_end'} ) {
	189	$args->{'id'} = '#END#'; # Change the ID to ensure we have only one
	190	$args->{'text'} = '#END#';
12720144	191	} elsif( exists $args->{'is_ph'} ) {
12720144	192	$args->{'text'} = $args->{'id'};
3a2ebbf4	193	}
	194
	195	$class->$orig( $args );
	196	};
	197
	198	=head2 is_meta
	199
	200	A meta attribute (ha ha), which should be true if any of our 'special'
	201	booleans are true. Implies that the reading does not represent a bit
	202	of text found in a witness.
	203
	204	=cut
	205
	206	sub is_meta {
	207	my $self = shift;
12720144	208	return $self->is_start \|\| $self->is_end \|\| $self->is_lacuna \|\| $self->is_ph;
3a2ebbf4	209	}
3a2ebbf4	210
027d819c	211	=head1 Convenience methods
	212
	213	=head2 related_readings
	214
	215	Calls Collation's related_readings with $self as the first argument.
	216
	217	=cut
	218
3a2ebbf4	219	sub related_readings {
	220	my $self = shift;
	221	return $self->collation->related_readings( $self, @_ );
	222	}
	223
7f52eac8	224	=head2 witnesses
	225
	226	Calls Collation's reading_witnesses with $self as the first argument.
	227
	228	=cut
	229
	230	sub witnesses {
	231	my $self = shift;
	232	return $self->collation->reading_witnesses( $self, @_ );
	233	}
	234
027d819c	235	=head2 predecessors
	236
	237	Returns a list of Reading objects that immediately precede $self in the collation.
	238
	239	=cut
	240
22222af9	241	sub predecessors {
	242	my $self = shift;
	243	my @pred = $self->collation->sequence->predecessors( $self->id );
	244	return map { $self->collation->reading( $_ ) } @pred;
	245	}
	246
027d819c	247	=head2 successors
	248
	249	Returns a list of Reading objects that immediately follow $self in the collation.
	250
	251	=cut
	252
22222af9	253	sub successors {
	254	my $self = shift;
	255	my @succ = $self->collation->sequence->successors( $self->id );
	256	return map { $self->collation->reading( $_ ) } @succ;
	257	}
	258
027d819c	259	=head2 set_identical( $other_reading)
	260
	261	Backwards compatibility method, to add a transposition relationship
	262	between $self and $other_reading. Don't use this.
	263
	264	=cut
	265
1d310495	266	sub set_identical {
	267	my( $self, $other ) = @_;
	268	return $self->collation->add_relationship( $self, $other,
	269	{ 'type' => 'transposition' } );
	270	}
	271
3a2ebbf4	272	sub _stringify {
	273	my $self = shift;
	274	return $self->id;
	275	}
d047cd52	276
4d9593df	277	=head1 MORPHOLOGY
4d9593df	278
7cd9f181	279	Methods for the morphological information (if any) attached to readings.
	280	A reading may be made up of multiple lexemes; the concatenated lexeme
	281	strings ought to match the reading's normalized form.
	282
	283	See L<Text::Tradition::Collation::Reading::Lexeme> for more information
	284	on Lexeme objects and their attributes.
	285
	286	=head2 has_lexemes
	287
	288	Returns a true value if the reading has any attached lexemes.
4d9593df	289
6ad2ce78	290	=head2 lexemes
06e7cbc7	291
7cd9f181	292	Returns the Lexeme objects (if any) attached to the reading.
6ad2ce78	293
	294	=head2 clear_lexemes
	295
7cd9f181	296	Wipes any associated Lexeme objects out of the reading.
	297
	298	=head2 add_lexeme( $lexobj )
6ad2ce78	299
7cd9f181	300	Adds the Lexeme in $lexobj to the list of lexemes.
	301
	302	=head2 lemmatize
	303
	304	If the language of the reading is set, this method will use the appropriate
	305	Language model to determine the lexemes that belong to this reading. See
	306	L<Text::Tradition::lemmatize> if you wish to lemmatize an entire tradition.
06e7cbc7	307
4d9593df	308	=cut
4d9593df	309
6ad2ce78	310	sub lemmatize {
	311	my $self = shift;
	312	unless( $self->has_language ) {
	313	warn "Please set a language to lemmatize a tradition";
	314	return;
	315	}
	316	my $mod = "Text::Tradition::Language::" . $self->language;
	317	load( $mod );
	318	$mod->can( 'reading_lookup' )->( $self );
	319
	320	}
4d9593df	321
7cd9f181	322	# For graph serialization. Return a string representation of the associated
	323	# reading lexemes.
	324	sub _serialize_lexemes {
	325	my $self = shift;
c3e04fb5	326	my @lexstrs;
	327	foreach my $l ( $self->lexemes ) {
	328	my @mf;
	329	foreach my $wf ( $l->matching_forms ) {
	330	push( @mf, $wf->to_string );
	331	}
	332	my $form = $l->form ? $l->form->to_string : '';
	333	push( @lexstrs, join( '\|L\|', $l->language, $l->string, $form,
	334	join( '\|M\|', @mf ) ) );
	335	}
	336	return join( '\|R\|', @lexstrs );
7cd9f181	337	}
	338
	339
4d9593df	340	## Utility methods
4d9593df	341
2acf0892	342	sub TO_JSON {
	343	my $self = shift;
	344	return $self->text;
	345	}
	346
4d9593df	347	## TODO will need a throw() here
	348
	349	no Moose;
	350	__PACKAGE__->meta->make_immutable;
	351
021bdbac	352	1;