[scpubgit/stemmatology.git] / lib / Text / Tradition / Language / French.pm

package Text::Tradition::Language::French;

use Encode qw/ encode_utf8 decode_utf8 /;
use IPC::Run qw/ run /;
use Lingua::TagSet::Multext;
use Lingua::TagSet::TreeTagger;
use Module::Load;
use Text::Tradition::Collation::Reading::Lexeme;
use Text::Tradition::Collation::Reading::WordForm;
use TryCatch;

my $MORPHDIR = '/Users/tla/Projects/morphology';

=head1 NAME

Text::Tradition::Language::French - language-specific module for French

=head1 DESCRIPTION

Implements morphology lookup for French words in context.  This module
depends on the Flemm module for French lemmatization
(L<http://www.univ-nancy2.fr/pers/namer/Outils.htm#fl3> in conjunction with
the TreeTagger software
(L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is
(for now) expected to be installed in $MORPHDIR/TreeTagger.

=head1 SUBROUTINES

=head2 lemmatize( $text )

Evaluates the string using the Flemm package, and returns the results.

=begin testing

binmode STDOUT, ':utf8';
use Text::Tradition;
use_ok( 'Text::Tradition::Language::French' );

eval "use Flemm";
my $err = $@;

SKIP: {
	skip "Package Flemm not found" if $err;
	my $tf = Text::Tradition->new(
		'input' => 'Self',
		'file' => 't/data/besoin.xml',
		'language' => 'French' );
		
	is( $tf->language, 'French', "Set language okay" );
	$tf->lemmatize();
	# Test the lemmatization. How many readings now have morphological info?
	# Do the lexemes match the reading?
	my $ambig = 0;
	foreach my $r ( $tf->collation->readings ) {
		next if $r->is_meta;
		ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
		my @lex = $r->lexemes;
		my $lexstr = join( '', map { $_->string } @lex );
		my $textstr = $r->text;
		$textstr =~ s/\s+//g;
		is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
		foreach my $l ( @lex ) {
			next if $l->is_disambiguated;
	# 		printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
	# 			join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
			$ambig++;
		}
	}
	is( $ambig, 102, "Found 102 ambiguous forms as expected" );
	
	# Try setting the normal form of a reading and re-analyzing
	my $mr = $tf->collation->reading('99,2');
	is( $mr->text, 'minspire', "Picked correct test reading" );
	is( $mr->language, 'French', "Reading has correct language setting" );
	$mr->normal_form( "m'inspire" );
	$mr->lemmatize;
	is( $mr->lexemes, 2, "Got two lexemes for new m'inspire reading" );
}

=end testing

=cut

sub lemmatize {
	my $tradition = shift;

	# Given a tradition, lemmatize it witness by witness and see what we get.
	my $workdir = File::Temp->newdir();
	my $c = $tradition->collation;
	# First, clear out all existing lexemes from the readings. Save the
	# path as long as we went to the trouble of generating it.
	my %witness_paths;
	foreach my $wit ( $tradition->witnesses ) {
		my @sigla = ( $wit->sigil );
		push( @sigla, $wit->sigil . $c->ac_label ) if $wit->is_layered;
		foreach my $sig ( @sigla ) {
			my @path = grep { !$_->is_meta } 
				$c->reading_sequence( $c->start, $c->end, $sig );
			map { $_->clear_lexemes } @path;
			$witness_paths{$sig} = \@path;
		}
	}
	
	foreach my $sig ( keys %witness_paths ) {
		# Get the text as a sequence of readings and as a string
		print STDERR "Morphologizing witness $sig\n";
		_lemmatize_sequence( undef, @{$witness_paths{$sig}} );
	}
}

sub _update_reading_lexemes {
	my( $replace, $reading, @lexemes ) = @_;
	if( $reading->has_lexemes && !$replace ) {
		# We need to merge what is in @lexemes with what we have already.
		my @oldlex = $reading->lexemes;
		my $cmp1 = join( '||', map { $_->string } @oldlex );
		my $cmp2 = join( '||', map { $_->string } @lexemes );
		if ( @oldlex == @lexemes && $cmp1 eq $cmp2 ) {
			# The lexeme strings are the same, so merge the possible
			# word forms from new to old.
			foreach my $i ( 0 .. $#lexemes ) {
				my $ol = $oldlex[$i];
				my $nl = $lexemes[$i];
				my %ofw;
				map { $ofw{$_->to_string} = 1 } $ol->matching_forms;
				foreach my $form ( $nl->matching_forms ) {
					unless( $ofw{$form->to_string} ) {
						print STDERR "Adding form " . $form->to_string . 
							" to lexeme " . $nl->string . " at $reading\n";
						$ol->add_matching_form( $form );
						$ol->is_disambiguated(0);
					}
				}
			}
		} else {
			warn "Lexeme layout for $reading changed; replacing the lot";
			$reading->clear_lexemes;
			$reading->add_lexeme( @lexemes );
		}
	} else {
		$reading->clear_lexemes if $replace;
		$reading->add_lexeme( @lexemes );
	}
}

=head2 reading_lookup( $rdg[, $rdg, ...] )

Looks up one or more readings using the Flemm package, and returns the
possible results.  This uses the same logic as L<lemmatize> above for the
entire tradition, but can also be used to (re-)analyze individual readings.

=cut

sub reading_lookup {
	return _lemmatize_sequence( 1, @_ );
}

sub _lemmatize_sequence {
	my( $replace, @path ) = @_;
	$DB::single = 1 if $replace;
	my $tagresult = _treetag_string( _text_from_path( 1, @path ) );
	if( $tagresult ) {
		# Map the tagged words onto the original readings, splitting 
		# them up into lexemes where necessary.
		# NOTE we can have multiple lexemes in a reading, but not
		# multiple readings to a lexeme.
		my @tags = split( /\n/, $tagresult );
		my @lexemes;
		my $curr_rdg = shift @path;
		my @curr_lexemes;
		my $unused_rdg_part;
		foreach my $tag ( @tags ) {
			# Get the original word
			my( $lexeme, @rest ) = split( /\t/, $tag );
			# Lemmatize the whole
			my @forms = _parse_wordform( _flemm_lookup( $tag ) );
			my $lexobj = Text::Tradition::Collation::Reading::Lexeme->new(
				'string' => $lexeme, 'language' => 'French',
				'wordform_matchlist' => \@forms );
			# Find the next non-meta reading
			while( $curr_rdg && $curr_rdg->is_meta ) {
				$curr_rdg = shift @path;
			}
			unless( $curr_rdg ) {
				warn "Ran out of readings in sequence for " . $wit->sigil
					. " at $lexeme";
				last;
			}
			my $curr_rdg_text = $curr_rdg->has_normal_form 
				? $curr_rdg->normal_form : $curr_rdg->text;
			if( $unused_rdg_part &&
				$unused_rdg_part =~ /^\Q$lexeme\E(\s*)(.*)$/ ) {
				# Nth part of curr_rdg
				$unused_rdg_part = $2;
				push( @curr_lexemes, $lexobj );
			} elsif( $curr_rdg_text =~ /^\Q$lexeme\E(\s*)(.*)$/ ) {
				# Flag an error if there is already an unused reading part.
				warn "Skipped over unused text $unused_rdg_part at $curr_rdg"
					if $unused_rdg_part;
				$unused_rdg_part = $2; # will be empty if the whole reading matched
				push( @curr_lexemes, $lexobj );
			} else {
				# We do not cope with the idea of a lexeme being 
				# spread across multiple readings.
				warn "Word sequence changed unexpectedly in text";
				# See if we can find a matching reading
				my @lookahead;
				my $matched;
				while( my $nr = shift @path ) {
					my $nrtext = $nr->has_normal_form ? $nr->normal_form : $nr->text;
					if( $nrtext =~ /^\Q$lexeme\E/ ) {
						$curr_rdg = $lookahead[-1] if @lookahead;
						$matched = 1;
						last;
					} else {
						push( @lookahead, $nr );
					}
				}
				# No match? Restore the state we had
				unless( $matched ) {
					unshift( @path, @lookahead );
				}
				# Trigger a move
				$unused_rdg_part = '';
			}
			
			unless( $unused_rdg_part ) {
				# Record the lexemes for the given reading.
				#print STDERR sprintf( "Adding lexeme(s) %s to reading %s (%s)\n",
				#	join( ' ', map { $_->string } @curr_lexemes ),
				#	$curr_rdg->id, $curr_rdg->text );
				_update_reading_lexemes( $replace, $curr_rdg, @curr_lexemes );
				$curr_rdg = shift @path;
				@curr_lexemes = ();
			}
		}
	}
}

# Utility function so that we can cheat and use it when we need both the path
# and its text.
sub _text_from_path {
	my( $normalize, @path ) = @_;
	my $pathtext = '';
	my $last;
	foreach my $r ( @path ) {
		unless ( $r->join_prior || !$last || $last->join_next ) {
			$pathtext .= ' ';
		} 
		$pathtext .= ( $normalize && $r->has_normal_form ) 
			? $r->normal_form : $r->text;
		$last = $r;
	}
	return $pathtext;
}

# Utility function that actually calls the tree tagger.
sub _treetag_string {
	my( $text ) = @_;
	my $wittext = encode_utf8( $text );
	# Then see if we have TreeTagger
	my $taggercmd = "$MORPHDIR/TreeTagger/cmd/tree-tagger-french-utf8";
	unless( -f $taggercmd ) {
		warn "Cannot do French word lemmatization without TreeTagger";
		return;
	}
	# OK, we can run it then.
	my @cmd = ( $taggercmd );
	my( $tagresult, $err ); # Capture the output and error
	run( \@cmd, \$wittext, \$tagresult, \$err );
	# TODO check for error
	return decode_utf8( $tagresult );
}

# Closure and utility function for the package lemmatizer
{
	my $lemmatizer;
	
	sub _flemm_lookup {
		# First try to load Flemm
		unless( $lemmatizer ) {
			try {
				load 'Flemm';
				$lemmatizer = Flemm->new( 'Encoding' => 'utf8', 'Tagger' => 'treetagger' );
			} catch {
				warn "Cannot do French word lemmatization without Flemm: @_";
				return;
			}
		}
		return $lemmatizer->lemmatize( @_ )
	}
	
}

# Utility function to turn a Flemm result into a WordForm
sub _parse_wordform {
	my $flemmobj = shift;
	# For now just parse the string, until we make sense of the documentation.
	my @results = split( / \|\| /, $flemmobj->getResult );
	my @forms;
	foreach ( @results ) {
		my( $orig, $tag, $lemma ) = split( /\t/, $_ );
		my( $pos, $morph ) = split( /:/, $tag );
		my $morphobj;
		if( $morph ) {
			$morphobj = Lingua::TagSet::Multext->tag2structure( $morph );
		} else {
			# Use the TreeTagger info if there is no Flemm morphology.
			$morphobj = Lingua::TagSet::TreeTagger->tag2structure( $pos );
		}
		if( $morphobj ) {
			my $wf = Text::Tradition::Collation::Reading::WordForm->new(
				'language' => 'French',
				'lemma' => $lemma,
				'morphology' => $morphobj,
				);
			push( @forms, $wf );
		} else {
			warn "No morphology found for word: $_";
		}
	}
	return @forms;
}

1;

=head2 TODO

=over

=item * Handle package dependencies more gracefully

=back

=head1 LICENSE

This package is free software and is provided "as is" without express
or implied warranty.  You can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 AUTHOR

Tara L Andrews E<lt>aurum@cpan.orgE<gt>
Commit	Line	Data
cca4f996	1	package Text::Tradition::Language::French;
cca4f996	2
d3e7842a	3	use Encode qw/ encode_utf8 decode_utf8 /;
6ad2ce78	4	use IPC::Run qw/ run /;
	5	use Lingua::TagSet::Multext;
	6	use Lingua::TagSet::TreeTagger;
d3e7842a	7	use Module::Load;
d3e7842a	8	use Text::Tradition::Collation::Reading::Lexeme;
cca4f996	9	use Text::Tradition::Collation::Reading::WordForm;
d3e7842a	10	use TryCatch;
	11
	12	my $MORPHDIR = '/Users/tla/Projects/morphology';
cca4f996	13
	14	=head1 NAME
	15
6ad2ce78	16	Text::Tradition::Language::French - language-specific module for French
cca4f996	17
	18	=head1 DESCRIPTION
	19
6ad2ce78	20	Implements morphology lookup for French words in context. This module
	21	depends on the Flemm module for French lemmatization
	22	(L<http://www.univ-nancy2.fr/pers/namer/Outils.htm#fl3> in conjunction with
	23	the TreeTagger software
	24	(L<http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/>), which is
	25	(for now) expected to be installed in $MORPHDIR/TreeTagger.
cca4f996	26
	27	=head1 SUBROUTINES
	28
	29	=head2 lemmatize( $text )
	30
	31	Evaluates the string using the Flemm package, and returns the results.
	32
6ad2ce78	33	=begin testing
	34
	35	binmode STDOUT, ':utf8';
	36	use Text::Tradition;
	37	use_ok( 'Text::Tradition::Language::French' );
	38
	39	eval "use Flemm";
	40	my $err = $@;
	41
	42	SKIP: {
	43	skip "Package Flemm not found" if $err;
	44	my $tf = Text::Tradition->new(
	45	'input' => 'Self',
	46	'file' => 't/data/besoin.xml',
	47	'language' => 'French' );
	48
	49	is( $tf->language, 'French', "Set language okay" );
	50	$tf->lemmatize();
	51	# Test the lemmatization. How many readings now have morphological info?
	52	# Do the lexemes match the reading?
	53	my $ambig = 0;
	54	foreach my $r ( $tf->collation->readings ) {
	55	next if $r->is_meta;
	56	ok( $r->has_lexemes, "Reading $r has one or more lexemes" );
	57	my @lex = $r->lexemes;
	58	my $lexstr = join( '', map { $_->string } @lex );
	59	my $textstr = $r->text;
	60	$textstr =~ s/\s+//g;
	61	is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );
	62	foreach my $l ( @lex ) {
	63	next if $l->is_disambiguated;
	64	# printf( "Ambiguous lexeme %s for reading %s:\n\t%s\n", $l->string, $r->id,
	65	# join( "\n\t", map { $_->lemma . ': ' . $_->morphology->to_string } $l->matching_forms ) );
	66	$ambig++;
	67	}
	68	}
	69	is( $ambig, 102, "Found 102 ambiguous forms as expected" );
	70
	71	# Try setting the normal form of a reading and re-analyzing
	72	my $mr = $tf->collation->reading('99,2');
	73	is( $mr->text, 'minspire', "Picked correct test reading" );
	74	is( $mr->language, 'French', "Reading has correct language setting" );
	75	$mr->normal_form( "m'inspire" );
	76	$mr->lemmatize;
	77	is( $mr->lexemes, 2, "Got two lexemes for new m'inspire reading" );
	78	}
	79
	80	=end testing
	81
cca4f996	82	=cut
	83
	84	sub lemmatize {
d3e7842a	85	my $tradition = shift;
	86
	87	# Given a tradition, lemmatize it witness by witness and see what we get.
	88	my $workdir = File::Temp->newdir();
	89	my $c = $tradition->collation;
	90	# First, clear out all existing lexemes from the readings. Save the
	91	# path as long as we went to the trouble of generating it.
	92	my %witness_paths;
	93	foreach my $wit ( $tradition->witnesses ) {
	94	my @sigla = ( $wit->sigil );
	95	push( @sigla, $wit->sigil . $c->ac_label ) if $wit->is_layered;
	96	foreach my $sig ( @sigla ) {
	97	my @path = grep { !$_->is_meta }
	98	$c->reading_sequence( $c->start, $c->end, $sig );
	99	map { $_->clear_lexemes } @path;
	100	$witness_paths{$sig} = \@path;
	101	}
	102	}
cca4f996	103
d3e7842a	104	foreach my $sig ( keys %witness_paths ) {
	105	# Get the text as a sequence of readings and as a string
	106	print STDERR "Morphologizing witness $sig\n";
6ad2ce78	107	_lemmatize_sequence( undef, @{$witness_paths{$sig}} );
d3e7842a	108	}
	109	}
	110
	111	sub _update_reading_lexemes {
6ad2ce78	112	my( $replace, $reading, @lexemes ) = @_;
6ad2ce78	113	if( $reading->has_lexemes && !$replace ) {
d3e7842a	114	# We need to merge what is in @lexemes with what we have already.
	115	my @oldlex = $reading->lexemes;
	116	my $cmp1 = join( '\|\|', map { $_->string } @oldlex );
	117	my $cmp2 = join( '\|\|', map { $_->string } @lexemes );
6ad2ce78	118	if ( @oldlex == @lexemes && $cmp1 eq $cmp2 ) {
d3e7842a	119	# The lexeme strings are the same, so merge the possible
	120	# word forms from new to old.
	121	foreach my $i ( 0 .. $#lexemes ) {
	122	my $ol = $oldlex[$i];
	123	my $nl = $lexemes[$i];
	124	my %ofw;
6ad2ce78	125	map { $ofw{$_->to_string} = 1 } $ol->matching_forms;
d3e7842a	126	foreach my $form ( $nl->matching_forms ) {
6ad2ce78	127	unless( $ofw{$form->to_string} ) {
6ad2ce78	128	print STDERR "Adding form " . $form->to_string .
d3e7842a	129	" to lexeme " . $nl->string . " at $reading\n";
	130	$ol->add_matching_form( $form );
	131	$ol->is_disambiguated(0);
	132	}
	133	}
	134	}
	135	} else {
d3e7842a	136	warn "Lexeme layout for $reading changed; replacing the lot";
	137	$reading->clear_lexemes;
	138	$reading->add_lexeme( @lexemes );
	139	}
	140	} else {
6ad2ce78	141	$reading->clear_lexemes if $replace;
d3e7842a	142	$reading->add_lexeme( @lexemes );
d3e7842a	143	}
cca4f996	144	}
cca4f996	145
6ad2ce78	146	=head2 reading_lookup( $rdg[, $rdg, ...] )
cca4f996	147
6ad2ce78	148	Looks up one or more readings using the Flemm package, and returns the
	149	possible results. This uses the same logic as L<lemmatize> above for the
	150	entire tradition, but can also be used to (re-)analyze individual readings.
cca4f996	151
	152	=cut
	153
6ad2ce78	154	sub reading_lookup {
	155	return _lemmatize_sequence( 1, @_ );
	156	}
	157
	158	sub _lemmatize_sequence {
	159	my( $replace, @path ) = @_;
	160	$DB::single = 1 if $replace;
	161	my $tagresult = _treetag_string( _text_from_path( 1, @path ) );
	162	if( $tagresult ) {
	163	# Map the tagged words onto the original readings, splitting
	164	# them up into lexemes where necessary.
	165	# NOTE we can have multiple lexemes in a reading, but not
	166	# multiple readings to a lexeme.
	167	my @tags = split( /\n/, $tagresult );
	168	my @lexemes;
	169	my $curr_rdg = shift @path;
	170	my @curr_lexemes;
	171	my $unused_rdg_part;
	172	foreach my $tag ( @tags ) {
	173	# Get the original word
	174	my( $lexeme, @rest ) = split( /\t/, $tag );
	175	# Lemmatize the whole
	176	my @forms = _parse_wordform( _flemm_lookup( $tag ) );
	177	my $lexobj = Text::Tradition::Collation::Reading::Lexeme->new(
	178	'string' => $lexeme, 'language' => 'French',
	179	'wordform_matchlist' => \@forms );
	180	# Find the next non-meta reading
	181	while( $curr_rdg && $curr_rdg->is_meta ) {
	182	$curr_rdg = shift @path;
	183	}
	184	unless( $curr_rdg ) {
	185	warn "Ran out of readings in sequence for " . $wit->sigil
	186	. " at $lexeme";
	187	last;
	188	}
	189	my $curr_rdg_text = $curr_rdg->has_normal_form
	190	? $curr_rdg->normal_form : $curr_rdg->text;
	191	if( $unused_rdg_part &&
	192	$unused_rdg_part =~ /^\Q$lexeme\E(\s)(.)$/ ) {
	193	# Nth part of curr_rdg
	194	$unused_rdg_part = $2;
	195	push( @curr_lexemes, $lexobj );
	196	} elsif( $curr_rdg_text =~ /^\Q$lexeme\E(\s)(.)$/ ) {
	197	# Flag an error if there is already an unused reading part.
	198	warn "Skipped over unused text $unused_rdg_part at $curr_rdg"
	199	if $unused_rdg_part;
	200	$unused_rdg_part = $2; # will be empty if the whole reading matched
	201	push( @curr_lexemes, $lexobj );
	202	} else {
	203	# We do not cope with the idea of a lexeme being
	204	# spread across multiple readings.
	205	warn "Word sequence changed unexpectedly in text";
	206	# See if we can find a matching reading
	207	my @lookahead;
	208	my $matched;
	209	while( my $nr = shift @path ) {
	210	my $nrtext = $nr->has_normal_form ? $nr->normal_form : $nr->text;
	211	if( $nrtext =~ /^\Q$lexeme\E/ ) {
	212	$curr_rdg = $lookahead[-1] if @lookahead;
	213	$matched = 1;
	214	last;
	215	} else {
	216	push( @lookahead, $nr );
	217	}
218	}
219	# No match? Restore the state we had
220	unless( $matched ) {
221	unshift( @path, @lookahead );
222	}
223	# Trigger a move
224	$unused_rdg_part = '';
225	}
226
227	unless( $unused_rdg_part ) {
228	# Record the lexemes for the given reading.
229	#print STDERR sprintf( "Adding lexeme(s) %s to reading %s (%s)\n",
230	# join( ' ', map { $_->string } @curr_lexemes ),
231	# $curr_rdg->id, $curr_rdg->text );
232	_update_reading_lexemes( $replace, $curr_rdg, @curr_lexemes );
233	$curr_rdg = shift @path;
234	@curr_lexemes = ();
235	}
236	}
237	}
238	}
239
240	# Utility function so that we can cheat and use it when we need both the path
241	# and its text.
242	sub _text_from_path {
243	my( $normalize, @path ) = @_;
244	my $pathtext = '';
245	my $last;
246	foreach my $r ( @path ) {
247	unless ( $r->join_prior \|\| !$last \|\| $last->join_next ) {
248	$pathtext .= ' ';
249	}
250	$pathtext .= ( $normalize && $r->has_normal_form )
251	? $r->normal_form : $r->text;
252	$last = $r;
d3e7842a	253	}
6ad2ce78	254	return $pathtext;
d3e7842a	255	}
	256
	257	# Utility function that actually calls the tree tagger.
	258	sub _treetag_string {
	259	my( $text ) = @_;
	260	my $wittext = encode_utf8( $text );
	261	# Then see if we have TreeTagger
	262	my $taggercmd = "$MORPHDIR/TreeTagger/cmd/tree-tagger-french-utf8";
	263	unless( -f $taggercmd ) {
	264	warn "Cannot do French word lemmatization without TreeTagger";
	265	return;
	266	}
	267	# OK, we can run it then.
	268	my @cmd = ( $taggercmd );
	269	my( $tagresult, $err ); # Capture the output and error
	270	run( \@cmd, \$wittext, \$tagresult, \$err );
	271	# TODO check for error
	272	return decode_utf8( $tagresult );
	273	}
	274
	275	# Closure and utility function for the package lemmatizer
	276	{
	277	my $lemmatizer;
	278
	279	sub _flemm_lookup {
	280	# First try to load Flemm
	281	unless( $lemmatizer ) {
	282	try {
	283	load 'Flemm';
	284	$lemmatizer = Flemm->new( 'Encoding' => 'utf8', 'Tagger' => 'treetagger' );
	285	} catch {
	286	warn "Cannot do French word lemmatization without Flemm: @_";
	287	return;
	288	}
	289	}
	290	return $lemmatizer->lemmatize( @_ )
	291	}
cca4f996	292
	293	}
	294
d3e7842a	295	# Utility function to turn a Flemm result into a WordForm
	296	sub _parse_wordform {
	297	my $flemmobj = shift;
	298	# For now just parse the string, until we make sense of the documentation.
	299	my @results = split( / \\|\\| /, $flemmobj->getResult );
	300	my @forms;
	301	foreach ( @results ) {
	302	my( $orig, $tag, $lemma ) = split( /\t/, $_ );
	303	my( $pos, $morph ) = split( /:/, $tag );
6ad2ce78	304	my $morphobj;
	305	if( $morph ) {
	306	$morphobj = Lingua::TagSet::Multext->tag2structure( $morph );
	307	} else {
	308	# Use the TreeTagger info if there is no Flemm morphology.
	309	$morphobj = Lingua::TagSet::TreeTagger->tag2structure( $pos );
	310	}
	311	if( $morphobj ) {
	312	my $wf = Text::Tradition::Collation::Reading::WordForm->new(
	313	'language' => 'French',
	314	'lemma' => $lemma,
	315	'morphology' => $morphobj,
	316	);
	317	push( @forms, $wf );
	318	} else {
	319	warn "No morphology found for word: $_";
	320	}
d3e7842a	321	}
	322	return @forms;
	323	}
	324
	325	1;
	326
6ad2ce78	327	=head2 TODO
	328
	329	=over
	330
	331	=item * Handle package dependencies more gracefully
	332
	333	=back
	334
cca4f996	335	=head1 LICENSE
	336
	337	This package is free software and is provided "as is" without express
	338	or implied warranty. You can redistribute it and/or modify it under
	339	the same terms as Perl itself.
	340
	341	=head1 AUTHOR
	342
	343	Tara L Andrews E<lt>aurum@cpan.orgE<gt>