#!/usr/bin/perl -w

# Exercises French lemmatization in Text::Tradition: loads a tradition from
# t/data/besoin.xml, lemmatizes it, checks every non-meta reading's lexemes
# against its text, and then re-analyzes one reading after changing its
# normal form. The lemmatization checks are skipped when Flemm cannot be
# loaded.

use strict;
use Test::More 'no_plan';
$| = 1;



# =begin testing
{
    # Test::Builder writes test names and diagnostics to its own duplicates
    # of STDOUT/STDERR, created when Test::More was loaded, so a layer set on
    # STDOUT alone does not reach them. Apply the same layer to the builder's
    # handles too. ':utf8' only sets a flag on the handle, so applying it
    # more than once to a shared handle is harmless.
    binmode STDOUT, ':utf8';
    my $builder = Test::More->builder;
    binmode $builder->output,         ':utf8';
    binmode $builder->failure_output, ':utf8';
    binmode $builder->todo_output,    ':utf8';

    use Text::Tradition;
    use_ok( 'Text::Tradition::Language::French' );

    # Try to load Flemm; keep the error (if any) so the dependent tests can
    # be skipped instead of failing.
    eval "use Flemm";
    my $err = $@;

    SKIP: {
        # No test count is passed: that is accepted under 'no_plan'.
        skip "Package Flemm not found" if $err;

        my $tf = Text::Tradition->new(
            'input'    => 'Self',
            'file'     => 't/data/besoin.xml',
            'language' => 'French' );

        is( $tf->language, 'French', "Set language okay" );
        $tf->lemmatize();

        # Test the lemmatization. How many readings now have morphological
        # info? Do the lexemes match the reading?
        my $ambig   = 0;    # lexemes that are not disambiguated
        my $flemmed = 0;    # wordforms whose morphology has a 'num' feature
        foreach my $r ( $tf->collation->readings ) {
            next if $r->is_meta;
            ok( $r->has_lexemes, "Reading $r has one or more lexemes" );

            # The concatenated lexeme strings must equal the reading text
            # with all whitespace removed.
            my @lex      = $r->lexemes;
            my $lexstr   = join( '', map { $_->string } @lex );
            my $textstr  = $r->text;
            $textstr =~ s/\s+//g;
            is( $textstr, $lexstr, "Lexemes for reading $r match the reading" );

            foreach my $lexeme ( @lex ) {
                # Check to see if Flemm actually ran
                foreach my $wf ( $lexeme->matching_forms ) {
                    $flemmed++ if $wf->morphology->get_feature('num');
                }
                next if $lexeme->is_disambiguated;
                $ambig++;
            }
        }
        is( $ambig, 102, "Found 102 ambiguous forms as expected" );
        # cmp_ok reports the actual count when the comparison fails.
        cmp_ok( $flemmed, '>', 500, "Found enough Flemm info in wordforms" );

        # Try setting the normal form of a reading and re-analyzing
        my $mr = $tf->collation->reading('r99.2');
        is( $mr->text, 'minspire', "Picked correct test reading" );
        is( $mr->language, 'French', "Reading has correct language setting" );
        $mr->normal_form( "m'inspire" );
        $mr->lemmatize;

        my @new_lexemes = $mr->lexemes;
        is( scalar @new_lexemes, 2, "Got two lexemes for new m'inspire reading" );
        is( $new_lexemes[0]->form->to_string,
            '"French // se|le|lui // cat@pron type@pers pers@1 num@sing case@acc|dat"',
            "New reading has correct first lexeme" );
    }
}




1;