From: Tara L Andrews Date: Thu, 12 Jul 2012 23:12:20 +0000 (+0200) Subject: add some more data to the analysis structure X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=354cc918ae96a100b8bbb332aba3ff1023a16f0a;p=scpubgit%2Fstemmatology.git add some more data to the analysis structure --- diff --git a/lib/Text/Tradition/Analysis.pm b/lib/Text/Tradition/Analysis.pm index 85fe90a..c51f3fd 100644 --- a/lib/Text/Tradition/Analysis.pm +++ b/lib/Text/Tradition/Analysis.pm @@ -8,6 +8,7 @@ use Exporter 'import'; use Graph; use JSON qw/ encode_json decode_json /; use LWP::UserAgent; +use Text::LevenshteinXS qw/ distance /; use Text::Tradition; use Text::Tradition::Stemma; use TryCatch; @@ -669,7 +670,6 @@ sub analyze_location { my $subgraph = {}; my $acstr = $c->ac_label; my @acwits; - $DB::single = 1 if $variant_row->{id} == 87; # Note which witnesses positively belong to which group foreach my $rdghash ( @{$variant_row->{'readings'}} ) { my $rid = $rdghash->{'readingid'}; @@ -745,19 +745,36 @@ sub analyze_location { # Resolve the relationship of the parent to the reading, and # save it in our hash. my $pobj = $c->reading( $p ); - my $relation; my $prep = $pobj ? $pobj->id . ' (' . $pobj->text . ')' : $p; + my $phash = { 'label' => $prep }; if( $pobj ) { my $rel = $c->get_relationship( $p, $rdghash->{readingid} ); if( $rel ) { - $relation = { type => $rel->type }; + $phash->{relation} = { type => $rel->type }; if( $rel->has_annotation ) { - $relation->{'annotation'} = $rel->annotation; + $phash->{relation}->{'annotation'} = $rel->annotation; + } + } elsif( $rdghash->{readingid} eq '(omitted)' ) { + $phash->{relation} = { type => 'deletion' }; + } elsif( $rdghash->{text} ) { + # Check for sheer word similarity. + my $rtext = $rdghash->{text}; + my $ptext = $pobj->text; + my $min = length( $rtext ) > length( $ptext ) + ? length( $ptext ) : length( $rtext ); + my $distance = distance( $rtext, $ptext ); + if( $distance < $min ) { + $phash->{relation} = { type => 'wordsimilar' }; } } + # Get the attributes of the parent object while we are here + $phash->{'text'} = $pobj->text if $pobj; + $phash->{'is_nonsense'} = $pobj->is_nonsense; + $phash->{'is_ungrammatical'} = $pobj->grammar_invalid; + } elsif( $p eq '(omitted)' ) { + $phash->{relation} = { type => 'addition' }; } - my $phash = { 'label' => $prep, 'relation' => $relation }; - $phash->{'text'} = $pobj->text if $pobj; + # Save it $rdgparents->{$p} = $phash; }