From: Tara L Andrews Date: Thu, 2 Feb 2012 20:06:03 +0000 (+0100) Subject: calculate common readings when we parse X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=15db7774a381c3ffff41a26bcb9f9e7bc9e65515;p=scpubgit%2Fstemmatology.git calculate common readings when we parse --- diff --git a/lib/Text/Tradition/Parser/BaseText.pm b/lib/Text/Tradition/Parser/BaseText.pm index 96d54ff..7513135 100644 --- a/lib/Text/Tradition/Parser/BaseText.pm +++ b/lib/Text/Tradition/Parser/BaseText.pm @@ -303,7 +303,7 @@ sub merge_base { # $rel->type, $rel->from->id, $rel->to->id ); # } # } - $collation->calculate_ranks(); + $collation->calculate_common_readings(); # will implicitly rank } =item B diff --git a/lib/Text/Tradition/Parser/CollateX.pm b/lib/Text/Tradition/Parser/CollateX.pm index 0d9db22..7191f7e 100644 --- a/lib/Text/Tradition/Parser/CollateX.pm +++ b/lib/Text/Tradition/Parser/CollateX.pm @@ -156,7 +156,7 @@ sub parse { } # Rank the readings. - $collation->calculate_ranks() if $collation->linear; + $collation->calculate_common_readings(); # will implicitly rank # Save the text for each witness so that we can ensure consistency # later on diff --git a/lib/Text/Tradition/Parser/JSON.pm b/lib/Text/Tradition/Parser/JSON.pm index 3e30651..cc960b4 100644 --- a/lib/Text/Tradition/Parser/JSON.pm +++ b/lib/Text/Tradition/Parser/JSON.pm @@ -195,6 +195,7 @@ sub make_nodes { my( $c, $idx, @tokens ) = @_; my %unique; my @readings; + my $commonctr = 0; foreach my $j ( 0 .. $#tokens ) { if( $tokens[$j] ) { my $word = _restore_punct( $tokens[$j] ); @@ -203,17 +204,30 @@ sub make_nodes { $rdg = $unique{$word}; } else { my %args = ( 'id' => join( ',', $idx, $j+1 ), + 'rank' => $idx, 'text' => $word, 'collation' => $c ); - $args{'is_lacuna'} = 1 if $word eq '#LACUNA#'; + if( $word eq '#LACUNA#' ) { + $args{'is_lacuna'} = 1 + } else { + $commonctr++; + } $rdg = Text::Tradition::Collation::Reading->new( %args ); $unique{$word} = $rdg; } push( @readings, $rdg ); } else { + $commonctr++; push( @readings, undef ); } } + if( $commonctr == 1 ) { + # Whichever reading isn't a lacuna is a common node. + foreach my $rdg ( values %unique ) { + next if $rdg->is_lacuna; + $rdg->is_common( 1 ); + } + } map { $c->add_reading( $_ ) } values( %unique ); return @readings; } diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index b1082f3..ca58c33 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -118,11 +118,11 @@ if( $t ) { =cut my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, - $START_KEY, $END_KEY, $LACUNA_KEY, + $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY, $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY, $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY ) = qw/ id text identical rank - is_start is_end is_lacuna + is_start is_end is_lacuna is_common source target witness extra relationship scope non_correctable non_independent /; @@ -167,6 +167,7 @@ sub parse { my $reading_options = { 'id' => $n->{$IDKEY}, 'is_lacuna' => $n->{$LACUNA_KEY}, + 'is_common' => $n->{$COMMON_KEY}, }; my $rank = $n->{$RANK_KEY}; $reading_options->{'rank'} = $rank if $rank; diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index 3131774..3d5fd03 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -189,6 +189,9 @@ sub parse { # text and identical rank that can be merged. $tradition->collation->flatten_ranks(); + # And now that we've done that, calculate the common nodes. + $tradition->collation->calculate_common_readings(); + # Save the text for each witness so that we can ensure consistency # later on $tradition->collation->text_from_paths(); diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index 40cf668..ba58660 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -184,7 +184,6 @@ sub parse { if( $word ) { my $reading = $nodes->{$word}; my $wit = $witnesses[$w]; - $DB::single = 1 unless $wit; push( @{$wit->path}, $reading ); } # else skip it for empty readings. } @@ -247,8 +246,10 @@ sub parse { sub make_nodes { my( $collation, $row, $index ) = @_; my %unique; + my $commonctr = 0; # Holds the number of unique readings + gaps, ex. lacunae. foreach my $w ( @$row ) { $unique{$w} = 1 if $w; + $commonctr +=1 unless ( $w && $w eq '#LACUNA#' ); } my $ctr = 1; foreach my $w ( keys %unique ) { @@ -257,7 +258,11 @@ sub make_nodes { 'rank' => $index, 'text' => $w, }; - $rargs->{'is_lacuna'} = 1 if $w eq '#LACUNA#'; + if( $w eq '#LACUNA#' ) { + $rargs->{'is_lacuna'} = 1; + } elsif( $commonctr == 1 ) { + $rargs->{'is_common'} = 1; + } my $r = $collation->add_reading( $rargs ); $unique{$w} = $r; $ctr++;