From: Tara L Andrews Date: Mon, 16 Jan 2012 18:56:27 +0000 (+0100) Subject: break out punctuation from the rest of the reading text X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0e47f4f67650eaa804f9e23b3241718d8eb94433;p=scpubgit%2Fstemmatology.git break out punctuation from the rest of the reading text --- diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 6906da8..7c0daa9 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -81,6 +81,22 @@ has 'text' => ( required => 1, writer => 'alter_text', ); + +has 'punctuation' => ( + traits => ['Array'], + isa => 'ArrayRef[HashRef[Str]]', + default => sub { [] }, + handles => { + punctuation => 'elements', + add_punctuation => 'push', + }, + ); + +has 'separate_punctuation' => ( + is => 'ro', + isa => 'Bool', + default => 1, + ); has 'is_start' => ( is => 'ro', @@ -142,6 +158,33 @@ around BUILDARGS => sub { $class->$orig( $args ); }; +# Post-process the given text, stripping punctuation if we are asked. +sub BUILD { + my $self = shift; + if( $self->separate_punctuation && !$self->is_meta ) { + my $pos = 0; + my $wspunct = ''; # word sans punctuation + foreach my $char ( split( //, $self->text ) ) { + if( $char =~ /^[[:punct:]]$/ ) { + $self->add_punctuation( { 'char' => $char, 'pos' => $pos } ); + } else { + $wspunct .= $char; + } + $pos++; + } + $self->alter_text( $wspunct ); + } +} + +sub punctuated_form { + my $self = shift; + my $word = $self->text; + foreach my $p ( sort { $a->{pos} <=> $b->{pos} } $self->punctuation ) { + substr( $word, $p->{pos}, 0, $p->{char} ); + } + return $word; +} + =head2 is_meta A meta attribute (ha ha), which should be true if any of our 'special' diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index e632737..f81ec36 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -68,8 +68,8 @@ is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" ); ### TODO Check these figures if( $t ) { - is( scalar $t->collation->readings, 312, "Collation has all readings" ); - is( scalar $t->collation->paths, 363, "Collation has all paths" ); + is( scalar $t->collation->readings, 311, "Collation has all readings" ); + is( scalar $t->collation->paths, 361, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } diff --git a/t/text_tradition_collation.t b/t/text_tradition_collation.t new file mode 100644 index 0000000..bb283fa --- /dev/null +++ b/t/text_tradition_collation.t @@ -0,0 +1,35 @@ +#!/usr/bin/perl -w + +use strict; +use Test::More 'no_plan'; +$| = 1; + + + +# =begin testing +{ +use Text::Tradition; + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + +is( $c->common_predecessor( $c->reading('n9'), $c->reading('n23') )->id, + 'n20', "Found correct common predecessor" ); +is( $c->common_successor( $c->reading('n9'), $c->reading('n23') )->id, + '#END#', "Found correct common successor" ); + +is( $c->common_predecessor( $c->reading('n19'), $c->reading('n17') )->id, + 'n16', "Found correct common predecessor for readings on same path" ); +is( $c->common_successor( $c->reading('n21'), $c->reading('n26') )->id, + '#END#', "Found correct common successor for readings on same path" ); +} + + + + +1; diff --git a/t/text_tradition_parser_tabular.t b/t/text_tradition_parser_tabular.t index d7f450c..5426a76 100644 --- a/t/text_tradition_parser_tabular.t +++ b/t/text_tradition_parser_tabular.t @@ -25,8 +25,8 @@ is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" ); ### TODO Check these figures if( $t ) { - is( scalar $t->collation->readings, 312, "Collation has all readings" ); - is( scalar $t->collation->paths, 363, "Collation has all paths" ); + is( scalar $t->collation->readings, 311, "Collation has all readings" ); + is( scalar $t->collation->paths, 361, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } }