From: Tara L Andrews Date: Wed, 18 Jan 2012 20:29:11 +0000 (+0100) Subject: remove punctuation handling logic; we will do this with relationships instead X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=30f0df340a2e9ca60316a1175f09f7388c113289;p=scpubgit%2Fstemmatology.git remove punctuation handling logic; we will do this with relationships instead --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index f713311..ee8e9da 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -66,12 +66,6 @@ has 'linear' => ( default => 1, ); -has 'collapse_punctuation' => ( - is => 'rw', - isa => 'Bool', - default => 1, - ); - has 'ac_label' => ( is => 'rw', isa => 'Str', @@ -133,8 +127,6 @@ belongs. Required. transposed readings should be treated as two linked readings rather than one, and therefore whether the collation graph is acyclic. Defaults to true. -=item * collapse_punctuation - TODO - =item * baselabel - The default label for the path taken by a base text (if any). Defaults to 'base text'. @@ -154,8 +146,6 @@ the like. Defaults to ' (a.c.)'. =head2 linear -=head2 collapse_punctuation - =head2 wit_list_separator =head2 baselabel @@ -554,7 +544,7 @@ sub as_dot { $used{$reading->id} = 1; # Need not output nodes without separate labels next if $reading->id eq $reading->text; - my $label = $reading->punctuated_form; + my $label = $reading->text; $label =~ s/\"/\\\"/g; $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label ); } @@ -774,7 +764,6 @@ sub as_graphml { $node_el->setAttribute( 'id', $node_xmlid ); foreach my $d ( keys %node_data ) { my $nval = $n->$d; - $nval = $n->punctuated_form if $d eq 'text'; _add_graphml_data( $node_el, $node_data_keys{$d}, $nval ) if defined $nval; } diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 6664fee..5dfa552 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -82,22 +82,6 @@ has 'text' => ( writer => 'alter_text', ); -has 'punctuation' => ( - traits => ['Array'], - isa => 'ArrayRef[HashRef[Str]]', - default => sub { [] }, - handles => { - punctuation => 'elements', - add_punctuation => 'push', - }, - ); - -has 'separate_punctuation' => ( - is => 'ro', - isa => 'Bool', - default => 1, - ); - has 'is_start' => ( is => 'ro', isa => 'Bool', @@ -143,18 +127,12 @@ around BUILDARGS => sub { if( exists $args->{'json'} ) { my $j = delete $args->{'json'}; - # If we have separated punctuation and don't want it, restore it. - if( exists $j->{'punctuation'} - && exists $args->{'separate_punctuation'} - && !$args->{'separate_punctuation'} ) { + # If we have separated punctuation, restore it. + if( exists $j->{'punctuation'} ) { $args->{'text'} = _restore_punct( $j->{'t'}, $j->{'punctuation'} ); - - # In all other cases, keep text and punct as they are. } else { $args->{'text'} = $j->{'t'}; - # we don't use comparison or canonical forms here - $args->{'punctuation'} = $j->{'punctuation'} - if exists $j->{'punctuation'}; + # we don't use comparison or canonical forms yet } } @@ -176,30 +154,7 @@ around BUILDARGS => sub { $class->$orig( $args ); }; -# Post-process the given text, stripping punctuation if we are asked. -sub BUILD { - my $self = shift; - if( $self->separate_punctuation && !$self->is_meta - && !$self->punctuation ) { - my $pos = 0; - my $wspunct = ''; # word sans punctuation - foreach my $char ( split( //, $self->text ) ) { - if( $char =~ /^[[:punct:]]$/ ) { - $self->add_punctuation( { 'char' => $char, 'pos' => $pos } ); - } else { - $wspunct .= $char; - } - $pos++; - } - $self->alter_text( $wspunct ); - } -} - -sub punctuated_form { - my $self = shift; - return _restore_punct( $self->text, $self->punctuation ); -} - +# Utility function for parsing JSON from nCritic sub _restore_punct { my( $word, @punct ) = @_; foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @punct ) { diff --git a/lib/Text/Tradition/Parser/JSON.pm b/lib/Text/Tradition/Parser/JSON.pm index 8d0aa8c..913a21b 100644 --- a/lib/Text/Tradition/Parser/JSON.pm +++ b/lib/Text/Tradition/Parser/JSON.pm @@ -87,7 +87,7 @@ sub parse { my $table = from_json( $opts->{'string'} ); # Create the witnesses - my @witnesses; + my @witnesses; # Keep the ordered list of our witnesses my %ac_wits; # Track these for later removal foreach my $sigil ( map { $_->{'witness'} } @{$table->{'alignment'}} ) { my $wit = $tradition->add_witness( 'sigil' => $sigil ); @@ -163,19 +163,28 @@ $index. Returns an array of readings of the same size as the original @tokenlis sub make_nodes { my( $c, $idx, @tokens ) = @_; my %unique; - my $ctr = 1; - foreach my $t ( @tokens ) { - next unless $t; - my $id = join( ',', $idx, $ctr++ ); - my $rdg = Text::Tradition::Collation::Reading->new( - 'id' => $id, 'json' => $t, 'collation' => $c ); - my $comptoken = $c->collapse_punctuation ? $rdg->text - : $rdg->punctuated_form; - $unique{$comptoken} = $rdg; - $t->{'comptoken'} = $comptoken; + my @readings; + foreach my $j ( 0 .. $#tokens ) { + if( $tokens[$j] ) { + my $t = $tokens[$j]; + my $rdg; + if( exists( $unique{$t->{'t'}} ) ) { + $rdg = $unique{$t->{'t'}}; + } else { + my %args = ( 'id' => join( ',', $idx, $j+1 ), + 'json' => $t, + 'collation' => $c ); + $args{'is_lacuna'} = 1 if $t->{'t'} eq '#LACUNA#'; + $rdg = Text::Tradition::Collation::Reading->new( %args ); + $unique{$t->{'t'}} = $rdg; + } + push( @readings, $rdg ); + } else { + push( @readings, undef ); + } } map { $c->add_reading( $_ ) } values( %unique ); - return map { $_ && $unique{$_->{'comptoken'}} } @tokens; + return @readings; } 1;