default => 1,
);
-has 'collapse_punctuation' => (
- is => 'rw',
- isa => 'Bool',
- default => 1,
- );
-
has 'ac_label' => (
is => 'rw',
isa => 'Str',
transposed readings should be treated as two linked readings rather than one,
and therefore whether the collation graph is acyclic. Defaults to true.
-=item * collapse_punctuation - TODO
-
=item * baselabel - The default label for the path taken by a base text
(if any). Defaults to 'base text'.
=head2 linear
-=head2 collapse_punctuation
-
=head2 wit_list_separator
=head2 baselabel
$used{$reading->id} = 1;
# Need not output nodes without separate labels
next if $reading->id eq $reading->text;
- my $label = $reading->punctuated_form;
+ my $label = $reading->text;
$label =~ s/\"/\\\"/g;
$dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
}
$node_el->setAttribute( 'id', $node_xmlid );
foreach my $d ( keys %node_data ) {
my $nval = $n->$d;
- $nval = $n->punctuated_form if $d eq 'text';
_add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
if defined $nval;
}
writer => 'alter_text',
);
-has 'punctuation' => (
- traits => ['Array'],
- isa => 'ArrayRef[HashRef[Str]]',
- default => sub { [] },
- handles => {
- punctuation => 'elements',
- add_punctuation => 'push',
- },
- );
-
-has 'separate_punctuation' => (
- is => 'ro',
- isa => 'Bool',
- default => 1,
- );
-
has 'is_start' => (
is => 'ro',
isa => 'Bool',
if( exists $args->{'json'} ) {
my $j = delete $args->{'json'};
- # If we have separated punctuation and don't want it, restore it.
- if( exists $j->{'punctuation'}
- && exists $args->{'separate_punctuation'}
- && !$args->{'separate_punctuation'} ) {
+ # If we have separated punctuation, restore it.
+ if( exists $j->{'punctuation'} ) {
$args->{'text'} = _restore_punct( $j->{'t'}, $j->{'punctuation'} );
-
- # In all other cases, keep text and punct as they are.
} else {
$args->{'text'} = $j->{'t'};
- # we don't use comparison or canonical forms here
- $args->{'punctuation'} = $j->{'punctuation'}
- if exists $j->{'punctuation'};
+ # we don't use comparison or canonical forms yet
}
}
$class->$orig( $args );
};
-# Post-process the given text, stripping punctuation if we are asked.
-sub BUILD {
- my $self = shift;
- if( $self->separate_punctuation && !$self->is_meta
- && !$self->punctuation ) {
- my $pos = 0;
- my $wspunct = ''; # word sans punctuation
- foreach my $char ( split( //, $self->text ) ) {
- if( $char =~ /^[[:punct:]]$/ ) {
- $self->add_punctuation( { 'char' => $char, 'pos' => $pos } );
- } else {
- $wspunct .= $char;
- }
- $pos++;
- }
- $self->alter_text( $wspunct );
- }
-}
-
-sub punctuated_form {
- my $self = shift;
- return _restore_punct( $self->text, $self->punctuation );
-}
-
+# Utility function for parsing JSON from nCritic
sub _restore_punct {
my( $word, @punct ) = @_;
foreach my $p ( sort { $a->{pos} <=> $b->{pos} } @punct ) {
my $table = from_json( $opts->{'string'} );
# Create the witnesses
- my @witnesses;
+ my @witnesses; # Keep the ordered list of our witnesses
my %ac_wits; # Track these for later removal
foreach my $sigil ( map { $_->{'witness'} } @{$table->{'alignment'}} ) {
my $wit = $tradition->add_witness( 'sigil' => $sigil );
sub make_nodes {
my( $c, $idx, @tokens ) = @_;
my %unique;
- my $ctr = 1;
- foreach my $t ( @tokens ) {
- next unless $t;
- my $id = join( ',', $idx, $ctr++ );
- my $rdg = Text::Tradition::Collation::Reading->new(
- 'id' => $id, 'json' => $t, 'collation' => $c );
- my $comptoken = $c->collapse_punctuation ? $rdg->text
- : $rdg->punctuated_form;
- $unique{$comptoken} = $rdg;
- $t->{'comptoken'} = $comptoken;
+ my @readings;
+ foreach my $j ( 0 .. $#tokens ) {
+ if( $tokens[$j] ) {
+ my $t = $tokens[$j];
+ my $rdg;
+ if( exists( $unique{$t->{'t'}} ) ) {
+ $rdg = $unique{$t->{'t'}};
+ } else {
+ my %args = ( 'id' => join( ',', $idx, $j+1 ),
+ 'json' => $t,
+ 'collation' => $c );
+ $args{'is_lacuna'} = 1 if $t->{'t'} eq '#LACUNA#';
+ $rdg = Text::Tradition::Collation::Reading->new( %args );
+ $unique{$t->{'t'}} = $rdg;
+ }
+ push( @readings, $rdg );
+ } else {
+ push( @readings, undef );
+ }
}
map { $c->add_reading( $_ ) } values( %unique );
- return map { $_ && $unique{$_->{'comptoken'}} } @tokens;
+ return @readings;
}
1;