Removes the given reading from the collation, implicitly removing its
paths and relationships.
-=head2 merge_readings( $main, $second, $concatenate, $with_str )
-
-Merges the $second reading into the $main one. If $concatenate is true, then
-the merged node will carry the text of both readings, concatenated with either
-$with_str (if specified) or a sensible default (the empty string if the
-appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
-
-The first two arguments may be either readings or reading IDs.
-
=head2 has_reading( $id )
Predicate to see whether a given reading ID is in the graph.
$self->relations->add_type( %args );
}
+sub get_relationship_type {
+ my( $self, $name ) = @_;
+ return $self->relations->has_type( $name )
+ ? $self->relations->type( $name ) : undef;
+}
+
### Reading construct/destruct functions
sub add_reading {
$self->$orig( $arg );
};
+=head2 merge_readings( $main, $second, $concatenate, $with_str )
+
+Merges the $second reading into the $main one. If $concatenate is true, then
+the merged node will carry the text of both readings, concatenated with either
+$with_str (if specified) or a sensible default (the empty string if the
+appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
+
+The first two arguments may be either readings or reading IDs.
+
=begin testing
use Text::Tradition;
|| $del_obj eq $self->start || $del_obj eq $self->end );
throw( "Cannot combine text of meta readings" ) if $combine;
}
+ # We can only merge readings in a linear graph if:
+ # - they are contiguous with only one edge between them, OR
+ # - they are at equivalent ranks in the graph.
+ if( $self->linear ) {
+ my @delpred = $del_obj->predecessors;
+ my @keptsuc = $kept_obj->successors;
+ unless ( @delpred == 1 && $delpred[0] eq $kept_obj
+ && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
+ my( $is_ok, $msg ) = $self->relations->relationship_valid(
+ $kept_obj, $del_obj, 'collated' );
+ unless( $is_ok ) {
+ throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
+ }
+ }
+ }
+
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
my $kept = $kept_obj->id;
$self->del_reading( $deleted );
}
+=head2 merge_related( @relationship_types )
+
+Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
+
+WARNING: This operation cannot be undone.
+
+=cut
+
+=begin testing
+
+use Text::Tradition;
+use TryCatch;
+
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'Self',
+ 'file' => 't/data/legendfrag.xml',
+ );
+my $c = $t->collation;
+
+my %rdg_ids;
+map { $rdg_ids{$_} = 1 } $c->readings;
+$c->merge_related( 'orthographic' );
+is( scalar( $c->readings ), keys( %rdg_ids ) - 8,
+ "Successfully collapsed orthographic variation" );
+map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.1 /;
+foreach my $rid ( keys %rdg_ids ) {
+ my $exp = $rdg_ids{$rid};
+ is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
+ ( $exp ? "retained" : "removed" ) );
+}
+ok( $c->linear, "Graph is still linear" );
+try {
+ $c->calculate_ranks; # This should succeed
+ ok( 1, "Can still calculate ranks on the new graph" );
+} catch {
+ ok( 0, "Rank calculation on merged graph failed: $@" );
+}
+
+# Now add some transpositions
+$c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
+$c->merge_related( 'transposition' );
+is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
+ "Transposed relationship is merged away" );
+ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
+ok( !$c->linear, "Graph is no longer linear" );
+try {
+ $c->calculate_ranks; # This should fail
+ ok( 0, "Rank calculation happened on nonlinear graph?!" );
+} catch ( Text::Tradition::Error $e ) {
+ is( $e->message, 'Cannot calculate ranks on a non-linear graph',
+ "Rank calculation on merged graph threw an error" );
+}
+
+
+
+=end testing
+
+=cut
+
+# TODO: there should be a way to display merged without affecting the underlying data!
+
+sub merge_related {
+ my $self = shift;
+ my %reltypehash;
+ map { $reltypehash{$_} = 1 } @_;
+
+ # Set up the filter for finding related readings
+ my $filter = sub {
+ exists $reltypehash{$_[0]->type};
+ };
+
+ my $linear = 1;
+ # Go through all readings looking for related ones
+ foreach my $r ( $self->readings ) {
+ next unless $self->reading( "$r" ); # might have been deleted meanwhile
+ my @related = $self->related_readings( $r, $filter );
+ if( @related ) {
+ push( @related, $r );
+ @related = sort {
+ scalar $b->witnesses <=> scalar $a->witnesses
+ } @related;
+ my $keep = shift @related;
+ foreach my $delr ( @related ) {
+ $linear = undef
+ unless( $self->get_relationship( $keep, $delr )->colocated );
+ $self->merge_readings( $keep, $delr );
+ }
+ }
+ }
+ $self->linear( $linear );
+}
+
=head2 compress_readings
Where possible in the graph, compresses plain sequences of readings into a
=begin testing
+use Test::More::UTF8;
use Text::Tradition;
+use TryCatch;
my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
# Detach the erroneously collated reading
-my $newr = $sc->duplicate_reading( 'n131', 'Ba96' );
+my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
ok( $newr, "New reading was created" );
ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
+my $csucc = $sc->common_successor( 'n131', 'n131_0' );
+is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
# Check that the bad transposition is gone
+is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
+# The collation should not be fixed
+my @pairs = $sc->identical_readings();
+is( scalar @pairs, 0, "Not re-collated yet" );
# Fix the collation
-ok( $sc->add_relationship( 'n124', 'n131_0', { type => 'collated', scope => 'local' } ),
- "Collated the readings correctly" );
-$sc->calculate_ranks();
-$sc->flatten_ranks();
+ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
+@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
+is( scalar @pairs, 3, "Found three more identical readings" );
is( $sc->end->rank, 11, "The ranks shifted appropriately" );
+$sc->flatten_ranks();
is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
+# Check that we can't "duplicate" a reading with no wits or with all wits
+try {
+ my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
+ ok( 0, "Reading duplication without witnesses throws an error" );
+} catch( Text::Tradition::Error $e ) {
+ like( $e->message, qr/Must specify one or more witnesses/,
+ "Reading duplication without witnesses throws the expected error" );
+} catch {
+ ok( 0, "Reading duplication without witnesses threw the wrong error" );
+}
+
+try {
+ my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
+ ok( 0, "Reading duplication with all witnesses throws an error" );
+} catch( Text::Tradition::Error $e ) {
+ like( $e->message, qr/Cannot join all witnesses/,
+ "Reading duplication with all witnesses throws the expected error" );
+} catch {
+ ok( 0, "Reading duplication with all witnesses threw the wrong error" );
+}
+
=end testing
=cut
sub duplicate_reading {
my( $self, $r, @wits ) = @_;
- # Add the new reading, duplicating $r.
+ # Check that we are not doing anything unwise.
+ throw( "Must specify one or more witnesses for the duplicated reading" )
+ unless @wits;
unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
$r = $self->reading( $r );
}
throw( "Cannot duplicate a meta-reading" )
if $r->is_meta;
-
+ throw( "Cannot join all witnesses to the new reading" )
+ if scalar( @wits ) == scalar( $r->witnesses );
+
# Get all the reading attributes and duplicate them.
my $rmeta = Text::Tradition::Collation::Reading->meta;
my %args;
$self->add_path( $newr, $next, $wit );
}
- # Hash the reading ranks and find the closest common successor to our
- # two readings
- my %rrk;
+ # If the graph is ranked, we need to look for relationships that are now
+ # invalid (i.e. 'non-colocation' types that might now be colocated) and
+ # remove them. If not, we can skip it.
my $succ;
+ my %rrk;
+ my @deleted_relations;
if( $self->end->has_rank ) {
+ # Find the point where we can stop checking
$succ = $self->common_successor( $r, $newr );
+
+ # Hash the existing ranks
foreach my $rdg ( $self->readings ) {
$rrk{$rdg->id} = $rdg->rank;
}
- }
-
- # Rebuild the equivalence graph and calculate the new ranks
- $self->relations->rebuild_equivalence();
- $self->calculate_ranks();
+ # Calculate the new ranks
+ $self->calculate_ranks();
- # Check for invalid non-colocated relationships among changed-rank readings
- # from where the ranks start changing up to $succ
- if( $self->end->has_rank ) {
+ # Check for invalid non-colocated relationships among changed-rank readings
+ # from where the ranks start changing up to $succ
my $lastrank = $succ->rank;
foreach my $rdg ( $self->readings ) {
next if $rdg->rank > $lastrank;
my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
next unless @noncolo;
foreach my $nc ( @noncolo ) {
- $self->relations->verify_or_delete( $rdg, $nc );
+ unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
+ push( @deleted_relations, [ $rdg->id, $nc->id ] );
+ }
}
}
}
- return $newr;
+ return ( $newr, @deleted_relations );
}
sub _generate_dup_id {
if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
@args = @{$_[0]};
}
- my( $source, $target ) = $self->_stringify_args( @args );
- $self->$orig( $source, $target );
+ my @stringargs = $self->_stringify_args( @args );
+ $self->$orig( @stringargs );
};
=head2 reading_witnesses( $reading )
foreach my $edge ( @edges ) {
# Do we need to output this edge?
if( $used{$edge->[0]} && $used{$edge->[1]} ) {
- my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
+ my $label = $self->_path_display_label( $opts,
+ $self->path_witnesses( $edge ) );
my $variables = { %edge_attrs, 'label' => $label };
# Account for the rank gap if necessary
if( $filter eq 'transposition' ) {
$filter =~ qr/^transposition$/;
}
+ my %typecolors;
+ my @types = sort( map { $_->name } $self->relations->types );
+ if( exists $opts->{graphcolors} ) {
+ foreach my $tdx ( 0 .. $#types ) {
+ $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
+ }
+ } else {
+ map { $typecolors{$_} = '#FFA14F' } @types;
+ }
foreach my $redge ( $self->relationships ) {
if( $used{$redge->[0]} && $used{$redge->[1]} ) {
- if( $filter ne 'all' ) {
- my $rel = $self->get_relationship( $redge );
- next unless $rel->type =~ /$filter/;
- my $variables = {
- arrowhead => 'none',
- color => '#FFA14F',
- constraint => 'false',
- label => uc( substr( $rel->type, 0, 4 ) ),
- penwidth => '3',
- };
- $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
- $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
+ my $rel = $self->get_relationship( $redge );
+ next unless $filter eq 'all' || $rel->type =~ /$filter/;
+ my $variables = {
+ arrowhead => 'none',
+ color => $typecolors{$rel->type},
+ constraint => 'false',
+ penwidth => '3',
+ };
+ unless( exists $opts->{graphcolors} ) {
+ $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
}
+ $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
+ $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
}
}
}
# Add substitute start and end edges if necessary
foreach my $node ( keys %substart ) {
- my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
+ my $witstr = $self->_path_display_label( $opts,
+ $self->path_witnesses( $substart{$node}, $node ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $nrdg = $self->reading( $node );
if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
$dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
}
foreach my $node ( keys %subend ) {
- my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
+ my $witstr = $self->_path_display_label( $opts,
+ $self->path_witnesses( $node, $subend{$node} ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $varopts = _dot_attr_string( $variables );
$dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
# witnesses only where the main witness is not also in the list.
sub _path_display_label {
my $self = shift;
+ my $opts = shift;
my %wits;
map { $wits{$_} = 1 } @_;
}
}
- # See if we are in a majority situation.
- my $maj = scalar( $self->tradition->witnesses ) * 0.6;
- $maj = $maj > 5 ? $maj : 5;
- if( scalar keys %wits > $maj ) {
- unshift( @disp_ac, 'majority' );
- return join( ', ', @disp_ac );
- } else {
+ if( $opts->{'explicit_wits'} ) {
return join( ', ', sort keys %wits );
+ } else {
+ # See if we are in a majority situation.
+ my $maj = scalar( $self->tradition->witnesses ) * 0.6;
+ $maj = $maj > 5 ? $maj : 5;
+ if( scalar keys %wits > $maj ) {
+ unshift( @disp_ac, 'majority' );
+ return join( ', ', @disp_ac );
+ } else {
+ return join( ', ', sort keys %wits );
+ }
}
}
Returns a CSV alignment table representation of the collation graph, one
row per witness (or witness uncorrected.)
+=head2 as_tsv
+
+Returns a tab-separated alignment table representation of the collation graph,
+one row per witness (or witness uncorrected.)
+
+=begin testing
+
+use Text::Tradition;
+use Text::CSV;
+
+my $READINGS = 311;
+my $PATHS = 361;
+my $WITS = 13;
+my $WITAC = 4;
+
+my $datafile = 't/data/florilegium_tei_ps.xml';
+my $tradition = Text::Tradition->new( 'input' => 'TEI',
+ 'name' => 'test0',
+ 'file' => $datafile,
+ 'linear' => 1 );
+
+my $c = $tradition->collation;
+# Export the thing to CSV
+my $csvstr = $c->as_csv();
+# Count the columns
+my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
+my @lines = split(/\n/, $csvstr );
+ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
+is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
+my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
+ok( @q_ac, "Found a layered witness" );
+
+my $t2 = Text::Tradition->new( input => 'Tabular',
+ name => 'test2',
+ string => $csvstr,
+ sep_char => ',' );
+is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
+is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
+
+# Now do it with TSV
+my $tsvstr = $c->as_tsv();
+my $t3 = Text::Tradition->new( input => 'Tabular',
+ name => 'test3',
+ string => $tsvstr,
+ sep_char => "\t" );
+is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
+is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
+
+my $table = $c->alignment_table;
+my $noaccsv = $c->as_csv({ noac => 1 });
+my @noaclines = split(/\n/, $noaccsv );
+ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
+is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
+is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
+
+my $safecsv = $c->as_csv({ safe_ac => 1});
+my @safelines = split(/\n/, $safecsv );
+ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
+is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
+@q_ac = grep { $_ eq 'Q__L' } $csv->fields;
+ok( @q_ac, "Found a sanitized layered witness" );
+is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
+
+=end testing
+
=cut
-sub as_csv {
- my( $self ) = @_;
- my $table = $self->alignment_table;
- my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
+sub _tabular {
+ my( $self, $opts ) = @_;
+ my $table = $self->alignment_table( $opts );
+ my $csv_options = { binary => 1, quote_null => 0 };
+ $csv_options->{'sep_char'} = $opts->{fieldsep};
+ if( $opts->{fieldsep} eq "\t" ) {
+ # If it is really tab separated, nothing is an escape char.
+ $csv_options->{'quote_char'} = undef;
+ $csv_options->{'escape_char'} = '';
+ }
+ my $csv = Text::CSV->new( $csv_options );
my @result;
# Make the header row
$csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
- push( @result, decode_utf8( $csv->string ) );
+ push( @result, $csv->string );
# Make the rest of the rows
foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
$csv->combine( @row );
- push( @result, decode_utf8( $csv->string ) );
+ push( @result, $csv->string );
}
return join( "\n", @result );
}
+sub as_csv {
+ my $self = shift;
+ my $opts = shift || {};
+ $opts->{fieldsep} = ',';
+ return $self->_tabular( $opts );
+}
+
+sub as_tsv {
+ my $self = shift;
+ my $opts = shift || {};
+ $opts->{fieldsep} = "\t";
+ return $self->_tabular( $opts );
+}
+
=head2 alignment_table
Return a reference to an alignment table, in a slightly enhanced CollateX
=cut
sub alignment_table {
- my( $self ) = @_;
- return $self->cached_table if $self->has_cached_table;
+ my( $self, $opts ) = @_;
+ if( $self->has_cached_table ) {
+ return $self->cached_table
+ unless $opts->{noac} || $opts->{safe_ac};
+ }
# Make sure we can do this
throw( "Need a linear graph in order to make an alignment table" )
my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
$witobj->{'identifier'} = $wit->identifier if $wit->identifier;
push( @{$table->{'alignment'}}, $witobj );
- if( $wit->is_layered ) {
+ if( $wit->is_layered && !$opts->{noac} ) {
my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
$wit->sigil.$self->ac_label );
my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
- my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
+ my $witlabel = $opts->{safe_ac}
+ ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
+ my $witacobj = { 'witness' => $witlabel,
'tokens' => \@ac_row };
$witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
push( @{$table->{'alignment'}}, $witacobj );
}
}
- $self->cached_table( $table );
+ unless( $opts->{noac} || $opts->{safe_ac} ) {
+ $self->cached_table( $table );
+ }
return $table;
}
sub calculate_ranks {
my $self = shift;
# Save the existing ranks, in case we need to invalidate the cached SVG.
+ throw( "Cannot calculate ranks on a non-linear graph" )
+ unless $self->linear;
my %existing_ranks;
map { $existing_ranks{$_} = $_->rank } $self->readings;