if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
$arg = $arg->id;
}
-
# Remove the reading from the graphs.
$self->sequence->delete_vertex( $arg );
$self->relations->delete_vertex( $arg );
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $kept, $deleted ) = $self->_stringify_args( @_ );
+ my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
# The kept reading should inherit the paths and the relationships
# of the deleted reading.
my @vector = ( $kept );
push( @vector, $path->[1] ) if $path->[0] eq $deleted;
unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
+ next if $vector[0] eq $vector[1]; # Don't add a self loop
my %wits = %{$self->sequence->get_edge_attributes( @$path )};
$self->sequence->add_edge( @vector );
my $fwits = $self->sequence->get_edge_attributes( @vector );
foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
my @vector = ( $kept );
push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
+ next if $vector[0] eq $vector[1]; # Don't add a self loop
# Is there a relationship here already? If so, keep it.
# TODO Warn about conflicting relationships
next if $self->relations->has_edge( @vector );
}
# Do the deletion deed.
+ if( $combine_char ) {
+ my $kept_obj = $self->reading( $kept );
+ my $new_text = join( $combine_char, $kept_obj->text,
+ $self->reading( $deleted )->text );
+ $kept_obj->alter_text( $new_text );
+ }
$self->del_reading( $deleted );
}
sub del_path {
my $self = shift;
+ my @args;
+ if( ref( $_[0] ) eq 'ARRAY' ) {
+ my $e = shift @_;
+ @args = ( @$e, @_ );
+ } else {
+ @args = @_;
+ }
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $source, $target, $wit ) = $self->_stringify_args( @_ );
+ my( $source, $target, $wit ) = $self->_stringify_args( @args );
if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
- $self->sequence->del_edge_attribute( $source, $target, $wit );
+ $self->sequence->delete_edge_attribute( $source, $target, $wit );
}
unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
$self->sequence->delete_edge( $source, $target );
=over 4
-=item * type - Can be one of spelling, orthographic, grammatical, meaning, repetition, transposition. The first three are only valid relationships between readings that occur at the same point in the text.
+=item * type - Can be one of spelling, orthographic, grammatical, meaning, lexical, collated, repetition, transposition. All but the last two are only valid relationships between readings that occur at the same point in the text.
=item * non_correctable - (Optional) True if the reading would not have been corrected independently.
# Check the options
if( !defined $options->{'type'} ||
- $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|repetition|transposition)$/i ) {
+ $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lexical|collated|repetition|transposition)$/i ) {
my $t = $options->{'type'} ? $options->{'type'} : '';
- return( undef, "Invalid or missing type" . $options->{'type'} );
+ return( undef, "Invalid or missing type " . $options->{'type'} );
}
- if( $options->{'type'} =~ /^(spelling|orthographic|grammatical|meaning)$/ ) {
+ unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
$options->{'colocated'} = 1;
}
if( $self->relations->has_edge( $source, $target ) ) {
return ( undef, "Relationship already exists between these readings" );
}
- if( $options->{'colocated'} && !$self->relationship_valid( $source, $target ) ) {
+ if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) {
return ( undef, 'Relationship creates witness loop' );
}
}
sub relationship_valid {
- my( $self, $source, $target ) = @_;
- # Check that linking the source and target in a relationship won't lead
- # to a path loop for any witness. First make a lookup table of all the
- # readings related to either the source or the target.
- my @proposed_related = ( $source, $target );
- push( @proposed_related, $source->related_readings( 'colocated' ) );
- push( @proposed_related, $target->related_readings( 'colocated' ) );
- my %pr_ids;
- map { $pr_ids{ $_->id } = 1 } @proposed_related;
-
- # None of these proposed related readings should have a neighbor that
- # is also in proposed_related.
- foreach my $pr ( keys %pr_ids ) {
- foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
- return 0 if exists $pr_ids{$neighbor};
- }
- }
-
- return 1;
+ my( $self, $source, $target, $rel ) = @_;
+ if( $rel eq 'repetition' ) {
+ return 1;
+ } elsif ( $rel eq 'transposition' ) {
+ # Check that the two readings do not appear in the same witness.
+ my %seen_wits;
+ map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source );
+ foreach my $w ( $self->reading_witnesses( $target ) ) {
+ return 0 if $seen_wits{$w};
+ }
+ return 1;
+ } else {
+ # Check that linking the source and target in a relationship won't lead
+ # to a path loop for any witness. First make a lookup table of all the
+ # readings related to either the source or the target.
+ my @proposed_related = ( $source, $target );
+ push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
+ push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
+ my %pr_ids;
+ map { $pr_ids{ $_ } = 1 } @proposed_related;
+
+ # None of these proposed related readings should have a neighbor that
+ # is also in proposed_related.
+ foreach my $pr ( keys %pr_ids ) {
+ foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
+ return 0 if exists $pr_ids{$neighbor};
+ }
+ }
+ return 1;
+ }
+}
+
+# Return a list of the witnesses in which the reading appears.
+sub reading_witnesses {
+ my( $self, $reading ) = @_;
+ # We need only check either the incoming or the outgoing edges; I have
+ # arbitrarily chosen "incoming".
+ my %all_witnesses;
+ foreach my $e ( $self->sequence->edges_to( $reading ) ) {
+ my $wits = $self->sequence->get_edge_attributes( @$e );
+ @all_witnesses{ keys %$wits } = 1;
+ }
+ return keys %all_witnesses;
}
sub related_readings {
if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
$reading = $reading->id;
$return_object = 1;
- print STDERR "Returning related objects\n";
- } else {
- print STDERR "Returning related object names\n";
+# print STDERR "Returning related objects\n";
+# } else {
+# print STDERR "Returning related object names\n";
}
my @related = $self->relations->all_reachable( $reading );
if( $colocated ) {
print $graph->as_svg( $recalculate );
-Returns an SVG string that represents the graph. Uses GraphViz to do
-this, because Graph::Easy doesn\'t cope well with long graphs. Unless
-$recalculate is passed (and is a true value), the method will return a
-cached copy of the SVG after the first call to the method.
+Returns an SVG string that represents the graph, via as_dot and graphviz.
=cut
foreach my $reading ( $self->readings ) {
# Need not output nodes without separate labels
next if $reading->id eq $reading->text;
- $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $reading->text );
+ my $label = $reading->text;
+ $label =~ s/\"/\\\"/g;
+ $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
}
# TODO do something sensible for relationships
foreach my $edge ( @edges ) {
my %variables = ( 'color' => '#000000',
'fontcolor' => '#000000',
- 'label' => join( ', ', $self->path_witnesses( $edge ) ),
+ 'label' => join( ', ', $self->path_display_label( $edge ) ),
);
my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
+ # Account for the rank gap if necessary
+ my $rankgap = $self->reading( $edge->[1] )->rank
+ - $self->reading( $edge->[0] )->rank;
+ $varopts .= ", minlen=$rankgap" if $rankgap > 1;
$dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
$edge->[0], $edge->[1], $varopts );
}
return sort @wits;
}
+sub path_display_label {
+ my( $self, $edge ) = @_;
+ my @wits = $self->path_witnesses( $edge );
+ my $maj = scalar( $self->tradition->witnesses ) * 0.6;
+ if( scalar @wits > $maj ) {
+ return 'majority';
+ } else {
+ return join( ', ', @wits );
+ }
+}
+
+
=item B<as_graphml>
print $graph->as_graphml( $recalculate )
# Add the data keys for the graph
my %graph_data_keys;
my $gdi = 0;
- my @graph_attributes = qw/ wit_list_separator baselabel linear ac_label /;
+ my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
foreach my $datum ( @graph_attributes ) {
$graph_data_keys{$datum} = 'dg'.$gdi++;
my $key = $root->addNewChild( $graphml_ns, 'key' );
my $ndi = 0;
my %node_data = (
id => 'string',
- reading => 'string',
+ text => 'string',
rank => 'string',
is_start => 'boolean',
is_end => 'boolean',
my $edi = 0;
my %edge_data_keys;
my %edge_data = (
- class => 'string', # Path or relationship?
witness => 'string', # ID/label for a path
relationship => 'string', # ID/label for a relationship
extra => 'boolean', # Path key
$key->setAttribute( 'id', $edge_data_keys{$datum} );
}
- # Add the collation graph itself
- my $graph = $root->addNewChild( $graphml_ns, 'graph' );
- $graph->setAttribute( 'edgedefault', 'directed' );
- $graph->setAttribute( 'id', $self->tradition->name );
- $graph->setAttribute( 'parse.edgeids', 'canonical' );
- $graph->setAttribute( 'parse.edges', scalar($self->paths) );
- $graph->setAttribute( 'parse.nodeids', 'canonical' );
- $graph->setAttribute( 'parse.nodes', scalar($self->readings) );
- $graph->setAttribute( 'parse.order', 'nodesfirst' );
+ # Add the collation graphs themselves
+ my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
+ $sgraph->setAttribute( 'edgedefault', 'directed' );
+ $sgraph->setAttribute( 'id', $self->tradition->name );
+ $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
+ $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
+ $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
+ $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
+ $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
+
+ my $rgraph = $root->addNewChild( $graphml_ns, 'graph' );
+ $rgraph->setAttribute( 'edgedefault', 'undirected' );
+ $rgraph->setAttribute( 'id', 'relationships' );
+ $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
+ $rgraph->setAttribute( 'parse.edges', scalar($self->relationships) );
+ $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
+ $rgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
+ $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
# Collation attribute data
foreach my $datum ( @graph_attributes ) {
- _add_graphml_data( $graph, $graph_data_keys{$datum}, $self->$datum );
+ my $value = $datum eq 'version' ? '3.0' : $self->$datum;
+ _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
}
my $node_ctr = 0;
my %node_hash;
- # Add our readings to the graph
+ # Add our readings to the graphs
foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
- my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
+ # Add to the main graph
+ my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
my $node_xmlid = 'n' . $node_ctr++;
$node_hash{ $n->id } = $node_xmlid;
$node_el->setAttribute( 'id', $node_xmlid );
- _add_graphml_data( $node_el, $node_data_keys{'id'}, $n->id );
- _add_graphml_data( $node_el, $node_data_keys{'reading'}, $n->text );
- _add_graphml_data( $node_el, $node_data_keys{'rank'}, $n->rank )
- if $n->has_rank;
+ foreach my $d ( keys %node_data ) {
+ my $nval = $n->$d;
+ _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
+ if defined $nval;
+ }
+ # Add to the relationships graph
+ my $rnode_el = $rgraph->addNewChild( $graphml_ns, 'node' );
+ $rnode_el->setAttribute( 'id', $node_xmlid );
}
- # Add the path edges
+ # Add the path edges to the sequence graph
my $edge_ctr = 0;
foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
# We add an edge in the graphml for every witness in $e.
my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
$node_hash{ $e->[0] },
$node_hash{ $e->[1] } );
- my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
+ my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
$edge_el->setAttribute( 'source', $from );
$edge_el->setAttribute( 'target', $to );
$edge_el->setAttribute( 'id', $id );
- # Add the edge class
- _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
# It's a witness path, so add the witness
my $base = $wit;
}
}
- # Add the relationship edges
+ # Add the relationship edges to the relationships graph
foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->relationships ) {
my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
$node_hash{ $e->[0] },
$node_hash{ $e->[1] } );
- my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
+ my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
$edge_el->setAttribute( 'source', $from );
$edge_el->setAttribute( 'target', $to );
$edge_el->setAttribute( 'id', $id );
- # Add the edge class
- _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'relationship' );
my $data = $self->relations->get_edge_attributes( @$e );
# It's a relationship, so save the relationship data
print $graph->as_csv( $recalculate )
Returns a CSV alignment table representation of the collation graph, one
-row per witness (or witness uncorrected.) Unless $recalculate is passed
-(and is a true value), the method will return a cached copy of the CSV
-after the first call to the method.
+row per witness (or witness uncorrected.)
=cut
my $table = $self->make_alignment_table;
my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
my @result;
- foreach my $row ( @$table ) {
- $csv->combine( @$row );
+ # Make the header row
+ $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
+ push( @result, decode_utf8( $csv->string ) );
+ # Make the rest of the rows
+ foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
+ my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
+ my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
+ $csv->combine( @row );
push( @result, decode_utf8( $csv->string ) );
}
return join( "\n", @result );
}
-# Make an alignment table - $noderefs controls whether the objects
-# in the table are the nodes or simply their readings.
+=item B<make_alignment_table>
+
+my $table = $graph->make_alignment_table( $use_refs, \@wits_to_include )
+
+Return a reference to an alignment table, in a slightly enhanced CollateX
+format which looks like this:
+
+ $table = { alignment => [ { witness => "SIGIL",
+ tokens => [ { t => "READINGTEXT" }, ... ] },
+ { witness => "SIG2",
+ tokens => [ { t => "READINGTEXT" }, ... ] },
+ ... ],
+ length => TEXTLEN };
+
+If $use_refs is set to 1, the reading object is returned in the table
+instead of READINGTEXT; if not, the text of the reading is returned.
+If $wits_to_include is set to a hashref, only the witnesses whose sigil
+keys have a true hash value will be included.
+
+=cut
sub make_alignment_table {
my( $self, $noderefs, $include ) = @_;
warn "Need a linear graph in order to make an alignment table";
return;
}
- my $table;
+ my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
my @all_pos = ( 1 .. $self->end->rank - 1 );
foreach my $wit ( $self->tradition->witnesses ) {
+ if( $include ) {
+ next unless $include->{$wit->sigil};
+ }
# print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
- unshift( @row, $wit->sigil );
- push( @$table, \@row );
+ push( @{$table->{'alignment'}},
+ { 'witness' => $wit->sigil, 'tokens' => \@row } );
if( $wit->is_layered ) {
my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
$wit->sigil.$self->ac_label, $wit->sigil );
my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
- unshift( @ac_row, $wit->sigil . $self->ac_label );
- push( @$table, \@ac_row );
+ push( @{$table->{'alignment'}},
+ { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
}
}
-
- if( $include ) {
- my $winnowed = [];
- # Winnow out the rows for any witness not included.
- foreach my $row ( @$table ) {
- next unless $include->{$row->[0]};
- push( @$winnowed, $row );
- }
- $table = $winnowed;
- }
-
- # Return a table where the witnesses read in columns rather than rows.
- my $turned = _turn_table( $table );
- # TODO We should really go through and delete empty rows.
- return $turned;
+ return $table;
}
sub _make_witness_row {
my( $path, $positions, $noderefs ) = @_;
my %char_hash;
map { $char_hash{$_} = undef } @$positions;
+ my $debug = 0;
foreach my $rdg ( @$path ) {
my $rtext = $rdg->text;
$rtext = '#LACUNA#' if $rdg->is_lacuna;
+ print STDERR "rank " . $rdg->rank . "\n" if $debug;
# print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
- $char_hash{$rdg->rank} = $noderefs ? $rdg : $rtext;
+ $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
+ : { 't' => $rtext };
}
my @row = map { $char_hash{$_} } @$positions;
# Fill in lacuna markers for undef spots in the row
# If we are using node reference, make the lacuna node appear many times
# in the table. If not, use the lacuna tag.
if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
- $el = $noderefs ? $last_el : '#LACUNA#';
+ $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
}
push( @filled_row, $el );
$last_el = $el;
# Tiny utility function to say if a table element is a lacuna
sub _el_is_lacuna {
my $el = shift;
- return 1 if $el eq '#LACUNA#';
- return 1 if ref( $el ) eq 'Text::Tradition::Collation::Reading'
- && $el->is_lacuna;
+ return 1 if $el->{'t'} eq '#LACUNA#';
+ return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
+ && $el->{'t'}->is_lacuna;
return 0;
}
$seen{$n->id} = 1;
my $next = $self->next_reading( $n, $witness, $backup );
- $DB::single = 1 if $next->id eq $end->id;
unless( $next ) {
warn "Did not find any path for $witness from reading " . $n->id;
last;
# Return the successor via the corresponding path.
my $self = shift;
my $answer = $self->_find_linked_reading( 'next', @_ );
+ return undef unless $answer;
return $self->reading( $answer );
}
if $base_le;
# Got this far? We have no appropriate path.
- warn "Could not find $direction node from " . $node->label
+ warn "Could not find $direction node from " . $node->id
. " along path $path";
return undef;
}
sub make_witness_paths {
my( $self ) = @_;
foreach my $wit ( $self->tradition->witnesses ) {
- print STDERR "Making path for " . $wit->sigil . "\n";
+ # print STDERR "Making path for " . $wit->sigil . "\n";
$self->make_witness_path( $wit );
}
}
foreach my $n ( $self->sequence->successors( $r->id ) ) {
my( $tfrom, $tto ) = ( $rel_containers{$r->id},
$rel_containers{$n} );
+ $DB::single = 1 unless $tfrom && $tto;
$topo_graph->add_edge( $tfrom, $tto );
}
}
my $key = $rdg->rank . "||" . $rdg->text;
if( exists $unique_rank_rdg{$key} ) {
# Combine!
- print STDERR "Combining readings at same rank: $key\n";
+ # print STDERR "Combining readings at same rank: $key\n";
$self->merge_readings( $unique_rank_rdg{$key}, $rdg );
} else {
$unique_rank_rdg{$key} = $rdg;
=over
-=item * Rationalize edge classes
-
-=item * Port the internal graph from Graph::Easy to Graph
+=item * Think about making Relationship objects again
=back