package Text::Tradition;
+use Module::Load;
use Moose;
use Text::Tradition::Collation;
use Text::Tradition::Witness;
is => 'rw',
isa => 'ArrayRef[Text::Tradition::Witness]',
handles => {
- all_options => 'elements',
- add_option => 'push',
- map_options => 'map',
- option_count => 'count',
- sorted_options => 'sort',
+ all => 'elements',
+ add => 'push',
},
+ default => sub { [] },
);
sub BUILD {
# TODO Now how to collate these?
}
} else {
- # Else we got passed args intended for the collator.
- $init_args->{'tradition'} = $self;
- $self->_save_collation( Text::Tradition::Collation->new( %$init_args ) );
- $self->witnesses( $self->collation->create_witnesses() );
+ # Else we need to parse some collation data. Make a Collation object
+ my $collation = Text::Tradition::Collation->new( %$init_args,
+ 'tradition' => $self );
+ $self->_save_collation( $collation );
+
+ # Call the appropriate parser on the given data
+ my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$init_args );
+ my $format = shift( @formats );
+ unless( $format ) {
+ warn "No data given to create a collation; will initialize an empty one";
+ }
+ if( $format && $format =~ /^(CSV|CTE)$/ &&
+ !exists $init_args->{'base'} ) {
+ warn "Cannot make a collation from $format without a base text";
+ return;
+ }
+
+ # Starting point for all texts
+ my $last_node = $collation->add_reading( '#START#' );
+
+ # Now do the parsing.
+ my @sigla;
+ if( $format ) {
+ my @parseargs;
+ if( $format =~ /^(CSV|CTE)$/ ) {
+ @parseargs = ( 'base' => $init_args->{'base'},
+ 'data' => $init_args->{$format},
+ 'format' => $format );
+ $format = 'BaseText';
+ } else {
+ @parseargs = ( $init_args->{ $format } );
+ }
+ my $mod = "Text::Tradition::Parser::$format";
+ load( $mod );
+ $mod->can('parse')->( $self, @parseargs );
+ }
}
}
+sub add_witness {
+ my $self = shift;
+ my $new_wit = Text::Tradition::Witness->new( @_ );
+ push( @{$self->witnesses}, $new_wit );
+}
+
# The user will usually be instantiating a Tradition object, and
# examining its collation. The information about the tradition can
# come via several routes:
use Graph::Easy;
use IPC::Run qw( run binary );
-use Module::Load;
use Text::Tradition::Collation::Reading;
use Moose;
predicate => 'has_graphml',
);
+has 'wit_list_separator' => (
+ is => 'rw',
+ isa => 'Str',
+ default => ', ',
+ );
+
# The collation can be created two ways:
# 1. Collate a set of witnesses (with CollateX I guess) and process
# the results as in 2.
sub BUILD {
my( $self, $args ) = @_;
-
- # Call the appropriate parser on the given data
- my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$args );
- my $format = shift( @formats );
- unless( $format ) {
- warn "No data given to create a graph; will initialize an empty one";
- }
- if( $format && $format =~ /^(CSV|CTE)$/ && !exists $args->{'base'} ) {
- warn "Cannot make a graph from $format without a base text";
- return;
- }
-
- # Initialize our graph object.
$self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
- $self->graph->set_attribute( 'node', 'shape', 'ellipse' );
- # Starting point for all texts
- my $last_node = $self->add_reading( '#START#' );
-
- # Now do the parsing.
- my @sigla;
- if( $format ) {
- my @parseargs;
- if( $format =~ /^(CSV|CTE)$/ ) {
- @parseargs = ( 'base' => $args->{'base'},
- 'data' => $args->{$format},
- 'format' => $format );
- $format = 'BaseText';
- } else {
- @parseargs = ( $args->{ $format } );
- }
- my $mod = "Text::Tradition::Parser::$format";
- load( $mod );
- # TODO parse needs to return witness IDs
- @sigla = $mod->can('parse')->( $self, @parseargs );
- }
- # Do we need to initialize the witnesses?
- unless( $args->{'have_witnesses'} ) {
- # initialize Witness objects for all our witnesses
- my @witnesses;
- foreach my $sigil ( @sigla ) {
- push( @witnesses, Text::Tradition::Witness->new( 'sigil' => $sigil ) );
- }
- $self->tradition->witnesses( \@witnesses );
- }
+ # Pass through any graph-specific options.
+ my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
+ $self->graph->set_attribute( 'node', 'shape', $shape );
}
# Wrappers around some methods
print $graph->as_svg( $recalculate );
Returns an SVG string that represents the graph. Uses GraphViz to do
-this, because Graph::Easy doesn't cope well with long graphs. Unless
+this, because Graph::Easy doesn\'t cope well with long graphs. Unless
$recalculate is passed (and is a true value), the method will return a
cached copy of the SVG after the first call to the method.
=cut
sub start {
- # Return the beginning node of the graph.
+ # Return the beginning reading of the graph.
my $self = shift;
my( $new_start ) = @_;
if( $new_start ) {
return $self->reading('#START#');
}
-=item B<next_word>
+=item B<next_reading>
-my $next_node = $graph->next_word( $node, $path );
+my $next_reading = $graph->next_reading( $reading, $witpath );
-Returns the node that follows the given node along the given witness
+Returns the reading that follows the given reading along the given witness
path. TODO These are badly named.
=cut
-sub next_word {
+sub next_reading {
# Return the successor via the corresponding edge.
my $self = shift;
- return $self->_find_linked_word( 'next', @_ );
+ return $self->_find_linked_reading( 'next', @_ );
}
-=item B<prior_word>
+=item B<prior_reading>
-my $prior_node = $graph->prior_word( $node, $path );
+my $prior_reading = $graph->prior_reading( $reading, $witpath );
-Returns the node that precedes the given node along the given witness
+Returns the reading that precedes the given reading along the given witness
path. TODO These are badly named.
=cut
-sub prior_word {
+sub prior_reading {
# Return the predecessor via the corresponding edge.
my $self = shift;
- return $self->_find_linked_word( 'prior', @_ );
+ return $self->_find_linked_reading( 'prior', @_ );
}
-sub _find_linked_word {
+sub _find_linked_reading {
my( $self, $direction, $node, $edge ) = @_;
$edge = 'base text' unless $edge;
my @linked_edges = $direction eq 'next'
# We have to find the linked edge that contains all of the
# witnesses supplied in $edge.
- my @edge_wits = split( /, /, $edge );
+ my @edge_wits = $self->witnesses_of_label( $edge );
foreach my $le ( @linked_edges ) {
- my @le_wits = split( /, /, $le->name() );
+ my @le_wits = $self->witnesses_of_label( $le->name );
if( _is_within( \@edge_wits, \@le_wits ) ) {
# This is the right edge.
return $direction eq 'next' ? $le->to() : $le->from();
return undef;
}
-sub create_witnesses {
- # TODO Given a new collation, make a bunch of Witness objects.
+# Some set logic.
+sub _is_within {
+ my( $set1, $set2 ) = @_;
+ my $ret = 1;
+ foreach my $el ( @$set1 ) {
+ $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
+ }
+ return $ret;
+}
+
+# Walk the paths for each witness in the graph, and return the nodes
+# that the graph has in common.
+
+sub walk_witness_paths {
+ my( $self, $end ) = @_;
+ # For each witness, walk the path through the graph.
+ # Then we need to find the common nodes.
+ # TODO This method is going to fall down if we have a very gappy
+ # text in the collation.
+ my $paths = {};
+ my @common_nodes;
+ foreach my $wit ( @{$self->tradition->witnesses} ) {
+ my $curr_reading = $self->start;
+ my @wit_path = ( $curr_reading );
+ my %seen_readings;
+ # TODO Detect loops at some point
+ while( $curr_reading->name ne $end->name ) {
+ if( $seen_readings{$curr_reading->name} ) {
+ warn "Detected loop walking path for witness " . $wit->sigil
+ . " at reading " . $curr_reading->name;
+ last;
+ }
+ my $next_reading = $self->next_reading( $curr_reading,
+ $wit->sigil );
+ push( @wit_path, $next_reading );
+ $seen_readings{$curr_reading->name} = 1;
+ $curr_reading = $next_reading;
+ }
+ $wit->path( \@wit_path );
+ if( @common_nodes ) {
+ my @cn;
+ foreach my $n ( @wit_path ) {
+ push( @cn, $n ) if grep { $_ eq $n } @common_nodes;
+ }
+ @common_nodes = ();
+ push( @common_nodes, @cn );
+ } else {
+ push( @common_nodes, @wit_path );
+ }
+ }
+
+ # Mark all the nodes as either common or not.
+ foreach my $cn ( @common_nodes ) {
+ print STDERR "Setting " . $cn->name . " as common node\n";
+ $cn->make_common;
+ }
+ foreach my $n ( $self->readings() ) {
+ $n->make_variant unless $n->is_common;
+ }
+}
+
+sub common_readings {
+ my $self = shift;
+ my @common = grep { $_->is_common } $self->readings();
+ return @common;
+}
+
+# Calculate the relative positions of nodes in the graph, if they
+# were not given to us.
+sub calculate_positions {
+ my $self = shift;
+
+ # We have to calculate the position identifiers for each word,
+ # keyed on the common nodes. This will be 'fun'. The end result
+ # is a hash per witness, whose key is the word node and whose
+ # value is its position in the text. Common nodes are always N,1
+ # so have identical positions in each text.
+ my @common = $self->common_readings();
+
+ my $node_pos = {};
+ foreach my $wit ( @{$self->tradition->witnesses} ) {
+ # First we walk each path, making a matrix for each witness that
+ # corresponds to its eventual position identifier. Common nodes
+ # always start a new row, and are thus always in the first column.
+
+ my $wit_matrix = [];
+ my $cn = 0; # We should hit the common readings in order.
+ my $row = [];
+ foreach my $wn ( @{$wit->path} ) {
+ if( $wn eq $common[$cn] ) {
+ # Set up to look for the next common node, and
+ # start a new row of words.
+ $cn++;
+ push( @$wit_matrix, $row ) if scalar( @$row );
+ $row = [];
+ }
+ push( @$row, $wn );
+ }
+ push( @$wit_matrix, $row ); # Push the last row onto the matrix
+
+ # Now we have a matrix per witness, so that each row in the
+ # matrix begins with a common node, and continues with all the
+ # variant words that appear in the witness. We turn this into
+ # real positions in row,cell format. But we need some
+ # trickery in order to make sure that each node gets assigned
+ # to only one position.
+
+ foreach my $li ( 1..scalar(@$wit_matrix) ) {
+ foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
+ my $reading = $wit_matrix->[$li-1]->[$di-1];
+ my $position = "$li,$di";
+ # If we have seen this node before, we need to compare
+ # its position with what went before.
+ unless( $reading->has_position &&
+ _cmp_position( $position, $reading->position ) < 1 ) {
+ # The new position ID replaces the old one.
+ $reading->position( $position );
+ } # otherwise, the old position needs to stay.
+ }
+ }
+ }
+}
+
+sub _cmp_position {
+ my( $a, $b ) = @_;
+ my @pos_a = split(/,/, $a );
+ my @pos_b = split(/,/, $b );
- return [];
+ my $big_cmp = $pos_a[0] <=> $pos_b[0];
+ return $big_cmp if $big_cmp;
+ # else
+ return $pos_a[1] <=> $pos_b[1];
}
+
+# Return the string that joins together a list of witnesses for
+# display on a single path.
+sub path_label {
+ my $self = shift;
+ return join( $self->wit_list_separator, @_ );
+}
+
+sub witnesses_of_label {
+ my $self = shift;
+ my $regex = $self->wit_list_separator;
+ return split( /^\Q$regex\E$/, @_ );
+}
no Moose;
__PACKAGE__->meta->make_immutable;
has 'position' => (
is => 'rw',
isa => 'Position',
+ predicate => 'has_position',
);
# This contains an array of reading objects; the array is a pool,
$self->relationships->{ $rel } = $value;
}
+sub is_common {
+ my( $self ) = shift;
+ return $self->get_attribute( 'class' ) eq 'common';
+}
+
+sub make_common {
+ my( $self ) = shift;
+ $self->set_attribute( 'class', 'common' );
+}
+
+sub make_variant {
+ my( $self ) = shift;
+ $self->set_attribute( 'class', 'variant' );
+}
+
no Moose;
__PACKAGE__->meta->make_immutable;
=cut
+use vars qw/ $xpc %nodedata /;
+
sub parse {
- my( $collation, $graphml_str ) = @_;
+ my( $tradition, $graphml_str ) = @_;
+ my $collation = $tradition->collation;
my $parser = XML::LibXML->new();
my $doc = $parser->parse_string( $graphml_str );
my $graphml = $doc->documentElement();
- my $xpc = XML::LibXML::XPathContext->new( $graphml );
+ $xpc = XML::LibXML::XPathContext->new( $graphml );
$xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' );
# First get the ID keys, for witnesses and for collation data
- my %nodedata;
my %witnesses;
foreach my $k ( $xpc->findnodes( '//g:key' ) ) {
# Each key has a 'for' attribute; the edge keys are witnesses, and
# the node keys contain an ID and string for each node.
if( $k->getAttribute( 'for' ) eq 'node' ) {
+ # The node data keys we expect are:
+ # 'number' -> unique node identifier
+ # 'token' -> reading for the node
+ # 'identical' -> the node of which this node is
+ # a transposed version
+ # 'position' -> a calculated position for the node
$nodedata{ $k->getAttribute( 'attr.name' ) } = $k->getAttribute( 'id' );
} else {
$witnesses{ $k->getAttribute( 'id' ) } = $k->getAttribute( 'attr.name' );
}
}
+ my $has_explicit_positions = defined $nodedata{'position'};
+
+ # Add the witnesses that we have found
+ foreach my $wit ( values %witnesses ) {
+ $tradition->add_witness( 'sigil' => $wit );
+ }
+
my $graph_el = $xpc->find( '/g:graphml/g:graph' )->[0];
# Add the nodes to the graph. First delete the start node, because
my $extra_data = {};
my @nodes = $xpc->findnodes( '//g:node' );
foreach my $n ( @nodes ) {
- my $lookup_xpath = './g:data[@key="%s"]/child::text()';
- my $id = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{'number'} ), $n );
- my $label = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{'token'} ), $n );
+ my $id = _lookup_node_data( $n, 'number' );
+ my $label = _lookup_node_data( $n, 'token' );
my $gnode = $collation->add_reading( $id );
$node_name{ $n->getAttribute('id') } = $id;
$gnode->set_attribute( 'label', $label );
- # Now get the rest of the data
+ # Now get the rest of the data, i.e. not the ID or label
my $extra = {};
- my @keys = grep { $_ !~ /^(number|token)$/ } keys( %nodedata );
- foreach my $k ( @keys ) {
- my $data = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{ $k } ), $n );
- $extra->{ $k } = $data;
+ foreach my $k ( keys %nodedata ) {
+ next if $k =~ /^(number|token)$/;
+ $extra->{ $k } = _lookup_node_data( $n, $k );
}
$extra_data->{ $id } = $extra;
}
# Label according to the witnesses present.
my @wit_ids = $xpc->findnodes( './g:data/attribute::key', $e );
my @wit_names = map { $witnesses{ $_->getValue() } } @wit_ids;
- my $label = join( ', ', @wit_names );
-
+ my $label = $collation->path_label( @wit_names );
$collation->add_path( $from, $to, $label );
}
$node_name{ $xpc->findvalue( $id_xpath, $tn ) } ) );
}
-
# Find the beginning and end nodes of the graph. The beginning node
# has no incoming edges; the end node has no outgoing edges.
my( $begin_node, $end_node );
foreach my $gnode ( $collation->readings() ) {
- print STDERR "Checking node " . $gnode->name . "\n";
+ # print STDERR "Checking node " . $gnode->name . "\n";
my @outgoing = $gnode->outgoing();
my @incoming = $gnode->incoming();
unless( scalar @incoming ) {
warn "Already have a beginning node" if $begin_node;
- my $node_xml_id = $node_id{ $gnode->name() };
- my @bn = $xpc->findnodes( '//g:node[@id="' . $node_xml_id . '"]' );
- warn "XPath did not find a node for id $node_xml_id"
- unless scalar @bn;
- $begin_node = $bn[0];
+ $begin_node = $gnode;
$collation->start( $gnode );
- $node_name{ $begin_node->getAttribute( 'id' ) } = '#START#';
- $node_id{'#START#'} = $begin_node->getAttribute( 'id' );
}
unless( scalar @outgoing ) {
warn "Already have an ending node" if $end_node;
- my $node_xml_id = $node_id{ $gnode->name() };
- my @bn = $xpc->findnodes( '//g:node[@id="' . $node_xml_id . '"]' );
- warn "XPath did not find a node for id $node_xml_id"
- unless scalar @bn;
- $end_node = $bn[0];
+ $end_node = $gnode;
}
}
- # Now for each witness, walk the path through the graph.
- # Then we need to find the common nodes.
- # TODO This method is going to fall down if we have a very gappy
- # text in the collation.
- # TODO think about whether it makes more sense to do this in the
- # XML or in the graph. Right now it's the XML.
- my $paths = {};
- my @common_nodes;
- foreach my $wit ( keys %witnesses ) {
- my $node_id = $begin_node->getAttribute('id');
- my @wit_path = ( $node_name{ $node_id } );
- # TODO Detect loops at some point
- while( $node_id ne $end_node->getAttribute('id') ) {
- # Find the node which is the target of the edge whose
- # source is $node_id and applies to this witness.
- my $xpath_expr = '//g:edge[child::g:data[@key="'
- . $wit . '"] and attribute::source="'
- . $node_id . '"]';
- my $next_edge = $xpc->find( $xpath_expr, $graph_el )->[0];
- print STDERR " - at $wit / $node_id\n";
- $node_id = $next_edge->getAttribute('target');
- push( @wit_path, $node_name{ $node_id } );
- }
- $paths->{ $witnesses{ $wit }} = \@wit_path;
- if( @common_nodes ) {
- my @cn;
- foreach my $n ( @wit_path) {
- push( @cn, $n ) if grep { $_ eq $n } @common_nodes;
- }
- @common_nodes = ();
- push( @common_nodes, @cn );
- } else {
- push( @common_nodes, @wit_path );
+ $collation->walk_witness_paths( $end_node );
+ # Now we have added the witnesses and their paths, so have also
+ # implicitly marked the common nodes. Now we can calculate their
+ # explicit permissions. This is separate because it won't always
+ # be necessary with the GraphML parsing.
+ $collation->calculate_positions() unless $has_explicit_positions;
+ if( $has_explicit_positions ) {
+ # Record the positions that came with each graph node.
+ # TODO we really need to translate these into our own style of
+ # position identifier. That's why we defer this until now.
+ foreach my $node_id ( keys %$extra_data ) {
+ my $pos = $extra_data->{$node_id}->{'position'};
+ $collation->reading( $node_name{$node_id} )->position( $pos );
}
+ } else {
+ # Calculate a position for each graph node.
+ $collation->calculate_positions();
}
+}
- # Mark all the nodes as either common or not.
- foreach my $cn ( @common_nodes ) {
- print STDERR "Setting $cn as common node\n";
- $collation->reading( $cn )->set_attribute( 'class', 'common' );
- }
- foreach my $n ( $collation->readings() ) {
- $n->set_attribute( 'class', 'variant' )
- unless $n->get_attribute( 'class' ) eq 'common';
- }
-
- # Now calculate graph positions.
- # $collation->make_positions( \@common_nodes, $paths );
-
+sub _lookup_node_data {
+ my( $xmlnode, $key ) = @_;
+ my $lookup_xpath = './g:data[@key="%s"]/child::text()';
+ my $data = $xpc->findvalue( sprintf( $lookup_xpath, $nodedata{$key} ),
+ $xmlnode );
+ return $data;
}
=back
predicate => 'has_source',
);
+has 'path' => (
+ is => 'rw',
+ isa => 'ArrayRef[Text::Tradition::Collation::Reading]',
+ );
+
sub BUILD {
my $self = shift;
if( $self->has_source ) {