use strict;
use warnings;
-use Text::Tradition::Parser::GraphML;
+use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
=head1 NAME
Text::Tradition::Parser::CollateX
+=head1 SYNOPSIS
+
+ use Text::Tradition;
+
+ my $t_from_file = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'CollateX',
+ 'file' => '/path/to/collation.xml'
+ );
+
+ my $t_from_string = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'CollateX',
+ 'string' => $collation_xml,
+ );
+
=head1 DESCRIPTION
Parser module for Text::Tradition, given a GraphML file from the
=head1 METHODS
-=over
+=head2 B<parse>
+
+parse( $tradition, $init_options );
+
+Takes an initialized Text::Tradition object and a set of options; creates
+the appropriate nodes and edges on the graph. The options hash should
+include either a 'file' argument or a 'string' argument, depending on the
+source of the XML to be parsed.
-=item B<parse>
+=begin testing
-parse( $graph, $graphml_string );
+use Text::Tradition;
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+eval { no warnings; binmode $DB::OUT, ":utf8"; };
-Takes an initialized Text::Tradition::Graph object and a string
-containing the GraphML; creates the appropriate nodes and edges on the
-graph.
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'CollateX',
+ 'file' => $cxfile,
+ );
+
+is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
+if( $t ) {
+ is( scalar $t->collation->readings, 26, "Collation has all readings" );
+ is( scalar $t->collation->paths, 49, "Collation has all paths" );
+ is( scalar $t->witnesses, 3, "Collation has all witnesses" );
+
+ # Check an 'identical' node
+ my $transposed = $t->collation->reading( 'n15' );
+ ok( $transposed->has_primary, "Reading links to transposed primary" );
+ is( $transposed->primary->name, 'n17', "Correct transposition link" );
+}
+
+=end testing
=cut
my $TRANSKEY = 'identical';
sub parse {
- my( $tradition, $graphml_str ) = @_;
- my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
+ my( $tradition, $opts ) = @_;
+ my $graph_data = graphml_parse( $opts );
my $collation = $tradition->collation;
- my %witnesses; # Keep track of the witnesses we encounter as we
- # run through the graph data.
-
- # Add the nodes to the graph. First delete the start node, because
- # GraphML graphs will have their own start nodes.
- $collation->del_reading( $collation->start() );
- $collation->del_reading( $collation->end() );
+ # First add the readings to the graph.
my $extra_data = {}; # Keep track of info to be processed after all
# nodes have been created
foreach my $n ( @{$graph_data->{'nodes'}} ) {
- my %node_data = %$n;
- my $nodeid = delete $node_data{$IDKEY};
- my $token = delete $node_data{$CONTENTKEY};
- unless( defined $nodeid && defined $token ) {
+ unless( defined $n->{$IDKEY} && defined $n->{$CONTENTKEY} ) {
warn "Did not find an ID or token for graph node, can't add it";
next;
}
- my $gnode = $collation->add_reading( $nodeid );
- $gnode->text( $token );
-
- # Whatever is left is extra info to be processed later.
+ my %node_data = %$n;
+ my $gnode_args = {
+ 'collation' => $collation,
+ 'id' => delete $node_data{$IDKEY},
+ 'text' => delete $node_data{$CONTENTKEY},
+ };
+ my $gnode = $collation->add_reading( $gnode_args );
+
+ # Whatever is left is extra info to be processed later,
+ # e.g. a transposition link.
if( keys %node_data ) {
- $extra_data->{$nodeid} = \%node_data;
+ $extra_data->{$gnode->id} = \%node_data;
}
}
- # Now add the edges.
+ # Now add the path edges.
foreach my $e ( @{$graph_data->{'edges'}} ) {
my %edge_data = %$e;
my $from = delete $edge_data{'source'};
foreach my $ekey ( keys %edge_data ) {
my $wit = $edge_data{$ekey};
# Create the witness object if it does not yet exist.
- unless( $witnesses{$wit} ) {
+ unless( $tradition->witness( $wit ) ) {
$tradition->add_witness( 'sigil' => $wit );
- $witnesses{$wit} = 1;
}
$collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit );
}
foreach my $nodeid ( keys %$extra_data ) {
my $ed = $extra_data->{$nodeid};
if( exists $ed->{$TRANSKEY} ) {
-
my $tn_reading = $collation->reading( $nodeid );
my $main_reading = $collation->reading( $ed->{$TRANSKEY} );
if( $collation->linear ) {
- $tn_reading->set_identical( $main_reading );
+ $collation->add_relationship( $tn_reading, $main_reading,
+ { type => 'transposition' } );
} else {
$collation->merge_readings( $main_reading, $tn_reading );
}
# Find the beginning and end nodes of the graph. The beginning node
# has no incoming edges; the end node has no outgoing edges.
my( $begin_node, $end_node );
- foreach my $gnode ( $collation->readings() ) {
- # print STDERR "Checking node " . $gnode->name . "\n";
- my @outgoing = $gnode->outgoing();
- my @incoming = $gnode->incoming();
-
- unless( scalar @incoming ) {
- warn "Already have a beginning node" if $begin_node;
- $begin_node = $gnode;
- $collation->start( $gnode );
- }
- unless( scalar @outgoing ) {
- warn "Already have an ending node" if $end_node;
- $end_node = $gnode;
- $collation->end( $gnode );
- }
+ my @starts = $collation->sequence->source_vertices();
+ my @ends = $collation->sequence->sink_vertices();
+ if( @starts != 1 ) {
+ warn "Found more or less than one start vertex: @starts";
+ } else {
+ $collation->merge_readings( $collation->start, @starts );
+ }
+ if( @ends != 1 ) {
+ warn "Found more or less than one end vertex: @ends";
+ } else {
+ $collation->merge_readings( $collation->end, @ends );
}
- # TODO Need to populate $wit->path / uncorrected_path
-
- # Now we have added the witnesses and their paths, so we can
- # calculate their explicit positions.
- # TODO CollateX does this, and we should just have it exported there.
- $collation->calculate_ranks();
+ # Rank the readings.
+ $collation->calculate_ranks() if $collation->linear;
}
+=head1 BUGS / TODO
+
+=over
+
+=item * Make this into a stream parser with GraphML
+
+=item * Use CollateX-calculated ranks instead of recalculating our own
+
=back
=head1 LICENSE