no Moose;
__PACKAGE__->meta->make_immutable;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Rationalize edge classes
+
+=item * Port the internal graph from Graph::Easy to Graph
+
+=back
use strict;
use warnings;
-use Text::Tradition::Parser::GraphML;
+use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /;
=head1 NAME
Text::Tradition::Parser::CollateX
+=head1 SYNOPSIS
+
+ use Text::Tradition;
+
+ my $t_from_file = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'CollateX',
+ 'file' => '/path/to/collation.xml'
+ );
+
+ my $t_from_string = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'CollateX',
+ 'string' => $collation_xml,
+ );
+
=head1 DESCRIPTION
Parser module for Text::Tradition, given a GraphML file from the
=head1 METHODS
-=over
-
-=item B<parse>
+=head2 B<parse>
parse( $tradition, $init_options );
-Takes an initialized Text::Tradition::Graph object and its initialization
-options, including the data source; creates the appropriate nodes and edges
-on the graph.
+Takes an initialized Text::Tradition object and a set of options; creates
+the appropriate nodes and edges on the graph. The options hash should
+include either a 'file' argument or a 'string' argument, depending on the
+source of the XML to be parsed.
+
+=begin testing
+
+use Text::Tradition;
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+eval { no warnings; binmode $DB::OUT, ":utf8"; };
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'CollateX',
+ 'file' => $cxfile,
+ );
+
+is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
+if( $t ) {
+ is( scalar $t->collation->readings, 26, "Collation has all readings" );
+ is( scalar $t->collation->paths, 49, "Collation has all paths" );
+ is( scalar $t->witnesses, 3, "Collation has all witnesses" );
+
+ # Check an 'identical' node
+ my $transposed = $t->collation->reading( 'n15' );
+ ok( $transposed->has_primary, "Reading links to transposed primary" );
+ is( $transposed->primary->name, 'n17', "Correct transposition link" );
+}
+
+=end testing
=cut
sub parse {
my( $tradition, $opts ) = @_;
- my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
+ my $graph_data = graphml_parse( $opts );
my $collation = $tradition->collation;
my %witnesses; # Keep track of the witnesses we encounter as we
# run through the graph data.
}
}
- # TODO Need to populate $wit->path / uncorrected_path
+ # Set the $witness->path arrays for each wit.
+ populate_witness_path( $tradition );
- # Now we have added the witnesses and their paths, so we can
- # calculate their explicit positions.
- # TODO CollateX does this, and we should just have it exported there.
+ # Rank the readings.
$collation->calculate_ranks();
}
+=head1 BUGS / TODO
+
+=over
+
+=item * Make this into a stream parser with GraphML
+
+=item * Use CollateX-calculated ranks instead of recalculating our own
+
=back
=head1 LICENSE
use strict;
use warnings;
+use Exporter 'import';
+use vars qw/ @EXPORT_OK $xpc /;
+
use XML::LibXML;
use XML::LibXML::XPathContext;
+@EXPORT_OK = qw/ graphml_parse populate_witness_path /;
+
=head1 NAME
Text::Tradition::Parser::GraphML
=head1 METHODS
-=over
-
-=item B<parse>
+=head2 B<graphml_parse>( $init_opts )
-parse( $init_opts );
+graphml_parse( $init_opts );
=cut
-use vars qw/ $xpc $graphattr $nodedata $witnesses /;
-
# Return graph -> nodeid -> { key1/val1, key2/val2, key3/val3 ... }
# -> edgeid -> { source, target, wit1/val1, wit2/val2 ...}
-sub parse {
+sub graphml_parse {
my( $opts ) = @_;
my $graph_hash = { 'nodes' => [],
'edges' => [] };
-
+
my $parser = XML::LibXML->new();
my $doc;
if( exists $opts->{'string'} ) {
return;
}
+ my( $graphattr, $nodedata, $witnesses ) = ( {}, {}, {} );
my $graphml = $doc->documentElement();
$xpc = XML::LibXML::XPathContext->new( $graphml );
$xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' );
return $graph_hash;
}
+=head2 B<populate_witness_path>( $tradition, $ante_corr )
+
+Given a tradition, populate the 'path' attribute of each of its witnesses
+with the reading sequence that runs from the collation's start to its end
+for that witness's sigil. Useful for all formats based on the graph itself.
+
+The optional second argument is a hashref keyed on witness sigil. For every
+witness whose sigil has a true value in that hash, the 'uncorrected_path'
+attribute is populated as well, from the reading sequence requested for the
+sigil with the collation's ac_label appended (the plain sigil is passed as
+an additional argument). If the hashref is omitted, only 'path' is set.
+
+=cut
+
+sub populate_witness_path {
+ my ( $tradition, $ante_corr ) = @_;
+ # Callers without ante-correctionem information pass only the tradition;
+ # default to an empty hash rather than relying on autovivification of an
+ # undefined scalar when it is dereferenced below.
+ $ante_corr ||= {};
+ my $c = $tradition->collation;
+ print STDERR "Walking paths for witnesses\n";
+ foreach my $wit ( $tradition->witnesses ) {
+ # Every witness gets its main path through the collation.
+ my @path = $c->reading_sequence( $c->start, $c->end, $wit->sigil );
+ $wit->path( \@path );
+ if( $ante_corr->{$wit->sigil} ) {
+ # Get the uncorrected path too
+ my @uc = $c->reading_sequence( $c->start, $c->end,
+ $wit->sigil . $c->ac_label, $wit->sigil );
+ $wit->uncorrected_path( \@uc );
+ }
+ }
+}
+
sub _lookup_node_data {
my( $xmlnode, $key ) = @_;
my $lookup_xpath = './g:data[@key="%s"]/child::text()';
=head1 NAME
-Text::Tradition::Parser::CSV
+Text::Tradition::Parser::KUL
=head1 DESCRIPTION
Parser module for Text::Tradition, given a list of variants as a CSV
file and a reference text as a plaintext file with appropriate line
-breaks.
+breaks. The CSV file is a specialized format developed at KU Leuven,
+and other formats are vastly preferred.
=head1 METHODS
-=over
-
-=item B<read>
+=head2 B<read>
my @apparatus = read( $csv_file );
return @app_list;
}
-=back
-
=head1 LICENSE
This package is free software and is provided "as is" without express
use strict;
use warnings;
-use Text::Tradition::Parser::GraphML;
+use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /;
=head1 NAME
-Text::Tradition::Parser::GraphML
+Text::Tradition::Parser::Self
+=head1 SYNOPSIS
+
+ use Text::Tradition;
+
+ my $t_from_file = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'Self',
+ 'file' => '/path/to/tradition.xml'
+ );
+
+ my $t_from_string = Text::Tradition->new(
+ 'name' => 'my text',
+ 'input' => 'Self',
+ 'string' => $tradition_xml,
+ );
+
=head1 DESCRIPTION
Parser module for Text::Tradition to read in its own GraphML output format.
-TODO document what this format is.
+GraphML is a relatively simple graph description language; a 'graph' element
+can have 'node' and 'edge' elements, and each of these can have simple 'data'
+elements for attributes to be saved.
-=head1 METHODS
+The graph itself has attributes as in the Collation object:
+
+=over
+
+=item * linear
+
+=item * ac_label
+
+=item * baselabel
+
+=item * wit_list_separator
+
+=back
+
+The node objects have the following attributes:
=over
-=item B<parse>
+=item * name
+
+=item * reading
+
+=item * identical
+
+=item * rank
+
+=item * class
+
+=back
+
+The edge objects have the following attributes:
+
+=over
+
+=item * class
+
+=item * witness (for 'path' class edges)
+
+=item * extra (for 'path' class edges)
+
+=item * relationship (for 'relationship' class edges)
+
+=item * equal_rank (for 'relationship' class edges)
-parse( $graph, $graphml_string );
+=item * non_correctable (for 'relationship' class edges)
-Takes an initialized Text::Tradition::Graph object and a string
-containing the GraphML; creates the appropriate nodes and edges on the
-graph.
+=item * non_independent (for 'relationship' class edges)
+
+=back
+
+=head1 METHODS
+
+=head2 B<parse>
+
+parse( $tradition, $opts );
+
+Takes an initialized Text::Tradition object and a set of options; creates
+the appropriate nodes and edges on the graph. The options hash should
+include either a 'file' argument or a 'string' argument, depending on the
+source of the XML to be parsed.
+
+=begin testing
+
+use Text::Tradition;
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+eval { no warnings; binmode $DB::OUT, ":utf8"; };
+
+my $tradition = 't/data/florilegium_graphml.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'Self',
+ 'file' => $tradition,
+ );
+
+is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
+if( $t ) {
+ is( scalar $t->collation->readings, 319, "Collation has all readings" );
+ is( scalar $t->collation->paths, 2854, "Collation has all paths" );
+ is( scalar $t->witnesses, 13, "Collation has all witnesses" );
+}
+
+=end testing
=cut
-# TODO share these with Collation.pm somehow
my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
$SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
= qw/ name reading identical rank class
sub parse {
my( $tradition, $opts ) = @_;
- my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
+ my $graph_data = graphml_parse( $opts );
- # TODO this is begging for stream parsing instead of multiple loops.
-
my $collation = $tradition->collation;
my %witnesses;
# Set up the graph-global attributes. They will appear in the
# hash under their accessor names.
- # TODO Consider simplifying this for nodes & edges as well.
print STDERR "Setting graph globals\n";
foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
my $val = $graph_data->{'attr'}->{$gkey};
}
# Add the nodes to the graph.
- # TODO Are we adding extra start/end nodes?
my $extra_data = {}; # Keep track of data that needs to be processed
# after the nodes & edges are created.
# Create the node. Current valid classes are common and meta.
# Everything else is a normal reading.
- ## TODO RATIONALIZE THESE CLASSES
my $gnode = $collation->add_reading( $nodeid );
$gnode->text( $reading );
$gnode->make_common if $class eq 'common';
# Now add the edges.
print STDERR "Adding graph edges\n";
+ my $has_ante_corr = {};
foreach my $e ( @{$graph_data->{'edges'}} ) {
my $from = $e->{$SOURCE_KEY};
my $to = $e->{$TARGET_KEY};
$tradition->add_witness( sigil => $wit );
$witnesses{$wit} = 1;
}
- $witnesses{$wit} = 2 if $extra;
+ $has_ante_corr->{$wit} = 1 if $extra;
} elsif( $class eq 'relationship' ) {
# We need the metadata about the relationship.
my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
}
# Set the $witness->path arrays for each wit.
- print STDERR "Walking paths for witnesses\n";
- foreach my $wit ( $tradition->witnesses ) {
- my @path = $collation->reading_sequence( $collation->start, $collation->end,
- $wit->sigil );
- $wit->path( \@path );
- if( $witnesses{$wit->sigil} == 2 ) {
- # Get the uncorrected path too
- my @uc = $collation->reading_sequence( $collation->start, $collation->end,
- $wit->sigil . $collation->ac_label, $wit->sigil );
- $wit->uncorrected_path( \@uc );
- }
- }
+ populate_witness_path( $tradition, $has_ante_corr );
}
+1;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Make this into a stream parser with GraphML
+
+=item * Simplify field -> attribute correspondence for nodes and edges
+
+=item * Share key name constants with Collation.pm
+
=back
=head1 LICENSE
=head1 AUTHOR
-Tara L Andrews, aurum@cpan.org
-
-=cut
-
-1;
+Tara L Andrews E<lt>aurum@cpan.orgE<gt>
=head1 METHODS
-=over
-
-=item B<parse>( $tradition, $option_hash )
+=head2 B<parse>( $tradition, $option_hash )
Takes an initialized tradition and a set of options; creates the
appropriate nodes and edges on the graph, as well as the appropriate
return $real;
}
-=begin testing
-
## TODO test specific sorts of nodes of the parallel-seg XML.
-=end testing
-
## Recursive helper function to help us navigate through nested XML,
## picking out the text. $tradition is the tradition, needed for
## making readings; $xn is the XML node currently being looked at,
=item * More unit testing
+=item * Handle special designations apart from a.c.
+
+=item * Mark common nodes within collated variants
+
=back
=head1 LICENSE
=head1 METHODS
-=over
-
-=item B<parse>( $tradition, $option_hash )
+=head2 B<parse>( $tradition, $option_hash )
Takes an initialized tradition and a set of options; creates the
appropriate nodes and edges on the graph, as well as the appropriate
@readings are Text::Tradition::Collation::Reading objects that appear
on the collation graph.
-TODO: Handle collapsed and non-collapsed transpositions.
-
=cut
sub collate_variants {
--- /dev/null
+#!/usr/bin/perl -w
+
+# Test script for the CollateX input parser: builds a Text::Tradition from a
+# sample CollateX file and checks the resulting collation.
+# NOTE(review): the body matches the '=begin testing' section in the
+# CollateX parser's POD and appears to be generated from it -- confirm
+# before editing this file by hand.
+
+use strict;
+use Test::More 'no_plan';
+# Turn on autoflush for the currently selected output handle.
+$| = 1;
+
+
+
+# =begin testing
+{
+use Text::Tradition;
+# Emit UTF-8 on the standard handles.
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+# Do the same for the debugger's output handle; the eval and 'no warnings'
+# keep this quiet when that handle is not available.
+eval { no warnings; binmode $DB::OUT, ":utf8"; };
+
+# Parse the sample CollateX collation from disk.
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'CollateX',
+ 'file' => $cxfile,
+ );
+
+is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
+if( $t ) {
+ # Expected counts of readings, paths and witnesses for the sample file.
+ is( scalar $t->collation->readings, 26, "Collation has all readings" );
+ is( scalar $t->collation->paths, 49, "Collation has all paths" );
+ is( scalar $t->witnesses, 3, "Collation has all witnesses" );
+
+ # Check an 'identical' node: reading n15 should have a primary reading,
+ # and that primary should be n17.
+ my $transposed = $t->collation->reading( 'n15' );
+ ok( $transposed->has_primary, "Reading links to transposed primary" );
+ is( $transposed->primary->name, 'n17', "Correct transposition link" );
+}
+}
+
+
+
+
+1;
--- /dev/null
+#!/usr/bin/perl -w
+
+# Test script for the 'Self' input parser: builds a Text::Tradition from a
+# sample GraphML file and checks the resulting collation.
+# NOTE(review): the body matches the '=begin testing' section in the
+# parser's POD and appears to be generated from it -- confirm before
+# editing this file by hand.
+
+use strict;
+use Test::More 'no_plan';
+# Turn on autoflush for the currently selected output handle.
+$| = 1;
+
+
+
+# =begin testing
+{
+use Text::Tradition;
+# Emit UTF-8 on the standard handles.
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+# Do the same for the debugger's output handle; the eval and 'no warnings'
+# keep this quiet when that handle is not available.
+eval { no warnings; binmode $DB::OUT, ":utf8"; };
+
+# Parse the sample GraphML tradition from disk.
+my $tradition = 't/data/florilegium_graphml.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'Self',
+ 'file' => $tradition,
+ );
+
+is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
+if( $t ) {
+ # Expected counts of readings, paths and witnesses for the sample file.
+ is( scalar $t->collation->readings, 319, "Collation has all readings" );
+ is( scalar $t->collation->paths, 2854, "Collation has all paths" );
+ is( scalar $t->witnesses, 13, "Collation has all witnesses" );
+}
+}
+
+
+
+
+1;
# =begin testing
{
-## TODO test specific sorts of nodes of the parallel-seg XML.
-}
-
-
-
-# =begin testing
-{
use XML::LibXML;
use XML::LibXML::XPathContext;
use Text::Tradition::Parser::TEI;