AAAAAAGkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAA
AAAAAADKPI0jSCsAAAAILogRbWFrZV90cmFkaXRpb24u
cGwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
- AAAAAAAAAAAAAAAAAAAAACJK7cp/KfxURVhUAAAAAP//
+ AAAAAAAAAAAAAAAAAAAAADA/68quyjNURVhUAAAAAP//
//8AAAkgAAAAAAAAAAAAAAAAAAAADHN0ZW1tYXRvbG9n
- eQAQAAgAAMo8cQMAAAARAAgAAMp/DdwAAAABABAACC6I
+ eQAQAAgAAMo8cQMAAAARAAgAAMqurhMAAAABABAACC6I
AAckkwAFBAYAAL8xAAIAQk1hY2ludG9zaCBIRDpVc2Vy
czoAdGxhOgBQcm9qZWN0czoAc3RlbW1hdG9sb2d5OgBt
YWtlX3RyYWRpdGlvbi5wbAAOACQAEQBtAGEAawBlAF8A
$self->_save_collation( $collation );
# Call the appropriate parser on the given data
- my @formats = grep { /^(Self|CollateX|CSV|CTE|KUL|TEI|Tabular)$/ } keys( %$init_args );
- my $format = shift( @formats );
+ my @format_standalone = qw/ Self CollateX CSV CTE TEI Tabular /;
+ my @format_basetext = qw/ KUL /;
+ my $use_base;
+ my $format = $init_args->{'input'};
unless( $format ) {
warn "No data given to create a collation; will initialize an empty one";
}
- if( $format && $format =~ /^(KUL)$/ &&
- !exists $init_args->{'base'} ) {
- warn "Cannot make a collation from $format without a base text";
+ if( $format && !( grep { $_ eq $format } @format_standalone )
+ && !( grep { $_ eq $format } @format_basetext ) ) {
+ warn "Unrecognized input format $format; not parsing";
return;
}
+ if( $format && grep { $_ eq $format } @format_basetext ) {
+ $use_base = 1;
+ if( !exists $init_args->{'base'} ) {
+ warn "Cannot make a collation from $format without a base text";
+ return;
+ }
+ }
# Now do the parsing.
- my @sigla;
if( $format ) {
- my @parseargs;
- if( $format =~ /^(KUL)$/ ) {
- $init_args->{'data'} = $init_args->{$format};
- $init_args->{'format'} = $format;
- $format = 'BaseText';
- @parseargs = %$init_args;
- } else {
- @parseargs = ( $init_args->{ $format } );
+ if( $use_base ) {
+ $format = 'BaseText'; # Use the BaseText module for parsing,
+ # but retain the original input arg.
}
my $mod = "Text::Tradition::Parser::$format";
load( $mod );
- $mod->can('parse')->( $self, @parseargs );
+ $mod->can('parse')->( $self, $init_args );
}
}
}
=cut
sub parse {
- my( $tradition, %opts ) = @_;
+ my( $tradition, $opts ) = @_;
- my $format_mod = 'Text::Tradition::Parser::' . $opts{'format'};
+ my $format_mod = 'Text::Tradition::Parser::' . $opts->{'input'};
load( $format_mod );
- my @apparatus_entries = $format_mod->can('read')->( $opts{'data'} );
- merge_base( $tradition->collation, $opts{'base'}, @apparatus_entries );
+ my @apparatus_entries = $format_mod->can('read')->( $opts->{'file'} );
+ merge_base( $tradition->collation, $opts->{'base'}, @apparatus_entries );
}
=item B<merge_base>
my %apps; # Save the apparatus XML for a given ID.
sub parse {
- my( $tradition, $xml_str ) = @_;
+ my( $tradition, $opts ) = @_;
my $c = $tradition->collation; # Some shorthand
# First, parse the XML.
my $parser = XML::LibXML->new();
- my $doc = $parser->parse_string( $xml_str );
+ my $doc;
+ if( exists $opts->{'string'} ) {
+ $doc = $parser->parse_string( $opts->{'string'} );
+ } elsif ( exists $opts->{'file'} ) {
+ $doc = $parser->parse_file( $opts->{'file'} );
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
+ }
my $tei = $doc->documentElement();
my $xpc = XML::LibXML::XPathContext->new( $tei );
=item B<parse>
-parse( $graph, $graphml_string );
+parse( $tradition, $init_options );
-Takes an initialized Text::Tradition::Graph object and a string
-containing the GraphML; creates the appropriate nodes and edges on the
-graph.
+Takes an initialized Text::Tradition::Graph object and its initialization
+options, including the data source; creates the appropriate nodes and edges
+on the graph.
=cut
my $TRANSKEY = 'identical';
sub parse {
- my( $tradition, $graphml_str ) = @_;
- my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
+ my( $tradition, $opts ) = @_;
+ my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
my $collation = $tradition->collation;
my %witnesses; # Keep track of the witnesses we encounter as we
# run through the graph data.
=item B<parse>
-parse( $graphml_string );
+parse( $init_opts );
-Takes a string containing the GraphML; returns a list of nodes, edges,
+Takes a set of Tradition initialization options, among which should be either
+'file' or 'string'; parses that file or string and returns a list of nodes, edges,
and their associated data.
=cut
# -> edgeid -> { source, target, wit1/val1, wit2/val2 ...}
sub parse {
- my( $graphml_str ) = @_;
+ my( $opts ) = @_;
my $graph_hash = { 'nodes' => [],
'edges' => [] };
my $parser = XML::LibXML->new();
- my $doc = $parser->parse_string( $graphml_str );
+ my $doc;
+ if( exists $opts->{'string'} ) {
+ $doc = $parser->parse_string( $opts->{'string'} );
+ } elsif ( exists $opts->{'file'} ) {
+ $doc = $parser->parse_file( $opts->{'file'} );
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
+ }
+
my $graphml = $doc->documentElement();
$xpc = XML::LibXML::XPathContext->new( $graphml );
$xpc->registerNs( 'g', 'http://graphml.graphdrawing.org/xmlns' );
use strict;
use warnings;
use Storable qw /dclone/;
-use Text::CSV::Simple;
+use Text::CSV::Simple; # TODO convert to CSV_XS
=head1 NAME
=cut
sub read {
- my( $csv_file ) = @_;
+ my( $opts ) = @_;
my $parser = Text::CSV::Simple->new();
my @fields = qw/ reference text variant type context non_corr non_indep
length total origin /;
- my @lines = $parser->read_file( $ARGV[0] );
+ my @lines = $parser->read_file( $opts->{'file'} );
my @labels = @{shift( @lines )};
push( @fields, @labels[10..$#labels] );
source target witness extra relationship/;
sub parse {
- my( $tradition, $graphml_str ) = @_;
+ my( $tradition, $opts ) = @_;
+ my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
# TODO this is begging for stream parsing instead of multiple loops.
- my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
my $collation = $tradition->collation;
my %witnesses;
# Parse the TEI file.
sub parse {
- my( $tradition, $xml_str ) = @_;
+ my( $tradition, $opts ) = @_;
# First, parse the XML.
my $parser = XML::LibXML->new();
- my $doc = $parser->parse_string( $xml_str );
+ my $doc;
+ if( exists $opts->{'string'} ) {
+ $doc = $parser->parse_string( $opts->{'string'} );
+ } elsif ( exists $opts->{'file'} ) {
+ $doc = $parser->parse_file( $opts->{'file'} );
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
+ }
my $tei = $doc->documentElement();
my $xpc = XML::LibXML::XPathContext->new( $tei );
my $ns;
=cut
sub parse {
- my( $tradition, $tab_str ) = @_;
- # TODO Allow setting of sep_char
+ my( $tradition, $opts ) = @_;
my $c = $tradition->collation; # shorthand
- my $csv = Text::CSV_XS->new( { binary => 1, # binary for UTF-8
- sep_char => "\t" } );
- my @lines = split( "\n", $tab_str );
+ my $csv = Text::CSV_XS->new( {
+ binary => 1, # binary for UTF-8
+ sep_char => exists $opts->{'sep_char'} ? $opts->{'sep_char'} : "\t" }
+ );
+ # TODO Handle being given a file
+
my $alignment_table;
- foreach my $l ( @lines ) {
- my $status = $csv->parse( $l );
- if( $status ) {
- push( @$alignment_table, [ $csv->fields ] );
- } else {
- warn "Could not parse line $l: " . $csv->error_input;
+ if( exists $opts->{'string' } ) {
+ my @lines = split( "\n", $opts->{'string'} );
+ foreach my $l ( @lines ) {
+ my $status = $csv->parse( $l );
+ if( $status ) {
+ push( @$alignment_table, [ $csv->fields ] );
+ } else {
+ warn "Could not parse line $l: " . $csv->error_input;
+ }
}
+ } elsif( exists $opts->{'file'} ) {
+ open( my $fh, $opts->{'file'} ) or die "Could not open input file " . $opts->{'file'};
+ while( my $row = $csv->getline( $fh ) ) {
+ push( @$alignment_table, $row );
+ }
+ close $fh;
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
}
-
+
# Set up the witnesses we find in the first line
my @witnesses;
foreach my $sigil ( @{$alignment_table->[0]} ) {
help( "$informat input needs a base text" );
}
-# CSV parsing requires a filename; XML parsing requires a string.
my $input = $ARGV[0];
-unless( $informat eq 'KUL' || $informat eq 'CSV' ) {
- my @lines;
- open( INFILE, "$input" ) or die "Could not read $input";
- binmode INFILE, ':utf8';
- @lines = <INFILE>;
- close INFILE;
- $input = join( '', @lines );
-}
# First: read the base. Make a graph, but also note which
# nodes represent line beginnings.
-my %args = ( $informat => $input,
+my %args = ( 'input' => $informat,
+ 'file' => $input,
'linear' => $linear );
$args{'base'} = $inbase if $inbase;
my $tradition = Text::Tradition->new( %args );
#!/usr/bin/env perl
use strict;
use warnings;
-use Test::More tests => 2;
+use lib 'lib';
+use Test::More;
+use Text::Tradition;
-BEGIN { use_ok 'Catalyst::Test', 'lemmatizer' }
+BEGIN { use_ok 'Text::Tradition' }
-ok( request('/')->is_success, 'Request should succeed' );
+# A simple test, just to make sure we can parse a graph.
+my $datafile = 't/data/florilegium_graphml.xml';
+my $tradition = Text::Tradition->new( 'input' => 'TEI',
+ 'name' => 'test0',
+ 'file' => $datafile,
+ 'linear' => 1 );
+
+ok( $tradition, "Got a tradition object" );
+is( scalar $tradition->witnesses, 13, "Found all witnesses" );
+ok( $tradition->collation, "Tradition has a collation" );
+
+my $c = $tradition->collation;
+is( scalar $c->readings, 236, "Collation has all readings" );
+is( scalar $c->paths, 1838, "Collation has all paths" );
+is( scalar $c->relationships, 0, "Collation has all relationships" );
+
+done_testing;
\ No newline at end of file