use strict;
use warnings;
use Text::CSV;
+use Text::Tradition::Error;
+use TryCatch;
=head1 NAME
# Check that we only have collation relationships where we need them
is( scalar $t->collation->relationships, 3, "Redundant collations were removed" );
+## Check excel parsing
+
+my $xls = 't/data/armexample.xls';
+my $xt = Text::Tradition->new(
+ 'name' => 'excel test',
+ 'input' => 'Tabular',
+ 'file' => $xls,
+ 'xls' => 1
+ );
+
+is( ref( $xt ), 'Text::Tradition', "Parsed test Excel 97-2004 file" );
+my %xls_wits;
+map { $xls_wits{$_} = 0 } qw/ Wit1 Wit2 Wit3 /;
+foreach my $wit ( $xt->witnesses ) {
+ $xls_wits{$wit->sigil} = 1;
+}
+is( scalar keys %xls_wits, 3, "No extra witnesses were made" );
+foreach my $k ( keys %xls_wits ) {
+ ok( $xls_wits{$k}, "Witness $k still exists" );
+}
+is( scalar $xt->collation->readings, 11, "Got correct number of test readings" );
+is( scalar $xt->collation->paths, 13, "Got correct number of reading paths" );
+is( $xt->collation->reading('r5.1')->text, "\x{587}",
+ "Correct decoding of at least one reading" );
+
=end testing
=cut
sub parse {
my( $tradition, $opts ) = @_;
my $c = $tradition->collation; # shorthand
- my $csv_options = { 'binary' => 1 };
- $csv_options->{'sep_char'} = $opts->{'sep_char'} || "\t";
- if( $csv_options->{'sep_char'} eq "\t" ) {
- # If it is really tab separated, nothing is an escape char.
- $csv_options->{'quote_char'} = undef;
- $csv_options->{'escape_char'} = undef;
- }
- my $csv = Text::CSV->new( $csv_options );
-
my $alignment_table;
- if( exists $opts->{'string' } ) {
- my @lines = split( "\n", $opts->{'string'} );
- foreach my $l ( @lines ) {
- my $status = $csv->parse( $l );
- if( $status ) {
- push( @$alignment_table, [ $csv->fields ] );
- } else {
- warn "Could not parse line $l: " . $csv->error_input;
- }
- }
- } elsif( exists $opts->{'file'} ) {
- open( my $fh, $opts->{'file'} )
- or warn "Could not open input file " . $opts->{'file'};
- binmode( $fh, ':utf8' );
- while( my $row = $csv->getline( $fh ) ) {
- push( @$alignment_table, $row );
- }
- close $fh;
+ if( $opts->{'xls'} ) {
+ try {
+ require Spreadsheet::ParseExcel;
+ } catch {
+ throw( "Need module Spreadsheet::ParseExcel to parse .xls files" );
+ }
+ unless( exists $opts->{'file'} ) {
+ throw( "Must pass the filename for Excel parsing" );
+ }
+ my $parser = Spreadsheet::ParseExcel->new();
+ my $workbook = $parser->parse( $opts->{'file'} );
+ unless( defined $workbook && defined $workbook->worksheet(0) ) {
+ throw( "Failed to parse file " . $opts->{'file'} . ": " . $parser->error() );
+ }
+ # Use the first worksheet
+ my $sheet = $workbook->worksheet(0);
+ my( $rmin, $rmax ) = $sheet->row_range();
+ my( $cmin, $cmax ) = $sheet->col_range();
+ unless( $cmax && $rmax ) {
+ throw( "Found no rows or no columns in first worksheet" );
+ }
+ # Populate the alignment table. We only want columns that have
+ # a sigil in row zero.
+ my %sigcols = ();
+ push( @$alignment_table, [] );
+ foreach my $col ( $cmin .. $cmax ) {
+ my $cell = $sheet->get_cell( $rmin, $col );
+ my $cellval = $cell ? $cell->value() : undef;
+ if( $cellval ) {
+ $sigcols{$col} = 1;
+ push( @{$alignment_table->[0]}, $cellval );
+ }
+ }
+ # Now go through the rest of the rows and pick up the columns
+ # that were headed by a sigil.
+ foreach my $row ( $rmin+1 .. $rmax ) {
+ my @tablerow;
+ foreach my $col ( $cmin .. $cmax ) {
+ next unless $sigcols{$col};
+ my $cell = $sheet->get_cell( $row, $col );
+ my $cellval = $cell ? $cell->value() : undef;
+ push( @tablerow, $cell ? $cell->value() : undef );
+ }
+ push( @$alignment_table, \@tablerow );
+ }
} else {
- warn "Could not find string or file option to parse";
- return;
- }
-
+ # Assume it is a comma-, tab-, or whatever-separated format.
+ my $csv_options = { 'binary' => 1 };
+ $csv_options->{'sep_char'} = $opts->{'sep_char'} || "\t";
+ if( $csv_options->{'sep_char'} eq "\t" ) {
+ # If it is really tab separated, nothing is an escape char.
+ $csv_options->{'quote_char'} = undef;
+ $csv_options->{'escape_char'} = undef;
+ }
+ my $csv = Text::CSV->new( $csv_options );
+
+ if( exists $opts->{'string' } ) {
+ my @lines = split( "\n", $opts->{'string'} );
+ foreach my $l ( @lines ) {
+ my $status = $csv->parse( $l );
+ if( $status ) {
+ push( @$alignment_table, [ $csv->fields ] );
+ } else {
+ warn "Could not parse line $l: " . $csv->error_input;
+ }
+ }
+ } elsif( exists $opts->{'file'} ) {
+ open( my $fh, $opts->{'file'} )
+ or warn "Could not open input file " . $opts->{'file'};
+ binmode( $fh, ':utf8' );
+ while( my $row = $csv->getline( $fh ) ) {
+ push( @$alignment_table, $row );
+ }
+ close $fh;
+ } else {
+ warn "Could not find string or file option to parse";
+ return;
+ }
+ }
# Set up the witnesses we find in the first line
my @witnesses;
my %ac_wits; # Track layered witness -> main witness mapping
# Check that we only have collation relationships where we need them
is( scalar $t->collation->relationships, 3, "Redundant collations were removed" );
+
+## Check excel parsing
+
+my $xls = 't/data/armexample.xls';
+my $xt = Text::Tradition->new(
+ 'name' => 'excel test',
+ 'input' => 'Tabular',
+ 'file' => $xls,
+ 'xls' => 1
+ );
+
+is( ref( $xt ), 'Text::Tradition', "Parsed test Excel 97-2004 file" );
+my %xls_wits;
+map { $xls_wits{$_} = 0 } qw/ Wit1 Wit2 Wit3 /;
+foreach my $wit ( $xt->witnesses ) {
+ $xls_wits{$wit->sigil} = 1;
+}
+is( scalar keys %xls_wits, 3, "No extra witnesses were made" );
+foreach my $k ( keys %xls_wits ) {
+ ok( $xls_wits{$k}, "Witness $k still exists" );
+}
+is( scalar $xt->collation->readings, 11, "Got correct number of test readings" );
+is( scalar $xt->collation->paths, 13, "Got correct number of reading paths" );
+is( $xt->collation->reading('r5.1')->text, "\x{587}",
+ "Correct decoding of at least one reading" );
}