X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FTranslator%2FParser%2FExcel.pm;h=846095a4ef776b39065c9a6b3f298cb47f3fac43;hb=c0ec0e22d3f0e3852c00daac5ef5763010b410c3;hp=2fb3c29dc3d2f1c45138a5d71b52d958ab15519d;hpb=ab0aa01028291badd0e421da7fc6310627e11978;p=dbsrgits%2FSQL-Translator.git diff --git a/lib/SQL/Translator/Parser/Excel.pm b/lib/SQL/Translator/Parser/Excel.pm index 2fb3c29..846095a 100644 --- a/lib/SQL/Translator/Parser/Excel.pm +++ b/lib/SQL/Translator/Parser/Excel.pm @@ -1,26 +1,5 @@ package SQL::Translator::Parser::Excel; -# ------------------------------------------------------------------- -# Copyright (C) 2003 Ken Y. Clark , -# darren chamberlain , -# Chris Mungall , -# Mike Mellilo -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -# 02111-1307 USA -# ------------------------------------------------------------------- - =head1 NAME SQL::Translator::Parser::Excel - parser for Excel @@ -28,21 +7,32 @@ SQL::Translator::Parser::Excel - parser for Excel =head1 SYNOPSIS use SQL::Translator; - use SQL::Translator::Parser::Excel; my $translator = SQL::Translator->new; - $translator->parser("SQL::Translator::Parser::Excel"); + $translator->parser('Excel'); =head1 DESCRIPTION -Parses an Excel spreadsheet file for SQL::Translator. You can then -turn the data into a database tables or graphs. +Parses an Excel spreadsheet file using Spreadsheet::ParseExcel. + +=head1 OPTIONS + +=over + +=item * scan_fields + +Indicates that the columns should be scanned to determine data types +and field sizes. True by default. + +=back =cut use strict; -use vars qw($DEBUG $VERSION @EXPORT_OK); +use warnings; +our ($DEBUG, @EXPORT_OK); $DEBUG = 0 unless defined $DEBUG; +our $VERSION = '1.59'; use Spreadsheet::ParseExcel; use Exporter; @@ -52,6 +42,12 @@ use base qw(Exporter); @EXPORT_OK = qw(parse); +my %ET_to_ST = ( + 'Text' => 'VARCHAR', + 'Date' => 'DATETIME', + 'Numeric' => 'DOUBLE', +); + # ------------------------------------------------------------------- # parse($tr, $data) # @@ -60,58 +56,124 @@ use base qw(Exporter); # ------------------------------------------------------------------- sub parse { my ($tr, $data) = @_; - my $filename = $tr->filename || return; - my $wb = Spreadsheet::ParseExcel::Workbook->Parse($filename); - my (%parsed, $wb_count, $num); - my $table_no = 0; - - my $wb_count = $wb->{SheetCount}; - for $num (0 .. $wb_count - 1) { - my $ws = $wb->Worksheet($num); - my $name = $ws->{Name} || ++$table_no; - - $name = normalize_name($name); + my $args = $tr->parser_args; + my $filename = $tr->filename || return; + my $wb = Spreadsheet::ParseExcel::Workbook->Parse( $filename ); + my $schema = $tr->schema; + my $table_no = 0; + + my $wb_count = $wb->{'SheetCount'} || 0; + for my $num ( 0 .. $wb_count - 1 ) { + $table_no++; + my $ws = $wb->Worksheet( $num ); + my $table_name = normalize_name( $ws->{'Name'} || "Table$table_no" ); my @cols = $ws->ColRange; next unless $cols[1] > 0; - $parsed{$name} = { - table_name => $name, - type => undef, - indices => [ {} ], - fields => { }, - }; - - for my $col ($cols[0] .. $cols[1]) { - my $cell = $ws->Cell(0, $col); - $parsed{$name}->{'fields'}->{$cell->{Val}} = { - type => 'field', - order => $col, - name => $cell->{Val}, - - # Default datatype is 'char' - data_type => ET_to_ST($cell->{Type}), - - # default size is 8bits; something more reasonable? - size => [ 255 ], - null => 1, - default => '', - is_auto_inc => undef, - - # field field is the primary key - is_primary_key => ($col == 0) ? 1 : undef, + my $table = $schema->add_table( name => $table_name ); + + my @field_names = (); + for my $col ( $cols[0] .. $cols[1] ) { + my $cell = $ws->Cell(0, $col); + my $col_name = normalize_name( $cell->{'Val'} ); + my $data_type = ET_to_ST( $cell->{'Type'} ); + push @field_names, $col_name; + + my $field = $table->add_field( + name => $col_name, + data_type => $data_type, + default_value => '', + size => 255, + is_nullable => 1, + is_auto_increment => undef, + ) or die $table->error; + + if ( $col == 0 ) { + $table->primary_key( $field->name ); + $field->is_primary_key(1); + } + } + + # + # If directed, look at every field's values to guess size and type. + # + unless ( + defined $args->{'scan_fields'} && + $args->{'scan_fields'} == 0 + ) { + my %field_info = map { $_, {} } @field_names; + + for( + my $iR = $ws->{'MinRow'} == 0 ? 1 : $ws->{'MinRow'}; + defined $ws->{'MaxRow'} && $iR <= $ws->{'MaxRow'}; + $iR++ + ) { + for ( + my $iC = $ws->{'MinCol'}; + defined $ws->{'MaxCol'} && $iC <= $ws->{'MaxCol'}; + $iC++ + ) { + my $field = $field_names[ $iC ]; + my $data = $ws->{'Cells'}[ $iR ][ $iC ]->{'_Value'}; + next if !defined $data || $data eq ''; + my $size = [ length $data ]; + my $type; + + if ( $data =~ /^-?\d+$/ ) { + $type = 'integer'; + } + elsif ( + $data =~ /^-?[,\d]+\.[\d+]?$/ + || + $data =~ /^-?[,\d]+?\.\d+$/ + || + $data =~ /^-?\.\d+$/ + ) { + $type = 'float'; + my ( $w, $d ) = + map { s/,//g; length $_ || 1 } + split( /\./, $data ) + ; + $size = [ $w + $d, $d ]; + } + else { + $type = 'char'; + } + + for my $i ( 0, 1 ) { + next unless defined $size->[ $i ]; + my $fsize = $field_info{ $field }{'size'}[ $i ] || 0; + if ( $size->[ $i ] > $fsize ) { + $field_info{ $field }{'size'}[ $i ] = $size->[ $i ]; + } + } + + $field_info{ $field }{ $type }++; + } + } + + for my $field ( keys %field_info ) { + my $size = $field_info{ $field }{'size'} || [ 1 ]; + my $data_type = + $field_info{ $field }{'char'} ? 'char' : + $field_info{ $field }{'float'} ? 'float' : + $field_info{ $field }{'integer'} ? 'integer' : 'char'; + + if ( $data_type eq 'char' && scalar @$size == 2 ) { + $size = [ $size->[0] + $size->[1] ]; + } + + my $field = $table->get_field( $field ); + $field->size( $size ) if $size; + $field->data_type( $data_type ); } } } - return \%parsed; + return 1; } -my %ET_to_ST = ( - 'Text' => 'VARCHAR', - 'Date' => 'DATETIME', - 'Numeric' => 'DOUBLE', -); sub ET_to_ST { my $et = shift; $ET_to_ST{$et} || $ET_to_ST{'Text'}; @@ -119,16 +181,23 @@ sub ET_to_ST { 1; +# ------------------------------------------------------------------- +# Education is an admirable thing, +# but it is as well to remember that +# nothing that is worth knowing can be taught. +# Oscar Wilde +# ------------------------------------------------------------------- + =pod =head1 AUTHORS Mike Mellilo , -darren chamberlain Edlc@users.sourceforge.netE -Ken Y. Clark Ekclark@cpan.orgE +darren chamberlain Edlc@users.sourceforge.netE, +Ken Y. Clark Ekclark@cpan.orgE. =head1 SEE ALSO -perl(1), Spreadsheet::ParseExcel. +Spreadsheet::ParseExcel, SQL::Translator. =cut