package SQL::Translator::Parser::Excel;

# -------------------------------------------------------------------
# Copyright (C) 2002-2009 SQLFairy Authors
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# [NOTE(review): the remainder of this license notice lies between the
#  two hunks of the reviewed diff and was not visible -- restore it
#  verbatim from the original file.]
# -------------------------------------------------------------------

=head1 NAME

SQL::Translator::Parser::Excel - parser for Excel

=head1 SYNOPSIS

  use SQL::Translator;

  my $translator = SQL::Translator->new;
  $translator->parser('Excel');

=head1 DESCRIPTION

Parses an Excel spreadsheet file using Spreadsheet::ParseExcel.

=head1 OPTIONS

=over

=item * scan_fields

Indicates that the columns should be scanned to determine data types
and field sizes.  True by default.

=back

=cut

use strict;
use warnings;

our ( $DEBUG, $VERSION, @EXPORT_OK );
$DEBUG   = 0 unless defined $DEBUG;
$VERSION = '1.59';

use Spreadsheet::ParseExcel;
use Exporter;
use SQL::Translator::Utils qw(debug normalize_name);

use base qw(Exporter);

@EXPORT_OK = qw(parse);

# Maps the cell type reported by Spreadsheet::ParseExcel to the data
# type recorded on the schema field.
my %ET_to_ST = (
    'Text'    => 'VARCHAR',
    'Date'    => 'DATETIME',
    'Numeric' => 'DOUBLE',
);

# -------------------------------------------------------------------
# parse($tr, $data)
#
# Builds one schema table per worksheet of the workbook named by
# $tr->filename.  Row 0 of each sheet supplies the field names; the
# field in column 0 becomes the primary key.  Unless the parser
# argument "scan_fields" is defined and numerically 0, the remaining
# rows are scanned to guess each field's data type and size.
#
# $data is accepted for interface compatibility but is not used:
# Spreadsheet::ParseExcel works on files, not data streams.
#
# Returns 1 on success and a false value when the translator has no
# filename.  Dies if the workbook cannot be parsed or a field cannot
# be added to a table.
# -------------------------------------------------------------------
sub parse {
    my ( $tr, $data ) = @_;
    my $args     = $tr->parser_args;
    my $filename = $tr->filename || return;

    # Without this check an unreadable file produced an empty schema
    # and a "successful" return value.
    my $wb = Spreadsheet::ParseExcel::Workbook->Parse($filename)
        or die "Unable to parse Excel file '$filename'\n";

    my $schema   = $tr->schema;
    my $table_no = 0;

    my $scan_opt    = $args->{'scan_fields'};
    my $scan_fields = !( defined $scan_opt && $scan_opt == 0 );

    my $sheet_count = $wb->{'SheetCount'} || 0;
    for my $num ( 0 .. $sheet_count - 1 ) {
        $table_no++;
        my $ws         = $wb->Worksheet($num);
        my $table_name = normalize_name( $ws->{'Name'} || "Table$table_no" );

        my @cols = $ws->ColRange;

        # NOTE(review): besides empty sheets, this also skips sheets whose
        # only used column is column 0 -- confirm that is intended.
        next unless defined $cols[1] && $cols[1] > 0;
        my $first_col = $cols[0] || 0;

        my $table = $schema->add_table( name => $table_name );

        # Column number => field name.  The scan below must look names up
        # by absolute column number, because the first used column of a
        # sheet is not necessarily column 0.
        my %name_of_col;
        for my $col ( $first_col .. $cols[1] ) {
            my $cell      = $ws->Cell( 0, $col );
            my $col_name  = normalize_name( $cell->{'Val'} );
            my $data_type = ET_to_ST( $cell->{'Type'} );
            $name_of_col{$col} = $col_name;

            my $field = $table->add_field(
                name              => $col_name,
                data_type         => $data_type,
                default_value     => '',
                size              => 255,
                is_nullable       => 1,
                is_auto_increment => undef,
            ) or die $table->error;

            if ( $col == 0 ) {
                $table->primary_key( $field->name );
                $field->is_primary_key(1);
            }
        }

        next unless $scan_fields;

        #
        # If directed, look at every field's values to guess size and type.
        #
        my $field_info = _scan_field_info( $ws, \%name_of_col );

        for my $name ( keys %{$field_info} ) {
            my $info = $field_info->{$name};
            my $size = $info->{'size'} || [1];

            # Any non-numeric value makes the whole field 'char'; any
            # float makes an otherwise numeric field 'float'.
            my $data_type =
                  $info->{'char'}    ? 'char'
                : $info->{'float'}   ? 'float'
                : $info->{'integer'} ? 'integer'
                :                      'char';

            if ( $data_type eq 'char' && scalar @{$size} == 2 ) {
                $size = [ $size->[0] + $size->[1] ];
            }

            my $field = $table->get_field($name);
            $field->size($size) if $size;
            $field->data_type($data_type);
        }
    }

    return 1;
}

# -------------------------------------------------------------------
# _scan_field_info($ws, \%name_of_col)
#
# Walks the data rows of worksheet $ws (everything after row 0) and
# returns a hashref keyed by field name.  Each entry counts how many
# values looked like 'integer', 'float' or 'char' and keeps, under
# 'size', the largest [ width, decimals ] seen.  Empty and undefined
# cells are ignored; fields without data keep an empty entry.
# -------------------------------------------------------------------
sub _scan_field_info {
    my ( $ws, $name_of_col ) = @_;

    my %field_info = map { $_ => {} } values %{$name_of_col};

    my $last_row = $ws->{'MaxRow'};
    my $last_col = $ws->{'MaxCol'};
    return \%field_info unless defined $last_row && defined $last_col;

    my $min_row   = $ws->{'MinRow'} || 0;
    my $first_row = $min_row == 0 ? 1 : $min_row;    # row 0 holds the names
    my $first_col = $ws->{'MinCol'} || 0;

    for my $row ( $first_row .. $last_row ) {

        # Fetch the row once and skip missing rows/cells explicitly so
        # the worksheet is not autovivified while it is being read.
        my $row_cells = $ws->{'Cells'}[$row] or next;

        for my $col ( $first_col .. $last_col ) {
            my $field = $name_of_col->{$col};
            next unless defined $field;

            my $cell  = $row_cells->[$col] or next;
            my $value = $cell->{'_Value'};
            next if !defined $value || $value eq '';

            my ( $type, $size ) = _classify_value($value);

            for my $i ( 0, 1 ) {
                next unless defined $size->[$i];
                my $seen = $field_info{$field}{'size'}[$i] || 0;
                if ( $size->[$i] > $seen ) {
                    $field_info{$field}{'size'}[$i] = $size->[$i];
                }
            }

            $field_info{$field}{$type}++;
        }
    }

    return \%field_info;
}

# -------------------------------------------------------------------
# _classify_value($value)
#
# Guesses the type of a single non-empty cell value.  Returns a list
# ( $type, \@size ): $type is 'integer', 'float' or 'char'; @size is
# [ length ] for integers and chars, and [ width, decimals ] for
# floats, where commas are not counted and width includes decimals.
# -------------------------------------------------------------------
sub _classify_value {
    my ($value) = @_;

    return ( 'integer', [ length $value ] ) if $value =~ /^-?\d+$/;

    # Digits (with optional thousands commas) followed by a point and
    # optional decimals, or a bare ".digits".  The fractional part is
    # \d* -- a character class such as [\d+] would also accept "12.+".
    if ( $value =~ /^-?(?:[,\d]+\.\d*|\.\d+)$/ ) {

        # tr/,// only counts the commas; an empty part counts as width 1.
        my ( $w, $d ) =
            map { ( length($_) - tr/,// ) || 1 }
            split /\./, $value;

        # "12." has no fractional part, so $d stays undefined.
        return ( 'float', [ $w + ( $d || 0 ), $d ] );
    }

    return ( 'char', [ length $value ] );
}

# -------------------------------------------------------------------
# ET_to_ST($et)
#
# Translates a Spreadsheet::ParseExcel cell type into a schema data
# type.  Unknown or undefined types fall back to the 'Text' mapping.
# -------------------------------------------------------------------
sub ET_to_ST {
    my $et = shift;
    return $ET_to_ST{'Text'} unless defined $et;
    return $ET_to_ST{$et} || $ET_to_ST{'Text'};
}

1;

# -------------------------------------------------------------------
# Education is an admirable thing,
# but it is as well to remember that
# nothing that is worth knowing can be taught.
# Oscar Wilde
# -------------------------------------------------------------------

=pod

=head1 AUTHORS

Mike Mellilo,
darren chamberlain E<lt>dlc@users.sourceforge.netE<gt>,
Ken Y. Clark E<lt>kclark@cpan.orgE<gt>.

=for comment NOTE(review): Mike Mellilo's e-mail address was stripped from the reviewed text; restore it from the original file.

=head1 SEE ALSO

Spreadsheet::ParseExcel, SQL::Translator.

=cut