Some refinements in assigning field types, size of field for floats.
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator / Parser / Excel.pm
CommitLineData
7909b81e 1package SQL::Translator::Parser::Excel;
19de1991 2
3# -------------------------------------------------------------------
854b7210 4# $Id: Excel.pm,v 1.10 2003-10-10 15:58:11 kycl4rk Exp $
5eb4a350 5# -------------------------------------------------------------------
19de1991 6# Copyright (C) 2003 Ken Y. Clark <kclark@cpan.org>,
7# darren chamberlain <darren@cpan.org>,
7909b81e 8# Chris Mungall <cjm@fruitfly.org>,
9# Mike Mellilo <mmelillo@users.sourceforge.net>
19de1991 10#
11# This program is free software; you can redistribute it and/or
12# modify it under the terms of the GNU General Public License as
13# published by the Free Software Foundation; version 2.
14#
15# This program is distributed in the hope that it will be useful, but
16# WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18# General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23# 02111-1307 USA
24# -------------------------------------------------------------------
25
26=head1 NAME
27
28SQL::Translator::Parser::Excel - parser for Excel
29
30=head1 SYNOPSIS
31
32 use SQL::Translator;
19de1991 33
34 my $translator = SQL::Translator->new;
854b7210 35 $translator->parser('Excel');
19de1991 36
37=head1 DESCRIPTION
38
854b7210 39Parses an Excel spreadsheet file using Spreadsheet::ParseExcel.
19de1991 40
5eb4a350 41=head1 OPTIONS
42
43=over
44
45=item * scan_fields
46
47Indicates that the columns should be scanned to determine data types
48and field sizes. True by default.
49
50=back
51
19de1991 52=cut
53
54use strict;
ab0aa010 55use vars qw($DEBUG $VERSION @EXPORT_OK);
56$DEBUG = 0 unless defined $DEBUG;
854b7210 57$VERSION = sprintf "%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/;
19de1991 58
7909b81e 59use Spreadsheet::ParseExcel;
19de1991 60use Exporter;
ab0aa010 61use SQL::Translator::Utils qw(debug normalize_name);
62
19de1991 63use base qw(Exporter);
64
65@EXPORT_OK = qw(parse);
66
8c7f5c7b 67my %ET_to_ST = (
68 'Text' => 'VARCHAR',
69 'Date' => 'DATETIME',
70 'Numeric' => 'DOUBLE',
71);
72
19de1991 73# -------------------------------------------------------------------
ab0aa010 74# parse($tr, $data)
75#
76# Note that $data, in the case of this parser, is unuseful.
77# Spreadsheet::ParseExcel works on files, not data streams.
78# -------------------------------------------------------------------
19de1991 79sub parse {
ab0aa010 80 my ($tr, $data) = @_;
5eb4a350 81 my $args = $tr->parser_args;
8c7f5c7b 82 my $filename = $tr->filename || return;
83 my $wb = Spreadsheet::ParseExcel::Workbook->Parse( $filename );
84 my $schema = $tr->schema;
85 my $table_no = 0;
86
8c7f5c7b 87 my $wb_count = $wb->{'SheetCount'} || 0;
88 for my $num ( 0 .. $wb_count - 1 ) {
89 $table_no++;
90 my $ws = $wb->Worksheet( $num );
91 my $table_name = normalize_name( $ws->{'Name'} || "Table$table_no" );
ab0aa010 92
93 my @cols = $ws->ColRange;
94 next unless $cols[1] > 0;
95
8c7f5c7b 96 my $table = $schema->add_table( name => $table_name );
ab0aa010 97
5eb4a350 98 my @field_names = ();
8c7f5c7b 99 for my $col ( $cols[0] .. $cols[1] ) {
100 my $cell = $ws->Cell(0, $col);
101 my $col_name = normalize_name( $cell->{'Val'} );
102 my $data_type = ET_to_ST( $cell->{'Type'} );
5eb4a350 103 push @field_names, $col_name;
8c7f5c7b 104
8c7f5c7b 105 my $field = $table->add_field(
106 name => $col_name,
107 data_type => $data_type,
108 default_value => '',
109 size => 255,
110 is_nullable => 1,
111 is_auto_increment => undef,
112 ) or die $table->error;
113
114 if ( $col == 0 ) {
115 $table->primary_key( $field->name );
116 $field->is_primary_key(1);
ab0aa010 117 }
19de1991 118 }
5eb4a350 119
120 #
121 # If directed, look at every field's values to guess size and type.
122 #
123 unless (
124 defined $args->{'scan_fields'} &&
125 $args->{'scan_fields'} == 0
126 ) {
127 my %field_info = map { $_, {} } @field_names;
128
129 for(
130 my $iR = $ws->{'MinRow'} == 0 ? 1 : $ws->{'MinRow'};
131 defined $ws->{'MaxRow'} && $iR <= $ws->{'MaxRow'};
132 $iR++
133 ) {
134 for (
135 my $iC = $ws->{'MinCol'};
136 defined $ws->{'MaxCol'} && $iC <= $ws->{'MaxCol'};
137 $iC++
138 ) {
139 my $field = $field_names[ $iC ];
140 my $data = $ws->{'Cells'}[ $iR ][ $iC ]->{'_Value'};
5b9183fb 141 next if !defined $data || $data eq '';
5eb4a350 142 my $size = [ length $data ];
143 my $type;
144
145 if ( $data =~ /^-?\d+$/ ) {
146 $type = 'integer';
147 }
148 elsif (
149 $data =~ /^-?[,\d]+\.[\d+]?$/
150 ||
151 $data =~ /^-?[,\d]+?\.\d+$/
152 ||
153 $data =~ /^-?\.\d+$/
154 ) {
155 $type = 'float';
156 my ( $w, $d ) = map { s/,//g; $_ } split( /\./, $data );
854b7210 157 $size = [ length $w + length $d, length $d ];
5eb4a350 158 }
159 else {
160 $type = 'char';
161 }
162
163 for my $i ( 0, 1 ) {
164 next unless defined $size->[ $i ];
165 my $fsize = $field_info{ $field }{'size'}[ $i ] || 0;
166 if ( $size->[ $i ] > $fsize ) {
167 $field_info{ $field }{'size'}[ $i ] = $size->[ $i ];
168 }
169 }
170
171 $field_info{ $field }{ $type }++;
172 }
173 }
174
175 for my $field ( keys %field_info ) {
5b9183fb 176 my $size = $field_info{ $field }{'size'} || 0;
5eb4a350 177 my $data_type =
5b9183fb 178 $field_info{ $field }{'char'} ? 'char' :
179 $field_info{ $field }{'float'} ? 'float' :
180 $field_info{ $field }{'integer'} ? 'integer' : 'char';
181
182 if ( $data_type eq 'char' && scalar @$size == 2 ) {
183 $size = [ $size->[0] + $size->[1] ];
184 }
5eb4a350 185
186 my $field = $table->get_field( $field );
5b9183fb 187 $field->size( $size ) if $size;
5eb4a350 188 $field->data_type( $data_type );
189 }
190 }
19de1991 191 }
192
f62bd16c 193 return 1;
ab0aa010 194}
19de1991 195
5eb4a350 196# -------------------------------------------------------------------
ab0aa010 197sub ET_to_ST {
198 my $et = shift;
199 $ET_to_ST{$et} || $ET_to_ST{'Text'};
19de1991 200}
201
2021;
203
8c7f5c7b 204# -------------------------------------------------------------------
205# Education is an admirable thing,
206# but it is as well to remember that
207# nothing that is worth knowing can be taught.
208# Oscar Wilde
209# -------------------------------------------------------------------
210
19de1991 211=pod
212
7909b81e 213=head1 AUTHORS
19de1991 214
7909b81e 215Mike Mellilo <mmelillo@users.sourceforge.net>,
5eb4a350 216darren chamberlain E<lt>dlc@users.sourceforge.netE<gt>,
217Ken Y. Clark E<lt>kclark@cpan.orgE<gt>.
19de1991 218
219=head1 SEE ALSO
220
5eb4a350 221Spreadsheet::ParseExcel, SQL::Translator.
19de1991 222
223=cut