Commit | Line | Data |
7909b81e |
1 | package SQL::Translator::Parser::Excel; |
19de1991 |
2 | |
3 | # ------------------------------------------------------------------- |
854b7210 |
4 | # $Id: Excel.pm,v 1.10 2003-10-10 15:58:11 kycl4rk Exp $ |
5eb4a350 |
5 | # ------------------------------------------------------------------- |
19de1991 |
6 | # Copyright (C) 2003 Ken Y. Clark <kclark@cpan.org>, |
7 | # darren chamberlain <darren@cpan.org>, |
7909b81e |
8 | # Chris Mungall <cjm@fruitfly.org>, |
9 | # Mike Mellilo <mmelillo@users.sourceforge.net> |
19de1991 |
10 | # |
11 | # This program is free software; you can redistribute it and/or |
12 | # modify it under the terms of the GNU General Public License as |
13 | # published by the Free Software Foundation; version 2. |
14 | # |
15 | # This program is distributed in the hope that it will be useful, but |
16 | # WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | # General Public License for more details. |
19 | # |
20 | # You should have received a copy of the GNU General Public License |
21 | # along with this program; if not, write to the Free Software |
22 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
23 | # 02111-1307 USA |
24 | # ------------------------------------------------------------------- |
25 | |
26 | =head1 NAME |
27 | |
28 | SQL::Translator::Parser::Excel - parser for Excel |
29 | |
30 | =head1 SYNOPSIS |
31 | |
32 | use SQL::Translator; |
19de1991 |
33 | |
34 | my $translator = SQL::Translator->new; |
854b7210 |
35 | $translator->parser('Excel'); |
19de1991 |
36 | |
37 | =head1 DESCRIPTION |
38 | |
854b7210 |
39 | Parses an Excel spreadsheet file using Spreadsheet::ParseExcel. |
19de1991 |
40 | |
5eb4a350 |
41 | =head1 OPTIONS |
42 | |
43 | =over |
44 | |
45 | =item * scan_fields |
46 | |
47 | Indicates that the columns should be scanned to determine data types |
48 | and field sizes. True by default. |
49 | |
50 | =back |
51 | |
19de1991 |
52 | =cut |
53 | |
54 | use strict; |
ab0aa010 |
55 | use vars qw($DEBUG $VERSION @EXPORT_OK); |
56 | $DEBUG = 0 unless defined $DEBUG; |
854b7210 |
57 | $VERSION = sprintf "%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/; |
19de1991 |
58 | |
7909b81e |
59 | use Spreadsheet::ParseExcel; |
19de1991 |
60 | use Exporter; |
ab0aa010 |
61 | use SQL::Translator::Utils qw(debug normalize_name); |
62 | |
19de1991 |
63 | use base qw(Exporter); |
64 | |
65 | @EXPORT_OK = qw(parse); |
66 | |
8c7f5c7b |
67 | my %ET_to_ST = ( |
68 | 'Text' => 'VARCHAR', |
69 | 'Date' => 'DATETIME', |
70 | 'Numeric' => 'DOUBLE', |
71 | ); |
72 | |
19de1991 |
73 | # ------------------------------------------------------------------- |
ab0aa010 |
74 | # parse($tr, $data) |
75 | # |
76 | # Note that $data, in the case of this parser, is unuseful. |
77 | # Spreadsheet::ParseExcel works on files, not data streams. |
78 | # ------------------------------------------------------------------- |
19de1991 |
79 | sub parse { |
ab0aa010 |
80 | my ($tr, $data) = @_; |
5eb4a350 |
81 | my $args = $tr->parser_args; |
8c7f5c7b |
82 | my $filename = $tr->filename || return; |
83 | my $wb = Spreadsheet::ParseExcel::Workbook->Parse( $filename ); |
84 | my $schema = $tr->schema; |
85 | my $table_no = 0; |
86 | |
8c7f5c7b |
87 | my $wb_count = $wb->{'SheetCount'} || 0; |
88 | for my $num ( 0 .. $wb_count - 1 ) { |
89 | $table_no++; |
90 | my $ws = $wb->Worksheet( $num ); |
91 | my $table_name = normalize_name( $ws->{'Name'} || "Table$table_no" ); |
ab0aa010 |
92 | |
93 | my @cols = $ws->ColRange; |
94 | next unless $cols[1] > 0; |
95 | |
8c7f5c7b |
96 | my $table = $schema->add_table( name => $table_name ); |
ab0aa010 |
97 | |
5eb4a350 |
98 | my @field_names = (); |
8c7f5c7b |
99 | for my $col ( $cols[0] .. $cols[1] ) { |
100 | my $cell = $ws->Cell(0, $col); |
101 | my $col_name = normalize_name( $cell->{'Val'} ); |
102 | my $data_type = ET_to_ST( $cell->{'Type'} ); |
5eb4a350 |
103 | push @field_names, $col_name; |
8c7f5c7b |
104 | |
8c7f5c7b |
105 | my $field = $table->add_field( |
106 | name => $col_name, |
107 | data_type => $data_type, |
108 | default_value => '', |
109 | size => 255, |
110 | is_nullable => 1, |
111 | is_auto_increment => undef, |
112 | ) or die $table->error; |
113 | |
114 | if ( $col == 0 ) { |
115 | $table->primary_key( $field->name ); |
116 | $field->is_primary_key(1); |
ab0aa010 |
117 | } |
19de1991 |
118 | } |
5eb4a350 |
119 | |
120 | # |
121 | # If directed, look at every field's values to guess size and type. |
122 | # |
123 | unless ( |
124 | defined $args->{'scan_fields'} && |
125 | $args->{'scan_fields'} == 0 |
126 | ) { |
127 | my %field_info = map { $_, {} } @field_names; |
128 | |
129 | for( |
130 | my $iR = $ws->{'MinRow'} == 0 ? 1 : $ws->{'MinRow'}; |
131 | defined $ws->{'MaxRow'} && $iR <= $ws->{'MaxRow'}; |
132 | $iR++ |
133 | ) { |
134 | for ( |
135 | my $iC = $ws->{'MinCol'}; |
136 | defined $ws->{'MaxCol'} && $iC <= $ws->{'MaxCol'}; |
137 | $iC++ |
138 | ) { |
139 | my $field = $field_names[ $iC ]; |
140 | my $data = $ws->{'Cells'}[ $iR ][ $iC ]->{'_Value'}; |
5b9183fb |
141 | next if !defined $data || $data eq ''; |
5eb4a350 |
142 | my $size = [ length $data ]; |
143 | my $type; |
144 | |
145 | if ( $data =~ /^-?\d+$/ ) { |
146 | $type = 'integer'; |
147 | } |
148 | elsif ( |
149 | $data =~ /^-?[,\d]+\.[\d+]?$/ |
150 | || |
151 | $data =~ /^-?[,\d]+?\.\d+$/ |
152 | || |
153 | $data =~ /^-?\.\d+$/ |
154 | ) { |
155 | $type = 'float'; |
156 | my ( $w, $d ) = map { s/,//g; $_ } split( /\./, $data ); |
854b7210 |
157 | $size = [ length $w + length $d, length $d ]; |
5eb4a350 |
158 | } |
159 | else { |
160 | $type = 'char'; |
161 | } |
162 | |
163 | for my $i ( 0, 1 ) { |
164 | next unless defined $size->[ $i ]; |
165 | my $fsize = $field_info{ $field }{'size'}[ $i ] || 0; |
166 | if ( $size->[ $i ] > $fsize ) { |
167 | $field_info{ $field }{'size'}[ $i ] = $size->[ $i ]; |
168 | } |
169 | } |
170 | |
171 | $field_info{ $field }{ $type }++; |
172 | } |
173 | } |
174 | |
175 | for my $field ( keys %field_info ) { |
5b9183fb |
176 | my $size = $field_info{ $field }{'size'} || 0; |
5eb4a350 |
177 | my $data_type = |
5b9183fb |
178 | $field_info{ $field }{'char'} ? 'char' : |
179 | $field_info{ $field }{'float'} ? 'float' : |
180 | $field_info{ $field }{'integer'} ? 'integer' : 'char'; |
181 | |
182 | if ( $data_type eq 'char' && scalar @$size == 2 ) { |
183 | $size = [ $size->[0] + $size->[1] ]; |
184 | } |
5eb4a350 |
185 | |
186 | my $field = $table->get_field( $field ); |
5b9183fb |
187 | $field->size( $size ) if $size; |
5eb4a350 |
188 | $field->data_type( $data_type ); |
189 | } |
190 | } |
19de1991 |
191 | } |
192 | |
f62bd16c |
193 | return 1; |
ab0aa010 |
194 | } |
19de1991 |
195 | |
5eb4a350 |
196 | # ------------------------------------------------------------------- |
ab0aa010 |
197 | sub ET_to_ST { |
198 | my $et = shift; |
199 | $ET_to_ST{$et} || $ET_to_ST{'Text'}; |
19de1991 |
200 | } |
201 | |
202 | 1; |
203 | |
8c7f5c7b |
204 | # ------------------------------------------------------------------- |
205 | # Education is an admirable thing, |
206 | # but it is as well to remember that |
207 | # nothing that is worth knowing can be taught. |
208 | # Oscar Wilde |
209 | # ------------------------------------------------------------------- |
210 | |
19de1991 |
211 | =pod |
212 | |
7909b81e |
213 | =head1 AUTHORS |
19de1991 |
214 | |
7909b81e |
215 | Mike Mellilo <mmelillo@users.sourceforge.net>, |
5eb4a350 |
216 | darren chamberlain E<lt>dlc@users.sourceforge.netE<gt>, |
217 | Ken Y. Clark E<lt>kclark@cpan.orgE<gt>. |
19de1991 |
218 | |
219 | =head1 SEE ALSO |
220 | |
5eb4a350 |
221 | Spreadsheet::ParseExcel, SQL::Translator. |
19de1991 |
222 | |
223 | =cut |