Commit | Line | Data |
046f18e5 |
1 | package SQL::Translator::Parser::xSV; |
2 | |
49e1eb70 |
3 | # ------------------------------------------------------------------- |
2008ecf3 |
4 | # $Id: xSV.pm,v 1.9 2003-06-06 00:05:44 kycl4rk Exp $ |
49e1eb70 |
5 | # ------------------------------------------------------------------- |
abfa405a |
6 | # Copyright (C) 2003 Ken Y. Clark <kclark@cpan.org>, |
825ed07b |
7 | # darren chamberlain <darren@cpan.org> |
046f18e5 |
8 | # |
9 | # This program is free software; you can redistribute it and/or |
10 | # modify it under the terms of the GNU General Public License as |
11 | # published by the Free Software Foundation; version 2. |
12 | # |
13 | # This program is distributed in the hope that it will be useful, but |
14 | # WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | # General Public License for more details. |
17 | # |
18 | # You should have received a copy of the GNU General Public License |
19 | # along with this program; if not, write to the Free Software |
20 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
21 | # 02111-1307 USA |
22 | # ------------------------------------------------------------------- |
23 | |
825ed07b |
24 | =head1 NAME |
25 | |
26 | SQL::Translator::Parser::xSV - parser for arbitrarily delimited text files |
27 | |
28 | =head1 SYNOPSIS |
29 | |
30 | use SQL::Translator; |
31 | use SQL::Translator::Parser::xSV; |
32 | |
33 | my $translator = SQL::Translator->new( |
34 | parser => 'xSV', |
35 | parser_args => { field_separator => "\t" }, |
36 | ); |
37 | |
38 | =head1 DESCRIPTION |
39 | |
40 | Parses arbitrarily delimited text files. See the |
41 | Text::RecordParser manpage for arguments on how to parse the file |
42 | (e.g., C<field_separator>, C<record_separator>). Other arguments |
43 | include: |
44 | |
45 | =over |
46 | |
47 | =item * scan_fields |
48 | |
49 | Indicates that the columns should be scanned to determine data types |
9d90f9cd |
50 | and field sizes. True by default. |
825ed07b |
51 | |
52 | =item * trim_fields |
53 | |
54 | A shortcut to sending filters to Text::RecordParser; creates |
55 | callbacks that trim leading and trailing spaces from fields and headers. |
9d90f9cd |
56 | True by default. |
825ed07b |
57 | |
58 | =back |
59 | |
60 | Field names will automatically be normalized by |
61 | C<SQL::Translator::Utils::normalize_name>. |
62 | |
63 | =cut |
64 | |
65 | # ------------------------------------------------------------------- |
66 | |
046f18e5 |
67 | use strict; |
68 | use vars qw($VERSION @EXPORT); |
2008ecf3 |
69 | $VERSION = sprintf "%d.%02d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/; |
046f18e5 |
70 | |
71 | use Exporter; |
72 | use Text::ParseWords qw(quotewords); |
825ed07b |
73 | use Text::RecordParser; |
74 | use SQL::Translator::Utils qw(debug normalize_name); |
046f18e5 |
75 | |
76 | use base qw(Exporter); |
77 | @EXPORT = qw(parse); |
78 | |
825ed07b |
#
# parse( $translator, $data )
#
# Passed a SQL::Translator instance and a string containing the
# delimited data.  The first record is taken as the header and supplies
# the field names (passed through normalize_name).  A single table named
# "table1" is added to the translator's schema with one field per
# column; the first column is made the primary key.
#
# Parser arguments read from $translator->parser_args:
#
#   field_separator  - defaults to ","
#   record_separator - defaults to "\n"
#   trim_fields      - on unless explicitly set to a defined 0
#   scan_fields      - on unless explicitly set to a defined 0
#
# Returns a hashref describing the table (keyed by "table1") that
# mirrors what was added to the schema.  Dies with the table's (or
# schema's) error message if the table or one of its fields cannot be
# added.
#
sub parse {
    my ( $tr, $data ) = @_;
    my $args   = $tr->parser_args;
    my $parser = Text::RecordParser->new(
        field_separator  => $args->{'field_separator'}  || ',',
        record_separator => $args->{'record_separator'} || "\n",
        data             => $data,
        header_filter    => \&normalize_name,
    );

    #
    # Trimming is on by default; only an explicit, defined 0 disables it.
    # The callback works on a lexical copy so that the global $_ of
    # whoever invokes it is left alone.
    #
    unless ( defined $args->{'trim_fields'} && $args->{'trim_fields'} == 0 ) {
        $parser->field_filter( sub {
            my $value = shift;
            return $value unless defined $value;
            $value =~ s/^\s+|\s+$//g;
            return $value;
        } );
    }

    #
    # Create skeleton structure, mostly empty.
    #
    my $parsed = {
        table1 => {
            type    => undef,
            indices => [ { } ],
            fields  => { },
        },
    };

    my $schema = $tr->schema;
    my $table  = $schema->add_table( name => 'table1' )
        or die $schema->error;

    #
    # Get the field names from the first row.
    #
    $parser->bind_header;
    my @field_names = $parser->field_list;

    for my $i ( 0 .. $#field_names ) {
        my $name   = $field_names[ $i ];
        my $is_key = ( $i == 0 );

        $parsed->{'table1'}{'fields'}{ $name } = {
            type           => 'field',
            order          => $i,
            name           => $name,

            # Until the data is scanned, every column is a char(255).
            data_type      => 'char',
            size           => [ 255 ],
            null           => 1,
            default        => '',
            is_auto_inc    => undef,

            # The first field is the primary key.
            is_primary_key => $is_key ? 1 : undef,
        };

        my $field = $table->add_field(
            name              => $name,
            data_type         => 'char',
            default_value     => '',
            size              => 255,
            is_nullable       => 1,
            is_auto_increment => undef,
        ) or die $table->error;

        if ( $is_key ) {
            $table->primary_key( $field->name );
            $field->is_primary_key(1);
        }
    }

    #
    # Unless told not to, look at every field's values to guess size
    # and type.
    #
    unless ( defined $args->{'scan_fields'} && $args->{'scan_fields'} == 0 ) {
        my %field_info;
        $field_info{ $_ } = { count => {}, max => {} } for @field_names;

        while ( my $rec = $parser->fetchrow_hashref ) {
            for my $name ( @field_names ) {
                my $data = $rec->{ $name };

                # An empty cell says nothing about the column's type;
                # counting it as "char" would turn every numeric column
                # containing a blank into a char column.
                next unless defined $data && length $data;

                my ( $type, $whole, $decimal ) = _scan_value( $data );
                my $info = $field_info{ $name };
                $info->{'count'}{ $type }++;

                # Track each maximum separately; they are only combined
                # once the final data type of the column is known.
                my %seen = (
                    length  => length( $data ),
                    whole   => $whole,
                    decimal => $decimal,
                );
                for my $key ( keys %seen ) {
                    my $max = $info->{'max'};
                    $max->{ $key } = $seen{ $key }
                        if !defined $max->{ $key }
                        || $seen{ $key } > $max->{ $key };
                }
            }
        }

        # Walk the header order (not hash order) so the result is
        # deterministic.
        for my $name ( @field_names ) {
            my $count = $field_info{ $name }{'count'};
            my $max   = $field_info{ $name }{'max'};

            # No non-empty value was seen: keep the char(255) default
            # instead of overwriting it with an undefined size.
            next unless %$count;

            # Any non-numeric value makes the whole column char; any
            # float among numbers makes it float.
            my $data_type =
                $count->{'char'}  ? 'char'  :
                $count->{'float'} ? 'float' : 'integer';

            # Floats are sized as [ total digits, decimal digits ];
            # everything else by the longest raw value, so a char column
            # always gets a one-element size even if it held floats.
            my $size = $data_type eq 'float'
                ? [ $max->{'whole'} + $max->{'decimal'}, $max->{'decimal'} ]
                : [ $max->{'length'} ];

            $parsed->{'table1'}{'fields'}{ $name }{'size'}      = $size;
            $parsed->{'table1'}{'fields'}{ $name }{'data_type'} = $data_type;

            my $field = $table->get_field( $name );
            $field->size( $size );
            $field->data_type( $data_type );
        }
    }

    #
    # Field 0 is primary key, by default, so add an index
    #
    my $pk_index = $parsed->{'table1'}{'indices'}[0];
    $pk_index->{'type'}   = 'primary_key';
    $pk_index->{'name'}   = undef;
    $pk_index->{'fields'} = [ $field_names[0] ];

    return $parsed;
}

#
# _scan_value( $data )
#
# Classifies one non-empty cell.  Returns a three element list:
# the guessed type ("integer", "float" or "char"), the number of digits
# before the decimal point and the number of digits after it.  Signs and
# thousands-separating commas are not counted as digits.  For "char"
# both digit counts are 0.
#
sub _scan_value {
    my $data = shift;

    if ( $data =~ /^-?(\d+)$/ ) {
        return ( 'integer', length( $1 ), 0 );
    }

    my ( $whole, $decimal );
    if ( $data =~ /^-?(\d[,\d]*)\.(\d*)$/ ) {
        # "1.5", "1,234.56" and "1." -- the whole part must start with a
        # digit so that comma-only strings are not taken for numbers.
        ( $whole, $decimal ) = ( $1, $2 );
    }
    elsif ( $data =~ /^-?\.(\d+)$/ ) {
        # ".5" -- no whole part at all.
        ( $whole, $decimal ) = ( '', $1 );
    }
    else {
        return ( 'char', 0, 0 );
    }

    $whole =~ tr/,//d;
    return ( 'float', length( $whole ), length( $decimal ) );
}
221 | |
046f18e5 |
222 | 1; |
825ed07b |
223 | |
224 | # ------------------------------------------------------------------- |
225 | =pod |
226 | |
227 | =head1 AUTHOR |
228 | |
229 | Darren Chamberlain E<lt>darren@cpan.orgE<gt>, |
230 | Ken Y. Clark E<lt>kclark@cpan.orgE<gt>. |
231 | |
232 | =head1 SEE ALSO |
233 | |
234 | Text::RecordParser. |
235 | |
236 | =cut |