Updated to use Text::RecordParser and added scanning of fields, more
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator / Parser / xSV.pm
CommitLineData
046f18e5 1package SQL::Translator::Parser::xSV;
2
49e1eb70 3# -------------------------------------------------------------------
825ed07b 4# $Id: xSV.pm,v 1.6 2003-05-07 20:36:59 kycl4rk Exp $
49e1eb70 5# -------------------------------------------------------------------
abfa405a 6# Copyright (C) 2003 Ken Y. Clark <kclark@cpan.org>,
825ed07b 7# darren chamberlain <darren@cpan.org>
046f18e5 8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License as
11# published by the Free Software Foundation; version 2.
12#
13# This program is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16# General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21# 02111-1307 USA
22# -------------------------------------------------------------------
23
825ed07b 24=head1 NAME
25
26SQL::Translator::Parser::xSV - parser for arbitrarily delimited text files
27
28=head1 SYNOPSIS
29
30 use SQL::Translator;
31 use SQL::Translator::Parser::xSV;
32
33 my $translator = SQL::Translator->new(
34 parser => 'xSV',
35 parser_args => { field_separator => "\t" },
36 );
37
38=head1 DESCRIPTION
39
40Parses arbitrarily delimited text files. See the
41Text::RecordParser manpage for arguments on how to parse the file
42(e.g., C<field_separator>, C<record_separator>). Other arguments
43include:
44
45=over
46
47=item * scan_fields
48
49Indicates that the columns should be scanned to determine data types
50and field sizes.
51
52=item * trim_fields
53
54A shortcut to sending filters to Text::RecordParser, will create
55callbacks that trim leading and trailing spaces from fields and headers.
56
57=back
58
59Field names will automatically be normalized by
60C<SQL::Translator::Utils::normalize>.
61
62=cut
63
64# -------------------------------------------------------------------
65
046f18e5 66use strict;
67use vars qw($VERSION @EXPORT);
825ed07b 68$VERSION = sprintf "%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/;
046f18e5 69
70use Exporter;
71use Text::ParseWords qw(quotewords);
825ed07b 72use Text::RecordParser;
73use SQL::Translator::Utils qw(debug normalize_name);
046f18e5 74
75use base qw(Exporter);
76@EXPORT = qw(parse);
77
825ed07b 78#
046f18e5 79# Passed a SQL::Translator instance and a string containing the data
825ed07b 80#
046f18e5 81sub parse {
82 my ($tr, $data) = @_;
83
825ed07b 84 my $args = $tr->parser_args;
85 my $parser = Text::RecordParser->new(
86 field_separator => $args->{'field_separator'} || ',',
87 record_separator => $args->{'record_separator'} || "\n",
88 data => $data,
89 header_filter => \&normalize_name,
90 );
91
92 $parser->field_filter( sub { $_ = shift; s/^\s+|\s+$//g; $_ } )
93 if $args->{'trim_fields'};
94
95 #
96 # Create skeleton structure, mostly empty.
97 #
98 my $parsed = {
99 table1 => {
100 type => undef,
101 indices => [ { } ],
102 fields => { },
046f18e5 103 },
104 };
105
825ed07b 106 #
107 # Get the field names from the first row.
108 #
109 $parser->bind_header;
110 my @field_names = $parser->field_list;
046f18e5 111
825ed07b 112 for ( my $i = 0; $i < @field_names; $i++ ) {
113 $parsed->{'table1'}{'fields'}{ $field_names[$i] } = {
114 type => 'field',
046f18e5 115 order => $i,
825ed07b 116 name => $field_names[$i],
046f18e5 117
118 # Default datatype is "char"
119 data_type => "char",
120
121 # default size is 8bits; something more reasonable?
ab0aa010 122 size => [ 255 ],
046f18e5 123 null => 1,
825ed07b 124 default => '',
046f18e5 125 is_auto_inc => undef,
126
127 # field field is the primary key
128 is_primary_key => ($i == 0) ? 1 : undef,
129 }
130 }
131
825ed07b 132 #
133 # If directed, look at every field's values to guess size and type.
134 #
135 if ( $args->{'scan_fields'} ) {
136 my %field_info = map { $_, {} } @field_names;
137 while ( my $rec = $parser->fetchrow_hashref ) {
138 for my $field ( @field_names ) {
139 my $data = defined $rec->{ $field } ? $rec->{ $field } : '';
140 my $size = length $data;
141 my $type;
142
143 if ( $data =~ /^-?\d+$/ ) {
144 $type = 'integer';
145 }
146 elsif ( $data =~ /^[\d.-]+$/ ) {
147 $type = 'float';
148 }
149 else {
150 $type = 'char';
151 }
152
153 if ( $size > $field_info{ $field }{'size'} ) {
154 $field_info{ $field }{'size'} = $size;
155 }
156
157 $field_info{ $field }{ $type }++;
158 }
159 }
160
161 for my $field ( keys %field_info ) {
162 $parsed->{'table1'}{'fields'}{ $field }{'size'} =
163 [ $field_info{ $field }{'size'} ];
164
165 $parsed->{'table1'}{'fields'}{ $field }{'data_type'} =
166 $field_info{ $field }{'char'} ? 'char' :
167 $field_info{ $field }{'float'} ? 'float' : 'integer';
168 }
169 }
170
171 #
046f18e5 172 # Field 0 is primary key, by default, so add an index
825ed07b 173 #
174 for ( $parsed->{'table1'}->{'indices'}->[0] ) {
175 $_->{'type'} = 'primary_key';
176 $_->{'name'} = undef;
177 $_->{'fields'} = [ $field_names[0] ];
046f18e5 178 }
179
180 return $parsed;
181}
182
046f18e5 1831;
825ed07b 184
185# -------------------------------------------------------------------
186=pod
187
188=head1 AUTHOR
189
190Darren Chamberlain E<lt>darren@cpan.orgE<gt>,
191Ken Y. Clark E<lt>kclark@cpan.orgE<gt>.
192
193=head1 SEE ALSO
194
195Text::RecordParser.
196
197=cut