X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FTranslator%2FParser%2FxSV.pm;h=3e199b21faa48ba99f1f3275bfe701884f524e37;hb=0c04c5a2210135419771878dc7e341a1cba52cca;hp=b77e015c8e26ca436861a90831517f3835fe95ff;hpb=046f18e5498f3d690e7857d8f97fa54e17da2356;p=dbsrgits%2FSQL-Translator.git

diff --git a/lib/SQL/Translator/Parser/xSV.pm b/lib/SQL/Translator/Parser/xSV.pm
index b77e015..3e199b2 100644
--- a/lib/SQL/Translator/Parser/xSV.pm
+++ b/lib/SQL/Translator/Parser/xSV.pm
@@ -1,84 +1,178 @@
 package SQL::Translator::Parser::xSV;
 
-#-----------------------------------------------------
-# $Id: xSV.pm,v 1.1 2002-03-25 14:27:23 dlc Exp $
-#-----------------------------------------------------
-# Copyright (C) 2002 Ken Y. Clark,
-#                    darren chamberlain
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; version 2.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-# 02111-1307 USA
-# -------------------------------------------------------------------
+=head1 NAME
+
+SQL::Translator::Parser::xSV - parser for arbitrarily delimited text files
+
+=head1 SYNOPSIS
+
+  use SQL::Translator;
+  use SQL::Translator::Parser::xSV;
+
+  my $translator = SQL::Translator->new(
+      parser      => 'xSV',
+      parser_args => { field_separator => "\t" },
+  );
+
+=head1 DESCRIPTION
+
+Parses arbitrarily delimited text files.  See the
+Text::RecordParser manpage for arguments on how to parse the file
+(e.g., C<field_separator>, C<record_separator>).  Other arguments
+include:
+
+=head1 OPTIONS
+
+=over
+
+=item * scan_fields
+
+Indicates that the columns should be scanned to determine data types
+and field sizes.  True by default.
+
+=item * trim_fields
+
+A shortcut to sending filters to Text::RecordParser, will create
+callbacks that trim leading and trailing spaces from fields and headers.
+True by default.
+
+=back
+
+Field names will automatically be normalized by
+C<SQL::Translator::Utils::normalize_name>.
+
+=cut
 
 use strict;
-use vars qw($VERSION @EXPORT);
-$VERSION = sprintf "%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/;
+use warnings;
+our @EXPORT;
+our $VERSION = '1.59';
 
 use Exporter;
 use Text::ParseWords qw(quotewords);
+use Text::RecordParser;
+use SQL::Translator::Utils qw(debug normalize_name);
 
 use base qw(Exporter);
 @EXPORT = qw(parse);
 
+#
 # Passed a SQL::Translator instance and a string containing the data
+#
 sub parse {
-    my ($tr, $data) = @_;
-
-    # Skeleton structure, mostly empty
-    my $parsed = {
-        table1 => {
-            "type" => undef,
-            "indeces" => [ { } ],
-            "fields" => { },
-        },
-    };
-
-    # Discard all but the first line
-    $data = (split m,$/,, $data)[0];
-
-    my @parsed = quotewords(',\s*', 0, $data);
-
-    for (my $i = 0; $i < @parsed; $i++) {
-        $parsed->{"table1"}->{"fields"}->{$parsed[$i]} = {
-            type => "field",
-            order => $i,
-            name => $parsed[$i],
-
-            # Default datatype is "char"
-            data_type => "char",
-
-            # default size is 8bits; something more reasonable?
-            size => 255,
-            null => 1,
-            default => "",
-            is_auto_inc => undef,
-
-            # field field is the primary key
-            is_primary_key => ($i == 0) ? 1 : undef,
+    my ( $tr, $data ) = @_;
+    my $args   = $tr->parser_args;
+    my $parser = Text::RecordParser->new(
+        field_separator  => $args->{'field_separator'}  || ',',
+        record_separator => $args->{'record_separator'} || "\n",
+        data             => $data,
+        header_filter    => \&normalize_name,
+    );
+
+    $parser->field_filter( sub { $_ = shift || ''; s/^\s+|\s+$//g; $_ } )
+        unless defined $args->{'trim_fields'} && $args->{'trim_fields'} == 0;
+
+    my $schema = $tr->schema;
+    my $table  = $schema->add_table( name => 'table1' );
+
+    #
+    # Get the field names from the first row.
+    #
+    $parser->bind_header;
+    my @field_names = $parser->field_list;
+
+    for ( my $i = 0; $i < @field_names; $i++ ) {
+        my $field = $table->add_field(
+            name              => $field_names[$i],
+            data_type         => 'char',
+            default_value     => '',
+            size              => 255,
+            is_nullable       => 1,
+            is_auto_increment => undef,
+        ) or die $table->error;
+
+        if ( $i == 0 ) {
+            $table->primary_key( $field->name );
+            $field->is_primary_key(1);
         }
     }
 
-    # Field 0 is primary key, by default, so add an index
-    for ($parsed->{"table1"}->{"indeces"}->[0]) {
-        $_->{"type"} = "primary_key";
-        $_->{"name"} = undef;
-        $_->{"fields"} = [ $parsed[0] ];
+    #
+    # If directed, look at every field's values to guess size and type.
+    #
+    unless (
+        defined $args->{'scan_fields'} &&
+        $args->{'scan_fields'} == 0
+    ) {
+        my %field_info = map { $_, {} } @field_names;
+        while ( my $rec = $parser->fetchrow_hashref ) {
+            for my $field ( @field_names ) {
+                my $data = defined $rec->{ $field } ? $rec->{ $field } : '';
+                my $size = [ length $data ];
+                my $type;
+
+                if ( $data =~ /^-?\d+$/ ) {
+                    $type = 'integer';
+                }
+                elsif (
+                    $data =~ /^-?[,\d]+\.[\d+]?$/
+                    ||
+                    $data =~ /^-?[,\d]+?\.\d+$/
+                    ||
+                    $data =~ /^-?\.\d+$/
+                ) {
+                    $type = 'float';
+                    my ( $w, $d ) =
+                        map { s/,//g; length $_ || 1 } split( /\./, $data );
+                    $size = [ $w + $d, $d ];
+                }
+                else {
+                    $type = 'char';
+                }
+
+                for my $i ( 0, 1 ) {
+                    next unless defined $size->[ $i ];
+                    my $fsize = $field_info{ $field }{'size'}[ $i ] || 0;
+                    if ( $size->[ $i ] > $fsize ) {
+                        $field_info{ $field }{'size'}[ $i ] = $size->[ $i ];
+                    }
+                }
+
+                $field_info{ $field }{ $type }++;
+            }
+        }
+
+        for my $field ( keys %field_info ) {
+            my $size      = $field_info{ $field }{'size'} || [ 1 ];
+            my $data_type =
+                $field_info{ $field }{'char'}    ? 'char'    :
+                $field_info{ $field }{'float'}   ? 'float'   :
+                $field_info{ $field }{'integer'} ? 'integer' : 'char';
+
+            if ( $data_type eq 'char' && scalar @$size == 2 ) {
+                $size = [ $size->[0] + $size->[1] ];
+            }
+
+            my $field = $table->get_field( $field );
+            $field->size( $size );
+            $field->data_type( $data_type );
+        }
     }
 
-    return $parsed;
+    return 1;
 }
-
 1;
-__END__
+
+=pod
+
+=head1 AUTHORS
+
+Darren Chamberlain E<lt>darren@cpan.orgE<gt>,
+Ken Y. Clark E<lt>kclark@cpan.orgE<gt>.
+
+=head1 SEE ALSO
+
+Text::RecordParser, SQL::Translator.
+
+=cut
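
A minimal usage sketch of the options the new POD documents, passed through
SQL::Translator's parser_args. The input file name (people.tab) and the choice
of producer are illustrative assumptions, not taken from the patch:

  use SQL::Translator;

  my $translator = SQL::Translator->new(
      parser      => 'xSV',
      producer    => 'SQLite',      # illustrative; any producer works
      parser_args => {
          field_separator => "\t",  # parse tab-separated input
          trim_fields     => 1,     # strip leading/trailing whitespace (the default)
          scan_fields     => 1,     # guess data_type and size per column (the default)
      },
  );

  # translate() reads the file, builds the schema via Parser::xSV,
  # then hands it to the producer.
  my $sql = $translator->translate( filename => 'people.tab' )
      or die $translator->error;
  print $sql;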