Commit | Line | Data |
046f18e5 |
1 | package SQL::Translator::Parser::xSV; |
2 | |
825ed07b |
3 | =head1 NAME |
4 | |
5 | SQL::Translator::Parser::xSV - parser for arbitrarily delimited text files |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use SQL::Translator; |
10 | use SQL::Translator::Parser::xSV; |
11 | |
12 | my $translator = SQL::Translator->new( |
13 | parser => 'xSV', |
14 | parser_args => { field_separator => "\t" }, |
15 | ); |
16 | |
17 | =head1 DESCRIPTION |
18 | |
ea93df61 |
19 | Parses arbitrarily delimited text files. See the |
825ed07b |
20 | Text::RecordParser manpage for arguments on how to parse the file |
21 | (e.g., C<field_separator>, C<record_separator>). Other arguments |
22 | include: |
23 | |
5eb4a350 |
24 | =head1 OPTIONS |
25 | |
825ed07b |
26 | =over |
27 | |
28 | =item * scan_fields |
29 | |
30 | Indicates that the columns should be scanned to determine data types |
9d90f9cd |
31 | and field sizes. True by default. |
825ed07b |
32 | |
33 | =item * trim_fields |
34 | |
ea93df61 |
35 | A shortcut for sending filters to Text::RecordParser: creates |
825ed07b |
36 | callbacks that trim leading and trailing spaces from fields and headers. |
9d90f9cd |
37 | True by default. |
825ed07b |
38 | |
39 | =back |
40 | |
ea93df61 |
41 | Field names will automatically be normalized by |
7b09a302 |
42 | C<SQL::Translator::Utils::normalize_name>. |
825ed07b |
43 | |
44 | =cut |
45 | |
046f18e5 |
46 | use strict; |
f27f9229 |
47 | use warnings; |
0c04c5a2 |
48 | our @EXPORT; |
49 | our $VERSION = '1.59'; |
046f18e5 |
50 | |
51 | use Exporter; |
52 | use Text::ParseWords qw(quotewords); |
825ed07b |
53 | use Text::RecordParser; |
54 | use SQL::Translator::Utils qw(debug normalize_name); |
046f18e5 |
55 | |
56 | use base qw(Exporter); |
57 | @EXPORT = qw(parse); |
58 | |
825ed07b |
#
# parse( $tr, $data )
#
# Passed a SQL::Translator instance and a string containing the data.
#
# Builds a single table named 'table1' in the translator's schema.  The
# first record supplies the field names (run through normalize_name); every
# field starts out as a nullable char(255) and the first field becomes the
# primary key.  Unless the "scan_fields" parser argument is defined and
# numerically zero, the remaining records are then scanned to guess each
# field's data type (integer, float or char) and size.
#
# Parser arguments read from $tr->parser_args:
#   field_separator  - passed to Text::RecordParser, defaults to ','
#   record_separator - passed to Text::RecordParser, defaults to "\n"
#   trim_fields      - trim whitespace around field values unless 0
#   scan_fields      - scan values to guess type and size unless 0
#
# Returns 1 on success.  Dies with the schema's or table's error message
# if the table or one of its fields cannot be added.
#
sub parse {
    my ( $tr, $data ) = @_;
    my $args   = $tr->parser_args;
    my $parser = Text::RecordParser->new(
        field_separator  => $args->{'field_separator'}  || ',',
        record_separator => $args->{'record_separator'} || "\n",
        data             => $data,
        header_filter    => \&normalize_name,
    );

    unless ( defined $args->{'trim_fields'} && $args->{'trim_fields'} == 0 ) {
        $parser->field_filter(
            sub {
                # Use a lexical and a defined-test: "shift || ''" would turn
                # a legitimate "0" into the empty string, and assigning to
                # $_ would clobber the caller's topic variable.
                my $value = shift;
                $value = '' unless defined $value;
                $value =~ s/^\s+|\s+$//g;
                return $value;
            }
        );
    }

    my $schema = $tr->schema;
    my $table  = $schema->add_table( name => 'table1' )
        or die $schema->error;

    #
    # Get the field names from the first row.
    #
    $parser->bind_header;
    my @field_names = $parser->field_list;

    for my $i ( 0 .. $#field_names ) {
        my $field = $table->add_field(
            name              => $field_names[$i],
            data_type         => 'char',
            default_value     => '',
            size              => 255,
            is_nullable       => 1,
            is_auto_increment => undef,
        ) or die $table->error;

        # The first column is taken to be the primary key.
        if ( $i == 0 ) {
            $table->primary_key( $field->name );
            $field->is_primary_key(1);
        }
    }

    #
    # If directed, look at every field's values to guess size and type.
    #
    unless ( defined $args->{'scan_fields'} && $args->{'scan_fields'} == 0 ) {
        my %field_info = map { $_ => {} } @field_names;

        while ( my $rec = $parser->fetchrow_hashref ) {
            for my $field_name (@field_names) {
                my $value =
                    defined $rec->{$field_name} ? $rec->{$field_name} : '';
                my ( $type, $size ) = _guess_type_and_size($value);

                # Remember the largest size seen so far in each position
                # (0: total length/precision, 1: decimal places).
                for my $i ( 0, 1 ) {
                    next unless defined $size->[$i];
                    my $known = $field_info{$field_name}{'size'}[$i] || 0;
                    if ( $size->[$i] > $known ) {
                        $field_info{$field_name}{'size'}[$i] = $size->[$i];
                    }
                }

                $field_info{$field_name}{$type}++;
            }
        }

        for my $field_name (@field_names) {
            my $info = $field_info{$field_name};

            # Reading the size slots above autovivifies an empty array for
            # columns whose values were all empty, so test for elements
            # rather than mere truth before falling back to a size of 1.
            my $size = $info->{'size'};
            $size = [1] unless $size && @$size;

            # Any non-numeric value makes the whole column char; otherwise
            # one float value is enough to make it float.
            my $data_type =
                  $info->{'char'}    ? 'char'
                : $info->{'float'}   ? 'float'
                : $info->{'integer'} ? 'integer'
                :                      'char';

            # A char column that also held floats carries a two-part size;
            # collapse it into a single length.
            if ( $data_type eq 'char' && scalar @$size == 2 ) {
                $size = [ $size->[0] + $size->[1] ];
            }

            my $field = $table->get_field($field_name);
            $field->size($size);
            $field->data_type($data_type);
        }
    }

    return 1;
}

#
# _guess_type_and_size( $value )
#
# Classifies a single field value.  Returns a two-element list: the type
# ('integer', 'float' or 'char') and an array ref holding the size, which is
# [ length ] for integer and char values and [ total digits, decimal digits ]
# for float values.
#
sub _guess_type_and_size {
    my ($value) = @_;

    return ( 'integer', [ length $value ] ) if $value =~ /^-?\d+$/;

    # Digits (optionally with thousands separators) followed by a decimal
    # point and zero or more digits, or a bare ".digits".  The fractional
    # part is \d*; a character class such as [\d+] would also accept a
    # literal '+' and classify "1.+" as a float.
    if ( $value =~ /^-?[,\d]+\.\d*$/ || $value =~ /^-?\.\d+$/ ) {
        my @lengths = map {
            ( my $digits = $_ ) =~ s/,//g;
            length($digits) || 1;
        } split /\./, $value;

        my $whole = $lengths[0];

        # split drops a trailing empty part ("1." gives one element), so
        # default the decimal length instead of adding undef.
        my $decimal = defined $lengths[1] ? $lengths[1] : 0;

        return ( 'float', [ $whole + $decimal, $decimal ] );
    }

    return ( 'char', [ length $value ] );
}
164 | |
046f18e5 |
165 | 1; |
825ed07b |
166 | |
825ed07b |
167 | =pod |
168 | |
90075866 |
169 | =head1 AUTHORS |
825ed07b |
170 | |
171 | Darren Chamberlain E<lt>darren@cpan.orgE<gt>, |
11ad2df9 |
172 | Ken Y. Clark E<lt>kclark@cpan.orgE<gt>. |
825ed07b |
173 | |
174 | =head1 SEE ALSO |
175 | |
5eb4a350 |
176 | Text::RecordParser, SQL::Translator. |
825ed07b |
177 | |
178 | =cut |