Added CSV parser and a test.
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator.pm
CommitLineData
16dc9970 1package SQL::Translator;
2
b346d8f1 3# ----------------------------------------------------------------------
9a7841dd 4# $Id: Translator.pm,v 1.4 2002-03-21 18:50:53 dlc Exp $
b346d8f1 5# ----------------------------------------------------------------------
077ebf34 6# Copyright (C) 2002 Ken Y. Clark <kycl4rk@users.sourceforge.net>,
7# darren chamberlain <darren@cpan.org>
1fd8c91f 8#
077ebf34 9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License as
11# published by the Free Software Foundation; version 2.
ca10f295 12#
077ebf34 13# This program is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16# General Public License for more details.
ca10f295 17#
077ebf34 18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21# 02111-1307 USA
ca10f295 22# -------------------------------------------------------------------
23
24=head1 NAME
25
26SQL::Translator - convert schema from one database to another
27
28=head1 SYNOPSIS
29
30 use SQL::Translator;
31 my $translator = SQL::Translator->new;
077ebf34 32
b346d8f1 33 my $output = $translator->translate(
34 from => "MySQL",
35 to => "Oracle",
36 filename => $file,
37 ) or die $translator->error;
ca10f295 38 print $output;
39
40=head1 DESCRIPTION
41
42This module attempts to simplify the task of converting one database
43create syntax to another through the use of Parsers and Producers.
44The idea is that any Parser can be used with any Producer in the
b346d8f1 45conversion process. So, if you wanted PostgreSQL-to-Oracle, you would
46use the PostgreSQL parser and the Oracle producer.
ca10f295 47
077ebf34 48Currently, the existing parsers use Parse::RecDescent, but this not
49a requirement, or even a recommendation. New parser modules don't
50necessarily have to use Parse::RecDescent, as long as the module
51implements the appropriate API. With this separation of code, it is
ca10f295 52hoped that developers will find it easy to add more database dialects
53by using what's written, writing only what they need, and then
54contributing their parsers or producers back to the project.
55
56=cut
16dc9970 57
58use strict;
ca10f295 59use vars qw($VERSION $DEFAULT_SUB $DEBUG);
9a7841dd 60$VERSION = sprintf "%d.%02d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/;
ca10f295 61$DEBUG = 1 unless defined $DEBUG;
16dc9970 62
b346d8f1 63# ----------------------------------------------------------------------
64# The default behavior is to "pass through" values (note that the
65# SQL::Translator instance is the first value ($_[0]), and the stuff
66# to be parsed is the second value ($_[1])
67# ----------------------------------------------------------------------
68$DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
16dc9970 69
ca10f295 70*isa = \&UNIVERSAL::isa;
16dc9970 71
b346d8f1 72use Carp qw(carp);
1fd8c91f 73
ca10f295 74=head1 CONSTRUCTOR
16dc9970 75
077ebf34 76The constructor is called B<new>, and accepts a optional hash of options.
ca10f295 77Valid options are:
16dc9970 78
ca10f295 79=over 4
80
81=item parser (aka from)
82
83=item producer (aka to)
84
85=item filename
86
87=back
88
89All options are, well, optional; these attributes can be set via
b346d8f1 90instance methods. Internally, they are; no (non-syntactical)
91advantage is gained by passing options to the constructor.
ca10f295 92
93=cut
94
95# {{{ new
b346d8f1 96# ----------------------------------------------------------------------
97# new([ARGS])
98# The constructor.
dfb4c915 99#
b346d8f1 100# new takes an optional hash of arguments. These arguments may
101# include a parser, specified with the keys "parser" or "from",
102# and a producer, specified with the keys "producer" or "to".
dfb4c915 103#
b346d8f1 104# The values that can be passed as the parser or producer are
105# given directly to the parser or producer methods, respectively.
106# See the appropriate method description below for details about
107# what each expects/accepts.
108#
109# TODO
110# * Support passing an input (filename or string) as with
111# translate
112# ----------------------------------------------------------------------
16dc9970 113sub new {
16dc9970 114 my $class = shift;
ca10f295 115 my $args = isa($_[0], 'HASH') ? shift : { @_ };
116 my $self = bless { } => $class;
1fd8c91f 117
b346d8f1 118 # ------------------------------------------------------------------
119 # Set the parser and producer.
ca10f295 120 #
b346d8f1 121 # If a 'parser' or 'from' parameter is passed in, use that as the
122 # parser; if a 'producer' or 'to' parameter is passed in, use that
123 # as the producer; both default to $DEFAULT_SUB.
124 # ------------------------------------------------------------------
ca10f295 125 $self->parser( $args->{'parser'} || $args->{'from'} || $DEFAULT_SUB);
126 $self->producer($args->{'producer'} || $args->{'to'} || $DEFAULT_SUB);
127
b346d8f1 128 # ------------------------------------------------------------------
ca10f295 129 # Clear the error
b346d8f1 130 # ------------------------------------------------------------------
ca10f295 131 $self->error_out("");
132
133 return $self;
dfb4c915 134}
ca10f295 135# }}}
1fd8c91f 136
ca10f295 137=head1 METHODS
138
ca10f295 139=head2 B<producer>
140
141The B<producer> method is an accessor/mutator, used to retrieve or
142define what subroutine is called to produce the output. A subroutine
b346d8f1 143defined as a producer will be invoked as a function (not a method) and
144passed 2 parameters: its container SQL::Translator instance and a
145data structure. It is expected that the function transform the data
146structure to a string. The SQL::Transformer instance is provided for
147informational purposes; for example, the type of the parser can be
148retrieved using the B<parser_type> method, and the B<error> and
149B<debug> methods can be called when needed.
150
151When defining a producer, one of several things can be passed
152in: A module name (e.g., My::Groovy::Producer), a module name
153relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
154module name and function combination (My::Groovy::Producer::transmogrify),
155or a reference to an anonymous subroutine. If a full module name is
156passed in (for the purposes of this method, a string containing "::"
157is considered to be a module name), it is treated as a package, and a
158function called "produce" will be invoked: $modulename::produce. If
159$modulename cannot be loaded, the final portion is stripped off and
160treated as a function. In other words, if there is no file named
161My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
162My/Groovy/Producer.pm and use transmogrify as the name of the function,
163instead of the default "produce".
ca10f295 164
165 my $tr = SQL::Translator->new;
166
077ebf34 167 # This will invoke My::Groovy::Producer::produce($tr, $data)
ca10f295 168 $tr->producer("My::Groovy::Producer");
169
077ebf34 170 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
ca10f295 171 $tr->producer("Sybase");
172
b346d8f1 173 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
174 # assuming that My::Groovy::Producer::transmogrify is not a module
175 # on disk.
176 # $tr->producer("My::Groovy::Producer::transmogrify);
177
077ebf34 178 # This will invoke the referenced subroutine directly, as
179 # $subref->($tr, $data);
ca10f295 180 $tr->producer(\&my_producer);
181
077ebf34 182There is also a method named B<producer_type>, which is a string
183containing the classname to which the above B<produce> function
184belongs. In the case of anonymous subroutines, this method returns
185the string "CODE".
186
ca10f295 187=cut
ca10f295 188
077ebf34 189# {{{ producer and producer_type
ca10f295 190sub producer {
1fd8c91f 191 my $self = shift;
b346d8f1 192
193 # {{{ producer as a mutator
ca10f295 194 if (@_) {
195 my $producer = shift;
b346d8f1 196
197 # {{{ Passed a module name (string containing "::")
ca10f295 198 if ($producer =~ /::/) {
077ebf34 199 my $func_name;
b346d8f1 200
201 # {{{ Module name was passed directly
202 # We try to load the name; if it doesn't load, there's
203 # a possibility that it has a function name attached to
204 # it.
077ebf34 205 if (load($producer)) {
206 $func_name = "produce";
b346d8f1 207 } # }}}
208
209 # {{{ Module::function was passed
210 else {
211 # Passed Module::Name::function; try to recover
077ebf34 212 my @func_parts = split /::/, $producer;
213 $func_name = pop @func_parts;
214 $producer = join "::", @func_parts;
b346d8f1 215
216 # If this doesn't work, then we have a legitimate
217 # problem.
077ebf34 218 load($producer) or die "Can't load $producer: $@";
b346d8f1 219 } # }}}
077ebf34 220
b346d8f1 221 # {{{ get code reference and assign
077ebf34 222 $self->{'producer'} = \&{ "$producer\::$func_name" };
223 $self->{'producer_type'} = $producer;
224 $self->debug("Got 'producer': $producer\::$func_name");
b346d8f1 225 # }}}
226 } # }}}
227
228 # {{{ passed an anonymous subroutine reference
229 elsif (isa($producer, 'CODE')) {
ca10f295 230 $self->{'producer'} = $producer;
077ebf34 231 $self->{'producer_type'} = "CODE";
ca10f295 232 $self->debug("Got 'producer': code ref");
b346d8f1 233 } # }}}
234
235 # {{{ passed a string containing no "::"; relative package name
236 else {
ca10f295 237 my $Pp = sprintf "SQL::Translator::Producer::$producer";
238 load($Pp) or die "Can't load $Pp: $@";
077ebf34 239 $self->{'producer'} = \&{ "$Pp\::produce" };
240 $self->{'producer_type'} = $Pp;
ca10f295 241 $self->debug("Got producer: $Pp");
b346d8f1 242 } # }}}
243
ca10f295 244 # At this point, $self->{'producer'} contains a subroutine
b346d8f1 245 # reference that is ready to run
246 } # }}}
247
ca10f295 248 return $self->{'producer'};
249};
077ebf34 250
251sub producer_type { $_[0]->{'producer_type'} }
ca10f295 252# }}}
253
254=head2 B<parser>
255
256The B<parser> method defines or retrieves a subroutine that will be
257called to perform the parsing. The basic idea is the same as that of
258B<producer> (see above), except the default subroutine name is
077ebf34 259"parse", and will be invoked as $module_name::parse($tr, $data).
260Also, the parser subroutine will be passed a string containing the
261entirety of the data to be parsed (or possibly a reference to a string?).
ca10f295 262
263 # Invokes SQL::Translator::Parser::MySQL::parse()
264 $tr->parser("MySQL");
265
266 # Invokes My::Groovy::Parser::parse()
267 $tr->parser("My::Groovy::Parser");
268
269 # Invoke an anonymous subroutine directly
270 $tr->parser(sub {
077ebf34 271 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
ca10f295 272 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
273 return $dumper->Dump;
274 });
275
276=cut
277
077ebf34 278# {{{ parser and parser_type
ca10f295 279sub parser {
280 my $self = shift;
b346d8f1 281
282 # {{{ parser as a mutator
ca10f295 283 if (@_) {
284 my $parser = shift;
b346d8f1 285
286 # {{{ Passed a module name (string containing "::")
ca10f295 287 if ($parser =~ /::/) {
b346d8f1 288 my $func_name;
289
290 # {{{ Module name was passed directly
291 # We try to load the name; if it doesn't load, there's
292 # a possibility that it has a function name attached to
293 # it.
294 if (load($parser)) {
295 $func_name = "parse";
296 } # }}}
297
298 # {{{ Module::function was passed
299 else {
300 # Passed Module::Name::function; try to recover
301 my @func_parts = split /::/, $parser;
302 $func_name = pop @func_parts;
303 $parser = join "::", @func_parts;
304
305 # If this doesn't work, then we have a legitimate
306 # problem.
307 load($parser) or die "Can't load $parser: $@";
308 } # }}}
309
310 # {{{ get code reference and assign
311 $self->{'parser'} = \&{ "$parser\::$func_name" };
077ebf34 312 $self->{'parser_type'} = $parser;
b346d8f1 313 $self->debug("Got parser: $parser\::$func_name");
314 # }}}
315 } # }}}
316
317 # {{{ passed an anonymous subroutine reference
318 elsif (isa($parser, 'CODE')) {
ca10f295 319 $self->{'parser'} = $parser;
077ebf34 320 $self->{'parser_type'} = "CODE";
b346d8f1 321 $self->debug("Got 'parser': code ref");
322 } # }}}
323
324 # {{{ passed a string containing no "::"; relative package name
325 else {
326 my $Pp = sprintf "SQL::Translator::Parser::$parser";
ca10f295 327 load($Pp) or die "Can't load $Pp: $@";
328 $self->{'parser'} = \&{ "$Pp\::parse" };
077ebf34 329 $self->{'parser_type'} = $Pp;
ca10f295 330 $self->debug("Got parser: $Pp");
b346d8f1 331 } # }}}
332
333 # At this point, $self->{'parser'} contains a subroutine
334 # reference that is ready to run
335 } # }}}
336
337
ca10f295 338 return $self->{'parser'};
16dc9970 339}
1fd8c91f 340
077ebf34 341sub parser_type { $_[0]->{'parser_type'} }
ca10f295 342# }}}
16dc9970 343
ca10f295 344=head2 B<translate>
345
346The B<translate> method calls the subroutines referenced by the
347B<parser> and B<producer> data members (described above). It accepts
348as arguments a number of things, in key => value format, including
349(potentially) a parser and a producer (they are passed directly to the
350B<parser> and B<producer> methods).
351
352Here is how the parameter list to B<translate> is parsed:
353
354=over
355
356=item *
357
3581 argument means it's the data to be parsed; which could be a string
b346d8f1 359(filename) or a refernce to a scalar (a string stored in memory), or a
360reference to a hash, which is parsed as being more than one argument
361(see next section).
ca10f295 362
363 # Parse the file /path/to/datafile
364 my $output = $tr->translate("/path/to/datafile");
365
b346d8f1 366 # Parse the data contained in the string $data
ca10f295 367 my $output = $tr->translate(\$data);
368
369=item *
370
077ebf34 371More than 1 argument means its a hash of things, and it might be
372setting a parser, producer, or datasource (this key is named
b346d8f1 373"filename" or "file" if it's a file, or "data" for a SCALAR reference.
ca10f295 374
375 # As above, parse /path/to/datafile, but with different producers
376 for my $prod ("MySQL", "XML", "Sybase") {
377 print $tr->translate(
378 producer => $prod,
379 filename => "/path/to/datafile",
380 );
381 }
382
383 # The filename hash key could also be:
ca10f295 384 datasource => \$data,
385
386You get the idea.
387
388=back
389
390=cut
391
392# {{{ translate
16dc9970 393sub translate {
ca10f295 394 my $self = shift;
395 my ($args, $parser, $producer);
396
397 if (@_ == 1) {
398 if (isa($_[0], 'HASH')) {
399 # Passed a hashref
077ebf34 400 $self->debug("translate: Got a hashref");
ca10f295 401 $args = $_[0];
402 }
ca10f295 403 elsif (isa($_[0], 'SCALAR')) {
404 # passed a ref to a string; deref it
077ebf34 405 $self->debug("translate: Got a SCALAR reference (string)");
ca10f295 406 $args = { data => ${$_[0]} };
407 }
b346d8f1 408 elsif (! ref $_[0]) {
ca10f295 409 # Not a ref, it's a filename
077ebf34 410 $self->debug("translate: Got a filename");
ca10f295 411 $args = { filename => $_[0] };
412 }
b346d8f1 413 else {
414 # We're not impressed. Take your empty string and leave.
415 return "";
416 }
16dc9970 417 }
418 else {
b346d8f1 419 # You must pass in a hash, or you get nothing.
420 return "" if @_ % 2;
ca10f295 421 $args = { @_ };
16dc9970 422 }
423
b346d8f1 424 if ((defined $args->{'filename'} || defined $args->{'file'}) &&
425 not $args->{'data'}) {
ca10f295 426 local *FH;
427 local $/;
428
b346d8f1 429 open FH, $args->{'filename'}
430 or die "Can't open $args->{'filename'} for reading: $!";
ca10f295 431 $args->{'data'} = <FH>;
b346d8f1 432 close FH or die "Can't close $args->{'filename'}: $!";
16dc9970 433 }
ca10f295 434
435 #
436 # Last chance to bail out; if there's nothing in the data
437 # key of %args, back out.
438 #
b346d8f1 439 return "" unless defined $args->{'data'};
077ebf34 440
ca10f295 441 #
442 # Local reference to the parser subroutine
443 #
444 if ($parser = ($args->{'parser'} || $args->{'from'})) {
445 $self->parser($parser);
446 } else {
447 $parser = $self->parser;
16dc9970 448 }
449
450 #
ca10f295 451 # Local reference to the producer subroutine
16dc9970 452 #
ca10f295 453 if ($producer = ($args->{'producer'} || $args->{'to'})) {
454 $self->producer($producer);
455 } else {
456 $producer = $self->producer;
16dc9970 457 }
458
ca10f295 459 #
460 # Execute the parser, then execute the producer with that output
461 #
b346d8f1 462 return $producer->($self, $parser->($self, $args->{'data'}));
16dc9970 463}
ca10f295 464# }}}
465
466=head2 B<error>
16dc9970 467
ca10f295 468The error method returns the last error.
469
470=cut
471
472# {{{ error
16dc9970 473#-----------------------------------------------------
ca10f295 474sub error {
16dc9970 475#
ca10f295 476# Return the last error.
16dc9970 477#
ca10f295 478 return shift()->{'error'} || '';
479}
480# }}}
481
482=head2 B<error_out>
483
484Record the error and return undef. The error can be retrieved by
485calling programs using $tr->error.
486
487For Parser or Producer writers, primarily.
488
489=cut
490
491# {{{ error_out
492sub error_out {
16dc9970 493 my $self = shift;
ca10f295 494 if ( my $error = shift ) {
495 $self->{'error'} = $error;
16dc9970 496 }
ca10f295 497 return;
16dc9970 498}
ca10f295 499# }}}
1fd8c91f 500
ca10f295 501=head2 B<debug>
502
503If the global variable $SQL::Translator::DEBUG is set to a true value,
504then calls to $tr->debug($msg) will be carped to STDERR. If $DEBUG is
505not set, then this method does nothing.
506
507=cut
508
509# {{{ debug
ca10f295 510sub debug {
16dc9970 511 my $self = shift;
ca10f295 512 carp @_ if ($DEBUG);
16dc9970 513}
ca10f295 514# }}}
515
516# {{{ load
517sub load {
518 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
519 return 1 if $INC{$module};
520
521 eval { require $module };
522
523 return if ($@);
524 return 1;
1fd8c91f 525}
ca10f295 526# }}}
16dc9970 527
5281;
529
ca10f295 530__END__
16dc9970 531#-----------------------------------------------------
532# Rescue the drowning and tie your shoestrings.
533# Henry David Thoreau
534#-----------------------------------------------------
535
ca10f295 536=head1 AUTHOR
16dc9970 537
ca10f295 538Ken Y. Clark, E<lt>kclark@logsoft.comE<gt>,
539darren chamberlain E<lt>darren@cpan.orgE<gt>
dfb4c915 540
ca10f295 541=head1 COPYRIGHT
16dc9970 542
ca10f295 543This program is free software; you can redistribute it and/or modify
544it under the terms of the GNU General Public License as published by
545the Free Software Foundation; version 2.
dfb4c915 546
ca10f295 547This program is distributed in the hope that it will be useful, but
548WITHOUT ANY WARRANTY; without even the implied warranty of
549MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
550General Public License for more details.
16dc9970 551
ca10f295 552You should have received a copy of the GNU General Public License
553along with this program; if not, write to the Free Software
554Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
555USA
16dc9970 556
557=head1 SEE ALSO
558
ca10f295 559L<perl>, L<Parse::RecDescent>
16dc9970 560
561=cut