Added support for producer_args and parser_args.
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator.pm
CommitLineData
16dc9970 1package SQL::Translator;
2
b346d8f1 3# ----------------------------------------------------------------------
e2158c40 4# $Id: Translator.pm,v 1.5 2002-03-26 12:46:54 dlc Exp $
b346d8f1 5# ----------------------------------------------------------------------
077ebf34 6# Copyright (C) 2002 Ken Y. Clark <kycl4rk@users.sourceforge.net>,
7# darren chamberlain <darren@cpan.org>
1fd8c91f 8#
077ebf34 9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License as
11# published by the Free Software Foundation; version 2.
ca10f295 12#
077ebf34 13# This program is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16# General Public License for more details.
ca10f295 17#
077ebf34 18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21# 02111-1307 USA
ca10f295 22# -------------------------------------------------------------------
23
24=head1 NAME
25
26SQL::Translator - convert schema from one database to another
27
28=head1 SYNOPSIS
29
30 use SQL::Translator;
31 my $translator = SQL::Translator->new;
077ebf34 32
b346d8f1 33 my $output = $translator->translate(
34 from => "MySQL",
35 to => "Oracle",
36 filename => $file,
37 ) or die $translator->error;
ca10f295 38 print $output;
39
40=head1 DESCRIPTION
41
42This module attempts to simplify the task of converting one database
43create syntax to another through the use of Parsers and Producers.
44The idea is that any Parser can be used with any Producer in the
b346d8f1 45conversion process. So, if you wanted PostgreSQL-to-Oracle, you would
46use the PostgreSQL parser and the Oracle producer.
ca10f295 47
077ebf34 48Currently, the existing parsers use Parse::RecDescent, but this not
49a requirement, or even a recommendation. New parser modules don't
50necessarily have to use Parse::RecDescent, as long as the module
51implements the appropriate API. With this separation of code, it is
ca10f295 52hoped that developers will find it easy to add more database dialects
53by using what's written, writing only what they need, and then
54contributing their parsers or producers back to the project.
55
56=cut
16dc9970 57
58use strict;
ca10f295 59use vars qw($VERSION $DEFAULT_SUB $DEBUG);
e2158c40 60$VERSION = sprintf "%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/;
ca10f295 61$DEBUG = 1 unless defined $DEBUG;
16dc9970 62
b346d8f1 63# ----------------------------------------------------------------------
64# The default behavior is to "pass through" values (note that the
65# SQL::Translator instance is the first value ($_[0]), and the stuff
66# to be parsed is the second value ($_[1])
67# ----------------------------------------------------------------------
68$DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
16dc9970 69
ca10f295 70*isa = \&UNIVERSAL::isa;
16dc9970 71
b346d8f1 72use Carp qw(carp);
1fd8c91f 73
ca10f295 74=head1 CONSTRUCTOR
16dc9970 75
077ebf34 76The constructor is called B<new>, and accepts a optional hash of options.
ca10f295 77Valid options are:
16dc9970 78
ca10f295 79=over 4
80
81=item parser (aka from)
82
83=item producer (aka to)
84
85=item filename
86
87=back
88
89All options are, well, optional; these attributes can be set via
b346d8f1 90instance methods. Internally, they are; no (non-syntactical)
91advantage is gained by passing options to the constructor.
ca10f295 92
93=cut
94
95# {{{ new
b346d8f1 96# ----------------------------------------------------------------------
97# new([ARGS])
98# The constructor.
dfb4c915 99#
b346d8f1 100# new takes an optional hash of arguments. These arguments may
101# include a parser, specified with the keys "parser" or "from",
102# and a producer, specified with the keys "producer" or "to".
dfb4c915 103#
b346d8f1 104# The values that can be passed as the parser or producer are
105# given directly to the parser or producer methods, respectively.
106# See the appropriate method description below for details about
107# what each expects/accepts.
108#
109# TODO
110# * Support passing an input (filename or string) as with
111# translate
112# ----------------------------------------------------------------------
16dc9970 113sub new {
16dc9970 114 my $class = shift;
ca10f295 115 my $args = isa($_[0], 'HASH') ? shift : { @_ };
116 my $self = bless { } => $class;
1fd8c91f 117
b346d8f1 118 # ------------------------------------------------------------------
119 # Set the parser and producer.
ca10f295 120 #
b346d8f1 121 # If a 'parser' or 'from' parameter is passed in, use that as the
122 # parser; if a 'producer' or 'to' parameter is passed in, use that
123 # as the producer; both default to $DEFAULT_SUB.
124 # ------------------------------------------------------------------
ca10f295 125 $self->parser( $args->{'parser'} || $args->{'from'} || $DEFAULT_SUB);
126 $self->producer($args->{'producer'} || $args->{'to'} || $DEFAULT_SUB);
127
b346d8f1 128 # ------------------------------------------------------------------
e2158c40 129 # Set the parser_args and producer_args
130 # ------------------------------------------------------------------
131 for my $pargs (qw(parser_args producer_args)) {
132 $self->$pargs($args->{$pargs}) if defined $args->{$pargs};
133 }
134
135 # ------------------------------------------------------------------
ca10f295 136 # Clear the error
b346d8f1 137 # ------------------------------------------------------------------
ca10f295 138 $self->error_out("");
139
140 return $self;
dfb4c915 141}
ca10f295 142# }}}
1fd8c91f 143
ca10f295 144=head1 METHODS
145
ca10f295 146=head2 B<producer>
147
148The B<producer> method is an accessor/mutator, used to retrieve or
149define what subroutine is called to produce the output. A subroutine
b346d8f1 150defined as a producer will be invoked as a function (not a method) and
151passed 2 parameters: its container SQL::Translator instance and a
152data structure. It is expected that the function transform the data
153structure to a string. The SQL::Transformer instance is provided for
154informational purposes; for example, the type of the parser can be
155retrieved using the B<parser_type> method, and the B<error> and
156B<debug> methods can be called when needed.
157
158When defining a producer, one of several things can be passed
159in: A module name (e.g., My::Groovy::Producer), a module name
160relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
161module name and function combination (My::Groovy::Producer::transmogrify),
162or a reference to an anonymous subroutine. If a full module name is
163passed in (for the purposes of this method, a string containing "::"
164is considered to be a module name), it is treated as a package, and a
165function called "produce" will be invoked: $modulename::produce. If
166$modulename cannot be loaded, the final portion is stripped off and
167treated as a function. In other words, if there is no file named
168My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
169My/Groovy/Producer.pm and use transmogrify as the name of the function,
170instead of the default "produce".
ca10f295 171
172 my $tr = SQL::Translator->new;
173
077ebf34 174 # This will invoke My::Groovy::Producer::produce($tr, $data)
ca10f295 175 $tr->producer("My::Groovy::Producer");
176
077ebf34 177 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
ca10f295 178 $tr->producer("Sybase");
179
b346d8f1 180 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
181 # assuming that My::Groovy::Producer::transmogrify is not a module
182 # on disk.
e2158c40 183 $tr->producer("My::Groovy::Producer::transmogrify");
b346d8f1 184
077ebf34 185 # This will invoke the referenced subroutine directly, as
186 # $subref->($tr, $data);
ca10f295 187 $tr->producer(\&my_producer);
188
077ebf34 189There is also a method named B<producer_type>, which is a string
190containing the classname to which the above B<produce> function
191belongs. In the case of anonymous subroutines, this method returns
192the string "CODE".
193
e2158c40 194Finally, there is a method named B<producer_args>, which is both an
195accessor and a mutator. Arbitrary data may be stored in name => value
196pairs for the producer subroutine to access:
197
198 sub My::Random::producer {
199 my ($tr, $data) = @_;
200 my $pr_args = $tr->producer_args();
201
202 # $pr_args is a hashref.
203
204Extra data passed to the B<producer> method is passed to
205B<producer_args>:
206
207 $tr->producer("xSV", delimiter => ',\s*');
208
209 # In SQL::Translator::Producer::xSV:
210 my $args = $tr->producer_args;
211 my $delimiter = $args->{'delimiter'}; # value is => ,\s*
212
ca10f295 213=cut
ca10f295 214
077ebf34 215# {{{ producer and producer_type
ca10f295 216sub producer {
1fd8c91f 217 my $self = shift;
b346d8f1 218
219 # {{{ producer as a mutator
ca10f295 220 if (@_) {
221 my $producer = shift;
b346d8f1 222
223 # {{{ Passed a module name (string containing "::")
ca10f295 224 if ($producer =~ /::/) {
077ebf34 225 my $func_name;
b346d8f1 226
227 # {{{ Module name was passed directly
228 # We try to load the name; if it doesn't load, there's
229 # a possibility that it has a function name attached to
230 # it.
077ebf34 231 if (load($producer)) {
232 $func_name = "produce";
b346d8f1 233 } # }}}
234
235 # {{{ Module::function was passed
236 else {
237 # Passed Module::Name::function; try to recover
077ebf34 238 my @func_parts = split /::/, $producer;
239 $func_name = pop @func_parts;
240 $producer = join "::", @func_parts;
b346d8f1 241
242 # If this doesn't work, then we have a legitimate
243 # problem.
077ebf34 244 load($producer) or die "Can't load $producer: $@";
b346d8f1 245 } # }}}
077ebf34 246
b346d8f1 247 # {{{ get code reference and assign
077ebf34 248 $self->{'producer'} = \&{ "$producer\::$func_name" };
249 $self->{'producer_type'} = $producer;
e2158c40 250 $self->debug("Got producer: $producer\::$func_name");
b346d8f1 251 # }}}
252 } # }}}
253
254 # {{{ passed an anonymous subroutine reference
255 elsif (isa($producer, 'CODE')) {
ca10f295 256 $self->{'producer'} = $producer;
077ebf34 257 $self->{'producer_type'} = "CODE";
ca10f295 258 $self->debug("Got 'producer': code ref");
b346d8f1 259 } # }}}
260
261 # {{{ passed a string containing no "::"; relative package name
262 else {
ca10f295 263 my $Pp = sprintf "SQL::Translator::Producer::$producer";
264 load($Pp) or die "Can't load $Pp: $@";
077ebf34 265 $self->{'producer'} = \&{ "$Pp\::produce" };
266 $self->{'producer_type'} = $Pp;
ca10f295 267 $self->debug("Got producer: $Pp");
b346d8f1 268 } # }}}
269
ca10f295 270 # At this point, $self->{'producer'} contains a subroutine
b346d8f1 271 # reference that is ready to run
e2158c40 272
273 # {{{ Anything left? If so, it's producer_args
274 $self->produser_args(@_) if (@_);
275 # }}}
b346d8f1 276 } # }}}
277
ca10f295 278 return $self->{'producer'};
279};
077ebf34 280
e2158c40 281# {{{ producer_type
282# producer_type is an accessor that allows producer subs to get
283# information about their origin. This is poptentially important;
284# since all producer subs are called as subroutine refernces, there is
285# no way for a producer to find out which package the sub lives in
286# originally, for example.
287sub producer_type { $_[0]->{'producer_type'} } # }}}
288
289# {{{ producer_args
290# Arbitrary name => value pairs of paramters can be passed to a
291# producer using this method.
292sub producer_args {
293 my $self = shift;
294 if (@_) {
295 my $args = isa($_[0], 'HASH') ? shift : { @_ };
296 $self->{'producer_args'} = $args;
297 }
298 $self->{'producer_args'};
299} # }}}
ca10f295 300# }}}
301
302=head2 B<parser>
303
304The B<parser> method defines or retrieves a subroutine that will be
305called to perform the parsing. The basic idea is the same as that of
306B<producer> (see above), except the default subroutine name is
077ebf34 307"parse", and will be invoked as $module_name::parse($tr, $data).
308Also, the parser subroutine will be passed a string containing the
309entirety of the data to be parsed (or possibly a reference to a string?).
ca10f295 310
311 # Invokes SQL::Translator::Parser::MySQL::parse()
312 $tr->parser("MySQL");
313
314 # Invokes My::Groovy::Parser::parse()
315 $tr->parser("My::Groovy::Parser");
316
317 # Invoke an anonymous subroutine directly
318 $tr->parser(sub {
077ebf34 319 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
ca10f295 320 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
321 return $dumper->Dump;
322 });
323
e2158c40 324There is also B<parser_type> and B<parser_args>, which perform
325analogously to B<producer_type> and B<producer_args>
326
ca10f295 327=cut
328
e2158c40 329# {{{ parser, parser_type, and parser_args
ca10f295 330sub parser {
331 my $self = shift;
b346d8f1 332
333 # {{{ parser as a mutator
ca10f295 334 if (@_) {
335 my $parser = shift;
b346d8f1 336
337 # {{{ Passed a module name (string containing "::")
ca10f295 338 if ($parser =~ /::/) {
b346d8f1 339 my $func_name;
340
341 # {{{ Module name was passed directly
342 # We try to load the name; if it doesn't load, there's
343 # a possibility that it has a function name attached to
344 # it.
345 if (load($parser)) {
346 $func_name = "parse";
347 } # }}}
348
349 # {{{ Module::function was passed
350 else {
351 # Passed Module::Name::function; try to recover
352 my @func_parts = split /::/, $parser;
353 $func_name = pop @func_parts;
354 $parser = join "::", @func_parts;
355
356 # If this doesn't work, then we have a legitimate
357 # problem.
358 load($parser) or die "Can't load $parser: $@";
359 } # }}}
360
361 # {{{ get code reference and assign
362 $self->{'parser'} = \&{ "$parser\::$func_name" };
077ebf34 363 $self->{'parser_type'} = $parser;
b346d8f1 364 $self->debug("Got parser: $parser\::$func_name");
365 # }}}
366 } # }}}
367
368 # {{{ passed an anonymous subroutine reference
369 elsif (isa($parser, 'CODE')) {
ca10f295 370 $self->{'parser'} = $parser;
077ebf34 371 $self->{'parser_type'} = "CODE";
b346d8f1 372 $self->debug("Got 'parser': code ref");
373 } # }}}
374
375 # {{{ passed a string containing no "::"; relative package name
376 else {
377 my $Pp = sprintf "SQL::Translator::Parser::$parser";
ca10f295 378 load($Pp) or die "Can't load $Pp: $@";
379 $self->{'parser'} = \&{ "$Pp\::parse" };
077ebf34 380 $self->{'parser_type'} = $Pp;
ca10f295 381 $self->debug("Got parser: $Pp");
b346d8f1 382 } # }}}
383
384 # At this point, $self->{'parser'} contains a subroutine
385 # reference that is ready to run
b346d8f1 386
e2158c40 387 $self->parser_args(@_) if (@_);
388 } # }}}
b346d8f1 389
ca10f295 390 return $self->{'parser'};
16dc9970 391}
1fd8c91f 392
077ebf34 393sub parser_type { $_[0]->{'parser_type'} }
e2158c40 394
395# {{{ parser_args
396sub parser_args {
397 my $self = shift;
398 if (@_) {
399 my $args = isa($_[0], 'HASH') ? shift : { @_ };
400 $self->{'parser_args'} = $args;
401 }
402 $self->{'parser_args'};
403} # }}}
ca10f295 404# }}}
16dc9970 405
ca10f295 406=head2 B<translate>
407
408The B<translate> method calls the subroutines referenced by the
409B<parser> and B<producer> data members (described above). It accepts
410as arguments a number of things, in key => value format, including
411(potentially) a parser and a producer (they are passed directly to the
412B<parser> and B<producer> methods).
413
414Here is how the parameter list to B<translate> is parsed:
415
416=over
417
418=item *
419
4201 argument means it's the data to be parsed; which could be a string
b346d8f1 421(filename) or a refernce to a scalar (a string stored in memory), or a
422reference to a hash, which is parsed as being more than one argument
423(see next section).
ca10f295 424
425 # Parse the file /path/to/datafile
426 my $output = $tr->translate("/path/to/datafile");
427
b346d8f1 428 # Parse the data contained in the string $data
ca10f295 429 my $output = $tr->translate(\$data);
430
431=item *
432
077ebf34 433More than 1 argument means its a hash of things, and it might be
434setting a parser, producer, or datasource (this key is named
b346d8f1 435"filename" or "file" if it's a file, or "data" for a SCALAR reference.
ca10f295 436
437 # As above, parse /path/to/datafile, but with different producers
438 for my $prod ("MySQL", "XML", "Sybase") {
439 print $tr->translate(
440 producer => $prod,
441 filename => "/path/to/datafile",
442 );
443 }
444
445 # The filename hash key could also be:
ca10f295 446 datasource => \$data,
447
448You get the idea.
449
450=back
451
452=cut
453
454# {{{ translate
16dc9970 455sub translate {
ca10f295 456 my $self = shift;
457 my ($args, $parser, $producer);
458
459 if (@_ == 1) {
460 if (isa($_[0], 'HASH')) {
461 # Passed a hashref
077ebf34 462 $self->debug("translate: Got a hashref");
ca10f295 463 $args = $_[0];
464 }
ca10f295 465 elsif (isa($_[0], 'SCALAR')) {
466 # passed a ref to a string; deref it
077ebf34 467 $self->debug("translate: Got a SCALAR reference (string)");
ca10f295 468 $args = { data => ${$_[0]} };
469 }
b346d8f1 470 elsif (! ref $_[0]) {
ca10f295 471 # Not a ref, it's a filename
077ebf34 472 $self->debug("translate: Got a filename");
ca10f295 473 $args = { filename => $_[0] };
474 }
b346d8f1 475 else {
476 # We're not impressed. Take your empty string and leave.
477 return "";
478 }
16dc9970 479 }
480 else {
b346d8f1 481 # You must pass in a hash, or you get nothing.
482 return "" if @_ % 2;
ca10f295 483 $args = { @_ };
16dc9970 484 }
485
b346d8f1 486 if ((defined $args->{'filename'} || defined $args->{'file'}) &&
487 not $args->{'data'}) {
ca10f295 488 local *FH;
489 local $/;
490
b346d8f1 491 open FH, $args->{'filename'}
492 or die "Can't open $args->{'filename'} for reading: $!";
ca10f295 493 $args->{'data'} = <FH>;
b346d8f1 494 close FH or die "Can't close $args->{'filename'}: $!";
16dc9970 495 }
ca10f295 496
497 #
498 # Last chance to bail out; if there's nothing in the data
499 # key of %args, back out.
500 #
b346d8f1 501 return "" unless defined $args->{'data'};
077ebf34 502
ca10f295 503 #
504 # Local reference to the parser subroutine
505 #
506 if ($parser = ($args->{'parser'} || $args->{'from'})) {
507 $self->parser($parser);
508 } else {
509 $parser = $self->parser;
16dc9970 510 }
511
512 #
ca10f295 513 # Local reference to the producer subroutine
16dc9970 514 #
ca10f295 515 if ($producer = ($args->{'producer'} || $args->{'to'})) {
516 $self->producer($producer);
517 } else {
518 $producer = $self->producer;
16dc9970 519 }
520
ca10f295 521 #
522 # Execute the parser, then execute the producer with that output
523 #
b346d8f1 524 return $producer->($self, $parser->($self, $args->{'data'}));
16dc9970 525}
ca10f295 526# }}}
527
528=head2 B<error>
16dc9970 529
ca10f295 530The error method returns the last error.
531
532=cut
533
534# {{{ error
16dc9970 535#-----------------------------------------------------
ca10f295 536sub error {
16dc9970 537#
ca10f295 538# Return the last error.
16dc9970 539#
ca10f295 540 return shift()->{'error'} || '';
541}
542# }}}
543
544=head2 B<error_out>
545
546Record the error and return undef. The error can be retrieved by
547calling programs using $tr->error.
548
549For Parser or Producer writers, primarily.
550
551=cut
552
553# {{{ error_out
554sub error_out {
16dc9970 555 my $self = shift;
ca10f295 556 if ( my $error = shift ) {
557 $self->{'error'} = $error;
16dc9970 558 }
ca10f295 559 return;
16dc9970 560}
ca10f295 561# }}}
1fd8c91f 562
ca10f295 563=head2 B<debug>
564
565If the global variable $SQL::Translator::DEBUG is set to a true value,
566then calls to $tr->debug($msg) will be carped to STDERR. If $DEBUG is
567not set, then this method does nothing.
568
569=cut
570
571# {{{ debug
ca10f295 572sub debug {
16dc9970 573 my $self = shift;
ca10f295 574 carp @_ if ($DEBUG);
16dc9970 575}
ca10f295 576# }}}
577
578# {{{ load
579sub load {
580 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
581 return 1 if $INC{$module};
582
583 eval { require $module };
584
585 return if ($@);
586 return 1;
1fd8c91f 587}
ca10f295 588# }}}
16dc9970 589
5901;
591
ca10f295 592__END__
16dc9970 593#-----------------------------------------------------
594# Rescue the drowning and tie your shoestrings.
595# Henry David Thoreau
596#-----------------------------------------------------
597
ca10f295 598=head1 AUTHOR
16dc9970 599
ca10f295 600Ken Y. Clark, E<lt>kclark@logsoft.comE<gt>,
601darren chamberlain E<lt>darren@cpan.orgE<gt>
dfb4c915 602
ca10f295 603=head1 COPYRIGHT
16dc9970 604
ca10f295 605This program is free software; you can redistribute it and/or modify
606it under the terms of the GNU General Public License as published by
607the Free Software Foundation; version 2.
dfb4c915 608
ca10f295 609This program is distributed in the hope that it will be useful, but
610WITHOUT ANY WARRANTY; without even the implied warranty of
611MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
612General Public License for more details.
16dc9970 613
ca10f295 614You should have received a copy of the GNU General Public License
615along with this program; if not, write to the Free Software
616Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
617USA
16dc9970 618
619=head1 SEE ALSO
620
ca10f295 621L<perl>, L<Parse::RecDescent>
16dc9970 622
623=cut