1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.12 2002-11-22 03:03:40 kycl4rk Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2002 Ken Y. Clark <kclark@cpan.org>,
7 # darren chamberlain <darren@cpan.org>
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; version 2.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 # -------------------------------------------------------------------
26 SQL::Translator - convert schema from one database to another
32 my $translator = SQL::Translator->new(
33 xlate => $xlate || {}, # Overrides for field translation
34 debug => $debug, # Print debug info
35 trace => $trace, # Print Parse::RecDescent trace
36 no_comments => $no_comments, # Don't include comments in output
39 my $output = $translator->translate(
43 ) or die $translator->error;
49 This module attempts to simplify the task of converting one database
50 create syntax to another through the use of Parsers (which understand
51 the sourced format) and Producers (which understand the destination
52 format). The idea is that any Parser can be used with any Producer in
53 the conversion process. So, if you wanted PostgreSQL-to-Oracle, you
54 would use the PostgreSQL parser and the Oracle producer.
59 use vars qw( $VERSION $REVISION $DEFAULT_SUB $DEBUG $ERROR );
60 use base 'Class::Base';
63 $REVISION = sprintf "%d.%02d", q$Revision: 1.12 $ =~ /(\d+)\.(\d+)/;
64 $DEBUG = 0 unless defined $DEBUG;
69 use File::Spec::Functions qw(catfile);
70 use File::Basename qw(dirname);
73 # ----------------------------------------------------------------------
74 # The default behavior is to "pass through" values (note that the
75 # SQL::Translator instance is the first value ($_[0]), and the stuff
76 # to be parsed is the second value ($_[1])
77 # ----------------------------------------------------------------------
78 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
82 The constructor is called B<new>, and accepts a optional hash of options.
87 =item parser (aka from)
91 =item producer (aka to)
95 =item filename (aka file)
103 All options are, well, optional; these attributes can be set via
104 instance methods. Internally, they are; no (non-syntactical)
105 advantage is gained by passing options to the constructor.
109 # ----------------------------------------------------------------------
113 # new takes an optional hash of arguments. These arguments may
114 # include a parser, specified with the keys "parser" or "from",
115 # and a producer, specified with the keys "producer" or "to".
117 # The values that can be passed as the parser or producer are
118 # given directly to the parser or producer methods, respectively.
119 # See the appropriate method description below for details about
120 # what each expects/accepts.
121 # ----------------------------------------------------------------------
123 my ( $self, $config ) = @_;
126 # Set the parser and producer.
128 # If a 'parser' or 'from' parameter is passed in, use that as the
129 # parser; if a 'producer' or 'to' parameter is passed in, use that
130 # as the producer; both default to $DEFAULT_SUB.
132 $self->parser ($config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
133 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
136 # Set the parser_args and producer_args
138 for my $pargs ( qw[ parser_args producer_args ] ) {
139 $self->$pargs( $config->{$pargs} ) if defined $config->{ $pargs };
143 # Set the data source, if 'filename' or 'file' is provided.
145 $config->{'filename'} ||= $config->{'file'} || "";
146 $self->filename( $config->{'filename'} ) if $config->{'filename'};
149 # Finally, if there is a 'data' parameter, use that in
150 # preference to filename and file
152 if ( my $data = $config->{'data'} ) {
153 $self->data( $data );
157 # Set various other options.
159 $self->{'debug'} = defined $config->{'debug'} ? $config->{'debug'} : $DEBUG;
161 $self->trace( $config->{'trace'} );
163 $self->custom_translate( $config->{'xlate'} );
165 $self->no_comments( $config->{'no_comments'} );
172 # ----------------------------------------------------------------------
173 =head2 B<custom_translate>
175 Allows the user to override default translation of fields. For example,
176 if a MySQL "text" field would normally be converted to a "long" for Oracle,
177 the user could specify to change it to a "CLOB." Accepts a hashref where
178 keys are the "from" value and values are the "to," returns the current
183 sub custom_translate {
185 $self->{'custom_translate'} = shift if @_;
186 return $self->{'custom_translate'} || {};
189 # ----------------------------------------------------------------------
190 =head2 B<no_comments>
192 Toggles whether to print comments in the output. Accepts a true or false
193 value, returns the current value.
200 if ( defined $arg ) {
201 $self->{'no_comments'} = $arg ? 1 : 0;
203 return $self->{'no_comments'} || 0;
206 # ----------------------------------------------------------------------
209 The B<producer> method is an accessor/mutator, used to retrieve or
210 define what subroutine is called to produce the output. A subroutine
211 defined as a producer will be invoked as a function (not a method) and
212 passed 2 parameters: its container SQL::Translator instance and a
213 data structure. It is expected that the function transform the data
214 structure to a string. The SQL::Transformer instance is provided for
215 informational purposes; for example, the type of the parser can be
216 retrieved using the B<parser_type> method, and the B<error> and
217 B<debug> methods can be called when needed.
219 When defining a producer, one of several things can be passed
220 in: A module name (e.g., My::Groovy::Producer), a module name
221 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
222 module name and function combination (My::Groovy::Producer::transmogrify),
223 or a reference to an anonymous subroutine. If a full module name is
224 passed in (for the purposes of this method, a string containing "::"
225 is considered to be a module name), it is treated as a package, and a
226 function called "produce" will be invoked: $modulename::produce. If
227 $modulename cannot be loaded, the final portion is stripped off and
228 treated as a function. In other words, if there is no file named
229 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
230 My/Groovy/Producer.pm and use transmogrify as the name of the function,
231 instead of the default "produce".
233 my $tr = SQL::Translator->new;
235 # This will invoke My::Groovy::Producer::produce($tr, $data)
236 $tr->producer("My::Groovy::Producer");
238 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
239 $tr->producer("Sybase");
241 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
242 # assuming that My::Groovy::Producer::transmogrify is not a module
244 $tr->producer("My::Groovy::Producer::transmogrify");
246 # This will invoke the referenced subroutine directly, as
247 # $subref->($tr, $data);
248 $tr->producer(\&my_producer);
250 There is also a method named B<producer_type>, which is a string
251 containing the classname to which the above B<produce> function
252 belongs. In the case of anonymous subroutines, this method returns
255 Finally, there is a method named B<producer_args>, which is both an
256 accessor and a mutator. Arbitrary data may be stored in name => value
257 pairs for the producer subroutine to access:
259 sub My::Random::producer {
260 my ($tr, $data) = @_;
261 my $pr_args = $tr->producer_args();
263 # $pr_args is a hashref.
265 Extra data passed to the B<producer> method is passed to
268 $tr->producer("xSV", delimiter => ',\s*');
270 # In SQL::Translator::Producer::xSV:
271 my $args = $tr->producer_args;
272 my $delimiter = $args->{'delimiter'}; # value is ,\s*
276 # producer and producer_type
280 # producer as a mutator
282 my $producer = shift;
284 # Passed a module name (string containing "::")
285 if ($producer =~ /::/) {
288 # Module name was passed directly
289 # We try to load the name; if it doesn't load, there's
290 # a possibility that it has a function name attached to
292 if (load($producer)) {
293 $func_name = "produce";
296 # Module::function was passed
298 # Passed Module::Name::function; try to recover
299 my @func_parts = split /::/, $producer;
300 $func_name = pop @func_parts;
301 $producer = join "::", @func_parts;
303 # If this doesn't work, then we have a legitimate
305 load($producer) or die "Can't load $producer: $@";
308 # get code reference and assign
309 $self->{'producer'} = \&{ "$producer\::$func_name" };
310 $self->{'producer_type'} = $producer;
311 $self->debug("Got producer: $producer\::$func_name\n");
314 # passed an anonymous subroutine reference
315 elsif (isa($producer, 'CODE')) {
316 $self->{'producer'} = $producer;
317 $self->{'producer_type'} = "CODE";
318 $self->debug("Got producer: code ref\n");
321 # passed a string containing no "::"; relative package name
323 my $Pp = sprintf "SQL::Translator::Producer::$producer";
324 load($Pp) or die "Can't load $Pp: $@";
325 $self->{'producer'} = \&{ "$Pp\::produce" };
326 $self->{'producer_type'} = $Pp;
327 $self->debug("Got producer: $Pp\n");
330 # At this point, $self->{'producer'} contains a subroutine
331 # reference that is ready to run
333 # Anything left? If so, it's producer_args
334 $self->producer_args(@_) if (@_);
337 return $self->{'producer'};
340 # ----------------------------------------------------------------------
343 # producer_type is an accessor that allows producer subs to get
344 # information about their origin. This is poptentially important;
345 # since all producer subs are called as subroutine refernces, there is
346 # no way for a producer to find out which package the sub lives in
347 # originally, for example.
348 # ----------------------------------------------------------------------
349 sub producer_type { $_[0]->{'producer_type'} }
351 # ----------------------------------------------------------------------
354 # Arbitrary name => value pairs of paramters can be passed to a
355 # producer using this method.
356 # ----------------------------------------------------------------------
360 my $args = isa($_[0], 'HASH') ? shift : { @_ };
361 $self->{'producer_args'} = $args;
363 $self->{'producer_args'};
366 # ----------------------------------------------------------------------
369 The B<parser> method defines or retrieves a subroutine that will be
370 called to perform the parsing. The basic idea is the same as that of
371 B<producer> (see above), except the default subroutine name is
372 "parse", and will be invoked as $module_name::parse($tr, $data).
373 Also, the parser subroutine will be passed a string containing the
374 entirety of the data to be parsed (or possibly a reference to a string?).
376 # Invokes SQL::Translator::Parser::MySQL::parse()
377 $tr->parser("MySQL");
379 # Invokes My::Groovy::Parser::parse()
380 $tr->parser("My::Groovy::Parser");
382 # Invoke an anonymous subroutine directly
384 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
385 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
386 return $dumper->Dump;
389 There is also B<parser_type> and B<parser_args>, which perform
390 analogously to B<producer_type> and B<producer_args>
397 # parser as a mutator
401 # Passed a module name (string containing "::")
402 if ($parser =~ /::/) {
405 # Module name was passed directly
406 # We try to load the name; if it doesn't load, there's
407 # a possibility that it has a function name attached to
410 $func_name = "parse";
413 # Module::function was passed
415 # Passed Module::Name::function; try to recover
416 my @func_parts = split /::/, $parser;
417 $func_name = pop @func_parts;
418 $parser = join "::", @func_parts;
420 # If this doesn't work, then we have a legitimate
422 load($parser) or die "Can't load $parser: $@";
425 # get code reference and assign
426 $self->{'parser'} = \&{ "$parser\::$func_name" };
427 $self->{'parser_type'} = $parser;
428 $self->debug("Got parser: $parser\::$func_name\n");
431 # passed an anonymous subroutine reference
432 elsif ( isa( $parser, 'CODE' ) ) {
433 $self->{'parser'} = $parser;
434 $self->{'parser_type'} = "CODE";
435 $self->debug("Got parser: code ref\n");
438 # passed a string containing no "::"; relative package name
440 my $Pp = "SQL::Translator::Parser::$parser";
441 load( $Pp ) or die "Can't load $Pp: $@";
442 $self->{'parser'} = \&{ "$Pp\::parse" };
443 $self->{'parser_type'} = $Pp;
444 $self->debug("Got parser: $Pp\n");
448 # At this point, $self->{'parser'} contains a subroutine
449 # reference that is ready to run
451 $self->parser_args( @_ ) if (@_);
454 return $self->{'parser'};
457 # ----------------------------------------------------------------------
458 sub parser_type { $_[0]->{'parser_type'} }
460 # ----------------------------------------------------------------------
464 my $args = isa($_[0], 'HASH') ? shift : { @_ };
465 $self->{'parser_args'} = $args;
467 $self->{'parser_args'};
470 # ----------------------------------------------------------------------
473 The B<translate> method calls the subroutines referenced by the
474 B<parser> and B<producer> data members (described above). It accepts
475 as arguments a number of things, in key => value format, including
476 (potentially) a parser and a producer (they are passed directly to the
477 B<parser> and B<producer> methods).
479 Here is how the parameter list to B<translate> is parsed:
485 1 argument means it's the data to be parsed; which could be a string
486 (filename) or a refernce to a scalar (a string stored in memory), or a
487 reference to a hash, which is parsed as being more than one argument
490 # Parse the file /path/to/datafile
491 my $output = $tr->translate("/path/to/datafile");
493 # Parse the data contained in the string $data
494 my $output = $tr->translate(\$data);
498 More than 1 argument means its a hash of things, and it might be
499 setting a parser, producer, or datasource (this key is named
500 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
502 # As above, parse /path/to/datafile, but with different producers
503 for my $prod ("MySQL", "XML", "Sybase") {
504 print $tr->translate(
506 filename => "/path/to/datafile",
510 # The filename hash key could also be:
511 datasource => \$data,
517 # ----------------------------------------------------------------------
518 =head2 B<filename>, B<data>
520 Using the B<filename> method, the filename of the data to be parsed
521 can be set. This method can be used in conjunction with the B<data>
522 method, below. If both the B<filename> and B<data> methods are
523 invoked as mutators, the data set in the B<data> method is used.
525 $tr->filename("/my/data/files/create.sql");
529 my $create_script = do {
531 open CREATE, "/my/data/files/create.sql" or die $!;
534 $tr->data(\$create_script);
536 B<filename> takes a string, which is interpreted as a filename.
537 B<data> takes a reference to a string, which is used as the data to be
538 parsed. If a filename is set, then that file is opened and read when
539 the B<translate> method is called, as long as the data instance
544 # filename - get or set the filename
548 my $filename = shift;
550 my $msg = "Cannot use directory '$filename' as input source";
551 return $self->error($msg);
552 } elsif (-f _ && -r _) {
553 $self->{'filename'} = $filename;
554 $self->debug("Got filename: '$self->{'filename'}'\n");
556 my $msg = "Cannot use '$filename' as input source: ".
557 "file does not exist or is not readable.";
558 return $self->error($msg);
565 # ----------------------------------------------------------------------
566 # data - get or set the data
567 # if $self->{'data'} is not set, but $self->{'filename'} is, then
568 # $self->{'filename'} is opened and read, whith the results put into
573 # Set $self->{'data'} to $_[0], if it is provided.
576 if (isa($data, "SCALAR")) {
577 $self->{'data'} = $data;
579 elsif (! ref $data) {
580 $self->{'data'} = \$data;
584 # If we have a filename but no data yet, populate.
585 if (not $self->{'data'} and my $filename = $self->filename) {
586 $self->debug("Opening '$filename' to get contents.\n");
591 unless (open FH, $filename) {
592 return $self->error("Can't read file '$filename': $!");
596 $self->{'data'} = \$data;
599 return $self->error("Can't close file '$filename': $!");
603 return $self->{'data'};
606 # ----------------------------------------------------------------------
611 Turns on/off the tracing option of Parse::RecDescent.
618 if ( defined $arg ) {
619 $self->{'trace'} = $arg ? 1 : 0;
621 return $self->{'trace'} || 0;
624 # ----------------------------------------------------------------------
627 my ($args, $parser, $parser_type, $producer, $producer_type);
628 my ($parser_output, $producer_output);
632 # Passed a reference to a hash?
633 if (isa($_[0], 'HASH')) {
635 $self->debug("translate: Got a hashref\n");
639 # Passed a reference to a string containing the data
640 elsif (isa($_[0], 'SCALAR')) {
641 # passed a ref to a string
642 $self->debug("translate: Got a SCALAR reference (string)\n");
646 # Not a reference; treat it as a filename
647 elsif (! ref $_[0]) {
648 # Not a ref, it's a filename
649 $self->debug("translate: Got a filename\n");
650 $self->filename($_[0]);
653 # Passed something else entirely.
655 # We're not impressed. Take your empty string and leave.
658 # Actually, if data, parser, and producer are set, then we
659 # can continue. Too bad, because I like my comment
661 return "" unless ($self->data &&
667 # You must pass in a hash, or you get nothing.
672 # ----------------------------------------------------------------------
673 # Can specify the data to be transformed using "filename", "file",
674 # "data", or "datasource".
675 # ----------------------------------------------------------------------
676 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
677 $self->filename($filename);
680 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
684 # ----------------------------------------------------------------
686 # ----------------------------------------------------------------
687 my $data = $self->data;
688 unless (length $$data) {
689 return $self->error("Empty data file!");
692 # ----------------------------------------------------------------
693 # Local reference to the parser subroutine
694 # ----------------------------------------------------------------
695 if ($parser = ($args->{'parser'} || $args->{'from'})) {
696 $self->parser($parser);
698 $parser = $self->parser;
699 $parser_type = $self->parser_type;
701 # ----------------------------------------------------------------
702 # Local reference to the producer subroutine
703 # ----------------------------------------------------------------
704 if ($producer = ($args->{'producer'} || $args->{'to'})) {
705 $self->producer($producer);
707 $producer = $self->producer;
708 $producer_type = $self->producer_type;
710 # ----------------------------------------------------------------
711 # Execute the parser, then execute the producer with that output.
712 # Allowances are made for each piece to die, or fail to compile,
713 # since the referenced subroutines could be almost anything. In
714 # the future, each of these might happen in a Safe environment,
715 # depending on how paranoid we want to be.
716 # ----------------------------------------------------------------
717 eval { $parser_output = $parser->($self, $$data) };
718 if ($@ || ! $parser_output) {
719 my $msg = sprintf "translate: Error with parser '%s': %s",
720 $parser_type, ($@) ? $@ : " no results";
721 return $self->error($msg);
724 eval { $producer_output = $producer->($self, $parser_output) };
725 if ($@ || ! $producer_output) {
726 my $msg = sprintf "translate: Error with producer '%s': %s",
727 $producer_type, ($@) ? $@ : " no results";
728 return $self->error($msg);
731 return $producer_output;
734 # ----------------------------------------------------------------------
736 require SQL::Translator::Producer;
737 my $path = catfile(dirname($INC{'SQL/Translator/Producer.pm'}), "Producer");
738 my $dh = IO::Dir->new($path);
740 my @available = map { join "::", "SQL::Translator::Producer", $_ }
741 grep /\.pm$/, $dh->read;
746 # ----------------------------------------------------------------------
748 require SQL::Translator::Parser;
749 my $path = catfile(dirname($INC{'SQL/Translator/Parser.pm'}), "Parser");
750 my $dh = IO::Dir->new($path);
752 my @available = map { join "::", "SQL::Translator::Parser", $_ }
753 grep /\.pm$/, $dh->read;
758 # ----------------------------------------------------------------------
760 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
761 return 1 if $INC{$module};
763 eval { require $module };
769 # ----------------------------------------------------------------------
770 sub isa { UNIVERSAL::isa($_[0], $_[1]) }
774 #-----------------------------------------------------
775 # Rescue the drowning and tie your shoestrings.
776 # Henry David Thoreau
777 #-----------------------------------------------------
783 Ken Y. Clark, E<lt>kclark@cpan.org<gt>,
784 darren chamberlain E<lt>darren@cpan.orgE<gt>,
785 Chris Mungall E<lt>cjm@fruitfly.orgE<gt>
789 This program is free software; you can redistribute it and/or modify
790 it under the terms of the GNU General Public License as published by
791 the Free Software Foundation; version 2.
793 This program is distributed in the hope that it will be useful, but
794 WITHOUT ANY WARRANTY; without even the implied warranty of
795 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
796 General Public License for more details.
798 You should have received a copy of the GNU General Public License
799 along with this program; if not, write to the Free Software
800 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
805 L<perl>, L<Parse::RecDescent>