1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.15 2003-01-27 17:04:43 dlc Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2003 Ken Y. Clark <kclark@cpan.org>,
7 # darren chamberlain <darren@cpan.org>,
8 # Chris Mungall <cjm@fruitfly.org>
10 # This program is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU General Public License as
12 # published by the Free Software Foundation; version 2.
14 # This program is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 # -------------------------------------------------------------------
27 SQL::Translator - convert schema from one database to another
33 my $translator = SQL::Translator->new(
34 xlate => $xlate || {}, # Overrides for field translation
35 debug => $debug, # Print debug info
36 trace => $trace, # Print Parse::RecDescent trace
37 no_comments => $no_comments, # Don't include comments in output
38 show_warnings => $show_warnings, # Print name mutations, conflicts
39 add_drop_table => $add_drop_table, # Add "drop table" statements
42 my $output = $translator->translate(
46 ) or die $translator->error;
52 This module attempts to simplify the task of converting one database
53 create syntax to another through the use of Parsers (which understand
54 the source format) and Producers (which understand the destination
55 format). The idea is that any Parser can be used with any Producer in
56 the conversion process. So, if you wanted Postgres-to-Oracle, you
57 would use the Postgres parser and the Oracle producer.
62 use vars qw( $VERSION $REVISION $DEFAULT_SUB $DEBUG $ERROR );
63 use base 'Class::Base';
66 $REVISION = sprintf "%d.%02d", q$Revision: 1.15 $ =~ /(\d+)\.(\d+)/;
67 $DEBUG = 0 unless defined $DEBUG;
72 use File::Spec::Functions qw(catfile);
73 use File::Basename qw(dirname);
76 # ----------------------------------------------------------------------
77 # The default behavior is to "pass through" values (note that the
78 # SQL::Translator instance is the first value ($_[0]), and the stuff
79 # to be parsed is the second value ($_[1])
80 # ----------------------------------------------------------------------
81 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
85 The constructor is called B<new>, and accepts a optional hash of options.
90 =item parser (aka from)
94 =item producer (aka to)
98 =item filename (aka file)
106 All options are, well, optional; these attributes can be set via
107 instance methods. Internally, they are; no (non-syntactical)
108 advantage is gained by passing options to the constructor.
112 # ----------------------------------------------------------------------
116 # new takes an optional hash of arguments. These arguments may
117 # include a parser, specified with the keys "parser" or "from",
118 # and a producer, specified with the keys "producer" or "to".
120 # The values that can be passed as the parser or producer are
121 # given directly to the parser or producer methods, respectively.
122 # See the appropriate method description below for details about
123 # what each expects/accepts.
124 # ----------------------------------------------------------------------
126 my ( $self, $config ) = @_;
129 # Set the parser and producer.
131 # If a 'parser' or 'from' parameter is passed in, use that as the
132 # parser; if a 'producer' or 'to' parameter is passed in, use that
133 # as the producer; both default to $DEFAULT_SUB.
135 $self->parser ($config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
136 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
139 # Set the parser_args and producer_args
141 for my $pargs ( qw[ parser_args producer_args ] ) {
142 $self->$pargs( $config->{$pargs} ) if defined $config->{ $pargs };
146 # Set the data source, if 'filename' or 'file' is provided.
148 $config->{'filename'} ||= $config->{'file'} || "";
149 $self->filename( $config->{'filename'} ) if $config->{'filename'};
152 # Finally, if there is a 'data' parameter, use that in
153 # preference to filename and file
155 if ( my $data = $config->{'data'} ) {
156 $self->data( $data );
160 # Set various other options.
162 $self->{'debug'} = defined $config->{'debug'} ? $config->{'debug'} : $DEBUG;
165 $self->add_drop_table( $config->{'add_drop_table'} );
167 $self->custom_translate( $config->{'xlate'} );
169 $self->no_comments( $config->{'no_comments'} );
171 $self->show_warnings( $config->{'show_warnings'} );
173 $self->trace( $config->{'trace'} );
180 =head2 B<add_drop_table>
182 Toggles whether or not to add "DROP TABLE" statements just before the
189 if ( defined (my $arg = shift) ) {
190 $self->{'add_drop_table'} = $arg ? 1 : 0;
192 return $self->{'add_drop_table'} || 0;
196 =head2 B<custom_translate>
198 Allows the user to override default translation of fields. For example,
199 if a MySQL "text" field would normally be converted to a "long" for Oracle,
200 the user could specify to change it to a "CLOB." Accepts a hashref where
201 keys are the "from" value and values are the "to," returns the current
206 sub custom_translate {
208 $self->{'custom_translate'} = shift if @_;
209 return $self->{'custom_translate'} || {};
212 =head2 B<no_comments>
214 Toggles whether to print comments in the output. Accepts a true or false
215 value, returns the current value.
222 if ( defined $arg ) {
223 $self->{'no_comments'} = $arg ? 1 : 0;
225 return $self->{'no_comments'} || 0;
230 The B<producer> method is an accessor/mutator, used to retrieve or
231 define what subroutine is called to produce the output. A subroutine
232 defined as a producer will be invoked as a function (I<not a method>)
233 and passed 2 parameters: its container SQL::Translator instance and a
234 data structure. It is expected that the function transform the data
235 structure to a string. The SQL::Transformer instance is provided for
236 informational purposes; for example, the type of the parser can be
237 retrieved using the B<parser_type> method, and the B<error> and
238 B<debug> methods can be called when needed.
240 When defining a producer, one of several things can be passed
241 in: A module name (e.g., My::Groovy::Producer), a module name
242 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
243 module name and function combination (My::Groovy::Producer::transmogrify),
244 or a reference to an anonymous subroutine. If a full module name is
245 passed in (for the purposes of this method, a string containing "::"
246 is considered to be a module name), it is treated as a package, and a
247 function called "produce" will be invoked: $modulename::produce. If
248 $modulename cannot be loaded, the final portion is stripped off and
249 treated as a function. In other words, if there is no file named
250 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
251 My/Groovy/Producer.pm and use transmogrify as the name of the function,
252 instead of the default "produce".
254 my $tr = SQL::Translator->new;
256 # This will invoke My::Groovy::Producer::produce($tr, $data)
257 $tr->producer("My::Groovy::Producer");
259 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
260 $tr->producer("Sybase");
262 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
263 # assuming that My::Groovy::Producer::transmogrify is not a module
265 $tr->producer("My::Groovy::Producer::transmogrify");
267 # This will invoke the referenced subroutine directly, as
268 # $subref->($tr, $data);
269 $tr->producer(\&my_producer);
271 There is also a method named B<producer_type>, which is a string
272 containing the classname to which the above B<produce> function
273 belongs. In the case of anonymous subroutines, this method returns
276 Finally, there is a method named B<producer_args>, which is both an
277 accessor and a mutator. Arbitrary data may be stored in name => value
278 pairs for the producer subroutine to access:
280 sub My::Random::producer {
281 my ($tr, $data) = @_;
282 my $pr_args = $tr->producer_args();
284 # $pr_args is a hashref.
286 Extra data passed to the B<producer> method is passed to
289 $tr->producer("xSV", delimiter => ',\s*');
291 # In SQL::Translator::Producer::xSV:
292 my $args = $tr->producer_args;
293 my $delimiter = $args->{'delimiter'}; # value is ,\s*
297 # producer and producer_type
301 # producer as a mutator
303 my $producer = shift;
305 # Passed a module name (string containing "::")
306 if ($producer =~ /::/) {
309 # Module name was passed directly
310 # We try to load the name; if it doesn't load, there's
311 # a possibility that it has a function name attached to
313 if (load($producer)) {
314 $func_name = "produce";
317 # Module::function was passed
319 # Passed Module::Name::function; try to recover
320 my @func_parts = split /::/, $producer;
321 $func_name = pop @func_parts;
322 $producer = join "::", @func_parts;
324 # If this doesn't work, then we have a legitimate
326 load($producer) or die "Can't load $producer: $@";
329 # get code reference and assign
330 $self->{'producer'} = \&{ "$producer\::$func_name" };
331 $self->{'producer_type'} = $producer;
332 $self->debug("Got producer: $producer\::$func_name\n");
335 # passed an anonymous subroutine reference
336 elsif (isa($producer, 'CODE')) {
337 $self->{'producer'} = $producer;
338 $self->{'producer_type'} = "CODE";
339 $self->debug("Got producer: code ref\n");
342 # passed a string containing no "::"; relative package name
344 my $Pp = sprintf "SQL::Translator::Producer::$producer";
345 load($Pp) or die "Can't load $Pp: $@";
346 $self->{'producer'} = \&{ "$Pp\::produce" };
347 $self->{'producer_type'} = $Pp;
348 $self->debug("Got producer: $Pp\n");
351 # At this point, $self->{'producer'} contains a subroutine
352 # reference that is ready to run
354 # Anything left? If so, it's producer_args
355 $self->producer_args(@_) if (@_);
358 return $self->{'producer'};
361 # ----------------------------------------------------------------------
364 # producer_type is an accessor that allows producer subs to get
365 # information about their origin. This is poptentially important;
366 # since all producer subs are called as subroutine refernces, there is
367 # no way for a producer to find out which package the sub lives in
368 # originally, for example.
369 # ----------------------------------------------------------------------
370 sub producer_type { $_[0]->{'producer_type'} }
372 # ----------------------------------------------------------------------
375 # Arbitrary name => value pairs of paramters can be passed to a
376 # producer using this method.
378 # XXX All calls to producer_args with a value clobbers old values!
379 # Should probably check if $_[0] is undef, and delete stored
383 # unless (defined $_[0]) {
384 # %{ $self->{'producer_args'} } = ();
386 # my $args = isa($_[0], 'HASH') ? shift : { @_ };
387 # %{ $self->{'producer_args'} } = (
388 # %{ $self->{'producer_args'} },
392 # ----------------------------------------------------------------------
396 my $args = isa($_[0], 'HASH') ? shift : { @_ };
397 $self->{'producer_args'} = $args;
399 $self->{'producer_args'};
404 The B<parser> method defines or retrieves a subroutine that will be
405 called to perform the parsing. The basic idea is the same as that of
406 B<producer> (see above), except the default subroutine name is
407 "parse", and will be invoked as $module_name::parse($tr, $data).
408 Also, the parser subroutine will be passed a string containing the
409 entirety of the data to be parsed.
411 # Invokes SQL::Translator::Parser::MySQL::parse()
412 $tr->parser("MySQL");
414 # Invokes My::Groovy::Parser::parse()
415 $tr->parser("My::Groovy::Parser");
417 # Invoke an anonymous subroutine directly
419 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
420 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
421 return $dumper->Dump;
424 There is also B<parser_type> and B<parser_args>, which perform
425 analogously to B<producer_type> and B<producer_args>
432 # parser as a mutator
436 # Passed a module name (string containing "::")
437 if ($parser =~ /::/) {
440 # Module name was passed directly
441 # We try to load the name; if it doesn't load, there's
442 # a possibility that it has a function name attached to
445 $func_name = "parse";
448 # Module::function was passed
450 # Passed Module::Name::function; try to recover
451 my @func_parts = split /::/, $parser;
452 $func_name = pop @func_parts;
453 $parser = join "::", @func_parts;
455 # If this doesn't work, then we have a legitimate
457 load($parser) or die "Can't load $parser: $@";
460 # get code reference and assign
461 $self->{'parser'} = \&{ "$parser\::$func_name" };
462 $self->{'parser_type'} = $parser;
463 $self->debug("Got parser: $parser\::$func_name\n");
466 # passed an anonymous subroutine reference
467 elsif ( isa( $parser, 'CODE' ) ) {
468 $self->{'parser'} = $parser;
469 $self->{'parser_type'} = "CODE";
470 $self->debug("Got parser: code ref\n");
473 # passed a string containing no "::"; relative package name
475 my $Pp = "SQL::Translator::Parser::$parser";
476 load( $Pp ) or die "Can't load $Pp: $@";
477 $self->{'parser'} = \&{ "$Pp\::parse" };
478 $self->{'parser_type'} = $Pp;
479 $self->debug("Got parser: $Pp\n");
483 # At this point, $self->{'parser'} contains a subroutine
484 # reference that is ready to run
486 $self->parser_args( @_ ) if (@_);
489 return $self->{'parser'};
492 # ----------------------------------------------------------------------
493 sub parser_type { $_[0]->{'parser_type'} }
495 # ----------------------------------------------------------------------
496 # XXX See notes on producer_args, above
500 my $args = isa($_[0], 'HASH') ? shift : { @_ };
501 $self->{'parser_args'} = $args;
503 $self->{'parser_args'};
506 =head2 B<show_warnings>
508 Toggles whether to print warnings of name conflicts, identifier
509 mutations, etc. Probably only generated by producers to let the user
510 know when something won't translate very smoothly (e.g., MySQL "enum"
511 fields into Oracle). Accepts a true or false value, returns the
519 if ( defined $arg ) {
520 $self->{'show_warnings'} = $arg ? 1 : 0;
522 return $self->{'show_warnings'} || 0;
527 The B<translate> method calls the subroutines referenced by the
528 B<parser> and B<producer> data members (described above). It accepts
529 as arguments a number of things, in key => value format, including
530 (potentially) a parser and a producer (they are passed directly to the
531 B<parser> and B<producer> methods).
533 Here is how the parameter list to B<translate> is parsed:
539 1 argument means it's the data to be parsed; which could be a string
540 (filename) or a refernce to a scalar (a string stored in memory), or a
541 reference to a hash, which is parsed as being more than one argument
544 # Parse the file /path/to/datafile
545 my $output = $tr->translate("/path/to/datafile");
547 # Parse the data contained in the string $data
548 my $output = $tr->translate(\$data);
552 More than 1 argument means its a hash of things, and it might be
553 setting a parser, producer, or datasource (this key is named
554 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
556 # As above, parse /path/to/datafile, but with different producers
557 for my $prod ("MySQL", "XML", "Sybase") {
558 print $tr->translate(
560 filename => "/path/to/datafile",
564 # The filename hash key could also be:
565 datasource => \$data,
571 =head2 B<filename>, B<data>
573 Using the B<filename> method, the filename of the data to be parsed
574 can be set. This method can be used in conjunction with the B<data>
575 method, below. If both the B<filename> and B<data> methods are
576 invoked as mutators, the data set in the B<data> method is used.
578 $tr->filename("/my/data/files/create.sql");
582 my $create_script = do {
584 open CREATE, "/my/data/files/create.sql" or die $!;
587 $tr->data(\$create_script);
589 B<filename> takes a string, which is interpreted as a filename.
590 B<data> takes a reference to a string, which is used as the data to be
591 parsed. If a filename is set, then that file is opened and read when
592 the B<translate> method is called, as long as the data instance
597 # filename - get or set the filename
601 my $filename = shift;
603 my $msg = "Cannot use directory '$filename' as input source";
604 return $self->error($msg);
605 } elsif (-f _ && -r _) {
606 $self->{'filename'} = $filename;
607 $self->debug("Got filename: '$self->{'filename'}'\n");
609 my $msg = "Cannot use '$filename' as input source: ".
610 "file does not exist or is not readable.";
611 return $self->error($msg);
618 # ----------------------------------------------------------------------
619 # data - get or set the data
620 # if $self->{'data'} is not set, but $self->{'filename'} is, then
621 # $self->{'filename'} is opened and read, whith the results put into
626 # Set $self->{'data'} to $_[0], if it is provided.
629 if (isa($data, "SCALAR")) {
630 $self->{'data'} = $data;
632 elsif (! ref $data) {
633 $self->{'data'} = \$data;
637 # If we have a filename but no data yet, populate.
638 if (not $self->{'data'} and my $filename = $self->filename) {
639 $self->debug("Opening '$filename' to get contents.\n");
644 unless (open FH, $filename) {
645 return $self->error("Can't read file '$filename': $!");
649 $self->{'data'} = \$data;
652 return $self->error("Can't close file '$filename': $!");
656 return $self->{'data'};
663 Turns on/off the tracing option of Parse::RecDescent.
670 if ( defined $arg ) {
671 $self->{'trace'} = $arg ? 1 : 0;
673 return $self->{'trace'} || 0;
676 # ----------------------------------------------------------------------
679 my ($args, $parser, $parser_type, $producer, $producer_type);
680 my ($parser_output, $producer_output);
684 # Passed a reference to a hash?
685 if (isa($_[0], 'HASH')) {
687 $self->debug("translate: Got a hashref\n");
691 # Passed a reference to a string containing the data
692 elsif (isa($_[0], 'SCALAR')) {
693 # passed a ref to a string
694 $self->debug("translate: Got a SCALAR reference (string)\n");
698 # Not a reference; treat it as a filename
699 elsif (! ref $_[0]) {
700 # Not a ref, it's a filename
701 $self->debug("translate: Got a filename\n");
702 $self->filename($_[0]);
705 # Passed something else entirely.
707 # We're not impressed. Take your empty string and leave.
710 # Actually, if data, parser, and producer are set, then we
711 # can continue. Too bad, because I like my comment
713 return "" unless ($self->data &&
719 # You must pass in a hash, or you get nothing.
724 # ----------------------------------------------------------------------
725 # Can specify the data to be transformed using "filename", "file",
726 # "data", or "datasource".
727 # ----------------------------------------------------------------------
728 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
729 $self->filename($filename);
732 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
736 # ----------------------------------------------------------------
738 # ----------------------------------------------------------------
739 my $data = $self->data;
740 unless (length $$data) {
741 return $self->error("Empty data file!");
744 # ----------------------------------------------------------------
745 # Local reference to the parser subroutine
746 # ----------------------------------------------------------------
747 if ($parser = ($args->{'parser'} || $args->{'from'})) {
748 $self->parser($parser);
750 $parser = $self->parser;
751 $parser_type = $self->parser_type;
753 # ----------------------------------------------------------------
754 # Local reference to the producer subroutine
755 # ----------------------------------------------------------------
756 if ($producer = ($args->{'producer'} || $args->{'to'})) {
757 $self->producer($producer);
759 $producer = $self->producer;
760 $producer_type = $self->producer_type;
762 # ----------------------------------------------------------------
763 # Execute the parser, then execute the producer with that output.
764 # Allowances are made for each piece to die, or fail to compile,
765 # since the referenced subroutines could be almost anything. In
766 # the future, each of these might happen in a Safe environment,
767 # depending on how paranoid we want to be.
768 # ----------------------------------------------------------------
769 eval { $parser_output = $parser->($self, $$data) };
770 if ($@ || ! $parser_output) {
771 my $msg = sprintf "translate: Error with parser '%s': %s",
772 $parser_type, ($@) ? $@ : " no results";
773 return $self->error($msg);
776 eval { $producer_output = $producer->($self, $parser_output) };
777 if ($@ || ! $producer_output) {
778 my $msg = sprintf "translate: Error with producer '%s': %s",
779 $producer_type, ($@) ? $@ : " no results";
780 return $self->error($msg);
783 return $producer_output;
786 # ----------------------------------------------------------------------
788 require SQL::Translator::Producer;
789 my $path = catfile(dirname($INC{'SQL/Translator/Producer.pm'}), "Producer");
790 my $dh = IO::Dir->new($path);
792 my @available = map { join "::", "SQL::Translator::Producer", $_ }
793 grep /\.pm$/, $dh->read;
798 # ----------------------------------------------------------------------
800 require SQL::Translator::Parser;
801 my $path = catfile(dirname($INC{'SQL/Translator/Parser.pm'}), "Parser");
802 my $dh = IO::Dir->new($path);
804 my @available = map { join "::", "SQL::Translator::Parser", $_ }
805 grep /\.pm$/, $dh->read;
810 # ----------------------------------------------------------------------
812 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
813 return 1 if $INC{$module};
815 eval { require $module };
821 # ----------------------------------------------------------------------
822 sub isa($$) { UNIVERSAL::isa($_[0], $_[1]) }
826 #-----------------------------------------------------
827 # Rescue the drowning and tie your shoestrings.
828 # Henry David Thoreau
829 #-----------------------------------------------------
835 Ken Y. Clark, E<lt>kclark@cpan.org<gt>,
836 darren chamberlain E<lt>darren@cpan.orgE<gt>,
837 Chris Mungall E<lt>cjm@fruitfly.orgE<gt>
841 This program is free software; you can redistribute it and/or modify
842 it under the terms of the GNU General Public License as published by
843 the Free Software Foundation; version 2.
845 This program is distributed in the hope that it will be useful, but
846 WITHOUT ANY WARRANTY; without even the implied warranty of
847 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
848 General Public License for more details.
850 You should have received a copy of the GNU General Public License
851 along with this program; if not, write to the Free Software
852 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
858 L<SQL::Translator::Parser>,
859 L<SQL::Translator::Producer>,