1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.14 2002-11-26 03:59:57 kycl4rk Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2002 Ken Y. Clark <kclark@cpan.org>,
7 # darren chamberlain <darren@cpan.org>
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; version 2.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 # -------------------------------------------------------------------
26 SQL::Translator - convert schema from one database to another
32 my $translator = SQL::Translator->new(
33 xlate => $xlate || {}, # Overrides for field translation
34 debug => $debug, # Print debug info
35 trace => $trace, # Print Parse::RecDescent trace
36 no_comments => $no_comments, # Don't include comments in output
37 show_warnings => $show_warnings, # Print name mutations, conflicts
38 add_drop_table => $add_drop_table, # Add "drop table" statements
41 my $output = $translator->translate(
45 ) or die $translator->error;
51 This module attempts to simplify the task of converting one database
52 create syntax to another through the use of Parsers (which understand
53 the sourced format) and Producers (which understand the destination
54 format). The idea is that any Parser can be used with any Producer in
55 the conversion process. So, if you wanted PostgreSQL-to-Oracle, you
56 would use the PostgreSQL parser and the Oracle producer.
61 use vars qw( $VERSION $REVISION $DEFAULT_SUB $DEBUG $ERROR );
62 use base 'Class::Base';
65 $REVISION = sprintf "%d.%02d", q$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
66 $DEBUG = 0 unless defined $DEBUG;
71 use File::Spec::Functions qw(catfile);
72 use File::Basename qw(dirname);
75 # ----------------------------------------------------------------------
76 # The default behavior is to "pass through" values (note that the
77 # SQL::Translator instance is the first value ($_[0]), and the stuff
78 # to be parsed is the second value ($_[1])
79 # ----------------------------------------------------------------------
80 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
84 The constructor is called B<new>, and accepts a optional hash of options.
89 =item parser (aka from)
93 =item producer (aka to)
97 =item filename (aka file)
105 All options are, well, optional; these attributes can be set via
106 instance methods. Internally, they are; no (non-syntactical)
107 advantage is gained by passing options to the constructor.
111 # ----------------------------------------------------------------------
115 # new takes an optional hash of arguments. These arguments may
116 # include a parser, specified with the keys "parser" or "from",
117 # and a producer, specified with the keys "producer" or "to".
119 # The values that can be passed as the parser or producer are
120 # given directly to the parser or producer methods, respectively.
121 # See the appropriate method description below for details about
122 # what each expects/accepts.
123 # ----------------------------------------------------------------------
125 my ( $self, $config ) = @_;
128 # Set the parser and producer.
130 # If a 'parser' or 'from' parameter is passed in, use that as the
131 # parser; if a 'producer' or 'to' parameter is passed in, use that
132 # as the producer; both default to $DEFAULT_SUB.
134 $self->parser ($config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
135 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
138 # Set the parser_args and producer_args
140 for my $pargs ( qw[ parser_args producer_args ] ) {
141 $self->$pargs( $config->{$pargs} ) if defined $config->{ $pargs };
145 # Set the data source, if 'filename' or 'file' is provided.
147 $config->{'filename'} ||= $config->{'file'} || "";
148 $self->filename( $config->{'filename'} ) if $config->{'filename'};
151 # Finally, if there is a 'data' parameter, use that in
152 # preference to filename and file
154 if ( my $data = $config->{'data'} ) {
155 $self->data( $data );
159 # Set various other options.
161 $self->{'debug'} = defined $config->{'debug'} ? $config->{'debug'} : $DEBUG;
164 $self->add_drop_table( $config->{'add_drop_table'} );
166 $self->custom_translate( $config->{'xlate'} );
168 $self->no_comments( $config->{'no_comments'} );
170 $self->show_warnings( $config->{'show_warnings'} );
172 $self->trace( $config->{'trace'} );
179 # ----------------------------------------------------------------------
180 =head2 B<add_drop_table>
182 Toggles whether or not to add "DROP TABLE" statements just before the
189 if ( defined (my $arg = shift) ) {
190 $self->{'add_drop_table'} = $arg ? 1 : 0;
192 return $self->{'add_drop_table'} || 0;
196 # ----------------------------------------------------------------------
197 =head2 B<custom_translate>
199 Allows the user to override default translation of fields. For example,
200 if a MySQL "text" field would normally be converted to a "long" for Oracle,
201 the user could specify to change it to a "CLOB." Accepts a hashref where
202 keys are the "from" value and values are the "to," returns the current
207 sub custom_translate {
209 $self->{'custom_translate'} = shift if @_;
210 return $self->{'custom_translate'} || {};
213 # ----------------------------------------------------------------------
214 =head2 B<no_comments>
216 Toggles whether to print comments in the output. Accepts a true or false
217 value, returns the current value.
224 if ( defined $arg ) {
225 $self->{'no_comments'} = $arg ? 1 : 0;
227 return $self->{'no_comments'} || 0;
230 # ----------------------------------------------------------------------
233 The B<producer> method is an accessor/mutator, used to retrieve or
234 define what subroutine is called to produce the output. A subroutine
235 defined as a producer will be invoked as a function (not a method) and
236 passed 2 parameters: its container SQL::Translator instance and a
237 data structure. It is expected that the function transform the data
238 structure to a string. The SQL::Transformer instance is provided for
239 informational purposes; for example, the type of the parser can be
240 retrieved using the B<parser_type> method, and the B<error> and
241 B<debug> methods can be called when needed.
243 When defining a producer, one of several things can be passed
244 in: A module name (e.g., My::Groovy::Producer), a module name
245 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
246 module name and function combination (My::Groovy::Producer::transmogrify),
247 or a reference to an anonymous subroutine. If a full module name is
248 passed in (for the purposes of this method, a string containing "::"
249 is considered to be a module name), it is treated as a package, and a
250 function called "produce" will be invoked: $modulename::produce. If
251 $modulename cannot be loaded, the final portion is stripped off and
252 treated as a function. In other words, if there is no file named
253 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
254 My/Groovy/Producer.pm and use transmogrify as the name of the function,
255 instead of the default "produce".
257 my $tr = SQL::Translator->new;
259 # This will invoke My::Groovy::Producer::produce($tr, $data)
260 $tr->producer("My::Groovy::Producer");
262 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
263 $tr->producer("Sybase");
265 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
266 # assuming that My::Groovy::Producer::transmogrify is not a module
268 $tr->producer("My::Groovy::Producer::transmogrify");
270 # This will invoke the referenced subroutine directly, as
271 # $subref->($tr, $data);
272 $tr->producer(\&my_producer);
274 There is also a method named B<producer_type>, which is a string
275 containing the classname to which the above B<produce> function
276 belongs. In the case of anonymous subroutines, this method returns
279 Finally, there is a method named B<producer_args>, which is both an
280 accessor and a mutator. Arbitrary data may be stored in name => value
281 pairs for the producer subroutine to access:
283 sub My::Random::producer {
284 my ($tr, $data) = @_;
285 my $pr_args = $tr->producer_args();
287 # $pr_args is a hashref.
289 Extra data passed to the B<producer> method is passed to
292 $tr->producer("xSV", delimiter => ',\s*');
294 # In SQL::Translator::Producer::xSV:
295 my $args = $tr->producer_args;
296 my $delimiter = $args->{'delimiter'}; # value is ,\s*
300 # producer and producer_type
304 # producer as a mutator
306 my $producer = shift;
308 # Passed a module name (string containing "::")
309 if ($producer =~ /::/) {
312 # Module name was passed directly
313 # We try to load the name; if it doesn't load, there's
314 # a possibility that it has a function name attached to
316 if (load($producer)) {
317 $func_name = "produce";
320 # Module::function was passed
322 # Passed Module::Name::function; try to recover
323 my @func_parts = split /::/, $producer;
324 $func_name = pop @func_parts;
325 $producer = join "::", @func_parts;
327 # If this doesn't work, then we have a legitimate
329 load($producer) or die "Can't load $producer: $@";
332 # get code reference and assign
333 $self->{'producer'} = \&{ "$producer\::$func_name" };
334 $self->{'producer_type'} = $producer;
335 $self->debug("Got producer: $producer\::$func_name\n");
338 # passed an anonymous subroutine reference
339 elsif (isa($producer, 'CODE')) {
340 $self->{'producer'} = $producer;
341 $self->{'producer_type'} = "CODE";
342 $self->debug("Got producer: code ref\n");
345 # passed a string containing no "::"; relative package name
347 my $Pp = sprintf "SQL::Translator::Producer::$producer";
348 load($Pp) or die "Can't load $Pp: $@";
349 $self->{'producer'} = \&{ "$Pp\::produce" };
350 $self->{'producer_type'} = $Pp;
351 $self->debug("Got producer: $Pp\n");
354 # At this point, $self->{'producer'} contains a subroutine
355 # reference that is ready to run
357 # Anything left? If so, it's producer_args
358 $self->producer_args(@_) if (@_);
361 return $self->{'producer'};
364 # ----------------------------------------------------------------------
367 # producer_type is an accessor that allows producer subs to get
368 # information about their origin. This is poptentially important;
369 # since all producer subs are called as subroutine refernces, there is
370 # no way for a producer to find out which package the sub lives in
371 # originally, for example.
372 # ----------------------------------------------------------------------
373 sub producer_type { $_[0]->{'producer_type'} }
375 # ----------------------------------------------------------------------
378 # Arbitrary name => value pairs of paramters can be passed to a
379 # producer using this method.
381 # XXX All calls to producer_args with a value clobbers old values!
382 # Should probably check if $_[0] is undef, and delete stored
386 # unless (defined $_[0]) {
387 # %{ $self->{'producer_args'} } = ();
389 # my $args = isa($_[0], 'HASH') ? shift : { @_ };
390 # %{ $self->{'producer_args'} } = (
391 # %{ $self->{'producer_args'} },
395 # ----------------------------------------------------------------------
399 my $args = isa($_[0], 'HASH') ? shift : { @_ };
400 $self->{'producer_args'} = $args;
402 $self->{'producer_args'};
405 # ----------------------------------------------------------------------
408 The B<parser> method defines or retrieves a subroutine that will be
409 called to perform the parsing. The basic idea is the same as that of
410 B<producer> (see above), except the default subroutine name is
411 "parse", and will be invoked as $module_name::parse($tr, $data).
412 Also, the parser subroutine will be passed a string containing the
413 entirety of the data to be parsed.
415 # Invokes SQL::Translator::Parser::MySQL::parse()
416 $tr->parser("MySQL");
418 # Invokes My::Groovy::Parser::parse()
419 $tr->parser("My::Groovy::Parser");
421 # Invoke an anonymous subroutine directly
423 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
424 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
425 return $dumper->Dump;
428 There is also B<parser_type> and B<parser_args>, which perform
429 analogously to B<producer_type> and B<producer_args>
436 # parser as a mutator
440 # Passed a module name (string containing "::")
441 if ($parser =~ /::/) {
444 # Module name was passed directly
445 # We try to load the name; if it doesn't load, there's
446 # a possibility that it has a function name attached to
449 $func_name = "parse";
452 # Module::function was passed
454 # Passed Module::Name::function; try to recover
455 my @func_parts = split /::/, $parser;
456 $func_name = pop @func_parts;
457 $parser = join "::", @func_parts;
459 # If this doesn't work, then we have a legitimate
461 load($parser) or die "Can't load $parser: $@";
464 # get code reference and assign
465 $self->{'parser'} = \&{ "$parser\::$func_name" };
466 $self->{'parser_type'} = $parser;
467 $self->debug("Got parser: $parser\::$func_name\n");
470 # passed an anonymous subroutine reference
471 elsif ( isa( $parser, 'CODE' ) ) {
472 $self->{'parser'} = $parser;
473 $self->{'parser_type'} = "CODE";
474 $self->debug("Got parser: code ref\n");
477 # passed a string containing no "::"; relative package name
479 my $Pp = "SQL::Translator::Parser::$parser";
480 load( $Pp ) or die "Can't load $Pp: $@";
481 $self->{'parser'} = \&{ "$Pp\::parse" };
482 $self->{'parser_type'} = $Pp;
483 $self->debug("Got parser: $Pp\n");
487 # At this point, $self->{'parser'} contains a subroutine
488 # reference that is ready to run
490 $self->parser_args( @_ ) if (@_);
493 return $self->{'parser'};
496 # ----------------------------------------------------------------------
497 sub parser_type { $_[0]->{'parser_type'} }
499 # ----------------------------------------------------------------------
500 # XXX See notes on producer_args, above
504 my $args = isa($_[0], 'HASH') ? shift : { @_ };
505 $self->{'parser_args'} = $args;
507 $self->{'parser_args'};
510 # ----------------------------------------------------------------------
511 =head2 B<show_warnings>
513 Toggles whether to print warnings of name conflicts, identifier
514 mutations, etc. Probably only generated by producers to let the user
515 know when something won't translate very smoothly (e.g., MySQL "enum"
516 fields into Oracle). Accepts a true or false value, returns the
524 if ( defined $arg ) {
525 $self->{'show_warnings'} = $arg ? 1 : 0;
527 return $self->{'show_warnings'} || 0;
530 # ----------------------------------------------------------------------
533 The B<translate> method calls the subroutines referenced by the
534 B<parser> and B<producer> data members (described above). It accepts
535 as arguments a number of things, in key => value format, including
536 (potentially) a parser and a producer (they are passed directly to the
537 B<parser> and B<producer> methods).
539 Here is how the parameter list to B<translate> is parsed:
545 1 argument means it's the data to be parsed; which could be a string
546 (filename) or a refernce to a scalar (a string stored in memory), or a
547 reference to a hash, which is parsed as being more than one argument
550 # Parse the file /path/to/datafile
551 my $output = $tr->translate("/path/to/datafile");
553 # Parse the data contained in the string $data
554 my $output = $tr->translate(\$data);
558 More than 1 argument means its a hash of things, and it might be
559 setting a parser, producer, or datasource (this key is named
560 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
562 # As above, parse /path/to/datafile, but with different producers
563 for my $prod ("MySQL", "XML", "Sybase") {
564 print $tr->translate(
566 filename => "/path/to/datafile",
570 # The filename hash key could also be:
571 datasource => \$data,
577 # ----------------------------------------------------------------------
578 =head2 B<filename>, B<data>
580 Using the B<filename> method, the filename of the data to be parsed
581 can be set. This method can be used in conjunction with the B<data>
582 method, below. If both the B<filename> and B<data> methods are
583 invoked as mutators, the data set in the B<data> method is used.
585 $tr->filename("/my/data/files/create.sql");
589 my $create_script = do {
591 open CREATE, "/my/data/files/create.sql" or die $!;
594 $tr->data(\$create_script);
596 B<filename> takes a string, which is interpreted as a filename.
597 B<data> takes a reference to a string, which is used as the data to be
598 parsed. If a filename is set, then that file is opened and read when
599 the B<translate> method is called, as long as the data instance
604 # filename - get or set the filename
608 my $filename = shift;
610 my $msg = "Cannot use directory '$filename' as input source";
611 return $self->error($msg);
612 } elsif (-f _ && -r _) {
613 $self->{'filename'} = $filename;
614 $self->debug("Got filename: '$self->{'filename'}'\n");
616 my $msg = "Cannot use '$filename' as input source: ".
617 "file does not exist or is not readable.";
618 return $self->error($msg);
625 # ----------------------------------------------------------------------
626 # data - get or set the data
627 # if $self->{'data'} is not set, but $self->{'filename'} is, then
628 # $self->{'filename'} is opened and read, whith the results put into
633 # Set $self->{'data'} to $_[0], if it is provided.
636 if (isa($data, "SCALAR")) {
637 $self->{'data'} = $data;
639 elsif (! ref $data) {
640 $self->{'data'} = \$data;
644 # If we have a filename but no data yet, populate.
645 if (not $self->{'data'} and my $filename = $self->filename) {
646 $self->debug("Opening '$filename' to get contents.\n");
651 unless (open FH, $filename) {
652 return $self->error("Can't read file '$filename': $!");
656 $self->{'data'} = \$data;
659 return $self->error("Can't close file '$filename': $!");
663 return $self->{'data'};
666 # ----------------------------------------------------------------------
671 Turns on/off the tracing option of Parse::RecDescent.
678 if ( defined $arg ) {
679 $self->{'trace'} = $arg ? 1 : 0;
681 return $self->{'trace'} || 0;
684 # ----------------------------------------------------------------------
687 my ($args, $parser, $parser_type, $producer, $producer_type);
688 my ($parser_output, $producer_output);
692 # Passed a reference to a hash?
693 if (isa($_[0], 'HASH')) {
695 $self->debug("translate: Got a hashref\n");
699 # Passed a reference to a string containing the data
700 elsif (isa($_[0], 'SCALAR')) {
701 # passed a ref to a string
702 $self->debug("translate: Got a SCALAR reference (string)\n");
706 # Not a reference; treat it as a filename
707 elsif (! ref $_[0]) {
708 # Not a ref, it's a filename
709 $self->debug("translate: Got a filename\n");
710 $self->filename($_[0]);
713 # Passed something else entirely.
715 # We're not impressed. Take your empty string and leave.
718 # Actually, if data, parser, and producer are set, then we
719 # can continue. Too bad, because I like my comment
721 return "" unless ($self->data &&
727 # You must pass in a hash, or you get nothing.
732 # ----------------------------------------------------------------------
733 # Can specify the data to be transformed using "filename", "file",
734 # "data", or "datasource".
735 # ----------------------------------------------------------------------
736 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
737 $self->filename($filename);
740 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
744 # ----------------------------------------------------------------
746 # ----------------------------------------------------------------
747 my $data = $self->data;
748 unless (length $$data) {
749 return $self->error("Empty data file!");
752 # ----------------------------------------------------------------
753 # Local reference to the parser subroutine
754 # ----------------------------------------------------------------
755 if ($parser = ($args->{'parser'} || $args->{'from'})) {
756 $self->parser($parser);
758 $parser = $self->parser;
759 $parser_type = $self->parser_type;
761 # ----------------------------------------------------------------
762 # Local reference to the producer subroutine
763 # ----------------------------------------------------------------
764 if ($producer = ($args->{'producer'} || $args->{'to'})) {
765 $self->producer($producer);
767 $producer = $self->producer;
768 $producer_type = $self->producer_type;
770 # ----------------------------------------------------------------
771 # Execute the parser, then execute the producer with that output.
772 # Allowances are made for each piece to die, or fail to compile,
773 # since the referenced subroutines could be almost anything. In
774 # the future, each of these might happen in a Safe environment,
775 # depending on how paranoid we want to be.
776 # ----------------------------------------------------------------
777 eval { $parser_output = $parser->($self, $$data) };
778 if ($@ || ! $parser_output) {
779 my $msg = sprintf "translate: Error with parser '%s': %s",
780 $parser_type, ($@) ? $@ : " no results";
781 return $self->error($msg);
784 eval { $producer_output = $producer->($self, $parser_output) };
785 if ($@ || ! $producer_output) {
786 my $msg = sprintf "translate: Error with producer '%s': %s",
787 $producer_type, ($@) ? $@ : " no results";
788 return $self->error($msg);
791 return $producer_output;
794 # ----------------------------------------------------------------------
796 require SQL::Translator::Producer;
797 my $path = catfile(dirname($INC{'SQL/Translator/Producer.pm'}), "Producer");
798 my $dh = IO::Dir->new($path);
800 my @available = map { join "::", "SQL::Translator::Producer", $_ }
801 grep /\.pm$/, $dh->read;
806 # ----------------------------------------------------------------------
808 require SQL::Translator::Parser;
809 my $path = catfile(dirname($INC{'SQL/Translator/Parser.pm'}), "Parser");
810 my $dh = IO::Dir->new($path);
812 my @available = map { join "::", "SQL::Translator::Parser", $_ }
813 grep /\.pm$/, $dh->read;
818 # ----------------------------------------------------------------------
820 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
821 return 1 if $INC{$module};
823 eval { require $module };
829 # ----------------------------------------------------------------------
830 sub isa { UNIVERSAL::isa($_[0], $_[1]) }
834 #-----------------------------------------------------
835 # Rescue the drowning and tie your shoestrings.
836 # Henry David Thoreau
837 #-----------------------------------------------------
843 Ken Y. Clark, E<lt>kclark@cpan.org<gt>,
844 darren chamberlain E<lt>darren@cpan.orgE<gt>,
845 Chris Mungall E<lt>cjm@fruitfly.orgE<gt>
849 This program is free software; you can redistribute it and/or modify
850 it under the terms of the GNU General Public License as published by
851 the Free Software Foundation; version 2.
853 This program is distributed in the hope that it will be useful, but
854 WITHOUT ANY WARRANTY; without even the implied warranty of
855 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
856 General Public License for more details.
858 You should have received a copy of the GNU General Public License
859 along with this program; if not, write to the Free Software
860 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
865 L<perl>, L<Parse::RecDescent>