1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.13 2002-11-25 14:48:34 dlc Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2002 Ken Y. Clark <kclark@cpan.org>,
7 # darren chamberlain <darren@cpan.org>
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; version 2.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 # -------------------------------------------------------------------
26 SQL::Translator - convert schema from one database to another
32 my $translator = SQL::Translator->new(
33 xlate => $xlate || {}, # Overrides for field translation
34 debug => $debug, # Print debug info
35 trace => $trace, # Print Parse::RecDescent trace
36 no_comments => $no_comments, # Don't include comments in output
39 my $output = $translator->translate(
43 ) or die $translator->error;
49 This module attempts to simplify the task of converting one database
50 create syntax to another through the use of Parsers (which understand
51 the sourced format) and Producers (which understand the destination
52 format). The idea is that any Parser can be used with any Producer in
53 the conversion process. So, if you wanted PostgreSQL-to-Oracle, you
54 would use the PostgreSQL parser and the Oracle producer.
59 use vars qw( $VERSION $REVISION $DEFAULT_SUB $DEBUG $ERROR );
60 use base 'Class::Base';
63 $REVISION = sprintf "%d.%02d", q$Revision: 1.13 $ =~ /(\d+)\.(\d+)/;
64 $DEBUG = 0 unless defined $DEBUG;
69 use File::Spec::Functions qw(catfile);
70 use File::Basename qw(dirname);
73 # ----------------------------------------------------------------------
74 # The default behavior is to "pass through" values (note that the
75 # SQL::Translator instance is the first value ($_[0]), and the stuff
76 # to be parsed is the second value ($_[1])
77 # ----------------------------------------------------------------------
78 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
82 The constructor is called B<new>, and accepts a optional hash of options.
87 =item parser (aka from)
91 =item producer (aka to)
95 =item filename (aka file)
103 All options are, well, optional; these attributes can be set via
104 instance methods. Internally, they are; no (non-syntactical)
105 advantage is gained by passing options to the constructor.
109 # ----------------------------------------------------------------------
113 # new takes an optional hash of arguments. These arguments may
114 # include a parser, specified with the keys "parser" or "from",
115 # and a producer, specified with the keys "producer" or "to".
117 # The values that can be passed as the parser or producer are
118 # given directly to the parser or producer methods, respectively.
119 # See the appropriate method description below for details about
120 # what each expects/accepts.
121 # ----------------------------------------------------------------------
123 my ( $self, $config ) = @_;
126 # Set the parser and producer.
128 # If a 'parser' or 'from' parameter is passed in, use that as the
129 # parser; if a 'producer' or 'to' parameter is passed in, use that
130 # as the producer; both default to $DEFAULT_SUB.
132 $self->parser ($config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
133 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
136 # Set the parser_args and producer_args
138 for my $pargs ( qw[ parser_args producer_args ] ) {
139 $self->$pargs( $config->{$pargs} ) if defined $config->{ $pargs };
143 # Set the data source, if 'filename' or 'file' is provided.
145 $config->{'filename'} ||= $config->{'file'} || "";
146 $self->filename( $config->{'filename'} ) if $config->{'filename'};
149 # Finally, if there is a 'data' parameter, use that in
150 # preference to filename and file
152 if ( my $data = $config->{'data'} ) {
153 $self->data( $data );
157 # Set various other options.
159 $self->{'debug'} = defined $config->{'debug'} ? $config->{'debug'} : $DEBUG;
161 $self->trace( $config->{'trace'} );
163 $self->custom_translate( $config->{'xlate'} );
165 $self->no_comments( $config->{'no_comments'} );
172 # ----------------------------------------------------------------------
173 =head2 B<custom_translate>
175 Allows the user to override default translation of fields. For example,
176 if a MySQL "text" field would normally be converted to a "long" for Oracle,
177 the user could specify to change it to a "CLOB." Accepts a hashref where
178 keys are the "from" value and values are the "to," returns the current
183 sub custom_translate {
185 $self->{'custom_translate'} = shift if @_;
186 return $self->{'custom_translate'} || {};
189 # ----------------------------------------------------------------------
190 =head2 B<no_comments>
192 Toggles whether to print comments in the output. Accepts a true or false
193 value, returns the current value.
200 if ( defined $arg ) {
201 $self->{'no_comments'} = $arg ? 1 : 0;
203 return $self->{'no_comments'} || 0;
206 # ----------------------------------------------------------------------
209 The B<producer> method is an accessor/mutator, used to retrieve or
210 define what subroutine is called to produce the output. A subroutine
211 defined as a producer will be invoked as a function (not a method) and
212 passed 2 parameters: its container SQL::Translator instance and a
213 data structure. It is expected that the function transform the data
214 structure to a string. The SQL::Transformer instance is provided for
215 informational purposes; for example, the type of the parser can be
216 retrieved using the B<parser_type> method, and the B<error> and
217 B<debug> methods can be called when needed.
219 When defining a producer, one of several things can be passed
220 in: A module name (e.g., My::Groovy::Producer), a module name
221 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
222 module name and function combination (My::Groovy::Producer::transmogrify),
223 or a reference to an anonymous subroutine. If a full module name is
224 passed in (for the purposes of this method, a string containing "::"
225 is considered to be a module name), it is treated as a package, and a
226 function called "produce" will be invoked: $modulename::produce. If
227 $modulename cannot be loaded, the final portion is stripped off and
228 treated as a function. In other words, if there is no file named
229 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
230 My/Groovy/Producer.pm and use transmogrify as the name of the function,
231 instead of the default "produce".
233 my $tr = SQL::Translator->new;
235 # This will invoke My::Groovy::Producer::produce($tr, $data)
236 $tr->producer("My::Groovy::Producer");
238 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
239 $tr->producer("Sybase");
241 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
242 # assuming that My::Groovy::Producer::transmogrify is not a module
244 $tr->producer("My::Groovy::Producer::transmogrify");
246 # This will invoke the referenced subroutine directly, as
247 # $subref->($tr, $data);
248 $tr->producer(\&my_producer);
250 There is also a method named B<producer_type>, which is a string
251 containing the classname to which the above B<produce> function
252 belongs. In the case of anonymous subroutines, this method returns
255 Finally, there is a method named B<producer_args>, which is both an
256 accessor and a mutator. Arbitrary data may be stored in name => value
257 pairs for the producer subroutine to access:
259 sub My::Random::producer {
260 my ($tr, $data) = @_;
261 my $pr_args = $tr->producer_args();
263 # $pr_args is a hashref.
265 Extra data passed to the B<producer> method is passed to
268 $tr->producer("xSV", delimiter => ',\s*');
270 # In SQL::Translator::Producer::xSV:
271 my $args = $tr->producer_args;
272 my $delimiter = $args->{'delimiter'}; # value is ,\s*
276 # producer and producer_type
280 # producer as a mutator
282 my $producer = shift;
284 # Passed a module name (string containing "::")
285 if ($producer =~ /::/) {
288 # Module name was passed directly
289 # We try to load the name; if it doesn't load, there's
290 # a possibility that it has a function name attached to
292 if (load($producer)) {
293 $func_name = "produce";
296 # Module::function was passed
298 # Passed Module::Name::function; try to recover
299 my @func_parts = split /::/, $producer;
300 $func_name = pop @func_parts;
301 $producer = join "::", @func_parts;
303 # If this doesn't work, then we have a legitimate
305 load($producer) or die "Can't load $producer: $@";
308 # get code reference and assign
309 $self->{'producer'} = \&{ "$producer\::$func_name" };
310 $self->{'producer_type'} = $producer;
311 $self->debug("Got producer: $producer\::$func_name\n");
314 # passed an anonymous subroutine reference
315 elsif (isa($producer, 'CODE')) {
316 $self->{'producer'} = $producer;
317 $self->{'producer_type'} = "CODE";
318 $self->debug("Got producer: code ref\n");
321 # passed a string containing no "::"; relative package name
323 my $Pp = sprintf "SQL::Translator::Producer::$producer";
324 load($Pp) or die "Can't load $Pp: $@";
325 $self->{'producer'} = \&{ "$Pp\::produce" };
326 $self->{'producer_type'} = $Pp;
327 $self->debug("Got producer: $Pp\n");
330 # At this point, $self->{'producer'} contains a subroutine
331 # reference that is ready to run
333 # Anything left? If so, it's producer_args
334 $self->producer_args(@_) if (@_);
337 return $self->{'producer'};
340 # ----------------------------------------------------------------------
343 # producer_type is an accessor that allows producer subs to get
344 # information about their origin. This is poptentially important;
345 # since all producer subs are called as subroutine refernces, there is
346 # no way for a producer to find out which package the sub lives in
347 # originally, for example.
348 # ----------------------------------------------------------------------
349 sub producer_type { $_[0]->{'producer_type'} }
351 # ----------------------------------------------------------------------
354 # Arbitrary name => value pairs of paramters can be passed to a
355 # producer using this method.
357 # XXX All calls to producer_args with a value clobbers old values!
358 # Should probably check if $_[0] is undef, and delete stored
362 # unless (defined $_[0]) {
363 # %{ $self->{'producer_args'} } = ();
365 # my $args = isa($_[0], 'HASH') ? shift : { @_ };
366 # %{ $self->{'producer_args'} } = (
367 # %{ $self->{'producer_args'} },
371 # ----------------------------------------------------------------------
375 my $args = isa($_[0], 'HASH') ? shift : { @_ };
376 $self->{'producer_args'} = $args;
378 $self->{'producer_args'};
381 # ----------------------------------------------------------------------
384 The B<parser> method defines or retrieves a subroutine that will be
385 called to perform the parsing. The basic idea is the same as that of
386 B<producer> (see above), except the default subroutine name is
387 "parse", and will be invoked as $module_name::parse($tr, $data).
388 Also, the parser subroutine will be passed a string containing the
389 entirety of the data to be parsed.
391 # Invokes SQL::Translator::Parser::MySQL::parse()
392 $tr->parser("MySQL");
394 # Invokes My::Groovy::Parser::parse()
395 $tr->parser("My::Groovy::Parser");
397 # Invoke an anonymous subroutine directly
399 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
400 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
401 return $dumper->Dump;
404 There is also B<parser_type> and B<parser_args>, which perform
405 analogously to B<producer_type> and B<producer_args>
412 # parser as a mutator
416 # Passed a module name (string containing "::")
417 if ($parser =~ /::/) {
420 # Module name was passed directly
421 # We try to load the name; if it doesn't load, there's
422 # a possibility that it has a function name attached to
425 $func_name = "parse";
428 # Module::function was passed
430 # Passed Module::Name::function; try to recover
431 my @func_parts = split /::/, $parser;
432 $func_name = pop @func_parts;
433 $parser = join "::", @func_parts;
435 # If this doesn't work, then we have a legitimate
437 load($parser) or die "Can't load $parser: $@";
440 # get code reference and assign
441 $self->{'parser'} = \&{ "$parser\::$func_name" };
442 $self->{'parser_type'} = $parser;
443 $self->debug("Got parser: $parser\::$func_name\n");
446 # passed an anonymous subroutine reference
447 elsif ( isa( $parser, 'CODE' ) ) {
448 $self->{'parser'} = $parser;
449 $self->{'parser_type'} = "CODE";
450 $self->debug("Got parser: code ref\n");
453 # passed a string containing no "::"; relative package name
455 my $Pp = "SQL::Translator::Parser::$parser";
456 load( $Pp ) or die "Can't load $Pp: $@";
457 $self->{'parser'} = \&{ "$Pp\::parse" };
458 $self->{'parser_type'} = $Pp;
459 $self->debug("Got parser: $Pp\n");
463 # At this point, $self->{'parser'} contains a subroutine
464 # reference that is ready to run
466 $self->parser_args( @_ ) if (@_);
469 return $self->{'parser'};
472 # ----------------------------------------------------------------------
473 sub parser_type { $_[0]->{'parser_type'} }
475 # ----------------------------------------------------------------------
476 # XXX See notes on producer_args, above
480 my $args = isa($_[0], 'HASH') ? shift : { @_ };
481 $self->{'parser_args'} = $args;
483 $self->{'parser_args'};
486 # ----------------------------------------------------------------------
489 The B<translate> method calls the subroutines referenced by the
490 B<parser> and B<producer> data members (described above). It accepts
491 as arguments a number of things, in key => value format, including
492 (potentially) a parser and a producer (they are passed directly to the
493 B<parser> and B<producer> methods).
495 Here is how the parameter list to B<translate> is parsed:
501 1 argument means it's the data to be parsed; which could be a string
502 (filename) or a refernce to a scalar (a string stored in memory), or a
503 reference to a hash, which is parsed as being more than one argument
506 # Parse the file /path/to/datafile
507 my $output = $tr->translate("/path/to/datafile");
509 # Parse the data contained in the string $data
510 my $output = $tr->translate(\$data);
514 More than 1 argument means its a hash of things, and it might be
515 setting a parser, producer, or datasource (this key is named
516 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
518 # As above, parse /path/to/datafile, but with different producers
519 for my $prod ("MySQL", "XML", "Sybase") {
520 print $tr->translate(
522 filename => "/path/to/datafile",
526 # The filename hash key could also be:
527 datasource => \$data,
533 # ----------------------------------------------------------------------
534 =head2 B<filename>, B<data>
536 Using the B<filename> method, the filename of the data to be parsed
537 can be set. This method can be used in conjunction with the B<data>
538 method, below. If both the B<filename> and B<data> methods are
539 invoked as mutators, the data set in the B<data> method is used.
541 $tr->filename("/my/data/files/create.sql");
545 my $create_script = do {
547 open CREATE, "/my/data/files/create.sql" or die $!;
550 $tr->data(\$create_script);
552 B<filename> takes a string, which is interpreted as a filename.
553 B<data> takes a reference to a string, which is used as the data to be
554 parsed. If a filename is set, then that file is opened and read when
555 the B<translate> method is called, as long as the data instance
560 # filename - get or set the filename
564 my $filename = shift;
566 my $msg = "Cannot use directory '$filename' as input source";
567 return $self->error($msg);
568 } elsif (-f _ && -r _) {
569 $self->{'filename'} = $filename;
570 $self->debug("Got filename: '$self->{'filename'}'\n");
572 my $msg = "Cannot use '$filename' as input source: ".
573 "file does not exist or is not readable.";
574 return $self->error($msg);
581 # ----------------------------------------------------------------------
582 # data - get or set the data
583 # if $self->{'data'} is not set, but $self->{'filename'} is, then
584 # $self->{'filename'} is opened and read, whith the results put into
589 # Set $self->{'data'} to $_[0], if it is provided.
592 if (isa($data, "SCALAR")) {
593 $self->{'data'} = $data;
595 elsif (! ref $data) {
596 $self->{'data'} = \$data;
600 # If we have a filename but no data yet, populate.
601 if (not $self->{'data'} and my $filename = $self->filename) {
602 $self->debug("Opening '$filename' to get contents.\n");
607 unless (open FH, $filename) {
608 return $self->error("Can't read file '$filename': $!");
612 $self->{'data'} = \$data;
615 return $self->error("Can't close file '$filename': $!");
619 return $self->{'data'};
622 # ----------------------------------------------------------------------
627 Turns on/off the tracing option of Parse::RecDescent.
634 if ( defined $arg ) {
635 $self->{'trace'} = $arg ? 1 : 0;
637 return $self->{'trace'} || 0;
640 # ----------------------------------------------------------------------
643 my ($args, $parser, $parser_type, $producer, $producer_type);
644 my ($parser_output, $producer_output);
648 # Passed a reference to a hash?
649 if (isa($_[0], 'HASH')) {
651 $self->debug("translate: Got a hashref\n");
655 # Passed a reference to a string containing the data
656 elsif (isa($_[0], 'SCALAR')) {
657 # passed a ref to a string
658 $self->debug("translate: Got a SCALAR reference (string)\n");
662 # Not a reference; treat it as a filename
663 elsif (! ref $_[0]) {
664 # Not a ref, it's a filename
665 $self->debug("translate: Got a filename\n");
666 $self->filename($_[0]);
669 # Passed something else entirely.
671 # We're not impressed. Take your empty string and leave.
674 # Actually, if data, parser, and producer are set, then we
675 # can continue. Too bad, because I like my comment
677 return "" unless ($self->data &&
683 # You must pass in a hash, or you get nothing.
688 # ----------------------------------------------------------------------
689 # Can specify the data to be transformed using "filename", "file",
690 # "data", or "datasource".
691 # ----------------------------------------------------------------------
692 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
693 $self->filename($filename);
696 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
700 # ----------------------------------------------------------------
702 # ----------------------------------------------------------------
703 my $data = $self->data;
704 unless (length $$data) {
705 return $self->error("Empty data file!");
708 # ----------------------------------------------------------------
709 # Local reference to the parser subroutine
710 # ----------------------------------------------------------------
711 if ($parser = ($args->{'parser'} || $args->{'from'})) {
712 $self->parser($parser);
714 $parser = $self->parser;
715 $parser_type = $self->parser_type;
717 # ----------------------------------------------------------------
718 # Local reference to the producer subroutine
719 # ----------------------------------------------------------------
720 if ($producer = ($args->{'producer'} || $args->{'to'})) {
721 $self->producer($producer);
723 $producer = $self->producer;
724 $producer_type = $self->producer_type;
726 # ----------------------------------------------------------------
727 # Execute the parser, then execute the producer with that output.
728 # Allowances are made for each piece to die, or fail to compile,
729 # since the referenced subroutines could be almost anything. In
730 # the future, each of these might happen in a Safe environment,
731 # depending on how paranoid we want to be.
732 # ----------------------------------------------------------------
733 eval { $parser_output = $parser->($self, $$data) };
734 if ($@ || ! $parser_output) {
735 my $msg = sprintf "translate: Error with parser '%s': %s",
736 $parser_type, ($@) ? $@ : " no results";
737 return $self->error($msg);
740 eval { $producer_output = $producer->($self, $parser_output) };
741 if ($@ || ! $producer_output) {
742 my $msg = sprintf "translate: Error with producer '%s': %s",
743 $producer_type, ($@) ? $@ : " no results";
744 return $self->error($msg);
747 return $producer_output;
750 # ----------------------------------------------------------------------
752 require SQL::Translator::Producer;
753 my $path = catfile(dirname($INC{'SQL/Translator/Producer.pm'}), "Producer");
754 my $dh = IO::Dir->new($path);
756 my @available = map { join "::", "SQL::Translator::Producer", $_ }
757 grep /\.pm$/, $dh->read;
762 # ----------------------------------------------------------------------
764 require SQL::Translator::Parser;
765 my $path = catfile(dirname($INC{'SQL/Translator/Parser.pm'}), "Parser");
766 my $dh = IO::Dir->new($path);
768 my @available = map { join "::", "SQL::Translator::Parser", $_ }
769 grep /\.pm$/, $dh->read;
774 # ----------------------------------------------------------------------
776 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
777 return 1 if $INC{$module};
779 eval { require $module };
785 # ----------------------------------------------------------------------
786 sub isa { UNIVERSAL::isa($_[0], $_[1]) }
790 #-----------------------------------------------------
791 # Rescue the drowning and tie your shoestrings.
792 # Henry David Thoreau
793 #-----------------------------------------------------
799 Ken Y. Clark, E<lt>kclark@cpan.org<gt>,
800 darren chamberlain E<lt>darren@cpan.orgE<gt>,
801 Chris Mungall E<lt>cjm@fruitfly.orgE<gt>
805 This program is free software; you can redistribute it and/or modify
806 it under the terms of the GNU General Public License as published by
807 the Free Software Foundation; version 2.
809 This program is distributed in the hope that it will be useful, but
810 WITHOUT ANY WARRANTY; without even the implied warranty of
811 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
812 General Public License for more details.
814 You should have received a copy of the GNU General Public License
815 along with this program; if not, write to the Free Software
816 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
821 L<perl>, L<Parse::RecDescent>