1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.9 2002-07-23 19:21:16 dlc Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2002 Ken Y. Clark <kycl4rk@users.sourceforge.net>,
7 # darren chamberlain <darren@cpan.org>
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; version 2.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 # -------------------------------------------------------------------
26 SQL::Translator - convert schema from one database to another
31 my $translator = SQL::Translator->new;
33 my $output = $translator->translate(
37 ) or die $translator->error;
42 This module attempts to simplify the task of converting one database
43 create syntax to another through the use of Parsers (which understand
44 the sourced format) and Producers (which understand the destination
45 format). The idea is that any Parser can be used with any Producer in
46 the conversion process. So, if you wanted PostgreSQL-to-Oracle, you
47 would use the PostgreSQL parser and the Oracle producer.
52 use vars qw($VERSION $DEFAULT_SUB $DEBUG $ERROR);
53 use base qw(Class::Base);
55 $VERSION = sprintf "%d.%02d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
56 $DEBUG = 1 unless defined $DEBUG;
61 # ----------------------------------------------------------------------
62 # The default behavior is to "pass through" values (note that the
63 # SQL::Translator instance is the first value ($_[0]), and the stuff
64 # to be parsed is the second value ($_[1])
65 # ----------------------------------------------------------------------
66 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
70 The constructor is called B<new>, and accepts a optional hash of options.
75 =item parser (aka from)
79 =item producer (aka to)
83 =item filename (aka file)
91 All options are, well, optional; these attributes can be set via
92 instance methods. Internally, they are; no (non-syntactical)
93 advantage is gained by passing options to the constructor.
97 # ----------------------------------------------------------------------
101 # new takes an optional hash of arguments. These arguments may
102 # include a parser, specified with the keys "parser" or "from",
103 # and a producer, specified with the keys "producer" or "to".
105 # The values that can be passed as the parser or producer are
106 # given directly to the parser or producer methods, respectively.
107 # See the appropriate method description below for details about
108 # what each expects/accepts.
109 # ----------------------------------------------------------------------
111 my ($self, $config) = @_;
113 # ------------------------------------------------------------------
114 # Set the parser and producer.
116 # If a 'parser' or 'from' parameter is passed in, use that as the
117 # parser; if a 'producer' or 'to' parameter is passed in, use that
118 # as the producer; both default to $DEFAULT_SUB.
119 # ------------------------------------------------------------------
120 $self->parser( $config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
121 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
123 # ------------------------------------------------------------------
124 # Set the parser_args and producer_args
125 # ------------------------------------------------------------------
126 for my $pargs (qw(parser_args producer_args)) {
127 $self->$pargs($config->{$pargs}) if defined $config->{$pargs};
130 # ------------------------------------------------------------------
131 # Set the data source, if 'filename' or 'file' is provided.
132 # ------------------------------------------------------------------
133 $config->{'filename'} ||= $config->{'file'} || "";
134 $self->filename($config->{'filename'}) if $config->{'filename'};
136 # ------------------------------------------------------------------
137 # Finally, if there is a 'data' parameter, use that in preference
138 # to filename and file
139 # ------------------------------------------------------------------
140 if (my $data = $config->{'data'}) {
144 $self->{'debug'} = $DEBUG;
145 $self->{'debug'} = $config->{'debug'} if (defined $config->{'debug'});
154 The B<producer> method is an accessor/mutator, used to retrieve or
155 define what subroutine is called to produce the output. A subroutine
156 defined as a producer will be invoked as a function (not a method) and
157 passed 2 parameters: its container SQL::Translator instance and a
158 data structure. It is expected that the function transform the data
159 structure to a string. The SQL::Transformer instance is provided for
160 informational purposes; for example, the type of the parser can be
161 retrieved using the B<parser_type> method, and the B<error> and
162 B<debug> methods can be called when needed.
164 When defining a producer, one of several things can be passed
165 in: A module name (e.g., My::Groovy::Producer), a module name
166 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
167 module name and function combination (My::Groovy::Producer::transmogrify),
168 or a reference to an anonymous subroutine. If a full module name is
169 passed in (for the purposes of this method, a string containing "::"
170 is considered to be a module name), it is treated as a package, and a
171 function called "produce" will be invoked: $modulename::produce. If
172 $modulename cannot be loaded, the final portion is stripped off and
173 treated as a function. In other words, if there is no file named
174 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
175 My/Groovy/Producer.pm and use transmogrify as the name of the function,
176 instead of the default "produce".
178 my $tr = SQL::Translator->new;
180 # This will invoke My::Groovy::Producer::produce($tr, $data)
181 $tr->producer("My::Groovy::Producer");
183 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
184 $tr->producer("Sybase");
186 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
187 # assuming that My::Groovy::Producer::transmogrify is not a module
189 $tr->producer("My::Groovy::Producer::transmogrify");
191 # This will invoke the referenced subroutine directly, as
192 # $subref->($tr, $data);
193 $tr->producer(\&my_producer);
195 There is also a method named B<producer_type>, which is a string
196 containing the classname to which the above B<produce> function
197 belongs. In the case of anonymous subroutines, this method returns
200 Finally, there is a method named B<producer_args>, which is both an
201 accessor and a mutator. Arbitrary data may be stored in name => value
202 pairs for the producer subroutine to access:
204 sub My::Random::producer {
205 my ($tr, $data) = @_;
206 my $pr_args = $tr->producer_args();
208 # $pr_args is a hashref.
210 Extra data passed to the B<producer> method is passed to
213 $tr->producer("xSV", delimiter => ',\s*');
215 # In SQL::Translator::Producer::xSV:
216 my $args = $tr->producer_args;
217 my $delimiter = $args->{'delimiter'}; # value is ,\s*
221 # producer and producer_type
225 # producer as a mutator
227 my $producer = shift;
229 # Passed a module name (string containing "::")
230 if ($producer =~ /::/) {
233 # Module name was passed directly
234 # We try to load the name; if it doesn't load, there's
235 # a possibility that it has a function name attached to
237 if (load($producer)) {
238 $func_name = "produce";
241 # Module::function was passed
243 # Passed Module::Name::function; try to recover
244 my @func_parts = split /::/, $producer;
245 $func_name = pop @func_parts;
246 $producer = join "::", @func_parts;
248 # If this doesn't work, then we have a legitimate
250 load($producer) or die "Can't load $producer: $@";
253 # get code reference and assign
254 $self->{'producer'} = \&{ "$producer\::$func_name" };
255 $self->{'producer_type'} = $producer;
256 $self->debug("Got producer: $producer\::$func_name");
259 # passed an anonymous subroutine reference
260 elsif (isa($producer, 'CODE')) {
261 $self->{'producer'} = $producer;
262 $self->{'producer_type'} = "CODE";
263 $self->debug("Got producer: code ref");
266 # passed a string containing no "::"; relative package name
268 my $Pp = sprintf "SQL::Translator::Producer::$producer";
269 load($Pp) or die "Can't load $Pp: $@";
270 $self->{'producer'} = \&{ "$Pp\::produce" };
271 $self->{'producer_type'} = $Pp;
272 $self->debug("Got producer: $Pp");
275 # At this point, $self->{'producer'} contains a subroutine
276 # reference that is ready to run
278 # Anything left? If so, it's producer_args
279 $self->producer_args(@_) if (@_);
282 return $self->{'producer'};
285 # ----------------------------------------------------------------------
288 # producer_type is an accessor that allows producer subs to get
289 # information about their origin. This is poptentially important;
290 # since all producer subs are called as subroutine refernces, there is
291 # no way for a producer to find out which package the sub lives in
292 # originally, for example.
293 # ----------------------------------------------------------------------
294 sub producer_type { $_[0]->{'producer_type'} }
296 # ----------------------------------------------------------------------
299 # Arbitrary name => value pairs of paramters can be passed to a
300 # producer using this method.
301 # ----------------------------------------------------------------------
305 my $args = isa($_[0], 'HASH') ? shift : { @_ };
306 $self->{'producer_args'} = $args;
308 $self->{'producer_args'};
313 The B<parser> method defines or retrieves a subroutine that will be
314 called to perform the parsing. The basic idea is the same as that of
315 B<producer> (see above), except the default subroutine name is
316 "parse", and will be invoked as $module_name::parse($tr, $data).
317 Also, the parser subroutine will be passed a string containing the
318 entirety of the data to be parsed (or possibly a reference to a string?).
320 # Invokes SQL::Translator::Parser::MySQL::parse()
321 $tr->parser("MySQL");
323 # Invokes My::Groovy::Parser::parse()
324 $tr->parser("My::Groovy::Parser");
326 # Invoke an anonymous subroutine directly
328 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
329 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
330 return $dumper->Dump;
333 There is also B<parser_type> and B<parser_args>, which perform
334 analogously to B<producer_type> and B<producer_args>
341 # parser as a mutator
345 # Passed a module name (string containing "::")
346 if ($parser =~ /::/) {
349 # Module name was passed directly
350 # We try to load the name; if it doesn't load, there's
351 # a possibility that it has a function name attached to
354 $func_name = "parse";
357 # Module::function was passed
359 # Passed Module::Name::function; try to recover
360 my @func_parts = split /::/, $parser;
361 $func_name = pop @func_parts;
362 $parser = join "::", @func_parts;
364 # If this doesn't work, then we have a legitimate
366 load($parser) or die "Can't load $parser: $@";
369 # get code reference and assign
370 $self->{'parser'} = \&{ "$parser\::$func_name" };
371 $self->{'parser_type'} = $parser;
372 $self->debug("Got parser: $parser\::$func_name");
375 # passed an anonymous subroutine reference
376 elsif (isa($parser, 'CODE')) {
377 $self->{'parser'} = $parser;
378 $self->{'parser_type'} = "CODE";
379 $self->debug("Got parser: code ref");
382 # passed a string containing no "::"; relative package name
384 my $Pp = sprintf "SQL::Translator::Parser::$parser";
385 load($Pp) or die "Can't load $Pp: $@";
386 $self->{'parser'} = \&{ "$Pp\::parse" };
387 $self->{'parser_type'} = $Pp;
388 $self->debug("Got parser: $Pp");
391 # At this point, $self->{'parser'} contains a subroutine
392 # reference that is ready to run
394 $self->parser_args(@_) if (@_);
397 return $self->{'parser'};
400 sub parser_type { $_[0]->{'parser_type'} }
406 my $args = isa($_[0], 'HASH') ? shift : { @_ };
407 $self->{'parser_args'} = $args;
409 $self->{'parser_args'};
414 The B<translate> method calls the subroutines referenced by the
415 B<parser> and B<producer> data members (described above). It accepts
416 as arguments a number of things, in key => value format, including
417 (potentially) a parser and a producer (they are passed directly to the
418 B<parser> and B<producer> methods).
420 Here is how the parameter list to B<translate> is parsed:
426 1 argument means it's the data to be parsed; which could be a string
427 (filename) or a refernce to a scalar (a string stored in memory), or a
428 reference to a hash, which is parsed as being more than one argument
431 # Parse the file /path/to/datafile
432 my $output = $tr->translate("/path/to/datafile");
434 # Parse the data contained in the string $data
435 my $output = $tr->translate(\$data);
439 More than 1 argument means its a hash of things, and it might be
440 setting a parser, producer, or datasource (this key is named
441 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
443 # As above, parse /path/to/datafile, but with different producers
444 for my $prod ("MySQL", "XML", "Sybase") {
445 print $tr->translate(
447 filename => "/path/to/datafile",
451 # The filename hash key could also be:
452 datasource => \$data,
458 =head2 B<filename>, B<data>
460 Using the B<filename> method, the filename of the data to be parsed
461 can be set. This method can be used in conjunction with the B<data>
462 method, below. If both the B<filename> and B<data> methods are
463 invoked as mutators, the data set in the B<data> method is used.
465 $tr->filename("/my/data/files/create.sql");
469 my $create_script = do {
471 open CREATE, "/my/data/files/create.sql" or die $!;
474 $tr->data(\$create_script);
476 B<filename> takes a string, which is interpreted as a filename.
477 B<data> takes a reference to a string, which is used as the data to be
478 parsed. If a filename is set, then that file is opened and read when
479 the B<translate> method is called, as long as the data instance
484 # filename - get or set the filename
488 my $filename = shift;
490 my $msg = "Cannot use directory '$filename' as input source";
491 return $self->error($msg);
492 } elsif (-f _ && -r _) {
493 $self->{'filename'} = $filename;
494 $self->debug("Got filename: $self->{'filename'}");
496 my $msg = "Cannot use '$filename' as input source: ".
497 "file does not exist or is not readable.";
498 return $self->error($msg);
505 # data - get or set the data
506 # if $self->{'data'} is not set, but $self->{'filename'} is, then
507 # $self->{'filename'} is opened and read, whith the results put into
512 # Set $self->{'data'} to $_[0], if it is provided.
515 if (isa($data, "SCALAR")) {
516 $self->{'data'} = $data;
518 elsif (! ref $data) {
519 $self->{'data'} = \$data;
523 # If we have a filename but no data yet, populate.
524 if (not $self->{'data'} and my $filename = $self->filename) {
525 $self->debug("Opening '$filename' to get contents...");
530 unless (open FH, $filename) {
531 return $self->error("Can't open $filename for reading: $!");
535 $self->{'data'} = \$data;
538 return $self->error("Can't close $filename: $!");
542 return $self->{'data'};
548 my ($args, $parser, $parser_type, $producer, $producer_type);
549 my ($parser_output, $producer_output);
553 # Passed a reference to a hash?
554 if (isa($_[0], 'HASH')) {
556 $self->debug("translate: Got a hashref");
560 # Passed a reference to a string containing the data
561 elsif (isa($_[0], 'SCALAR')) {
562 # passed a ref to a string
563 $self->debug("translate: Got a SCALAR reference (string)");
567 # Not a reference; treat it as a filename
568 elsif (! ref $_[0]) {
569 # Not a ref, it's a filename
570 $self->debug("translate: Got a filename");
571 $self->filename($_[0]);
574 # Passed something else entirely.
576 # We're not impressed. Take your empty string and leave.
579 # Actually, if data, parser, and producer are set, then we
580 # can continue. Too bad, because I like my comment
582 return "" unless ($self->data &&
588 # You must pass in a hash, or you get nothing.
593 # ----------------------------------------------------------------------
594 # Can specify the data to be transformed using "filename", "file",
595 # "data", or "datasource".
596 # ----------------------------------------------------------------------
597 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
598 $self->filename($filename);
601 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
605 # ----------------------------------------------------------------
607 # ----------------------------------------------------------------
608 my $data = $self->data;
609 unless (length $$data) {
610 return $self->error("Empty data file!");
613 # ----------------------------------------------------------------
614 # Local reference to the parser subroutine
615 # ----------------------------------------------------------------
616 if ($parser = ($args->{'parser'} || $args->{'from'})) {
617 $self->parser($parser);
619 $parser = $self->parser;
620 $parser_type = $self->parser_type;
622 # ----------------------------------------------------------------
623 # Local reference to the producer subroutine
624 # ----------------------------------------------------------------
625 if ($producer = ($args->{'producer'} || $args->{'to'})) {
626 $self->producer($producer);
628 $producer = $self->producer;
629 $producer_type = $self->producer_type;
631 # ----------------------------------------------------------------
632 # Execute the parser, then execute the producer with that output.
633 # Allowances are made for each piece to die, or fail to compile,
634 # since the referenced subroutines could be almost anything. In
635 # the future, each of these might happen in a Safe environment,
636 # depending on how paranoid we want to be.
637 # ----------------------------------------------------------------
638 eval { $parser_output = $parser->($self, $$data) };
639 if ($@ || ! $parser_output) {
640 my $msg = sprintf "translate: Error with parser '%s': %s",
641 $parser_type, ($@) ? $@ : " no results";
642 return $self->error($msg);
645 eval { $producer_output = $producer->($self, $parser_output) };
646 if ($@ || ! $producer_output) {
647 my $msg = sprintf "translate: Error with producer '%s': %s",
648 $producer_type, ($@) ? $@ : " no results";
649 return $self->error($msg);
652 return $producer_output;
656 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
657 return 1 if $INC{$module};
659 eval { require $module };
665 sub isa { UNIVERSAL::isa($_[0], $_[1]) }
670 #-----------------------------------------------------
671 # Rescue the drowning and tie your shoestrings.
672 # Henry David Thoreau
673 #-----------------------------------------------------
677 Ken Y. Clark, E<lt>kclark@logsoft.comE<gt>,
678 darren chamberlain E<lt>darren@cpan.orgE<gt>
682 This program is free software; you can redistribute it and/or modify
683 it under the terms of the GNU General Public License as published by
684 the Free Software Foundation; version 2.
686 This program is distributed in the hope that it will be useful, but
687 WITHOUT ANY WARRANTY; without even the implied warranty of
688 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
689 General Public License for more details.
691 You should have received a copy of the GNU General Public License
692 along with this program; if not, write to the Free Software
693 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
698 L<perl>, L<Parse::RecDescent>