1 package SQL::Translator;
3 # ----------------------------------------------------------------------
4 # $Id: Translator.pm,v 1.10 2002-11-20 04:03:03 kycl4rk Exp $
5 # ----------------------------------------------------------------------
6 # Copyright (C) 2002 Ken Y. Clark <kycl4rk@users.sourceforge.net>,
7 # darren chamberlain <darren@cpan.org>
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; version 2.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 # -------------------------------------------------------------------
26 SQL::Translator - convert schema from one database to another
31 my $translator = SQL::Translator->new;
32 my $output = $translator->translate(
36 ) or die $translator->error;
41 This module attempts to simplify the task of converting one database
42 create syntax to another through the use of Parsers (which understand
43 the sourced format) and Producers (which understand the destination
44 format). The idea is that any Parser can be used with any Producer in
45 the conversion process. So, if you wanted PostgreSQL-to-Oracle, you
46 would use the PostgreSQL parser and the Oracle producer.
51 use vars qw( $VERSION $DEFAULT_SUB $DEBUG $ERROR );
52 use base 'Class::Base';
54 $VERSION = sprintf "%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/;
55 $DEBUG = 0 unless defined $DEBUG;
60 # ----------------------------------------------------------------------
61 # The default behavior is to "pass through" values (note that the
62 # SQL::Translator instance is the first value ($_[0]), and the stuff
63 # to be parsed is the second value ($_[1])
64 # ----------------------------------------------------------------------
65 $DEFAULT_SUB = sub { $_[1] } unless defined $DEFAULT_SUB;
69 The constructor is called B<new>, and accepts a optional hash of options.
74 =item parser (aka from)
78 =item producer (aka to)
82 =item filename (aka file)
90 All options are, well, optional; these attributes can be set via
91 instance methods. Internally, they are; no (non-syntactical)
92 advantage is gained by passing options to the constructor.
96 # ----------------------------------------------------------------------
100 # new takes an optional hash of arguments. These arguments may
101 # include a parser, specified with the keys "parser" or "from",
102 # and a producer, specified with the keys "producer" or "to".
104 # The values that can be passed as the parser or producer are
105 # given directly to the parser or producer methods, respectively.
106 # See the appropriate method description below for details about
107 # what each expects/accepts.
108 # ----------------------------------------------------------------------
110 my ( $self, $config ) = @_;
113 # Set the parser and producer.
115 # If a 'parser' or 'from' parameter is passed in, use that as the
116 # parser; if a 'producer' or 'to' parameter is passed in, use that
117 # as the producer; both default to $DEFAULT_SUB.
119 $self->parser ($config->{'parser'} || $config->{'from'} || $DEFAULT_SUB);
120 $self->producer($config->{'producer'} || $config->{'to'} || $DEFAULT_SUB);
123 # Set the parser_args and producer_args
125 for my $pargs ( qw[ parser_args producer_args ] ) {
126 $self->$pargs( $config->{$pargs} ) if defined $config->{ $pargs };
130 # Set the data source, if 'filename' or 'file' is provided.
132 $config->{'filename'} ||= $config->{'file'} || "";
133 $self->filename( $config->{'filename'} ) if $config->{'filename'};
136 # Finally, if there is a 'data' parameter, use that in
137 # preference to filename and file
139 if ( my $data = $config->{'data'} ) {
140 $self->data( $data );
143 $self->{'debug'} = defined $config->{'debug'} ? $config->{'debug'} : $DEBUG;
152 The B<producer> method is an accessor/mutator, used to retrieve or
153 define what subroutine is called to produce the output. A subroutine
154 defined as a producer will be invoked as a function (not a method) and
155 passed 2 parameters: its container SQL::Translator instance and a
156 data structure. It is expected that the function transform the data
157 structure to a string. The SQL::Transformer instance is provided for
158 informational purposes; for example, the type of the parser can be
159 retrieved using the B<parser_type> method, and the B<error> and
160 B<debug> methods can be called when needed.
162 When defining a producer, one of several things can be passed
163 in: A module name (e.g., My::Groovy::Producer), a module name
164 relative to the SQL::Translator::Producer namespace (e.g., MySQL), a
165 module name and function combination (My::Groovy::Producer::transmogrify),
166 or a reference to an anonymous subroutine. If a full module name is
167 passed in (for the purposes of this method, a string containing "::"
168 is considered to be a module name), it is treated as a package, and a
169 function called "produce" will be invoked: $modulename::produce. If
170 $modulename cannot be loaded, the final portion is stripped off and
171 treated as a function. In other words, if there is no file named
172 My/Groovy/Producer/transmogrify.pm, SQL::Translator will attempt to load
173 My/Groovy/Producer.pm and use transmogrify as the name of the function,
174 instead of the default "produce".
176 my $tr = SQL::Translator->new;
178 # This will invoke My::Groovy::Producer::produce($tr, $data)
179 $tr->producer("My::Groovy::Producer");
181 # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data)
182 $tr->producer("Sybase");
184 # This will invoke My::Groovy::Producer::transmogrify($tr, $data),
185 # assuming that My::Groovy::Producer::transmogrify is not a module
187 $tr->producer("My::Groovy::Producer::transmogrify");
189 # This will invoke the referenced subroutine directly, as
190 # $subref->($tr, $data);
191 $tr->producer(\&my_producer);
193 There is also a method named B<producer_type>, which is a string
194 containing the classname to which the above B<produce> function
195 belongs. In the case of anonymous subroutines, this method returns
198 Finally, there is a method named B<producer_args>, which is both an
199 accessor and a mutator. Arbitrary data may be stored in name => value
200 pairs for the producer subroutine to access:
202 sub My::Random::producer {
203 my ($tr, $data) = @_;
204 my $pr_args = $tr->producer_args();
206 # $pr_args is a hashref.
208 Extra data passed to the B<producer> method is passed to
211 $tr->producer("xSV", delimiter => ',\s*');
213 # In SQL::Translator::Producer::xSV:
214 my $args = $tr->producer_args;
215 my $delimiter = $args->{'delimiter'}; # value is ,\s*
219 # producer and producer_type
223 # producer as a mutator
225 my $producer = shift;
227 # Passed a module name (string containing "::")
228 if ($producer =~ /::/) {
231 # Module name was passed directly
232 # We try to load the name; if it doesn't load, there's
233 # a possibility that it has a function name attached to
235 if (load($producer)) {
236 $func_name = "produce";
239 # Module::function was passed
241 # Passed Module::Name::function; try to recover
242 my @func_parts = split /::/, $producer;
243 $func_name = pop @func_parts;
244 $producer = join "::", @func_parts;
246 # If this doesn't work, then we have a legitimate
248 load($producer) or die "Can't load $producer: $@";
251 # get code reference and assign
252 $self->{'producer'} = \&{ "$producer\::$func_name" };
253 $self->{'producer_type'} = $producer;
254 $self->debug("Got producer: $producer\::$func_name\n");
257 # passed an anonymous subroutine reference
258 elsif (isa($producer, 'CODE')) {
259 $self->{'producer'} = $producer;
260 $self->{'producer_type'} = "CODE";
261 $self->debug("Got producer: code ref\n");
264 # passed a string containing no "::"; relative package name
266 my $Pp = sprintf "SQL::Translator::Producer::$producer";
267 load($Pp) or die "Can't load $Pp: $@";
268 $self->{'producer'} = \&{ "$Pp\::produce" };
269 $self->{'producer_type'} = $Pp;
270 $self->debug("Got producer: $Pp\n");
273 # At this point, $self->{'producer'} contains a subroutine
274 # reference that is ready to run
276 # Anything left? If so, it's producer_args
277 $self->producer_args(@_) if (@_);
280 return $self->{'producer'};
283 # ----------------------------------------------------------------------
286 # producer_type is an accessor that allows producer subs to get
287 # information about their origin. This is poptentially important;
288 # since all producer subs are called as subroutine refernces, there is
289 # no way for a producer to find out which package the sub lives in
290 # originally, for example.
291 # ----------------------------------------------------------------------
292 sub producer_type { $_[0]->{'producer_type'} }
294 # ----------------------------------------------------------------------
297 # Arbitrary name => value pairs of paramters can be passed to a
298 # producer using this method.
299 # ----------------------------------------------------------------------
303 my $args = isa($_[0], 'HASH') ? shift : { @_ };
304 $self->{'producer_args'} = $args;
306 $self->{'producer_args'};
311 The B<parser> method defines or retrieves a subroutine that will be
312 called to perform the parsing. The basic idea is the same as that of
313 B<producer> (see above), except the default subroutine name is
314 "parse", and will be invoked as $module_name::parse($tr, $data).
315 Also, the parser subroutine will be passed a string containing the
316 entirety of the data to be parsed (or possibly a reference to a string?).
318 # Invokes SQL::Translator::Parser::MySQL::parse()
319 $tr->parser("MySQL");
321 # Invokes My::Groovy::Parser::parse()
322 $tr->parser("My::Groovy::Parser");
324 # Invoke an anonymous subroutine directly
326 my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]);
327 $dumper->Purity(1)->Terse(1)->Deepcopy(1);
328 return $dumper->Dump;
331 There is also B<parser_type> and B<parser_args>, which perform
332 analogously to B<producer_type> and B<producer_args>
339 # parser as a mutator
343 # Passed a module name (string containing "::")
344 if ($parser =~ /::/) {
347 # Module name was passed directly
348 # We try to load the name; if it doesn't load, there's
349 # a possibility that it has a function name attached to
352 $func_name = "parse";
355 # Module::function was passed
357 # Passed Module::Name::function; try to recover
358 my @func_parts = split /::/, $parser;
359 $func_name = pop @func_parts;
360 $parser = join "::", @func_parts;
362 # If this doesn't work, then we have a legitimate
364 load($parser) or die "Can't load $parser: $@";
367 # get code reference and assign
368 $self->{'parser'} = \&{ "$parser\::$func_name" };
369 $self->{'parser_type'} = $parser;
370 $self->debug("Got parser: $parser\::$func_name\n");
373 # passed an anonymous subroutine reference
374 elsif ( isa( $parser, 'CODE' ) ) {
375 $self->{'parser'} = $parser;
376 $self->{'parser_type'} = "CODE";
377 $self->debug("Got parser: code ref\n");
380 # passed a string containing no "::"; relative package name
382 my $Pp = "SQL::Translator::Parser::$parser";
383 load( $Pp ) or die "Can't load $Pp: $@";
384 $self->{'parser'} = \&{ "$Pp\::parse" };
385 $self->{'parser_type'} = $Pp;
386 $self->debug("Got parser: $Pp\n");
390 # At this point, $self->{'parser'} contains a subroutine
391 # reference that is ready to run
393 $self->parser_args( @_ ) if (@_);
396 return $self->{'parser'};
399 sub parser_type { $_[0]->{'parser_type'} }
405 my $args = isa($_[0], 'HASH') ? shift : { @_ };
406 $self->{'parser_args'} = $args;
408 $self->{'parser_args'};
413 The B<translate> method calls the subroutines referenced by the
414 B<parser> and B<producer> data members (described above). It accepts
415 as arguments a number of things, in key => value format, including
416 (potentially) a parser and a producer (they are passed directly to the
417 B<parser> and B<producer> methods).
419 Here is how the parameter list to B<translate> is parsed:
425 1 argument means it's the data to be parsed; which could be a string
426 (filename) or a refernce to a scalar (a string stored in memory), or a
427 reference to a hash, which is parsed as being more than one argument
430 # Parse the file /path/to/datafile
431 my $output = $tr->translate("/path/to/datafile");
433 # Parse the data contained in the string $data
434 my $output = $tr->translate(\$data);
438 More than 1 argument means its a hash of things, and it might be
439 setting a parser, producer, or datasource (this key is named
440 "filename" or "file" if it's a file, or "data" for a SCALAR reference.
442 # As above, parse /path/to/datafile, but with different producers
443 for my $prod ("MySQL", "XML", "Sybase") {
444 print $tr->translate(
446 filename => "/path/to/datafile",
450 # The filename hash key could also be:
451 datasource => \$data,
457 =head2 B<filename>, B<data>
459 Using the B<filename> method, the filename of the data to be parsed
460 can be set. This method can be used in conjunction with the B<data>
461 method, below. If both the B<filename> and B<data> methods are
462 invoked as mutators, the data set in the B<data> method is used.
464 $tr->filename("/my/data/files/create.sql");
468 my $create_script = do {
470 open CREATE, "/my/data/files/create.sql" or die $!;
473 $tr->data(\$create_script);
475 B<filename> takes a string, which is interpreted as a filename.
476 B<data> takes a reference to a string, which is used as the data to be
477 parsed. If a filename is set, then that file is opened and read when
478 the B<translate> method is called, as long as the data instance
483 # filename - get or set the filename
487 my $filename = shift;
489 my $msg = "Cannot use directory '$filename' as input source";
490 return $self->error($msg);
491 } elsif (-f _ && -r _) {
492 $self->{'filename'} = $filename;
493 $self->debug("Got filename: '$self->{'filename'}'\n");
495 my $msg = "Cannot use '$filename' as input source: ".
496 "file does not exist or is not readable.";
497 return $self->error($msg);
504 # data - get or set the data
505 # if $self->{'data'} is not set, but $self->{'filename'} is, then
506 # $self->{'filename'} is opened and read, whith the results put into
511 # Set $self->{'data'} to $_[0], if it is provided.
514 if (isa($data, "SCALAR")) {
515 $self->{'data'} = $data;
517 elsif (! ref $data) {
518 $self->{'data'} = \$data;
522 # If we have a filename but no data yet, populate.
523 if (not $self->{'data'} and my $filename = $self->filename) {
524 $self->debug("Opening '$filename' to get contents.\n");
529 unless (open FH, $filename) {
530 return $self->error("Can't read file '$filename': $!");
534 $self->{'data'} = \$data;
537 return $self->error("Can't close file '$filename': $!");
541 return $self->{'data'};
547 my ($args, $parser, $parser_type, $producer, $producer_type);
548 my ($parser_output, $producer_output);
552 # Passed a reference to a hash?
553 if (isa($_[0], 'HASH')) {
555 $self->debug("translate: Got a hashref\n");
559 # Passed a reference to a string containing the data
560 elsif (isa($_[0], 'SCALAR')) {
561 # passed a ref to a string
562 $self->debug("translate: Got a SCALAR reference (string)\n");
566 # Not a reference; treat it as a filename
567 elsif (! ref $_[0]) {
568 # Not a ref, it's a filename
569 $self->debug("translate: Got a filename\n");
570 $self->filename($_[0]);
573 # Passed something else entirely.
575 # We're not impressed. Take your empty string and leave.
578 # Actually, if data, parser, and producer are set, then we
579 # can continue. Too bad, because I like my comment
581 return "" unless ($self->data &&
587 # You must pass in a hash, or you get nothing.
592 # ----------------------------------------------------------------------
593 # Can specify the data to be transformed using "filename", "file",
594 # "data", or "datasource".
595 # ----------------------------------------------------------------------
596 if (my $filename = ($args->{'filename'} || $args->{'file'})) {
597 $self->filename($filename);
600 if (my $data = ($self->{'data'} || $self->{'datasource'})) {
604 # ----------------------------------------------------------------
606 # ----------------------------------------------------------------
607 my $data = $self->data;
608 unless (length $$data) {
609 return $self->error("Empty data file!");
612 # ----------------------------------------------------------------
613 # Local reference to the parser subroutine
614 # ----------------------------------------------------------------
615 if ($parser = ($args->{'parser'} || $args->{'from'})) {
616 $self->parser($parser);
618 $parser = $self->parser;
619 $parser_type = $self->parser_type;
621 # ----------------------------------------------------------------
622 # Local reference to the producer subroutine
623 # ----------------------------------------------------------------
624 if ($producer = ($args->{'producer'} || $args->{'to'})) {
625 $self->producer($producer);
627 $producer = $self->producer;
628 $producer_type = $self->producer_type;
630 # ----------------------------------------------------------------
631 # Execute the parser, then execute the producer with that output.
632 # Allowances are made for each piece to die, or fail to compile,
633 # since the referenced subroutines could be almost anything. In
634 # the future, each of these might happen in a Safe environment,
635 # depending on how paranoid we want to be.
636 # ----------------------------------------------------------------
637 eval { $parser_output = $parser->($self, $$data) };
638 if ($@ || ! $parser_output) {
639 my $msg = sprintf "translate: Error with parser '%s': %s",
640 $parser_type, ($@) ? $@ : " no results";
641 return $self->error($msg);
644 eval { $producer_output = $producer->($self, $parser_output) };
645 if ($@ || ! $producer_output) {
646 my $msg = sprintf "translate: Error with producer '%s': %s",
647 $producer_type, ($@) ? $@ : " no results";
648 return $self->error($msg);
651 return $producer_output;
655 my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" };
656 return 1 if $INC{$module};
658 eval { require $module };
664 sub isa { UNIVERSAL::isa($_[0], $_[1]) }
668 #-----------------------------------------------------
669 # Rescue the drowning and tie your shoestrings.
670 # Henry David Thoreau
671 #-----------------------------------------------------
677 Ken Y. Clark, E<lt>kclark@logsoft.comE<gt>,
678 darren chamberlain E<lt>darren@cpan.orgE<gt>
682 This program is free software; you can redistribute it and/or modify
683 it under the terms of the GNU General Public License as published by
684 the Free Software Foundation; version 2.
686 This program is distributed in the hope that it will be useful, but
687 WITHOUT ANY WARRANTY; without even the implied warranty of
688 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
689 General Public License for more details.
691 You should have received a copy of the GNU General Public License
692 along with this program; if not, write to the Free Software
693 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
698 L<perl>, L<Parse::RecDescent>