Commit | Line | Data |
16dc9970 |
1 | package SQL::Translator; |
2 | |
3 | #----------------------------------------------------- |
077ebf34 |
4 | # $Id: Translator.pm,v 1.3.2.2 2002-03-15 20:13:46 dlc Exp $ |
16dc9970 |
5 | #----------------------------------------------------- |
077ebf34 |
6 | # Copyright (C) 2002 Ken Y. Clark <kycl4rk@users.sourceforge.net>, |
7 | # darren chamberlain <darren@cpan.org> |
ca10f295 |
8 | # |
077ebf34 |
9 | # This program is free software; you can redistribute it and/or |
10 | # modify it under the terms of the GNU General Public License as |
11 | # published by the Free Software Foundation; version 2. |
ca10f295 |
12 | # |
077ebf34 |
13 | # This program is distributed in the hope that it will be useful, but |
14 | # WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | # General Public License for more details. |
ca10f295 |
17 | # |
077ebf34 |
18 | # You should have received a copy of the GNU General Public License |
19 | # along with this program; if not, write to the Free Software |
20 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
21 | # 02111-1307 USA |
ca10f295 |
22 | # ------------------------------------------------------------------- |
23 | |
24 | =head1 NAME |
25 | |
26 | SQL::Translator - convert schema from one database to another |
27 | |
28 | =head1 SYNOPSIS |
29 | |
30 | use SQL::Translator; |
31 | my $translator = SQL::Translator->new; |
077ebf34 |
32 | $translator->parser("MySQL"); |
33 | $translator->producer("Oracle"); |
34 | |
35 | my $output = $translator->translate($file) or die $translator->error; |
ca10f295 |
36 | print $output; |
37 | |
38 | =head1 DESCRIPTION |
39 | |
40 | This module attempts to simplify the task of converting one database |
41 | create syntax to another through the use of Parsers and Producers. |
42 | The idea is that any Parser can be used with any Producer in the |
43 | conversion process. So, if you wanted PostgreSQL-to-Oracle, you could |
44 | just write the PostgreSQL parser and use an existing Oracle producer. |
45 | |
077ebf34 |
46 | Currently, the existing parsers use Parse::RecDescent, but this not |
47 | a requirement, or even a recommendation. New parser modules don't |
48 | necessarily have to use Parse::RecDescent, as long as the module |
49 | implements the appropriate API. With this separation of code, it is |
ca10f295 |
50 | hoped that developers will find it easy to add more database dialects |
51 | by using what's written, writing only what they need, and then |
52 | contributing their parsers or producers back to the project. |
53 | |
54 | =cut |
16dc9970 |
55 | |
56 | use strict; |
ca10f295 |
57 | use vars qw($VERSION $DEFAULT_SUB $DEBUG); |
077ebf34 |
58 | $VERSION = sprintf "%d.%02d", q$Revision: 1.3.2.2 $ =~ /(\d+)\.(\d+)/; |
ca10f295 |
59 | $DEBUG = 1 unless defined $DEBUG; |
16dc9970 |
60 | |
ca10f295 |
61 | $DEFAULT_SUB = sub { $_[0] } unless defined $DEFAULT_SUB; |
077ebf34 |
62 | |
63 | *can = \&UNIVERSAL::can; |
ca10f295 |
64 | *isa = \&UNIVERSAL::isa; |
16dc9970 |
65 | |
ca10f295 |
66 | =head1 CONSTRUCTOR |
16dc9970 |
67 | |
077ebf34 |
68 | The constructor is called B<new>, and accepts a optional hash of options. |
ca10f295 |
69 | Valid options are: |
16dc9970 |
70 | |
ca10f295 |
71 | =over 4 |
72 | |
73 | =item parser (aka from) |
74 | |
75 | =item producer (aka to) |
76 | |
77 | =item filename |
78 | |
79 | =back |
80 | |
81 | All options are, well, optional; these attributes can be set via |
82 | instance methods. |
83 | |
84 | =cut |
85 | |
86 | # {{{ new |
16dc9970 |
87 | |
16dc9970 |
88 | sub new { |
16dc9970 |
89 | my $class = shift; |
ca10f295 |
90 | my $args = isa($_[0], 'HASH') ? shift : { @_ }; |
91 | my $self = bless { } => $class; |
16dc9970 |
92 | |
ca10f295 |
93 | # |
94 | # Set the parser and producer. If a 'parser' or 'from' parameter |
95 | # is passed in, use that as the parser; if a 'producer' or 'to' |
96 | # parameter is passed in, use that as the producer; both default |
97 | # to $DEFAULT_SUB. |
98 | # |
99 | $self->parser( $args->{'parser'} || $args->{'from'} || $DEFAULT_SUB); |
100 | $self->producer($args->{'producer'} || $args->{'to'} || $DEFAULT_SUB); |
101 | |
102 | # |
103 | # Clear the error |
104 | # |
105 | $self->error_out(""); |
106 | |
107 | return $self; |
16dc9970 |
108 | } |
ca10f295 |
109 | # }}} |
16dc9970 |
110 | |
ca10f295 |
111 | =head1 METHODS |
112 | |
113 | |
114 | =head2 B<producer> |
115 | |
116 | The B<producer> method is an accessor/mutator, used to retrieve or |
117 | define what subroutine is called to produce the output. A subroutine |
118 | defined as a producer subroutine will be invoked as a function (not a |
077ebf34 |
119 | method) and passed 2 parameters: its encompassing SQL::Translator |
120 | instance and a data structure. It is expected that the function |
121 | transform the data structure to the output format, and return a |
122 | string. The SQL::Transformer instance is provided for informational |
123 | purposes; the type of the parser, for example, can be retrieved using |
124 | the B<parser_type> method, and the B<error> and B<debug> methods can |
125 | be called when needed. |
ca10f295 |
126 | |
127 | When defining a producer, one of three things can be passed |
128 | in: A full module name (e.g., My::Groovy::Parser), a module name |
129 | relative to the SQL::Translator::Producer namespace (e.g., MySQL), or |
130 | a reference to an anonymous subroutine. If a full module name is |
131 | passed in, it is treated as a package, and a function called |
077ebf34 |
132 | "produce" will be invoked as $modulename::produce. |
ca10f295 |
133 | |
134 | my $tr = SQL::Translator->new; |
135 | |
077ebf34 |
136 | # This will invoke My::Groovy::Producer::produce($tr, $data) |
ca10f295 |
137 | $tr->producer("My::Groovy::Producer"); |
138 | |
077ebf34 |
139 | # This will invoke SQL::Translator::Producer::Sybase::produce($tr, $data) |
ca10f295 |
140 | $tr->producer("Sybase"); |
141 | |
077ebf34 |
142 | # This will invoke the referenced subroutine directly, as |
143 | # $subref->($tr, $data); |
ca10f295 |
144 | $tr->producer(\&my_producer); |
145 | |
077ebf34 |
146 | There is also a method named B<producer_type>, which is a string |
147 | containing the classname to which the above B<produce> function |
148 | belongs. In the case of anonymous subroutines, this method returns |
149 | the string "CODE". |
150 | |
ca10f295 |
151 | =cut |
ca10f295 |
152 | |
077ebf34 |
153 | # {{{ producer and producer_type |
ca10f295 |
154 | sub producer { |
16dc9970 |
155 | my $self = shift; |
ca10f295 |
156 | if (@_) { |
157 | my $producer = shift; |
158 | if ($producer =~ /::/) { |
077ebf34 |
159 | my $func_name; |
160 | if (load($producer)) { |
161 | $func_name = "produce"; |
162 | } else { |
163 | # Oops! Passed Module::Name::function; try to recover |
164 | my @func_parts = split /::/, $producer; |
165 | $func_name = pop @func_parts; |
166 | $producer = join "::", @func_parts; |
167 | load($producer) or die "Can't load $producer: $@"; |
168 | } |
169 | |
170 | $self->{'producer'} = \&{ "$producer\::$func_name" }; |
171 | $self->{'producer_type'} = $producer; |
172 | $self->debug("Got 'producer': $producer\::$func_name"); |
ca10f295 |
173 | } elsif (isa($producer, 'CODE')) { |
174 | $self->{'producer'} = $producer; |
077ebf34 |
175 | $self->{'producer_type'} = "CODE"; |
ca10f295 |
176 | $self->debug("Got 'producer': code ref"); |
177 | } else { |
178 | my $Pp = sprintf "SQL::Translator::Producer::$producer"; |
179 | load($Pp) or die "Can't load $Pp: $@"; |
077ebf34 |
180 | $self->{'producer'} = \&{ "$Pp\::produce" }; |
181 | $self->{'producer_type'} = $Pp; |
ca10f295 |
182 | $self->debug("Got producer: $Pp"); |
183 | } |
184 | # At this point, $self->{'producer'} contains a subroutine |
185 | # reference that is ready to run! |
16dc9970 |
186 | } |
ca10f295 |
187 | return $self->{'producer'}; |
188 | }; |
077ebf34 |
189 | |
190 | sub producer_type { $_[0]->{'producer_type'} } |
ca10f295 |
191 | # }}} |
192 | |
193 | =head2 B<parser> |
194 | |
195 | The B<parser> method defines or retrieves a subroutine that will be |
196 | called to perform the parsing. The basic idea is the same as that of |
197 | B<producer> (see above), except the default subroutine name is |
077ebf34 |
198 | "parse", and will be invoked as $module_name::parse($tr, $data). |
199 | Also, the parser subroutine will be passed a string containing the |
200 | entirety of the data to be parsed (or possibly a reference to a string?). |
ca10f295 |
201 | |
202 | # Invokes SQL::Translator::Parser::MySQL::parse() |
203 | $tr->parser("MySQL"); |
204 | |
205 | # Invokes My::Groovy::Parser::parse() |
206 | $tr->parser("My::Groovy::Parser"); |
207 | |
208 | # Invoke an anonymous subroutine directly |
209 | $tr->parser(sub { |
077ebf34 |
210 | my $dumper = Data::Dumper->new([ $_[1] ], [ "SQL" ]); |
ca10f295 |
211 | $dumper->Purity(1)->Terse(1)->Deepcopy(1); |
212 | return $dumper->Dump; |
213 | }); |
214 | |
215 | =cut |
216 | |
077ebf34 |
217 | # {{{ parser and parser_type |
ca10f295 |
218 | sub parser { |
219 | my $self = shift; |
220 | if (@_) { |
221 | my $parser = shift; |
222 | if ($parser =~ /::/) { |
223 | load($parser) or die "Can't load $parser: $@"; |
224 | $self->{'parser'} = \&{ "$parser\::parse" }; |
077ebf34 |
225 | $self->{'parser_type'} = $parser; |
ca10f295 |
226 | $self->debug("Got parser: $parser\::parse"); |
227 | } elsif (isa($parser, 'CODE')) { |
228 | $self->{'parser'} = $parser; |
077ebf34 |
229 | $self->{'parser_type'} = "CODE"; |
ca10f295 |
230 | $self->debug("Got parser: code ref"); |
231 | } else { |
232 | my $Pp = "SQL::Translator::Parser::$parser"; |
233 | load($Pp) or die "Can't load $Pp: $@"; |
234 | $self->{'parser'} = \&{ "$Pp\::parse" }; |
077ebf34 |
235 | $self->{'parser_type'} = $Pp; |
ca10f295 |
236 | $self->debug("Got parser: $Pp"); |
237 | } |
238 | # At this point, $self->{$pp} contains a subroutine |
239 | # reference that is ready to run! |
240 | } |
241 | return $self->{'parser'}; |
16dc9970 |
242 | } |
077ebf34 |
243 | |
244 | sub parser_type { $_[0]->{'parser_type'} } |
ca10f295 |
245 | # }}} |
16dc9970 |
246 | |
ca10f295 |
247 | =head2 B<translate> |
248 | |
249 | The B<translate> method calls the subroutines referenced by the |
250 | B<parser> and B<producer> data members (described above). It accepts |
251 | as arguments a number of things, in key => value format, including |
252 | (potentially) a parser and a producer (they are passed directly to the |
253 | B<parser> and B<producer> methods). |
254 | |
255 | Here is how the parameter list to B<translate> is parsed: |
256 | |
257 | =over |
258 | |
259 | =item * |
260 | |
261 | 1 argument means it's the data to be parsed; which could be a string |
262 | (filename), a reference to a GLOB (filehandle from which to read a |
263 | string), a refernce to a scalar (a string stored in memory), or a |
264 | reference to a hash (which means the same thing as below). |
265 | |
266 | # Parse the file /path/to/datafile |
267 | my $output = $tr->translate("/path/to/datafile"); |
268 | |
269 | # The same thing: |
270 | my $fh = IO::File->new("/path/to/datafile"); |
271 | my $output = $tr->translate($fh); |
272 | |
273 | # Again, the same thing: |
274 | my $fh = IO::File->new("/path/to/datafile"); |
275 | my $data = { local $/; <$fh> }; |
276 | my $output = $tr->translate(\$data); |
277 | |
278 | =item * |
279 | |
077ebf34 |
280 | More than 1 argument means its a hash of things, and it might be |
281 | setting a parser, producer, or datasource (this key is named |
282 | "filename" or "file" if it's a file, or "data" for a GLOB or |
283 | SCALAR reference). |
ca10f295 |
284 | |
285 | # As above, parse /path/to/datafile, but with different producers |
286 | for my $prod ("MySQL", "XML", "Sybase") { |
287 | print $tr->translate( |
288 | producer => $prod, |
289 | filename => "/path/to/datafile", |
290 | ); |
291 | } |
292 | |
293 | # The filename hash key could also be: |
294 | datasource => $fh, |
295 | |
296 | # or |
297 | datasource => \$data, |
298 | |
299 | You get the idea. |
300 | |
301 | =back |
302 | |
303 | =cut |
304 | |
305 | # {{{ translate |
16dc9970 |
306 | sub translate { |
ca10f295 |
307 | my $self = shift; |
308 | my ($args, $parser, $producer); |
309 | |
310 | if (@_ == 1) { |
311 | if (isa($_[0], 'HASH')) { |
312 | # Passed a hashref |
077ebf34 |
313 | $self->debug("translate: Got a hashref"); |
ca10f295 |
314 | $args = $_[0]; |
315 | } |
077ebf34 |
316 | elsif (my $getlines = can($_[0], "getlines")) { |
317 | # passed a IO::Handle derivative |
318 | # XXX Something about this does not work! |
319 | # XXX look into how Template does this. |
320 | $self->debug("translate: Got a IO::Handle subclass (can getlines)"); |
321 | my $fh = $_[0]; |
322 | $fh->setpos(0); |
323 | my $data = join '', $fh->$getlines; |
324 | $args = { data => $data }; |
325 | } |
ca10f295 |
326 | elsif (isa($_[0], 'GLOB')) { |
327 | # passed a filehandle; slurp it |
077ebf34 |
328 | $self->debug("translate: Got a GLOB"); |
ca10f295 |
329 | local $/; |
330 | $args = { data => <$_[0]> }; |
331 | } |
332 | elsif (isa($_[0], 'SCALAR')) { |
333 | # passed a ref to a string; deref it |
077ebf34 |
334 | $self->debug("translate: Got a SCALAR reference (string)"); |
ca10f295 |
335 | $args = { data => ${$_[0]} }; |
336 | } |
337 | else { |
338 | # Not a ref, it's a filename |
077ebf34 |
339 | $self->debug("translate: Got a filename"); |
ca10f295 |
340 | $args = { filename => $_[0] }; |
341 | } |
16dc9970 |
342 | } |
343 | else { |
ca10f295 |
344 | # Should we check if @_ % 2, or just eat the errors if they occur? |
345 | $args = { @_ }; |
16dc9970 |
346 | } |
347 | |
ca10f295 |
348 | if ((defined $args->{'filename'} || |
349 | defined $args->{'file'} ) && not $args->{'data'}) { |
350 | local *FH; |
351 | local $/; |
352 | |
077ebf34 |
353 | open FH, $args->{'filename'} or die "Can't open $args->{'filename'}: $!"; |
ca10f295 |
354 | $args->{'data'} = <FH>; |
355 | close FH or die $!; |
16dc9970 |
356 | } |
ca10f295 |
357 | |
358 | # |
359 | # Last chance to bail out; if there's nothing in the data |
360 | # key of %args, back out. |
361 | # |
362 | return unless defined $args->{'data'}; |
363 | |
077ebf34 |
364 | use Data::Dumper; |
365 | warn Dumper($args); |
366 | |
ca10f295 |
367 | # |
368 | # Local reference to the parser subroutine |
369 | # |
370 | if ($parser = ($args->{'parser'} || $args->{'from'})) { |
371 | $self->parser($parser); |
372 | } else { |
373 | $parser = $self->parser; |
16dc9970 |
374 | } |
375 | |
376 | # |
ca10f295 |
377 | # Local reference to the producer subroutine |
16dc9970 |
378 | # |
ca10f295 |
379 | if ($producer = ($args->{'producer'} || $args->{'to'})) { |
380 | $self->producer($producer); |
381 | } else { |
382 | $producer = $self->producer; |
16dc9970 |
383 | } |
384 | |
ca10f295 |
385 | # |
386 | # Execute the parser, then execute the producer with that output |
387 | # |
077ebf34 |
388 | my $translated = $parser->($self, $args->{'data'}); |
ca10f295 |
389 | |
077ebf34 |
390 | return $producer->($self, $translated); |
16dc9970 |
391 | } |
ca10f295 |
392 | # }}} |
393 | |
394 | =head2 B<error> |
16dc9970 |
395 | |
ca10f295 |
396 | The error method returns the last error. |
397 | |
398 | =cut |
399 | |
400 | # {{{ error |
16dc9970 |
401 | #----------------------------------------------------- |
ca10f295 |
402 | sub error { |
16dc9970 |
403 | # |
ca10f295 |
404 | # Return the last error. |
16dc9970 |
405 | # |
ca10f295 |
406 | return shift()->{'error'} || ''; |
407 | } |
408 | # }}} |
409 | |
410 | =head2 B<error_out> |
411 | |
412 | Record the error and return undef. The error can be retrieved by |
413 | calling programs using $tr->error. |
414 | |
415 | For Parser or Producer writers, primarily. |
416 | |
417 | =cut |
418 | |
419 | # {{{ error_out |
420 | sub error_out { |
16dc9970 |
421 | my $self = shift; |
ca10f295 |
422 | if ( my $error = shift ) { |
423 | $self->{'error'} = $error; |
16dc9970 |
424 | } |
ca10f295 |
425 | return; |
16dc9970 |
426 | } |
ca10f295 |
427 | # }}} |
16dc9970 |
428 | |
ca10f295 |
429 | =head2 B<debug> |
430 | |
431 | If the global variable $SQL::Translator::DEBUG is set to a true value, |
432 | then calls to $tr->debug($msg) will be carped to STDERR. If $DEBUG is |
433 | not set, then this method does nothing. |
434 | |
435 | =cut |
436 | |
437 | # {{{ debug |
438 | use Carp qw(carp); |
439 | sub debug { |
16dc9970 |
440 | my $self = shift; |
ca10f295 |
441 | carp @_ if ($DEBUG); |
16dc9970 |
442 | } |
ca10f295 |
443 | # }}} |
444 | |
445 | # {{{ load |
446 | sub load { |
447 | my $module = do { my $m = shift; $m =~ s[::][/]g; "$m.pm" }; |
448 | return 1 if $INC{$module}; |
449 | |
450 | eval { require $module }; |
451 | |
452 | return if ($@); |
453 | return 1; |
454 | } |
455 | # }}} |
16dc9970 |
456 | |
457 | 1; |
458 | |
ca10f295 |
459 | __END__ |
16dc9970 |
460 | #----------------------------------------------------- |
461 | # Rescue the drowning and tie your shoestrings. |
462 | # Henry David Thoreau |
463 | #----------------------------------------------------- |
464 | |
ca10f295 |
465 | =head1 AUTHOR |
16dc9970 |
466 | |
ca10f295 |
467 | Ken Y. Clark, E<lt>kclark@logsoft.comE<gt>, |
468 | darren chamberlain E<lt>darren@cpan.orgE<gt> |
16dc9970 |
469 | |
ca10f295 |
470 | =head1 COPYRIGHT |
16dc9970 |
471 | |
ca10f295 |
472 | This program is free software; you can redistribute it and/or modify |
473 | it under the terms of the GNU General Public License as published by |
474 | the Free Software Foundation; version 2. |
16dc9970 |
475 | |
ca10f295 |
476 | This program is distributed in the hope that it will be useful, but |
477 | WITHOUT ANY WARRANTY; without even the implied warranty of |
478 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
479 | General Public License for more details. |
16dc9970 |
480 | |
ca10f295 |
481 | You should have received a copy of the GNU General Public License |
482 | along with this program; if not, write to the Free Software |
483 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
484 | USA |
16dc9970 |
485 | |
486 | =head1 SEE ALSO |
487 | |
ca10f295 |
488 | L<perl>, L<Parse::RecDescent> |
16dc9970 |
489 | |
490 | =cut |