lib/SQL/Translator/Utils.pm

   1 package SQL::Translator::Utils;
   2
   3 use strict;
   4 use warnings;
   5 use Digest::SHA qw( sha1_hex );
   6 use File::Spec;
   7 use Class::Unload;
   8
   9 our $VERSION = '1.59';
  10 our $DEFAULT_COMMENT = '-- ';
  11
  12 use base qw(Exporter);
  13 our @EXPORT_OK = qw(
  14     debug normalize_name header_comment parse_list_arg truncate_id_uniquely
  15     $DEFAULT_COMMENT parse_mysql_version parse_dbms_version
  16     ddl_parser_instance
  17 );
  18 use constant COLLISION_TAG_LENGTH => 8;
  19
  20 sub debug {
  21     my ($pkg, $file, $line, $sub) = caller(0);
  22     {
  23         no strict qw(refs);
  24         return unless ${"$pkg\::DEBUG"};
  25     }
  26
  27     $sub =~ s/^$pkg\:://;
  28
  29     while (@_) {
  30         my $x = shift;
  31         chomp $x;
  32         $x =~ s/\bPKG\b/$pkg/g;
  33         $x =~ s/\bLINE\b/$line/g;
  34         $x =~ s/\bSUB\b/$sub/g;
  35         #warn '[' . $x . "]\n";
  36         print STDERR '[' . $x . "]\n";
  37     }
  38 }
  39
  40 sub normalize_name {
  41     my $name = shift or return '';
  42
  43     # The name can only begin with a-zA-Z_; if there's anything
  44     # else, prefix with _
  45     $name =~ s/^([^a-zA-Z_])/_$1/;
  46
  47     # anything other than a-zA-Z0-9_ in the non-first position
  48     # needs to be turned into _
  49     $name =~ tr/[a-zA-Z0-9_]/_/c;
  50
  51     # All duplicated _ need to be squashed into one.
  52     $name =~ tr/_/_/s;
  53
  54     # Trim a trailing _
  55     $name =~ s/_$//;
  56
  57     return $name;
  58 }
  59
  60 sub header_comment {
  61     my $producer = shift || caller;
  62     my $comment_char = shift;
  63     my $now = scalar localtime;
  64
  65     $comment_char = $DEFAULT_COMMENT
  66         unless defined $comment_char;
  67
  68     my $header_comment =<<"HEADER_COMMENT";
  69 ${comment_char}
  70 ${comment_char}Created by $producer
  71 ${comment_char}Created on $now
  72 ${comment_char}
  73 HEADER_COMMENT
  74
  75     # Any additional stuff passed in
  76     for my $additional_comment (@_) {
  77         $header_comment .= "${comment_char}${additional_comment}\n";
  78     }
  79
  80     return $header_comment;
  81 }
  82
  83 sub parse_list_arg {
  84     my $list = UNIVERSAL::isa( $_[0], 'ARRAY' ) ? shift : [ @_ ];
  85
  86     #
  87     # This protects stringification of references.
  88     #
  89     if ( @$list && ref $list->[0] ) {
  90         return $list;
  91     }
  92     #
  93     # This processes string-like arguments.
  94     #
  95     else {
  96         return [
  97             map { s/^\s+|\s+$//g; $_ }
  98             map { split /,/ }
  99             grep { defined && length } @$list
 100         ];
 101     }
 102 }
 103
 104 sub truncate_id_uniquely {
 105     my ( $desired_name, $max_symbol_length ) = @_;
 106
 107     return $desired_name
 108       unless defined $desired_name && length $desired_name > $max_symbol_length;
 109
 110     my $truncated_name = substr $desired_name, 0,
 111       $max_symbol_length - COLLISION_TAG_LENGTH - 1;
 112
 113     # Hex isn't the most space-efficient, but it skirts around allowed
 114     # charset issues
 115     my $digest = sha1_hex($desired_name);
 116     my $collision_tag = substr $digest, 0, COLLISION_TAG_LENGTH;
 117
 118     return $truncated_name
 119          . '_'
 120          . $collision_tag;
 121 }
 122
 123
 124 sub parse_mysql_version {
 125     my ($v, $target) = @_;
 126
 127     return undef unless $v;
 128
 129     $target ||= 'perl';
 130
 131     my @vers;
 132
 133     # X.Y.Z style
 134     if ( $v =~ / ^ (\d+) \. (\d{1,3}) (?: \. (\d{1,3}) )? $ /x ) {
 135         push @vers, $1, $2, $3;
 136     }
 137
 138     # XYYZZ (mysql) style
 139     elsif ( $v =~ / ^ (\d) (\d{2}) (\d{2}) $ /x ) {
 140         push @vers, $1, $2, $3;
 141     }
 142
 143     # XX.YYYZZZ (perl) style or simply X
 144     elsif ( $v =~ / ^ (\d+) (?: \. (\d{3}) (\d{3}) )? $ /x ) {
 145         push @vers, $1, $2, $3;
 146     }
 147     else {
 148         #how do I croak sanely here?
 149         die "Unparseable MySQL version '$v'";
 150     }
 151
 152     if ($target eq 'perl') {
 153         return sprintf ('%d.%03d%03d', map { $_ || 0 } (@vers) );
 154     }
 155     elsif ($target eq 'mysql') {
 156         return sprintf ('%d%02d%02d', map { $_ || 0 } (@vers) );
 157     }
 158     else {
 159         #how do I croak sanely here?
 160         die "Unknown version target '$target'";
 161     }
 162 }
 163
 164 sub parse_dbms_version {
 165     my ($v, $target) = @_;
 166
 167     return undef unless $v;
 168
 169     my @vers;
 170
 171     # X.Y.Z style
 172     if ( $v =~ / ^ (\d+) \. (\d{1,3}) (?: \. (\d{1,3}) )? $ /x ) {
 173         push @vers, $1, $2, $3;
 174     }
 175
 176     # XX.YYYZZZ (perl) style or simply X
 177     elsif ( $v =~ / ^ (\d+) (?: \. (\d{3}) (\d{3}) )? $ /x ) {
 178         push @vers, $1, $2, $3;
 179     }
 180     else {
 181         #how do I croak sanely here?
 182         die "Unparseable database server version '$v'";
 183     }
 184
 185     if ($target eq 'perl') {
 186         return sprintf ('%d.%03d%03d', map { $_ || 0 } (@vers) );
 187     }
 188     elsif ($target eq 'native') {
 189         return join '.' => grep defined, @vers;
 190     }
 191     else {
 192         #how do I croak sanely here?
 193         die "Unknown version target '$target'";
 194     }
 195 }
 196
 197 my ($parsers_libdir, $checkout_dir);
 198 sub ddl_parser_instance {
 199     my $type = shift;
 200
 201     # it may differ from our caller, even though currently this is not the case
 202     eval "require SQL::Translator::Parser::$type"
 203         or die "Unable to load grammar-spec container SQL::Translator::Parser::$type:\n$@";
 204
 205     unless ($parsers_libdir) {
 206
 207         # are we in a checkout?
 208         if ($checkout_dir = _find_co_root()) {
 209             $parsers_libdir = File::Spec->catdir($checkout_dir, 'share', 'PrecompiledParsers');
 210         }
 211         else {
 212             require File::ShareDir;
 213             $parsers_libdir = File::Spec->catdir(
 214               File::ShareDir::dist_dir('SQL-Translator'),
 215               'PrecompiledParsers'
 216             );
 217         }
 218
 219         unshift @INC, $parsers_libdir;
 220     }
 221
 222     my $precompiled_mod = "Parse::RecDescent::DDL::SQLT::$type";
 223
 224     # FIXME FIXME FIXME
 225     # Parse::RecDescent has horrible architecture where each precompiled parser
 226     # instance shares global state with all its siblings
 227     # What we do here is gross, but scarily efficient - the parser compilation
 228     # is much much slower than an unload/reload cycle
 229     Class::Unload->unload($precompiled_mod);
 230
 231     # There is also a sub-namespace that P::RD uses, but simply unsetting
 232     # $^W to stop redefine warnings seems to be enough
 233     #Class::Unload->unload("Parse::RecDescent::$precompiled_mod");
 234
 235     eval "local \$^W; require $precompiled_mod" or do {
 236         if ($checkout_dir) {
 237             die "Unable to find precompiled grammar for $type - run Makefile.PL to generate it\n";
 238         }
 239         else {
 240             die "Unable to load precompiled grammar for $type... this is not supposed to happen if you are not in a checkout, please file a bugreport:\n$@"
 241         }
 242     };
 243
 244     my $grammar_spec_fn = $INC{"SQL/Translator/Parser/$type.pm"};
 245     my $precompiled_fn = $INC{"Parse/RecDescent/DDL/SQLT/$type.pm"};
 246
 247     if (
 248         (stat($grammar_spec_fn))[9]
 249             >
 250         (stat($precompiled_fn))[9]
 251     ) {
 252         die (
 253             "Grammar spec '$grammar_spec_fn' is newer than precompiled parser '$precompiled_fn'"
 254           . ($checkout_dir
 255                 ? " - run Makefile.PL to regenerate stale versions\n"
 256                 : "... this is not supposed to happen if you are not in a checkout, please file a bugreport\n"
 257             )
 258         );
 259     }
 260
 261     return $precompiled_mod->new;
 262 }
 263
 264 # Try to determine the root of a checkout/untar if possible
 265 # or return undef
 266 sub _find_co_root {
 267
 268     my @mod_parts = split /::/, (__PACKAGE__ . '.pm');
 269     my $rel_path = join ('/', @mod_parts);  # %INC stores paths with / regardless of OS
 270
 271     return undef unless ($INC{$rel_path});
 272
 273     # a bit convoluted, but what we do here essentially is:
 274     #  - get the file name of this particular module
 275     #  - do 'cd ..' as many times as necessary to get to lib/SQL/Translator/../../..
 276
 277     my $root = (File::Spec::Unix->splitpath($INC{$rel_path}))[1];
 278     for (1 .. @mod_parts) {
 279         $root = File::Spec->catdir($root, File::Spec->updir);
 280     }
 281
 282     return ( -f File::Spec->catfile($root, 'Makefile.PL') )
 283         ? $root
 284         : undef
 285     ;
 286 }
 287
 288 1;
 289
 290 =pod
 291
 292 =head1 NAME
 293
 294 SQL::Translator::Utils - SQL::Translator Utility functions
 295
 296 =head1 SYNOPSIS
 297
 298   use SQL::Translator::Utils qw(debug);
 299   debug("PKG: Bad things happened");
 300
 301 =head1 DESCRIPTION
 302
 303 C<SQL::Translator::Utils> contains utility functions designed to be
 304 used from the other modules within the C<SQL::Translator> modules.
 305
 306 Nothing is exported by default.
 307
 308 =head1 EXPORTED FUNCTIONS AND CONSTANTS
 309
 310 =head2 debug
 311
 312 C<debug> takes 0 or more messages, which will be printed to STDERR.
 313 Occurrences of the strings I<PKG>, I<SUB>, and I<LINE>
 314 will be replaced by the calling package, subroutine, and line number,
 315 respectively, as reported by C<caller(1)>.
 316
 317 For example, from within C<foo> in F<SQL/Translator.pm>, at line 666:
 318
 319   debug("PKG: Error reading file at SUB/LINE");
 320
 321 Will warn
 322
 323   [SQL::Translator: Error reading file at foo/666]
 324
 325 The entire message is enclosed within C<[> and C<]> for visual clarity
 326 when STDERR is intermixed with STDOUT.
 327
 328 =head2 normalize_name
 329
 330 C<normalize_name> takes a string and ensures that it is suitable for
 331 use as an identifier.  This means: ensure that it starts with a letter
 332 or underscore, and that the rest of the string consists of only
 333 letters, numbers, and underscores.  A string that begins with
 334 something other than [a-zA-Z_] will be prefixed with an underscore, and
 335 all other characters in the string will be replaced with underscores.
 336 Finally, a trailing underscore will be removed, because that's ugly.
 337
 338   normalize_name("Hello, world");
 339
 340 Produces:
 341
 342   Hello_world
 343
 344 A more useful example, from the C<SQL::Translator::Parser::Excel> test
 345 suite:
 346
 347   normalize_name("silly field (with random characters)");
 348
 349 returns:
 350
 351   silly_field_with_random_characters
 352
 353 =head2 header_comment
 354
 355 Create the header comment.  Takes 1 mandatory argument (the producer
 356 classname), an optional comment character (defaults to $DEFAULT_COMMENT),
 357 and 0 or more additional comments, which will be appended to the header,
 358 prefixed with the comment character.  If additional comments are provided,
 359 then a comment string must be provided ($DEFAULT_COMMENT is exported for
 360 this use).  For example, this:
 361
 362   package My::Producer;
 363
 364   use SQL::Translator::Utils qw(header_comment $DEFAULT_COMMENT);
 365
 366   print header_comment(__PACKAGE__,
 367                        $DEFAULT_COMMENT,
 368                        "Hi mom!");
 369
 370 produces:
 371
 372   --
 373   -- Created by My::Producer
 374   -- Created on Fri Apr 25 06:56:02 2003
 375   --
 376   -- Hi mom!
 377   --
 378
 379 Note the gratuitous spacing.
 380
 381 =head2 parse_list_arg
 382
 383 Takes a string, list or arrayref (all of which could contain
 384 comma-separated values) and returns an array reference of the values.
 385 All of the following will return equivalent values:
 386
 387   parse_list_arg('id');
 388   parse_list_arg('id', 'name');
 389   parse_list_arg( 'id, name' );
 390   parse_list_arg( [ 'id', 'name' ] );
 391   parse_list_arg( qw[ id name ] );
 392
 393 =head2 truncate_id_uniquely
 394
 395 Takes a string ($desired_name) and int ($max_symbol_length). Truncates
 396 $desired_name to $max_symbol_length by including part of the hash of
 397 the full name at the end of the truncated name, giving a high
 398 probability that the symbol will be unique. For example,
 399
 400   truncate_id_uniquely( 'a' x 100, 64 )
 401   truncate_id_uniquely( 'a' x 99 . 'b', 64 );
 402   truncate_id_uniquely( 'a' x 99,  64 )
 403
 404 Will give three different results; specifically:
 405
 406   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_7f900025
 407   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_6191e39a
 408   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_8cd96af2
 409
 410 =head2 $DEFAULT_COMMENT
 411
 412 This is the default comment string, '-- ' by default.  Useful for
 413 C<header_comment>.
 414
 415 =head2 parse_mysql_version
 416
 417 Used by both L<Parser::MySQL|SQL::Translator::Parser::MySQL> and
 418 L<Producer::MySQL|SQL::Translator::Producer::MySQL> in order to provide a
 419 consistent format for both C<< parser_args->{mysql_parser_version} >> and
 420 C<< producer_args->{mysql_version} >> respectively. Takes any of the following
 421 version specifications:
 422
 423   5.0.3
 424   4.1
 425   3.23.2
 426   5
 427   5.001005  (perl style)
 428   30201     (mysql style)
 429
 430 =head2 parse_dbms_version
 431
 432 Takes a version string (X.Y.Z) or perl style (XX.YYYZZZ) and a target ('perl'
 433 or 'native'), and transforms the string to the given target
 434 style.
 435
 436 =head1 AUTHORS
 437
 438 Darren Chamberlain E<lt>darren@cpan.orgE<gt>,
 439 Ken Y. Clark E<lt>kclark@cpan.orgE<gt>.
 440
 441 =cut