Release 1.61
[scpubgit/Q-Branch.git] / lib / SQL / Abstract / Test.pm
CommitLineData
fffe6900 1package SQL::Abstract::Test; # see doc at end of file
2
3use strict;
4use warnings;
5aad8cf3 5use base qw/Test::Builder::Module Exporter/;
fffe6900 6use Data::Dumper;
7use Carp;
4abea32b 8use Test::Builder;
9use Test::Deep qw(eq_deeply);
fffe6900 10
e7827ba2 11our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind
12 &eq_sql_bind &eq_sql &eq_bind
fffe6900 13 $case_sensitive $sql_differ/;
14
15our $case_sensitive = 0;
e40f5df9 16our $parenthesis_significant = 0;
fffe6900 17our $sql_differ; # keeps track of differing portion between SQLs
5aad8cf3 18our $tb = __PACKAGE__->builder;
fffe6900 19
25823711 20# Parser states for _recurse_parse()
01b64cb7 21use constant PARSE_TOP_LEVEL => 0;
5221d7fc 22use constant PARSE_IN_EXPR => 1;
23use constant PARSE_IN_PARENS => 2;
01b64cb7 24use constant PARSE_RHS => 3;
25823711 25
26# These SQL keywords always signal end of the current expression (except inside
27# of a parenthesized subexpression).
28# Format: A list of strings that will be compiled to extended syntax (ie.
29# /.../x) regexes, without capturing parentheses. They will be automatically
30# anchored to word boundaries to match the whole token).
31my @expression_terminator_sql_keywords = (
1b17d1b0 32 'SELECT',
25823711 33 'FROM',
34 '(?:
35 (?:
36 (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
37 (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
38 )?
39 JOIN
40 )',
41 'ON',
42 'WHERE',
b9a4fdae 43 'EXISTS',
25823711 44 'GROUP \s+ BY',
45 'HAVING',
46 'ORDER \s+ BY',
47 'LIMIT',
48 'OFFSET',
49 'FOR',
50 'UNION',
51 'INTERSECT',
52 'EXCEPT',
02288357 53 'RETURNING',
25823711 54);
55
01b64cb7 56# These are binary operator keywords always a single LHS and RHS
57# * AND/OR are handled separately as they are N-ary
9e8dab3f 58# * so is NOT as being unary
01b64cb7 59# * BETWEEN without paranthesis around the ANDed arguments (which
60# makes it a non-binary op) is detected and accomodated in
61# _recurse_parse()
b9a4fdae 62my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/;
01b64cb7 63my @binary_op_keywords = (
30d09fa9 64 ( map
b9a4fdae 65 {
66 ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ",
67 " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ",
68 }
69 (qw/< > != <> = <= >=/)
30d09fa9 70 ),
71 ( map
72 { '\b (?: NOT \s+)?' . $_ . '\b' }
73 (qw/IN BETWEEN LIKE/)
74 ),
01b64cb7 75);
76
77my $tokenizer_re_str = join("\n\t|\n",
9e8dab3f 78 ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'),
30d09fa9 79 @binary_op_keywords,
01b64cb7 80);
81
b9a4fdae 82my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi;
01b64cb7 83
35149895 84# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics
01b64cb7 85my @unrollable_ops = (
1b17d1b0 86 'ON',
87 'WHERE',
88 'GROUP \s+ BY',
89 'HAVING',
90 'ORDER \s+ BY',
91);
92
fffe6900 93sub is_same_sql_bind {
94 my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
95
96 # compare
25823711 97 my $same_sql = eq_sql($sql1, $sql2);
fffe6900 98 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
99
a6daa642 100 # call Test::Builder::ok
1a828f61 101 my $ret = $tb->ok($same_sql && $same_bind, $msg);
fffe6900 102
103 # add debugging info
104 if (!$same_sql) {
e7827ba2 105 _sql_differ_diag($sql1, $sql2);
fffe6900 106 }
107 if (!$same_bind) {
e7827ba2 108 _bind_differ_diag($bind_ref1, $bind_ref2);
fffe6900 109 }
1a828f61 110
111 # pass ok() result further
112 return $ret;
fffe6900 113}
114
e7827ba2 115sub is_same_sql {
116 my ($sql1, $sql2, $msg) = @_;
117
118 # compare
119 my $same_sql = eq_sql($sql1, $sql2);
120
121 # call Test::Builder::ok
1a828f61 122 my $ret = $tb->ok($same_sql, $msg);
e7827ba2 123
124 # add debugging info
125 if (!$same_sql) {
126 _sql_differ_diag($sql1, $sql2);
127 }
1a828f61 128
129 # pass ok() result further
130 return $ret;
e7827ba2 131}
132
133sub is_same_bind {
134 my ($bind_ref1, $bind_ref2, $msg) = @_;
135
136 # compare
137 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
138
139 # call Test::Builder::ok
1a828f61 140 my $ret = $tb->ok($same_bind, $msg);
e7827ba2 141
142 # add debugging info
143 if (!$same_bind) {
144 _bind_differ_diag($bind_ref1, $bind_ref2);
145 }
1a828f61 146
147 # pass ok() result further
148 return $ret;
e7827ba2 149}
150
151sub _sql_differ_diag {
152 my ($sql1, $sql2) = @_;
153
154 $tb->diag("SQL expressions differ\n"
155 ." got: $sql1\n"
156 ."expected: $sql2\n"
157 ."differing in :\n$sql_differ\n"
158 );
159}
160
161sub _bind_differ_diag {
162 my ($bind_ref1, $bind_ref2) = @_;
163
164 $tb->diag("BIND values differ\n"
165 ." got: " . Dumper($bind_ref1)
166 ."expected: " . Dumper($bind_ref2)
167 );
168}
169
170sub eq_sql_bind {
171 my ($sql1, $bind_ref1, $sql2, $bind_ref2) = @_;
172
173 return eq_sql($sql1, $sql2) && eq_bind($bind_ref1, $bind_ref2);
174}
175
176
fffe6900 177sub eq_bind {
178 my ($bind_ref1, $bind_ref2) = @_;
fffe6900 179
4abea32b 180 return eq_deeply($bind_ref1, $bind_ref2);
fffe6900 181}
182
183sub eq_sql {
25823711 184 my ($sql1, $sql2) = @_;
185
186 # parse
187 my $tree1 = parse($sql1);
188 my $tree2 = parse($sql2);
189
1b17d1b0 190 return 1 if _eq_sql($tree1, $tree2);
25823711 191}
192
193sub _eq_sql {
fffe6900 194 my ($left, $right) = @_;
195
939db550 196 # one is defined the other not
197 if ( (defined $left) xor (defined $right) ) {
198 return 0;
199 }
200 # one is undefined, then so is the other
201 elsif (not defined $left) {
d15c14cc 202 return 1;
203 }
1b17d1b0 204 # one is a list, the other is an op with a list
205 elsif (ref $left->[0] xor ref $right->[0]) {
206 $sql_differ = sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) );
fffe6900 207 return 0;
208 }
1b17d1b0 209 # one is a list, so is the other
210 elsif (ref $left->[0]) {
211 for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) {
212 return 0 if (not _eq_sql ($left->[$i], $right->[$i]) );
213 }
214 return 1;
215 }
216 # both are an op-list combo
217 else {
218
e40f5df9 219 # unroll parenthesis if possible/allowed
220 _parenthesis_unroll ($_) for ($left, $right);
1b17d1b0 221
222 # if operators are different
b9a4fdae 223 if ( $left->[0] ne $right->[0] ) {
1b17d1b0 224 $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n",
225 unparse($left),
226 unparse($right);
227 return 0;
228 }
229 # elsif operators are identical, compare operands
230 else {
b9a4fdae 231 if ($left->[0] eq 'LITERAL' ) { # unary
01b64cb7 232 (my $l = " $left->[1][0] " ) =~ s/\s+/ /g;
233 (my $r = " $right->[1][0] ") =~ s/\s+/ /g;
1b17d1b0 234 my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r);
01b64cb7 235 $sql_differ = "[$l] != [$r]\n" if not $eq;
1b17d1b0 236 return $eq;
237 }
238 else {
239 my $eq = _eq_sql($left->[1], $right->[1]);
240 $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ) if not $eq;
241 return $eq;
242 }
fffe6900 243 }
244 }
245}
246
fffe6900 247sub parse {
248 my $s = shift;
249
25823711 250 # tokenize string, and remove all optional whitespace
251 my $tokens = [];
252 foreach my $token (split $tokenizer_re, $s) {
b9a4fdae 253 push @$tokens, $token if (length $token) && ($token =~ /\S/);
25823711 254 }
fffe6900 255
25823711 256 my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
fffe6900 257 return $tree;
258}
259
260sub _recurse_parse {
25823711 261 my ($tokens, $state) = @_;
fffe6900 262
263 my $left;
264 while (1) { # left-associative parsing
265
266 my $lookahead = $tokens->[0];
1b17d1b0 267 if ( not defined($lookahead)
268 or
269 ($state == PARSE_IN_PARENS && $lookahead eq ')')
270 or
01b64cb7 271 ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) )
272 or
9e8dab3f 273 ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) )
1b17d1b0 274 ) {
1b17d1b0 275 return $left;
1b17d1b0 276 }
fffe6900 277
278 my $token = shift @$tokens;
279
280 # nested expression in ()
b9a4fdae 281 if ($token eq '(' ) {
25823711 282 my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
09abf3a0 283 $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right);
284 $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right);
1b17d1b0 285 $left = $left ? [@$left, [PAREN => [$right] ]]
286 : [PAREN => [$right] ];
fffe6900 287 }
01b64cb7 288 # AND/OR
1b17d1b0 289 elsif ($token =~ /^ (?: OR | AND ) $/xi ) {
290 my $op = uc $token;
25823711 291 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 292
293 # Merge chunks if logic matches
294 if (ref $right and $op eq $right->[0]) {
295 $left = [ (shift @$right ), [$left, map { @$_ } @$right] ];
296 }
297 else {
298 $left = [$op => [$left, $right]];
299 }
fffe6900 300 }
01b64cb7 301 # binary operator keywords
302 elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) {
303 my $op = uc $token;
304 my $right = _recurse_parse($tokens, PARSE_RHS);
305
b9a4fdae 306 # A between with a simple LITERAL for a 1st RHS argument needs a
01b64cb7 307 # rerun of the search to (hopefully) find the proper AND construct
b9a4fdae 308 if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') {
01b64cb7 309 unshift @$tokens, $right->[1][0];
310 $right = _recurse_parse($tokens, PARSE_IN_EXPR);
311 }
312
313 $left = [$op => [$left, $right] ];
314 }
25823711 315 # expression terminator keywords (as they start a new expression)
1b17d1b0 316 elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) {
01b64cb7 317 my $op = uc $token;
25823711 318 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 319 $left = $left ? [@$left, [$op => [$right] ]]
01b64cb7 320 : [[ $op => [$right] ]];
25823711 321 }
9e8dab3f 322 # NOT (last as to allow all other NOT X pieces first)
323 elsif ( $token =~ /^ not $/ix ) {
324 my $op = uc $token;
325 my $right = _recurse_parse ($tokens, PARSE_RHS);
326 $left = $left ? [ @$left, [$op => [$right] ]]
327 : [[ $op => [$right] ]];
328
329 }
b9a4fdae 330 # literal (eat everything on the right until RHS termination)
fffe6900 331 else {
b9a4fdae 332 my $right = _recurse_parse ($tokens, PARSE_RHS);
333 $left = $left ? [$left, [LITERAL => [join ' ', $token, unparse($right)||()] ] ]
334 : [ LITERAL => [join ' ', $token, unparse($right)||()] ];
fffe6900 335 }
336 }
337}
338
e40f5df9 339sub _parenthesis_unroll {
340 my $ast = shift;
341
342 return if $parenthesis_significant;
343 return unless (ref $ast and ref $ast->[1]);
344
345 my $changes;
346 do {
347 my @children;
348 $changes = 0;
349
350 for my $child (@{$ast->[1]}) {
351 if (not ref $child or not $child->[0] eq 'PAREN') {
352 push @children, $child;
353 next;
354 }
355
356 # unroll nested parenthesis
357 while ($child->[1][0][0] eq 'PAREN') {
358 $child = $child->[1][0];
359 $changes++;
360 }
361
362 # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list
363 if (
364 ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR')
365 and
366 $child->[1][0][0] eq $ast->[0]
367 ) {
368 push @children, @{$child->[1][0][1]};
369 $changes++;
370 }
fffe6900 371
e40f5df9 372 # if the parent operator explcitly allows it nuke the parenthesis
373 elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) {
374 push @children, $child->[1][0];
375 $changes++;
376 }
377
b9a4fdae 378 # only one LITERAL element in the parenthesis
9e8dab3f 379 elsif (
b9a4fdae 380 @{$child->[1]} == 1 && $child->[1][0][0] eq 'LITERAL'
9e8dab3f 381 ) {
382 push @children, $child->[1][0];
383 $changes++;
384 }
385
b9a4fdae 386 # only one element in the parenthesis which is a binary op with two LITERAL sub-children
e40f5df9 387 elsif (
388 @{$child->[1]} == 1
389 and
390 grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords)
391 and
b9a4fdae 392 $child->[1][0][1][0][0] eq 'LITERAL'
e40f5df9 393 and
b9a4fdae 394 $child->[1][0][1][1][0] eq 'LITERAL'
e40f5df9 395 ) {
396 push @children, $child->[1][0];
397 $changes++;
398 }
399
400 # otherwise no more mucking for this pass
401 else {
402 push @children, $child;
403 }
404 }
405
406 $ast->[1] = \@children;
407
408 } while ($changes);
409
410}
fffe6900 411
412sub unparse {
413 my $tree = shift;
1b17d1b0 414
415 if (not $tree ) {
416 return '';
417 }
418 elsif (ref $tree->[0]) {
419 return join (" ", map { unparse ($_) } @$tree);
420 }
b9a4fdae 421 elsif ($tree->[0] eq 'LITERAL') {
01b64cb7 422 return $tree->[1][0];
1b17d1b0 423 }
424 elsif ($tree->[0] eq 'PAREN') {
01b64cb7 425 return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]});
1b17d1b0 426 }
01b64cb7 427 elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) {
1b17d1b0 428 return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]});
429 }
430 else {
431 return sprintf '%s %s', $tree->[0], unparse ($tree->[1]);
432 }
fffe6900 433}
434
435
4361;
437
438
439__END__
440
441=head1 NAME
442
443SQL::Abstract::Test - Helper function for testing SQL::Abstract
444
445=head1 SYNOPSIS
446
447 use SQL::Abstract;
448 use Test::More;
e7827ba2 449 use SQL::Abstract::Test import => [qw/
450 is_same_sql_bind is_same_sql is_same_bind
451 eq_sql_bind eq_sql eq_bind
452 /];
ec9af79e 453
fffe6900 454 my ($sql, @bind) = SQL::Abstract->new->select(%args);
e7827ba2 455
fffe6900 456 is_same_sql_bind($given_sql, \@given_bind,
457 $expected_sql, \@expected_bind, $test_msg);
458
e7827ba2 459 is_same_sql($given_sql, $expected_sql, $test_msg);
460 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
461
462 my $is_same = eq_sql_bind($given_sql, \@given_bind,
463 $expected_sql, \@expected_bind);
464
465 my $sql_same = eq_sql($given_sql, $expected_sql);
466 my $bind_same = eq_bind(\@given_bind, \@expected_bind);
467
fffe6900 468=head1 DESCRIPTION
469
470This module is only intended for authors of tests on
471L<SQL::Abstract|SQL::Abstract> and related modules;
472it exports functions for comparing two SQL statements
473and their bound values.
474
475The SQL comparison is performed on I<abstract syntax>,
476ignoring differences in spaces or in levels of parentheses.
477Therefore the tests will pass as long as the semantics
478is preserved, even if the surface syntax has changed.
479
ec9af79e 480B<Disclaimer> : the semantic equivalence handling is pretty limited.
481A lot of effort goes into distinguishing significant from
482non-significant parenthesis, including AND/OR operator associativity.
483Currently this module does not support commutativity and more
484intelligent transformations like Morgan laws, etc.
485
486For a good overview of what this test framework is capable of refer
487to C<t/10test.t>
fffe6900 488
489=head1 FUNCTIONS
490
491=head2 is_same_sql_bind
492
493 is_same_sql_bind($given_sql, \@given_bind,
494 $expected_sql, \@expected_bind, $test_msg);
495
496Compares given and expected pairs of C<($sql, \@bind)>, and calls
e7827ba2 497L<Test::Builder/ok> on the result, with C<$test_msg> as message. If the test
498fails, a detailed diagnostic is printed. For clients which use L<Test::More>,
499this is the one of the three functions (L</is_same_sql_bind>, L</is_same_sql>,
500L</is_same_bind>) that needs to be imported.
501
502=head2 is_same_sql
503
504 is_same_sql($given_sql, $expected_sql, $test_msg);
505
506Compares given and expected SQL statements, and calls L<Test::Builder/ok> on
507the result, with C<$test_msg> as message. If the test fails, a detailed
508diagnostic is printed. For clients which use L<Test::More>, this is the one of
509the three functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>)
510that needs to be imported.
511
512=head2 is_same_bind
513
514 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
515
516Compares given and expected bind values, and calls L<Test::Builder/ok> on the
517result, with C<$test_msg> as message. If the test fails, a detailed diagnostic
518is printed. For clients which use L<Test::More>, this is the one of the three
519functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>) that needs
520to be imported.
521
522=head2 eq_sql_bind
523
524 my $is_same = eq_sql_bind($given_sql, \@given_bind,
525 $expected_sql, \@expected_bind);
526
527Compares given and expected pairs of C<($sql, \@bind)>. Similar to
528L</is_same_sql_bind>, but it just returns a boolean value and does not print
529diagnostics or talk to L<Test::Builder>.
fffe6900 530
531=head2 eq_sql
532
533 my $is_same = eq_sql($given_sql, $expected_sql);
534
e7827ba2 535Compares the abstract syntax of two SQL statements. Similar to L</is_same_sql>,
536but it just returns a boolean value and does not print diagnostics or talk to
537L<Test::Builder>. If the result is false, the global variable L</$sql_differ>
538will contain the SQL portion where a difference was encountered; this is useful
539for printing diagnostics.
fffe6900 540
541=head2 eq_bind
542
543 my $is_same = eq_sql(\@given_bind, \@expected_bind);
544
e7827ba2 545Compares two lists of bind values, taking into account the fact that some of
546the values may be arrayrefs (see L<SQL::Abstract/bindtype>). Similar to
547L</is_same_bind>, but it just returns a boolean value and does not print
548diagnostics or talk to L<Test::Builder>.
fffe6900 549
550=head1 GLOBAL VARIABLES
551
e7827ba2 552=head2 $case_sensitive
fffe6900 553
554If true, SQL comparisons will be case-sensitive. Default is false;
555
e40f5df9 556=head2 $parenthesis_significant
557
558If true, SQL comparison will preserve and report difference in nested
559parenthesis. Useful for testing the C<-nest> modifier. Defaults to false;
560
e7827ba2 561=head2 $sql_differ
fffe6900 562
563When L</eq_sql> returns false, the global variable
564C<$sql_differ> contains the SQL portion
565where a difference was encountered.
566
567
568=head1 SEE ALSO
569
a6daa642 570L<SQL::Abstract>, L<Test::More>, L<Test::Builder>.
fffe6900 571
25823711 572=head1 AUTHORS
fffe6900 573
574Laurent Dami, E<lt>laurent.dami AT etat geneve chE<gt>
575
25823711 576Norbert Buchmuller <norbi@nix.hu>
577
e96c510a 578Peter Rabbitson <ribasushi@cpan.org>
579
fffe6900 580=head1 COPYRIGHT AND LICENSE
581
582Copyright 2008 by Laurent Dami.
583
584This library is free software; you can redistribute it and/or modify
585it under the same terms as Perl itself.