Fix Tester to deal properly with NOT and single parenthesized expressions
[dbsrgits/SQL-Abstract.git] / lib / SQL / Abstract / Test.pm
CommitLineData
fffe6900 1package SQL::Abstract::Test; # see doc at end of file
2
3use strict;
4use warnings;
5aad8cf3 5use base qw/Test::Builder::Module Exporter/;
fffe6900 6use Data::Dumper;
7use Carp;
4abea32b 8use Test::Builder;
9use Test::Deep qw(eq_deeply);
fffe6900 10
e7827ba2 11our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind
12 &eq_sql_bind &eq_sql &eq_bind
fffe6900 13 $case_sensitive $sql_differ/;
14
15our $case_sensitive = 0;
e40f5df9 16our $parenthesis_significant = 0;
fffe6900 17our $sql_differ; # keeps track of differing portion between SQLs
5aad8cf3 18our $tb = __PACKAGE__->builder;
fffe6900 19
25823711 20# Parser states for _recurse_parse()
01b64cb7 21use constant PARSE_TOP_LEVEL => 0;
5221d7fc 22use constant PARSE_IN_EXPR => 1;
23use constant PARSE_IN_PARENS => 2;
01b64cb7 24use constant PARSE_RHS => 3;
25823711 25
26# These SQL keywords always signal end of the current expression (except inside
27# of a parenthesized subexpression).
28# Format: A list of strings that will be compiled to extended syntax (ie.
29# /.../x) regexes, without capturing parentheses. They will be automatically
30# anchored to word boundaries to match the whole token).
31my @expression_terminator_sql_keywords = (
1b17d1b0 32 'SELECT',
25823711 33 'FROM',
34 '(?:
35 (?:
36 (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
37 (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
38 )?
39 JOIN
40 )',
41 'ON',
42 'WHERE',
43 'GROUP \s+ BY',
44 'HAVING',
45 'ORDER \s+ BY',
46 'LIMIT',
47 'OFFSET',
48 'FOR',
49 'UNION',
50 'INTERSECT',
51 'EXCEPT',
52);
53
01b64cb7 54# These are binary operator keywords always a single LHS and RHS
55# * AND/OR are handled separately as they are N-ary
9e8dab3f 56# * so is NOT as being unary
01b64cb7 57# * BETWEEN without paranthesis around the ANDed arguments (which
58# makes it a non-binary op) is detected and accomodated in
59# _recurse_parse()
60my @binary_op_keywords = (
61 (map { "\Q$_\E" } (qw/< > != = <= >=/)),
62 '(?: NOT \s+)? LIKE',
63 '(?: NOT \s+)? BETWEEN',
64);
65
66my $tokenizer_re_str = join("\n\t|\n",
9e8dab3f 67 ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'),
01b64cb7 68 ( map { q! (?<= [\w\s\`\'\)] ) ! . $_ . q! (?= [\w\s\`\'\(] ) ! } @binary_op_keywords ),
69);
70
71my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi;
72
35149895 73# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics
01b64cb7 74my @unrollable_ops = (
1b17d1b0 75 'ON',
76 'WHERE',
77 'GROUP \s+ BY',
78 'HAVING',
79 'ORDER \s+ BY',
80);
81
fffe6900 82sub is_same_sql_bind {
83 my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
84
85 # compare
25823711 86 my $same_sql = eq_sql($sql1, $sql2);
fffe6900 87 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
88
a6daa642 89 # call Test::Builder::ok
1a828f61 90 my $ret = $tb->ok($same_sql && $same_bind, $msg);
fffe6900 91
92 # add debugging info
93 if (!$same_sql) {
e7827ba2 94 _sql_differ_diag($sql1, $sql2);
fffe6900 95 }
96 if (!$same_bind) {
e7827ba2 97 _bind_differ_diag($bind_ref1, $bind_ref2);
fffe6900 98 }
1a828f61 99
100 # pass ok() result further
101 return $ret;
fffe6900 102}
103
e7827ba2 104sub is_same_sql {
105 my ($sql1, $sql2, $msg) = @_;
106
107 # compare
108 my $same_sql = eq_sql($sql1, $sql2);
109
110 # call Test::Builder::ok
1a828f61 111 my $ret = $tb->ok($same_sql, $msg);
e7827ba2 112
113 # add debugging info
114 if (!$same_sql) {
115 _sql_differ_diag($sql1, $sql2);
116 }
1a828f61 117
118 # pass ok() result further
119 return $ret;
e7827ba2 120}
121
122sub is_same_bind {
123 my ($bind_ref1, $bind_ref2, $msg) = @_;
124
125 # compare
126 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
127
128 # call Test::Builder::ok
1a828f61 129 my $ret = $tb->ok($same_bind, $msg);
e7827ba2 130
131 # add debugging info
132 if (!$same_bind) {
133 _bind_differ_diag($bind_ref1, $bind_ref2);
134 }
1a828f61 135
136 # pass ok() result further
137 return $ret;
e7827ba2 138}
139
140sub _sql_differ_diag {
141 my ($sql1, $sql2) = @_;
142
143 $tb->diag("SQL expressions differ\n"
144 ." got: $sql1\n"
145 ."expected: $sql2\n"
146 ."differing in :\n$sql_differ\n"
147 );
148}
149
150sub _bind_differ_diag {
151 my ($bind_ref1, $bind_ref2) = @_;
152
153 $tb->diag("BIND values differ\n"
154 ." got: " . Dumper($bind_ref1)
155 ."expected: " . Dumper($bind_ref2)
156 );
157}
158
159sub eq_sql_bind {
160 my ($sql1, $bind_ref1, $sql2, $bind_ref2) = @_;
161
162 return eq_sql($sql1, $sql2) && eq_bind($bind_ref1, $bind_ref2);
163}
164
165
fffe6900 166sub eq_bind {
167 my ($bind_ref1, $bind_ref2) = @_;
fffe6900 168
4abea32b 169 return eq_deeply($bind_ref1, $bind_ref2);
fffe6900 170}
171
172sub eq_sql {
25823711 173 my ($sql1, $sql2) = @_;
174
175 # parse
176 my $tree1 = parse($sql1);
177 my $tree2 = parse($sql2);
178
1b17d1b0 179 return 1 if _eq_sql($tree1, $tree2);
25823711 180}
181
182sub _eq_sql {
fffe6900 183 my ($left, $right) = @_;
184
939db550 185 # one is defined the other not
186 if ( (defined $left) xor (defined $right) ) {
187 return 0;
188 }
189 # one is undefined, then so is the other
190 elsif (not defined $left) {
d15c14cc 191 return 1;
192 }
1b17d1b0 193 # one is a list, the other is an op with a list
194 elsif (ref $left->[0] xor ref $right->[0]) {
195 $sql_differ = sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) );
fffe6900 196 return 0;
197 }
1b17d1b0 198 # one is a list, so is the other
199 elsif (ref $left->[0]) {
200 for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) {
201 return 0 if (not _eq_sql ($left->[$i], $right->[$i]) );
202 }
203 return 1;
204 }
205 # both are an op-list combo
206 else {
207
e40f5df9 208 # unroll parenthesis if possible/allowed
209 _parenthesis_unroll ($_) for ($left, $right);
1b17d1b0 210
211 # if operators are different
212 if ($left->[0] ne $right->[0]) {
213 $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n",
214 unparse($left),
215 unparse($right);
216 return 0;
217 }
218 # elsif operators are identical, compare operands
219 else {
220 if ($left->[0] eq 'EXPR' ) { # unary operator
01b64cb7 221 (my $l = " $left->[1][0] " ) =~ s/\s+/ /g;
222 (my $r = " $right->[1][0] ") =~ s/\s+/ /g;
1b17d1b0 223 my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r);
01b64cb7 224 $sql_differ = "[$l] != [$r]\n" if not $eq;
1b17d1b0 225 return $eq;
226 }
227 else {
228 my $eq = _eq_sql($left->[1], $right->[1]);
229 $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ) if not $eq;
230 return $eq;
231 }
fffe6900 232 }
233 }
234}
235
fffe6900 236sub parse {
237 my $s = shift;
238
25823711 239 # tokenize string, and remove all optional whitespace
240 my $tokens = [];
241 foreach my $token (split $tokenizer_re, $s) {
242 $token =~ s/\s+/ /g;
243 $token =~ s/\s+([^\w\s])/$1/g;
244 $token =~ s/([^\w\s])\s+/$1/g;
01b64cb7 245 push @$tokens, $token if length $token;
25823711 246 }
fffe6900 247
25823711 248 my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
fffe6900 249 return $tree;
250}
251
252sub _recurse_parse {
25823711 253 my ($tokens, $state) = @_;
fffe6900 254
255 my $left;
256 while (1) { # left-associative parsing
257
258 my $lookahead = $tokens->[0];
1b17d1b0 259 if ( not defined($lookahead)
260 or
261 ($state == PARSE_IN_PARENS && $lookahead eq ')')
262 or
01b64cb7 263 ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) )
264 or
9e8dab3f 265 ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) )
1b17d1b0 266 ) {
1b17d1b0 267 return $left;
1b17d1b0 268 }
fffe6900 269
270 my $token = shift @$tokens;
271
272 # nested expression in ()
273 if ($token eq '(') {
25823711 274 my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
09abf3a0 275 $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right);
276 $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right);
1b17d1b0 277 $left = $left ? [@$left, [PAREN => [$right] ]]
278 : [PAREN => [$right] ];
fffe6900 279 }
01b64cb7 280 # AND/OR
1b17d1b0 281 elsif ($token =~ /^ (?: OR | AND ) $/xi ) {
282 my $op = uc $token;
25823711 283 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 284
285 # Merge chunks if logic matches
286 if (ref $right and $op eq $right->[0]) {
287 $left = [ (shift @$right ), [$left, map { @$_ } @$right] ];
288 }
289 else {
290 $left = [$op => [$left, $right]];
291 }
fffe6900 292 }
01b64cb7 293 # binary operator keywords
294 elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) {
295 my $op = uc $token;
296 my $right = _recurse_parse($tokens, PARSE_RHS);
297
298 # A between with a simple EXPR for a 1st RHS argument needs a
299 # rerun of the search to (hopefully) find the proper AND construct
300 if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') {
301 unshift @$tokens, $right->[1][0];
302 $right = _recurse_parse($tokens, PARSE_IN_EXPR);
303 }
304
305 $left = [$op => [$left, $right] ];
306 }
25823711 307 # expression terminator keywords (as they start a new expression)
1b17d1b0 308 elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) {
01b64cb7 309 my $op = uc $token;
25823711 310 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 311 $left = $left ? [@$left, [$op => [$right] ]]
01b64cb7 312 : [[ $op => [$right] ]];
25823711 313 }
9e8dab3f 314 # NOT (last as to allow all other NOT X pieces first)
315 elsif ( $token =~ /^ not $/ix ) {
316 my $op = uc $token;
317 my $right = _recurse_parse ($tokens, PARSE_RHS);
318 $left = $left ? [ @$left, [$op => [$right] ]]
319 : [[ $op => [$right] ]];
320
321 }
fffe6900 322 # leaf expression
323 else {
01b64cb7 324 $left = $left ? [@$left, [EXPR => [$token] ] ]
325 : [ EXPR => [$token] ];
fffe6900 326 }
327 }
328}
329
e40f5df9 330sub _parenthesis_unroll {
331 my $ast = shift;
332
333 return if $parenthesis_significant;
334 return unless (ref $ast and ref $ast->[1]);
335
336 my $changes;
337 do {
338 my @children;
339 $changes = 0;
340
341 for my $child (@{$ast->[1]}) {
342 if (not ref $child or not $child->[0] eq 'PAREN') {
343 push @children, $child;
344 next;
345 }
346
347 # unroll nested parenthesis
348 while ($child->[1][0][0] eq 'PAREN') {
349 $child = $child->[1][0];
350 $changes++;
351 }
352
353 # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list
354 if (
355 ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR')
356 and
357 $child->[1][0][0] eq $ast->[0]
358 ) {
359 push @children, @{$child->[1][0][1]};
360 $changes++;
361 }
fffe6900 362
e40f5df9 363 # if the parent operator explcitly allows it nuke the parenthesis
364 elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) {
365 push @children, $child->[1][0];
366 $changes++;
367 }
368
9e8dab3f 369 # only one EXPR element in the parenthesis
370 elsif (
371 @{$child->[1]} == 1 && $child->[1][0][0] eq 'EXPR'
372 ) {
373 push @children, $child->[1][0];
374 $changes++;
375 }
376
e40f5df9 377 # only one element in the parenthesis which is a binary op with two EXPR sub-children
378 elsif (
379 @{$child->[1]} == 1
380 and
381 grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords)
382 and
383 $child->[1][0][1][0][0] eq 'EXPR'
384 and
385 $child->[1][0][1][1][0] eq 'EXPR'
386 ) {
387 push @children, $child->[1][0];
388 $changes++;
389 }
390
391 # otherwise no more mucking for this pass
392 else {
393 push @children, $child;
394 }
395 }
396
397 $ast->[1] = \@children;
398
399 } while ($changes);
400
401}
fffe6900 402
403sub unparse {
404 my $tree = shift;
1b17d1b0 405
406 if (not $tree ) {
407 return '';
408 }
409 elsif (ref $tree->[0]) {
410 return join (" ", map { unparse ($_) } @$tree);
411 }
412 elsif ($tree->[0] eq 'EXPR') {
01b64cb7 413 return $tree->[1][0];
1b17d1b0 414 }
415 elsif ($tree->[0] eq 'PAREN') {
01b64cb7 416 return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]});
1b17d1b0 417 }
01b64cb7 418 elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) {
1b17d1b0 419 return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]});
420 }
421 else {
422 return sprintf '%s %s', $tree->[0], unparse ($tree->[1]);
423 }
fffe6900 424}
425
426
4271;
428
429
430__END__
431
432=head1 NAME
433
434SQL::Abstract::Test - Helper function for testing SQL::Abstract
435
436=head1 SYNOPSIS
437
438 use SQL::Abstract;
439 use Test::More;
e7827ba2 440 use SQL::Abstract::Test import => [qw/
441 is_same_sql_bind is_same_sql is_same_bind
442 eq_sql_bind eq_sql eq_bind
443 /];
ec9af79e 444
fffe6900 445 my ($sql, @bind) = SQL::Abstract->new->select(%args);
e7827ba2 446
fffe6900 447 is_same_sql_bind($given_sql, \@given_bind,
448 $expected_sql, \@expected_bind, $test_msg);
449
e7827ba2 450 is_same_sql($given_sql, $expected_sql, $test_msg);
451 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
452
453 my $is_same = eq_sql_bind($given_sql, \@given_bind,
454 $expected_sql, \@expected_bind);
455
456 my $sql_same = eq_sql($given_sql, $expected_sql);
457 my $bind_same = eq_bind(\@given_bind, \@expected_bind);
458
fffe6900 459=head1 DESCRIPTION
460
461This module is only intended for authors of tests on
462L<SQL::Abstract|SQL::Abstract> and related modules;
463it exports functions for comparing two SQL statements
464and their bound values.
465
466The SQL comparison is performed on I<abstract syntax>,
467ignoring differences in spaces or in levels of parentheses.
468Therefore the tests will pass as long as the semantics
469is preserved, even if the surface syntax has changed.
470
ec9af79e 471B<Disclaimer> : the semantic equivalence handling is pretty limited.
472A lot of effort goes into distinguishing significant from
473non-significant parenthesis, including AND/OR operator associativity.
474Currently this module does not support commutativity and more
475intelligent transformations like Morgan laws, etc.
476
477For a good overview of what this test framework is capable of refer
478to C<t/10test.t>
fffe6900 479
480=head1 FUNCTIONS
481
482=head2 is_same_sql_bind
483
484 is_same_sql_bind($given_sql, \@given_bind,
485 $expected_sql, \@expected_bind, $test_msg);
486
487Compares given and expected pairs of C<($sql, \@bind)>, and calls
e7827ba2 488L<Test::Builder/ok> on the result, with C<$test_msg> as message. If the test
489fails, a detailed diagnostic is printed. For clients which use L<Test::More>,
490this is the one of the three functions (L</is_same_sql_bind>, L</is_same_sql>,
491L</is_same_bind>) that needs to be imported.
492
493=head2 is_same_sql
494
495 is_same_sql($given_sql, $expected_sql, $test_msg);
496
497Compares given and expected SQL statements, and calls L<Test::Builder/ok> on
498the result, with C<$test_msg> as message. If the test fails, a detailed
499diagnostic is printed. For clients which use L<Test::More>, this is the one of
500the three functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>)
501that needs to be imported.
502
503=head2 is_same_bind
504
505 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
506
507Compares given and expected bind values, and calls L<Test::Builder/ok> on the
508result, with C<$test_msg> as message. If the test fails, a detailed diagnostic
509is printed. For clients which use L<Test::More>, this is the one of the three
510functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>) that needs
511to be imported.
512
513=head2 eq_sql_bind
514
515 my $is_same = eq_sql_bind($given_sql, \@given_bind,
516 $expected_sql, \@expected_bind);
517
518Compares given and expected pairs of C<($sql, \@bind)>. Similar to
519L</is_same_sql_bind>, but it just returns a boolean value and does not print
520diagnostics or talk to L<Test::Builder>.
fffe6900 521
522=head2 eq_sql
523
524 my $is_same = eq_sql($given_sql, $expected_sql);
525
e7827ba2 526Compares the abstract syntax of two SQL statements. Similar to L</is_same_sql>,
527but it just returns a boolean value and does not print diagnostics or talk to
528L<Test::Builder>. If the result is false, the global variable L</$sql_differ>
529will contain the SQL portion where a difference was encountered; this is useful
530for printing diagnostics.
fffe6900 531
532=head2 eq_bind
533
534 my $is_same = eq_sql(\@given_bind, \@expected_bind);
535
e7827ba2 536Compares two lists of bind values, taking into account the fact that some of
537the values may be arrayrefs (see L<SQL::Abstract/bindtype>). Similar to
538L</is_same_bind>, but it just returns a boolean value and does not print
539diagnostics or talk to L<Test::Builder>.
fffe6900 540
541=head1 GLOBAL VARIABLES
542
e7827ba2 543=head2 $case_sensitive
fffe6900 544
545If true, SQL comparisons will be case-sensitive. Default is false;
546
e40f5df9 547=head2 $parenthesis_significant
548
549If true, SQL comparison will preserve and report difference in nested
550parenthesis. Useful for testing the C<-nest> modifier. Defaults to false;
551
e7827ba2 552=head2 $sql_differ
fffe6900 553
554When L</eq_sql> returns false, the global variable
555C<$sql_differ> contains the SQL portion
556where a difference was encountered.
557
558
559=head1 SEE ALSO
560
a6daa642 561L<SQL::Abstract>, L<Test::More>, L<Test::Builder>.
fffe6900 562
25823711 563=head1 AUTHORS
fffe6900 564
565Laurent Dami, E<lt>laurent.dami AT etat geneve chE<gt>
566
25823711 567Norbert Buchmuller <norbi@nix.hu>
568
e96c510a 569Peter Rabbitson <ribasushi@cpan.org>
570
fffe6900 571=head1 COPYRIGHT AND LICENSE
572
573Copyright 2008 by Laurent Dami.
574
575This library is free software; you can redistribute it and/or modify
576it under the same terms as Perl itself.