Enhance/detabify
[dbsrgits/SQL-Abstract.git] / lib / SQL / Abstract / Test.pm
CommitLineData
fffe6900 1package SQL::Abstract::Test; # see doc at end of file
2
3use strict;
4use warnings;
5aad8cf3 5use base qw/Test::Builder::Module Exporter/;
fffe6900 6use Data::Dumper;
7use Carp;
4abea32b 8use Test::Builder;
9use Test::Deep qw(eq_deeply);
fffe6900 10
e7827ba2 11our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind
12 &eq_sql_bind &eq_sql &eq_bind
fffe6900 13 $case_sensitive $sql_differ/;
14
15our $case_sensitive = 0;
e40f5df9 16our $parenthesis_significant = 0;
fffe6900 17our $sql_differ; # keeps track of differing portion between SQLs
5aad8cf3 18our $tb = __PACKAGE__->builder;
fffe6900 19
25823711 20# Parser states for _recurse_parse()
01b64cb7 21use constant PARSE_TOP_LEVEL => 0;
5221d7fc 22use constant PARSE_IN_EXPR => 1;
23use constant PARSE_IN_PARENS => 2;
01b64cb7 24use constant PARSE_RHS => 3;
25823711 25
26# These SQL keywords always signal end of the current expression (except inside
27# of a parenthesized subexpression).
28# Format: A list of strings that will be compiled to extended syntax (ie.
29# /.../x) regexes, without capturing parentheses. They will be automatically
30# anchored to word boundaries to match the whole token).
31my @expression_terminator_sql_keywords = (
1b17d1b0 32 'SELECT',
25823711 33 'FROM',
34 '(?:
35 (?:
36 (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
37 (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
38 )?
39 JOIN
40 )',
41 'ON',
42 'WHERE',
43 'GROUP \s+ BY',
44 'HAVING',
45 'ORDER \s+ BY',
46 'LIMIT',
47 'OFFSET',
48 'FOR',
49 'UNION',
50 'INTERSECT',
51 'EXCEPT',
02288357 52 'RETURNING',
25823711 53);
54
01b64cb7 55# These are binary operator keywords always a single LHS and RHS
56# * AND/OR are handled separately as they are N-ary
9e8dab3f 57# * so is NOT as being unary
01b64cb7 58# * BETWEEN without paranthesis around the ANDed arguments (which
59# makes it a non-binary op) is detected and accomodated in
60# _recurse_parse()
30d09fa9 61my $stuff_around_mathops = qr/[\w\s\`\'\)]/;
01b64cb7 62my @binary_op_keywords = (
30d09fa9 63 ( map
64 { " (?<= $stuff_around_mathops) " . quotemeta $_ . "(?= $stuff_around_mathops )" }
65 (qw/< > != = <= >=/)
66 ),
67 ( map
68 { '\b (?: NOT \s+)?' . $_ . '\b' }
69 (qw/IN BETWEEN LIKE/)
70 ),
01b64cb7 71);
72
73my $tokenizer_re_str = join("\n\t|\n",
9e8dab3f 74 ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'),
30d09fa9 75 @binary_op_keywords,
01b64cb7 76);
77
78my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi;
79
35149895 80# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics
01b64cb7 81my @unrollable_ops = (
1b17d1b0 82 'ON',
83 'WHERE',
84 'GROUP \s+ BY',
85 'HAVING',
86 'ORDER \s+ BY',
87);
88
fffe6900 89sub is_same_sql_bind {
90 my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
91
92 # compare
25823711 93 my $same_sql = eq_sql($sql1, $sql2);
fffe6900 94 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
95
a6daa642 96 # call Test::Builder::ok
1a828f61 97 my $ret = $tb->ok($same_sql && $same_bind, $msg);
fffe6900 98
99 # add debugging info
100 if (!$same_sql) {
e7827ba2 101 _sql_differ_diag($sql1, $sql2);
fffe6900 102 }
103 if (!$same_bind) {
e7827ba2 104 _bind_differ_diag($bind_ref1, $bind_ref2);
fffe6900 105 }
1a828f61 106
107 # pass ok() result further
108 return $ret;
fffe6900 109}
110
e7827ba2 111sub is_same_sql {
112 my ($sql1, $sql2, $msg) = @_;
113
114 # compare
115 my $same_sql = eq_sql($sql1, $sql2);
116
117 # call Test::Builder::ok
1a828f61 118 my $ret = $tb->ok($same_sql, $msg);
e7827ba2 119
120 # add debugging info
121 if (!$same_sql) {
122 _sql_differ_diag($sql1, $sql2);
123 }
1a828f61 124
125 # pass ok() result further
126 return $ret;
e7827ba2 127}
128
129sub is_same_bind {
130 my ($bind_ref1, $bind_ref2, $msg) = @_;
131
132 # compare
133 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
134
135 # call Test::Builder::ok
1a828f61 136 my $ret = $tb->ok($same_bind, $msg);
e7827ba2 137
138 # add debugging info
139 if (!$same_bind) {
140 _bind_differ_diag($bind_ref1, $bind_ref2);
141 }
1a828f61 142
143 # pass ok() result further
144 return $ret;
e7827ba2 145}
146
147sub _sql_differ_diag {
148 my ($sql1, $sql2) = @_;
149
150 $tb->diag("SQL expressions differ\n"
151 ." got: $sql1\n"
152 ."expected: $sql2\n"
153 ."differing in :\n$sql_differ\n"
154 );
155}
156
157sub _bind_differ_diag {
158 my ($bind_ref1, $bind_ref2) = @_;
159
160 $tb->diag("BIND values differ\n"
161 ." got: " . Dumper($bind_ref1)
162 ."expected: " . Dumper($bind_ref2)
163 );
164}
165
166sub eq_sql_bind {
167 my ($sql1, $bind_ref1, $sql2, $bind_ref2) = @_;
168
169 return eq_sql($sql1, $sql2) && eq_bind($bind_ref1, $bind_ref2);
170}
171
172
fffe6900 173sub eq_bind {
174 my ($bind_ref1, $bind_ref2) = @_;
fffe6900 175
4abea32b 176 return eq_deeply($bind_ref1, $bind_ref2);
fffe6900 177}
178
179sub eq_sql {
25823711 180 my ($sql1, $sql2) = @_;
181
182 # parse
183 my $tree1 = parse($sql1);
184 my $tree2 = parse($sql2);
185
1b17d1b0 186 return 1 if _eq_sql($tree1, $tree2);
25823711 187}
188
189sub _eq_sql {
fffe6900 190 my ($left, $right) = @_;
191
939db550 192 # one is defined the other not
193 if ( (defined $left) xor (defined $right) ) {
194 return 0;
195 }
196 # one is undefined, then so is the other
197 elsif (not defined $left) {
d15c14cc 198 return 1;
199 }
1b17d1b0 200 # one is a list, the other is an op with a list
201 elsif (ref $left->[0] xor ref $right->[0]) {
202 $sql_differ = sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) );
fffe6900 203 return 0;
204 }
1b17d1b0 205 # one is a list, so is the other
206 elsif (ref $left->[0]) {
207 for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) {
208 return 0 if (not _eq_sql ($left->[$i], $right->[$i]) );
209 }
210 return 1;
211 }
212 # both are an op-list combo
213 else {
214
e40f5df9 215 # unroll parenthesis if possible/allowed
216 _parenthesis_unroll ($_) for ($left, $right);
1b17d1b0 217
218 # if operators are different
219 if ($left->[0] ne $right->[0]) {
220 $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n",
221 unparse($left),
222 unparse($right);
223 return 0;
224 }
225 # elsif operators are identical, compare operands
226 else {
227 if ($left->[0] eq 'EXPR' ) { # unary operator
01b64cb7 228 (my $l = " $left->[1][0] " ) =~ s/\s+/ /g;
229 (my $r = " $right->[1][0] ") =~ s/\s+/ /g;
1b17d1b0 230 my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r);
01b64cb7 231 $sql_differ = "[$l] != [$r]\n" if not $eq;
1b17d1b0 232 return $eq;
233 }
234 else {
235 my $eq = _eq_sql($left->[1], $right->[1]);
236 $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ) if not $eq;
237 return $eq;
238 }
fffe6900 239 }
240 }
241}
242
fffe6900 243sub parse {
244 my $s = shift;
245
25823711 246 # tokenize string, and remove all optional whitespace
247 my $tokens = [];
248 foreach my $token (split $tokenizer_re, $s) {
249 $token =~ s/\s+/ /g;
250 $token =~ s/\s+([^\w\s])/$1/g;
251 $token =~ s/([^\w\s])\s+/$1/g;
01b64cb7 252 push @$tokens, $token if length $token;
25823711 253 }
fffe6900 254
25823711 255 my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
fffe6900 256 return $tree;
257}
258
259sub _recurse_parse {
25823711 260 my ($tokens, $state) = @_;
fffe6900 261
262 my $left;
263 while (1) { # left-associative parsing
264
265 my $lookahead = $tokens->[0];
1b17d1b0 266 if ( not defined($lookahead)
267 or
268 ($state == PARSE_IN_PARENS && $lookahead eq ')')
269 or
01b64cb7 270 ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) )
271 or
9e8dab3f 272 ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) )
1b17d1b0 273 ) {
1b17d1b0 274 return $left;
1b17d1b0 275 }
fffe6900 276
277 my $token = shift @$tokens;
278
279 # nested expression in ()
280 if ($token eq '(') {
25823711 281 my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
09abf3a0 282 $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right);
283 $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right);
1b17d1b0 284 $left = $left ? [@$left, [PAREN => [$right] ]]
285 : [PAREN => [$right] ];
fffe6900 286 }
01b64cb7 287 # AND/OR
1b17d1b0 288 elsif ($token =~ /^ (?: OR | AND ) $/xi ) {
289 my $op = uc $token;
25823711 290 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 291
292 # Merge chunks if logic matches
293 if (ref $right and $op eq $right->[0]) {
294 $left = [ (shift @$right ), [$left, map { @$_ } @$right] ];
295 }
296 else {
297 $left = [$op => [$left, $right]];
298 }
fffe6900 299 }
01b64cb7 300 # binary operator keywords
301 elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) {
302 my $op = uc $token;
303 my $right = _recurse_parse($tokens, PARSE_RHS);
304
305 # A between with a simple EXPR for a 1st RHS argument needs a
306 # rerun of the search to (hopefully) find the proper AND construct
307 if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') {
308 unshift @$tokens, $right->[1][0];
309 $right = _recurse_parse($tokens, PARSE_IN_EXPR);
310 }
311
312 $left = [$op => [$left, $right] ];
313 }
25823711 314 # expression terminator keywords (as they start a new expression)
1b17d1b0 315 elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) {
01b64cb7 316 my $op = uc $token;
25823711 317 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 318 $left = $left ? [@$left, [$op => [$right] ]]
01b64cb7 319 : [[ $op => [$right] ]];
25823711 320 }
9e8dab3f 321 # NOT (last as to allow all other NOT X pieces first)
322 elsif ( $token =~ /^ not $/ix ) {
323 my $op = uc $token;
324 my $right = _recurse_parse ($tokens, PARSE_RHS);
325 $left = $left ? [ @$left, [$op => [$right] ]]
326 : [[ $op => [$right] ]];
327
328 }
fffe6900 329 # leaf expression
330 else {
01b64cb7 331 $left = $left ? [@$left, [EXPR => [$token] ] ]
332 : [ EXPR => [$token] ];
fffe6900 333 }
334 }
335}
336
e40f5df9 337sub _parenthesis_unroll {
338 my $ast = shift;
339
340 return if $parenthesis_significant;
341 return unless (ref $ast and ref $ast->[1]);
342
343 my $changes;
344 do {
345 my @children;
346 $changes = 0;
347
348 for my $child (@{$ast->[1]}) {
349 if (not ref $child or not $child->[0] eq 'PAREN') {
350 push @children, $child;
351 next;
352 }
353
354 # unroll nested parenthesis
355 while ($child->[1][0][0] eq 'PAREN') {
356 $child = $child->[1][0];
357 $changes++;
358 }
359
360 # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list
361 if (
362 ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR')
363 and
364 $child->[1][0][0] eq $ast->[0]
365 ) {
366 push @children, @{$child->[1][0][1]};
367 $changes++;
368 }
fffe6900 369
e40f5df9 370 # if the parent operator explcitly allows it nuke the parenthesis
371 elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) {
372 push @children, $child->[1][0];
373 $changes++;
374 }
375
9e8dab3f 376 # only one EXPR element in the parenthesis
377 elsif (
378 @{$child->[1]} == 1 && $child->[1][0][0] eq 'EXPR'
379 ) {
380 push @children, $child->[1][0];
381 $changes++;
382 }
383
e40f5df9 384 # only one element in the parenthesis which is a binary op with two EXPR sub-children
385 elsif (
386 @{$child->[1]} == 1
387 and
388 grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords)
389 and
390 $child->[1][0][1][0][0] eq 'EXPR'
391 and
392 $child->[1][0][1][1][0] eq 'EXPR'
393 ) {
394 push @children, $child->[1][0];
395 $changes++;
396 }
397
398 # otherwise no more mucking for this pass
399 else {
400 push @children, $child;
401 }
402 }
403
404 $ast->[1] = \@children;
405
406 } while ($changes);
407
408}
fffe6900 409
410sub unparse {
411 my $tree = shift;
1b17d1b0 412
413 if (not $tree ) {
414 return '';
415 }
416 elsif (ref $tree->[0]) {
417 return join (" ", map { unparse ($_) } @$tree);
418 }
419 elsif ($tree->[0] eq 'EXPR') {
01b64cb7 420 return $tree->[1][0];
1b17d1b0 421 }
422 elsif ($tree->[0] eq 'PAREN') {
01b64cb7 423 return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]});
1b17d1b0 424 }
01b64cb7 425 elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) {
1b17d1b0 426 return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]});
427 }
428 else {
429 return sprintf '%s %s', $tree->[0], unparse ($tree->[1]);
430 }
fffe6900 431}
432
433
4341;
435
436
437__END__
438
439=head1 NAME
440
441SQL::Abstract::Test - Helper function for testing SQL::Abstract
442
443=head1 SYNOPSIS
444
445 use SQL::Abstract;
446 use Test::More;
e7827ba2 447 use SQL::Abstract::Test import => [qw/
448 is_same_sql_bind is_same_sql is_same_bind
449 eq_sql_bind eq_sql eq_bind
450 /];
ec9af79e 451
fffe6900 452 my ($sql, @bind) = SQL::Abstract->new->select(%args);
e7827ba2 453
fffe6900 454 is_same_sql_bind($given_sql, \@given_bind,
455 $expected_sql, \@expected_bind, $test_msg);
456
e7827ba2 457 is_same_sql($given_sql, $expected_sql, $test_msg);
458 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
459
460 my $is_same = eq_sql_bind($given_sql, \@given_bind,
461 $expected_sql, \@expected_bind);
462
463 my $sql_same = eq_sql($given_sql, $expected_sql);
464 my $bind_same = eq_bind(\@given_bind, \@expected_bind);
465
fffe6900 466=head1 DESCRIPTION
467
468This module is only intended for authors of tests on
469L<SQL::Abstract|SQL::Abstract> and related modules;
470it exports functions for comparing two SQL statements
471and their bound values.
472
473The SQL comparison is performed on I<abstract syntax>,
474ignoring differences in spaces or in levels of parentheses.
475Therefore the tests will pass as long as the semantics
476is preserved, even if the surface syntax has changed.
477
ec9af79e 478B<Disclaimer> : the semantic equivalence handling is pretty limited.
479A lot of effort goes into distinguishing significant from
480non-significant parenthesis, including AND/OR operator associativity.
481Currently this module does not support commutativity and more
482intelligent transformations like Morgan laws, etc.
483
484For a good overview of what this test framework is capable of refer
485to C<t/10test.t>
fffe6900 486
487=head1 FUNCTIONS
488
489=head2 is_same_sql_bind
490
491 is_same_sql_bind($given_sql, \@given_bind,
492 $expected_sql, \@expected_bind, $test_msg);
493
494Compares given and expected pairs of C<($sql, \@bind)>, and calls
e7827ba2 495L<Test::Builder/ok> on the result, with C<$test_msg> as message. If the test
496fails, a detailed diagnostic is printed. For clients which use L<Test::More>,
497this is the one of the three functions (L</is_same_sql_bind>, L</is_same_sql>,
498L</is_same_bind>) that needs to be imported.
499
500=head2 is_same_sql
501
502 is_same_sql($given_sql, $expected_sql, $test_msg);
503
504Compares given and expected SQL statements, and calls L<Test::Builder/ok> on
505the result, with C<$test_msg> as message. If the test fails, a detailed
506diagnostic is printed. For clients which use L<Test::More>, this is the one of
507the three functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>)
508that needs to be imported.
509
510=head2 is_same_bind
511
512 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
513
514Compares given and expected bind values, and calls L<Test::Builder/ok> on the
515result, with C<$test_msg> as message. If the test fails, a detailed diagnostic
516is printed. For clients which use L<Test::More>, this is the one of the three
517functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>) that needs
518to be imported.
519
520=head2 eq_sql_bind
521
522 my $is_same = eq_sql_bind($given_sql, \@given_bind,
523 $expected_sql, \@expected_bind);
524
525Compares given and expected pairs of C<($sql, \@bind)>. Similar to
526L</is_same_sql_bind>, but it just returns a boolean value and does not print
527diagnostics or talk to L<Test::Builder>.
fffe6900 528
529=head2 eq_sql
530
531 my $is_same = eq_sql($given_sql, $expected_sql);
532
e7827ba2 533Compares the abstract syntax of two SQL statements. Similar to L</is_same_sql>,
534but it just returns a boolean value and does not print diagnostics or talk to
535L<Test::Builder>. If the result is false, the global variable L</$sql_differ>
536will contain the SQL portion where a difference was encountered; this is useful
537for printing diagnostics.
fffe6900 538
539=head2 eq_bind
540
541 my $is_same = eq_sql(\@given_bind, \@expected_bind);
542
e7827ba2 543Compares two lists of bind values, taking into account the fact that some of
544the values may be arrayrefs (see L<SQL::Abstract/bindtype>). Similar to
545L</is_same_bind>, but it just returns a boolean value and does not print
546diagnostics or talk to L<Test::Builder>.
fffe6900 547
548=head1 GLOBAL VARIABLES
549
e7827ba2 550=head2 $case_sensitive
fffe6900 551
552If true, SQL comparisons will be case-sensitive. Default is false;
553
e40f5df9 554=head2 $parenthesis_significant
555
556If true, SQL comparison will preserve and report difference in nested
557parenthesis. Useful for testing the C<-nest> modifier. Defaults to false;
558
e7827ba2 559=head2 $sql_differ
fffe6900 560
561When L</eq_sql> returns false, the global variable
562C<$sql_differ> contains the SQL portion
563where a difference was encountered.
564
565
566=head1 SEE ALSO
567
a6daa642 568L<SQL::Abstract>, L<Test::More>, L<Test::Builder>.
fffe6900 569
25823711 570=head1 AUTHORS
fffe6900 571
572Laurent Dami, E<lt>laurent.dami AT etat geneve chE<gt>
573
25823711 574Norbert Buchmuller <norbi@nix.hu>
575
e96c510a 576Peter Rabbitson <ribasushi@cpan.org>
577
fffe6900 578=head1 COPYRIGHT AND LICENSE
579
580Copyright 2008 by Laurent Dami.
581
582This library is free software; you can redistribute it and/or modify
583it under the same terms as Perl itself.