fix a dumb dumb regex error, /me --
[scpubgit/Q-Branch.git] / lib / SQL / Abstract / Test.pm
CommitLineData
fffe6900 1package SQL::Abstract::Test; # see doc at end of file
2
3use strict;
4use warnings;
5aad8cf3 5use base qw/Test::Builder::Module Exporter/;
fffe6900 6use Data::Dumper;
7use Carp;
4abea32b 8use Test::Builder;
9use Test::Deep qw(eq_deeply);
fffe6900 10
e7827ba2 11our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind
12 &eq_sql_bind &eq_sql &eq_bind
fffe6900 13 $case_sensitive $sql_differ/;
14
15our $case_sensitive = 0;
e40f5df9 16our $parenthesis_significant = 0;
fffe6900 17our $sql_differ; # keeps track of differing portion between SQLs
5aad8cf3 18our $tb = __PACKAGE__->builder;
fffe6900 19
25823711 20# Parser states for _recurse_parse()
01b64cb7 21use constant PARSE_TOP_LEVEL => 0;
5221d7fc 22use constant PARSE_IN_EXPR => 1;
23use constant PARSE_IN_PARENS => 2;
01b64cb7 24use constant PARSE_RHS => 3;
25823711 25
26# These SQL keywords always signal end of the current expression (except inside
27# of a parenthesized subexpression).
28# Format: A list of strings that will be compiled to extended syntax (ie.
29# /.../x) regexes, without capturing parentheses. They will be automatically
30# anchored to word boundaries to match the whole token).
31my @expression_terminator_sql_keywords = (
1b17d1b0 32 'SELECT',
25823711 33 'FROM',
34 '(?:
35 (?:
36 (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
37 (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
38 )?
39 JOIN
40 )',
41 'ON',
42 'WHERE',
43 'GROUP \s+ BY',
44 'HAVING',
45 'ORDER \s+ BY',
46 'LIMIT',
47 'OFFSET',
48 'FOR',
49 'UNION',
50 'INTERSECT',
51 'EXCEPT',
52);
53
01b64cb7 54# These are binary operator keywords always a single LHS and RHS
55# * AND/OR are handled separately as they are N-ary
9e8dab3f 56# * so is NOT as being unary
01b64cb7 57# * BETWEEN without paranthesis around the ANDed arguments (which
58# makes it a non-binary op) is detected and accomodated in
59# _recurse_parse()
30d09fa9 60my $stuff_around_mathops = qr/[\w\s\`\'\)]/;
01b64cb7 61my @binary_op_keywords = (
30d09fa9 62 ( map
63 { " (?<= $stuff_around_mathops) " . quotemeta $_ . "(?= $stuff_around_mathops )" }
64 (qw/< > != = <= >=/)
65 ),
66 ( map
67 { '\b (?: NOT \s+)?' . $_ . '\b' }
68 (qw/IN BETWEEN LIKE/)
69 ),
01b64cb7 70);
71
72my $tokenizer_re_str = join("\n\t|\n",
9e8dab3f 73 ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'),
30d09fa9 74 @binary_op_keywords,
01b64cb7 75);
76
77my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi;
78
35149895 79# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics
01b64cb7 80my @unrollable_ops = (
1b17d1b0 81 'ON',
82 'WHERE',
83 'GROUP \s+ BY',
84 'HAVING',
85 'ORDER \s+ BY',
86);
87
fffe6900 88sub is_same_sql_bind {
89 my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
90
91 # compare
25823711 92 my $same_sql = eq_sql($sql1, $sql2);
fffe6900 93 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
94
a6daa642 95 # call Test::Builder::ok
1a828f61 96 my $ret = $tb->ok($same_sql && $same_bind, $msg);
fffe6900 97
98 # add debugging info
99 if (!$same_sql) {
e7827ba2 100 _sql_differ_diag($sql1, $sql2);
fffe6900 101 }
102 if (!$same_bind) {
e7827ba2 103 _bind_differ_diag($bind_ref1, $bind_ref2);
fffe6900 104 }
1a828f61 105
106 # pass ok() result further
107 return $ret;
fffe6900 108}
109
e7827ba2 110sub is_same_sql {
111 my ($sql1, $sql2, $msg) = @_;
112
113 # compare
114 my $same_sql = eq_sql($sql1, $sql2);
115
116 # call Test::Builder::ok
1a828f61 117 my $ret = $tb->ok($same_sql, $msg);
e7827ba2 118
119 # add debugging info
120 if (!$same_sql) {
121 _sql_differ_diag($sql1, $sql2);
122 }
1a828f61 123
124 # pass ok() result further
125 return $ret;
e7827ba2 126}
127
128sub is_same_bind {
129 my ($bind_ref1, $bind_ref2, $msg) = @_;
130
131 # compare
132 my $same_bind = eq_bind($bind_ref1, $bind_ref2);
133
134 # call Test::Builder::ok
1a828f61 135 my $ret = $tb->ok($same_bind, $msg);
e7827ba2 136
137 # add debugging info
138 if (!$same_bind) {
139 _bind_differ_diag($bind_ref1, $bind_ref2);
140 }
1a828f61 141
142 # pass ok() result further
143 return $ret;
e7827ba2 144}
145
146sub _sql_differ_diag {
147 my ($sql1, $sql2) = @_;
148
149 $tb->diag("SQL expressions differ\n"
150 ." got: $sql1\n"
151 ."expected: $sql2\n"
152 ."differing in :\n$sql_differ\n"
153 );
154}
155
156sub _bind_differ_diag {
157 my ($bind_ref1, $bind_ref2) = @_;
158
159 $tb->diag("BIND values differ\n"
160 ." got: " . Dumper($bind_ref1)
161 ."expected: " . Dumper($bind_ref2)
162 );
163}
164
165sub eq_sql_bind {
166 my ($sql1, $bind_ref1, $sql2, $bind_ref2) = @_;
167
168 return eq_sql($sql1, $sql2) && eq_bind($bind_ref1, $bind_ref2);
169}
170
171
fffe6900 172sub eq_bind {
173 my ($bind_ref1, $bind_ref2) = @_;
fffe6900 174
4abea32b 175 return eq_deeply($bind_ref1, $bind_ref2);
fffe6900 176}
177
178sub eq_sql {
25823711 179 my ($sql1, $sql2) = @_;
180
181 # parse
182 my $tree1 = parse($sql1);
183 my $tree2 = parse($sql2);
184
1b17d1b0 185 return 1 if _eq_sql($tree1, $tree2);
25823711 186}
187
188sub _eq_sql {
fffe6900 189 my ($left, $right) = @_;
190
939db550 191 # one is defined the other not
192 if ( (defined $left) xor (defined $right) ) {
193 return 0;
194 }
195 # one is undefined, then so is the other
196 elsif (not defined $left) {
d15c14cc 197 return 1;
198 }
1b17d1b0 199 # one is a list, the other is an op with a list
200 elsif (ref $left->[0] xor ref $right->[0]) {
201 $sql_differ = sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) );
fffe6900 202 return 0;
203 }
1b17d1b0 204 # one is a list, so is the other
205 elsif (ref $left->[0]) {
206 for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) {
207 return 0 if (not _eq_sql ($left->[$i], $right->[$i]) );
208 }
209 return 1;
210 }
211 # both are an op-list combo
212 else {
213
e40f5df9 214 # unroll parenthesis if possible/allowed
215 _parenthesis_unroll ($_) for ($left, $right);
1b17d1b0 216
217 # if operators are different
218 if ($left->[0] ne $right->[0]) {
219 $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n",
220 unparse($left),
221 unparse($right);
222 return 0;
223 }
224 # elsif operators are identical, compare operands
225 else {
226 if ($left->[0] eq 'EXPR' ) { # unary operator
01b64cb7 227 (my $l = " $left->[1][0] " ) =~ s/\s+/ /g;
228 (my $r = " $right->[1][0] ") =~ s/\s+/ /g;
1b17d1b0 229 my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r);
01b64cb7 230 $sql_differ = "[$l] != [$r]\n" if not $eq;
1b17d1b0 231 return $eq;
232 }
233 else {
234 my $eq = _eq_sql($left->[1], $right->[1]);
235 $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ) if not $eq;
236 return $eq;
237 }
fffe6900 238 }
239 }
240}
241
fffe6900 242sub parse {
243 my $s = shift;
244
25823711 245 # tokenize string, and remove all optional whitespace
246 my $tokens = [];
247 foreach my $token (split $tokenizer_re, $s) {
248 $token =~ s/\s+/ /g;
249 $token =~ s/\s+([^\w\s])/$1/g;
250 $token =~ s/([^\w\s])\s+/$1/g;
01b64cb7 251 push @$tokens, $token if length $token;
25823711 252 }
fffe6900 253
25823711 254 my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
fffe6900 255 return $tree;
256}
257
258sub _recurse_parse {
25823711 259 my ($tokens, $state) = @_;
fffe6900 260
261 my $left;
262 while (1) { # left-associative parsing
263
264 my $lookahead = $tokens->[0];
1b17d1b0 265 if ( not defined($lookahead)
266 or
267 ($state == PARSE_IN_PARENS && $lookahead eq ')')
268 or
01b64cb7 269 ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) )
270 or
9e8dab3f 271 ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) )
1b17d1b0 272 ) {
1b17d1b0 273 return $left;
1b17d1b0 274 }
fffe6900 275
276 my $token = shift @$tokens;
277
278 # nested expression in ()
279 if ($token eq '(') {
25823711 280 my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
09abf3a0 281 $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right);
282 $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right);
1b17d1b0 283 $left = $left ? [@$left, [PAREN => [$right] ]]
284 : [PAREN => [$right] ];
fffe6900 285 }
01b64cb7 286 # AND/OR
1b17d1b0 287 elsif ($token =~ /^ (?: OR | AND ) $/xi ) {
288 my $op = uc $token;
25823711 289 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 290
291 # Merge chunks if logic matches
292 if (ref $right and $op eq $right->[0]) {
293 $left = [ (shift @$right ), [$left, map { @$_ } @$right] ];
294 }
295 else {
296 $left = [$op => [$left, $right]];
297 }
fffe6900 298 }
01b64cb7 299 # binary operator keywords
300 elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) {
301 my $op = uc $token;
302 my $right = _recurse_parse($tokens, PARSE_RHS);
303
304 # A between with a simple EXPR for a 1st RHS argument needs a
305 # rerun of the search to (hopefully) find the proper AND construct
306 if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') {
307 unshift @$tokens, $right->[1][0];
308 $right = _recurse_parse($tokens, PARSE_IN_EXPR);
309 }
310
311 $left = [$op => [$left, $right] ];
312 }
25823711 313 # expression terminator keywords (as they start a new expression)
1b17d1b0 314 elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) {
01b64cb7 315 my $op = uc $token;
25823711 316 my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
1b17d1b0 317 $left = $left ? [@$left, [$op => [$right] ]]
01b64cb7 318 : [[ $op => [$right] ]];
25823711 319 }
9e8dab3f 320 # NOT (last as to allow all other NOT X pieces first)
321 elsif ( $token =~ /^ not $/ix ) {
322 my $op = uc $token;
323 my $right = _recurse_parse ($tokens, PARSE_RHS);
324 $left = $left ? [ @$left, [$op => [$right] ]]
325 : [[ $op => [$right] ]];
326
327 }
fffe6900 328 # leaf expression
329 else {
01b64cb7 330 $left = $left ? [@$left, [EXPR => [$token] ] ]
331 : [ EXPR => [$token] ];
fffe6900 332 }
333 }
334}
335
e40f5df9 336sub _parenthesis_unroll {
337 my $ast = shift;
338
339 return if $parenthesis_significant;
340 return unless (ref $ast and ref $ast->[1]);
341
342 my $changes;
343 do {
344 my @children;
345 $changes = 0;
346
347 for my $child (@{$ast->[1]}) {
348 if (not ref $child or not $child->[0] eq 'PAREN') {
349 push @children, $child;
350 next;
351 }
352
353 # unroll nested parenthesis
354 while ($child->[1][0][0] eq 'PAREN') {
355 $child = $child->[1][0];
356 $changes++;
357 }
358
359 # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list
360 if (
361 ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR')
362 and
363 $child->[1][0][0] eq $ast->[0]
364 ) {
365 push @children, @{$child->[1][0][1]};
366 $changes++;
367 }
fffe6900 368
e40f5df9 369 # if the parent operator explcitly allows it nuke the parenthesis
370 elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) {
371 push @children, $child->[1][0];
372 $changes++;
373 }
374
9e8dab3f 375 # only one EXPR element in the parenthesis
376 elsif (
377 @{$child->[1]} == 1 && $child->[1][0][0] eq 'EXPR'
378 ) {
379 push @children, $child->[1][0];
380 $changes++;
381 }
382
e40f5df9 383 # only one element in the parenthesis which is a binary op with two EXPR sub-children
384 elsif (
385 @{$child->[1]} == 1
386 and
387 grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords)
388 and
389 $child->[1][0][1][0][0] eq 'EXPR'
390 and
391 $child->[1][0][1][1][0] eq 'EXPR'
392 ) {
393 push @children, $child->[1][0];
394 $changes++;
395 }
396
397 # otherwise no more mucking for this pass
398 else {
399 push @children, $child;
400 }
401 }
402
403 $ast->[1] = \@children;
404
405 } while ($changes);
406
407}
fffe6900 408
409sub unparse {
410 my $tree = shift;
1b17d1b0 411
412 if (not $tree ) {
413 return '';
414 }
415 elsif (ref $tree->[0]) {
416 return join (" ", map { unparse ($_) } @$tree);
417 }
418 elsif ($tree->[0] eq 'EXPR') {
01b64cb7 419 return $tree->[1][0];
1b17d1b0 420 }
421 elsif ($tree->[0] eq 'PAREN') {
01b64cb7 422 return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]});
1b17d1b0 423 }
01b64cb7 424 elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) {
1b17d1b0 425 return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]});
426 }
427 else {
428 return sprintf '%s %s', $tree->[0], unparse ($tree->[1]);
429 }
fffe6900 430}
431
432
4331;
434
435
436__END__
437
438=head1 NAME
439
440SQL::Abstract::Test - Helper function for testing SQL::Abstract
441
442=head1 SYNOPSIS
443
444 use SQL::Abstract;
445 use Test::More;
e7827ba2 446 use SQL::Abstract::Test import => [qw/
447 is_same_sql_bind is_same_sql is_same_bind
448 eq_sql_bind eq_sql eq_bind
449 /];
ec9af79e 450
fffe6900 451 my ($sql, @bind) = SQL::Abstract->new->select(%args);
e7827ba2 452
fffe6900 453 is_same_sql_bind($given_sql, \@given_bind,
454 $expected_sql, \@expected_bind, $test_msg);
455
e7827ba2 456 is_same_sql($given_sql, $expected_sql, $test_msg);
457 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
458
459 my $is_same = eq_sql_bind($given_sql, \@given_bind,
460 $expected_sql, \@expected_bind);
461
462 my $sql_same = eq_sql($given_sql, $expected_sql);
463 my $bind_same = eq_bind(\@given_bind, \@expected_bind);
464
fffe6900 465=head1 DESCRIPTION
466
467This module is only intended for authors of tests on
468L<SQL::Abstract|SQL::Abstract> and related modules;
469it exports functions for comparing two SQL statements
470and their bound values.
471
472The SQL comparison is performed on I<abstract syntax>,
473ignoring differences in spaces or in levels of parentheses.
474Therefore the tests will pass as long as the semantics
475is preserved, even if the surface syntax has changed.
476
ec9af79e 477B<Disclaimer> : the semantic equivalence handling is pretty limited.
478A lot of effort goes into distinguishing significant from
479non-significant parenthesis, including AND/OR operator associativity.
480Currently this module does not support commutativity and more
481intelligent transformations like Morgan laws, etc.
482
483For a good overview of what this test framework is capable of refer
484to C<t/10test.t>
fffe6900 485
486=head1 FUNCTIONS
487
488=head2 is_same_sql_bind
489
490 is_same_sql_bind($given_sql, \@given_bind,
491 $expected_sql, \@expected_bind, $test_msg);
492
493Compares given and expected pairs of C<($sql, \@bind)>, and calls
e7827ba2 494L<Test::Builder/ok> on the result, with C<$test_msg> as message. If the test
495fails, a detailed diagnostic is printed. For clients which use L<Test::More>,
496this is the one of the three functions (L</is_same_sql_bind>, L</is_same_sql>,
497L</is_same_bind>) that needs to be imported.
498
499=head2 is_same_sql
500
501 is_same_sql($given_sql, $expected_sql, $test_msg);
502
503Compares given and expected SQL statements, and calls L<Test::Builder/ok> on
504the result, with C<$test_msg> as message. If the test fails, a detailed
505diagnostic is printed. For clients which use L<Test::More>, this is the one of
506the three functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>)
507that needs to be imported.
508
509=head2 is_same_bind
510
511 is_same_bind(\@given_bind, \@expected_bind, $test_msg);
512
513Compares given and expected bind values, and calls L<Test::Builder/ok> on the
514result, with C<$test_msg> as message. If the test fails, a detailed diagnostic
515is printed. For clients which use L<Test::More>, this is the one of the three
516functions (L</is_same_sql_bind>, L</is_same_sql>, L</is_same_bind>) that needs
517to be imported.
518
519=head2 eq_sql_bind
520
521 my $is_same = eq_sql_bind($given_sql, \@given_bind,
522 $expected_sql, \@expected_bind);
523
524Compares given and expected pairs of C<($sql, \@bind)>. Similar to
525L</is_same_sql_bind>, but it just returns a boolean value and does not print
526diagnostics or talk to L<Test::Builder>.
fffe6900 527
528=head2 eq_sql
529
530 my $is_same = eq_sql($given_sql, $expected_sql);
531
e7827ba2 532Compares the abstract syntax of two SQL statements. Similar to L</is_same_sql>,
533but it just returns a boolean value and does not print diagnostics or talk to
534L<Test::Builder>. If the result is false, the global variable L</$sql_differ>
535will contain the SQL portion where a difference was encountered; this is useful
536for printing diagnostics.
fffe6900 537
538=head2 eq_bind
539
540 my $is_same = eq_sql(\@given_bind, \@expected_bind);
541
e7827ba2 542Compares two lists of bind values, taking into account the fact that some of
543the values may be arrayrefs (see L<SQL::Abstract/bindtype>). Similar to
544L</is_same_bind>, but it just returns a boolean value and does not print
545diagnostics or talk to L<Test::Builder>.
fffe6900 546
547=head1 GLOBAL VARIABLES
548
e7827ba2 549=head2 $case_sensitive
fffe6900 550
551If true, SQL comparisons will be case-sensitive. Default is false;
552
e40f5df9 553=head2 $parenthesis_significant
554
555If true, SQL comparison will preserve and report difference in nested
556parenthesis. Useful for testing the C<-nest> modifier. Defaults to false;
557
e7827ba2 558=head2 $sql_differ
fffe6900 559
560When L</eq_sql> returns false, the global variable
561C<$sql_differ> contains the SQL portion
562where a difference was encountered.
563
564
565=head1 SEE ALSO
566
a6daa642 567L<SQL::Abstract>, L<Test::More>, L<Test::Builder>.
fffe6900 568
25823711 569=head1 AUTHORS
fffe6900 570
571Laurent Dami, E<lt>laurent.dami AT etat geneve chE<gt>
572
25823711 573Norbert Buchmuller <norbi@nix.hu>
574
e96c510a 575Peter Rabbitson <ribasushi@cpan.org>
576
fffe6900 577=head1 COPYRIGHT AND LICENSE
578
579Copyright 2008 by Laurent Dami.
580
581This library is free software; you can redistribute it and/or modify
582it under the same terms as Perl itself.