drop perl requirement to 5.8
[catagits/DOM-Tiny.git] / lib / DOM / Tiny / CSS.pm
CommitLineData
d6512b50 1package DOM::Tiny::CSS;
2
3use strict;
4use warnings;
5use Class::Tiny::Chained 'tree';
6
7our $VERSION = '0.001';
8
9my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/;
10my $ATTR_RE = qr/
11 \[
12 ((?:$ESCAPE_RE|[\w\-])+) # Key
13 (?:
14 (\W)?= # Operator
15 (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value
16 (?:\s+(i))? # Case-sensitivity
17 )?
18 \]
19/x;
20
21sub matches {
22 my $tree = shift->tree;
23 return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree);
24}
25
26sub select { _select(0, shift->tree, _compile(@_)) }
27sub select_one { _select(1, shift->tree, _compile(@_)) }
28
29sub _ancestor {
30 my ($selectors, $current, $tree, $one, $pos) = @_;
31
32 while ($current = $current->[3]) {
33 return undef if $current->[0] eq 'root' || $current eq $tree;
34 return 1 if _combinator($selectors, $current, $tree, $pos);
35 last if $one;
36 }
37
38 return undef;
39}
40
41sub _attr {
42 my ($name_re, $value_re, $current) = @_;
43
44 my $attrs = $current->[2];
45 for my $name (keys %$attrs) {
46 next unless $name =~ $name_re;
47 return 1 unless defined $attrs->{$name} && defined $value_re;
48 return 1 if $attrs->{$name} =~ $value_re;
49 }
50
51 return undef;
52}
53
54sub _combinator {
55 my ($selectors, $current, $tree, $pos) = @_;
56
57 # Selector
58 return undef unless my $c = $selectors->[$pos];
59 if (ref $c) {
60 return undef unless _selector($c, $current);
61 return 1 unless $c = $selectors->[++$pos];
62 }
63
64 # ">" (parent only)
65 return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>';
66
67 # "~" (preceding siblings)
68 return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~';
69
70 # "+" (immediately preceding siblings)
71 return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+';
72
73 # " " (ancestor)
74 return _ancestor($selectors, $current, $tree, 0, ++$pos);
75}
76
77sub _compile {
78 my $css = "$_[0]";
79 $css =~ s/^\s+//;
80 $css =~ s/\s+$//;
81
82 my $group = [[]];
83 while (my $selectors = $group->[-1]) {
84 push @$selectors, [] unless @$selectors && ref $selectors->[-1];
85 my $last = $selectors->[-1];
86
87 # Separator
88 if ($css =~ /\G\s*,\s*/gc) { push @$group, [] }
89
90 # Combinator
91 elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 }
92
93 # Class or ID
94 elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
95 my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', '');
96 push @$last, ['attr', _name($name), _value($op, $2)];
97 }
98
99 # Attributes
100 elsif ($css =~ /\G$ATTR_RE/gco) {
927f1351 101 my $op = defined $2 ? $2 : '';
102 my $value = defined $3 ? $3 : (defined $4 ? $4 : $5);
103 push @$last, ['attr', _name($1), _value($op, $value, $6)];
d6512b50 104 }
105
106 # Pseudo-class (":not" contains more selectors)
107 elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) {
108 push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)];
109 }
110
111 # Tag
112 elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
113 push @$last, ['tag', _name($1)] unless $1 eq '*';
114 }
115
116 else {last}
117 }
118
119 return $group;
120}
121
122sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' }
123
124sub _equation {
125 return [] unless my $equation = shift;
126
127 # "even"
128 return [2, 2] if $equation =~ /^\s*even\s*$/i;
129
130 # "odd"
131 return [2, 1] if $equation =~ /^\s*odd\s*$/i;
132
133 # Equation
134 my $num = [1, 1];
135 return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i;
136 $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0;
137 $num->[0] = -1 if $num->[0] eq '-';
927f1351 138 $num->[1] = $3 || 0;
d6512b50 139 $num->[1] =~ s/\s+//g;
140 return $num;
141}
142
143sub _match {
144 my ($group, $current, $tree) = @_;
145 _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group;
146 return undef;
147}
148
149sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/}
150
151sub _pc {
152 my ($class, $args, $current) = @_;
153
154 # ":empty"
155 return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty';
156
157 # ":root"
158 return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root';
159
160 # ":not"
161 return !_match($args, $current, $current) if $class eq 'not';
162
163 # ":checked"
164 return exists $current->[2]{checked} || exists $current->[2]{selected}
165 if $class eq 'checked';
166
167 # ":first-*" or ":last-*" (rewrite with equation)
168 ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1])
169 if $class =~ s/^(?:(first)|last)-//;
170
171 # ":nth-*"
172 if ($class =~ /^nth-/) {
173 my $type = $class =~ /of-type$/ ? $current->[1] : undef;
174 my @siblings = @{_siblings($current, $type)};
175
176 # ":nth-last-*"
177 @siblings = reverse @siblings if $class =~ /^nth-last/;
178
179 for my $i (0 .. $#siblings) {
180 next if (my $result = $args->[0] * $i + $args->[1]) < 1;
181 last unless my $sibling = $siblings[$result - 1];
182 return 1 if $sibling eq $current;
183 }
184 }
185
186 # ":only-*"
187 elsif ($class =~ /^only-(?:child|(of-type))$/) {
188 $_ ne $current and return undef
189 for @{_siblings($current, $1 ? $current->[1] : undef)};
190 return 1;
191 }
192
193 return undef;
194}
195
196sub _select {
197 my ($one, $tree, $group) = @_;
198
199 my @results;
200 my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
201 while (my $current = shift @queue) {
202 next unless $current->[0] eq 'tag';
203
204 unshift @queue, @$current[4 .. $#$current];
205 next unless _match($group, $current, $tree);
206 $one ? return $current : push @results, $current;
207 }
208
209 return $one ? undef : \@results;
210}
211
212sub _selector {
213 my ($selector, $current) = @_;
214
215 for my $s (@$selector) {
216 my $type = $s->[0];
217
218 # Tag
219 if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] }
220
221 # Attribute
222 elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) }
223
224 # Pseudo-class
225 elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) }
226 }
227
228 return 1;
229}
230
231sub _sibling {
232 my ($selectors, $current, $tree, $immediate, $pos) = @_;
233
234 my $found;
235 for my $sibling (@{_siblings($current)}) {
236 return $found if $sibling eq $current;
237
238 # "+" (immediately preceding sibling)
239 if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) }
240
241 # "~" (preceding sibling)
242 else { return 1 if _combinator($selectors, $sibling, $tree, $pos) }
243 }
244
245 return undef;
246}
247
248sub _siblings {
249 my ($current, $type) = @_;
250
251 my $parent = $current->[3];
252 my @siblings = grep { $_->[0] eq 'tag' }
253 @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent];
254 @siblings = grep { $type eq $_->[1] } @siblings if defined $type;
255
256 return \@siblings;
257}
258
259sub _unescape {
260 my $value = shift;
261
262 # Remove escaped newlines
263 $value =~ s/\\\n//g;
264
265 # Unescape Unicode characters
266 $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge;
267
268 # Remove backslash
269 $value =~ s/\\//g;
270
271 return $value;
272}
273
274sub _value {
275 my ($op, $value, $insensitive) = @_;
276 return undef unless defined $value;
277 $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value);
278
279 # "~=" (word)
280 return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~';
281
282 # "*=" (contains)
283 return qr/$value/ if $op eq '*';
284
285 # "^=" (begins with)
286 return qr/^$value/ if $op eq '^';
287
288 # "$=" (ends with)
289 return qr/$value$/ if $op eq '$';
290
291 # Everything else
292 return qr/^$value$/;
293}
294
2951;
296
297=encoding utf8
298
299=head1 NAME
300
301DOM::Tiny::CSS - CSS selector engine
302
303=head1 SYNOPSIS
304
305 use DOM::Tiny::CSS;
306
307 # Select elements from DOM tree
308 my $css = DOM::Tiny::CSS->new(tree => $tree);
309 my $elements = $css->select('h1, h2, h3');
310
311=head1 DESCRIPTION
312
313L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on
314L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>.
315
316=head1 SELECTORS
317
318All CSS selectors that make sense for a standalone parser are supported.
319
320=head2 *
321
322Any element.
323
324 my $all = $css->select('*');
325
326=head2 E
327
328An element of type C<E>.
329
330 my $title = $css->select('title');
331
332=head2 E[foo]
333
334An C<E> element with a C<foo> attribute.
335
336 my $links = $css->select('a[href]');
337
338=head2 E[foo="bar"]
339
340An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
341
342 my $case_sensitive = $css->select('input[type="hidden"]');
343 my $case_sensitive = $css->select('input[type=hidden]');
344
345=head2 E[foo="bar" i]
346
347An C<E> element whose C<foo> attribute value is exactly equal to any
348(ASCII-range) case-permutation of C<bar>. Note that this selector is
349EXPERIMENTAL and might change without warning!
350
351 my $case_insensitive = $css->select('input[type="hidden" i]');
352 my $case_insensitive = $css->select('input[type=hidden i]');
353 my $case_insensitive = $css->select('input[class~="foo" i]');
354
355This selector is part of
356L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
357in progress.
358
359=head2 E[foo~="bar"]
360
361An C<E> element whose C<foo> attribute value is a list of whitespace-separated
362values, one of which is exactly equal to C<bar>.
363
364 my $foo = $css->select('input[class~="foo"]');
365 my $foo = $css->select('input[class~=foo]');
366
367=head2 E[foo^="bar"]
368
369An C<E> element whose C<foo> attribute value begins exactly with the string
370C<bar>.
371
372 my $begins_with = $css->select('input[name^="f"]');
373 my $begins_with = $css->select('input[name^=f]');
374
375=head2 E[foo$="bar"]
376
377An C<E> element whose C<foo> attribute value ends exactly with the string
378C<bar>.
379
380 my $ends_with = $css->select('input[name$="o"]');
381 my $ends_with = $css->select('input[name$=o]');
382
383=head2 E[foo*="bar"]
384
385An C<E> element whose C<foo> attribute value contains the substring C<bar>.
386
387 my $contains = $css->select('input[name*="fo"]');
388 my $contains = $css->select('input[name*=fo]');
389
390=head2 E:root
391
392An C<E> element, root of the document.
393
394 my $root = $css->select(':root');
395
396=head2 E:nth-child(n)
397
398An C<E> element, the C<n-th> child of its parent.
399
400 my $third = $css->select('div:nth-child(3)');
401 my $odd = $css->select('div:nth-child(odd)');
402 my $even = $css->select('div:nth-child(even)');
403 my $top3 = $css->select('div:nth-child(-n+3)');
404
405=head2 E:nth-last-child(n)
406
407An C<E> element, the C<n-th> child of its parent, counting from the last one.
408
409 my $third = $css->select('div:nth-last-child(3)');
410 my $odd = $css->select('div:nth-last-child(odd)');
411 my $even = $css->select('div:nth-last-child(even)');
412 my $bottom3 = $css->select('div:nth-last-child(-n+3)');
413
414=head2 E:nth-of-type(n)
415
416An C<E> element, the C<n-th> sibling of its type.
417
418 my $third = $css->select('div:nth-of-type(3)');
419 my $odd = $css->select('div:nth-of-type(odd)');
420 my $even = $css->select('div:nth-of-type(even)');
421 my $top3 = $css->select('div:nth-of-type(-n+3)');
422
423=head2 E:nth-last-of-type(n)
424
425An C<E> element, the C<n-th> sibling of its type, counting from the last one.
426
427 my $third = $css->select('div:nth-last-of-type(3)');
428 my $odd = $css->select('div:nth-last-of-type(odd)');
429 my $even = $css->select('div:nth-last-of-type(even)');
430 my $bottom3 = $css->select('div:nth-last-of-type(-n+3)');
431
432=head2 E:first-child
433
434An C<E> element, first child of its parent.
435
436 my $first = $css->select('div p:first-child');
437
438=head2 E:last-child
439
440An C<E> element, last child of its parent.
441
442 my $last = $css->select('div p:last-child');
443
444=head2 E:first-of-type
445
446An C<E> element, first sibling of its type.
447
448 my $first = $css->select('div p:first-of-type');
449
450=head2 E:last-of-type
451
452An C<E> element, last sibling of its type.
453
454 my $last = $css->select('div p:last-of-type');
455
456=head2 E:only-child
457
458An C<E> element, only child of its parent.
459
460 my $lonely = $css->select('div p:only-child');
461
462=head2 E:only-of-type
463
464An C<E> element, only sibling of its type.
465
466 my $lonely = $css->select('div p:only-of-type');
467
468=head2 E:empty
469
470An C<E> element that has no children (including text nodes).
471
472 my $empty = $css->select(':empty');
473
474=head2 E:checked
475
476A user interface element C<E> which is checked (for instance a radio-button or
477checkbox).
478
479 my $input = $css->select(':checked');
480
481=head2 E.warning
482
483An C<E> element whose class is "warning".
484
485 my $warning = $css->select('div.warning');
486
487=head2 E#myid
488
489An C<E> element with C<ID> equal to "myid".
490
491 my $foo = $css->select('div#foo');
492
493=head2 E:not(s)
494
495An C<E> element that does not match simple selector C<s>.
496
497 my $others = $css->select('div p:not(:first-child)');
498
499=head2 E F
500
501An C<F> element descendant of an C<E> element.
502
503 my $headlines = $css->select('div h1');
504
505=head2 E E<gt> F
506
507An C<F> element child of an C<E> element.
508
509 my $headlines = $css->select('html > body > div > h1');
510
511=head2 E + F
512
513An C<F> element immediately preceded by an C<E> element.
514
515 my $second = $css->select('h1 + h2');
516
517=head2 E ~ F
518
519An C<F> element preceded by an C<E> element.
520
521 my $second = $css->select('h1 ~ h2');
522
523=head2 E, F, G
524
525Elements of type C<E>, C<F> and C<G>.
526
527 my $headlines = $css->select('h1, h2, h3');
528
529=head2 E[foo=bar][bar=baz]
530
531An C<E> element whose attributes match all following attribute selectors.
532
533 my $links = $css->select('a[foo^=b][foo$=ar]');
534
535=head1 ATTRIBUTES
536
537L<DOM::Tiny::CSS> implements the following attributes.
538
539=head2 tree
540
541 my $tree = $css->tree;
542 $css = $css->tree(['root']);
543
544Document Object Model. Note that this structure should only be used very
545carefully since it is very dynamic.
546
547=head1 METHODS
548
549L<DOM::Tiny::CSS> implements the following methods.
550
551=head2 matches
552
553 my $bool = $css->matches('head > title');
554
555Check if first node in L</"tree"> matches the CSS selector.
556
557=head2 select
558
559 my $results = $css->select('head > title');
560
561Run CSS selector against L</"tree">.
562
563=head2 select_one
564
565 my $result = $css->select_one('head > title');
566
567Run CSS selector against L</"tree"> and stop as soon as the first node matched.
568
569=head1 BUGS
570
571Report any issues on the public bugtracker.
572
573=head1 AUTHOR
574
575Dan Book <dbook@cpan.org>
576
577=head1 COPYRIGHT AND LICENSE
578
579This software is Copyright (c) 2015 by Dan Book.
580
581This is free software, licensed under:
582
583 The Artistic License 2.0 (GPL Compatible)
584
585=head1 SEE ALSO
586
587L<Mojo::DOM::CSS>