1 package DOM::Tiny::CSS;
5 use Class::Tiny::Chained 'tree';
7 our $VERSION = '0.001';
9 my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/;
12 ((?:$ESCAPE_RE|[\w\-])+) # Key
15 (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value
16 (?:\s+(i))? # Case-sensitivity
22 my $tree = shift->tree;
23 return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree);
26 sub select { _select(0, shift->tree, _compile(@_)) }
27 sub select_one { _select(1, shift->tree, _compile(@_)) }
30 my ($selectors, $current, $tree, $one, $pos) = @_;
32 while ($current = $current->[3]) {
33 return undef if $current->[0] eq 'root' || $current eq $tree;
34 return 1 if _combinator($selectors, $current, $tree, $pos);
42 my ($name_re, $value_re, $current) = @_;
44 my $attrs = $current->[2];
45 for my $name (keys %$attrs) {
46 next unless $name =~ $name_re;
47 return 1 unless defined $attrs->{$name} && defined $value_re;
48 return 1 if $attrs->{$name} =~ $value_re;
55 my ($selectors, $current, $tree, $pos) = @_;
58 return undef unless my $c = $selectors->[$pos];
60 return undef unless _selector($c, $current);
61 return 1 unless $c = $selectors->[++$pos];
65 return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>';
67 # "~" (preceding siblings)
68 return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~';
70 # "+" (immediately preceding siblings)
71 return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+';
74 return _ancestor($selectors, $current, $tree, 0, ++$pos);
83 while (my $selectors = $group->[-1]) {
84 push @$selectors, [] unless @$selectors && ref $selectors->[-1];
85 my $last = $selectors->[-1];
88 if ($css =~ /\G\s*,\s*/gc) { push @$group, [] }
91 elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 }
94 elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
95 my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', '');
96 push @$last, ['attr', _name($name), _value($op, $2)];
100 elsif ($css =~ /\G$ATTR_RE/gco) {
101 push @$last, ['attr', _name($1), _value($2 // '', $3 // $4 // $5, $6)];
104 # Pseudo-class (":not" contains more selectors)
105 elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) {
106 push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)];
110 elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
111 push @$last, ['tag', _name($1)] unless $1 eq '*';
120 sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' }
123 return [] unless my $equation = shift;
126 return [2, 2] if $equation =~ /^\s*even\s*$/i;
129 return [2, 1] if $equation =~ /^\s*odd\s*$/i;
133 return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i;
134 $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0;
135 $num->[0] = -1 if $num->[0] eq '-';
137 $num->[1] =~ s/\s+//g;
142 my ($group, $current, $tree) = @_;
143 _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group;
147 sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/}
150 my ($class, $args, $current) = @_;
153 return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty';
156 return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root';
159 return !_match($args, $current, $current) if $class eq 'not';
162 return exists $current->[2]{checked} || exists $current->[2]{selected}
163 if $class eq 'checked';
165 # ":first-*" or ":last-*" (rewrite with equation)
166 ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1])
167 if $class =~ s/^(?:(first)|last)-//;
170 if ($class =~ /^nth-/) {
171 my $type = $class =~ /of-type$/ ? $current->[1] : undef;
172 my @siblings = @{_siblings($current, $type)};
175 @siblings = reverse @siblings if $class =~ /^nth-last/;
177 for my $i (0 .. $#siblings) {
178 next if (my $result = $args->[0] * $i + $args->[1]) < 1;
179 last unless my $sibling = $siblings[$result - 1];
180 return 1 if $sibling eq $current;
185 elsif ($class =~ /^only-(?:child|(of-type))$/) {
186 $_ ne $current and return undef
187 for @{_siblings($current, $1 ? $current->[1] : undef)};
195 my ($one, $tree, $group) = @_;
198 my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
199 while (my $current = shift @queue) {
200 next unless $current->[0] eq 'tag';
202 unshift @queue, @$current[4 .. $#$current];
203 next unless _match($group, $current, $tree);
204 $one ? return $current : push @results, $current;
207 return $one ? undef : \@results;
211 my ($selector, $current) = @_;
213 for my $s (@$selector) {
217 if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] }
220 elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) }
223 elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) }
230 my ($selectors, $current, $tree, $immediate, $pos) = @_;
233 for my $sibling (@{_siblings($current)}) {
234 return $found if $sibling eq $current;
236 # "+" (immediately preceding sibling)
237 if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) }
239 # "~" (preceding sibling)
240 else { return 1 if _combinator($selectors, $sibling, $tree, $pos) }
247 my ($current, $type) = @_;
249 my $parent = $current->[3];
250 my @siblings = grep { $_->[0] eq 'tag' }
251 @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent];
252 @siblings = grep { $type eq $_->[1] } @siblings if defined $type;
260 # Remove escaped newlines
263 # Unescape Unicode characters
264 $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge;
273 my ($op, $value, $insensitive) = @_;
274 return undef unless defined $value;
275 $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value);
278 return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~';
281 return qr/$value/ if $op eq '*';
284 return qr/^$value/ if $op eq '^';
287 return qr/$value$/ if $op eq '$';
299 DOM::Tiny::CSS - CSS selector engine
305 # Select elements from DOM tree
306 my $css = DOM::Tiny::CSS->new(tree => $tree);
307 my $elements = $css->select('h1, h2, h3');
311 L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on
312 L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>.
316 All CSS selectors that make sense for a standalone parser are supported.
322 my $all = $css->select('*');
326 An element of type C<E>.
328 my $title = $css->select('title');
332 An C<E> element with a C<foo> attribute.
334 my $links = $css->select('a[href]');
338 An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
340 my $case_sensitive = $css->select('input[type="hidden"]');
341 my $case_sensitive = $css->select('input[type=hidden]');
343 =head2 E[foo="bar" i]
345 An C<E> element whose C<foo> attribute value is exactly equal to any
346 (ASCII-range) case-permutation of C<bar>. Note that this selector is
347 EXPERIMENTAL and might change without warning!
349 my $case_insensitive = $css->select('input[type="hidden" i]');
350 my $case_insensitive = $css->select('input[type=hidden i]');
351 my $case_insensitive = $css->select('input[class~="foo" i]');
353 This selector is part of
354 L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
359 An C<E> element whose C<foo> attribute value is a list of whitespace-separated
360 values, one of which is exactly equal to C<bar>.
362 my $foo = $css->select('input[class~="foo"]');
363 my $foo = $css->select('input[class~=foo]');
367 An C<E> element whose C<foo> attribute value begins exactly with the string
370 my $begins_with = $css->select('input[name^="f"]');
371 my $begins_with = $css->select('input[name^=f]');
375 An C<E> element whose C<foo> attribute value ends exactly with the string
378 my $ends_with = $css->select('input[name$="o"]');
379 my $ends_with = $css->select('input[name$=o]');
383 An C<E> element whose C<foo> attribute value contains the substring C<bar>.
385 my $contains = $css->select('input[name*="fo"]');
386 my $contains = $css->select('input[name*=fo]');
390 An C<E> element, root of the document.
392 my $root = $css->select(':root');
394 =head2 E:nth-child(n)
396 An C<E> element, the C<n-th> child of its parent.
398 my $third = $css->select('div:nth-child(3)');
399 my $odd = $css->select('div:nth-child(odd)');
400 my $even = $css->select('div:nth-child(even)');
401 my $top3 = $css->select('div:nth-child(-n+3)');
403 =head2 E:nth-last-child(n)
405 An C<E> element, the C<n-th> child of its parent, counting from the last one.
407 my $third = $css->select('div:nth-last-child(3)');
408 my $odd = $css->select('div:nth-last-child(odd)');
409 my $even = $css->select('div:nth-last-child(even)');
410 my $bottom3 = $css->select('div:nth-last-child(-n+3)');
412 =head2 E:nth-of-type(n)
414 An C<E> element, the C<n-th> sibling of its type.
416 my $third = $css->select('div:nth-of-type(3)');
417 my $odd = $css->select('div:nth-of-type(odd)');
418 my $even = $css->select('div:nth-of-type(even)');
419 my $top3 = $css->select('div:nth-of-type(-n+3)');
421 =head2 E:nth-last-of-type(n)
423 An C<E> element, the C<n-th> sibling of its type, counting from the last one.
425 my $third = $css->select('div:nth-last-of-type(3)');
426 my $odd = $css->select('div:nth-last-of-type(odd)');
427 my $even = $css->select('div:nth-last-of-type(even)');
428 my $bottom3 = $css->select('div:nth-last-of-type(-n+3)');
432 An C<E> element, first child of its parent.
434 my $first = $css->select('div p:first-child');
438 An C<E> element, last child of its parent.
440 my $last = $css->select('div p:last-child');
442 =head2 E:first-of-type
444 An C<E> element, first sibling of its type.
446 my $first = $css->select('div p:first-of-type');
448 =head2 E:last-of-type
450 An C<E> element, last sibling of its type.
452 my $last = $css->select('div p:last-of-type');
456 An C<E> element, only child of its parent.
458 my $lonely = $css->select('div p:only-child');
460 =head2 E:only-of-type
462 An C<E> element, only sibling of its type.
464 my $lonely = $css->select('div p:only-of-type');
468 An C<E> element that has no children (including text nodes).
470 my $empty = $css->select(':empty');
474 A user interface element C<E> which is checked (for instance a radio-button or
477 my $input = $css->select(':checked');
481 An C<E> element whose class is "warning".
483 my $warning = $css->select('div.warning');
487 An C<E> element with C<ID> equal to "myid".
489 my $foo = $css->select('div#foo');
493 An C<E> element that does not match simple selector C<s>.
495 my $others = $css->select('div p:not(:first-child)');
499 An C<F> element descendant of an C<E> element.
501 my $headlines = $css->select('div h1');
505 An C<F> element child of an C<E> element.
507 my $headlines = $css->select('html > body > div > h1');
511 An C<F> element immediately preceded by an C<E> element.
513 my $second = $css->select('h1 + h2');
517 An C<F> element preceded by an C<E> element.
519 my $second = $css->select('h1 ~ h2');
523 Elements of type C<E>, C<F> and C<G>.
525 my $headlines = $css->select('h1, h2, h3');
527 =head2 E[foo=bar][bar=baz]
529 An C<E> element whose attributes match all following attribute selectors.
531 my $links = $css->select('a[foo^=b][foo$=ar]');
535 L<DOM::Tiny::CSS> implements the following attributes.
539 my $tree = $css->tree;
540 $css = $css->tree(['root']);
542 Document Object Model. Note that this structure should only be used very
543 carefully since it is very dynamic.
547 L<DOM::Tiny::CSS> inherits a constructor from L<Class::Tiny::Object|Class::Tiny/"Object construction">,
548 and implements the following methods.
552 my $bool = $css->matches('head > title');
554 Check if first node in L</"tree"> matches the CSS selector.
558 my $results = $css->select('head > title');
560 Run CSS selector against L</"tree">.
564 my $result = $css->select_one('head > title');
566 Run CSS selector against L</"tree"> and stop as soon as the first node matched.
570 Report any issues on the public bugtracker.
574 Dan Book <dbook@cpan.org>
576 =head1 COPYRIGHT AND LICENSE
578 This software is Copyright (c) 2015 by Dan Book.
580 This is free software, licensed under:
582 The Artistic License 2.0 (GPL Compatible)