Commit | Line | Data |
d6512b50 |
1 | package DOM::Tiny::CSS; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Class::Tiny::Chained 'tree'; |
6 | |
7 | our $VERSION = '0.001'; |
8 | |
9 | my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/; |
10 | my $ATTR_RE = qr/ |
11 | \[ |
12 | ((?:$ESCAPE_RE|[\w\-])+) # Key |
13 | (?: |
14 | (\W)?= # Operator |
15 | (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value |
16 | (?:\s+(i))? # Case-sensitivity |
17 | )? |
18 | \] |
19 | /x; |
20 | |
21 | sub matches { |
22 | my $tree = shift->tree; |
23 | return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree); |
24 | } |
25 | |
26 | sub select { _select(0, shift->tree, _compile(@_)) } |
27 | sub select_one { _select(1, shift->tree, _compile(@_)) } |
28 | |
29 | sub _ancestor { |
30 | my ($selectors, $current, $tree, $one, $pos) = @_; |
31 | |
32 | while ($current = $current->[3]) { |
33 | return undef if $current->[0] eq 'root' || $current eq $tree; |
34 | return 1 if _combinator($selectors, $current, $tree, $pos); |
35 | last if $one; |
36 | } |
37 | |
38 | return undef; |
39 | } |
40 | |
41 | sub _attr { |
42 | my ($name_re, $value_re, $current) = @_; |
43 | |
44 | my $attrs = $current->[2]; |
45 | for my $name (keys %$attrs) { |
46 | next unless $name =~ $name_re; |
47 | return 1 unless defined $attrs->{$name} && defined $value_re; |
48 | return 1 if $attrs->{$name} =~ $value_re; |
49 | } |
50 | |
51 | return undef; |
52 | } |
53 | |
54 | sub _combinator { |
55 | my ($selectors, $current, $tree, $pos) = @_; |
56 | |
57 | # Selector |
58 | return undef unless my $c = $selectors->[$pos]; |
59 | if (ref $c) { |
60 | return undef unless _selector($c, $current); |
61 | return 1 unless $c = $selectors->[++$pos]; |
62 | } |
63 | |
64 | # ">" (parent only) |
65 | return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>'; |
66 | |
67 | # "~" (preceding siblings) |
68 | return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~'; |
69 | |
70 | # "+" (immediately preceding siblings) |
71 | return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+'; |
72 | |
73 | # " " (ancestor) |
74 | return _ancestor($selectors, $current, $tree, 0, ++$pos); |
75 | } |
76 | |
77 | sub _compile { |
78 | my $css = "$_[0]"; |
79 | $css =~ s/^\s+//; |
80 | $css =~ s/\s+$//; |
81 | |
82 | my $group = [[]]; |
83 | while (my $selectors = $group->[-1]) { |
84 | push @$selectors, [] unless @$selectors && ref $selectors->[-1]; |
85 | my $last = $selectors->[-1]; |
86 | |
87 | # Separator |
88 | if ($css =~ /\G\s*,\s*/gc) { push @$group, [] } |
89 | |
90 | # Combinator |
91 | elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 } |
92 | |
93 | # Class or ID |
94 | elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
95 | my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', ''); |
96 | push @$last, ['attr', _name($name), _value($op, $2)]; |
97 | } |
98 | |
99 | # Attributes |
100 | elsif ($css =~ /\G$ATTR_RE/gco) { |
91880340 |
101 | push @$last, ['attr', _name($1), _value($2 // '', $3 // $4 // $5, $6)]; |
d6512b50 |
102 | } |
103 | |
104 | # Pseudo-class (":not" contains more selectors) |
105 | elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) { |
106 | push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)]; |
107 | } |
108 | |
109 | # Tag |
110 | elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
111 | push @$last, ['tag', _name($1)] unless $1 eq '*'; |
112 | } |
113 | |
114 | else {last} |
115 | } |
116 | |
117 | return $group; |
118 | } |
119 | |
120 | sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' } |
121 | |
122 | sub _equation { |
123 | return [] unless my $equation = shift; |
124 | |
125 | # "even" |
126 | return [2, 2] if $equation =~ /^\s*even\s*$/i; |
127 | |
128 | # "odd" |
129 | return [2, 1] if $equation =~ /^\s*odd\s*$/i; |
130 | |
131 | # Equation |
132 | my $num = [1, 1]; |
133 | return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i; |
134 | $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0; |
135 | $num->[0] = -1 if $num->[0] eq '-'; |
91880340 |
136 | $num->[1] = $3 // 0; |
d6512b50 |
137 | $num->[1] =~ s/\s+//g; |
138 | return $num; |
139 | } |
140 | |
141 | sub _match { |
142 | my ($group, $current, $tree) = @_; |
143 | _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group; |
144 | return undef; |
145 | } |
146 | |
147 | sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/} |
148 | |
149 | sub _pc { |
150 | my ($class, $args, $current) = @_; |
151 | |
152 | # ":empty" |
153 | return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty'; |
154 | |
155 | # ":root" |
156 | return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root'; |
157 | |
158 | # ":not" |
159 | return !_match($args, $current, $current) if $class eq 'not'; |
160 | |
161 | # ":checked" |
162 | return exists $current->[2]{checked} || exists $current->[2]{selected} |
163 | if $class eq 'checked'; |
164 | |
165 | # ":first-*" or ":last-*" (rewrite with equation) |
166 | ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1]) |
167 | if $class =~ s/^(?:(first)|last)-//; |
168 | |
169 | # ":nth-*" |
170 | if ($class =~ /^nth-/) { |
171 | my $type = $class =~ /of-type$/ ? $current->[1] : undef; |
172 | my @siblings = @{_siblings($current, $type)}; |
173 | |
174 | # ":nth-last-*" |
175 | @siblings = reverse @siblings if $class =~ /^nth-last/; |
176 | |
177 | for my $i (0 .. $#siblings) { |
178 | next if (my $result = $args->[0] * $i + $args->[1]) < 1; |
179 | last unless my $sibling = $siblings[$result - 1]; |
180 | return 1 if $sibling eq $current; |
181 | } |
182 | } |
183 | |
184 | # ":only-*" |
185 | elsif ($class =~ /^only-(?:child|(of-type))$/) { |
186 | $_ ne $current and return undef |
187 | for @{_siblings($current, $1 ? $current->[1] : undef)}; |
188 | return 1; |
189 | } |
190 | |
191 | return undef; |
192 | } |
193 | |
194 | sub _select { |
195 | my ($one, $tree, $group) = @_; |
196 | |
197 | my @results; |
198 | my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree]; |
199 | while (my $current = shift @queue) { |
200 | next unless $current->[0] eq 'tag'; |
201 | |
202 | unshift @queue, @$current[4 .. $#$current]; |
203 | next unless _match($group, $current, $tree); |
204 | $one ? return $current : push @results, $current; |
205 | } |
206 | |
207 | return $one ? undef : \@results; |
208 | } |
209 | |
210 | sub _selector { |
211 | my ($selector, $current) = @_; |
212 | |
213 | for my $s (@$selector) { |
214 | my $type = $s->[0]; |
215 | |
216 | # Tag |
217 | if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] } |
218 | |
219 | # Attribute |
220 | elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) } |
221 | |
222 | # Pseudo-class |
223 | elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) } |
224 | } |
225 | |
226 | return 1; |
227 | } |
228 | |
229 | sub _sibling { |
230 | my ($selectors, $current, $tree, $immediate, $pos) = @_; |
231 | |
232 | my $found; |
233 | for my $sibling (@{_siblings($current)}) { |
234 | return $found if $sibling eq $current; |
235 | |
236 | # "+" (immediately preceding sibling) |
237 | if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) } |
238 | |
239 | # "~" (preceding sibling) |
240 | else { return 1 if _combinator($selectors, $sibling, $tree, $pos) } |
241 | } |
242 | |
243 | return undef; |
244 | } |
245 | |
246 | sub _siblings { |
247 | my ($current, $type) = @_; |
248 | |
249 | my $parent = $current->[3]; |
250 | my @siblings = grep { $_->[0] eq 'tag' } |
251 | @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent]; |
252 | @siblings = grep { $type eq $_->[1] } @siblings if defined $type; |
253 | |
254 | return \@siblings; |
255 | } |
256 | |
257 | sub _unescape { |
258 | my $value = shift; |
259 | |
260 | # Remove escaped newlines |
261 | $value =~ s/\\\n//g; |
262 | |
263 | # Unescape Unicode characters |
264 | $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge; |
265 | |
266 | # Remove backslash |
267 | $value =~ s/\\//g; |
268 | |
269 | return $value; |
270 | } |
271 | |
272 | sub _value { |
273 | my ($op, $value, $insensitive) = @_; |
274 | return undef unless defined $value; |
275 | $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value); |
276 | |
277 | # "~=" (word) |
278 | return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~'; |
279 | |
280 | # "*=" (contains) |
281 | return qr/$value/ if $op eq '*'; |
282 | |
283 | # "^=" (begins with) |
284 | return qr/^$value/ if $op eq '^'; |
285 | |
286 | # "$=" (ends with) |
287 | return qr/$value$/ if $op eq '$'; |
288 | |
289 | # Everything else |
290 | return qr/^$value$/; |
291 | } |
292 | |
293 | 1; |
294 | |
295 | =encoding utf8 |
296 | |
297 | =head1 NAME |
298 | |
299 | DOM::Tiny::CSS - CSS selector engine |
300 | |
301 | =head1 SYNOPSIS |
302 | |
303 | use DOM::Tiny::CSS; |
304 | |
305 | # Select elements from DOM tree |
306 | my $css = DOM::Tiny::CSS->new(tree => $tree); |
307 | my $elements = $css->select('h1, h2, h3'); |
308 | |
309 | =head1 DESCRIPTION |
310 | |
311 | L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on |
312 | L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>. |
313 | |
314 | =head1 SELECTORS |
315 | |
316 | All CSS selectors that make sense for a standalone parser are supported. |
317 | |
318 | =head2 * |
319 | |
320 | Any element. |
321 | |
322 | my $all = $css->select('*'); |
323 | |
324 | =head2 E |
325 | |
326 | An element of type C<E>. |
327 | |
328 | my $title = $css->select('title'); |
329 | |
330 | =head2 E[foo] |
331 | |
332 | An C<E> element with a C<foo> attribute. |
333 | |
334 | my $links = $css->select('a[href]'); |
335 | |
336 | =head2 E[foo="bar"] |
337 | |
338 | An C<E> element whose C<foo> attribute value is exactly equal to C<bar>. |
339 | |
340 | my $case_sensitive = $css->select('input[type="hidden"]'); |
341 | my $case_sensitive = $css->select('input[type=hidden]'); |
342 | |
343 | =head2 E[foo="bar" i] |
344 | |
345 | An C<E> element whose C<foo> attribute value is exactly equal to any |
346 | (ASCII-range) case-permutation of C<bar>. Note that this selector is |
347 | EXPERIMENTAL and might change without warning! |
348 | |
349 | my $case_insensitive = $css->select('input[type="hidden" i]'); |
350 | my $case_insensitive = $css->select('input[type=hidden i]'); |
351 | my $case_insensitive = $css->select('input[class~="foo" i]'); |
352 | |
353 | This selector is part of |
354 | L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work |
355 | in progress. |
356 | |
357 | =head2 E[foo~="bar"] |
358 | |
359 | An C<E> element whose C<foo> attribute value is a list of whitespace-separated |
360 | values, one of which is exactly equal to C<bar>. |
361 | |
362 | my $foo = $css->select('input[class~="foo"]'); |
363 | my $foo = $css->select('input[class~=foo]'); |
364 | |
365 | =head2 E[foo^="bar"] |
366 | |
367 | An C<E> element whose C<foo> attribute value begins exactly with the string |
368 | C<bar>. |
369 | |
370 | my $begins_with = $css->select('input[name^="f"]'); |
371 | my $begins_with = $css->select('input[name^=f]'); |
372 | |
373 | =head2 E[foo$="bar"] |
374 | |
375 | An C<E> element whose C<foo> attribute value ends exactly with the string |
376 | C<bar>. |
377 | |
378 | my $ends_with = $css->select('input[name$="o"]'); |
379 | my $ends_with = $css->select('input[name$=o]'); |
380 | |
381 | =head2 E[foo*="bar"] |
382 | |
383 | An C<E> element whose C<foo> attribute value contains the substring C<bar>. |
384 | |
385 | my $contains = $css->select('input[name*="fo"]'); |
386 | my $contains = $css->select('input[name*=fo]'); |
387 | |
388 | =head2 E:root |
389 | |
390 | An C<E> element, root of the document. |
391 | |
392 | my $root = $css->select(':root'); |
393 | |
394 | =head2 E:nth-child(n) |
395 | |
396 | An C<E> element, the C<n-th> child of its parent. |
397 | |
398 | my $third = $css->select('div:nth-child(3)'); |
399 | my $odd = $css->select('div:nth-child(odd)'); |
400 | my $even = $css->select('div:nth-child(even)'); |
401 | my $top3 = $css->select('div:nth-child(-n+3)'); |
402 | |
403 | =head2 E:nth-last-child(n) |
404 | |
405 | An C<E> element, the C<n-th> child of its parent, counting from the last one. |
406 | |
407 | my $third = $css->select('div:nth-last-child(3)'); |
408 | my $odd = $css->select('div:nth-last-child(odd)'); |
409 | my $even = $css->select('div:nth-last-child(even)'); |
410 | my $bottom3 = $css->select('div:nth-last-child(-n+3)'); |
411 | |
412 | =head2 E:nth-of-type(n) |
413 | |
414 | An C<E> element, the C<n-th> sibling of its type. |
415 | |
416 | my $third = $css->select('div:nth-of-type(3)'); |
417 | my $odd = $css->select('div:nth-of-type(odd)'); |
418 | my $even = $css->select('div:nth-of-type(even)'); |
419 | my $top3 = $css->select('div:nth-of-type(-n+3)'); |
420 | |
421 | =head2 E:nth-last-of-type(n) |
422 | |
423 | An C<E> element, the C<n-th> sibling of its type, counting from the last one. |
424 | |
425 | my $third = $css->select('div:nth-last-of-type(3)'); |
426 | my $odd = $css->select('div:nth-last-of-type(odd)'); |
427 | my $even = $css->select('div:nth-last-of-type(even)'); |
428 | my $bottom3 = $css->select('div:nth-last-of-type(-n+3)'); |
429 | |
430 | =head2 E:first-child |
431 | |
432 | An C<E> element, first child of its parent. |
433 | |
434 | my $first = $css->select('div p:first-child'); |
435 | |
436 | =head2 E:last-child |
437 | |
438 | An C<E> element, last child of its parent. |
439 | |
440 | my $last = $css->select('div p:last-child'); |
441 | |
442 | =head2 E:first-of-type |
443 | |
444 | An C<E> element, first sibling of its type. |
445 | |
446 | my $first = $css->select('div p:first-of-type'); |
447 | |
448 | =head2 E:last-of-type |
449 | |
450 | An C<E> element, last sibling of its type. |
451 | |
452 | my $last = $css->select('div p:last-of-type'); |
453 | |
454 | =head2 E:only-child |
455 | |
456 | An C<E> element, only child of its parent. |
457 | |
458 | my $lonely = $css->select('div p:only-child'); |
459 | |
460 | =head2 E:only-of-type |
461 | |
462 | An C<E> element, only sibling of its type. |
463 | |
464 | my $lonely = $css->select('div p:only-of-type'); |
465 | |
466 | =head2 E:empty |
467 | |
468 | An C<E> element that has no children (including text nodes). |
469 | |
470 | my $empty = $css->select(':empty'); |
471 | |
472 | =head2 E:checked |
473 | |
474 | A user interface element C<E> which is checked (for instance a radio-button or |
475 | checkbox). |
476 | |
477 | my $input = $css->select(':checked'); |
478 | |
479 | =head2 E.warning |
480 | |
481 | An C<E> element whose class is "warning". |
482 | |
483 | my $warning = $css->select('div.warning'); |
484 | |
485 | =head2 E#myid |
486 | |
487 | An C<E> element with C<ID> equal to "myid". |
488 | |
489 | my $foo = $css->select('div#foo'); |
490 | |
491 | =head2 E:not(s) |
492 | |
493 | An C<E> element that does not match simple selector C<s>. |
494 | |
495 | my $others = $css->select('div p:not(:first-child)'); |
496 | |
497 | =head2 E F |
498 | |
499 | An C<F> element descendant of an C<E> element. |
500 | |
501 | my $headlines = $css->select('div h1'); |
502 | |
503 | =head2 E E<gt> F |
504 | |
505 | An C<F> element child of an C<E> element. |
506 | |
507 | my $headlines = $css->select('html > body > div > h1'); |
508 | |
509 | =head2 E + F |
510 | |
511 | An C<F> element immediately preceded by an C<E> element. |
512 | |
513 | my $second = $css->select('h1 + h2'); |
514 | |
515 | =head2 E ~ F |
516 | |
517 | An C<F> element preceded by an C<E> element. |
518 | |
519 | my $second = $css->select('h1 ~ h2'); |
520 | |
521 | =head2 E, F, G |
522 | |
523 | Elements of type C<E>, C<F> and C<G>. |
524 | |
525 | my $headlines = $css->select('h1, h2, h3'); |
526 | |
527 | =head2 E[foo=bar][bar=baz] |
528 | |
529 | An C<E> element whose attributes match all following attribute selectors. |
530 | |
531 | my $links = $css->select('a[foo^=b][foo$=ar]'); |
532 | |
533 | =head1 ATTRIBUTES |
534 | |
535 | L<DOM::Tiny::CSS> implements the following attributes. |
536 | |
537 | =head2 tree |
538 | |
539 | my $tree = $css->tree; |
540 | $css = $css->tree(['root']); |
541 | |
542 | Document Object Model. Note that this structure should only be used very |
543 | carefully since it is very dynamic. |
544 | |
545 | =head1 METHODS |
546 | |
547 | L<DOM::Tiny::CSS> implements the following methods. |
548 | |
549 | =head2 matches |
550 | |
551 | my $bool = $css->matches('head > title'); |
552 | |
553 | Check if first node in L</"tree"> matches the CSS selector. |
554 | |
555 | =head2 select |
556 | |
557 | my $results = $css->select('head > title'); |
558 | |
559 | Run CSS selector against L</"tree">. |
560 | |
561 | =head2 select_one |
562 | |
563 | my $result = $css->select_one('head > title'); |
564 | |
565 | Run CSS selector against L</"tree"> and stop as soon as the first node matched. |
566 | |
567 | =head1 BUGS |
568 | |
569 | Report any issues on the public bugtracker. |
570 | |
571 | =head1 AUTHOR |
572 | |
573 | Dan Book <dbook@cpan.org> |
574 | |
575 | =head1 COPYRIGHT AND LICENSE |
576 | |
577 | This software is Copyright (c) 2015 by Dan Book. |
578 | |
579 | This is free software, licensed under: |
580 | |
581 | The Artistic License 2.0 (GPL Compatible) |
582 | |
583 | =head1 SEE ALSO |
584 | |
585 | L<Mojo::DOM::CSS> |