Commit | Line | Data |
d6512b50 |
1 | package DOM::Tiny::CSS; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Class::Tiny::Chained 'tree'; |
6 | |
7 | our $VERSION = '0.001'; |
8 | |
9 | my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/; |
10 | my $ATTR_RE = qr/ |
11 | \[ |
12 | ((?:$ESCAPE_RE|[\w\-])+) # Key |
13 | (?: |
14 | (\W)?= # Operator |
15 | (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value |
16 | (?:\s+(i))? # Case-sensitivity |
17 | )? |
18 | \] |
19 | /x; |
20 | |
21 | sub matches { |
22 | my $tree = shift->tree; |
23 | return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree); |
24 | } |
25 | |
26 | sub select { _select(0, shift->tree, _compile(@_)) } |
27 | sub select_one { _select(1, shift->tree, _compile(@_)) } |
28 | |
29 | sub _ancestor { |
30 | my ($selectors, $current, $tree, $one, $pos) = @_; |
31 | |
32 | while ($current = $current->[3]) { |
33 | return undef if $current->[0] eq 'root' || $current eq $tree; |
34 | return 1 if _combinator($selectors, $current, $tree, $pos); |
35 | last if $one; |
36 | } |
37 | |
38 | return undef; |
39 | } |
40 | |
41 | sub _attr { |
42 | my ($name_re, $value_re, $current) = @_; |
43 | |
44 | my $attrs = $current->[2]; |
45 | for my $name (keys %$attrs) { |
46 | next unless $name =~ $name_re; |
47 | return 1 unless defined $attrs->{$name} && defined $value_re; |
48 | return 1 if $attrs->{$name} =~ $value_re; |
49 | } |
50 | |
51 | return undef; |
52 | } |
53 | |
54 | sub _combinator { |
55 | my ($selectors, $current, $tree, $pos) = @_; |
56 | |
57 | # Selector |
58 | return undef unless my $c = $selectors->[$pos]; |
59 | if (ref $c) { |
60 | return undef unless _selector($c, $current); |
61 | return 1 unless $c = $selectors->[++$pos]; |
62 | } |
63 | |
64 | # ">" (parent only) |
65 | return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>'; |
66 | |
67 | # "~" (preceding siblings) |
68 | return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~'; |
69 | |
70 | # "+" (immediately preceding siblings) |
71 | return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+'; |
72 | |
73 | # " " (ancestor) |
74 | return _ancestor($selectors, $current, $tree, 0, ++$pos); |
75 | } |
76 | |
77 | sub _compile { |
78 | my $css = "$_[0]"; |
79 | $css =~ s/^\s+//; |
80 | $css =~ s/\s+$//; |
81 | |
82 | my $group = [[]]; |
83 | while (my $selectors = $group->[-1]) { |
84 | push @$selectors, [] unless @$selectors && ref $selectors->[-1]; |
85 | my $last = $selectors->[-1]; |
86 | |
87 | # Separator |
88 | if ($css =~ /\G\s*,\s*/gc) { push @$group, [] } |
89 | |
90 | # Combinator |
91 | elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 } |
92 | |
93 | # Class or ID |
94 | elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
95 | my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', ''); |
96 | push @$last, ['attr', _name($name), _value($op, $2)]; |
97 | } |
98 | |
99 | # Attributes |
100 | elsif ($css =~ /\G$ATTR_RE/gco) { |
927f1351 |
101 | my $op = defined $2 ? $2 : ''; |
102 | my $value = defined $3 ? $3 : (defined $4 ? $4 : $5); |
103 | push @$last, ['attr', _name($1), _value($op, $value, $6)]; |
d6512b50 |
104 | } |
105 | |
106 | # Pseudo-class (":not" contains more selectors) |
107 | elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) { |
108 | push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)]; |
109 | } |
110 | |
111 | # Tag |
112 | elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
113 | push @$last, ['tag', _name($1)] unless $1 eq '*'; |
114 | } |
115 | |
116 | else {last} |
117 | } |
118 | |
119 | return $group; |
120 | } |
121 | |
122 | sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' } |
123 | |
124 | sub _equation { |
125 | return [] unless my $equation = shift; |
126 | |
127 | # "even" |
128 | return [2, 2] if $equation =~ /^\s*even\s*$/i; |
129 | |
130 | # "odd" |
131 | return [2, 1] if $equation =~ /^\s*odd\s*$/i; |
132 | |
133 | # Equation |
134 | my $num = [1, 1]; |
135 | return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i; |
136 | $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0; |
137 | $num->[0] = -1 if $num->[0] eq '-'; |
927f1351 |
138 | $num->[1] = $3 || 0; |
d6512b50 |
139 | $num->[1] =~ s/\s+//g; |
140 | return $num; |
141 | } |
142 | |
143 | sub _match { |
144 | my ($group, $current, $tree) = @_; |
145 | _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group; |
146 | return undef; |
147 | } |
148 | |
149 | sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/} |
150 | |
151 | sub _pc { |
152 | my ($class, $args, $current) = @_; |
153 | |
154 | # ":empty" |
155 | return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty'; |
156 | |
157 | # ":root" |
158 | return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root'; |
159 | |
160 | # ":not" |
161 | return !_match($args, $current, $current) if $class eq 'not'; |
162 | |
163 | # ":checked" |
164 | return exists $current->[2]{checked} || exists $current->[2]{selected} |
165 | if $class eq 'checked'; |
166 | |
167 | # ":first-*" or ":last-*" (rewrite with equation) |
168 | ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1]) |
169 | if $class =~ s/^(?:(first)|last)-//; |
170 | |
171 | # ":nth-*" |
172 | if ($class =~ /^nth-/) { |
173 | my $type = $class =~ /of-type$/ ? $current->[1] : undef; |
174 | my @siblings = @{_siblings($current, $type)}; |
175 | |
176 | # ":nth-last-*" |
177 | @siblings = reverse @siblings if $class =~ /^nth-last/; |
178 | |
179 | for my $i (0 .. $#siblings) { |
180 | next if (my $result = $args->[0] * $i + $args->[1]) < 1; |
181 | last unless my $sibling = $siblings[$result - 1]; |
182 | return 1 if $sibling eq $current; |
183 | } |
184 | } |
185 | |
186 | # ":only-*" |
187 | elsif ($class =~ /^only-(?:child|(of-type))$/) { |
188 | $_ ne $current and return undef |
189 | for @{_siblings($current, $1 ? $current->[1] : undef)}; |
190 | return 1; |
191 | } |
192 | |
193 | return undef; |
194 | } |
195 | |
196 | sub _select { |
197 | my ($one, $tree, $group) = @_; |
198 | |
199 | my @results; |
200 | my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree]; |
201 | while (my $current = shift @queue) { |
202 | next unless $current->[0] eq 'tag'; |
203 | |
204 | unshift @queue, @$current[4 .. $#$current]; |
205 | next unless _match($group, $current, $tree); |
206 | $one ? return $current : push @results, $current; |
207 | } |
208 | |
209 | return $one ? undef : \@results; |
210 | } |
211 | |
212 | sub _selector { |
213 | my ($selector, $current) = @_; |
214 | |
215 | for my $s (@$selector) { |
216 | my $type = $s->[0]; |
217 | |
218 | # Tag |
219 | if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] } |
220 | |
221 | # Attribute |
222 | elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) } |
223 | |
224 | # Pseudo-class |
225 | elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) } |
226 | } |
227 | |
228 | return 1; |
229 | } |
230 | |
231 | sub _sibling { |
232 | my ($selectors, $current, $tree, $immediate, $pos) = @_; |
233 | |
234 | my $found; |
235 | for my $sibling (@{_siblings($current)}) { |
236 | return $found if $sibling eq $current; |
237 | |
238 | # "+" (immediately preceding sibling) |
239 | if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) } |
240 | |
241 | # "~" (preceding sibling) |
242 | else { return 1 if _combinator($selectors, $sibling, $tree, $pos) } |
243 | } |
244 | |
245 | return undef; |
246 | } |
247 | |
248 | sub _siblings { |
249 | my ($current, $type) = @_; |
250 | |
251 | my $parent = $current->[3]; |
252 | my @siblings = grep { $_->[0] eq 'tag' } |
253 | @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent]; |
254 | @siblings = grep { $type eq $_->[1] } @siblings if defined $type; |
255 | |
256 | return \@siblings; |
257 | } |
258 | |
259 | sub _unescape { |
260 | my $value = shift; |
261 | |
262 | # Remove escaped newlines |
263 | $value =~ s/\\\n//g; |
264 | |
265 | # Unescape Unicode characters |
266 | $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge; |
267 | |
268 | # Remove backslash |
269 | $value =~ s/\\//g; |
270 | |
271 | return $value; |
272 | } |
273 | |
274 | sub _value { |
275 | my ($op, $value, $insensitive) = @_; |
276 | return undef unless defined $value; |
277 | $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value); |
278 | |
279 | # "~=" (word) |
280 | return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~'; |
281 | |
282 | # "*=" (contains) |
283 | return qr/$value/ if $op eq '*'; |
284 | |
285 | # "^=" (begins with) |
286 | return qr/^$value/ if $op eq '^'; |
287 | |
288 | # "$=" (ends with) |
289 | return qr/$value$/ if $op eq '$'; |
290 | |
291 | # Everything else |
292 | return qr/^$value$/; |
293 | } |
294 | |
295 | 1; |
296 | |
297 | =encoding utf8 |
298 | |
299 | =head1 NAME |
300 | |
301 | DOM::Tiny::CSS - CSS selector engine |
302 | |
303 | =head1 SYNOPSIS |
304 | |
305 | use DOM::Tiny::CSS; |
306 | |
307 | # Select elements from DOM tree |
308 | my $css = DOM::Tiny::CSS->new(tree => $tree); |
309 | my $elements = $css->select('h1, h2, h3'); |
310 | |
311 | =head1 DESCRIPTION |
312 | |
313 | L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on |
314 | L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>. |
315 | |
316 | =head1 SELECTORS |
317 | |
318 | All CSS selectors that make sense for a standalone parser are supported. |
319 | |
320 | =head2 * |
321 | |
322 | Any element. |
323 | |
324 | my $all = $css->select('*'); |
325 | |
326 | =head2 E |
327 | |
328 | An element of type C<E>. |
329 | |
330 | my $title = $css->select('title'); |
331 | |
332 | =head2 E[foo] |
333 | |
334 | An C<E> element with a C<foo> attribute. |
335 | |
336 | my $links = $css->select('a[href]'); |
337 | |
338 | =head2 E[foo="bar"] |
339 | |
340 | An C<E> element whose C<foo> attribute value is exactly equal to C<bar>. |
341 | |
342 | my $case_sensitive = $css->select('input[type="hidden"]'); |
343 | my $case_sensitive = $css->select('input[type=hidden]'); |
344 | |
345 | =head2 E[foo="bar" i] |
346 | |
347 | An C<E> element whose C<foo> attribute value is exactly equal to any |
348 | (ASCII-range) case-permutation of C<bar>. Note that this selector is |
349 | EXPERIMENTAL and might change without warning! |
350 | |
351 | my $case_insensitive = $css->select('input[type="hidden" i]'); |
352 | my $case_insensitive = $css->select('input[type=hidden i]'); |
353 | my $case_insensitive = $css->select('input[class~="foo" i]'); |
354 | |
355 | This selector is part of |
356 | L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work |
357 | in progress. |
358 | |
359 | =head2 E[foo~="bar"] |
360 | |
361 | An C<E> element whose C<foo> attribute value is a list of whitespace-separated |
362 | values, one of which is exactly equal to C<bar>. |
363 | |
364 | my $foo = $css->select('input[class~="foo"]'); |
365 | my $foo = $css->select('input[class~=foo]'); |
366 | |
367 | =head2 E[foo^="bar"] |
368 | |
369 | An C<E> element whose C<foo> attribute value begins exactly with the string |
370 | C<bar>. |
371 | |
372 | my $begins_with = $css->select('input[name^="f"]'); |
373 | my $begins_with = $css->select('input[name^=f]'); |
374 | |
375 | =head2 E[foo$="bar"] |
376 | |
377 | An C<E> element whose C<foo> attribute value ends exactly with the string |
378 | C<bar>. |
379 | |
380 | my $ends_with = $css->select('input[name$="o"]'); |
381 | my $ends_with = $css->select('input[name$=o]'); |
382 | |
383 | =head2 E[foo*="bar"] |
384 | |
385 | An C<E> element whose C<foo> attribute value contains the substring C<bar>. |
386 | |
387 | my $contains = $css->select('input[name*="fo"]'); |
388 | my $contains = $css->select('input[name*=fo]'); |
389 | |
390 | =head2 E:root |
391 | |
392 | An C<E> element, root of the document. |
393 | |
394 | my $root = $css->select(':root'); |
395 | |
396 | =head2 E:nth-child(n) |
397 | |
398 | An C<E> element, the C<n-th> child of its parent. |
399 | |
400 | my $third = $css->select('div:nth-child(3)'); |
401 | my $odd = $css->select('div:nth-child(odd)'); |
402 | my $even = $css->select('div:nth-child(even)'); |
403 | my $top3 = $css->select('div:nth-child(-n+3)'); |
404 | |
405 | =head2 E:nth-last-child(n) |
406 | |
407 | An C<E> element, the C<n-th> child of its parent, counting from the last one. |
408 | |
409 | my $third = $css->select('div:nth-last-child(3)'); |
410 | my $odd = $css->select('div:nth-last-child(odd)'); |
411 | my $even = $css->select('div:nth-last-child(even)'); |
412 | my $bottom3 = $css->select('div:nth-last-child(-n+3)'); |
413 | |
414 | =head2 E:nth-of-type(n) |
415 | |
416 | An C<E> element, the C<n-th> sibling of its type. |
417 | |
418 | my $third = $css->select('div:nth-of-type(3)'); |
419 | my $odd = $css->select('div:nth-of-type(odd)'); |
420 | my $even = $css->select('div:nth-of-type(even)'); |
421 | my $top3 = $css->select('div:nth-of-type(-n+3)'); |
422 | |
423 | =head2 E:nth-last-of-type(n) |
424 | |
425 | An C<E> element, the C<n-th> sibling of its type, counting from the last one. |
426 | |
427 | my $third = $css->select('div:nth-last-of-type(3)'); |
428 | my $odd = $css->select('div:nth-last-of-type(odd)'); |
429 | my $even = $css->select('div:nth-last-of-type(even)'); |
430 | my $bottom3 = $css->select('div:nth-last-of-type(-n+3)'); |
431 | |
432 | =head2 E:first-child |
433 | |
434 | An C<E> element, first child of its parent. |
435 | |
436 | my $first = $css->select('div p:first-child'); |
437 | |
438 | =head2 E:last-child |
439 | |
440 | An C<E> element, last child of its parent. |
441 | |
442 | my $last = $css->select('div p:last-child'); |
443 | |
444 | =head2 E:first-of-type |
445 | |
446 | An C<E> element, first sibling of its type. |
447 | |
448 | my $first = $css->select('div p:first-of-type'); |
449 | |
450 | =head2 E:last-of-type |
451 | |
452 | An C<E> element, last sibling of its type. |
453 | |
454 | my $last = $css->select('div p:last-of-type'); |
455 | |
456 | =head2 E:only-child |
457 | |
458 | An C<E> element, only child of its parent. |
459 | |
460 | my $lonely = $css->select('div p:only-child'); |
461 | |
462 | =head2 E:only-of-type |
463 | |
464 | An C<E> element, only sibling of its type. |
465 | |
466 | my $lonely = $css->select('div p:only-of-type'); |
467 | |
468 | =head2 E:empty |
469 | |
470 | An C<E> element that has no children (including text nodes). |
471 | |
472 | my $empty = $css->select(':empty'); |
473 | |
474 | =head2 E:checked |
475 | |
476 | A user interface element C<E> which is checked (for instance a radio-button or |
477 | checkbox). |
478 | |
479 | my $input = $css->select(':checked'); |
480 | |
481 | =head2 E.warning |
482 | |
483 | An C<E> element whose class is "warning". |
484 | |
485 | my $warning = $css->select('div.warning'); |
486 | |
487 | =head2 E#myid |
488 | |
489 | An C<E> element with C<ID> equal to "myid". |
490 | |
491 | my $foo = $css->select('div#foo'); |
492 | |
493 | =head2 E:not(s) |
494 | |
495 | An C<E> element that does not match simple selector C<s>. |
496 | |
497 | my $others = $css->select('div p:not(:first-child)'); |
498 | |
499 | =head2 E F |
500 | |
501 | An C<F> element descendant of an C<E> element. |
502 | |
503 | my $headlines = $css->select('div h1'); |
504 | |
505 | =head2 E E<gt> F |
506 | |
507 | An C<F> element child of an C<E> element. |
508 | |
509 | my $headlines = $css->select('html > body > div > h1'); |
510 | |
511 | =head2 E + F |
512 | |
513 | An C<F> element immediately preceded by an C<E> element. |
514 | |
515 | my $second = $css->select('h1 + h2'); |
516 | |
517 | =head2 E ~ F |
518 | |
519 | An C<F> element preceded by an C<E> element. |
520 | |
521 | my $second = $css->select('h1 ~ h2'); |
522 | |
523 | =head2 E, F, G |
524 | |
525 | Elements of type C<E>, C<F> and C<G>. |
526 | |
527 | my $headlines = $css->select('h1, h2, h3'); |
528 | |
529 | =head2 E[foo=bar][bar=baz] |
530 | |
531 | An C<E> element whose attributes match all following attribute selectors. |
532 | |
533 | my $links = $css->select('a[foo^=b][foo$=ar]'); |
534 | |
535 | =head1 ATTRIBUTES |
536 | |
537 | L<DOM::Tiny::CSS> implements the following attributes. |
538 | |
539 | =head2 tree |
540 | |
541 | my $tree = $css->tree; |
542 | $css = $css->tree(['root']); |
543 | |
544 | Document Object Model. Note that this structure should only be used very |
545 | carefully since it is very dynamic. |
546 | |
547 | =head1 METHODS |
548 | |
549 | L<DOM::Tiny::CSS> implements the following methods. |
550 | |
551 | =head2 matches |
552 | |
553 | my $bool = $css->matches('head > title'); |
554 | |
555 | Check if first node in L</"tree"> matches the CSS selector. |
556 | |
557 | =head2 select |
558 | |
559 | my $results = $css->select('head > title'); |
560 | |
561 | Run CSS selector against L</"tree">. |
562 | |
563 | =head2 select_one |
564 | |
565 | my $result = $css->select_one('head > title'); |
566 | |
567 | Run CSS selector against L</"tree"> and stop as soon as the first node matched. |
568 | |
569 | =head1 BUGS |
570 | |
571 | Report any issues on the public bugtracker. |
572 | |
573 | =head1 AUTHOR |
574 | |
575 | Dan Book <dbook@cpan.org> |
576 | |
577 | =head1 COPYRIGHT AND LICENSE |
578 | |
579 | This software is Copyright (c) 2015 by Dan Book. |
580 | |
581 | This is free software, licensed under: |
582 | |
583 | The Artistic License 2.0 (GPL Compatible) |
584 | |
585 | =head1 SEE ALSO |
586 | |
587 | L<Mojo::DOM::CSS> |