Commit | Line | Data |
d6512b50 |
1 | package DOM::Tiny::CSS; |
2 | |
3 | use strict; |
4 | use warnings; |
d6512b50 |
5 | |
6 | our $VERSION = '0.001'; |
7 | |
8 | my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/; |
9 | my $ATTR_RE = qr/ |
10 | \[ |
11 | ((?:$ESCAPE_RE|[\w\-])+) # Key |
12 | (?: |
13 | (\W)?= # Operator |
14 | (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value |
15 | (?:\s+(i))? # Case-sensitivity |
16 | )? |
17 | \] |
18 | /x; |
19 | |
d066f9b8 |
20 | sub new { |
21 | my $class = shift; |
22 | bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class || $class; |
23 | } |
24 | |
25 | sub tree { |
26 | my $self = shift; |
27 | return $self->{tree} unless @_; |
28 | $self->{tree} = shift; |
29 | return $self; |
30 | } |
31 | |
d6512b50 |
32 | sub matches { |
33 | my $tree = shift->tree; |
34 | return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree); |
35 | } |
36 | |
37 | sub select { _select(0, shift->tree, _compile(@_)) } |
38 | sub select_one { _select(1, shift->tree, _compile(@_)) } |
39 | |
40 | sub _ancestor { |
41 | my ($selectors, $current, $tree, $one, $pos) = @_; |
42 | |
43 | while ($current = $current->[3]) { |
44 | return undef if $current->[0] eq 'root' || $current eq $tree; |
45 | return 1 if _combinator($selectors, $current, $tree, $pos); |
46 | last if $one; |
47 | } |
48 | |
49 | return undef; |
50 | } |
51 | |
52 | sub _attr { |
53 | my ($name_re, $value_re, $current) = @_; |
54 | |
55 | my $attrs = $current->[2]; |
56 | for my $name (keys %$attrs) { |
57 | next unless $name =~ $name_re; |
58 | return 1 unless defined $attrs->{$name} && defined $value_re; |
59 | return 1 if $attrs->{$name} =~ $value_re; |
60 | } |
61 | |
62 | return undef; |
63 | } |
64 | |
65 | sub _combinator { |
66 | my ($selectors, $current, $tree, $pos) = @_; |
67 | |
68 | # Selector |
69 | return undef unless my $c = $selectors->[$pos]; |
70 | if (ref $c) { |
71 | return undef unless _selector($c, $current); |
72 | return 1 unless $c = $selectors->[++$pos]; |
73 | } |
74 | |
75 | # ">" (parent only) |
76 | return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>'; |
77 | |
78 | # "~" (preceding siblings) |
79 | return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~'; |
80 | |
81 | # "+" (immediately preceding siblings) |
82 | return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+'; |
83 | |
84 | # " " (ancestor) |
85 | return _ancestor($selectors, $current, $tree, 0, ++$pos); |
86 | } |
87 | |
88 | sub _compile { |
89 | my $css = "$_[0]"; |
90 | $css =~ s/^\s+//; |
91 | $css =~ s/\s+$//; |
92 | |
93 | my $group = [[]]; |
94 | while (my $selectors = $group->[-1]) { |
95 | push @$selectors, [] unless @$selectors && ref $selectors->[-1]; |
96 | my $last = $selectors->[-1]; |
97 | |
98 | # Separator |
99 | if ($css =~ /\G\s*,\s*/gc) { push @$group, [] } |
100 | |
101 | # Combinator |
102 | elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 } |
103 | |
104 | # Class or ID |
105 | elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
106 | my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', ''); |
107 | push @$last, ['attr', _name($name), _value($op, $2)]; |
108 | } |
109 | |
110 | # Attributes |
111 | elsif ($css =~ /\G$ATTR_RE/gco) { |
91880340 |
112 | push @$last, ['attr', _name($1), _value($2 // '', $3 // $4 // $5, $6)]; |
d6512b50 |
113 | } |
114 | |
115 | # Pseudo-class (":not" contains more selectors) |
116 | elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) { |
117 | push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)]; |
118 | } |
119 | |
120 | # Tag |
121 | elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) { |
122 | push @$last, ['tag', _name($1)] unless $1 eq '*'; |
123 | } |
124 | |
125 | else {last} |
126 | } |
127 | |
128 | return $group; |
129 | } |
130 | |
131 | sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' } |
132 | |
133 | sub _equation { |
134 | return [] unless my $equation = shift; |
135 | |
136 | # "even" |
137 | return [2, 2] if $equation =~ /^\s*even\s*$/i; |
138 | |
139 | # "odd" |
140 | return [2, 1] if $equation =~ /^\s*odd\s*$/i; |
141 | |
142 | # Equation |
143 | my $num = [1, 1]; |
144 | return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i; |
145 | $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0; |
146 | $num->[0] = -1 if $num->[0] eq '-'; |
91880340 |
147 | $num->[1] = $3 // 0; |
d6512b50 |
148 | $num->[1] =~ s/\s+//g; |
149 | return $num; |
150 | } |
151 | |
152 | sub _match { |
153 | my ($group, $current, $tree) = @_; |
154 | _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group; |
155 | return undef; |
156 | } |
157 | |
158 | sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/} |
159 | |
160 | sub _pc { |
161 | my ($class, $args, $current) = @_; |
162 | |
163 | # ":empty" |
164 | return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty'; |
165 | |
166 | # ":root" |
167 | return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root'; |
168 | |
169 | # ":not" |
170 | return !_match($args, $current, $current) if $class eq 'not'; |
171 | |
172 | # ":checked" |
173 | return exists $current->[2]{checked} || exists $current->[2]{selected} |
174 | if $class eq 'checked'; |
175 | |
176 | # ":first-*" or ":last-*" (rewrite with equation) |
177 | ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1]) |
178 | if $class =~ s/^(?:(first)|last)-//; |
179 | |
180 | # ":nth-*" |
181 | if ($class =~ /^nth-/) { |
182 | my $type = $class =~ /of-type$/ ? $current->[1] : undef; |
183 | my @siblings = @{_siblings($current, $type)}; |
184 | |
185 | # ":nth-last-*" |
186 | @siblings = reverse @siblings if $class =~ /^nth-last/; |
187 | |
188 | for my $i (0 .. $#siblings) { |
189 | next if (my $result = $args->[0] * $i + $args->[1]) < 1; |
190 | last unless my $sibling = $siblings[$result - 1]; |
191 | return 1 if $sibling eq $current; |
192 | } |
193 | } |
194 | |
195 | # ":only-*" |
196 | elsif ($class =~ /^only-(?:child|(of-type))$/) { |
197 | $_ ne $current and return undef |
198 | for @{_siblings($current, $1 ? $current->[1] : undef)}; |
199 | return 1; |
200 | } |
201 | |
202 | return undef; |
203 | } |
204 | |
205 | sub _select { |
206 | my ($one, $tree, $group) = @_; |
207 | |
208 | my @results; |
209 | my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree]; |
210 | while (my $current = shift @queue) { |
211 | next unless $current->[0] eq 'tag'; |
212 | |
213 | unshift @queue, @$current[4 .. $#$current]; |
214 | next unless _match($group, $current, $tree); |
215 | $one ? return $current : push @results, $current; |
216 | } |
217 | |
218 | return $one ? undef : \@results; |
219 | } |
220 | |
221 | sub _selector { |
222 | my ($selector, $current) = @_; |
223 | |
224 | for my $s (@$selector) { |
225 | my $type = $s->[0]; |
226 | |
227 | # Tag |
228 | if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] } |
229 | |
230 | # Attribute |
231 | elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) } |
232 | |
233 | # Pseudo-class |
234 | elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) } |
235 | } |
236 | |
237 | return 1; |
238 | } |
239 | |
240 | sub _sibling { |
241 | my ($selectors, $current, $tree, $immediate, $pos) = @_; |
242 | |
243 | my $found; |
244 | for my $sibling (@{_siblings($current)}) { |
245 | return $found if $sibling eq $current; |
246 | |
247 | # "+" (immediately preceding sibling) |
248 | if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) } |
249 | |
250 | # "~" (preceding sibling) |
251 | else { return 1 if _combinator($selectors, $sibling, $tree, $pos) } |
252 | } |
253 | |
254 | return undef; |
255 | } |
256 | |
257 | sub _siblings { |
258 | my ($current, $type) = @_; |
259 | |
260 | my $parent = $current->[3]; |
261 | my @siblings = grep { $_->[0] eq 'tag' } |
262 | @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent]; |
263 | @siblings = grep { $type eq $_->[1] } @siblings if defined $type; |
264 | |
265 | return \@siblings; |
266 | } |
267 | |
268 | sub _unescape { |
269 | my $value = shift; |
270 | |
271 | # Remove escaped newlines |
272 | $value =~ s/\\\n//g; |
273 | |
274 | # Unescape Unicode characters |
275 | $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge; |
276 | |
277 | # Remove backslash |
278 | $value =~ s/\\//g; |
279 | |
280 | return $value; |
281 | } |
282 | |
283 | sub _value { |
284 | my ($op, $value, $insensitive) = @_; |
285 | return undef unless defined $value; |
286 | $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value); |
287 | |
288 | # "~=" (word) |
289 | return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~'; |
290 | |
291 | # "*=" (contains) |
292 | return qr/$value/ if $op eq '*'; |
293 | |
294 | # "^=" (begins with) |
295 | return qr/^$value/ if $op eq '^'; |
296 | |
297 | # "$=" (ends with) |
298 | return qr/$value$/ if $op eq '$'; |
299 | |
300 | # Everything else |
301 | return qr/^$value$/; |
302 | } |
303 | |
304 | 1; |
305 | |
306 | =encoding utf8 |
307 | |
308 | =head1 NAME |
309 | |
310 | DOM::Tiny::CSS - CSS selector engine |
311 | |
312 | =head1 SYNOPSIS |
313 | |
314 | use DOM::Tiny::CSS; |
315 | |
316 | # Select elements from DOM tree |
317 | my $css = DOM::Tiny::CSS->new(tree => $tree); |
318 | my $elements = $css->select('h1, h2, h3'); |
319 | |
320 | =head1 DESCRIPTION |
321 | |
322 | L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on |
323 | L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>. |
324 | |
325 | =head1 SELECTORS |
326 | |
327 | All CSS selectors that make sense for a standalone parser are supported. |
328 | |
329 | =head2 * |
330 | |
331 | Any element. |
332 | |
333 | my $all = $css->select('*'); |
334 | |
335 | =head2 E |
336 | |
337 | An element of type C<E>. |
338 | |
339 | my $title = $css->select('title'); |
340 | |
341 | =head2 E[foo] |
342 | |
343 | An C<E> element with a C<foo> attribute. |
344 | |
345 | my $links = $css->select('a[href]'); |
346 | |
347 | =head2 E[foo="bar"] |
348 | |
349 | An C<E> element whose C<foo> attribute value is exactly equal to C<bar>. |
350 | |
351 | my $case_sensitive = $css->select('input[type="hidden"]'); |
352 | my $case_sensitive = $css->select('input[type=hidden]'); |
353 | |
354 | =head2 E[foo="bar" i] |
355 | |
356 | An C<E> element whose C<foo> attribute value is exactly equal to any |
357 | (ASCII-range) case-permutation of C<bar>. Note that this selector is |
358 | EXPERIMENTAL and might change without warning! |
359 | |
360 | my $case_insensitive = $css->select('input[type="hidden" i]'); |
361 | my $case_insensitive = $css->select('input[type=hidden i]'); |
362 | my $case_insensitive = $css->select('input[class~="foo" i]'); |
363 | |
364 | This selector is part of |
365 | L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work |
366 | in progress. |
367 | |
368 | =head2 E[foo~="bar"] |
369 | |
370 | An C<E> element whose C<foo> attribute value is a list of whitespace-separated |
371 | values, one of which is exactly equal to C<bar>. |
372 | |
373 | my $foo = $css->select('input[class~="foo"]'); |
374 | my $foo = $css->select('input[class~=foo]'); |
375 | |
376 | =head2 E[foo^="bar"] |
377 | |
378 | An C<E> element whose C<foo> attribute value begins exactly with the string |
379 | C<bar>. |
380 | |
381 | my $begins_with = $css->select('input[name^="f"]'); |
382 | my $begins_with = $css->select('input[name^=f]'); |
383 | |
384 | =head2 E[foo$="bar"] |
385 | |
386 | An C<E> element whose C<foo> attribute value ends exactly with the string |
387 | C<bar>. |
388 | |
389 | my $ends_with = $css->select('input[name$="o"]'); |
390 | my $ends_with = $css->select('input[name$=o]'); |
391 | |
392 | =head2 E[foo*="bar"] |
393 | |
394 | An C<E> element whose C<foo> attribute value contains the substring C<bar>. |
395 | |
396 | my $contains = $css->select('input[name*="fo"]'); |
397 | my $contains = $css->select('input[name*=fo]'); |
398 | |
399 | =head2 E:root |
400 | |
401 | An C<E> element, root of the document. |
402 | |
403 | my $root = $css->select(':root'); |
404 | |
405 | =head2 E:nth-child(n) |
406 | |
407 | An C<E> element, the C<n-th> child of its parent. |
408 | |
409 | my $third = $css->select('div:nth-child(3)'); |
410 | my $odd = $css->select('div:nth-child(odd)'); |
411 | my $even = $css->select('div:nth-child(even)'); |
412 | my $top3 = $css->select('div:nth-child(-n+3)'); |
413 | |
414 | =head2 E:nth-last-child(n) |
415 | |
416 | An C<E> element, the C<n-th> child of its parent, counting from the last one. |
417 | |
418 | my $third = $css->select('div:nth-last-child(3)'); |
419 | my $odd = $css->select('div:nth-last-child(odd)'); |
420 | my $even = $css->select('div:nth-last-child(even)'); |
421 | my $bottom3 = $css->select('div:nth-last-child(-n+3)'); |
422 | |
423 | =head2 E:nth-of-type(n) |
424 | |
425 | An C<E> element, the C<n-th> sibling of its type. |
426 | |
427 | my $third = $css->select('div:nth-of-type(3)'); |
428 | my $odd = $css->select('div:nth-of-type(odd)'); |
429 | my $even = $css->select('div:nth-of-type(even)'); |
430 | my $top3 = $css->select('div:nth-of-type(-n+3)'); |
431 | |
432 | =head2 E:nth-last-of-type(n) |
433 | |
434 | An C<E> element, the C<n-th> sibling of its type, counting from the last one. |
435 | |
436 | my $third = $css->select('div:nth-last-of-type(3)'); |
437 | my $odd = $css->select('div:nth-last-of-type(odd)'); |
438 | my $even = $css->select('div:nth-last-of-type(even)'); |
439 | my $bottom3 = $css->select('div:nth-last-of-type(-n+3)'); |
440 | |
441 | =head2 E:first-child |
442 | |
443 | An C<E> element, first child of its parent. |
444 | |
445 | my $first = $css->select('div p:first-child'); |
446 | |
447 | =head2 E:last-child |
448 | |
449 | An C<E> element, last child of its parent. |
450 | |
451 | my $last = $css->select('div p:last-child'); |
452 | |
453 | =head2 E:first-of-type |
454 | |
455 | An C<E> element, first sibling of its type. |
456 | |
457 | my $first = $css->select('div p:first-of-type'); |
458 | |
459 | =head2 E:last-of-type |
460 | |
461 | An C<E> element, last sibling of its type. |
462 | |
463 | my $last = $css->select('div p:last-of-type'); |
464 | |
465 | =head2 E:only-child |
466 | |
467 | An C<E> element, only child of its parent. |
468 | |
469 | my $lonely = $css->select('div p:only-child'); |
470 | |
471 | =head2 E:only-of-type |
472 | |
473 | An C<E> element, only sibling of its type. |
474 | |
475 | my $lonely = $css->select('div p:only-of-type'); |
476 | |
477 | =head2 E:empty |
478 | |
479 | An C<E> element that has no children (including text nodes). |
480 | |
481 | my $empty = $css->select(':empty'); |
482 | |
483 | =head2 E:checked |
484 | |
485 | A user interface element C<E> which is checked (for instance a radio-button or |
486 | checkbox). |
487 | |
488 | my $input = $css->select(':checked'); |
489 | |
490 | =head2 E.warning |
491 | |
492 | An C<E> element whose class is "warning". |
493 | |
494 | my $warning = $css->select('div.warning'); |
495 | |
496 | =head2 E#myid |
497 | |
498 | An C<E> element with C<ID> equal to "myid". |
499 | |
500 | my $foo = $css->select('div#foo'); |
501 | |
502 | =head2 E:not(s) |
503 | |
504 | An C<E> element that does not match simple selector C<s>. |
505 | |
506 | my $others = $css->select('div p:not(:first-child)'); |
507 | |
508 | =head2 E F |
509 | |
510 | An C<F> element descendant of an C<E> element. |
511 | |
512 | my $headlines = $css->select('div h1'); |
513 | |
514 | =head2 E E<gt> F |
515 | |
516 | An C<F> element child of an C<E> element. |
517 | |
518 | my $headlines = $css->select('html > body > div > h1'); |
519 | |
520 | =head2 E + F |
521 | |
522 | An C<F> element immediately preceded by an C<E> element. |
523 | |
524 | my $second = $css->select('h1 + h2'); |
525 | |
526 | =head2 E ~ F |
527 | |
528 | An C<F> element preceded by an C<E> element. |
529 | |
530 | my $second = $css->select('h1 ~ h2'); |
531 | |
532 | =head2 E, F, G |
533 | |
534 | Elements of type C<E>, C<F> and C<G>. |
535 | |
536 | my $headlines = $css->select('h1, h2, h3'); |
537 | |
538 | =head2 E[foo=bar][bar=baz] |
539 | |
540 | An C<E> element whose attributes match all following attribute selectors. |
541 | |
542 | my $links = $css->select('a[foo^=b][foo$=ar]'); |
543 | |
544 | =head1 ATTRIBUTES |
545 | |
546 | L<DOM::Tiny::CSS> implements the following attributes. |
547 | |
548 | =head2 tree |
549 | |
550 | my $tree = $css->tree; |
551 | $css = $css->tree(['root']); |
552 | |
553 | Document Object Model. Note that this structure should only be used very |
554 | carefully since it is very dynamic. |
555 | |
556 | =head1 METHODS |
557 | |
d066f9b8 |
558 | L<DOM::Tiny::CSS> implements the following methods. |
559 | |
560 | =head2 new |
561 | |
562 | my $css = DOM::Tiny::CSS->new; |
563 | my $css = DOM::Tiny::CSS->new(tree => $tree); |
564 | my $css = DOM::Tiny::CSS->new({tree => $tree}); |
565 | |
566 | Construct a new hash-based L<DOM::Tiny::CSS> object. |
d6512b50 |
567 | |
568 | =head2 matches |
569 | |
570 | my $bool = $css->matches('head > title'); |
571 | |
572 | Check if first node in L</"tree"> matches the CSS selector. |
573 | |
574 | =head2 select |
575 | |
576 | my $results = $css->select('head > title'); |
577 | |
578 | Run CSS selector against L</"tree">. |
579 | |
580 | =head2 select_one |
581 | |
582 | my $result = $css->select_one('head > title'); |
583 | |
584 | Run CSS selector against L</"tree"> and stop as soon as the first node matched. |
585 | |
586 | =head1 BUGS |
587 | |
588 | Report any issues on the public bugtracker. |
589 | |
590 | =head1 AUTHOR |
591 | |
592 | Dan Book <dbook@cpan.org> |
593 | |
594 | =head1 COPYRIGHT AND LICENSE |
595 | |
596 | This software is Copyright (c) 2015 by Dan Book. |
597 | |
598 | This is free software, licensed under: |
599 | |
600 | The Artistic License 2.0 (GPL Compatible) |
601 | |
602 | =head1 SEE ALSO |
603 | |
604 | L<Mojo::DOM::CSS> |