replace usage of Class::Tiny::Chained
[catagits/DOM-Tiny.git] / lib / DOM / Tiny / CSS.pm
CommitLineData
d6512b50 1package DOM::Tiny::CSS;
2
3use strict;
4use warnings;
d6512b50 5
6our $VERSION = '0.001';
7
8my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/;
9my $ATTR_RE = qr/
10 \[
11 ((?:$ESCAPE_RE|[\w\-])+) # Key
12 (?:
13 (\W)?= # Operator
14 (?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value
15 (?:\s+(i))? # Case-sensitivity
16 )?
17 \]
18/x;
19
d066f9b8 20sub new {
21 my $class = shift;
22 bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class || $class;
23}
24
25sub tree {
26 my $self = shift;
27 return $self->{tree} unless @_;
28 $self->{tree} = shift;
29 return $self;
30}
31
d6512b50 32sub matches {
33 my $tree = shift->tree;
34 return $tree->[0] ne 'tag' ? undef : _match(_compile(shift), $tree, $tree);
35}
36
37sub select { _select(0, shift->tree, _compile(@_)) }
38sub select_one { _select(1, shift->tree, _compile(@_)) }
39
40sub _ancestor {
41 my ($selectors, $current, $tree, $one, $pos) = @_;
42
43 while ($current = $current->[3]) {
44 return undef if $current->[0] eq 'root' || $current eq $tree;
45 return 1 if _combinator($selectors, $current, $tree, $pos);
46 last if $one;
47 }
48
49 return undef;
50}
51
52sub _attr {
53 my ($name_re, $value_re, $current) = @_;
54
55 my $attrs = $current->[2];
56 for my $name (keys %$attrs) {
57 next unless $name =~ $name_re;
58 return 1 unless defined $attrs->{$name} && defined $value_re;
59 return 1 if $attrs->{$name} =~ $value_re;
60 }
61
62 return undef;
63}
64
65sub _combinator {
66 my ($selectors, $current, $tree, $pos) = @_;
67
68 # Selector
69 return undef unless my $c = $selectors->[$pos];
70 if (ref $c) {
71 return undef unless _selector($c, $current);
72 return 1 unless $c = $selectors->[++$pos];
73 }
74
75 # ">" (parent only)
76 return _ancestor($selectors, $current, $tree, 1, ++$pos) if $c eq '>';
77
78 # "~" (preceding siblings)
79 return _sibling($selectors, $current, $tree, 0, ++$pos) if $c eq '~';
80
81 # "+" (immediately preceding siblings)
82 return _sibling($selectors, $current, $tree, 1, ++$pos) if $c eq '+';
83
84 # " " (ancestor)
85 return _ancestor($selectors, $current, $tree, 0, ++$pos);
86}
87
88sub _compile {
89 my $css = "$_[0]";
90 $css =~ s/^\s+//;
91 $css =~ s/\s+$//;
92
93 my $group = [[]];
94 while (my $selectors = $group->[-1]) {
95 push @$selectors, [] unless @$selectors && ref $selectors->[-1];
96 my $last = $selectors->[-1];
97
98 # Separator
99 if ($css =~ /\G\s*,\s*/gc) { push @$group, [] }
100
101 # Combinator
102 elsif ($css =~ /\G\s*([ >+~])\s*/gc) { push @$selectors, $1 }
103
104 # Class or ID
105 elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
106 my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', '');
107 push @$last, ['attr', _name($name), _value($op, $2)];
108 }
109
110 # Attributes
111 elsif ($css =~ /\G$ATTR_RE/gco) {
91880340 112 push @$last, ['attr', _name($1), _value($2 // '', $3 // $4 // $5, $6)];
d6512b50 113 }
114
115 # Pseudo-class (":not" contains more selectors)
116 elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) {
117 push @$last, ['pc', lc $1, $1 eq 'not' ? _compile($2) : _equation($2)];
118 }
119
120 # Tag
121 elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
122 push @$last, ['tag', _name($1)] unless $1 eq '*';
123 }
124
125 else {last}
126 }
127
128 return $group;
129}
130
131sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' }
132
133sub _equation {
134 return [] unless my $equation = shift;
135
136 # "even"
137 return [2, 2] if $equation =~ /^\s*even\s*$/i;
138
139 # "odd"
140 return [2, 1] if $equation =~ /^\s*odd\s*$/i;
141
142 # Equation
143 my $num = [1, 1];
144 return $num if $equation !~ /(?:(-?(?:\d+)?)?(n))?\s*\+?\s*(-?\s*\d+)?\s*$/i;
145 $num->[0] = defined($1) && $1 ne '' ? $1 : $2 ? 1 : 0;
146 $num->[0] = -1 if $num->[0] eq '-';
91880340 147 $num->[1] = $3 // 0;
d6512b50 148 $num->[1] =~ s/\s+//g;
149 return $num;
150}
151
152sub _match {
153 my ($group, $current, $tree) = @_;
154 _combinator([reverse @$_], $current, $tree, 0) and return 1 for @$group;
155 return undef;
156}
157
158sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/}
159
160sub _pc {
161 my ($class, $args, $current) = @_;
162
163 # ":empty"
164 return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty';
165
166 # ":root"
167 return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root';
168
169 # ":not"
170 return !_match($args, $current, $current) if $class eq 'not';
171
172 # ":checked"
173 return exists $current->[2]{checked} || exists $current->[2]{selected}
174 if $class eq 'checked';
175
176 # ":first-*" or ":last-*" (rewrite with equation)
177 ($class, $args) = $1 ? ("nth-$class", [0, 1]) : ("nth-last-$class", [-1, 1])
178 if $class =~ s/^(?:(first)|last)-//;
179
180 # ":nth-*"
181 if ($class =~ /^nth-/) {
182 my $type = $class =~ /of-type$/ ? $current->[1] : undef;
183 my @siblings = @{_siblings($current, $type)};
184
185 # ":nth-last-*"
186 @siblings = reverse @siblings if $class =~ /^nth-last/;
187
188 for my $i (0 .. $#siblings) {
189 next if (my $result = $args->[0] * $i + $args->[1]) < 1;
190 last unless my $sibling = $siblings[$result - 1];
191 return 1 if $sibling eq $current;
192 }
193 }
194
195 # ":only-*"
196 elsif ($class =~ /^only-(?:child|(of-type))$/) {
197 $_ ne $current and return undef
198 for @{_siblings($current, $1 ? $current->[1] : undef)};
199 return 1;
200 }
201
202 return undef;
203}
204
205sub _select {
206 my ($one, $tree, $group) = @_;
207
208 my @results;
209 my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
210 while (my $current = shift @queue) {
211 next unless $current->[0] eq 'tag';
212
213 unshift @queue, @$current[4 .. $#$current];
214 next unless _match($group, $current, $tree);
215 $one ? return $current : push @results, $current;
216 }
217
218 return $one ? undef : \@results;
219}
220
221sub _selector {
222 my ($selector, $current) = @_;
223
224 for my $s (@$selector) {
225 my $type = $s->[0];
226
227 # Tag
228 if ($type eq 'tag') { return undef unless $current->[1] =~ $s->[1] }
229
230 # Attribute
231 elsif ($type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) }
232
233 # Pseudo-class
234 elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current) }
235 }
236
237 return 1;
238}
239
240sub _sibling {
241 my ($selectors, $current, $tree, $immediate, $pos) = @_;
242
243 my $found;
244 for my $sibling (@{_siblings($current)}) {
245 return $found if $sibling eq $current;
246
247 # "+" (immediately preceding sibling)
248 if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $pos) }
249
250 # "~" (preceding sibling)
251 else { return 1 if _combinator($selectors, $sibling, $tree, $pos) }
252 }
253
254 return undef;
255}
256
257sub _siblings {
258 my ($current, $type) = @_;
259
260 my $parent = $current->[3];
261 my @siblings = grep { $_->[0] eq 'tag' }
262 @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent];
263 @siblings = grep { $type eq $_->[1] } @siblings if defined $type;
264
265 return \@siblings;
266}
267
268sub _unescape {
269 my $value = shift;
270
271 # Remove escaped newlines
272 $value =~ s/\\\n//g;
273
274 # Unescape Unicode characters
275 $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge;
276
277 # Remove backslash
278 $value =~ s/\\//g;
279
280 return $value;
281}
282
283sub _value {
284 my ($op, $value, $insensitive) = @_;
285 return undef unless defined $value;
286 $value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value);
287
288 # "~=" (word)
289 return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~';
290
291 # "*=" (contains)
292 return qr/$value/ if $op eq '*';
293
294 # "^=" (begins with)
295 return qr/^$value/ if $op eq '^';
296
297 # "$=" (ends with)
298 return qr/$value$/ if $op eq '$';
299
300 # Everything else
301 return qr/^$value$/;
302}
303
3041;
305
306=encoding utf8
307
308=head1 NAME
309
310DOM::Tiny::CSS - CSS selector engine
311
312=head1 SYNOPSIS
313
314 use DOM::Tiny::CSS;
315
316 # Select elements from DOM tree
317 my $css = DOM::Tiny::CSS->new(tree => $tree);
318 my $elements = $css->select('h1, h2, h3');
319
320=head1 DESCRIPTION
321
322L<DOM::Tiny::CSS> is the CSS selector engine used by L<DOM::Tiny> based on
323L<Mojo::DOM::CSS>, which is based on L<Selectors Level 3|http://www.w3.org/TR/css3-selectors/>.
324
325=head1 SELECTORS
326
327All CSS selectors that make sense for a standalone parser are supported.
328
329=head2 *
330
331Any element.
332
333 my $all = $css->select('*');
334
335=head2 E
336
337An element of type C<E>.
338
339 my $title = $css->select('title');
340
341=head2 E[foo]
342
343An C<E> element with a C<foo> attribute.
344
345 my $links = $css->select('a[href]');
346
347=head2 E[foo="bar"]
348
349An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
350
351 my $case_sensitive = $css->select('input[type="hidden"]');
352 my $case_sensitive = $css->select('input[type=hidden]');
353
354=head2 E[foo="bar" i]
355
356An C<E> element whose C<foo> attribute value is exactly equal to any
357(ASCII-range) case-permutation of C<bar>. Note that this selector is
358EXPERIMENTAL and might change without warning!
359
360 my $case_insensitive = $css->select('input[type="hidden" i]');
361 my $case_insensitive = $css->select('input[type=hidden i]');
362 my $case_insensitive = $css->select('input[class~="foo" i]');
363
364This selector is part of
365L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
366in progress.
367
368=head2 E[foo~="bar"]
369
370An C<E> element whose C<foo> attribute value is a list of whitespace-separated
371values, one of which is exactly equal to C<bar>.
372
373 my $foo = $css->select('input[class~="foo"]');
374 my $foo = $css->select('input[class~=foo]');
375
376=head2 E[foo^="bar"]
377
378An C<E> element whose C<foo> attribute value begins exactly with the string
379C<bar>.
380
381 my $begins_with = $css->select('input[name^="f"]');
382 my $begins_with = $css->select('input[name^=f]');
383
384=head2 E[foo$="bar"]
385
386An C<E> element whose C<foo> attribute value ends exactly with the string
387C<bar>.
388
389 my $ends_with = $css->select('input[name$="o"]');
390 my $ends_with = $css->select('input[name$=o]');
391
392=head2 E[foo*="bar"]
393
394An C<E> element whose C<foo> attribute value contains the substring C<bar>.
395
396 my $contains = $css->select('input[name*="fo"]');
397 my $contains = $css->select('input[name*=fo]');
398
399=head2 E:root
400
401An C<E> element, root of the document.
402
403 my $root = $css->select(':root');
404
405=head2 E:nth-child(n)
406
407An C<E> element, the C<n-th> child of its parent.
408
409 my $third = $css->select('div:nth-child(3)');
410 my $odd = $css->select('div:nth-child(odd)');
411 my $even = $css->select('div:nth-child(even)');
412 my $top3 = $css->select('div:nth-child(-n+3)');
413
414=head2 E:nth-last-child(n)
415
416An C<E> element, the C<n-th> child of its parent, counting from the last one.
417
418 my $third = $css->select('div:nth-last-child(3)');
419 my $odd = $css->select('div:nth-last-child(odd)');
420 my $even = $css->select('div:nth-last-child(even)');
421 my $bottom3 = $css->select('div:nth-last-child(-n+3)');
422
423=head2 E:nth-of-type(n)
424
425An C<E> element, the C<n-th> sibling of its type.
426
427 my $third = $css->select('div:nth-of-type(3)');
428 my $odd = $css->select('div:nth-of-type(odd)');
429 my $even = $css->select('div:nth-of-type(even)');
430 my $top3 = $css->select('div:nth-of-type(-n+3)');
431
432=head2 E:nth-last-of-type(n)
433
434An C<E> element, the C<n-th> sibling of its type, counting from the last one.
435
436 my $third = $css->select('div:nth-last-of-type(3)');
437 my $odd = $css->select('div:nth-last-of-type(odd)');
438 my $even = $css->select('div:nth-last-of-type(even)');
439 my $bottom3 = $css->select('div:nth-last-of-type(-n+3)');
440
441=head2 E:first-child
442
443An C<E> element, first child of its parent.
444
445 my $first = $css->select('div p:first-child');
446
447=head2 E:last-child
448
449An C<E> element, last child of its parent.
450
451 my $last = $css->select('div p:last-child');
452
453=head2 E:first-of-type
454
455An C<E> element, first sibling of its type.
456
457 my $first = $css->select('div p:first-of-type');
458
459=head2 E:last-of-type
460
461An C<E> element, last sibling of its type.
462
463 my $last = $css->select('div p:last-of-type');
464
465=head2 E:only-child
466
467An C<E> element, only child of its parent.
468
469 my $lonely = $css->select('div p:only-child');
470
471=head2 E:only-of-type
472
473An C<E> element, only sibling of its type.
474
475 my $lonely = $css->select('div p:only-of-type');
476
477=head2 E:empty
478
479An C<E> element that has no children (including text nodes).
480
481 my $empty = $css->select(':empty');
482
483=head2 E:checked
484
485A user interface element C<E> which is checked (for instance a radio-button or
486checkbox).
487
488 my $input = $css->select(':checked');
489
490=head2 E.warning
491
492An C<E> element whose class is "warning".
493
494 my $warning = $css->select('div.warning');
495
496=head2 E#myid
497
498An C<E> element with C<ID> equal to "myid".
499
500 my $foo = $css->select('div#foo');
501
502=head2 E:not(s)
503
504An C<E> element that does not match simple selector C<s>.
505
506 my $others = $css->select('div p:not(:first-child)');
507
508=head2 E F
509
510An C<F> element descendant of an C<E> element.
511
512 my $headlines = $css->select('div h1');
513
514=head2 E E<gt> F
515
516An C<F> element child of an C<E> element.
517
518 my $headlines = $css->select('html > body > div > h1');
519
520=head2 E + F
521
522An C<F> element immediately preceded by an C<E> element.
523
524 my $second = $css->select('h1 + h2');
525
526=head2 E ~ F
527
528An C<F> element preceded by an C<E> element.
529
530 my $second = $css->select('h1 ~ h2');
531
532=head2 E, F, G
533
534Elements of type C<E>, C<F> and C<G>.
535
536 my $headlines = $css->select('h1, h2, h3');
537
538=head2 E[foo=bar][bar=baz]
539
540An C<E> element whose attributes match all following attribute selectors.
541
542 my $links = $css->select('a[foo^=b][foo$=ar]');
543
544=head1 ATTRIBUTES
545
546L<DOM::Tiny::CSS> implements the following attributes.
547
548=head2 tree
549
550 my $tree = $css->tree;
551 $css = $css->tree(['root']);
552
553Document Object Model. Note that this structure should only be used very
554carefully since it is very dynamic.
555
556=head1 METHODS
557
d066f9b8 558L<DOM::Tiny::CSS> implements the following methods.
559
560=head2 new
561
562 my $css = DOM::Tiny::CSS->new;
563 my $css = DOM::Tiny::CSS->new(tree => $tree);
564 my $css = DOM::Tiny::CSS->new({tree => $tree});
565
566Construct a new hash-based L<DOM::Tiny::CSS> object.
d6512b50 567
568=head2 matches
569
570 my $bool = $css->matches('head > title');
571
572Check if first node in L</"tree"> matches the CSS selector.
573
574=head2 select
575
576 my $results = $css->select('head > title');
577
578Run CSS selector against L</"tree">.
579
580=head2 select_one
581
582 my $result = $css->select_one('head > title');
583
584Run CSS selector against L</"tree"> and stop as soon as the first node matched.
585
586=head1 BUGS
587
588Report any issues on the public bugtracker.
589
590=head1 AUTHOR
591
592Dan Book <dbook@cpan.org>
593
594=head1 COPYRIGHT AND LICENSE
595
596This software is Copyright (c) 2015 by Dan Book.
597
598This is free software, licensed under:
599
600 The Artistic License 2.0 (GPL Compatible)
601
602=head1 SEE ALSO
603
604L<Mojo::DOM::CSS>