Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::SelectorParser; |
2 | |
1cf03540 |
3 | use strictures 1; |
6d0f20a6 |
4 | use base qw(HTML::Zoom::SubObject); |
456a815d |
5 | use Carp qw(confess); |
6 | |
7 | my $sel_char = '-\w_'; |
619fe6fe |
8 | my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-; |
6cae1842 |
9 | my $sel_item = qr/(?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])/; |
10 | my $sel_re = qr/($sel_item+)/; |
11 | my $match_value_re = qr/"?($sel_item*)"?/; |
456a815d |
12 | |
96e44ffd |
13 | |
682fa876 |
14 | sub new { bless({}, shift) } |
96e44ffd |
15 | |
456a815d |
16 | sub _raw_parse_simple_selector { |
17 | for ($_[1]) { # same pos() as outside |
18 | |
19 | # '*' - match anything |
20 | |
21 | /\G\*/gc and |
22 | return sub { 1 }; |
23 | |
24 | # 'element' - match on tag name |
25 | |
26 | /\G$sel_re/gc and |
27 | return do { |
619fe6fe |
28 | my $name = $_[0]->_unescape($1); |
456a815d |
29 | sub { $_[0]->{name} && $_[0]->{name} eq $name } |
30 | }; |
31 | |
32 | # '#id' - match on id attribute |
33 | |
34 | /\G#$sel_re/gc and |
35 | return do { |
619fe6fe |
36 | my $id = $_[0]->_unescape($1); |
456a815d |
37 | sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id } |
38 | }; |
39 | |
40 | # '.class1.class2' - match on intersection of classes |
41 | |
42 | /\G((?:\.$sel_re)+)/gc and |
43 | return do { |
44 | my $cls = $1; $cls =~ s/^\.//; |
619fe6fe |
45 | my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls); |
456a815d |
46 | sub { |
47 | $_[0]->{attrs}{class} |
619fe6fe |
48 | && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl |
456a815d |
49 | } |
50 | }; |
51 | |
682fa876 |
52 | # '[attr^=foo]' - match attribute with ^ anchored regex |
53 | /\G\[$sel_re\^=$match_value_re\]/gc and |
6818876e |
54 | return do { |
619fe6fe |
55 | my $attribute = $_[0]->_unescape($1); |
56 | my $value = $_[0]->_unescape($2); |
6818876e |
57 | sub { |
6cae1842 |
58 | exists $_[0]->{attrs}{$attribute} |
6818876e |
59 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; |
60 | } |
682fa876 |
61 | }; |
62 | |
63 | # '[attr$=foo]' - match attribute with $ anchored regex |
64 | /\G\[$sel_re\$=$match_value_re\]/gc and |
6818876e |
65 | return do { |
619fe6fe |
66 | my $attribute = $_[0]->_unescape($1); |
67 | my $value = $_[0]->_unescape($2); |
6818876e |
68 | sub { |
6cae1842 |
69 | exists $_[0]->{attrs}{$attribute} |
6818876e |
70 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; |
71 | } |
682fa876 |
72 | }; |
73 | |
74 | # '[attr*=foo] - match attribute with regex: |
75 | /\G\[$sel_re\*=$match_value_re\]/gc and |
6818876e |
76 | return do { |
619fe6fe |
77 | my $attribute = $_[0]->_unescape($1); |
78 | my $value = $_[0]->_unescape($2); |
6818876e |
79 | sub { |
6cae1842 |
80 | exists $_[0]->{attrs}{$attribute} |
6818876e |
81 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/; |
82 | } |
682fa876 |
83 | }; |
84 | |
5b63d244 |
85 | # '[attr~=bar]' - match attribute contains word |
86 | /\G\[$sel_re~=$match_value_re\]/gc and |
87 | return do { |
619fe6fe |
88 | my $attribute = $_[0]->_unescape($1); |
89 | my $value = $_[0]->_unescape($2); |
5b63d244 |
90 | sub { |
6cae1842 |
91 | exists $_[0]->{attrs}{$attribute} |
f7dc3b61 |
92 | && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/; |
5b63d244 |
93 | } |
94 | }; |
95 | |
52878e73 |
96 | # '[attr!=bar]' - match attribute contains prefix (for language matches) |
97 | /\G\[$sel_re\|=$match_value_re\]/gc and |
98 | return do { |
619fe6fe |
99 | my $attribute = $_[0]->_unescape($1); |
100 | my $value = $_[0]->_unescape($2); |
52878e73 |
101 | sub { |
6cae1842 |
102 | exists $_[0]->{attrs}{$attribute} |
52878e73 |
103 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/; |
104 | } |
105 | }; |
106 | |
682fa876 |
107 | # '[attr=bar]' - match attributes |
108 | /\G\[$sel_re=$match_value_re\]/gc and |
109 | return do { |
619fe6fe |
110 | my $attribute = $_[0]->_unescape($1); |
111 | my $value = $_[0]->_unescape($2); |
6818876e |
112 | sub { |
6cae1842 |
113 | exists $_[0]->{attrs}{$attribute} |
682fa876 |
114 | && $_[0]->{attrs}{$attribute} eq $value; |
115 | } |
116 | }; |
117 | |
24725e7b |
118 | # '[attr!=bar]' - attributes doesn't match |
119 | /\G\[$sel_re!=$match_value_re\]/gc and |
120 | return do { |
619fe6fe |
121 | my $attribute = $_[0]->_unescape($1); |
122 | my $value = $_[0]->_unescape($2); |
24725e7b |
123 | sub { |
6cae1842 |
124 | ! (exists $_[0]->{attrs}{$attribute} |
24725e7b |
125 | && $_[0]->{attrs}{$attribute} eq $value); |
126 | } |
127 | }; |
128 | |
5b63d244 |
129 | # '[attr]' - match attribute being present: |
682fa876 |
130 | /\G\[$sel_re\]/gc and |
131 | return do { |
619fe6fe |
132 | my $attribute = $_[0]->_unescape($1); |
6818876e |
133 | sub { |
134 | exists $_[0]->{attrs}{$attribute}; |
135 | } |
bd4e2ca0 |
136 | }; |
619fe6fe |
137 | |
bd4e2ca0 |
138 | # none of the above matched, try catching some obvious errors: |
139 | |
140 | # indicate unmatched square bracket: |
141 | /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched ['); |
456a815d |
142 | } |
143 | } |
144 | |
145 | sub parse_selector { |
146 | my $self = $_[0]; |
147 | my $sel = $_[1]; # my pos() only please |
148 | die "No selector provided" unless $sel; |
149 | local *_; |
150 | for ($sel) { |
151 | my @sub; |
152 | PARSE: { do { |
682fa876 |
153 | |
154 | my @this_chain; |
155 | |
156 | # slurp selectors until we find something else: |
157 | while( my $sel = $self->_raw_parse_simple_selector($_) ){ |
158 | push @this_chain, $sel; |
159 | } |
160 | |
161 | if( @this_chain == 1 ) |
162 | { |
163 | push @sub, @this_chain; |
164 | } |
165 | else{ |
166 | # make a compound match closure of everything |
167 | # in this chain of selectors: |
168 | push @sub, sub{ |
169 | my $r; |
170 | for my $inner ( @this_chain ){ |
171 | if( ! ($r = $inner->( @_ )) ){ |
172 | return $r; |
173 | } |
174 | } |
175 | return $r; |
176 | } |
177 | } |
178 | |
179 | # now we're at the end or a delimiter: |
180 | last PARSE if( pos == length ); |
181 | /\G\s*,\s*/gc or do { |
182 | /\G(.*)/; |
183 | $self->_blam( "Selectors not comma separated." ); |
184 | } |
185 | |
186 | } until (pos == length) }; |
456a815d |
187 | return $sub[0] if (@sub == 1); |
188 | return sub { |
189 | foreach my $inner (@sub) { |
190 | if (my $r = $inner->(@_)) { return $r } |
191 | } |
192 | }; |
193 | } |
7871f2ff |
194 | } |
195 | |
619fe6fe |
196 | sub _unescape { |
197 | my ($self, $escaped) = @_; |
198 | (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g; |
199 | return $unescaped; |
200 | } |
456a815d |
201 | |
682fa876 |
202 | sub _blam { |
203 | my ($self, $error) = @_; |
204 | my $hat = (' ' x (pos||0)).'^'; |
205 | die "Error parsing dispatch specification: ${error}\n |
206 | ${_} |
207 | ${hat} here\n"; |
208 | } |
209 | |
456a815d |
210 | 1; |