Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::SelectorParser; |
2 | |
1cf03540 |
3 | use strictures 1; |
6d0f20a6 |
4 | use base qw(HTML::Zoom::SubObject); |
456a815d |
5 | use Carp qw(confess); |
6 | |
7 | my $sel_char = '-\w_'; |
619fe6fe |
8 | my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-; |
9 | my $sel_re = qr/((?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])+)/; |
682fa876 |
10 | my $match_value_re = qr/"?$sel_re"?/; |
456a815d |
11 | |
96e44ffd |
12 | |
682fa876 |
13 | sub new { bless({}, shift) } |
96e44ffd |
14 | |
456a815d |
15 | sub _raw_parse_simple_selector { |
16 | for ($_[1]) { # same pos() as outside |
17 | |
18 | # '*' - match anything |
19 | |
20 | /\G\*/gc and |
21 | return sub { 1 }; |
22 | |
23 | # 'element' - match on tag name |
24 | |
25 | /\G$sel_re/gc and |
26 | return do { |
619fe6fe |
27 | my $name = $_[0]->_unescape($1); |
456a815d |
28 | sub { $_[0]->{name} && $_[0]->{name} eq $name } |
29 | }; |
30 | |
31 | # '#id' - match on id attribute |
32 | |
33 | /\G#$sel_re/gc and |
34 | return do { |
619fe6fe |
35 | my $id = $_[0]->_unescape($1); |
456a815d |
36 | sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id } |
37 | }; |
38 | |
39 | # '.class1.class2' - match on intersection of classes |
40 | |
41 | /\G((?:\.$sel_re)+)/gc and |
42 | return do { |
43 | my $cls = $1; $cls =~ s/^\.//; |
619fe6fe |
44 | my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls); |
456a815d |
45 | sub { |
46 | $_[0]->{attrs}{class} |
619fe6fe |
47 | && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl |
456a815d |
48 | } |
49 | }; |
50 | |
682fa876 |
51 | # '[attr^=foo]' - match attribute with ^ anchored regex |
52 | /\G\[$sel_re\^=$match_value_re\]/gc and |
6818876e |
53 | return do { |
619fe6fe |
54 | my $attribute = $_[0]->_unescape($1); |
55 | my $value = $_[0]->_unescape($2); |
6818876e |
56 | sub { |
57 | $_[0]->{attrs}{$attribute} |
58 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; |
59 | } |
682fa876 |
60 | }; |
61 | |
62 | # '[attr$=foo]' - match attribute with $ anchored regex |
63 | /\G\[$sel_re\$=$match_value_re\]/gc and |
6818876e |
64 | return do { |
619fe6fe |
65 | my $attribute = $_[0]->_unescape($1); |
66 | my $value = $_[0]->_unescape($2); |
6818876e |
67 | sub { |
68 | $_[0]->{attrs}{$attribute} |
69 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; |
70 | } |
682fa876 |
71 | }; |
72 | |
73 | # '[attr*=foo] - match attribute with regex: |
74 | /\G\[$sel_re\*=$match_value_re\]/gc and |
6818876e |
75 | return do { |
619fe6fe |
76 | my $attribute = $_[0]->_unescape($1); |
77 | my $value = $_[0]->_unescape($2); |
6818876e |
78 | sub { |
79 | $_[0]->{attrs}{$attribute} |
80 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/; |
81 | } |
682fa876 |
82 | }; |
83 | |
5b63d244 |
84 | # '[attr~=bar]' - match attribute contains word |
85 | /\G\[$sel_re~=$match_value_re\]/gc and |
86 | return do { |
619fe6fe |
87 | my $attribute = $_[0]->_unescape($1); |
88 | my $value = $_[0]->_unescape($2); |
5b63d244 |
89 | sub { |
90 | $_[0]->{attrs}{$attribute} |
f7dc3b61 |
91 | && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/; |
5b63d244 |
92 | } |
93 | }; |
94 | |
52878e73 |
95 | # '[attr!=bar]' - match attribute contains prefix (for language matches) |
96 | /\G\[$sel_re\|=$match_value_re\]/gc and |
97 | return do { |
619fe6fe |
98 | my $attribute = $_[0]->_unescape($1); |
99 | my $value = $_[0]->_unescape($2); |
52878e73 |
100 | sub { |
101 | $_[0]->{attrs}{$attribute} |
102 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/; |
103 | } |
104 | }; |
105 | |
682fa876 |
106 | # '[attr=bar]' - match attributes |
107 | /\G\[$sel_re=$match_value_re\]/gc and |
108 | return do { |
619fe6fe |
109 | my $attribute = $_[0]->_unescape($1); |
110 | my $value = $_[0]->_unescape($2); |
6818876e |
111 | sub { |
682fa876 |
112 | $_[0]->{attrs}{$attribute} |
113 | && $_[0]->{attrs}{$attribute} eq $value; |
114 | } |
115 | }; |
116 | |
24725e7b |
117 | # '[attr!=bar]' - attributes doesn't match |
118 | /\G\[$sel_re!=$match_value_re\]/gc and |
119 | return do { |
619fe6fe |
120 | my $attribute = $_[0]->_unescape($1); |
121 | my $value = $_[0]->_unescape($2); |
24725e7b |
122 | sub { |
123 | ! ($_[0]->{attrs}{$attribute} |
124 | && $_[0]->{attrs}{$attribute} eq $value); |
125 | } |
126 | }; |
127 | |
5b63d244 |
128 | # '[attr]' - match attribute being present: |
682fa876 |
129 | /\G\[$sel_re\]/gc and |
130 | return do { |
619fe6fe |
131 | my $attribute = $_[0]->_unescape($1); |
6818876e |
132 | sub { |
133 | exists $_[0]->{attrs}{$attribute}; |
134 | } |
bd4e2ca0 |
135 | }; |
619fe6fe |
136 | |
bd4e2ca0 |
137 | # none of the above matched, try catching some obvious errors: |
138 | |
139 | # indicate unmatched square bracket: |
140 | /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched ['); |
456a815d |
141 | } |
142 | } |
143 | |
144 | sub parse_selector { |
145 | my $self = $_[0]; |
146 | my $sel = $_[1]; # my pos() only please |
147 | die "No selector provided" unless $sel; |
148 | local *_; |
149 | for ($sel) { |
150 | my @sub; |
151 | PARSE: { do { |
682fa876 |
152 | |
153 | my @this_chain; |
154 | |
155 | # slurp selectors until we find something else: |
156 | while( my $sel = $self->_raw_parse_simple_selector($_) ){ |
157 | push @this_chain, $sel; |
158 | } |
159 | |
160 | if( @this_chain == 1 ) |
161 | { |
162 | push @sub, @this_chain; |
163 | } |
164 | else{ |
165 | # make a compound match closure of everything |
166 | # in this chain of selectors: |
167 | push @sub, sub{ |
168 | my $r; |
169 | for my $inner ( @this_chain ){ |
170 | if( ! ($r = $inner->( @_ )) ){ |
171 | return $r; |
172 | } |
173 | } |
174 | return $r; |
175 | } |
176 | } |
177 | |
178 | # now we're at the end or a delimiter: |
179 | last PARSE if( pos == length ); |
180 | /\G\s*,\s*/gc or do { |
181 | /\G(.*)/; |
182 | $self->_blam( "Selectors not comma separated." ); |
183 | } |
184 | |
185 | } until (pos == length) }; |
456a815d |
186 | return $sub[0] if (@sub == 1); |
187 | return sub { |
188 | foreach my $inner (@sub) { |
189 | if (my $r = $inner->(@_)) { return $r } |
190 | } |
191 | }; |
192 | } |
7871f2ff |
193 | } |
194 | |
619fe6fe |
195 | sub _unescape { |
196 | my ($self, $escaped) = @_; |
197 | (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g; |
198 | return $unescaped; |
199 | } |
456a815d |
200 | |
682fa876 |
201 | sub _blam { |
202 | my ($self, $error) = @_; |
203 | my $hat = (' ' x (pos||0)).'^'; |
204 | die "Error parsing dispatch specification: ${error}\n |
205 | ${_} |
206 | ${hat} here\n"; |
207 | } |
208 | |
456a815d |
209 | 1; |