Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::SelectorParser; |
2 | |
1cf03540 |
3 | use strictures 1; |
6d0f20a6 |
4 | use base qw(HTML::Zoom::SubObject); |
456a815d |
5 | use Carp qw(confess); |
6 | |
7 | my $sel_char = '-\w_'; |
8 | my $sel_re = qr/([$sel_char]+)/; |
682fa876 |
9 | my $match_value_re = qr/"?$sel_re"?/; |
456a815d |
10 | |
96e44ffd |
11 | |
682fa876 |
12 | sub new { bless({}, shift) } |
96e44ffd |
13 | |
456a815d |
14 | sub _raw_parse_simple_selector { |
15 | for ($_[1]) { # same pos() as outside |
16 | |
17 | # '*' - match anything |
18 | |
19 | /\G\*/gc and |
20 | return sub { 1 }; |
21 | |
22 | # 'element' - match on tag name |
23 | |
24 | /\G$sel_re/gc and |
25 | return do { |
26 | my $name = $1; |
27 | sub { $_[0]->{name} && $_[0]->{name} eq $name } |
28 | }; |
29 | |
30 | # '#id' - match on id attribute |
31 | |
32 | /\G#$sel_re/gc and |
33 | return do { |
34 | my $id = $1; |
35 | sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id } |
36 | }; |
37 | |
38 | # '.class1.class2' - match on intersection of classes |
39 | |
40 | /\G((?:\.$sel_re)+)/gc and |
41 | return do { |
42 | my $cls = $1; $cls =~ s/^\.//; |
43 | my @cl = split(/\./, $cls); |
44 | sub { |
45 | $_[0]->{attrs}{class} |
6d0f20a6 |
46 | && !grep $_[0]->{attrs}{class} !~ /(^|\s+)$_($|\s+)/, @cl |
456a815d |
47 | } |
48 | }; |
49 | |
682fa876 |
50 | # '[attr^=foo]' - match attribute with ^ anchored regex |
51 | /\G\[$sel_re\^=$match_value_re\]/gc and |
6818876e |
52 | return do { |
682fa876 |
53 | my $attribute = $1; |
54 | my $value = $2; |
6818876e |
55 | sub { |
56 | $_[0]->{attrs}{$attribute} |
57 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; |
58 | } |
682fa876 |
59 | }; |
60 | |
61 | # '[attr$=foo]' - match attribute with $ anchored regex |
62 | /\G\[$sel_re\$=$match_value_re\]/gc and |
6818876e |
63 | return do { |
682fa876 |
64 | my $attribute = $1; |
65 | my $value = $2; |
6818876e |
66 | sub { |
67 | $_[0]->{attrs}{$attribute} |
68 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; |
69 | } |
682fa876 |
70 | }; |
71 | |
72 | # '[attr*=foo] - match attribute with regex: |
73 | /\G\[$sel_re\*=$match_value_re\]/gc and |
6818876e |
74 | return do { |
682fa876 |
75 | my $attribute = $1; |
76 | my $value = $2; |
6818876e |
77 | sub { |
78 | $_[0]->{attrs}{$attribute} |
79 | && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/; |
80 | } |
682fa876 |
81 | }; |
82 | |
5b63d244 |
83 | # '[attr~=bar]' - match attribute contains word |
84 | /\G\[$sel_re~=$match_value_re\]/gc and |
85 | return do { |
86 | my $attribute = $1; |
87 | my $value = $2; |
88 | sub { |
89 | $_[0]->{attrs}{$attribute} |
f7dc3b61 |
90 | && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/; |
5b63d244 |
91 | } |
92 | }; |
93 | |
52878e73 |
94 | # '[attr!=bar]' - match attribute contains prefix (for language matches) |
95 | /\G\[$sel_re\|=$match_value_re\]/gc and |
96 | return do { |
97 | my $attribute = $1; |
98 | my $value = $2; |
99 | sub { |
100 | $_[0]->{attrs}{$attribute} |
101 | && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/; |
102 | } |
103 | }; |
104 | |
682fa876 |
105 | # '[attr=bar]' - match attributes |
106 | /\G\[$sel_re=$match_value_re\]/gc and |
107 | return do { |
108 | my $attribute = $1; |
109 | my $value = $2; |
6818876e |
110 | sub { |
682fa876 |
111 | $_[0]->{attrs}{$attribute} |
112 | && $_[0]->{attrs}{$attribute} eq $value; |
113 | } |
114 | }; |
115 | |
24725e7b |
116 | # '[attr!=bar]' - attributes doesn't match |
117 | /\G\[$sel_re!=$match_value_re\]/gc and |
118 | return do { |
119 | my $attribute = $1; |
120 | my $value = $2; |
121 | sub { |
122 | ! ($_[0]->{attrs}{$attribute} |
123 | && $_[0]->{attrs}{$attribute} eq $value); |
124 | } |
125 | }; |
126 | |
5b63d244 |
127 | # '[attr]' - match attribute being present: |
682fa876 |
128 | /\G\[$sel_re\]/gc and |
129 | return do { |
130 | my $attribute = $1; |
6818876e |
131 | sub { |
132 | exists $_[0]->{attrs}{$attribute}; |
133 | } |
bd4e2ca0 |
134 | }; |
135 | |
136 | # none of the above matched, try catching some obvious errors: |
137 | |
138 | # indicate unmatched square bracket: |
139 | /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched ['); |
456a815d |
140 | } |
141 | } |
142 | |
143 | sub parse_selector { |
144 | my $self = $_[0]; |
145 | my $sel = $_[1]; # my pos() only please |
146 | die "No selector provided" unless $sel; |
147 | local *_; |
148 | for ($sel) { |
149 | my @sub; |
150 | PARSE: { do { |
682fa876 |
151 | |
152 | my @this_chain; |
153 | |
154 | # slurp selectors until we find something else: |
155 | while( my $sel = $self->_raw_parse_simple_selector($_) ){ |
156 | push @this_chain, $sel; |
157 | } |
158 | |
159 | if( @this_chain == 1 ) |
160 | { |
161 | push @sub, @this_chain; |
162 | } |
163 | else{ |
164 | # make a compound match closure of everything |
165 | # in this chain of selectors: |
166 | push @sub, sub{ |
167 | my $r; |
168 | for my $inner ( @this_chain ){ |
169 | if( ! ($r = $inner->( @_ )) ){ |
170 | return $r; |
171 | } |
172 | } |
173 | return $r; |
174 | } |
175 | } |
176 | |
177 | # now we're at the end or a delimiter: |
178 | last PARSE if( pos == length ); |
179 | /\G\s*,\s*/gc or do { |
180 | /\G(.*)/; |
181 | $self->_blam( "Selectors not comma separated." ); |
182 | } |
183 | |
184 | } until (pos == length) }; |
456a815d |
185 | return $sub[0] if (@sub == 1); |
186 | return sub { |
187 | foreach my $inner (@sub) { |
188 | if (my $r = $inner->(@_)) { return $r } |
189 | } |
190 | }; |
191 | } |
7871f2ff |
192 | } |
193 | |
456a815d |
194 | |
682fa876 |
195 | sub _blam { |
196 | my ($self, $error) = @_; |
197 | my $hat = (' ' x (pos||0)).'^'; |
198 | die "Error parsing dispatch specification: ${error}\n |
199 | ${_} |
200 | ${hat} here\n"; |
201 | } |
202 | |
456a815d |
203 | 1; |