patches from rt
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / SelectorParser.pm
CommitLineData
456a815d 1package HTML::Zoom::SelectorParser;
2
1cf03540 3use strictures 1;
6d0f20a6 4use base qw(HTML::Zoom::SubObject);
456a815d 5use Carp qw(confess);
6
7my $sel_char = '-\w_';
619fe6fe 8my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-;
4c6b4429 9my $sel_item = qr/(?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])/;
10my $sel_re = qr/($sel_item+)/;
11my $match_value_re = qr/"?($sel_item*)"?/;
456a815d 12
96e44ffd 13
682fa876 14sub new { bless({}, shift) }
96e44ffd 15
456a815d 16sub _raw_parse_simple_selector {
17 for ($_[1]) { # same pos() as outside
18
19 # '*' - match anything
20
21 /\G\*/gc and
22 return sub { 1 };
23
24 # 'element' - match on tag name
25
26 /\G$sel_re/gc and
27 return do {
619fe6fe 28 my $name = $_[0]->_unescape($1);
456a815d 29 sub { $_[0]->{name} && $_[0]->{name} eq $name }
30 };
31
32 # '#id' - match on id attribute
33
34 /\G#$sel_re/gc and
35 return do {
619fe6fe 36 my $id = $_[0]->_unescape($1);
456a815d 37 sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id }
38 };
39
40 # '.class1.class2' - match on intersection of classes
41
42 /\G((?:\.$sel_re)+)/gc and
43 return do {
44 my $cls = $1; $cls =~ s/^\.//;
619fe6fe 45 my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls);
456a815d 46 sub {
47 $_[0]->{attrs}{class}
619fe6fe 48 && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl
456a815d 49 }
50 };
51
682fa876 52 # '[attr^=foo]' - match attribute with ^ anchored regex
53 /\G\[$sel_re\^=$match_value_re\]/gc and
6818876e 54 return do {
619fe6fe 55 my $attribute = $_[0]->_unescape($1);
56 my $value = $_[0]->_unescape($2);
6818876e 57 sub {
4c6b4429 58 exists $_[0]->{attrs}{$attribute}
6818876e 59 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/;
60 }
682fa876 61 };
62
63 # '[attr$=foo]' - match attribute with $ anchored regex
64 /\G\[$sel_re\$=$match_value_re\]/gc and
6818876e 65 return do {
619fe6fe 66 my $attribute = $_[0]->_unescape($1);
67 my $value = $_[0]->_unescape($2);
6818876e 68 sub {
4c6b4429 69 exists $_[0]->{attrs}{$attribute}
6818876e 70 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/;
71 }
682fa876 72 };
73
74 # '[attr*=foo] - match attribute with regex:
75 /\G\[$sel_re\*=$match_value_re\]/gc and
6818876e 76 return do {
619fe6fe 77 my $attribute = $_[0]->_unescape($1);
78 my $value = $_[0]->_unescape($2);
6818876e 79 sub {
4c6b4429 80 exists $_[0]->{attrs}{$attribute}
6818876e 81 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/;
82 }
682fa876 83 };
84
5b63d244 85 # '[attr~=bar]' - match attribute contains word
86 /\G\[$sel_re~=$match_value_re\]/gc and
87 return do {
619fe6fe 88 my $attribute = $_[0]->_unescape($1);
89 my $value = $_[0]->_unescape($2);
5b63d244 90 sub {
4c6b4429 91 exists $_[0]->{attrs}{$attribute}
f7dc3b61 92 && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/;
5b63d244 93 }
94 };
95
52878e73 96 # '[attr!=bar]' - match attribute contains prefix (for language matches)
97 /\G\[$sel_re\|=$match_value_re\]/gc and
98 return do {
619fe6fe 99 my $attribute = $_[0]->_unescape($1);
100 my $value = $_[0]->_unescape($2);
52878e73 101 sub {
4c6b4429 102 exists $_[0]->{attrs}{$attribute}
52878e73 103 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/;
104 }
105 };
106
682fa876 107 # '[attr=bar]' - match attributes
108 /\G\[$sel_re=$match_value_re\]/gc and
109 return do {
619fe6fe 110 my $attribute = $_[0]->_unescape($1);
111 my $value = $_[0]->_unescape($2);
6818876e 112 sub {
4c6b4429 113 exists $_[0]->{attrs}{$attribute}
682fa876 114 && $_[0]->{attrs}{$attribute} eq $value;
115 }
116 };
117
24725e7b 118 # '[attr!=bar]' - attributes doesn't match
119 /\G\[$sel_re!=$match_value_re\]/gc and
120 return do {
619fe6fe 121 my $attribute = $_[0]->_unescape($1);
122 my $value = $_[0]->_unescape($2);
24725e7b 123 sub {
4c6b4429 124 ! (exists $_[0]->{attrs}{$attribute}
24725e7b 125 && $_[0]->{attrs}{$attribute} eq $value);
126 }
127 };
128
5b63d244 129 # '[attr]' - match attribute being present:
682fa876 130 /\G\[$sel_re\]/gc and
131 return do {
619fe6fe 132 my $attribute = $_[0]->_unescape($1);
6818876e 133 sub {
134 exists $_[0]->{attrs}{$attribute};
135 }
bd4e2ca0 136 };
619fe6fe 137
bd4e2ca0 138 # none of the above matched, try catching some obvious errors:
139
140 # indicate unmatched square bracket:
141 /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched [');
456a815d 142 }
143}
144
145sub parse_selector {
146 my $self = $_[0];
147 my $sel = $_[1]; # my pos() only please
148 die "No selector provided" unless $sel;
149 local *_;
150 for ($sel) {
151 my @sub;
152 PARSE: { do {
682fa876 153
154 my @this_chain;
155
156 # slurp selectors until we find something else:
157 while( my $sel = $self->_raw_parse_simple_selector($_) ){
158 push @this_chain, $sel;
159 }
160
161 if( @this_chain == 1 )
162 {
163 push @sub, @this_chain;
164 }
165 else{
166 # make a compound match closure of everything
167 # in this chain of selectors:
168 push @sub, sub{
169 my $r;
170 for my $inner ( @this_chain ){
171 if( ! ($r = $inner->( @_ )) ){
172 return $r;
173 }
174 }
175 return $r;
176 }
177 }
178
179 # now we're at the end or a delimiter:
180 last PARSE if( pos == length );
181 /\G\s*,\s*/gc or do {
182 /\G(.*)/;
183 $self->_blam( "Selectors not comma separated." );
184 }
185
186 } until (pos == length) };
456a815d 187 return $sub[0] if (@sub == 1);
188 return sub {
189 foreach my $inner (@sub) {
190 if (my $r = $inner->(@_)) { return $r }
191 }
192 };
193 }
7871f2ff 194}
195
619fe6fe 196sub _unescape {
197 my ($self, $escaped) = @_;
198 (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g;
199 return $unescaped;
200}
456a815d 201
682fa876 202sub _blam {
203 my ($self, $error) = @_;
204 my $hat = (' ' x (pos||0)).'^';
205 die "Error parsing dispatch specification: ${error}\n
206${_}
207${hat} here\n";
208}
209
456a815d 2101;