1 package HTML::Zoom::SelectorParser;
4 use base qw(HTML::Zoom::SubObject);
8 my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-;
9 my $sel_item = qr/(?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])/;
10 my $sel_re = qr/($sel_item+)/;
11 my $match_value_re = qr/"?($sel_item*)"?/;
14 sub new { bless({}, shift) }
16 sub _raw_parse_simple_selector {
17 for ($_[1]) { # same pos() as outside
19 # '*' - match anything
24 # 'element' - match on tag name
28 my $name = $_[0]->_unescape($1);
29 sub { $_[0]->{name} && $_[0]->{name} eq $name }
32 # '#id' - match on id attribute
36 my $id = $_[0]->_unescape($1);
37 sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id }
40 # '.class1.class2' - match on intersection of classes
42 /\G((?:\.$sel_re)+)/gc and
44 my $cls = $1; $cls =~ s/^\.//;
45 my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls);
48 && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl
52 # '[attr^=foo]' - match attribute with ^ anchored regex
53 /\G\[$sel_re\^=$match_value_re\]/gc and
55 my $attribute = $_[0]->_unescape($1);
56 my $value = $_[0]->_unescape($2);
58 exists $_[0]->{attrs}{$attribute}
59 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/;
63 # '[attr$=foo]' - match attribute with $ anchored regex
64 /\G\[$sel_re\$=$match_value_re\]/gc and
66 my $attribute = $_[0]->_unescape($1);
67 my $value = $_[0]->_unescape($2);
69 exists $_[0]->{attrs}{$attribute}
70 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/;
74 # '[attr*=foo] - match attribute with regex:
75 /\G\[$sel_re\*=$match_value_re\]/gc and
77 my $attribute = $_[0]->_unescape($1);
78 my $value = $_[0]->_unescape($2);
80 exists $_[0]->{attrs}{$attribute}
81 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/;
85 # '[attr~=bar]' - match attribute contains word
86 /\G\[$sel_re~=$match_value_re\]/gc and
88 my $attribute = $_[0]->_unescape($1);
89 my $value = $_[0]->_unescape($2);
91 exists $_[0]->{attrs}{$attribute}
92 && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/;
96 # '[attr!=bar]' - match attribute contains prefix (for language matches)
97 /\G\[$sel_re\|=$match_value_re\]/gc and
99 my $attribute = $_[0]->_unescape($1);
100 my $value = $_[0]->_unescape($2);
102 exists $_[0]->{attrs}{$attribute}
103 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/;
107 # '[attr=bar]' - match attributes
108 /\G\[$sel_re=$match_value_re\]/gc and
110 my $attribute = $_[0]->_unescape($1);
111 my $value = $_[0]->_unescape($2);
113 exists $_[0]->{attrs}{$attribute}
114 && $_[0]->{attrs}{$attribute} eq $value;
118 # '[attr!=bar]' - attributes doesn't match
119 /\G\[$sel_re!=$match_value_re\]/gc and
121 my $attribute = $_[0]->_unescape($1);
122 my $value = $_[0]->_unescape($2);
124 ! (exists $_[0]->{attrs}{$attribute}
125 && $_[0]->{attrs}{$attribute} eq $value);
129 # '[attr]' - match attribute being present:
130 /\G\[$sel_re\]/gc and
132 my $attribute = $_[0]->_unescape($1);
134 exists $_[0]->{attrs}{$attribute};
138 # none of the above matched, try catching some obvious errors:
140 # indicate unmatched square bracket:
141 /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched [');
147 my $sel = $_[1]; # my pos() only please
148 die "No selector provided" unless $sel;
156 # slurp selectors until we find something else:
157 while( my $sel = $self->_raw_parse_simple_selector($_) ){
158 push @this_chain, $sel;
161 if( @this_chain == 1 )
163 push @sub, @this_chain;
166 # make a compound match closure of everything
167 # in this chain of selectors:
170 for my $inner ( @this_chain ){
171 if( ! ($r = $inner->( @_ )) ){
179 # now we're at the end or a delimiter:
180 last PARSE if( pos == length );
181 /\G\s*,\s*/gc or do {
183 $self->_blam( "Selectors not comma separated." );
186 } until (pos == length) };
187 return $sub[0] if (@sub == 1);
189 foreach my $inner (@sub) {
190 if (my $r = $inner->(@_)) { return $r }
197 my ($self, $escaped) = @_;
198 (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g;
203 my ($self, $error) = @_;
204 my $hat = (' ' x (pos||0)).'^';
205 die "Error parsing dispatch specification: ${error}\n