1 package HTML::Zoom::SelectorParser;
4 use base qw(HTML::Zoom::SubObject);
8 my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-;
9 my $sel_re = qr/((?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])+)/;
10 my $match_value_re = qr/"?$sel_re"?/;
# Constructor: returns a fresh, empty hash-based object blessed into the
# invocant class (the first argument).
sub new {
    my $class = shift;
    return bless {}, $class;
}
15 sub _raw_parse_simple_selector {
16 for ($_[1]) { # same pos() as outside
18 # '*' - match anything
23 # 'element' - match on tag name
27 my $name = $_[0]->_unescape($1);
28 sub { $_[0]->{name} && $_[0]->{name} eq $name }
31 # '#id' - match on id attribute
35 my $id = $_[0]->_unescape($1);
36 sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id }
39 # '.class1.class2' - match on intersection of classes
41 /\G((?:\.$sel_re)+)/gc and
43 my $cls = $1; $cls =~ s/^\.//;
44 my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls);
47 && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl
51 # '[attr^=foo]' - match attribute with ^ anchored regex
52 /\G\[$sel_re\^=$match_value_re\]/gc and
54 my $attribute = $_[0]->_unescape($1);
55 my $value = $_[0]->_unescape($2);
57 $_[0]->{attrs}{$attribute}
58 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/;
62 # '[attr$=foo]' - match attribute with $ anchored regex
63 /\G\[$sel_re\$=$match_value_re\]/gc and
65 my $attribute = $_[0]->_unescape($1);
66 my $value = $_[0]->_unescape($2);
68 $_[0]->{attrs}{$attribute}
69 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/;
73 # '[attr*=foo]' - match attribute containing the value as a substring:
74 /\G\[$sel_re\*=$match_value_re\]/gc and
76 my $attribute = $_[0]->_unescape($1);
77 my $value = $_[0]->_unescape($2);
79 $_[0]->{attrs}{$attribute}
80 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/;
84 # '[attr~=bar]' - match attribute contains word
85 /\G\[$sel_re~=$match_value_re\]/gc and
87 my $attribute = $_[0]->_unescape($1);
88 my $value = $_[0]->_unescape($2);
90 $_[0]->{attrs}{$attribute}
91 && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/;
95 # '[attr|=bar]' - match attribute equal to 'bar' or starting with 'bar-' (for language matches)
96 /\G\[$sel_re\|=$match_value_re\]/gc and
98 my $attribute = $_[0]->_unescape($1);
99 my $value = $_[0]->_unescape($2);
101 $_[0]->{attrs}{$attribute}
102 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/;
106 # '[attr=bar]' - match attributes
107 /\G\[$sel_re=$match_value_re\]/gc and
109 my $attribute = $_[0]->_unescape($1);
110 my $value = $_[0]->_unescape($2);
112 $_[0]->{attrs}{$attribute}
113 && $_[0]->{attrs}{$attribute} eq $value;
117 # '[attr!=bar]' - attribute doesn't match
118 /\G\[$sel_re!=$match_value_re\]/gc and
120 my $attribute = $_[0]->_unescape($1);
121 my $value = $_[0]->_unescape($2);
123 ! ($_[0]->{attrs}{$attribute}
124 && $_[0]->{attrs}{$attribute} eq $value);
128 # '[attr]' - match attribute being present:
129 /\G\[$sel_re\]/gc and
131 my $attribute = $_[0]->_unescape($1);
133 exists $_[0]->{attrs}{$attribute};
137 # none of the above matched, try catching some obvious errors:
139 # indicate unmatched square bracket:
140 /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched [');
146 my $sel = $_[1]; # my pos() only please
147 die "No selector provided" unless $sel;
155 # slurp selectors until we find something else:
156 while( my $sel = $self->_raw_parse_simple_selector($_) ){
157 push @this_chain, $sel;
160 if( @this_chain == 1 )
162 push @sub, @this_chain;
165 # make a compound match closure of everything
166 # in this chain of selectors:
169 for my $inner ( @this_chain ){
170 if( ! ($r = $inner->( @_ )) ){
178 # now we're at the end or a delimiter:
179 last PARSE if( pos == length );
180 /\G\s*,\s*/gc or do {
182 $self->_blam( "Selectors not comma separated." );
185 } until (pos == length) };
186 return $sub[0] if (@sub == 1);
188 foreach my $inner (@sub) {
189 if (my $r = $inner->(@_)) { return $r }
196 my ($self, $escaped) = @_;
197 (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g;
202 my ($self, $error) = @_;
203 my $hat = (' ' x (pos||0)).'^';
204 die "Error parsing dispatch specification: ${error}\n