dwim selects and bugfixes, new transform_attribute method and handled trailing '...
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / SelectorParser.pm
CommitLineData
456a815d 1package HTML::Zoom::SelectorParser;
2
1cf03540 3use strictures 1;
6d0f20a6 4use base qw(HTML::Zoom::SubObject);
456a815d 5use Carp qw(confess);
6
7my $sel_char = '-\w_';
8my $sel_re = qr/([$sel_char]+)/;
682fa876 9my $match_value_re = qr/"?$sel_re"?/;
456a815d 10
96e44ffd 11
682fa876 12sub new { bless({}, shift) }
96e44ffd 13
456a815d 14sub _raw_parse_simple_selector {
15 for ($_[1]) { # same pos() as outside
16
17 # '*' - match anything
18
19 /\G\*/gc and
20 return sub { 1 };
21
22 # 'element' - match on tag name
23
24 /\G$sel_re/gc and
25 return do {
26 my $name = $1;
27 sub { $_[0]->{name} && $_[0]->{name} eq $name }
28 };
29
30 # '#id' - match on id attribute
31
32 /\G#$sel_re/gc and
33 return do {
34 my $id = $1;
35 sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id }
36 };
37
38 # '.class1.class2' - match on intersection of classes
39
40 /\G((?:\.$sel_re)+)/gc and
41 return do {
42 my $cls = $1; $cls =~ s/^\.//;
43 my @cl = split(/\./, $cls);
44 sub {
45 $_[0]->{attrs}{class}
6d0f20a6 46 && !grep $_[0]->{attrs}{class} !~ /(^|\s+)$_($|\s+)/, @cl
456a815d 47 }
48 };
49
682fa876 50 # '[attr^=foo]' - match attribute with ^ anchored regex
51 /\G\[$sel_re\^=$match_value_re\]/gc and
6818876e 52 return do {
682fa876 53 my $attribute = $1;
54 my $value = $2;
6818876e 55 sub {
56 $_[0]->{attrs}{$attribute}
57 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/;
58 }
682fa876 59 };
60
61 # '[attr$=foo]' - match attribute with $ anchored regex
62 /\G\[$sel_re\$=$match_value_re\]/gc and
6818876e 63 return do {
682fa876 64 my $attribute = $1;
65 my $value = $2;
6818876e 66 sub {
67 $_[0]->{attrs}{$attribute}
68 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/;
69 }
682fa876 70 };
71
72 # '[attr*=foo] - match attribute with regex:
73 /\G\[$sel_re\*=$match_value_re\]/gc and
6818876e 74 return do {
682fa876 75 my $attribute = $1;
76 my $value = $2;
6818876e 77 sub {
78 $_[0]->{attrs}{$attribute}
79 && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/;
80 }
682fa876 81 };
82
5b63d244 83 # '[attr~=bar]' - match attribute contains word
84 /\G\[$sel_re~=$match_value_re\]/gc and
85 return do {
86 my $attribute = $1;
87 my $value = $2;
88 sub {
89 $_[0]->{attrs}{$attribute}
f7dc3b61 90 && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/;
5b63d244 91 }
92 };
93
52878e73 94 # '[attr!=bar]' - match attribute contains prefix (for language matches)
95 /\G\[$sel_re\|=$match_value_re\]/gc and
96 return do {
97 my $attribute = $1;
98 my $value = $2;
99 sub {
100 $_[0]->{attrs}{$attribute}
101 && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/;
102 }
103 };
104
682fa876 105 # '[attr=bar]' - match attributes
106 /\G\[$sel_re=$match_value_re\]/gc and
107 return do {
108 my $attribute = $1;
109 my $value = $2;
6818876e 110 sub {
682fa876 111 $_[0]->{attrs}{$attribute}
112 && $_[0]->{attrs}{$attribute} eq $value;
113 }
114 };
115
24725e7b 116 # '[attr!=bar]' - attributes doesn't match
117 /\G\[$sel_re!=$match_value_re\]/gc and
118 return do {
119 my $attribute = $1;
120 my $value = $2;
121 sub {
122 ! ($_[0]->{attrs}{$attribute}
123 && $_[0]->{attrs}{$attribute} eq $value);
124 }
125 };
126
5b63d244 127 # '[attr]' - match attribute being present:
682fa876 128 /\G\[$sel_re\]/gc and
129 return do {
130 my $attribute = $1;
6818876e 131 sub {
132 exists $_[0]->{attrs}{$attribute};
133 }
bd4e2ca0 134 };
135
136 # none of the above matched, try catching some obvious errors:
137
138 # indicate unmatched square bracket:
139 /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched [');
456a815d 140 }
141}
142
143sub parse_selector {
144 my $self = $_[0];
145 my $sel = $_[1]; # my pos() only please
146 die "No selector provided" unless $sel;
147 local *_;
148 for ($sel) {
149 my @sub;
150 PARSE: { do {
682fa876 151
152 my @this_chain;
153
154 # slurp selectors until we find something else:
155 while( my $sel = $self->_raw_parse_simple_selector($_) ){
156 push @this_chain, $sel;
157 }
158
159 if( @this_chain == 1 )
160 {
161 push @sub, @this_chain;
162 }
163 else{
164 # make a compound match closure of everything
165 # in this chain of selectors:
166 push @sub, sub{
167 my $r;
168 for my $inner ( @this_chain ){
169 if( ! ($r = $inner->( @_ )) ){
170 return $r;
171 }
172 }
173 return $r;
174 }
175 }
176
177 # now we're at the end or a delimiter:
178 last PARSE if( pos == length );
179 /\G\s*,\s*/gc or do {
180 /\G(.*)/;
181 $self->_blam( "Selectors not comma separated." );
182 }
183
184 } until (pos == length) };
456a815d 185 return $sub[0] if (@sub == 1);
186 return sub {
187 foreach my $inner (@sub) {
188 if (my $r = $inner->(@_)) { return $r }
189 }
190 };
191 }
7871f2ff 192}
193
456a815d 194
682fa876 195sub _blam {
196 my ($self, $error) = @_;
197 my $hat = (' ' x (pos||0)).'^';
198 die "Error parsing dispatch specification: ${error}\n
199${_}
200${hat} here\n";
201}
202
456a815d 2031;