X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FHTML%2FZoom%2FSelectorParser.pm;h=1432c4ddf7c97f0e6278eaa8d0faba5629ed84a9;hb=4c6b44299edb3613877d7e99586026423e9f15b5;hp=029f688859db8a9f89ba4f13bcdcf933578c20c1;hpb=3b8c9d7d615775505ace99b90b9c0c0433b6febc;p=catagits%2FHTML-Zoom.git diff --git a/lib/HTML/Zoom/SelectorParser.pm b/lib/HTML/Zoom/SelectorParser.pm index 029f688..1432c4d 100644 --- a/lib/HTML/Zoom/SelectorParser.pm +++ b/lib/HTML/Zoom/SelectorParser.pm @@ -1,12 +1,15 @@ package HTML::Zoom::SelectorParser; -use strict; -use warnings FATAL => 'all'; +use strictures 1; use base qw(HTML::Zoom::SubObject); use Carp qw(confess); my $sel_char = '-\w_'; -my $sel_re = qr/([$sel_char]+)/; +my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-; +my $sel_item = qr/(?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])/; +my $sel_re = qr/($sel_item+)/; +my $match_value_re = qr/"?($sel_item*)"?/; + sub new { bless({}, shift) } @@ -18,24 +21,11 @@ sub _raw_parse_simple_selector { /\G\*/gc and return sub { 1 }; - # 'el[attr="foo"] - - /\G$sel_re\[$sel_re="$sel_re"\]/gc and - return do { - my $name = $1; - my $attr = $2; - my $val = $3; - sub { - $_[0]->{name} && $_[0]->{name} eq $name and - $_[0]->{attrs}{$attr} && $_[0]->{attrs}{$attr} eq $val - } - }; - # 'element' - match on tag name /\G$sel_re/gc and return do { - my $name = $1; + my $name = $_[0]->_unescape($1); sub { $_[0]->{name} && $_[0]->{name} eq $name } }; @@ -43,7 +33,7 @@ sub _raw_parse_simple_selector { /\G#$sel_re/gc and return do { - my $id = $1; + my $id = $_[0]->_unescape($1); sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id } }; @@ -52,38 +42,103 @@ sub _raw_parse_simple_selector { /\G((?:\.$sel_re)+)/gc and return do { my $cls = $1; $cls =~ s/^\.//; - my @cl = split(/\./, $cls); + my @cl = map $_[0]->_unescape($_), split(/(?<!\\)\./, $cls); sub { $_[0]->{attrs}{class} - && !grep $_[0]->{attrs}{class} !~ /(^|\s+)$_($|\s+)/, @cl + && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl } }; - # 
'el.class1' - element + class + # '[attr^=foo]' - match attribute with ^ anchored regex + /\G\[$sel_re\^=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; + } + }; - /\G$sel_re\.$sel_re/gc and + # '[attr$=foo]' - match attribute with $ anchored regex + /\G\[$sel_re\$=$match_value_re\]/gc and return do { - my $cls = $1; - my $name = $2; + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); sub { - $_[0]->{name} && $_[0]->{name} eq $name and - $_[0]->{attrs}{class} && $_[0]->{attrs}{class} eq $cls + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; } }; - # 'el#id' - element + id + # '[attr*=foo] - match attribute with regex: + /\G\[$sel_re\*=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/; + } + }; + + # '[attr~=bar]' - match attribute contains word + /\G\[$sel_re~=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/; + } + }; + + # '[attr!=bar]' - match attribute contains prefix (for language matches) + /\G\[$sel_re\|=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/; + } + }; - /\G$sel_re#$sel_re/gc and + # '[attr=bar]' - match attributes + /\G\[$sel_re=$match_value_re\]/gc and return do { - my $id = $1; - my $name = $2; + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); sub { - $_[0]->{name} && $_[0]->{name} eq $name and - $_[0]->{attrs}{id} && 
$_[0]->{attrs}{id} eq $id + exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} eq $value; } }; - confess "Couldn't parse $_ as starting with simple selector"; + # '[attr!=bar]' - attributes doesn't match + /\G\[$sel_re!=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + ! (exists $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} eq $value); + } + }; + + # '[attr]' - match attribute being present: + /\G\[$sel_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + sub { + exists $_[0]->{attrs}{$attribute}; + } + }; + + # none of the above matched, try catching some obvious errors: + + # indicate unmatched square bracket: + /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched ['); } } @@ -95,10 +150,40 @@ sub parse_selector { for ($sel) { my @sub; PARSE: { do { - push(@sub, $self->_raw_parse_simple_selector($_)); - last PARSE if (pos == length); - /\G\s*,\s*/gc or confess "Selectors not comma separated"; - } until (pos == length) }; + + my @this_chain; + + # slurp selectors until we find something else: + while( my $sel = $self->_raw_parse_simple_selector($_) ){ + push @this_chain, $sel; + } + + if( @this_chain == 1 ) + { + push @sub, @this_chain; + } + else{ + # make a compound match closure of everything + # in this chain of selectors: + push @sub, sub{ + my $r; + for my $inner ( @this_chain ){ + if( ! ($r = $inner->( @_ )) ){ + return $r; + } + } + return $r; + } + } + + # now we're at the end or a delimiter: + last PARSE if( pos == length ); + /\G\s*,\s*/gc or do { + /\G(.*)/; + $self->_blam( "Selectors not comma separated." 
); + } + + } until (pos == length) }; return $sub[0] if (@sub == 1); return sub { foreach my $inner (@sub) { @@ -108,5 +193,18 @@ sub parse_selector { } } +sub _unescape { + my ($self, $escaped) = @_; + (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g; + return $unescaped; +} + +sub _blam { + my ($self, $error) = @_; + my $hat = (' ' x (pos||0)).'^'; + die "Error parsing dispatch specification: ${error}\n +${_} +${hat} here\n"; +} 1;