X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FHTML-Zoom.git;a=blobdiff_plain;f=lib%2FHTML%2FZoom%2FSelectorParser.pm;h=e2215f91e7049ff19216d0271bc15155924d3cca;hp=4e4f11c140cc4aa913f26412abf7b6a21c8822d0;hb=619fe6fe0082c0eba72445ecf470e31d7101394c;hpb=682fa876be5265211f777cf85fac6528d4dd4d41 diff --git a/lib/HTML/Zoom/SelectorParser.pm b/lib/HTML/Zoom/SelectorParser.pm index 4e4f11c..e2215f9 100644 --- a/lib/HTML/Zoom/SelectorParser.pm +++ b/lib/HTML/Zoom/SelectorParser.pm @@ -1,12 +1,12 @@ package HTML::Zoom::SelectorParser; -use strict; -use warnings FATAL => 'all'; +use strictures 1; use base qw(HTML::Zoom::SubObject); use Carp qw(confess); my $sel_char = '-\w_'; -my $sel_re = qr/([$sel_char]+)/; +my $sel_meta_char = q-!"#$%&'()*+,./:;<=>?@[\]^`{|}~-; +my $sel_re = qr/((?:(?:\\[\Q$sel_meta_char\E])|[$sel_char])+)/; my $match_value_re = qr/"?$sel_re"?/; @@ -24,7 +24,7 @@ sub _raw_parse_simple_selector { /\G$sel_re/gc and return do { - my $name = $1; + my $name = $_[0]->_unescape($1); sub { $_[0]->{name} && $_[0]->{name} eq $name } }; @@ -32,7 +32,7 @@ sub _raw_parse_simple_selector { /\G#$sel_re/gc and return do { - my $id = $1; + my $id = $_[0]->_unescape($1); sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id } }; @@ -41,57 +41,103 @@ sub _raw_parse_simple_selector { /\G((?:\.$sel_re)+)/gc and return do { my $cls = $1; $cls =~ s/^\.//; - my @cl = split(/\./, $cls); + my @cl = map $_[0]->_unescape($_), split(/(?{attrs}{class} - && !grep $_[0]->{attrs}{class} !~ /(^|\s+)$_($|\s+)/, @cl + && !grep $_[0]->{attrs}{class} !~ /(^|\s+)\Q$_\E($|\s+)/, @cl } }; # '[attr^=foo]' - match attribute with ^ anchored regex /\G\[$sel_re\^=$match_value_re\]/gc and - return do{ - my $attribute = $1; - my $value = $2; - $_[0]->{attrs}{$attribute} - && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E/; + } }; # '[attr$=foo]' - match attribute with $ anchored regex /\G\[$sel_re\$=$match_value_re\]/gc and - return do{ - my $attribute = $1; - my $value = $2; - $_[0]->{attrs}{$attribute} - && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; + } }; # '[attr*=foo] - match attribute with regex: /\G\[$sel_re\*=$match_value_re\]/gc and - return do{ - my $attribute = $1; - my $value = $2; - $_[0]->{attrs}{$attribute} - && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E$/; + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\Q$value\E/; + } + }; + + # '[attr~=bar]' - match attribute contains word + /\G\[$sel_re~=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/\b\Q$value\E\b/; + } + }; + + # '[attr!=bar]' - match attribute contains prefix (for language matches) + /\G\[$sel_re\|=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + $_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} =~ qr/^\Q$value\E(?:-|$)/; + } }; # '[attr=bar]' - match attributes /\G\[$sel_re=$match_value_re\]/gc and return do { - my $attribute = $1; - my $value = $2; - sub{ + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { $_[0]->{attrs}{$attribute} && $_[0]->{attrs}{$attribute} eq $value; } }; - # '[attr] - match attribute being present: + # '[attr!=bar]' - attributes doesn't match + /\G\[$sel_re!=$match_value_re\]/gc and + return do { + my $attribute = $_[0]->_unescape($1); + my $value = $_[0]->_unescape($2); + sub { + ! ($_[0]->{attrs}{$attribute} + && $_[0]->{attrs}{$attribute} eq $value); + } + }; + + # '[attr]' - match attribute being present: /\G\[$sel_re\]/gc and return do { - my $attribute = $1; - $_[0]->{attrs}{$attribute}; - } + my $attribute = $_[0]->_unescape($1); + sub { + exists $_[0]->{attrs}{$attribute}; + } + }; + + # none of the above matched, try catching some obvious errors: + + # indicate unmatched square bracket: + /\G\[[^\]]*/gc and $_[0]->_blam('Unmatched ['); } } @@ -146,6 +192,11 @@ sub parse_selector { } } +sub _unescape { + my ($self, $escaped) = @_; + (my $unescaped = $escaped) =~ s/\\([\Q$sel_meta_char\E])/$1/g; + return $unescaped; +} sub _blam { my ($self, $error) = @_;