X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDOM%2FTiny%2F_HTML.pm;h=1f7e82f97559b55897e7e2648fb464c9f990401c;hb=28d939996250b69ba0df011f8d2c2c9a551a84e4;hp=9049d9b9b5065d38f39a1ff983e57a39d56e4b25;hpb=9a5f1e3f0633370ed08c12af1542a9e38870c84a;p=catagits%2FDOM-Tiny.git diff --git a/lib/DOM/Tiny/_HTML.pm b/lib/DOM/Tiny/_HTML.pm index 9049d9b..1f7e82f 100644 --- a/lib/DOM/Tiny/_HTML.pm +++ b/lib/DOM/Tiny/_HTML.pm @@ -5,13 +5,13 @@ use warnings; use DOM::Tiny::Entities qw(html_escape html_unescape); use Scalar::Util 'weaken'; -our $VERSION = '0.001'; +our $VERSION = '0.002'; my $ATTR_RE = qr/ - ([^<>=\s\/]+|\/) # Key + ([^<>=\s\/]+|\/) # Key (?: \s*=\s* - (?s:(["'])(.*?)\g{-2}|([^>\s]*)) # Value + (?s:(?:"(.*?)")|(?:'(.*?)')|([^>\s]*)) # Value )? \s* /x; @@ -33,7 +33,8 @@ my $TOKEN_RE = qr/ | \?(.*?)\? # Processing Instruction | - \s*([^<>\s]+\s*(?:(?:$ATTR_RE){0,32766})*+) # Tag + \s*([^<>\s]+\s*(?>(?:$ATTR_RE){0,32766})*) # Tag + # Workaround for perl's limit of * to {0,32767} )> | (<) # Runaway "<" @@ -50,10 +51,9 @@ my %RCDATA = map { $_ => 1 } qw(title textarea); my %END = (body => 'head', optgroup => 'optgroup', option => 'option'); # HTML elements that break paragraphs -map { $END{$_} = 'p' } ( +$END{$_} = 'p' for qw(address article aside blockquote dir div dl fieldset footer form h1 h2), - qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul) -); + qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul); # HTML table elements with optional end tags my %TABLE = map { $_ => 1 } qw(colgroup tbody td tfoot th thead tr); @@ -139,7 +139,8 @@ sub parse { # Attributes my (%attrs, $closing); while ($attr =~ /$ATTR_RE/go) { - my ($key, $value) = ($xml ? $1 : lc $1, $3 // $4); + my $key = $xml ? $1 : lc $1; + my $value = defined($2) ? $2 : defined($3) ? $3 : $4; # Empty tag ++$closing and next if $key eq '/';