use DOM::Tiny::Entities qw(html_escape html_unescape);
use Scalar::Util 'weaken';
-our $VERSION = '0.002';
+our $VERSION = '0.004';
my $ATTR_RE = qr/
- ([^<>=\s\/]+|\/) # Key
+ ([^<>=\s\/]+|\/) # Key
(?:
\s*=\s*
- (?s:(["'])(.*?)\g{-2}|([^>\s]*)) # Value
+ (?s:(?:"(.*?)")|(?:'(.*?)')|([^>\s]*)) # Value
)?
\s*
/x;
|
\?(.*?)\? # Processing Instruction
|
- \s*([^<>\s]+\s*(?:(?:$ATTR_RE){0,32766})*+) # Tag
+ \s*([^<>\s]+\s*(?>(?:$ATTR_RE){0,32766})*) # Tag
+ # Nested bounded quantifier works around Perl's limit of * to {0,32767}
)>
|
(<) # Runaway "<"
my %END = (body => 'head', optgroup => 'optgroup', option => 'option');
# HTML elements that break paragraphs
-map { $END{$_} = 'p' } (
+$END{$_} = 'p' for
qw(address article aside blockquote dir div dl fieldset footer form h1 h2),
- qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul)
-);
+ qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul);
# HTML table elements with optional end tags
my %TABLE = map { $_ => 1 } qw(colgroup tbody td tfoot th thead tr);
# Attributes
my (%attrs, $closing);
while ($attr =~ /$ATTR_RE/go) {
- my ($key, $value) = ($xml ? $1 : lc $1, $3 // $4);
+ my $key = $xml ? $1 : lc $1;
+ my $value = defined($2) ? $2 : defined($3) ? $3 : $4;
# Empty tag
++$closing and next if $key eq '/';