add / to excluded characters in attribute names to correctly parse <br/> etc.
Matt S Trout [Thu, 24 Feb 2011 20:10:16 +0000 (20:10 +0000)]
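For a cuddled self-closing tag such as <br/>, the '/' was being captured as
the attribute string, so the optional trailing-slash group captured nothing
and $in_place_close wasn't being set. Adding / to the attribute-string
exclusion list fixes this.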

Changes
lib/HTML/Zoom/Parser/BuiltIn.pm
t/parser_weirdness.t [new file with mode: 0644]

diff --git a/Changes b/Changes
index 88caf83..276af99 100644 (file)
--- a/Changes
+++ b/Changes
@@ -1,3 +1,5 @@
+- add / to excluded characters in attribute names to correctly parse <br/> 
+
 0.009004 2011-02-14
 
 - Large chunks of documentation improvements from jnareb
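diff --git a/lib/HTML/Zoom/Parser/BuiltIn.pm b/lib/HTML/Zoom/Parser/BuiltIn.pm
index efa07cb..b55cc4d 100644 (file)
--- a/lib/HTML/Zoom/Parser/BuiltIn.pm
+++ b/lib/HTML/Zoom/Parser/BuiltIn.pm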
@@ -23,7 +23,7 @@ sub _hacky_tag_parser {
       (
         (?:[^<]*) < (?:
             ( / )? ( [^/!<>\s"'=]+ )
-            ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
+            ( (?:"[^"]*"|'[^']*'|[^/"'<>])+? )?
         |   
             (!-- .*? -- | ![^\-] .*? )
         ) (\s*/\s*)? >
diff --git a/t/parser_weirdness.t b/t/parser_weirdness.t
new file mode 100644 (file)
index 0000000..2abab01
--- /dev/null
+++ b/t/parser_weirdness.t
@@ -0,0 +1,26 @@
+use strictures 1;
+use HTML::Zoom;
+use Test::More;
+
+my $html = <<EOHTML;
+<body>
+  <p><br/></p>
+  <p><br /></p>
+</body>
+EOHTML
+
+HTML::Zoom->from_html($html)
+          ->select('body')
+          ->collect_content({
+              into => \my @body
+            })
+          ->run;
+
+is(HTML::Zoom->from_events(\@body)->to_html, <<EOHTML,
+
+  <p><br/></p>
+  <p><br /></p>
+EOHTML
+  'Parses cuddled in place close ok');
+
+done_testing;