package HTML::Zoom::Parser::BuiltIn;
-use strict;
-use warnings FATAL => 'all';
+use strictures 1;
use base qw(HTML::Zoom::SubObject);
sub html_to_events {
->stream_from_array(@{$self->html_to_events($text)});
}
+# DO NOT BE AFRAID.
+#
+# Well, ok. Be afraid. A little. But this is lexing HTML with a regexp,
+# not really parsing it (structure nesting isn't handled here), so it's
+# relatively safe.
+#
+# Certainly it's not really any more or any less heinous than anything else
+# I could do in a handful of lines of pure perl.
+
sub _hacky_tag_parser {
my ($text, $handler) = @_;
+ $text =~ m{^([^<]*)}g;
+ if ( length $1 ) { # leading PCDATA
+ $handler->({ type => 'TEXT', raw => $1 });
+ }
while (
$text =~ m{
(
(?:[^<]*) < (?:
( / )? ( [^/!<>\s"'=]+ )
- ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
+ ( (?:"[^"]*"|'[^']*'|[^/"'<>])+? )?
|
(!-- .*? -- | ![^\-] .*? )
) (\s*/\s*)? >