package HTML::Zoom::Parser::BuiltIn;
-use strict;
-use warnings FATAL => 'all';
-
-use HTML::Zoom::CodeStream;
+use strictures 1;
+use base qw(HTML::Zoom::SubObject);
sub html_to_events {
- my ($class, $text) = @_;
+ my ($self, $text) = @_;
my @events;
_hacky_tag_parser($text => sub { push @events, $_[0] });
return \@events;
}
sub html_to_stream {
- my ($class, $text) = @_;
- return HTML::Zoom::CodeStream->from_array(@{$class->html_to_events($text)});
+ my ($self, $text) = @_;
+ return $self->_zconfig->stream_utils
+ ->stream_from_array(@{$self->html_to_events($text)});
}
+# DO NOT BE AFRAID.
+#
+# Well, ok. Be afraid. A little. But this is lexing HTML with a regexp,
+# not really parsing (since the structure nesting isn't handled here) so
+# it's relatively not dangerous.
+#
+# Certainly it's not really any more or any less heinous than anything else
+# I could do in a handful of lines of pure perl.
+
sub _hacky_tag_parser {
my ($text, $handler) = @_;
+ $text =~ m{^([^<]*)}g;
+ if ( length $1 ) { # leading PCDATA
+ $handler->({ type => 'TEXT', raw => $1 });
+ }
while (
$text =~ m{
(
(?:[^<]*) < (?:
( / )? ( [^/!<>\s"'=]+ )
- ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
+ ( (?:"[^"]*"|'[^']*'|[^/"'<>])+? )?
|
(!-- .*? -- | ![^\-] .*? )
) (\s*/\s*)? >
([^<]*)
}sxg
) {
- my ($whole, $is_close, $tag_name, $attributes, $is_comment,
+ my ($whole, $is_close, $tag_name, $attributes, $is_special,
$in_place_close, $content)
= ($1, $2, $3, $4, $5, $6, $7, $8);
- next if defined $is_comment;
- $tag_name =~ tr/A-Z/a-z/;
- if ($is_close) {
- $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
+ if ($is_special) {
+ $handler->({ type => 'SPECIAL', raw => $whole });
} else {
- $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
- $handler->({
- type => 'OPEN',
- name => $tag_name,
- is_in_place_close => $in_place_close,
- _hacky_attribute_parser($attributes),
- raw_attrs => $attributes||'',
- raw => $whole,
- });
- if ($in_place_close) {
+ $tag_name =~ tr/A-Z/a-z/;
+ if ($is_close) {
+ $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
+ } else {
+ $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
$handler->({
- type => 'CLOSE', name => $tag_name, raw => '',
- is_in_place_close => 1
+ type => 'OPEN',
+ name => $tag_name,
+ is_in_place_close => $in_place_close,
+ _hacky_attribute_parser($attributes),
+ raw_attrs => $attributes||'',
+ raw => $whole,
});
+ if ($in_place_close) {
+ $handler->({
+ type => 'CLOSE', name => $tag_name, raw => '',
+ is_in_place_close => 1
+ });
+ }
}
}
if (length $content) {