1 package HTML::Zoom::Parser::BuiltIn;
4 use base qw(HTML::Zoom::SubObject);
7 my ($self, $text) = @_;
9 _hacky_tag_parser($text => sub { push @events, $_[0] });
14 my ($self, $text) = @_;
15 return $self->_zconfig->stream_utils
16 ->stream_from_array(@{$self->html_to_events($text)});
21 # Well, ok. Be afraid. A little. But this is lexing HTML with a regexp,
22 # not really parsing (since the structure nesting isn't handled here) so
23 # it's relatively not dangerous.
25 # Certainly it's not really any more or any less heinous than anything else
26 # I could do in a handful of lines of pure perl.
28 sub _hacky_tag_parser {
29 my ($text, $handler) = @_;
30 $text =~ m{^([^<]*)}g;
31 if ( length $1 ) { # leading PCDATA
32 $handler->({ type => 'TEXT', raw => $1 });
38 ( / )? ( [^/!<>\s"'=]+ )
39 ( (?:"[^"]*"|'[^']*'|[^/"'<>])+? )?
41 (!-- .*? -- | ![^\-] .*? )
47 my ($whole, $is_close, $tag_name, $attributes, $is_special,
48 $in_place_close, $content)
49 = ($1, $2, $3, $4, $5, $6, $7, $8);
51 $handler->({ type => 'SPECIAL', raw => $whole });
53 $tag_name =~ tr/A-Z/a-z/;
55 $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
57 $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
61 is_in_place_close => $in_place_close,
62 _hacky_attribute_parser($attributes),
63 raw_attrs => $attributes||'',
66 if ($in_place_close) {
68 type => 'CLOSE', name => $tag_name, raw => '',
69 is_in_place_close => 1
74 if (length $content) {
75 $handler->({ type => 'TEXT', raw => $content });
80 sub _hacky_attribute_parser {
82 my (%attrs, @attr_names);
85 ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))?
90 my $val = ( $3 ? $4 : ( $5 ? $6 : $7 ));
93 $attrs{$lckey} = _simple_unescape($val);
95 $attrs{$lckey} = $lckey;
97 push(@attr_names, $lckey);
99 (attrs => \%attrs, attr_names => \@attr_names);
102 sub _simple_unescape {
104 $str =~ s/"/"/g;
114 $str =~ s/"/"/g;
# Method-style entry point for escaping: the invocant in $_[0] is ignored and
# the second argument is handed to _simple_escape, whose result is returned.
sub html_escape {
  # Pass $_[1] itself rather than a copy, so the helper receives exactly the
  # argument the caller supplied.
  return _simple_escape($_[1]);
}
# Method-style entry point for unescaping: the invocant in $_[0] is ignored
# and the second argument is handed to _simple_unescape, whose result is
# returned.
sub html_unescape {
  # Pass $_[1] itself rather than a copy, so the helper receives exactly the
  # argument the caller supplied.
  return _simple_unescape($_[1]);
}