make comments and doctypes get passed through
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / Parser / BuiltIn.pm
CommitLineData
456a815d 1package HTML::Zoom::Parser::BuiltIn;
2
3use strict;
4use warnings FATAL => 'all';
d80786d0 5use base qw(HTML::Zoom::SubObject);
456a815d 6
7sub html_to_events {
d80786d0 8 my ($self, $text) = @_;
456a815d 9 my @events;
10 _hacky_tag_parser($text => sub { push @events, $_[0] });
11 return \@events;
12}
13
14sub html_to_stream {
d80786d0 15 my ($self, $text) = @_;
16 return $self->_zconfig->stream_utils
17 ->stream_from_array(@{$self->html_to_events($text)});
456a815d 18}
19
20sub _hacky_tag_parser {
21 my ($text, $handler) = @_;
22 while (
23 $text =~ m{
24 (
25 (?:[^<]*) < (?:
26 ( / )? ( [^/!<>\s"'=]+ )
27 ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
28 |
29 (!-- .*? -- | ![^\-] .*? )
30 ) (\s*/\s*)? >
31 )
32 ([^<]*)
33 }sxg
34 ) {
e32e7b90 35 my ($whole, $is_close, $tag_name, $attributes, $is_special,
456a815d 36 $in_place_close, $content)
37 = ($1, $2, $3, $4, $5, $6, $7, $8);
e32e7b90 38 if ($is_special) {
39 $handler->({ type => 'SPECIAL', raw => $whole });
456a815d 40 } else {
e32e7b90 41 $tag_name =~ tr/A-Z/a-z/;
42 if ($is_close) {
43 $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
44 } else {
45 $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
456a815d 46 $handler->({
e32e7b90 47 type => 'OPEN',
48 name => $tag_name,
49 is_in_place_close => $in_place_close,
50 _hacky_attribute_parser($attributes),
51 raw_attrs => $attributes||'',
52 raw => $whole,
456a815d 53 });
e32e7b90 54 if ($in_place_close) {
55 $handler->({
56 type => 'CLOSE', name => $tag_name, raw => '',
57 is_in_place_close => 1
58 });
59 }
456a815d 60 }
61 }
62 if (length $content) {
63 $handler->({ type => 'TEXT', raw => $content });
64 }
65 }
66}
67
68sub _hacky_attribute_parser {
69 my ($attr_text) = @_;
70 my (%attrs, @attr_names);
71 while (
72 $attr_text =~ m{
73 ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))?
74 }sgx
75 ) {
76 my $key = $1;
77 my $test = $2;
78 my $val = ( $3 ? $4 : ( $5 ? $6 : $7 ));
79 my $lckey = lc($key);
80 if ($test) {
81 $attrs{$lckey} = _simple_unescape($val);
82 } else {
83 $attrs{$lckey} = $lckey;
84 }
85 push(@attr_names, $lckey);
86 }
87 (attrs => \%attrs, attr_names => \@attr_names);
88}
89
90sub _simple_unescape {
91 my $str = shift;
92 $str =~ s/&quot;/"/g;
93 $str =~ s/&lt;/</g;
94 $str =~ s/&gt;/>/g;
95 $str =~ s/&amp;/&/g;
96 $str;
97}
98
99sub _simple_escape {
100 my $str = shift;
101 $str =~ s/&/&amp;/g;
102 $str =~ s/"/&quot;/g;
103 $str =~ s/</&lt;/g;
104 $str =~ s/>/&gt;/g;
105 $str;
106}
107
f8ed299b 108sub html_escape { _simple_escape($_[1]) }
109
110sub html_unescape { _simple_unescape($_[1]) }
111
456a815d 1121;