add strictures commit (out of order)
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / Parser / BuiltIn.pm
CommitLineData
456a815d 1package HTML::Zoom::Parser::BuiltIn;
2
1cf03540 3use strictures 1;
d80786d0 4use base qw(HTML::Zoom::SubObject);
456a815d 5
6sub html_to_events {
d80786d0 7 my ($self, $text) = @_;
456a815d 8 my @events;
9 _hacky_tag_parser($text => sub { push @events, $_[0] });
10 return \@events;
11}
12
13sub html_to_stream {
d80786d0 14 my ($self, $text) = @_;
15 return $self->_zconfig->stream_utils
16 ->stream_from_array(@{$self->html_to_events($text)});
456a815d 17}
18
19sub _hacky_tag_parser {
20 my ($text, $handler) = @_;
21 while (
22 $text =~ m{
23 (
24 (?:[^<]*) < (?:
25 ( / )? ( [^/!<>\s"'=]+ )
26 ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
27 |
28 (!-- .*? -- | ![^\-] .*? )
29 ) (\s*/\s*)? >
30 )
31 ([^<]*)
32 }sxg
33 ) {
e32e7b90 34 my ($whole, $is_close, $tag_name, $attributes, $is_special,
456a815d 35 $in_place_close, $content)
36 = ($1, $2, $3, $4, $5, $6, $7, $8);
e32e7b90 37 if ($is_special) {
38 $handler->({ type => 'SPECIAL', raw => $whole });
456a815d 39 } else {
e32e7b90 40 $tag_name =~ tr/A-Z/a-z/;
41 if ($is_close) {
42 $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
43 } else {
44 $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
456a815d 45 $handler->({
e32e7b90 46 type => 'OPEN',
47 name => $tag_name,
48 is_in_place_close => $in_place_close,
49 _hacky_attribute_parser($attributes),
50 raw_attrs => $attributes||'',
51 raw => $whole,
456a815d 52 });
e32e7b90 53 if ($in_place_close) {
54 $handler->({
55 type => 'CLOSE', name => $tag_name, raw => '',
56 is_in_place_close => 1
57 });
58 }
456a815d 59 }
60 }
61 if (length $content) {
62 $handler->({ type => 'TEXT', raw => $content });
63 }
64 }
65}
66
67sub _hacky_attribute_parser {
68 my ($attr_text) = @_;
69 my (%attrs, @attr_names);
70 while (
71 $attr_text =~ m{
72 ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))?
73 }sgx
74 ) {
75 my $key = $1;
76 my $test = $2;
77 my $val = ( $3 ? $4 : ( $5 ? $6 : $7 ));
78 my $lckey = lc($key);
79 if ($test) {
80 $attrs{$lckey} = _simple_unescape($val);
81 } else {
82 $attrs{$lckey} = $lckey;
83 }
84 push(@attr_names, $lckey);
85 }
86 (attrs => \%attrs, attr_names => \@attr_names);
87}
88
89sub _simple_unescape {
90 my $str = shift;
91 $str =~ s/&quot;/"/g;
92 $str =~ s/&lt;/</g;
93 $str =~ s/&gt;/>/g;
94 $str =~ s/&amp;/&/g;
95 $str;
96}
97
98sub _simple_escape {
99 my $str = shift;
100 $str =~ s/&/&amp;/g;
101 $str =~ s/"/&quot;/g;
102 $str =~ s/</&lt;/g;
103 $str =~ s/>/&gt;/g;
104 $str;
105}
106
f8ed299b 107sub html_escape { _simple_escape($_[1]) }
108
109sub html_unescape { _simple_unescape($_[1]) }
110
456a815d 1111;