Commit | Line | Data |
9d159224 |
1 | package HTML::Zoom::Parser::BuiltIn; |
2 | |
3 | sub _hacky_tag_parser { |
4 | my ($text, $handler) = @_; |
5 | while ( |
6 | $text =~ m{ |
7 | ( |
8 | (?:[^<]*) < (?: |
9 | ( / )? ( [^/!<>\s"'=]+ ) |
10 | ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )? |
11 | | |
12 | (!-- .*? -- | ![^\-] .*? ) |
13 | ) (\s*/\s*)? > |
14 | ) |
15 | ([^<]*) |
16 | }sxg |
17 | ) { |
18 | my ($whole, $is_close, $tag_name, $attributes, $is_comment, |
19 | $in_place_close, $content) |
20 | = ($1, $2, $3, $4, $5, $6, $7, $8); |
21 | next if defined $is_comment; |
22 | $tag_name =~ tr/A-Z/a-z/; |
23 | if ($is_close) { |
24 | $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole }); |
25 | } else { |
26 | $attributes = '' if $attributes =~ /^ +$/; |
27 | $handler->({ |
28 | type => 'OPEN', |
29 | name => $tag_name, |
30 | is_in_place_close => $in_place_close, |
31 | _hacky_attribute_parser($attributes), |
32 | raw_attrs => $attributes||'', |
33 | raw => $whole, |
34 | }); |
35 | if ($in_place_close) { |
36 | $handler->({ |
37 | type => 'CLOSE', name => $tag_name, raw => '', |
38 | is_in_place_close => 1 |
39 | }); |
40 | } |
41 | } |
42 | if (length $content) { |
43 | $handler->({ type => 'TEXT', raw => $content }); |
44 | } |
45 | } |
46 | } |
47 | |
48 | sub _hacky_attribute_parser { |
49 | my ($attr_text) = @_; |
50 | my (%attrs, @attr_names); |
51 | while ( |
52 | $attr_text =~ m{ |
53 | ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))? |
54 | }sgx |
55 | ) { |
56 | my $key = $1; |
57 | my $test = $2; |
58 | my $val = ( $3 ? $4 : ( $5 ? $6 : $7 )); |
59 | my $lckey = lc($key); |
60 | if ($test) { |
61 | $attrs{$lckey} = _simple_unescape($val); |
62 | } else { |
63 | $attrs{$lckey} = $lckey; |
64 | } |
65 | push(@attr_names, $lckey); |
66 | } |
67 | (attrs => \%attrs, attr_names => \@attr_names); |
68 | } |
69 | |
70 | sub _simple_unescape { |
71 | my $str = shift; |
72 | $str =~ s/"/"/g; |
73 | $str =~ s/</</g; |
74 | $str =~ s/>/>/g; |
75 | $str =~ s/&/&/g; |
76 | $str; |
77 | } |
78 | |
79 | sub _simple_escape { |
80 | my $str = shift; |
81 | $str =~ s/&/&/g; |
82 | $str =~ s/"/"/g; |
83 | $str =~ s/</</g; |
84 | $str =~ s/>/>/g; |
85 | $str; |
86 | } |
87 | |
88 | 1; |