Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::FilterBuilder; |
2 | |
456a815d |
3 | use strict; |
4 | use warnings FATAL => 'all'; |
5 | use HTML::Zoom::CodeStream; |
6 | |
# Constructor: returns a new, empty hash-based object blessed into the
# invocant it was called on.
sub new {
  my ($class) = @_;
  return bless {}, $class;
}
8 | |
# Wrap a code reference in an HTML::Zoom::CodeStream.
#
# The invocant is ignored; the second argument is handed to the stream
# constructor under the `code` key.
sub _stream_from_code {
  my (undef, $code) = @_;
  return HTML::Zoom::CodeStream->new({ code => $code });
}
12 | |
# Build an HTML::Zoom::CodeStream from a list of items.
#
# Everything after the invocant is forwarded unchanged to
# HTML::Zoom::CodeStream->from_array.
sub _stream_from_array {
  my $self = shift;    # the invocant is not part of the item list
  return HTML::Zoom::CodeStream->from_array(@_);
}
17 | |
3cdbc13f |
# Turn a "stream prototype" into a stream, based on what kind of value it is:
#
#   array reference  - its elements become the stream's items
#   code reference   - it is called with no arguments and its result returned
#   scalar reference - the referenced string is handed to
#                      HTML::Zoom::Parser::BuiltIn->html_to_stream
#   plain scalar     - passed through HTML::Zoom::Parser::BuiltIn->html_escape
#                      and emitted as a single TEXT event
#
# Dies for any other kind of reference.
sub _stream_from_proto {
  my ($self, $proto) = @_;
  my $ref = ref $proto;

  return $self->_stream_from_array(@$proto) if $ref eq 'ARRAY';
  return $proto->() if $ref eq 'CODE';

  if ($ref eq 'SCALAR') {
    # The parser is only loaded when it is actually needed.
    require HTML::Zoom::Parser::BuiltIn;
    return HTML::Zoom::Parser::BuiltIn->html_to_stream($$proto);
  }

  unless ($ref) {
    require HTML::Zoom::Parser::BuiltIn;
    return $self->_stream_from_array({
      type => 'TEXT',
      raw => HTML::Zoom::Parser::BuiltIn->html_escape($proto)
    });
  }

  die "Don't know how to turn $proto (ref $ref) into a stream";
}
37 | |
456a815d |
# Join several streams end to end: the arguments are packed into a stream
# (via _stream_from_array) and that stream is then flattened.
sub _stream_concat {
  my $self = shift;
  my $stream_of_streams = $self->_stream_from_array(@_);
  return $stream_of_streams->flatten;
}
41 | |
# Build a filter that sets an attribute on an event, replacing any existing
# value.
#
# $args is a hash reference with the keys `name` and `value`.
#
# The returned code reference takes an event (hash reference) and returns a
# new event: `raw` and `raw_attrs` are set to undef, `attrs` is a copy with
# the attribute set, and the name is appended to `attr_names` when the
# attribute was not already present. The incoming event is not modified.
sub set_attribute {
  my ($self, $args) = @_;
  my $name  = $args->{name};
  my $value = $args->{value};
  return sub {
    my ($event) = @_;
    my $attrs = $event->{attrs};
    my $known = exists $attrs->{$name};
    my %updated = (
      %$event,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$attrs, $name => $value },
    );
    # Only a brand-new attribute has to be added to the ordered name list.
    $updated{attr_names} = [ @{$event->{attr_names}}, $name ] unless $known;
    return \%updated;
  };
}
56 | |
# Build a filter that adds a value to an attribute on an event.
#
# $args is a hash reference with the keys `name` and `value`.
#
# The returned code reference takes an event (hash reference) and returns a
# new event. If the attribute already exists the new value is appended to
# the old one, separated by a single space; otherwise the attribute is
# created and its name appended to `attr_names`. `raw` and `raw_attrs` are
# set to undef in the result. The incoming event is not modified.
sub add_attribute {
  my ($self, $args) = @_;
  my $name  = $args->{name};
  my $value = $args->{value};
  return sub {
    my ($event) = @_;
    my $attrs = $event->{attrs};
    my $known = exists $attrs->{$name};
    # Existing value first (if any), then the addition.
    my @parts = $known ? ($attrs->{$name}, $value) : ($value);
    my %updated = (
      %$event,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$attrs, $name => join(' ', @parts) },
    );
    # Only a brand-new attribute has to be added to the ordered name list.
    $updated{attr_names} = [ @{$event->{attr_names}}, $name ] unless $known;
    return \%updated;
  };
}
74 | |
# Build a filter that removes an attribute from an event.
#
# $args is a hash reference with the key `name`.
#
# The returned code reference takes an event (hash reference). When the
# attribute is not present the very same event is returned. Otherwise a new
# event is returned with the attribute dropped from copies of `attrs` and
# `attr_names`, and with `raw` and `raw_attrs` set to undef.
sub remove_attribute {
  my ($self, $args) = @_;
  my $name = $args->{name};
  return sub {
    my ($event) = @_;
    my $attrs = $event->{attrs};
    return $event unless exists $attrs->{$name};

    # Copy before deleting so the caller's attribute hash stays intact.
    my %remaining = %$attrs;
    delete $remaining{$name};

    my %updated = (
      %$event,
      raw        => undef,
      raw_attrs  => undef,
      attrs      => \%remaining,
      attr_names => [ grep { $_ ne $name } @{$event->{attr_names}} ],
    );
    return \%updated;
  };
}
88 | |
76cecb10 |
# Build a filter that gathers the events of an element from a stream.
#
# $options is a hash reference (it may be omitted). Recognised keys:
#   into        - array reference that receives the gathered events; it is
#                 reset each time the returned filter is invoked
#   passthrough - when true, gathered events are also emitted downstream
#   content     - when true, only the events between the opening event and
#                 its matching CLOSE are gathered, and that CLOSE is left
#                 on the stream
#   filter      - code reference applied to the incoming stream before
#                 gathering starts
#
# The returned code reference takes ($evt, $stream) and returns:
#   - for an in-place-close event: the event itself when `passthrough` or
#     `content` is set, otherwise nothing;
#   - otherwise a collector stream, wrapped as [ $evt, $collector ] when
#     `passthrough` or `content` is set.
#
# The collector dies if the source ends before the matching CLOSE is seen.
sub collect {
  my ($self, $options) = @_;
  my $into        = $options->{into};
  my $passthrough = $options->{passthrough};
  my $content     = $options->{content};
  my $filter      = $options->{filter};

  return sub {
    my ($evt, $stream) = @_;

    # We wipe the contents of @$into here so that other actions depending
    # on this (such as a repeater) can be invoked multiple times easily.
    # In content mode the opening event is not part of the collection.
    if ($into) {
      @$into = $content ? () : ($evt);
    }

    # An in-place close has no following events to gather.
    if ($evt->{is_in_place_close}) {
      return $evt if $passthrough || $content;
      return;
    }

    my $name  = $evt->{name};
    my $depth = 1;
    # In content mode we peek first so the final CLOSE is never consumed.
    my $advance = $content ? 'peek' : 'next';
    $stream = $filter->($stream) if $filter;

    my $collector = $self->_stream_from_code(sub {
      return unless $stream;    # already finished
      while (my ($inner) = $stream->$advance) {
        my $type = $inner->{type};
        if ($type eq 'OPEN') {
          $depth++;
        } elsif ($type eq 'CLOSE') {
          $depth--;
        }

        if ($depth == 0) {
          # Reached the CLOSE matching the opening event: we are done.
          undef $stream;
          return if $content;
          push(@$into, $inner) if $into;
          return $inner if $passthrough;
          return;
        }

        push(@$into, $inner) if $into;
        # The event was only peeked at in content mode; consume it now.
        $stream->next if $content;
        return $inner if $passthrough;
      }
      die "Never saw closing </${name}> before end of source";
    });

    return [ $evt, $collector ] if $passthrough || $content;
    return $collector;
  };
}
131 | |
865bb5d2 |
# Same as collect(), but with the `content` option forced on. The caller's
# options hash (which may be omitted) is copied, not modified.
sub collect_content {
  my ($self, $options) = @_;
  my %with_content = (%{$options||{}}, content => 1);
  return $self->collect(\%with_content);
}
136 | |
456a815d |
# Build a filter that emits the given events (an array reference) ahead of
# the event it is applied to. The filter returns a stream made of @$events
# followed by the matched event.
sub add_before {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    return $self->_stream_from_array(@$events, $evt);
  };
}
141 | |
# Build a filter that emits the given events (an array reference) after the
# element it is applied to.
#
# The element is gathered with a passthrough collector so that its own
# events are still emitted. The filter returns a two-element array
# reference: the opening event, followed by either the extra events alone
# (when the collector hands back a single event) or the collector stream
# concatenated with the extra events.
sub add_after {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1 });
  return sub {
    my $emit = $self->_stream_from_array(@$events);
    # Hand the collector exactly the arguments this filter was given.
    my $collected = $coll_proto->(@_);

    # single event, no collect
    if (ref($collected) eq 'HASH') {
      return [ $collected, $emit ];
    }

    my $body_then_emit = $self->_stream_concat($collected->[1], $emit);
    return [ $collected->[0], $body_then_emit ];
  };
}
456a815d |
154 | |
865bb5d2 |
# Build a filter that inserts the given events (an array reference) at the
# start of an element's content.
#
# For an ordinary opening event the filter returns a stream of that event
# followed by @$events. An in-place-close event has no separate closing
# event, so it is copied without its `raw` and `is_in_place_close` keys and
# returned together with a stream of @$events plus a synthesised CLOSE
# event carrying the same name.
sub prepend_content {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    unless ($evt->{is_in_place_close}) {
      return $self->_stream_from_array($evt, @$events);
    }

    # Work on a copy so the caller's event is left untouched.
    my %open = %$evt;
    delete @open{qw(raw is_in_place_close)};
    my $close = { type => 'CLOSE', name => $open{name} };
    return [ \%open, $self->_stream_from_array(@$events, $close) ];
  };
}
168 | |
865bb5d2 |
# Build a filter that inserts the given events (an array reference) at the
# end of an element's content.
#
# An in-place-close event is copied without its `raw` and
# `is_in_place_close` keys and returned together with a stream of @$events
# plus a synthesised CLOSE event carrying the same name. Otherwise the
# existing content is passed through a content collector and the filter
# returns the collector's leading event together with the collector stream
# concatenated with @$events.
sub append_content {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
  return sub {
    my ($evt) = @_;
    if ($evt->{is_in_place_close}) {
      # Work on a copy so the caller's event is left untouched.
      my %open = %$evt;
      delete @open{qw(raw is_in_place_close)};
      my $close = { type => 'CLOSE', name => $open{name} };
      return [ \%open, $self->_stream_from_array(@$events, $close) ];
    }

    # Hand the collector exactly the arguments this filter was given.
    my $collected = $coll_proto->(@_);
    my $emit = $self->_stream_from_array(@$events);
    my $content_then_emit = $self->_stream_concat($collected->[1], $emit);
    return [ $collected->[0], $content_then_emit ];
  };
}
185 | |
456a815d |
# Build a filter that replaces an element with a new stream.
#
# $replace_with is a stream prototype (see _stream_from_proto); $options is
# handed to collect() unchanged and may be omitted.
#
# The filter returns, depending on what the collector hands back:
#   - nothing collected:  the replacement stream alone
#   - an array reference: [ leading event, collector stream + replacement ]
#   - anything else:      the collected value concatenated with the
#                         replacement
sub replace {
  my ($self, $replace_with, $options) = @_;
  my $coll_proto = $self->collect($options);
  return sub {
    my $emit = $self->_stream_from_proto($replace_with);
    # Hand the collector exactly the arguments this filter was given.
    my $collected = $coll_proto->(@_);

    # The replacement is emitted -after- the collector's output. A plain
    # replace could emit first, but generating new content from the captured
    # content needs the collect stage to run first, and collecting is
    # unlikely to take long enough for the difference to be noticeable.
    return $emit unless $collected;

    if (ref($collected) eq 'ARRAY') {
      return [
        $collected->[0],
        $self->_stream_concat($collected->[1], $emit)
      ];
    }
    return $self->_stream_concat($collected, $emit);
  };
}
209 | |
865bb5d2 |
# Same as replace(), but with the `content` option forced on. The caller's
# options hash (which may be omitted) is copied, not modified.
sub replace_content {
  my ($self, $replace_with, $options) = @_;
  my %with_content = (%{$options||{}}, content => 1);
  return $self->replace($replace_with, \%with_content);
}
214 | |
3cdbc13f |
# Build a filter that replaces an element with one rendering of it per item
# produced by $repeat_for.
#
# $repeat_for is a stream prototype (see _stream_from_proto); each item it
# yields is called as a code reference with a stream of the collected
# element events available in $_ (and as its first argument).
#
# $options is a hash reference (it may be omitted). In addition to the
# options understood by replace()/collect() it accepts:
#   repeat_between - a selector; events matching it are collected
#                    separately and emitted between consecutive
#                    repetitions rather than inside each one
#
# The caller's $options hash is never modified: the `into` and `filter`
# entries set here, and the removal of `repeat_between`, happen on a private
# shallow copy, so the same options hash can safely be reused for several
# calls (repeat_content already behaved this way).
sub repeat {
  my ($self, $repeat_for, $options) = @_;
  my %opts = %{$options||{}};
  $opts{into} = \my @into;
  my @between;
  my $repeat_between = delete $opts{repeat_between};
  if ($repeat_between) {
    require HTML::Zoom::SelectorParser;
    require HTML::Zoom::FilterStream;
    my $sp = HTML::Zoom::SelectorParser->new;
    my $filter = $self->collect({ into => \@between });
    $opts{filter} = sub {
      HTML::Zoom::FilterStream->new({
        stream => $_[0],
        match => $sp->parse_selector($repeat_between),
        filter => $filter
      })
    };
  }
  my $repeater = sub {
    my $s = $self->_stream_from_proto($repeat_for);
    # We have to test $repeat_between not @between here because
    # at the point we're constructing our return stream @between
    # hasn't been populated yet - but we can test @between in the
    # map routine because it has been by then and that saves us doing
    # the extra stream construction if we don't need it.
    if ($repeat_between) {
      $s->map(sub {
          local $_ = $self->_stream_from_array(@into);
          # Only emit the separator when another repetition follows.
          (@between && $s->peek)
            ? $self->_stream_concat(
                $_[0]->($_), $self->_stream_from_array(@between)
              )
            : $_[0]->($_)
        })
        ->flatten;
    } else {
      $s->map(sub {
          local $_ = $self->_stream_from_array(@into);
          $_[0]->($_)
        })
        ->flatten;
    }
  };
  $self->replace($repeater, \%opts);
}
260 | |
865bb5d2 |
# Same as repeat(), but with the `content` option forced on. The options
# handed to repeat() are a fresh copy of the caller's hash (which may be
# omitted).
sub repeat_content {
  my ($self, $repeat_for, $options) = @_;
  my %with_content = (%{$options||{}}, content => 1);
  return $self->repeat($repeat_for, \%with_content);
}
265 | |
456a815d |
266 | 1; |