Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::FilterBuilder; |
2 | |
456a815d |
3 | use strict; |
4 | use warnings FATAL => 'all'; |
d80786d0 |
5 | use base qw(HTML::Zoom::SubObject); |
456a815d |
6 | use HTML::Zoom::CodeStream; |
7 | |
456a815d |
# Thin delegate: forwards every argument to stream_from_code() on the
# stream_utils object supplied by this object's _zconfig and hands back
# whatever that call returns.
sub _stream_from_code {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->stream_from_code(@args);
}
11 | |
# Thin delegate: forwards every argument to stream_from_array() on the
# stream_utils object supplied by this object's _zconfig.
sub _stream_from_array {
  my $self  = shift;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->stream_from_array(@_);
}
15 | |
3cdbc13f |
# Thin delegate: forwards every argument to stream_from_proto() on the
# stream_utils object supplied by this object's _zconfig.
sub _stream_from_proto {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->stream_from_proto(@args);
}
19 | |
456a815d |
# Thin delegate: forwards every argument to stream_concat() on the
# stream_utils object supplied by this object's _zconfig.
sub _stream_concat {
  my $self  = shift;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->stream_concat(@_);
}
23 | |
6d0f20a6 |
# Thin delegate: forwards every argument to flatten_stream_of_streams() on
# the stream_utils object supplied by this object's _zconfig.
sub _flatten_stream_of_streams {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->flatten_stream_of_streams(@args);
}
27 | |
456a815d |
# Build a filter that sets one attribute on the event it is given.
#
# Arguments are handed to _parse_attribute_args, so both
#   ->set_attribute(name => 'value')
#   ->set_attribute({ name => 'name', value => 'value' })
# are accepted.
#
# The returned code ref takes an event hash ref and returns a new hash ref:
# a shallow copy of the event with the attribute stored in {attrs}, and with
# raw and raw_attrs set to undef (presumably so stale source text is not
# reused when the event is printed - confirm against the event printer).
# The input event is left untouched.
sub set_attribute {
  my ($self, @spec) = @_;
  my ($attr, $escaped) = $self->_parse_attribute_args(@spec);
  return sub {
    my ($event)   = @_;
    my $old_attrs = $event->{attrs};
    my $is_new    = !exists $old_attrs->{$attr};
    my %updated   = (
      %$event,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$old_attrs, $attr => $escaped },
    );
    # A previously unseen attribute must also join the ordered name list.
    $updated{attr_names} = [ @{ $event->{attr_names} }, $attr ] if $is_new;
    return \%updated;
  };
}
42 | |
1c4455ae |
# Normalise the arguments shared by set_attribute and add_attribute.
#
# Accepts either a flat pair        (name => 'value')
# or a single hash ref              ({ name => 'name', value => 'value' }).
#
# Returns the list ($name, $escaped_value), where the value has been passed
# through html_escape() on the parser taken from this object's _zconfig.
sub _parse_attribute_args {
  my ($self, @spec) = @_;
  my ($name, $value);
  if (@spec > 1) {
    ($name, $value) = @spec;
  }
  else {
    ($name, $value) = @{ $spec[0] }{qw(name value)};
  }
  return ($name, $self->_zconfig->parser->html_escape($value));
}
50 | |
456a815d |
# Build a filter that appends a value to one attribute of the event it is
# given.
#
# Arguments are handed to _parse_attribute_args, so both
#   ->add_attribute(name => 'value')
#   ->add_attribute({ name => 'name', value => 'value' })
# are accepted.
#
# The returned code ref takes an event hash ref and returns a new hash ref.
# When the attribute already exists the new value is joined onto the old one
# with a single space; otherwise the attribute is created and its name is
# appended to {attr_names}. raw and raw_attrs are set to undef on the copy;
# the input event is left untouched.
sub add_attribute {
  my ($self, @spec) = @_;
  my ($attr, $escaped) = $self->_parse_attribute_args(@spec);
  return sub {
    my ($event)   = @_;
    my $old_attrs = $event->{attrs};
    my $had_attr  = exists $old_attrs->{$attr};
    my @pieces    = $had_attr ? ($old_attrs->{$attr}, $escaped) : ($escaped);
    my %updated   = (
      %$event,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$old_attrs, $attr => join(' ', @pieces) },
    );
    # A previously unseen attribute must also join the ordered name list.
    $updated{attr_names} = [ @{ $event->{attr_names} }, $attr ]
      unless $had_attr;
    return \%updated;
  };
}
68 | |
# Build a filter that removes one attribute from the event it is given.
#
# The attribute may be named directly    ->remove_attribute('name')
# or through a hash ref                  ->remove_attribute({ name => 'name' }).
#
# The returned code ref takes an event hash ref. If the attribute is not
# present the very same event is returned; otherwise a new hash ref is
# returned in which the attribute is gone from both {attrs} and {attr_names}
# and raw / raw_attrs are set to undef. The input event is left untouched.
sub remove_attribute {
  my ($self, $args) = @_;
  my $name = $args;
  $name = $args->{name} if ref($args) eq 'HASH';
  return sub {
    my ($event) = @_;
    my $attrs   = $event->{attrs};
    return $event unless exists $attrs->{$name};
    my %kept = %$attrs;
    delete $kept{$name};
    my @names   = grep { $_ ne $name } @{ $event->{attr_names} };
    my %updated = (
      %$event,
      raw        => undef,
      raw_attrs  => undef,
      attrs      => \%kept,
      attr_names => \@names,
    );
    return \%updated;
  };
}
82 | |
76cecb10 |
# Build a filter that consumes an element from the stream, starting at the
# element's opening event.
#
# $options is a hash ref; the keys read here are:
#   into         - array ref that receives the collected events. It is reset
#                  on every invocation of the filter so that dependants (such
#                  as a repeater) can be run more than once.
#   passthrough  - when true, collected events are also emitted downstream.
#   content      - when true, only the element's content is collected: the
#                  opening event is emitted, and the matching close is only
#                  peeked at, so it stays in the stream.
#   filter       - code ref called with the stream (also available in $_);
#                  whatever it returns replaces the stream being read.
#   flush_before - when true, the first emitted event carries flush => 1.
#
# The returned code ref takes ($evt, $stream) and returns:
#   * for an in-place-close event: the event itself when passthrough or
#     content is set, otherwise nothing;
#   * otherwise the collector stream, wrapped as [ $event, $collector ] when
#     passthrough, content or flush_before is set.
# The collector dies if the stream ends before the matching close is seen.
sub collect {
  my ($self, $options) = @_;
  my ($into, $passthrough, $content, $filter, $flush_before)
    = @{$options}{qw(into passthrough content filter flush_before)};
  my $emit_opener = $passthrough || $content;
  # Content mode must not swallow the closing tag, so it peeks first and
  # only advances once the event is known to belong to the content.
  my $fetch = $content ? 'peek' : 'next';
  return sub {
    my ($opener, $stream) = @_;
    @$into = $content ? () : ($opener) if $into;
    if ($opener->{is_in_place_close}) {
      return $opener if $emit_opener;
      return;
    }
    my $name  = $opener->{name};
    my $depth = 1;
    if ($filter) {
      local $_ = $stream;
      $stream = $filter->($stream);
    }
    my $collector = $self->_stream_from_code(sub {
      return unless $stream;
      while (my ($inner) = $stream->$fetch) {
        my $type = $inner->{type};
        if    ($type eq 'OPEN')  { $depth++ }
        elsif ($type eq 'CLOSE') { $depth-- }
        if ($depth == 0) {
          # Matching close reached: this collector is finished for good.
          undef $stream;
          return if $content;
          push @$into, $inner if $into;
          return $inner if $passthrough;
          return;
        }
        push @$into, $inner if $into;
        $stream->next if $content;
        return $inner if $passthrough;
      }
      die "Never saw closing </${name}> before end of source";
    });
    if ($flush_before) {
      # When the opener itself is not emitted, a placeholder event carries
      # the flush marker instead.
      $opener = $emit_opener
        ? { %$opener, flush => 1 }
        : { type => 'EMPTY', flush => 1 };
    }
    return $collector unless $emit_opener || $flush_before;
    return [ $opener, $collector ];
  };
}
134 | |
865bb5d2 |
# Same as collect, but with content => 1 forced: the caller's options (if
# any) are shallow-copied into a fresh hash before the flag is added, so the
# caller's hash is not modified.
sub collect_content {
  my ($self, $options) = @_;
  my %merged = (%{ $options || {} }, content => 1);
  return $self->collect(\%merged);
}
139 | |
456a815d |
# Build a filter that emits the events in @$events ahead of the event it is
# given. The returned code ref yields a stream made (via _stream_from_array)
# of the extra events followed by that event.
sub add_before {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    return $self->_stream_from_array(@$events, $evt);
  };
}
144 | |
# Build a filter that emits the events in @$events after the selected
# element.
#
# The element itself is let through by a passthrough collector. The returned
# code ref passes its own arguments on to that collector and returns a
# two-element array ref: the first event, then the stream that follows it
# with the extra events placed last.
sub add_after {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1 });
  return sub {
    my $emit = $self->_stream_from_array(@$events);
    my $coll = $coll_proto->(@_);
    # A plain hash is a lone event (nothing was collected), so the extra
    # events follow it directly.
    if (ref($coll) eq 'HASH') {
      return [ $coll, $emit ];
    }
    my $rest = $self->_stream_concat($coll->[1], $emit);
    return [ $coll->[0], $rest ];
  };
}
456a815d |
157 | |
865bb5d2 |
# Build a filter that inserts the events in @$events at the start of the
# selected element's content.
#
# The returned code ref takes the opening event:
#   * normal element  - returns a stream of the opening event followed by
#     the new events;
#   * in-place close  - returns [ $open, $stream ], where $open is a copy of
#     the event without its raw and is_in_place_close keys, and the stream
#     carries the new events followed by a freshly built CLOSE event for the
#     same tag name.
sub prepend_content {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    return $self->_stream_from_array($evt, @$events)
      unless $evt->{is_in_place_close};
    my %open = %$evt;
    delete @open{qw(raw is_in_place_close)};
    my $close = { type => 'CLOSE', name => $open{name} };
    return [ \%open, $self->_stream_from_array(@$events, $close) ];
  };
}
171 | |
865bb5d2 |
# Build a filter that adds the events in @$events at the end of the selected
# element's content.
#
# The returned code ref takes the opening event (plus whatever else the
# collector needs, since its arguments are passed on unchanged):
#   * normal element  - the existing content is let through by a
#     passthrough/content collector and the new events are concatenated
#     after it; the result is [ $first_event, $stream ];
#   * in-place close  - returns [ $open, $stream ], where $open is a copy of
#     the event without its raw and is_in_place_close keys, and the stream
#     carries the new events followed by a freshly built CLOSE event for the
#     same tag name.
sub append_content {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
  return sub {
    my ($evt) = @_;
    if (!$evt->{is_in_place_close}) {
      my $coll = $coll_proto->(@_);
      my $emit = $self->_stream_from_array(@$events);
      return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
    }
    my %open = %$evt;
    delete @open{qw(raw is_in_place_close)};
    my $close = { type => 'CLOSE', name => $open{name} };
    return [ \%open, $self->_stream_from_array(@$events, $close) ];
  };
}
188 | |
456a815d |
# Build a filter that replaces the selected element (or, with
# content => 1 in $options, only its content) by $replace_with.
#
# $replace_with is turned into a stream with _stream_from_proto on every
# invocation; $options is passed on to collect unchanged.
#
# The returned code ref takes ($evt, $stream) and returns, depending on what
# the collector produced:
#   * nothing (false)       - just the replacement stream;
#   * [ $event, $stream ]   - [ $event, collector stream + replacement ];
#   * a single event (hash) - [ $event, replacement ];
#   * a stream              - collector stream + replacement.
#
# The collector always goes first. A plain replacement could be emitted
# before collecting, but a replacement that is generated from the captured
# events needs the collect stage to have run already.
sub replace {
  my ($self, $replace_with, $options) = @_;
  my $coll_proto = $self->collect($options);
  return sub {
    my ($evt, $stream) = @_;
    my $emit = $self->_stream_from_proto($replace_with);
    my $coll = $coll_proto->(@_);
    # Replacing the content of an in-place close: pull the matching close
    # event off the stream and emit it after the new content.
    if ($options->{content}
        && ref($coll) eq 'HASH'
        && $coll->{is_in_place_close}) {
      my $close = $stream->next;
      # Shallow copy and nuke in-place and raw (to force smart print).
      # The loop variable aliases $coll and $close, so both are replaced.
      for my $event ($coll, $close) {
        $event = { %$event };
        delete @{$event}{qw(is_in_place_close raw)};
      }
      my $tail = $self->_stream_from_array($close);
      $emit = $self->_stream_concat($emit, $tail);
    }
    return $emit unless $coll;
    if (ref($coll) eq 'ARRAY') {          # [ event, stream ]
      return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
    }
    if (ref($coll) eq 'HASH') {           # a lone event
      return [ $coll, $emit ];
    }
    return $self->_stream_concat($coll, $emit);   # a stream
  };
}
228 | |
865bb5d2 |
# Same as replace, but with content => 1 forced, so only the content of the
# selected element is replaced. The caller's options (if any) are
# shallow-copied into a fresh hash before the flag is added.
sub replace_content {
  my ($self, $replace_with, $options) = @_;
  my %merged = (%{ $options || {} }, content => 1);
  return $self->replace($replace_with, \%merged);
}
233 | |
3cdbc13f |
# Build a filter that replaces the selected element with one rendering of it
# per item of $repeat_for.
#
# $repeat_for is turned into a stream with _stream_from_proto. Each item of
# that stream is called as a code ref with a fresh stream of the collected
# element events (also available in $_); the per-item results are combined
# with _flatten_stream_of_streams (so each item is presumably expected to
# return a stream - confirm against the stream utilities).
#
# $options is an optional hash ref. It is passed on to replace (and from
# there to collect) with these adjustments:
#   into           - always overridden with an internal array that captures
#                    the element to repeat;
#   repeat_between - selector; when given, the matching events are collected
#                    out of the element and re-emitted between consecutive
#                    repetitions (but not after the last one). It is
#                    implemented through the 'filter' option, which it
#                    therefore overrides.
#
# The caller's hash is never modified: all adjustments are made on a private
# shallow copy, so the same options hash can safely be reused for several
# calls.
sub repeat {
  my ($self, $repeat_for, $options) = @_;
  my %opts = %{ $options || {} };
  my $repeat_between = delete $opts{repeat_between};
  my (@into, @between);
  $opts{into} = \@into;
  if ($repeat_between) {
    $opts{filter} = sub {
      $_->select($repeat_between)->collect({ into => \@between })
    };
  }
  my $repeater = sub {
    my $s = $self->_stream_from_proto($repeat_for);
    return $self->_flatten_stream_of_streams($s->map(sub {
      my ($render) = @_;
      local $_ = $self->_stream_from_array(@into);
      # @between is only populated once the collector has run, which is why
      # it is checked here, per item, and not when the stream is built.
      # Checking $repeat_between first skips the peek when no separator was
      # requested at all.
      return $render->($_)
        unless $repeat_between && @between && $s->peek;
      return $self->_stream_concat(
        $render->($_), $self->_stream_from_array(@between)
      );
    }));
  };
  return $self->replace($repeater, \%opts);
}
271 | |
865bb5d2 |
# Same as repeat, but with content => 1 forced, so only the content of the
# selected element is repeated. The caller's options (if any) are
# shallow-copied into a fresh hash before the flag is added.
sub repeat_content {
  my ($self, $repeat_for, $options) = @_;
  my %merged = (%{ $options || {} }, content => 1);
  return $self->repeat($repeat_for, \%merged);
}
276 | |
456a815d |
277 | 1; |
556c8616 |
278 | |
279 | =head1 NAME |
280 | |
281 | HTML::Zoom::FilterBuilder - Add Filters to a Stream |
282 | |
244252e7 |
283 | =head1 SYNOPSIS |
284 | |
244252e7 |
285 | use HTML::Zoom; |
286 | my $root = HTML::Zoom |
287 | ->from_html(<<MAIN); |
288 | <html> |
289 | <head> |
290 | <title>Default Title</title> |
291 | </head> |
292 | <body> |
293 | Default Content |
294 | </body> |
295 | </html> |
296 | MAIN |
297 | |
298 | my $body = HTML::Zoom |
299 | ->from_html(<<BODY); |
300 | <div id="stuff"> |
301 | <p>Stuff</p> |
302 | <p id="p1">Stuff</p> |
303 | </div> |
304 | BODY |
305 | |
306 | print $root |
307 | ->select('title') |
308 | ->replace_content('Hello World') |
309 | ->select('body') |
310 | ->replace_content($body) |
311 | ->select('#p1') |
312 | ->replace_content(sub { |
313 | ## Ask mst... |
314 | |
315 | }) |
316 | ->to_html; |
317 | |
318 | |
556c8616 |
319 | =head1 DESCRIPTION |
320 | |
321 | Given a L<HTML::Zoom> stream, provide methods to apply filters which |
322 | alter the content of that stream. |
323 | |
f6644c71 |
324 | =head1 METHODS |
325 | |
326 | This class defines the following public API |
327 | |
328 | =head2 set_attribute |
329 | |
330 | TBD |
331 | |
332 | =head2 add_attribute |
333 | |
334 | TBD |
335 | |
336 | =head2 remove_attribute |
337 | |
338 | TBD |
339 | |
340 | =head2 collect |
341 | |
342 | TBD |
343 | |
344 | =head2 collect_content |
345 | |
346 | TBD |
347 | |
348 | =head2 add_before |
349 | |
350 | TBD |
351 | |
352 | =head2 add_after |
353 | |
354 | TBD |
355 | |
356 | =head2 prepend_content |
357 | |
358 | TBD |
359 | |
360 | =head2 append_content |
361 | |
362 | TBD |
363 | |
364 | =head2 replace |
365 | |
366 | TBD |
367 | |
368 | =head2 replace_content |
369 | |
244252e7 |
370 | Given a L<HTML::Zoom/select> result, replace the content with a string, array |
371 | or another L<HTML::Zoom> object. |
f6644c71 |
372 | |
373 | =head2 repeat |
374 | |
375 | TBD |
376 | |
377 | =head2 repeat_content |
378 | |
379 | TBD |
380 | |
556c8616 |
381 | =head1 SEE ALSO |
382 | |
383 | L<HTML::Zoom> |
384 | |
385 | =head1 AUTHORS |
386 | |
387 | See L<HTML::Zoom> for authors. |
388 | |
389 | =head1 LICENSE |
390 | |
391 | See L<HTML::Zoom> for the license. |
392 | |
393 | =cut |
394 | |