Commit | Line | Data |
456a815d |
1 | package HTML::Zoom::FilterBuilder; |
2 | |
456a815d |
3 | use strict; |
4 | use warnings FATAL => 'all'; |
d80786d0 |
5 | use base qw(HTML::Zoom::SubObject); |
456a815d |
6 | use HTML::Zoom::CodeStream; |
7 | |
456a815d |
8 | sub _stream_from_code { |
d80786d0 |
9 | shift->_zconfig->stream_utils->stream_from_code(@_) |
456a815d |
10 | } |
11 | |
12 | sub _stream_from_array { |
d80786d0 |
13 | shift->_zconfig->stream_utils->stream_from_array(@_) |
456a815d |
14 | } |
15 | |
3cdbc13f |
16 | sub _stream_from_proto { |
d80786d0 |
17 | shift->_zconfig->stream_utils->stream_from_proto(@_) |
3cdbc13f |
18 | } |
19 | |
456a815d |
20 | sub _stream_concat { |
d80786d0 |
21 | shift->_zconfig->stream_utils->stream_concat(@_) |
456a815d |
22 | } |
23 | |
6d0f20a6 |
24 | sub _flatten_stream_of_streams { |
25 | shift->_zconfig->stream_utils->flatten_stream_of_streams(@_) |
26 | } |
27 | |
456a815d |
28 | sub set_attribute { |
1c4455ae |
29 | my $self = shift; |
30 | my ($name, $value) = $self->_parse_attribute_args(@_); |
456a815d |
31 | sub { |
8f962884 |
32 | my $a = (my $evt = $_[0])->{attrs}; |
456a815d |
33 | my $e = exists $a->{$name}; |
34 | +{ %$evt, raw => undef, raw_attrs => undef, |
35 | attrs => { %$a, $name => $value }, |
36 | ($e # add to name list if not present |
37 | ? () |
38 | : (attr_names => [ @{$evt->{attr_names}}, $name ])) |
39 | } |
40 | }; |
41 | } |
42 | |
1c4455ae |
43 | sub _parse_attribute_args { |
44 | my $self = shift; |
2daa653a |
45 | # allow ->add_to_attribute(name => 'value') |
46 | # or ->add_to_attribute({ name => 'name', value => 'value' }) |
1c4455ae |
47 | my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)}; |
48 | return ($name, $self->_zconfig->parser->html_escape($value)); |
49 | } |
50 | |
456a815d |
51 | sub add_attribute { |
2daa653a |
52 | die "renamed to add_to_attribute. killing this entirely for 1.0"; |
53 | } |
54 | |
55 | sub add_to_attribute { |
1c4455ae |
56 | my $self = shift; |
57 | my ($name, $value) = $self->_parse_attribute_args(@_); |
456a815d |
58 | sub { |
8f962884 |
59 | my $a = (my $evt = $_[0])->{attrs}; |
456a815d |
60 | my $e = exists $a->{$name}; |
61 | +{ %$evt, raw => undef, raw_attrs => undef, |
62 | attrs => { |
63 | %$a, |
64 | $name => join(' ', ($e ? $a->{$name} : ()), $value) |
65 | }, |
66 | ($e # add to name list if not present |
67 | ? () |
68 | : (attr_names => [ @{$evt->{attr_names}}, $name ])) |
69 | } |
70 | }; |
71 | } |
72 | |
73 | sub remove_attribute { |
74 | my ($self, $args) = @_; |
1c4455ae |
75 | my $name = (ref($args) eq 'HASH') ? $args->{name} : $args; |
456a815d |
76 | sub { |
8f962884 |
77 | my $a = (my $evt = $_[0])->{attrs}; |
456a815d |
78 | return $evt unless exists $a->{$name}; |
79 | $a = { %$a }; delete $a->{$name}; |
80 | +{ %$evt, raw => undef, raw_attrs => undef, |
81 | attrs => $a, |
82 | attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ] |
83 | } |
84 | }; |
85 | } |
86 | |
76cecb10 |
87 | sub collect { |
88 | my ($self, $options) = @_; |
1c4455ae |
89 | my ($into, $passthrough, $content, $filter, $flush_before) = |
90 | @{$options}{qw(into passthrough content filter flush_before)}; |
76cecb10 |
91 | sub { |
92 | my ($evt, $stream) = @_; |
b4d044eb |
93 | # We wipe the contents of @$into here so that other actions depending |
94 | # on this (such as a repeater) can be invoked multiple times easily. |
95 | # I -suspect- it's better for that state reset to be managed here; if it |
96 | # ever becomes painful the decision should be revisited |
97 | if ($into) { |
865bb5d2 |
98 | @$into = $content ? () : ($evt); |
b4d044eb |
99 | } |
76cecb10 |
100 | if ($evt->{is_in_place_close}) { |
865bb5d2 |
101 | return $evt if $passthrough || $content; |
76cecb10 |
102 | return; |
103 | } |
104 | my $name = $evt->{name}; |
105 | my $depth = 1; |
865bb5d2 |
106 | my $_next = $content ? 'peek' : 'next'; |
d80786d0 |
107 | $stream = do { local $_ = $stream; $filter->($stream) } if $filter; |
76cecb10 |
108 | my $collector = $self->_stream_from_code(sub { |
109 | return unless $stream; |
110 | while (my ($evt) = $stream->$_next) { |
111 | $depth++ if ($evt->{type} eq 'OPEN'); |
112 | $depth-- if ($evt->{type} eq 'CLOSE'); |
113 | unless ($depth) { |
114 | undef $stream; |
865bb5d2 |
115 | return if $content; |
76cecb10 |
116 | push(@$into, $evt) if $into; |
117 | return $evt if $passthrough; |
118 | return; |
119 | } |
120 | push(@$into, $evt) if $into; |
865bb5d2 |
121 | $stream->next if $content; |
76cecb10 |
122 | return $evt if $passthrough; |
123 | } |
124 | die "Never saw closing </${name}> before end of source"; |
125 | }); |
1c4455ae |
126 | if ($flush_before) { |
127 | if ($passthrough||$content) { |
128 | $evt = { %$evt, flush => 1 }; |
129 | } else { |
130 | $evt = { type => 'EMPTY', flush => 1 }; |
131 | } |
132 | } |
133 | return ($passthrough||$content||$flush_before) |
134 | ? [ $evt, $collector ] |
135 | : $collector; |
76cecb10 |
136 | }; |
137 | } |
138 | |
865bb5d2 |
139 | sub collect_content { |
140 | my ($self, $options) = @_; |
141 | $self->collect({ %{$options||{}}, content => 1 }) |
142 | } |
143 | |
456a815d |
144 | sub add_before { |
145 | my ($self, $events) = @_; |
8f962884 |
146 | sub { return $self->_stream_from_array(@$events, $_[0]) }; |
456a815d |
147 | } |
148 | |
149 | sub add_after { |
150 | my ($self, $events) = @_; |
b616863d |
151 | my $coll_proto = $self->collect({ passthrough => 1 }); |
456a815d |
152 | sub { |
8f962884 |
153 | my ($evt) = @_; |
456a815d |
154 | my $emit = $self->_stream_from_array(@$events); |
b616863d |
155 | my $coll = &$coll_proto; |
995bc8be |
156 | return ref($coll) eq 'HASH' # single event, no collect |
157 | ? [ $coll, $emit ] |
158 | : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]; |
456a815d |
159 | }; |
8f962884 |
160 | } |
456a815d |
161 | |
865bb5d2 |
162 | sub prepend_content { |
456a815d |
163 | my ($self, $events) = @_; |
164 | sub { |
8f962884 |
165 | my ($evt) = @_; |
456a815d |
166 | if ($evt->{is_in_place_close}) { |
167 | $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)}; |
168 | return [ $evt, $self->_stream_from_array( |
169 | @$events, { type => 'CLOSE', name => $evt->{name} } |
170 | ) ]; |
171 | } |
172 | return $self->_stream_from_array($evt, @$events); |
173 | }; |
174 | } |
175 | |
865bb5d2 |
176 | sub append_content { |
8f962884 |
177 | my ($self, $events) = @_; |
865bb5d2 |
178 | my $coll_proto = $self->collect({ passthrough => 1, content => 1 }); |
8f962884 |
179 | sub { |
180 | my ($evt) = @_; |
181 | if ($evt->{is_in_place_close}) { |
182 | $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)}; |
183 | return [ $evt, $self->_stream_from_array( |
184 | @$events, { type => 'CLOSE', name => $evt->{name} } |
185 | ) ]; |
186 | } |
b616863d |
187 | my $coll = &$coll_proto; |
8f962884 |
188 | my $emit = $self->_stream_from_array(@$events); |
189 | return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]; |
190 | }; |
191 | } |
192 | |
456a815d |
193 | sub replace { |
3cdbc13f |
194 | my ($self, $replace_with, $options) = @_; |
b616863d |
195 | my $coll_proto = $self->collect($options); |
456a815d |
196 | sub { |
197 | my ($evt, $stream) = @_; |
3cdbc13f |
198 | my $emit = $self->_stream_from_proto($replace_with); |
b616863d |
199 | my $coll = &$coll_proto; |
a88c1c57 |
200 | # if we're replacing the contents of an in place close |
201 | # then we need to handle that here |
202 | if ($options->{content} |
203 | && ref($coll) eq 'HASH' |
ec687101 |
204 | && $coll->{is_in_place_close} |
a88c1c57 |
205 | ) { |
a88c1c57 |
206 | my $close = $stream->next; |
ec687101 |
207 | # shallow copy and nuke in place and raw (to force smart print) |
208 | $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close); |
a88c1c57 |
209 | $emit = $self->_stream_concat( |
210 | $emit, |
211 | $self->_stream_from_array($close), |
212 | ); |
213 | } |
451b3b30 |
214 | # For a straightforward replace operation we can, in fact, do the emit |
215 | # -before- the collect, and my first cut did so. However in order to |
216 | # use the captured content in generating the new content, we need |
217 | # the collect stage to happen first - and it seems highly unlikely |
218 | # that in normal operation the collect phase will take long enough |
219 | # for the difference to be noticeable |
11cc25dd |
220 | return |
221 | ($coll |
a88c1c57 |
222 | ? (ref $coll eq 'ARRAY' # [ event, stream ] |
451b3b30 |
223 | ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ] |
a88c1c57 |
224 | : (ref $coll eq 'HASH' # event or stream? |
225 | ? [ $coll, $emit ] |
226 | : $self->_stream_concat($coll, $emit)) |
11cc25dd |
227 | ) |
228 | : $emit |
229 | ); |
456a815d |
230 | }; |
231 | } |
232 | |
865bb5d2 |
233 | sub replace_content { |
234 | my ($self, $replace_with, $options) = @_; |
235 | $self->replace($replace_with, { %{$options||{}}, content => 1 }) |
236 | } |
237 | |
3cdbc13f |
238 | sub repeat { |
239 | my ($self, $repeat_for, $options) = @_; |
240 | $options->{into} = \my @into; |
f8ed299b |
241 | my @between; |
242 | my $repeat_between = delete $options->{repeat_between}; |
243 | if ($repeat_between) { |
f8ed299b |
244 | $options->{filter} = sub { |
d80786d0 |
245 | $_->select($repeat_between)->collect({ into => \@between }) |
f8ed299b |
246 | }; |
247 | } |
3cdbc13f |
248 | my $repeater = sub { |
15059df5 |
249 | my $s = ref($repeat_for) eq 'CODE' |
250 | ? $self->_stream_from_code($repeat_for) # $repeat_for is iterator |
251 | : $self->_stream_from_proto($repeat_for); |
f8ed299b |
252 | # We have to test $repeat_between not @between here because |
253 | # at the point we're constructing our return stream @between |
254 | # hasn't been populated yet - but we can test @between in the |
255 | # map routine because it has been by then and that saves us doing |
256 | # the extra stream construction if we don't need it. |
6d0f20a6 |
257 | $self->_flatten_stream_of_streams(do { |
258 | if ($repeat_between) { |
259 | $s->map(sub { |
260 | local $_ = $self->_stream_from_array(@into); |
261 | (@between && $s->peek) |
262 | ? $self->_stream_concat( |
263 | $_[0]->($_), $self->_stream_from_array(@between) |
264 | ) |
265 | : $_[0]->($_) |
266 | }) |
267 | } else { |
268 | $s->map(sub { |
269 | local $_ = $self->_stream_from_array(@into); |
270 | $_[0]->($_) |
f8ed299b |
271 | }) |
6d0f20a6 |
272 | } |
273 | }) |
3cdbc13f |
274 | }; |
275 | $self->replace($repeater, $options); |
276 | } |
277 | |
865bb5d2 |
278 | sub repeat_content { |
279 | my ($self, $repeat_for, $options) = @_; |
280 | $self->repeat($repeat_for, { %{$options||{}}, content => 1 }) |
281 | } |
282 | |
456a815d |
283 | 1; |
556c8616 |
284 | |
285 | =head1 NAME |
286 | |
287 | HTML::Zoom::FilterBuilder - Add Filters to a Stream |
288 | |
244252e7 |
289 | =head1 SYNOPSIS |
290 | |
a42917f6 |
291 | Create an L<HTML::Zoom> instance: |
292 | |
0d8f057e |
293 | use HTML::Zoom; |
294 | my $root = HTML::Zoom |
295 | ->from_html(<<MAIN); |
296 | <html> |
297 | <head> |
298 | <title>Default Title</title> |
299 | </head> |
a42917f6 |
300 | <body bad_attr='junk'> |
0d8f057e |
301 | Default Content |
302 | </body> |
303 | </html> |
304 | MAIN |
305 | |
a42917f6 |
306 | Create a new attribute on the C<body> tag: |
307 | |
308 | $root = $root |
309 | ->select('body') |
310 | ->set_attribute(class=>'main'); |
311 | |
312 | Add a extra value to an existing attribute: |
313 | |
314 | $root = $root |
315 | ->select('body') |
316 | ->add_to_attribute(class=>'one-column'); |
317 | |
318 | Set the content of the C<title> tag: |
319 | |
320 | $root = $root |
321 | ->select('title') |
322 | ->replace_content('Hello World'); |
323 | |
324 | Set content from another L<HTML::Zoom> instance: |
325 | |
0d8f057e |
326 | my $body = HTML::Zoom |
327 | ->from_html(<<BODY); |
328 | <div id="stuff"> |
2daa653a |
329 | <p>Well Now</p> |
f8ad684d |
330 | <p id="p2">Is the Time</p> |
0d8f057e |
331 | </div> |
332 | BODY |
333 | |
a42917f6 |
334 | $root = $root |
f8ad684d |
335 | ->select('body') |
a42917f6 |
336 | ->replace_content($body); |
337 | |
338 | Set an attribute on multiple matches: |
339 | |
340 | $root = $root |
f8ad684d |
341 | ->select('p') |
a42917f6 |
342 | ->set_attribute(class=>'para'); |
343 | |
344 | Remove an attribute: |
345 | |
346 | $root = $root |
347 | ->select('body') |
348 | ->remove_attribute('bad_attr'); |
0d8f057e |
349 | |
350 | will produce: |
351 | |
352 | =begin testinfo |
353 | |
a42917f6 |
354 | my $output = $root->to_html; |
0d8f057e |
355 | my $expect = <<HTML; |
356 | |
357 | =end testinfo |
358 | |
359 | <html> |
360 | <head> |
361 | <title>Hello World</title> |
362 | </head> |
434a11c8 |
363 | <body class="main one-column"><div id="stuff"> |
adb30a8a |
364 | <p class="para">Well Now</p> |
a42917f6 |
365 | <p id="p2" class="para">Is the Time</p> |
0d8f057e |
366 | </div> |
367 | </body> |
368 | </html> |
369 | |
370 | =begin testinfo |
371 | |
372 | HTML |
373 | is($output, $expect, 'Synopsis code works ok'); |
374 | |
375 | =end testinfo |
244252e7 |
376 | |
556c8616 |
377 | =head1 DESCRIPTION |
378 | |
379 | Given a L<HTML::Zoom> stream, provide methods to apply filters which |
380 | alter the content of that stream. |
381 | |
f6644c71 |
382 | =head1 METHODS |
383 | |
384 | This class defines the following public API |
385 | |
434a11c8 |
386 | =head2 set_attribute ( $attr=>value | {name=>$attr,value=>$value} ) |
f6644c71 |
387 | |
f8ad684d |
388 | Sets an attribute of a given name to a given value for all matching selections. |
389 | |
390 | $html_zoom |
391 | ->select('p') |
392 | ->set_attribute(class=>'paragraph') |
393 | ->select('div') |
434a11c8 |
394 | ->set_attribute(name=>'class', value=>'divider'); |
395 | |
f8ad684d |
396 | |
397 | Overrides existing values, if such exist. When multiple L</set_attribute> |
398 | calls are made against the same or overlapping selection sets, the final |
399 | call wins. |
f6644c71 |
400 | |
434a11c8 |
401 | =head2 add_to_attribute ( $attr=>value | {name=>$attr,value=>$value} ) |
f6644c71 |
402 | |
434a11c8 |
403 | Adds a value to an existing attribute, or creates one if the attribute does not |
404 | yet exist. |
f6644c71 |
405 | |
434a11c8 |
406 | $html_zoom |
407 | ->select('p') |
408 | ->set_attribute(class=>'paragraph') |
409 | ->then |
410 | ->add_to_attribute(name=>'class', value=>'divider'); |
f6644c71 |
411 | |
434a11c8 |
412 | Attributes with more than one value will have a dividing space. |
413 | |
414 | =head2 remove_attribute ( $attr | {name=>$attr} ) |
415 | |
416 | Removes an attribute and all its values. |
417 | |
418 | $html_zoom |
419 | ->select('p') |
420 | ->set_attribute(class=>'paragraph') |
421 | ->then |
422 | ->remove_attribute('class'); |
423 | |
424 | Removes attributes from the original stream or events already added. |
f6644c71 |
425 | |
426 | =head2 collect |
427 | |
428 | TBD |
429 | |
430 | =head2 collect_content |
431 | |
432 | TBD |
433 | |
434 | =head2 add_before |
435 | |
436 | TBD |
437 | |
438 | =head2 add_after |
439 | |
440 | TBD |
441 | |
442 | =head2 prepend_content |
443 | |
444 | TBD |
445 | |
446 | =head2 append_content |
447 | |
448 | TBD |
449 | |
450 | =head2 replace |
451 | |
452 | TBD |
453 | |
454 | =head2 replace_content |
455 | |
244252e7 |
456 | Given a L<HTML::Zoom/select> result, replace the content with a string, array |
457 | or another L<HTML::Zoom> object. |
f6644c71 |
458 | |
459 | =head2 repeat |
460 | |
461 | TBD |
462 | |
463 | =head2 repeat_content |
464 | |
465 | TBD |
466 | |
556c8616 |
467 | =head1 ALSO SEE |
468 | |
469 | L<HTML::Zoom> |
470 | |
471 | =head1 AUTHORS |
472 | |
473 | See L<HTML::Zoom> for authors. |
474 | |
475 | =head1 LICENSE |
476 | |
477 | See L<HTML::Zoom> for the license. |
478 | |
479 | =cut |
480 | |