1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Forward to stream_from_code() on the stream utilities object obtained from
# this object's zoom config. All arguments are passed through unchanged and
# the utility's result is returned as-is.
sub _stream_from_code {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->stream_from_code(@args);
}
# Forward to stream_from_array() on the stream utilities object obtained from
# this object's zoom config. All arguments (the events) are passed through
# unchanged and the utility's result is returned as-is.
sub _stream_from_array {
  my ($self, @events) = @_;
  return $self->_zconfig->stream_utils->stream_from_array(@events);
}
# Forward to stream_from_proto() on the stream utilities object obtained from
# this object's zoom config. All arguments are passed through unchanged and
# the utility's result is returned as-is.
sub _stream_from_proto {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->stream_from_proto(@args);
}
# Forward to stream_concat() on the stream utilities object obtained from
# this object's zoom config. All arguments (the streams to join) are passed
# through unchanged and the utility's result is returned as-is.
sub _stream_concat {
  my ($self, @streams) = @_;
  return $self->_zconfig->stream_utils->stream_concat(@streams);
}
# Forward to flatten_stream_of_streams() on the stream utilities object
# obtained from this object's zoom config. All arguments are passed through
# unchanged and the utility's result is returned as-is.
sub _flatten_stream_of_streams {
  my ($self, @args) = @_;
  return $self->_zconfig->stream_utils->flatten_stream_of_streams(@args);
}
# Build a filter that sets one attribute on an element event, overwriting any
# existing value.
#
# Arguments are decoded (and the value HTML-escaped) by
# _parse_attribute_args, so both (name => 'value') and
# ({ name => ..., value => ... }) are accepted.
#
# Returns a code reference. Given an event hashref it returns a NEW hashref:
#   - raw and raw_attrs are set to undef,
#   - attrs is a copy of the original attrs with $name set to $value,
#   - attr_names gains $name at the end only if the attribute was absent.
# The incoming event is not modified.
sub set_attribute {
  my ($self, @spec) = @_;
  my ($name, $value) = $self->_parse_attribute_args(@spec);
  return sub {
    my ($evt) = @_;
    my $attrs   = $evt->{attrs};
    my $present = exists $attrs->{$name};
    my %updated = (
      %$evt,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$attrs, $name => $value },
    );
    # Only a brand new attribute needs adding to the ordered name list.
    $updated{attr_names} = [ @{ $evt->{attr_names} }, $name ]
      unless $present;
    return \%updated;
  };
}
# Decode the arguments shared by the attribute filters.
#
# Two call shapes are accepted:
#   ->add_to_attribute(name => 'value')
#   ->add_to_attribute({ name => 'name', value => 'value' })
# i.e. either two (or more) plain arguments, of which the first two are the
# attribute name and value, or a single hashref with 'name' and 'value' keys.
#
# Returns the list ($name, $escaped_value), where the value has been passed
# through html_escape() on the parser from this object's zoom config.
sub _parse_attribute_args {
  my ($self, @args) = @_;
  my ($name, $value);
  if (@args > 1) {
    ($name, $value) = @args;
  }
  else {
    ($name, $value) = @{ $args[0] }{qw(name value)};
  }
  return ($name, $self->_zconfig->parser->html_escape($value));
}
51 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Build a filter that adds a value to an attribute on an element event.
#
# Arguments are decoded (and the value HTML-escaped) by
# _parse_attribute_args, so both (name => 'value') and
# ({ name => ..., value => ... }) are accepted.
#
# Returns a code reference. Given an event hashref it returns a NEW hashref:
#   - raw and raw_attrs are set to undef,
#   - attrs is a copy of the original attrs in which $name holds the old
#     value and the new value joined by a single space (or just the new
#     value when the attribute did not exist),
#   - attr_names gains $name at the end only if the attribute was absent.
sub add_to_attribute {
  my ($self, @spec) = @_;
  my ($name, $value) = $self->_parse_attribute_args(@spec);
  return sub {
    my ($evt) = @_;
    my $attrs   = $evt->{attrs};
    my $present = exists $attrs->{$name};
    my @parts   = $present ? ($attrs->{$name}, $value) : ($value);
    my %updated = (
      %$evt,
      raw       => undef,
      raw_attrs => undef,
      attrs     => { %$attrs, $name => join(' ', @parts) },
    );
    # Only a brand new attribute needs adding to the ordered name list.
    $updated{attr_names} = [ @{ $evt->{attr_names} }, $name ]
      unless $present;
    return \%updated;
  };
}
# Build a filter that removes one attribute from an element event.
#
# $args is either the attribute name itself or a hashref whose 'name' key
# holds it.
#
# Returns a code reference. Given an event hashref:
#   - if the attribute is not present, the SAME event is returned untouched;
#   - otherwise a NEW hashref is returned with raw and raw_attrs set to
#     undef, attrs replaced by a copy lacking the attribute, and attr_names
#     filtered to drop the name.
sub remove_attribute {
  my ($self, $args) = @_;
  my $name = ref($args) eq 'HASH' ? $args->{name} : $args;
  return sub {
    my ($evt) = @_;
    my $attrs = $evt->{attrs};
    return $evt unless exists $attrs->{$name};
    # Work on a copy so the incoming event's attrs stay intact.
    my %kept = %$attrs;
    delete $kept{$name};
    return {
      %$evt,
      raw        => undef,
      raw_attrs  => undef,
      attrs      => \%kept,
      attr_names => [ grep { $_ ne $name } @{ $evt->{attr_names} } ],
    };
  };
}
# Build a filter that collects a selected element (or just its content) from
# the stream.
#
# $options is a hashref (may be omitted) with these optional keys:
#   into         - array reference that receives the collected events
#   passthrough  - if true, collected events are also emitted downstream
#   content      - if true, operate on the element's content only; the
#                  opening event is emitted and the closing event is left
#                  in the stream
#   filter       - code reference run on the stream before collecting, with
#                  $_ localised to the stream and the stream as its argument;
#                  its return value replaces the stream
#   flush_before - if true, a flush => 1 marker is emitted first (on the
#                  opening event, or on a type => 'EMPTY' event when the
#                  opening event itself is not being emitted)
#
# Returns a code reference taking ($evt, $stream). Its result is one of:
#   - for an in-place-close element: the event itself when passthrough or
#     content is set, otherwise nothing;
#   - [ $evt, $collector ] when passthrough, content or flush_before is set;
#   - $collector alone otherwise.
# $collector is built with _stream_from_code and dies with
# "Never saw closing </name> before end of source" if the stream runs out
# before the element is balanced.
sub collect {
  my ($self, $options) = @_;
  my ($into, $passthrough, $content, $filter, $flush_before) =
    @{$options}{qw(into passthrough content filter flush_before)};
  my $emit_open = $passthrough || $content;
  return sub {
    my ($evt, $stream) = @_;
    # @$into is reset on every invocation so that callers which run this
    # filter repeatedly (a repeater, for instance) start from a clean slate.
    if ($into) {
      @$into = $content ? () : ($evt);
    }
    if ($evt->{is_in_place_close}) {
      # Nothing follows an in-place close, so there is nothing to collect.
      return $emit_open ? $evt : ();
    }
    my $name  = $evt->{name};
    my $depth = 1;
    # In content mode we peek first so the final CLOSE stays in the stream.
    my $advance = $content ? 'peek' : 'next';
    if ($filter) {
      $stream = do { local $_ = $stream; $filter->($stream) };
    }
    my $collector = $self->_stream_from_code(sub {
      return unless $stream;
      while (my ($inner) = $stream->$advance) {
        if ($inner->{type} eq 'OPEN') {
          $depth++;
        }
        elsif ($inner->{type} eq 'CLOSE') {
          $depth--;
        }
        if (!$depth) {
          # Matching close reached: this collector is finished.
          undef $stream;
          return if $content;
          push(@$into, $inner) if $into;
          return $passthrough ? $inner : ();
        }
        push(@$into, $inner) if $into;
        $stream->next if $content; # consume what we only peeked at
        return $inner if $passthrough;
      }
      die "Never saw closing </${name}> before end of source";
    });
    if ($flush_before) {
      $evt = $emit_open
        ? { %$evt, flush => 1 }
        : { type => 'EMPTY', flush => 1 };
    }
    return ($emit_open || $flush_before)
      ? [ $evt, $collector ]
      : $collector;
  };
}
# Convenience wrapper: same as collect() with the content option forced on.
# $options may be omitted; the caller's hash is copied, not modified.
sub collect_content {
  my ($self, $options) = @_;
  my %with_content = (%{ $options || {} }, content => 1);
  return $self->collect(\%with_content);
}
# Build a filter that emits the given events immediately before the selected
# event.
#
# $events is an array reference of events. The returned code reference takes
# the selected event and returns a stream (built with _stream_from_array)
# consisting of @$events followed by that event.
sub add_before {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    return $self->_stream_from_array(@$events, $evt);
  };
}
# Build a filter that emits the given events immediately after the selected
# element.
#
# $events is an array reference of events. The selected element is run
# through a passthrough collect() so that it is emitted unchanged; the new
# events are then appended:
#   - if the collector returns a single event hashref (no collection took
#     place), the result is [ $event, $new_events_stream ];
#   - otherwise the collector returned [ $event, $stream ] and the new events
#     are concatenated after that stream.
sub add_after {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1 });
  return sub {
    my $emit = $self->_stream_from_array(@$events);
    my $coll = $coll_proto->(@_);
    if (ref($coll) eq 'HASH') { # single event, no collect
      return [ $coll, $emit ];
    }
    my ($head, $rest) = @$coll;
    return [ $head, $self->_stream_concat($rest, $emit) ];
  };
}
# Build a filter that inserts the given events at the start of the selected
# element's content.
#
# $events is an array reference of events. The returned code reference takes
# the selected (opening) event:
#   - for an in-place-close element, a copy of the event without its raw and
#     is_in_place_close keys is emitted, followed by the new events and a
#     synthesised CLOSE event for the same name, as [ $open, $stream ];
#   - otherwise a stream of the opening event followed by the new events is
#     returned.
sub prepend_content {
  my ($self, $events) = @_;
  return sub {
    my ($evt) = @_;
    unless ($evt->{is_in_place_close}) {
      return $self->_stream_from_array($evt, @$events);
    }
    # Turn <x /> into <x>...</x>: drop the markers that would make the
    # event print as an in-place close.
    my %open = %$evt;
    delete @open{qw(raw is_in_place_close)};
    my $tail = $self->_stream_from_array(
      @$events, { type => 'CLOSE', name => $open{name} }
    );
    return [ \%open, $tail ];
  };
}
# Build a filter that inserts the given events at the end of the selected
# element's content.
#
# $events is an array reference of events. The returned code reference takes
# ($evt, $stream):
#   - for an in-place-close element, a copy of the event without its raw and
#     is_in_place_close keys is emitted, followed by the new events and a
#     synthesised CLOSE event for the same name, as [ $open, $stream ];
#   - otherwise the existing content is passed through via
#     collect({ passthrough => 1, content => 1 }) and the new events are
#     concatenated after it.
sub append_content {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
  return sub {
    my ($evt) = @_;
    if ($evt->{is_in_place_close}) {
      # Turn <x /> into <x>...</x>: drop the markers that would make the
      # event print as an in-place close.
      my %open = %$evt;
      delete @open{qw(raw is_in_place_close)};
      my $tail = $self->_stream_from_array(
        @$events, { type => 'CLOSE', name => $open{name} }
      );
      return [ \%open, $tail ];
    }
    my $coll = $coll_proto->(@_);
    my $emit = $self->_stream_from_array(@$events);
    my ($head, $body) = @{$coll}[0, 1];
    return [ $head, $self->_stream_concat($body, $emit) ];
  };
}
# Build a filter that replaces the selected element (or, with the content
# option, only its content) with new material.
#
# $replace_with is anything _stream_from_proto accepts; $options is passed
# straight to collect().
#
# The returned code reference takes ($evt, $stream). The collect stage runs
# before the replacement is emitted so that the replacement may be generated
# from the captured content; the cost of collecting first is expected to be
# negligible in normal use. The result depends on what the collector gave
# back:
#   - nothing:           the replacement stream alone;
#   - [ event, stream ]: [ event, stream followed by replacement ];
#   - an event hashref:  [ event, replacement ];
#   - a stream:          that stream followed by the replacement.
sub replace {
  my ($self, $replace_with, $options) = @_;
  my $coll_proto = $self->collect($options);
  return sub {
    my ($evt, $stream) = @_;
    my $emit = $self->_stream_from_proto($replace_with);
    my $coll = $coll_proto->(@_);
    # Replacing the content of an in-place close (<x />) needs special care:
    # the element has to be opened up into <x>...</x>.
    my $opening_up = $options->{content}
      && ref($coll) eq 'HASH'
      && $coll->{is_in_place_close};
    if ($opening_up) {
      my $close = $stream->next;
      # Shallow-copy both events and strip the in-place marker and raw text
      # so they are re-rendered as separate open and close tags.
      for my $e ($coll, $close) {
        $e = { %$e };
        delete @{$e}{qw(is_in_place_close raw)};
      }
      $emit = $self->_stream_concat(
        $emit,
        $self->_stream_from_array($close),
      );
    }
    return $emit unless $coll;
    if (ref($coll) eq 'ARRAY') { # [ event, stream ]
      return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
    }
    if (ref($coll) eq 'HASH') { # a lone event
      return [ $coll, $emit ];
    }
    return $self->_stream_concat($coll, $emit); # a stream
  };
}
# Convenience wrapper: same as replace() with the content option forced on,
# so only the selected element's content is replaced. $options may be
# omitted; the caller's hash is copied, not modified.
sub replace_content {
  my ($self, $replace_with, $options) = @_;
  my %with_content = (%{ $options || {} }, content => 1);
  return $self->replace($replace_with, \%with_content);
}
# Build a filter that repeats the selected element once per item produced by
# $repeat_for.
#
# $repeat_for is anything _stream_from_proto accepts; each item it yields is
# a code reference that is called with a fresh stream of the collected
# events (also available as $_) and returns the transformed stream for that
# iteration.
#
# $options accepts the same keys as collect(), plus:
#   repeat_between - selector for a sub-element that is pulled out of the
#                    template and emitted between iterations (not after the
#                    last one)
# Any 'into' supplied by the caller is overridden, and when repeat_between
# is given any 'filter' is overridden too, because repeat needs both for its
# own bookkeeping.
#
# The caller's $options hash is never modified: all adjustments are made on
# a private shallow copy, so the same hash can safely be reused for several
# calls.
#
# Returns whatever replace() returns for the generated repeater.
sub repeat {
  my ($self, $repeat_for, $options) = @_;
  my %opts = %{ $options || {} };
  $opts{into} = \my @into;
  my @between;
  my $repeat_between = delete $opts{repeat_between};
  if ($repeat_between) {
    $opts{filter} = sub {
      $_->select($repeat_between)->collect({ into => \@between })
    };
  }
  my $repeater = sub {
    my $s = $self->_stream_from_proto($repeat_for);
    return $self->_flatten_stream_of_streams($s->map(sub {
      my ($transform) = @_;
      local $_ = $self->_stream_from_array(@into);
      # $repeat_between is tested first because it is known up front; by the
      # time this callback runs the collect stage has filled @between, so it
      # can be tested here too. The separator is skipped after the last item.
      return $transform->($_)
        unless $repeat_between && @between && $s->peek;
      return $self->_stream_concat(
        $transform->($_), $self->_stream_from_array(@between)
      );
    }));
  };
  return $self->replace($repeater, \%opts);
}
# Convenience wrapper: same as repeat() with the content option forced on,
# so the selected element's content is repeated rather than the element
# itself. $options may be omitted; the caller's hash is copied, not modified.
sub repeat_content {
  my ($self, $repeat_for, $options) = @_;
  my %with_content = (%{ $options || {} }, content => 1);
  return $self->repeat($repeat_for, \%with_content);
}
284 HTML::Zoom::FilterBuilder - Add Filters to a Stream
288 Create an L<HTML::Zoom> instance:
291 my $root = HTML::Zoom
295 <title>Default Title</title>
297 <body bad_attr='junk'>
303 Create a new attribute on the C<body> tag:
307 ->set_attribute(class=>'main');
309 Add an extra value to an existing attribute:
313 ->add_to_attribute(class=>'one-column');
315 Set the content of the C<title> tag:
319 ->replace_content('Hello World');
321 Set content from another L<HTML::Zoom> instance:
323 my $body = HTML::Zoom
327 <p id="p2">Is the Time</p>
333 ->replace_content($body);
335 Set an attribute on multiple matches:
339 ->set_attribute(class=>'para');
345 ->remove_attribute('bad_attr');
351 my $output = $root->to_html;
358 <title>Hello World</title>
360 <body class="main one-column"><div id="stuff">
361 <p class="para">Well Now</p>
362 <p id="p2" class="para">Is the Time</p>
370 is($output, $expect, 'Synopsis code works ok');
376 Given a L<HTML::Zoom> stream, provide methods to apply filters which
377 alter the content of that stream.
381 This class defines the following public API
385 Sets an attribute of a given name to a given value for all matching selections.
389 ->set_attribute(class=>'paragraph')
391 ->set_attribute({ name=>'class', value=>'divider' });
394 Overrides existing values, if such exist. When multiple L</set_attribute>
395 calls are made against the same or overlapping selection sets, the final
398 =head2 add_to_attribute
400 Adds a value to an existing attribute, or creates one if the attribute does not
405 ->set_attribute(class=>'paragraph')
407 ->add_to_attribute({ name=>'class', value=>'divider' });
409 Attributes with more than one value will have a dividing space.
411 =head2 remove_attribute
413 Removes an attribute and all its values.
417 ->set_attribute(class=>'paragraph')
419 ->remove_attribute('class');
421 Removes attributes from the original stream or events already added.
425 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
426 optional common options as hash reference.
430 =item into [ARRAY REFERENCE]
432 Where to save collected events (selected elements).
434 $z1->select('#main-content')
435 ->collect({ into => \@body })
437 $z2->select('#main-content')
443 Run filter on collected elements (locally setting $_ to stream, and passing
444 stream as an argument to given code reference). Filtered stream would be
449 filter => sub { $_->select('.inner')->replace_content('bar!') },
453 It can be used to further filter selection. For example
457 filter => sub { $_->select('td') },
461 is equivalent to (not implemented yet) descendant selector combination, i.e.
465 =item passthrough [BOOLEAN]
467 Extract copy of elements; the stream is unchanged (it does not remove collected
468 elements). For example without 'passthrough'
470 HTML::Zoom->from_html('<foo><bar /></foo>')
472 ->collect({ content => 1 })
475 returns '<foo></foo>', while with C<passthrough> option
477 HTML::Zoom->from_html('<foo><bar /></foo>')
479 ->collect({ content => 1, passthrough => 1 })
482 returns '<foo><bar /></foo>'.
484 =item content [BOOLEAN]
486 Collect content of the element, and not the element itself.
490 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
495 would return '<p>foo</p>', while
497 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
499 ->collect({ content => 1 })
502 would return '<h1></h1><p>foo</p>'.
504 See also L</collect_content>.
506 =item flush_before [BOOLEAN]
508 Generate C<flush> event before collecting, to ensure that the HTML generated up
509 to the selected element being collected is flushed through to the browser. Usually
510 used in L</repeat> or L</repeat_content>.
514 =head2 collect_content
516 Collects contents of L<HTML::Zoom/select> result.
518 HTML::Zoom->from_file($foo)
519 ->select('#main-content')
520 ->collect_content({ into => \@foo_body })
523 ->replace_content(\@foo_body)
526 Equivalent to running L</collect> with C<content> option set.
530 Given a L<HTML::Zoom/select> result, add given content (which might be string,
531 array or another L<HTML::Zoom> object) before it.
534 ->select('input[name="foo"]')
535 ->add_before(\ '<span class="warning">required field</span>');
539 Like L</add_before>, only after L<HTML::Zoom/select> result.
545 You can add zoom events directly
549 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
551 =head2 prepend_content
555 =head2 append_content
561 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
562 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
563 (via hash reference).
565 =head2 replace_content
567 Given a L<HTML::Zoom/select> result, replace the content with a string, array
568 or another L<HTML::Zoom> object.
571 ->select('title, #greeting')
572 ->replace_content('Hello world!');
576 $zoom->select('.item')->repeat(sub {
577 if (my $row = $db_thing->next) {
578 return sub { $_->select('.item-name')->replace_content($row->name) }
582 }, { flush_before => 1 });
584 Run I<$repeat_for>, which should be an iterator (code reference) returning
585 subroutines, reference to array of subroutines, or other zoom-able object
586 consisting of transformations. Those subroutines would be run with $_
587 local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
588 said result passed as parameter to subroutine.
590 You might want to use an iterator when you don't have all elements upfront
592 $zoom = $zoom->select('.contents')->repeat(sub {
593 while (my $line = $fh->getline) {
595 $_->select('.lno')->replace_content($fh->input_line_number)
596 ->select('.line')->replace_content($line)
602 You might want to use an array reference if it doesn't matter that all iterations
605 $zoom->select('table')->repeat([
609 $_->select('td')->replace_content($e);
614 In addition to common options as in L</collect>, it also supports
618 =item repeat_between [SELECTOR]
620 Selects object to be repeated between items. In the case of array this object
621 is put between elements, in case of iterator it is put between results of
622 subsequent iterations, in the case of streamable it is put between events
625 See documentation for L</repeat_content>
629 =head2 repeat_content
631 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
632 this result to this iterator. Accepts the same options as L</repeat>.
634 Equivalent to using the C<content> option with L</repeat>.
641 $_->select('.name')->replace_content('Matt')
642 ->select('.age')->replace_content('26')
645 $_->select('.name')->replace_content('Mark')
646 ->select('.age')->replace_content('0x29')
649 $_->select('.name')->replace_content('Epitaph')
650 ->select('.age')->replace_content('<redacted>')
653 { repeat_between => '.between' }
663 See L<HTML::Zoom> for authors.
667 See L<HTML::Zoom> for the license.