1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
29 my ($name, $value) = $self->_parse_attribute_args(@_);
31 my $a = (my $evt = $_[0])->{attrs};
32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
42 sub _parse_attribute_args {
44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
51 die "renamed to add_to_attribute. killing this entirely for 1.0";
54 sub add_to_attribute {
56 my ($name, $value) = $self->_parse_attribute_args(@_);
58 my $a = (my $evt = $_[0])->{attrs};
59 my $e = exists $a->{$name};
60 +{ %$evt, raw => undef, raw_attrs => undef,
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
65 ($e # add to name list if not present
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
72 sub remove_attribute {
73 my ($self, $args) = @_;
74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
76 my $a = (my $evt = $_[0])->{attrs};
77 return $evt unless exists $a->{$name};
78 $a = { %$a }; delete $a->{$name};
79 +{ %$evt, raw => undef, raw_attrs => undef,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
86 sub transform_attribute {
88 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
92 my %a = %{ $evt->{attrs} };
93 my @names = @{ $evt->{attr_names} };
95 my $existed_before = exists $a{$name};
96 my $v = $code->( $a{$name} );
97 my $deleted = $existed_before && ! defined $v;
98 my $added = ! $existed_before && defined $v;
105 @names = grep $_ ne $name, @names;
109 +{ %$evt, raw => undef, raw_attrs => undef,
112 ? (attr_names => \@names )
119 my ($self, $options) = @_;
120 my ($into, $passthrough, $content, $filter, $flush_before) =
121 @{$options}{qw(into passthrough content filter flush_before)};
123 my ($evt, $stream) = @_;
124 # We wipe the contents of @$into here so that other actions depending
125 # on this (such as a repeater) can be invoked multiple times easily.
126 # I -suspect- it's better for that state reset to be managed here; if it
127 # ever becomes painful the decision should be revisited
129 @$into = $content ? () : ($evt);
131 if ($evt->{is_in_place_close}) {
132 return $evt if $passthrough || $content;
135 my $name = $evt->{name};
137 my $_next = $content ? 'peek' : 'next';
140 $stream = do { local $_ = $stream; $filter->($stream) };
143 local $_ = $self->_stream_concat(
144 $self->_stream_from_array($evt),
149 $evt = $stream->next;
152 my $collector = $self->_stream_from_code(sub {
153 return unless $stream;
154 while (my ($evt) = $stream->$_next) {
155 $depth++ if ($evt->{type} eq 'OPEN');
156 $depth-- if ($evt->{type} eq 'CLOSE');
160 push(@$into, $evt) if $into;
161 return $evt if $passthrough;
164 push(@$into, $evt) if $into;
165 $stream->next if $content;
166 return $evt if $passthrough;
168 die "Never saw closing </${name}> before end of source";
171 if ($passthrough||$content) {
172 $evt = { %$evt, flush => 1 };
174 $evt = { type => 'EMPTY', flush => 1 };
177 return ($passthrough||$content||$flush_before)
178 ? [ $evt, $collector ]
183 sub collect_content {
184 my ($self, $options) = @_;
185 $self->collect({ %{$options||{}}, content => 1 })
189 my ($self, $events) = @_;
190 sub { return $self->_stream_from_array(@$events, $_[0]) };
194 my ($self, $events) = @_;
195 my $coll_proto = $self->collect({ passthrough => 1 });
198 my $emit = $self->_stream_from_array(@$events);
199 my $coll = &$coll_proto;
200 return ref($coll) eq 'HASH' # single event, no collect
202 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
206 sub prepend_content {
207 my ($self, $events) = @_;
210 if ($evt->{is_in_place_close}) {
211 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
212 return [ $evt, $self->_stream_from_array(
213 @$events, { type => 'CLOSE', name => $evt->{name} }
216 return $self->_stream_from_array($evt, @$events);
221 my ($self, $events) = @_;
222 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
225 if ($evt->{is_in_place_close}) {
226 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
227 return [ $evt, $self->_stream_from_array(
228 @$events, { type => 'CLOSE', name => $evt->{name} }
231 my $coll = &$coll_proto;
232 my $emit = $self->_stream_from_array(@$events);
233 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
238 my ($self, $replace_with, $options) = @_;
239 my $coll_proto = $self->collect($options);
241 my ($evt, $stream) = @_;
242 my $emit = $self->_stream_from_proto($replace_with);
243 my $coll = &$coll_proto;
244 # if we're replacing the contents of an in place close
245 # then we need to handle that here
246 if ($options->{content}
247 && ref($coll) eq 'HASH'
248 && $coll->{is_in_place_close}
250 my $close = $stream->next;
251 # shallow copy and nuke in place and raw (to force smart print)
252 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
253 $emit = $self->_stream_concat(
255 $self->_stream_from_array($close),
258 # For a straightforward replace operation we can, in fact, do the emit
259 # -before- the collect, and my first cut did so. However in order to
260 # use the captured content in generating the new content, we need
261 # the collect stage to happen first - and it seems highly unlikely
262 # that in normal operation the collect phase will take long enough
263 # for the difference to be noticeable
266 ? (ref $coll eq 'ARRAY' # [ event, stream ]
267 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
268 : (ref $coll eq 'HASH' # event or stream?
270 : $self->_stream_concat($coll, $emit))
277 sub replace_content {
278 my ($self, $replace_with, $options) = @_;
279 $self->replace($replace_with, { %{$options||{}}, content => 1 })
283 my ($self, $repeat_for, $options) = @_;
284 $options->{into} = \my @into;
286 my $repeat_between = delete $options->{repeat_between};
287 if ($repeat_between) {
288 $options->{filter} = sub {
289 $_->select($repeat_between)->collect({ into => \@between })
293 my $s = $self->_stream_from_proto($repeat_for);
294 # We have to test $repeat_between not @between here because
295 # at the point we're constructing our return stream @between
296 # hasn't been populated yet - but we can test @between in the
297 # map routine because it has been by then and that saves us doing
298 # the extra stream construction if we don't need it.
299 $self->_flatten_stream_of_streams(do {
300 if ($repeat_between) {
302 local $_ = $self->_stream_from_array(@into);
303 (@between && $s->peek)
304 ? $self->_stream_concat(
305 $_[0]->($_), $self->_stream_from_array(@between)
311 local $_ = $self->_stream_from_array(@into);
317 $self->replace($repeater, $options);
321 my ($self, $repeat_for, $options) = @_;
322 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
329 HTML::Zoom::FilterBuilder - Add Filters to a Stream
333 Create an L<HTML::Zoom> instance:
336 my $root = HTML::Zoom
340 <title>Default Title</title>
342 <body bad_attr='junk'>
348 Create a new attribute on the C<body> tag:
352 ->set_attribute(class=>'main');
354 Add an extra value to an existing attribute:
358 ->add_to_attribute(class=>'one-column');
360 Set the content of the C<title> tag:
364 ->replace_content('Hello World');
366 Set content from another L<HTML::Zoom> instance:
368 my $body = HTML::Zoom
372 <p id="p2">Is the Time</p>
378 ->replace_content($body);
380 Set an attribute on multiple matches:
384 ->set_attribute(class=>'para');
390 ->remove_attribute('bad_attr');
396 my $output = $root->to_html;
403 <title>Hello World</title>
405 <body class="main one-column"><div id="stuff">
406 <p class="para">Well Now</p>
407 <p id="p2" class="para">Is the Time</p>
415 is($output, $expect, 'Synopsis code works ok');
421 Given a L<HTML::Zoom> stream, provide methods to apply filters which
422 alter the content of that stream.
426 This class defines the following public API
430 Sets an attribute of a given name to a given value for all matching selections.
434 ->set_attribute(class=>'paragraph')
436 ->set_attribute({ name=>'class', value=>'divider' });
439 Overrides existing values, if such exist. When multiple L</set_attribute>
440 calls are made against the same or overlapping selection sets, the final
443 =head2 add_to_attribute
445 Adds a value to an existing attribute, or creates one if the attribute does not
450 ->set_attribute(class=>'paragraph')
452 ->add_to_attribute({ name=>'class', value=>'divider' });
454 Attributes with more than one value will have a dividing space.
456 =head2 remove_attribute
458 Removes an attribute and all its values.
462 ->set_attribute(class=>'paragraph')
464 ->remove_attribute('class');
466 Removes attributes from the original stream or events already added.
468 =head2 transform_attribute
470 Transforms (or creates or deletes) an attribute by running the passed
471 coderef on it. If the coderef returns nothing, the attribute is
476 ->transform_attribute( href => sub {
477 ( my $a = shift ) =~ s/localhost/example.com/;
484 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
485 optional common options as hash reference.
489 =item into [ARRAY REFERENCE]
491 Where to save collected events (selected elements).
493 $z1->select('#main-content')
494 ->collect({ into => \@body })
496 $z2->select('#main-content')
502 Run filter on collected elements (locally setting $_ to stream, and passing
503 stream as an argument to given code reference). Filtered stream would be
508 filter => sub { $_->select('.inner')->replace_content('bar!') },
512 It can be used to further filter selection. For example
516 filter => sub { $_->select('td') },
520 is equivalent to (not implemented yet) descendant selector combination, i.e.
524 =item passthrough [BOOLEAN]
526 Extract copy of elements; the stream is unchanged (it does not remove collected
527 elements). For example without 'passthrough'
529 HTML::Zoom->from_html('<foo><bar /></foo>')
531 ->collect({ content => 1 })
534 returns '<foo></foo>', while with C<passthrough> option
536 HTML::Zoom->from_html('<foo><bar /></foo>')
538 ->collect({ content => 1, passthrough => 1 })
541 returns '<foo><bar /></foo>'.
543 =item content [BOOLEAN]
545 Collect content of the element, and not the element itself.
549 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
554 would return '<p>foo</p>', while
556 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
558 ->collect({ content => 1 })
561 would return '<h1></h1><p>foo</p>'.
563 See also L</collect_content>.
565 =item flush_before [BOOLEAN]
567 Generate a C<flush> event before collecting, to ensure that the HTML generated up
568 to the selected element being collected is flushed through to the browser. Usually
569 used in L</repeat> or L</repeat_content>.
573 =head2 collect_content
575 Collects contents of L<HTML::Zoom/select> result.
577 HTML::Zoom->from_file($foo)
578 ->select('#main-content')
579 ->collect_content({ into => \@foo_body })
582 ->replace_content(\@foo_body)
585 Equivalent to running L</collect> with C<content> option set.
589 Given a L<HTML::Zoom/select> result, add given content (which might be a string,
590 array or another L<HTML::Zoom> object) before it.
593 ->select('input[name="foo"]')
594 ->add_before(\ '<span class="warning">required field</span>');
598 Like L</add_before>, only after L<HTML::Zoom/select> result.
604 You can add zoom events directly
608 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
610 =head2 prepend_content
614 =head2 append_content
620 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
621 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
622 (via hash reference).
624 =head2 replace_content
626 Given a L<HTML::Zoom/select> result, replace the content with a string, array
627 or another L<HTML::Zoom> object.
630 ->select('title, #greeting')
631 ->replace_content('Hello world!');
635 $zoom->select('.item')->repeat(sub {
636 if (my $row = $db_thing->next) {
637 return sub { $_->select('.item-name')->replace_content($row->name) }
641 }, { flush_before => 1 });
643 Run I<$repeat_for>, which should be an iterator (code reference) returning
644 subroutines, reference to array of subroutines, or other zoom-able object
645 consisting of transformations. Those subroutines would be run with $_
646 local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
647 said result passed as parameter to subroutine.
649 You might want to use an iterator when you don't have all elements upfront
651 $zoom = $zoom->select('.contents')->repeat(sub {
652 while (my $line = $fh->getline) {
654 $_->select('.lno')->replace_content($fh->input_line_number)
655 ->select('.line')->replace_content($line)
661 You might want to use an array reference if it doesn't matter that all iterations
664 $zoom->select('table')->repeat([
668 $_->select('td')->replace_content($e);
673 In addition to common options as in L</collect>, it also supports
677 =item repeat_between [SELECTOR]
679 Selects object to be repeated between items. In the case of array this object
680 is put between elements, in case of iterator it is put between results of
681 subsequent iterations, in the case of streamable it is put between events
684 See documentation for L</repeat_content>
688 =head2 repeat_content
690 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
691 this result to this iterator. Accepts the same options as L</repeat>.
693 Equivalent to using C<content> option with L</repeat>.
700 $_->select('.name')->replace_content('Matt')
701 ->select('.age')->replace_content('26')
704 $_->select('.name')->replace_content('Mark')
705 ->select('.age')->replace_content('0x29')
708 $_->select('.name')->replace_content('Epitaph')
709 ->select('.age')->replace_content('<redacted>')
712 { repeat_between => '.between' }
722 See L<HTML::Zoom> for authors.
726 See L<HTML::Zoom> for the license.