1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Shorthand alias: hands every argument straight through to set_attribute.
sub set_attr {
  my $self = shift;
  return $self->set_attribute(@_);
}
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
49 die "WARNING: Long form arg (name => 'class', value => 'x') is deprecated"
50 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
51 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
52 return ($name, $self->_zconfig->parser->html_escape($value));
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: add_to_attribute with the attribute name fixed to 'class'.
sub add_class {
  my $self = shift;
  return $self->add_to_attribute('class', @_);
}
# Convenience wrapper: remove_from_attribute with the attribute name fixed
# to 'class'.
sub remove_class {
  my $self = shift;
  return $self->remove_from_attribute('class', @_);
}
# Convenience wrapper: set_attribute with the attribute name fixed to 'class'.
sub set_class {
  my $self = shift;
  return $self->set_attribute('class', @_);
}
# Convenience wrapper: set_attribute with the attribute name fixed to 'id'.
sub set_id {
  my $self = shift;
  return $self->set_attribute('id', @_);
}
67 sub add_to_attribute {
69 my ($name, $value) = $self->_parse_attribute_args(@_);
71 my $a = (my $evt = $_[0])->{attrs};
72 my $e = exists $a->{$name};
73 +{ %$evt, raw => undef, raw_attrs => undef,
76 $name => join(' ', ($e ? $a->{$name} : ()), $value)
78 ($e # add to name list if not present
80 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
85 sub remove_from_attribute {
87 my $attr = $self->_parse_attribute_args(@_);
90 my $a = (my $evt = $_[0])->{attrs};
91 my @kupd = grep {exists $a->{$_}} keys %$attr;
92 +{ %$evt, raw => undef, raw_attrs => undef,
95 #TODO needs to support multiple removes
96 map { my $tar = $_; $_ => join ' ',
97 map {$attr->{$tar} ne $_} split ' ', $a->{$_} } @kupd
103 sub remove_attribute {
104 my ($self, $args) = @_;
105 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
107 my $a = (my $evt = $_[0])->{attrs};
108 return $evt unless exists $a->{$name};
109 $a = { %$a }; delete $a->{$name};
110 +{ %$evt, raw => undef, raw_attrs => undef,
112 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
117 sub transform_attribute {
119 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
123 my %a = %{ $evt->{attrs} };
124 my @names = @{ $evt->{attr_names} };
126 my $existed_before = exists $a{$name};
127 my $v = $code->( $a{$name} );
128 my $deleted = $existed_before && ! defined $v;
129 my $added = ! $existed_before && defined $v;
136 @names = grep $_ ne $name, @names;
140 +{ %$evt, raw => undef, raw_attrs => undef,
143 ? (attr_names => \@names )
150 my ($self, $options) = @_;
151 my ($into, $passthrough, $content, $filter, $flush_before) =
152 @{$options}{qw(into passthrough content filter flush_before)};
154 my ($evt, $stream) = @_;
155 # We wipe the contents of @$into here so that other actions depending
156 # on this (such as a repeater) can be invoked multiple times easily.
157 # I -suspect- it's better for that state reset to be managed here; if it
158 # ever becomes painful the decision should be revisited
160 @$into = $content ? () : ($evt);
162 if ($evt->{is_in_place_close}) {
163 return $evt if $passthrough || $content;
166 my $name = $evt->{name};
168 my $_next = $content ? 'peek' : 'next';
171 $stream = do { local $_ = $stream; $filter->($stream) };
174 local $_ = $self->_stream_concat(
175 $self->_stream_from_array($evt),
180 $evt = $stream->next;
183 my $collector = $self->_stream_from_code(sub {
184 return unless $stream;
185 while (my ($evt) = $stream->$_next) {
186 $depth++ if ($evt->{type} eq 'OPEN');
187 $depth-- if ($evt->{type} eq 'CLOSE');
191 push(@$into, $evt) if $into;
192 return $evt if $passthrough;
195 push(@$into, $evt) if $into;
196 $stream->next if $content;
197 return $evt if $passthrough;
199 die "Never saw closing </${name}> before end of source";
202 if ($passthrough||$content) {
203 $evt = { %$evt, flush => 1 };
205 $evt = { type => 'EMPTY', flush => 1 };
208 return ($passthrough||$content||$flush_before)
209 ? [ $evt, $collector ]
214 sub collect_content {
215 my ($self, $options) = @_;
216 $self->collect({ %{$options||{}}, content => 1 })
220 my ($self, $events) = @_;
221 my $coll_proto = $self->collect({ passthrough => 1 });
223 my $emit = $self->_stream_from_proto($events);
224 my $coll = &$coll_proto;
226 if(ref $coll eq 'ARRAY') {
227 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
228 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
229 } elsif(ref $coll eq 'HASH') {
230 return [$emit, $coll];
232 return $self->_stream_concat($emit, $coll);
234 } else { return $emit }
239 my ($self, $events) = @_;
240 my $coll_proto = $self->collect({ passthrough => 1 });
243 my $emit = $self->_stream_from_proto($events);
244 my $coll = &$coll_proto;
245 return ref($coll) eq 'HASH' # single event, no collect
247 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
251 sub prepend_content {
252 my ($self, $events) = @_;
253 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
256 my $emit = $self->_stream_from_proto($events);
257 if ($evt->{is_in_place_close}) {
258 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
259 return [ $evt, $self->_stream_from_array(
260 $emit->next, { type => 'CLOSE', name => $evt->{name} }
263 my $coll = &$coll_proto;
264 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
269 my ($self, $events) = @_;
270 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
273 my $emit = $self->_stream_from_proto($events);
274 if ($evt->{is_in_place_close}) {
275 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
276 return [ $evt, $self->_stream_from_array(
277 $emit->next, { type => 'CLOSE', name => $evt->{name} }
280 my $coll = &$coll_proto;
281 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
286 my ($self, $replace_with, $options) = @_;
287 my $coll_proto = $self->collect($options);
289 my ($evt, $stream) = @_;
290 my $emit = $self->_stream_from_proto($replace_with);
291 my $coll = &$coll_proto;
292 # if we're replacing the contents of an in place close
293 # then we need to handle that here
294 if ($options->{content}
295 && ref($coll) eq 'HASH'
296 && $coll->{is_in_place_close}
298 my $close = $stream->next;
299 # shallow copy and nuke in place and raw (to force smart print)
300 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
301 $emit = $self->_stream_concat(
303 $self->_stream_from_array($close),
306 # For a straightforward replace operation we can, in fact, do the emit
307 # -before- the collect, and my first cut did so. However in order to
308 # use the captured content in generating the new content, we need
309 # the collect stage to happen first - and it seems highly unlikely
310 # that in normal operation the collect phase will take long enough
311 # for the difference to be noticeable
314 ? (ref $coll eq 'ARRAY' # [ event, stream ]
315 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
316 : (ref $coll eq 'HASH' # event or stream?
318 : $self->_stream_concat($coll, $emit))
325 sub replace_content {
326 my ($self, $replace_with, $options) = @_;
327 $self->replace($replace_with, { %{$options||{}}, content => 1 })
331 my ($self, $repeat_for, $options) = @_;
332 $options->{into} = \my @into;
334 my $repeat_between = delete $options->{repeat_between};
335 if ($repeat_between) {
336 $options->{filter} = sub {
337 $_->select($repeat_between)->collect({ into => \@between })
341 my $s = $self->_stream_from_proto($repeat_for);
342 # We have to test $repeat_between not @between here because
343 # at the point we're constructing our return stream @between
344 # hasn't been populated yet - but we can test @between in the
345 # map routine because it has been by then and that saves us doing
346 # the extra stream construction if we don't need it.
347 $self->_flatten_stream_of_streams(do {
348 if ($repeat_between) {
350 local $_ = $self->_stream_from_array(@into);
351 (@between && $s->peek)
352 ? $self->_stream_concat(
353 $_[0]->($_), $self->_stream_from_array(@between)
359 local $_ = $self->_stream_from_array(@into);
365 $self->replace($repeater, $options);
369 my ($self, $repeat_for, $options) = @_;
370 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
377 HTML::Zoom::FilterBuilder - Add Filters to a Stream
381 Create an L<HTML::Zoom> instance:
384 my $root = HTML::Zoom
388 <title>Default Title</title>
390 <body bad_attr='junk'>
396 Create a new attribute on the C<body> tag:
400 ->set_attribute(class=>'main');
402 Add an extra value to an existing attribute:
406 ->add_to_attribute(class=>'one-column');
408 Set the content of the C<title> tag:
412 ->replace_content('Hello World');
414 Set content from another L<HTML::Zoom> instance:
416 my $body = HTML::Zoom
420 <p id="p2">Is the Time</p>
426 ->replace_content($body);
428 Set an attribute on multiple matches:
432 ->set_attribute(class=>'para');
438 ->remove_attribute('bad_attr');
444 my $output = $root->to_html;
451 <title>Hello World</title>
453 <body class="main one-column"><div id="stuff">
454 <p class="para">Well Now</p>
455 <p id="p2" class="para">Is the Time</p>
463 is($output, $expect, 'Synopsis code works ok');
469 Given a L<HTML::Zoom> stream, provide methods to apply filters which
470 alter the content of that stream.
474 This class defines the following public API
478 Sets an attribute of a given name to a given value for all matching selections.
482 ->set_attribute(class=>'paragraph')
484 ->set_attribute({class=>'paragraph', name=>'divider'});
486 Overrides existing values, if such exist. When multiple L</set_attribute>
487 calls are made against the same or overlapping selection sets, the final
490 =head2 add_to_attribute
492 Adds a value to an existing attribute, or creates one if the attribute does not
493 yet exist. You may call this method with either an Array or HashRef of Args.
497 ->set_attribute({class => 'paragraph', name => 'test'})
499 ->add_to_attribute(class=>'divider');
501 Attributes with more than one value will have a dividing space.
503 =head2 remove_attribute
505 Removes an attribute and all its values.
509 ->set_attribute(class=>'paragraph')
511 ->remove_attribute('class');
513 =head2 remove_from_attribute
515 Removes a value from an existing attribute
519 ->set_attribute(class=>'paragraph lead')
521 ->remove_from_attribute('class' => 'lead');
523 Removes attributes from the original stream or events already added.
527 Add to a class attribute
531 Remove from a class attribute
533 =head2 transform_attribute
535 Transforms (or creates or deletes) an attribute by running the passed
536 coderef on it. If the coderef returns nothing, the attribute is
541 ->transform_attribute( href => sub {
542 ( my $a = shift ) =~ s/localhost/example.com/;
549 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
550 optional common options as hash reference.
554 =item into [ARRAY REFERENCE]
556 Where to save collected events (selected elements).
558 $z1->select('#main-content')
559 ->collect({ into => \@body })
561 $z2->select('#main-content')
567 Run filter on collected elements (locally setting $_ to stream, and passing
568 stream as an argument to given code reference). Filtered stream would be
573 filter => sub { $_->select('.inner')->replace_content('bar!') },
577 It can be used to further filter selection. For example
581 filter => sub { $_->select('td') },
585 is equivalent to (not implemented yet) descendant selector combination, i.e.
589 =item passthrough [BOOLEAN]
591 Extract copy of elements; the stream is unchanged (it does not remove collected
592 elements). For example without 'passthrough'
594 HTML::Zoom->from_html('<foo><bar /></foo>')
596 ->collect({ content => 1 })
599 returns '<foo></foo>', while with C<passthrough> option
601 HTML::Zoom->from_html('<foo><bar /></foo>')
603 ->collect({ content => 1, passthrough => 1 })
606 returns '<foo><bar /></foo>'.
608 =item content [BOOLEAN]
610 Collect content of the element, and not the element itself.
614 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
619 would return '<p>foo</p>', while
621 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
623 ->collect({ content => 1 })
626 would return '<h1></h1><p>foo</p>'.
628 See also L</collect_content>.
630 =item flush_before [BOOLEAN]
632 Generate C<flush> event before collecting, to ensure that the HTML generated up
633 to selected element being collected is flushed through to the browser. Usually
634 used in L</repeat> or L</repeat_content>.
638 =head2 collect_content
640 Collects contents of L<HTML::Zoom/select> result.
642 HTML::Zoom->from_file($foo)
643 ->select('#main-content')
644 ->collect_content({ into => \@foo_body })
647 ->replace_content(\@foo_body)
650 Equivalent to running L</collect> with C<content> option set.
654 Given a L<HTML::Zoom/select> result, add given content (which might be string,
655 array or another L<HTML::Zoom> object) before it.
658 ->select('input[name="foo"]')
659 ->add_before(\ '<span class="warning">required field</span>');
663 Like L</add_before>, only after L<HTML::Zoom/select> result.
669 You can add zoom events directly
673 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
675 =head2 prepend_content
677 Similar to add_before, but adds the content to the match.
680 ->from_html(q[<p>World</p>])
682 ->prepend_content("Hello ")
685 ## <p>Hello World</p>
687 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
689 =head2 append_content
691 Similar to add_after, but adds the content to the match.
694 ->from_html(q[<p>Hello </p>])
696 ->append_content("World")
699 ## <p>Hello World</p>
701 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
705 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
706 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
707 (via hash reference).
709 =head2 replace_content
711 Given a L<HTML::Zoom/select> result, replace the content with a string, array
712 or another L<HTML::Zoom> object.
715 ->select('title, #greeting')
716 ->replace_content('Hello world!');
720 For a given selection, repeat over transformations, typically for the purposes
721 of populating lists. Takes either an array of anonymous subroutines or a zoom-
722 able object consisting of transformation.
724 Example of array reference style (when it doesn't matter that all iterations are
727 $zoom->select('table')->repeat([
731 $_->select('td')->replace_content($e);
736 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
737 collected elements), and with said result passed as parameter to subroutine.
739 You might want to use CodeStream when you don't have all elements upfront
741 $zoom->select('.contents')->repeat(sub {
742 HTML::Zoom::CodeStream->new({
744 while (my $line = $fh->getline) {
746 $_->select('.lno')->replace_content($fh->input_line_number)
747 ->select('.line')->replace_content($line)
755 In addition to common options as in L</collect>, it also supports:
759 =item repeat_between [SELECTOR]
761 Selects object to be repeated between items. In the case of array this object
762 is put between elements, in case of iterator it is put between results of
763 subsequent iterations, in the case of streamable it is put between events
766 See documentation for L</repeat_content>
770 =head2 repeat_content
772 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
773 this result to this iterator. Accepts the same options as L</repeat>.
775 Equivalent to using C<content> option with L</repeat>.
782 $_->select('.name')->replace_content('Matt')
783 ->select('.age')->replace_content('26')
786 $_->select('.name')->replace_content('Mark')
787 ->select('.age')->replace_content('0x29')
790 $_->select('.name')->replace_content('Epitaph')
791 ->select('.age')->replace_content('<redacted>')
794 { repeat_between => '.between' }
804 See L<HTML::Zoom> for authors.
808 See L<HTML::Zoom> for the license.