1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Thin wrapper: takes the invocant off @_ and forwards the remaining
# arguments to stream_from_code on the stream_utils object obtained from
# the invocant's _zconfig. The result of that call is the return value.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Thin wrapper: forwards every argument after the invocant to
# stream_from_array on the configured stream_utils object and returns
# whatever that call returns.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Thin wrapper: forwards every argument after the invocant to
# stream_from_proto on the configured stream_utils object. Callers in this
# file pass it user-supplied content (events, replacement content, repeat
# sources) to obtain a stream object.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
# Body of the stream-concatenation wrapper (its `sub` line is not visible in
# this extract; other subs call it as $self->_stream_concat). Forwards every
# argument after the invocant to stream_concat on the stream_utils object.
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Thin wrapper: forwards every argument after the invocant to
# flatten_stream_of_streams on the configured stream_utils object.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short alias for set_attribute: passes the invocant and all arguments on
# unchanged and returns what set_attribute returns.
27 sub set_attr { my $self = shift; return $self->set_attribute(@_); }
# Visible part of the attribute-setting filter builder (presumably
# set_attribute; the `sub` header and closing lines are not visible in this
# extract). The arguments are normalised into a { name => value } hashref.
31 my $attr = $self->_parse_attribute_args(@_);
# $evt is the event handed to the filter; $a is its current attribute hash.
33 my $a = (my $evt = $_[0])->{attrs};
# Names being set that the element does not have yet.
34 my @kadd = grep {!exists $a->{$_}} keys %$attr;
# Build a new event hash: raw and raw_attrs are reset to undef (presumably so
# the element is re-rendered from attrs - TODO confirm), and the new values
# override existing ones of the same name.
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, %$attr },
# Only rewrite attr_names when new names were added; they go at the end.
37 @kadd ? (attr_names => [ @{$evt->{attr_names}}, @kadd ]) : ()
# Normalises attribute arguments into a hashref of name => value pairs.
# Accepts either a single hashref or a (name, value) pair. Some lines of this
# sub (including where $self is taken from @_ and the return) are not visible
# in this extract.
42 sub _parse_attribute_args {
# Warn when a lone argument has true 'name' and 'value' entries, i.e. it
# looks like the deprecated long form { name => ..., value => ... }.
# NOTE(review): $_[0] is dereferenced as a hash here before the ref() check
# on the following statement; confirm a single non-hashref argument can
# never reach this line.
45 warn "WARNING: Long form arg (name => 'class', value => 'x') is deprecated. This may not do what you originally intended..."
46 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
# A hashref is used as-is (not copied); otherwise the first two arguments
# become a one-pair hash.
48 my $opts = ref($_[0]) eq 'HASH' ? $_[0] : {$_[0] => $_[1]};
# NOTE(review): the return value of html_escape is discarded, so this loop
# only has an effect if html_escape modifies its (aliased) argument in
# place - TODO confirm against the parser implementation.
49 for (values %{$opts}) { $self->_zconfig->parser->html_escape($_); }
# Body of a retired method (its `sub` line is not visible in this extract;
# presumably add_attribute). It unconditionally dies, directing callers to
# add_to_attribute instead.
54 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: add_to_attribute with the attribute name fixed to
# 'class'; remaining arguments are passed through unchanged.
57 sub add_class { my $self = shift; return $self->add_to_attribute('class', @_); }
# Convenience wrapper: remove_from_attribute with the attribute name fixed to
# 'class'; remaining arguments are passed through unchanged.
59 sub remove_class { my $self = shift; return $self->remove_from_attribute('class', @_); }
# Convenience wrapper: set_attribute with the attribute name fixed to
# 'class'; remaining arguments are passed through unchanged.
61 sub set_class { my $self = shift; return $self->set_attribute('class', @_); }
# Convenience wrapper: set_attribute with the attribute name fixed to 'id';
# remaining arguments are passed through unchanged.
63 sub set_id { my $self = shift; return $self->set_attribute('id', @_); }
# Builds a filter that appends a value to an attribute, creating the
# attribute when the element does not have it yet. Several lines of this sub
# are not visible in this extract.
65 sub add_to_attribute {
# Normalise the arguments into a { name => value_to_add } hashref.
67 my $attr = $self->_parse_attribute_args(@_);
# $evt is the event handed to the filter; $a is its current attribute hash.
69 my $a = (my $evt = $_[0])->{attrs};
# Names that are new to this element and must be added to attr_names.
70 my @kadd = grep {!exists $a->{$_}} keys %$attr;
71 +{ %$evt, raw => undef, raw_attrs => undef,
# For each targeted attribute: existing value (if any) followed by the new
# value, separated by a single space. When the attribute does not exist the
# empty list is spliced in, so no leading space is produced.
74 map {$_ => join(' ', (exists $a->{$_} ? $a->{$_} : ()), $attr->{$_}) }
# Only rewrite attr_names when new names were added; they go at the end.
77 @kadd ? (attr_names => [ @{$evt->{attr_names}}, @kadd ]) : ()
# Builds a filter that removes one value from a space-separated attribute
# (for example a single class name from "class"). Arguments are normalised by
# _parse_attribute_args into a { name => value_to_remove } hashref.
# Several lines of this sub are not visible in this extract; only the visible
# lines are reproduced here, with the word filter corrected.
82 sub remove_from_attribute {
84 my $attr = $self->_parse_attribute_args(@_);
# $evt is the event handed to the filter; $a is its current attribute hash.
86 my $a = (my $evt = $_[0])->{attrs};
# New event hash with raw and raw_attrs reset to undef.
87 +{ %$evt, raw => undef, raw_attrs => undef,
90 #TODO needs to support multiple removes
# For each targeted attribute, split the current value on whitespace, keep
# the words that differ from the value being removed, and re-join them with
# single spaces. This must be grep: the previous map returned the boolean
# result of each comparison, so the attribute ended up as "1"/"" fragments
# instead of the surviving words.
91 map { my $tar = $_; $_ => join ' ',
92 grep {$attr->{$tar} ne $_} split ' ', $a->{$_} } keys %$attr
# Builds a filter that deletes an attribute from the selected element.
# $args is either the attribute name or a hashref whose 'name' entry holds
# it. Several lines of this sub are not visible in this extract.
98 sub remove_attribute {
99 my ($self, $args) = @_;
100 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
# $evt is the event handed to the filter; $a is its current attribute hash.
102 my $a = (my $evt = $_[0])->{attrs};
# Nothing to do: hand back the original event untouched.
103 return $evt unless exists $a->{$name};
# Work on a shallow copy so the original event's attrs hash is not modified.
104 $a = { %$a }; delete $a->{$name};
105 +{ %$evt, raw => undef, raw_attrs => undef,
# Drop the name from the attr_names list as well.
107 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Builds a filter that passes an attribute's current value through a coderef
# and acts on the result. Several lines of this sub are not visible in this
# extract.
112 sub transform_attribute {
# Accepts either (name, code) as a list or a single hashref with 'name' and
# 'code' entries.
114 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
# Shallow copies, so the incoming event's own structures are left alone.
118 my %a = %{ $evt->{attrs} };
119 my @names = @{ $evt->{attr_names} };
121 my $existed_before = exists $a{$name};
# The coderef receives the current value (undef when the attribute is absent).
122 my $v = $code->( $a{$name} );
# An undefined result for an existing attribute means "delete it"; a defined
# result for a missing attribute means "create it".
123 my $deleted = $existed_before && ! defined $v;
124 my $added = ! $existed_before && defined $v;
# Remove the name from the ordered name list.
131 @names = grep $_ ne $name, @names;
135 +{ %$evt, raw => undef, raw_attrs => undef,
138 ? (attr_names => \@names )
# Visible part of the collect filter builder (its `sub` header and a number
# of interior lines are not visible in this extract). It reads the options
# into, passthrough, content, filter and flush_before from the $options
# hashref, and the generated filter receives ($evt, $stream).
145 my ($self, $options) = @_;
146 my ($into, $passthrough, $content, $filter, $flush_before) =
147 @{$options}{qw(into passthrough content filter flush_before)};
149 my ($evt, $stream) = @_;
150 # We wipe the contents of @$into here so that other actions depending
151 # on this (such as a repeater) can be invoked multiple times easily.
152 # I -suspect- it's better for that state reset to be managed here; if it
153 # ever becomes painful the decision should be revisited
# In content mode the opening event itself is not collected.
155 @$into = $content ? () : ($evt);
# An in-place-close event is returned as-is under passthrough or content mode.
157 if ($evt->{is_in_place_close}) {
158 return $evt if $passthrough || $content;
# Element name, used in the error message further down.
161 my $name = $evt->{name};
# Method name used to read the stream in the collector loop: 'peek' in
# content mode, 'next' otherwise.
163 my $_next = $content ? 'peek' : 'next';
# The user filter is called with the stream both in $_ and as its argument.
166 $stream = do { local $_ = $stream; $filter->($stream) };
169 local $_ = $self->_stream_concat(
170 $self->_stream_from_array($evt),
175 $evt = $stream->next;
# Stream that performs the actual collection.
178 my $collector = $self->_stream_from_code(sub {
179 return unless $stream;
180 while (my ($evt) = $stream->$_next) {
# Track nesting depth from OPEN and CLOSE events.
181 $depth++ if ($evt->{type} eq 'OPEN');
182 $depth-- if ($evt->{type} eq 'CLOSE');
186 push(@$into, $evt) if $into;
187 return $evt if $passthrough;
190 push(@$into, $evt) if $into;
# In content mode the event was only peeked at, so consume it now.
191 $stream->next if $content;
192 return $evt if $passthrough;
194 die "Never saw closing </${name}> before end of source";
# Under passthrough or content mode the event is copied with a flush flag;
# the other visible assignment replaces it with an EMPTY flush event.
197 if ($passthrough||$content) {
198 $evt = { %$evt, flush => 1 };
200 $evt = { type => 'EMPTY', flush => 1 };
# With any of these options set, the result is an [ event, stream ] pair.
203 return ($passthrough||$content||$flush_before)
204 ? [ $evt, $collector ]
# Same as collect, but with the 'content' option forced on. $options may be
# omitted; the caller's hashref is copied rather than modified.
209 sub collect_content {
210 my ($self, $options) = @_;
211 $self->collect({ %{$options||{}}, content => 1 })
# Visible part of the add_before filter builder (its `sub` header and some
# interior lines are not visible in this extract). It emits $events ahead of
# whatever a passthrough collect of the selection produces.
215 my ($self, $events) = @_;
216 my $coll_proto = $self->collect({ passthrough => 1 });
218 my $emit = $self->_stream_from_proto($events);
# `&$coll_proto;` with no argument list calls the collect filter with the
# current @_ of the enclosing sub.
219 my $coll = &$coll_proto;
# [ event, stream ] pair: emit the new content, then the event, then the rest.
221 if(ref $coll eq 'ARRAY') {
222 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
223 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
# A bare event hash: return the new content followed by that event.
224 } elsif(ref $coll eq 'HASH') {
225 return [$emit, $coll];
227 return $self->_stream_concat($emit, $coll);
# Anything else: only the new content is returned.
229 } else { return $emit }
# Visible part of the add_after filter builder (its `sub` header and some
# interior lines are not visible in this extract). It places $events after
# whatever a passthrough collect of the selection produces.
234 my ($self, $events) = @_;
235 my $coll_proto = $self->collect({ passthrough => 1 });
238 my $emit = $self->_stream_from_proto($events);
# `&$coll_proto;` with no argument list calls the collect filter with the
# current @_ of the enclosing sub.
239 my $coll = &$coll_proto;
240 return ref($coll) eq 'HASH' # single event, no collect
# Otherwise keep the first event and append the new content after the
# collected stream.
242 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Builds a filter that inserts $events at the start of the selected
# element's content. Several lines of this sub are not visible in this
# extract.
246 sub prepend_content {
247 my ($self, $events) = @_;
248 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
251 my $emit = $self->_stream_from_proto($events);
# An in-place-close element has no content or separate close event, so the
# event is copied without its raw and is_in_place_close entries and an
# explicit CLOSE event is generated after the new content.
252 if ($evt->{is_in_place_close}) {
253 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# NOTE(review): only the first event from $emit is taken here; confirm that
# multi-event content is not expected on this path.
254 return [ $evt, $self->_stream_from_array(
255 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# `&$coll_proto;` with no argument list calls the collect filter with the
# current @_ of the enclosing sub.
258 my $coll = &$coll_proto;
# New content goes before the collected content.
259 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
# Visible part of the append_content filter builder (its `sub` header and
# some interior lines are not visible in this extract). It inserts $events at
# the end of the selected element's content.
264 my ($self, $events) = @_;
265 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
268 my $emit = $self->_stream_from_proto($events);
# An in-place-close element has no content or separate close event, so the
# event is copied without its raw and is_in_place_close entries and an
# explicit CLOSE event is generated after the new content.
269 if ($evt->{is_in_place_close}) {
270 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# NOTE(review): only the first event from $emit is taken here; confirm that
# multi-event content is not expected on this path.
271 return [ $evt, $self->_stream_from_array(
272 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# `&$coll_proto;` with no argument list calls the collect filter with the
# current @_ of the enclosing sub.
275 my $coll = &$coll_proto;
# New content goes after the collected content.
276 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Visible part of the replace filter builder (its `sub` header and some
# interior lines are not visible in this extract). It collects the selection
# using the given collect options and emits $replace_with in its place.
281 my ($self, $replace_with, $options) = @_;
282 my $coll_proto = $self->collect($options);
284 my ($evt, $stream) = @_;
285 my $emit = $self->_stream_from_proto($replace_with);
# `&$coll_proto;` with no argument list calls the collect filter with the
# current @_, i.e. the ($evt, $stream) pair unpacked above.
286 my $coll = &$coll_proto;
287 # if we're replacing the contents of an in place close
288 # then we need to handle that here
289 if ($options->{content}
290 && ref($coll) eq 'HASH'
291 && $coll->{is_in_place_close}
# The next event on the stream is taken to be the matching close event.
293 my $close = $stream->next;
294 # shallow copy and nuke in place and raw (to force smart print)
295 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
296 $emit = $self->_stream_concat(
298 $self->_stream_from_array($close),
301 # For a straightforward replace operation we can, in fact, do the emit
302 # -before- the collect, and my first cut did so. However in order to
303 # use the captured content in generating the new content, we need
304 # the collect stage to happen first - and it seems highly unlikely
305 # that in normal operation the collect phase will take long enough
306 # for the difference to be noticeable
# Shape the result according to what collect returned: an [ event, stream ]
# pair, a bare event hash, or a stream object.
309 ? (ref $coll eq 'ARRAY' # [ event, stream ]
310 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
311 : (ref $coll eq 'HASH' # event or stream?
313 : $self->_stream_concat($coll, $emit))
# Same as replace, but with the 'content' option forced on so only the
# element's content is replaced. $options may be omitted; the caller's
# hashref is copied rather than modified.
320 sub replace_content {
321 my ($self, $replace_with, $options) = @_;
322 $self->replace($replace_with, { %{$options||{}}, content => 1 })
# Visible part of the repeat filter builder (its `sub` header and some
# interior lines are not visible in this extract). The selection is collected
# into @into and then replayed once per item obtained from $repeat_for.
326 my ($self, $repeat_for, $options) = @_;
# NOTE(review): 'into' is written directly into the hashref supplied by the
# caller, and 'repeat_between' is deleted from it (and 'filter' may be set)
# below, so the caller's options hash is modified.
327 $options->{into} = \my @into;
329 my $repeat_between = delete $options->{repeat_between};
# With repeat_between, install a collect filter that captures the events
# matching that selector into @between.
330 if ($repeat_between) {
331 $options->{filter} = sub {
332 $_->select($repeat_between)->collect({ into => \@between })
336 my $s = $self->_stream_from_proto($repeat_for);
337 # We have to test $repeat_between not @between here because
338 # at the point we're constructing our return stream @between
339 # hasn't been populated yet - but we can test @between in the
340 # map routine because it has been by then and that saves us doing
341 # the extra stream construction if we don't need it.
342 $self->_flatten_stream_of_streams(do {
343 if ($repeat_between) {
# Each item is invoked with a fresh stream over the collected events, both in
# $_ and as its argument.
345 local $_ = $self->_stream_from_array(@into);
# Append the separator events only when some were captured and another item
# is still pending on $s.
346 (@between && $s->peek)
347 ? $self->_stream_concat(
348 $_[0]->($_), $self->_stream_from_array(@between)
354 local $_ = $self->_stream_from_array(@into);
# The repeater built above is installed via replace with the same options.
360 $self->replace($repeater, $options);
# Visible part of repeat_content (its `sub` header is not visible in this
# extract): same as repeat, but with the 'content' option forced on.
# $options may be omitted; repeat is given a fresh copy of the hash.
364 my ($self, $repeat_for, $options) = @_;
365 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
372 HTML::Zoom::FilterBuilder - Add Filters to a Stream
376 Create an L<HTML::Zoom> instance:
379 my $root = HTML::Zoom
383 <title>Default Title</title>
385 <body bad_attr='junk'>
391 Create a new attribute on the C<body> tag:
395 ->set_attribute(class=>'main');
397 Add an extra value to an existing attribute:
401 ->add_to_attribute(class=>'one-column');
403 Set the content of the C<title> tag:
407 ->replace_content('Hello World');
409 Set content from another L<HTML::Zoom> instance:
411 my $body = HTML::Zoom
415 <p id="p2">Is the Time</p>
421 ->replace_content($body);
423 Set an attribute on multiple matches:
427 ->set_attribute(class=>'para');
433 ->remove_attribute('bad_attr');
439 my $output = $root->to_html;
446 <title>Hello World</title>
448 <body class="main one-column"><div id="stuff">
449 <p class="para">Well Now</p>
450 <p id="p2" class="para">Is the Time</p>
458 is($output, $expect, 'Synopsis code works ok');
464 Given an L<HTML::Zoom> stream, provide methods to apply filters which
465 alter the content of that stream.
469 This class defines the following public API
473 Sets an attribute of a given name to a given value for all matching selections.
477 ->set_attribute(class=>'paragraph')
479 ->set_attribute({class=>'paragraph', name=>'divider'});
481 Overrides existing values, if such exist. When multiple L</set_attribute>
482 calls are made against the same or overlapping selection sets, the final
485 =head2 add_to_attribute
487 Adds a value to an existing attribute, or creates one if the attribute does not
488 yet exist. You may call this method with either an Array or HashRef of Args.
492 ->set_attribute({class => 'paragraph', name => 'test'})
494 ->add_to_attribute(class=>'divider');
496 Attributes with more than one value will have a dividing space.
498 =head2 remove_attribute
500 Removes an attribute and all its values.
504 ->set_attribute(class=>'paragraph')
506 ->remove_attribute('class');
508 =head2 remove_from_attribute
510 Removes a value from an existing attribute.
514 ->set_attribute(class=>'paragraph lead')
516 ->remove_from_attribute('class' => 'lead');
518 Removes attributes from the original stream or events already added.
522 Add to a class attribute
526 Remove from a class attribute
528 =head2 transform_attribute
530 Transforms (or creates or deletes) an attribute by running the passed
531 coderef on it. If the coderef returns nothing, the attribute is
536 ->transform_attribute( href => sub {
537 ( my $a = shift ) =~ s/localhost/example.com/;
544 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
545 optional common options as hash reference.
549 =item into [ARRAY REFERENCE]
551 Where to save collected events (selected elements).
553 $z1->select('#main-content')
554 ->collect({ into => \@body })
556 $z2->select('#main-content')
562 Run filter on collected elements (locally setting $_ to stream, and passing
563 stream as an argument to given code reference). Filtered stream would be
568 filter => sub { $_->select('.inner')->replace_content('bar!') },
572 It can be used to further filter selection. For example
576 filter => sub { $_->select('td') },
580 is equivalent to (not implemented yet) descendant selector combination, i.e.
584 =item passthrough [BOOLEAN]
586 Extract copy of elements; the stream is unchanged (it does not remove collected
587 elements). For example without 'passthrough'
589 HTML::Zoom->from_html('<foo><bar /></foo>')
591 ->collect({ content => 1 })
594 returns '<foo></foo>', while with C<passthrough> option
596 HTML::Zoom->from_html('<foo><bar /></foo>')
598 ->collect({ content => 1, passthrough => 1 })
601 returns '<foo><bar /></foo>'.
603 =item content [BOOLEAN]
605 Collect content of the element, and not the element itself.
609 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
614 would return '<p>foo</p>', while
616 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
618 ->collect({ content => 1 })
621 would return '<h1></h1><p>foo</p>'.
623 See also L</collect_content>.
625 =item flush_before [BOOLEAN]
627 Generate C<flush> event before collecting, to ensure that the HTML generated up
628 to selected element being collected is flushed through to the browser. Usually
629 used in L</repeat> or L</repeat_content>.
633 =head2 collect_content
635 Collects contents of L<HTML::Zoom/select> result.
637 HTML::Zoom->from_file($foo)
638 ->select('#main-content')
639 ->collect_content({ into => \@foo_body })
642 ->replace_content(\@foo_body)
645 Equivalent to running L</collect> with C<content> option set.
649 Given a L<HTML::Zoom/select> result, add given content (which might be string,
650 array or another L<HTML::Zoom> object) before it.
653 ->select('input[name="foo"]')
654 ->add_before(\ '<span class="warning">required field</span>');
658 Like L</add_before>, only after L<HTML::Zoom/select> result.
664 You can add zoom events directly
668 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
670 =head2 prepend_content
672 Similar to add_before, but adds the content to the match.
675 ->from_html(q[<p>World</p>])
677 ->prepend_content("Hello ")
680 ## <p>Hello World</p>
682 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
684 =head2 append_content
686 Similar to add_after, but adds the content to the match.
689 ->from_html(q[<p>Hello </p>])
691 ->append_content("World")
694 ## <p>Hello World</p>
696 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
700 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
701 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
702 (via hash reference).
704 =head2 replace_content
706 Given a L<HTML::Zoom/select> result, replace the content with a string, array
707 or another L<HTML::Zoom> object.
710 ->select('title, #greeting')
711 ->replace_content('Hello world!');
715 For a given selection, repeat over transformations, typically for the purposes
716 of populating lists. Takes either an array of anonymous subroutines or a zoom-
717 able object consisting of transformation.
719 Example of array reference style (when it doesn't matter that all iterations are
722 $zoom->select('table')->repeat([
726 $_->select('td')->replace_content($e);
731 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
732 collected elements), and with said result passed as parameter to subroutine.
734 You might want to use CodeStream when you don't have all elements upfront
736 $zoom->select('.contents')->repeat(sub {
737 HTML::Zoom::CodeStream->new({
739 while (my $line = $fh->getline) {
741 $_->select('.lno')->replace_content($fh->input_line_number)
742 ->select('.line')->replace_content($line)
750 In addition to common options as in L</collect>, it also supports:
754 =item repeat_between [SELECTOR]
756 Selects object to be repeated between items. In the case of array this object
757 is put between elements, in case of iterator it is put between results of
758 subsequent iterations, in the case of streamable it is put between events
761 See documentation for L</repeat_content>
765 =head2 repeat_content
767 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
768 this result to this iterator. Accepts the same options as L</repeat>.
770 Equivalent to using the C<content> option with L</repeat>.
777 $_->select('.name')->replace_content('Matt')
778 ->select('.age')->replace_content('26')
781 $_->select('.name')->replace_content('Mark')
782 ->select('.age')->replace_content('0x29')
785 $_->select('.name')->replace_content('Epitaph')
786 ->select('.age')->replace_content('<redacted>')
789 { repeat_between => '.between' }
799 See L<HTML::Zoom> for authors.
803 See L<HTML::Zoom> for the license.