1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Thin delegate: forwards every argument to stream_from_code on the
# stream_utils object obtained from this object's _zconfig.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Thin delegate: forwards every argument to stream_from_array on the
# stream_utils object obtained from this object's _zconfig.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Thin delegate: forwards every argument to stream_from_proto on the
# stream_utils object obtained from this object's _zconfig.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
# NOTE(review): the sub header for this body is not visible in this listing;
# other code in this file calls $self->_stream_concat, so that is presumably
# its name -- confirm. Forwards every argument to stream_utils->stream_concat.
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Thin delegate: forwards every argument to flatten_stream_of_streams on the
# stream_utils object obtained from this object's _zconfig.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short alias for set_attribute; all arguments are passed through unchanged.
27 sub set_attr { shift->set_attribute(@_); }
# NOTE(review): the sub header for this body is not visible in this listing;
# judging by the set_attr alias it is presumably set_attribute -- confirm.
# The (name, value) pair comes from _parse_attribute_args, which HTML-escapes
# the value.
31 my ($name, $value) = $self->_parse_attribute_args(@_);
# $_[0] is the event being processed; $a is its attrs hash.
33 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute is already present on the event.
34 my $e = exists $a->{$name};
# Result: a shallow copy of the event with raw and raw_attrs reset to undef
# and $name set to $value in a copied attrs hash.
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise attribute arguments into a (name, value) list.
# Returns the name unchanged and the value after it has been passed through
# the configured parser's html_escape.
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
# Despite the "WARNING"/"deprecated" wording this is fatal: a single hashref
# argument whose name and value entries are both true dies here.
49 die "WARNING: Long form arg (name => 'class', value => 'x') is deprecated"
50 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
# Two or more arguments are taken positionally as (name, value); a single
# argument is treated as a hashref and its name/value entries are extracted.
51 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
52 return ($name, $self->_zconfig->parser->html_escape($value));
# Body of a retired method (its sub header is not visible in this listing):
# any call dies with a message pointing callers at add_to_attribute.
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: add_to_attribute with 'class' as the attribute name.
59 sub add_class { shift->add_to_attribute('class',@_) }
# Convenience wrapper: remove_from_attribute with 'class' as the attribute name.
61 sub remove_class { shift->remove_from_attribute('class',@_) }
# Convenience wrapper: set_attribute with 'class' as the attribute name.
63 sub set_class { shift->set_attribute('class',@_) }
# Convenience wrapper: set_attribute with 'id' as the attribute name.
65 sub set_id { shift->set_attribute('id',@_) }
# Append a value to an attribute, creating the attribute when it is absent.
# The (name, value) pair comes from _parse_attribute_args, which HTML-escapes
# the value.
67 sub add_to_attribute {
69 my ($name, $value) = $self->_parse_attribute_args(@_);
# $_[0] is the event being processed; $a is its attrs hash.
71 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute is already present on the event.
72 my $e = exists $a->{$name};
# Result: a shallow copy of the event with raw and raw_attrs reset to undef.
73 +{ %$evt, raw => undef, raw_attrs => undef,
# An existing value is kept and the new value is joined on with one space.
76 $name => join(' ', ($e ? $a->{$name} : ()), $value)
78 ($e # add to name list if not present
80 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Build a filter that removes one value from a space-separated attribute.
#
# Takes the same arguments as add_to_attribute, e.g. (class => 'lead'), and
# returns a code reference. That code reference receives an event and returns
# a shallow copy of it (raw and raw_attrs reset to undef) in which every
# occurrence of the value has been dropped from the named attribute.
# Events that do not carry the attribute are returned unchanged.
sub remove_from_attribute {
  my $self = shift;
  # _parse_attribute_args returns a (name, escaped value) list. It must be
  # unpacked as a list: assigning it to a single scalar keeps only the last
  # element, which cannot then be used as a hash of attributes.
  my ($name, $value) = $self->_parse_attribute_args(@_);
  sub {
    my $a = (my $evt = $_[0])->{attrs};
    return $evt unless exists $a->{$name};
    #TODO needs to support multiple removes
    +{ %$evt, raw => undef, raw_attrs => undef,
       attrs => {
         %$a,
         # grep keeps the surviving values; map would instead produce the
         # boolean results of each comparison.
         $name => join(' ', grep { $_ ne $value } split ' ', $a->{$name}),
       },
    }
  };
}
# Remove an attribute and its value from an event.
# Accepts either the attribute name itself or a hashref whose name entry
# holds it.
101 sub remove_attribute {
102 my ($self, $args) = @_;
103 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
# $_[0] is the event being processed; $a is its attrs hash.
105 my $a = (my $evt = $_[0])->{attrs};
# Events without the attribute are handed back untouched.
106 return $evt unless exists $a->{$name};
# Delete from a copy so the original event's attrs hash is not modified.
107 $a = { %$a }; delete $a->{$name};
108 +{ %$evt, raw => undef, raw_attrs => undef,
# Drop the name from the event's attr_names list as well.
110 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Transform an attribute by running a caller-supplied coderef over its value.
# Arguments are (name, code), or a single hashref with name and code entries.
115 sub transform_attribute {
117 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
# Work on copies of the event's attrs hash and attr_names list.
121 my %a = %{ $evt->{attrs} };
122 my @names = @{ $evt->{attr_names} };
124 my $existed_before = exists $a{$name};
# The coderef receives the current value (undef when the attribute is absent).
125 my $v = $code->( $a{$name} );
# An undefined result for an existing attribute counts as a deletion; a
# defined result for a previously missing attribute counts as an addition.
126 my $deleted = $existed_before && ! defined $v;
127 my $added = ! $existed_before && defined $v;
# Remove the attribute's name from the copied name list.
134 @names = grep $_ ne $name, @names;
# Result: a shallow copy of the event with raw and raw_attrs reset to undef.
138 +{ %$evt, raw => undef, raw_attrs => undef,
141 ? (attr_names => \@names )
# NOTE(review): the sub header is not visible in this listing; collect_content
# in this file calls $self->collect with an options hashref, so this is
# presumably collect -- confirm.
# Options read from the hashref: into, passthrough, content, filter and
# flush_before.
148 my ($self, $options) = @_;
149 my ($into, $passthrough, $content, $filter, $flush_before) =
150 @{$options}{qw(into passthrough content filter flush_before)};
# Per-match step: receives the matched event and the stream it came from.
152 my ($evt, $stream) = @_;
153 # We wipe the contents of @$into here so that other actions depending
154 # on this (such as a repeater) can be invoked multiple times easily.
155 # I -suspect- it's better for that state reset to be managed here; if it
156 # ever becomes painful the decision should be revisited
# In content mode @$into starts out empty; otherwise it starts with the
# matched event itself.
158 @$into = $content ? () : ($evt);
160 if ($evt->{is_in_place_close}) {
161 return $evt if $passthrough || $content;
164 my $name = $evt->{name};
# Content mode looks at upcoming events with peek instead of consuming them
# with next.
166 my $_next = $content ? 'peek' : 'next';
# The user filter runs with $_ localised to the stream, and the stream is
# also passed as its argument; its result replaces the stream.
169 $stream = do { local $_ = $stream; $filter->($stream) };
172 local $_ = $self->_stream_concat(
173 $self->_stream_from_array($evt),
178 $evt = $stream->next;
181 my $collector = $self->_stream_from_code(sub {
182 return unless $stream;
183 while (my ($evt) = $stream->$_next) {
# Track nesting depth from OPEN and CLOSE events.
184 $depth++ if ($evt->{type} eq 'OPEN');
185 $depth-- if ($evt->{type} eq 'CLOSE');
# Events are recorded in @$into when an into target was supplied, and are
# only handed back to the consumer in passthrough mode.
189 push(@$into, $evt) if $into;
190 return $evt if $passthrough;
193 push(@$into, $evt) if $into;
# In content mode the event was only peeked at, so consume it now.
194 $stream->next if $content;
195 return $evt if $passthrough;
197 die "Never saw closing </${name}> before end of source";
200 if ($passthrough||$content) {
# Shallow copy of the event with flush => 1 added; the original is untouched.
201 $evt = { %$evt, flush => 1 };
203 $evt = { type => 'EMPTY', flush => 1 };
206 return ($passthrough||$content||$flush_before)
207 ? [ $evt, $collector ]
# Same as collect, with the content option forced on.
# $options may be omitted; it is copied, so the caller's hash is not modified.
212 sub collect_content {
213 my ($self, $options) = @_;
214 $self->collect({ %{$options||{}}, content => 1 })
# NOTE(review): the sub header is not visible in this listing; the emitted
# events are placed ahead of the collected ones, which matches the add_before
# described in this file's POD -- confirm.
218 my ($self, $events) = @_;
# Collect in passthrough mode.
219 my $coll_proto = $self->collect({ passthrough => 1 });
221 my $emit = $self->_stream_from_proto($events);
# &$coll_proto; (no parentheses) calls the collect step with the current @_.
222 my $coll = &$coll_proto;
# [ event, stream ] result: emitted events, then the event, then the rest.
224 if(ref $coll eq 'ARRAY') {
225 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
226 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
# Single-event result: returned as the pair [ $emit, $coll ].
227 } elsif(ref $coll eq 'HASH') {
228 return [$emit, $coll];
230 return $self->_stream_concat($emit, $coll);
232 } else { return $emit }
# NOTE(review): the sub header is not visible in this listing; the emitted
# events are placed after the collected ones, which matches the add_after
# described in this file's POD -- confirm.
237 my ($self, $events) = @_;
# Collect in passthrough mode.
238 my $coll_proto = $self->collect({ passthrough => 1 });
241 my $emit = $self->_stream_from_proto($events);
# &$coll_proto; (no parentheses) calls the collect step with the current @_.
242 my $coll = &$coll_proto;
243 return ref($coll) eq 'HASH' # single event, no collect
# [ event, stream ] result: keep the event, append the emitted events after
# the collected stream.
245 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Insert the given events at the start of the matched element's content.
249 sub prepend_content {
250 my ($self, $events) = @_;
# Collect the element's content in passthrough mode.
251 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
254 my $emit = $self->_stream_from_proto($events);
# An event flagged is_in_place_close is copied without its raw and
# is_in_place_close entries and followed by an explicit CLOSE event of the
# same name.
255 if ($evt->{is_in_place_close}) {
256 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
257 return [ $evt, $self->_stream_from_array(
# NOTE(review): only the first event of $emit is used here ($emit->next) --
# confirm that dropping any further emitted events is intended.
258 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# &$coll_proto; (no parentheses) calls the collect step with the current @_.
261 my $coll = &$coll_proto;
# Emitted events go ahead of the collected content stream.
262 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
# NOTE(review): the sub header is not visible in this listing; the emitted
# events are placed after the collected content, which matches the
# append_content described in this file's POD -- confirm.
267 my ($self, $events) = @_;
# Collect the element's content in passthrough mode.
268 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
271 my $emit = $self->_stream_from_proto($events);
# An event flagged is_in_place_close is copied without its raw and
# is_in_place_close entries and followed by an explicit CLOSE event of the
# same name.
272 if ($evt->{is_in_place_close}) {
273 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
274 return [ $evt, $self->_stream_from_array(
# NOTE(review): only the first event of $emit is used here ($emit->next) --
# confirm that dropping any further emitted events is intended.
275 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# &$coll_proto; (no parentheses) calls the collect step with the current @_.
278 my $coll = &$coll_proto;
# Emitted events go after the collected content stream.
279 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# NOTE(review): the sub header is not visible in this listing; replace_content
# in this file calls $self->replace with these arguments, so this is
# presumably replace -- confirm.
# $options is handed to collect unchanged.
284 my ($self, $replace_with, $options) = @_;
285 my $coll_proto = $self->collect($options);
# Per-match step: receives the matched event and the stream it came from.
287 my ($evt, $stream) = @_;
288 my $emit = $self->_stream_from_proto($replace_with);
# &$coll_proto; (no parentheses) calls the collect step with the current @_.
289 my $coll = &$coll_proto;
290 # if we're replacing the contents of an in place close
291 # then we need to handle that here
292 if ($options->{content}
293 && ref($coll) eq 'HASH'
294 && $coll->{is_in_place_close}
# Take the next event from the stream to serve as the closing event.
296 my $close = $stream->next;
297 # shallow copy and nuke in place and raw (to force smart print)
298 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
299 $emit = $self->_stream_concat(
301 $self->_stream_from_array($close),
304 # For a straightforward replace operation we can, in fact, do the emit
305 # -before- the collect, and my first cut did so. However in order to
306 # use the captured content in generating the new content, we need
307 # the collect stage to happen first - and it seems highly unlikely
308 # that in normal operation the collect phase will take long enough
309 # for the difference to be noticeable
# The shape of the result follows the shape of what collect returned.
312 ? (ref $coll eq 'ARRAY' # [ event, stream ]
313 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
314 : (ref $coll eq 'HASH' # event or stream?
316 : $self->_stream_concat($coll, $emit))
# Same as replace, with the content option forced on.
# $options may be omitted; it is copied, so the caller's hash is not modified.
323 sub replace_content {
324 my ($self, $replace_with, $options) = @_;
325 $self->replace($replace_with, { %{$options||{}}, content => 1 })
# NOTE(review): the sub header is not visible in this listing; the body that
# follows it in this file calls $self->repeat with these arguments, so this
# is presumably repeat -- confirm.
329 my ($self, $repeat_for, $options) = @_;
# Note that the caller-supplied options hash is modified: an into entry
# pointing at a fresh @into is stored, and repeat_between is removed.
330 $options->{into} = \my @into;
332 my $repeat_between = delete $options->{repeat_between};
333 if ($repeat_between) {
# The installed filter selects the repeat_between match within the stream in
# $_ and collects it into @between.
334 $options->{filter} = sub {
335 $_->select($repeat_between)->collect({ into => \@between })
339 my $s = $self->_stream_from_proto($repeat_for);
340 # We have to test $repeat_between not @between here because
341 # at the point we're constructing our return stream @between
342 # hasn't been populated yet - but we can test @between in the
343 # map routine because it has been by then and that saves us doing
344 # the extra stream construction if we don't need it.
345 $self->_flatten_stream_of_streams(do {
346 if ($repeat_between) {
# Each repetition runs with $_ localised to a fresh stream over the
# collected events.
348 local $_ = $self->_stream_from_array(@into);
# The between events are appended only when some were collected and $s has
# another item pending.
349 (@between && $s->peek)
350 ? $self->_stream_concat(
351 $_[0]->($_), $self->_stream_from_array(@between)
357 local $_ = $self->_stream_from_array(@into);
# The repeater is installed through replace, using the same options.
363 $self->replace($repeater, $options);
# NOTE(review): the sub header is not visible in this listing; this matches
# the repeat_content described in this file's POD -- confirm.
# Same as repeat, with the content option forced on. $options may be omitted;
# it is copied before content is added.
367 my ($self, $repeat_for, $options) = @_;
368 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
375 HTML::Zoom::FilterBuilder - Add Filters to a Stream
379 Create an L<HTML::Zoom> instance:
382 my $root = HTML::Zoom
386 <title>Default Title</title>
388 <body bad_attr='junk'>
394 Create a new attribute on the C<body> tag:
398 ->set_attribute(class=>'main');
400 Add an extra value to an existing attribute:
404 ->add_to_attribute(class=>'one-column');
406 Set the content of the C<title> tag:
410 ->replace_content('Hello World');
412 Set content from another L<HTML::Zoom> instance:
414 my $body = HTML::Zoom
418 <p id="p2">Is the Time</p>
424 ->replace_content($body);
426 Set an attribute on multiple matches:
430 ->set_attribute(class=>'para');
436 ->remove_attribute('bad_attr');
442 my $output = $root->to_html;
449 <title>Hello World</title>
451 <body class="main one-column"><div id="stuff">
452 <p class="para">Well Now</p>
453 <p id="p2" class="para">Is the Time</p>
461 is($output, $expect, 'Synopsis code works ok');
467 Given a L<HTML::Zoom> stream, provide methods to apply filters which
468 alter the content of that stream.
472 This class defines the following public API
476 Sets an attribute of a given name to a given value for all matching selections.
480 ->set_attribute(class=>'paragraph')
482 ->set_attribute({class=>'paragraph', name=>'divider'});
484 Overrides existing values, if such exist. When multiple L</set_attribute>
485 calls are made against the same or overlapping selection sets, the final
488 =head2 add_to_attribute
490 Adds a value to an existing attribute, or creates one if the attribute does not
491 yet exist. You may call this method with either an Array or HashRef of Args.
495 ->set_attribute({class => 'paragraph', name => 'test'})
497 ->add_to_attribute(class=>'divider');
499 Attributes with more than one value will have a dividing space.
501 =head2 remove_attribute
503 Removes an attribute and all its values.
507 ->set_attribute(class=>'paragraph')
509 ->remove_attribute('class');
511 =head2 remove_from_attribute
513 Removes a value from an existing attribute.
517 ->set_attribute(class=>'paragraph lead')
519 ->remove_from_attribute('class' => 'lead');
521 Removes attributes from the original stream or events already added.
525 Add to a class attribute
529 Remove from a class attribute
531 =head2 transform_attribute
533 Transforms (or creates or deletes) an attribute by running the passed
534 coderef on it. If the coderef returns nothing, the attribute is
539 ->transform_attribute( href => sub {
540 ( my $a = shift ) =~ s/localhost/example.com/;
547 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
548 optional common options as hash reference.
552 =item into [ARRAY REFERENCE]
554 Where to save collected events (selected elements).
556 $z1->select('#main-content')
557 ->collect({ into => \@body })
559 $z2->select('#main-content')
565 Run filter on collected elements (locally setting $_ to stream, and passing
566 stream as an argument to given code reference). Filtered stream would be
571 filter => sub { $_->select('.inner')->replace_content('bar!') },
575 It can be used to further filter selection. For example
579 filter => sub { $_->select('td') },
583 is equivalent to (not implemented yet) descendant selector combination, i.e.
587 =item passthrough [BOOLEAN]
589 Extract copy of elements; the stream is unchanged (it does not remove collected
590 elements). For example without 'passthrough'
592 HTML::Zoom->from_html('<foo><bar /></foo>')
594 ->collect({ content => 1 })
597 returns '<foo></foo>', while with C<passthrough> option
599 HTML::Zoom->from_html('<foo><bar /></foo>')
601 ->collect({ content => 1, passthrough => 1 })
604 returns '<foo><bar /></foo>'.
606 =item content [BOOLEAN]
608 Collect content of the element, and not the element itself.
612 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
617 would return '<p>foo</p>', while
619 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
621 ->collect({ content => 1 })
624 would return '<h1></h1><p>foo</p>'.
626 See also L</collect_content>.
628 =item flush_before [BOOLEAN]
630 Generate C<flush> event before collecting, to ensure that the HTML generated up
631 to the selected element being collected is flushed through to the browser. Usually
632 used in L</repeat> or L</repeat_content>.
636 =head2 collect_content
638 Collects contents of L<HTML::Zoom/select> result.
640 HTML::Zoom->from_file($foo)
641 ->select('#main-content')
642 ->collect_content({ into => \@foo_body })
645 ->replace_content(\@foo_body)
648 Equivalent to running L</collect> with C<content> option set.
652 Given a L<HTML::Zoom/select> result, add given content (which might be string,
653 array or another L<HTML::Zoom> object) before it.
656 ->select('input[name="foo"]')
657 ->add_before(\ '<span class="warning">required field</span>');
661 Like L</add_before>, only after L<HTML::Zoom/select> result.
667 You can add zoom events directly
671 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
673 =head2 prepend_content
675 Similar to add_before, but adds the content to the match.
678 ->from_html(q[<p>World</p>])
680 ->prepend_content("Hello ")
683 ## <p>Hello World</p>
685 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
687 =head2 append_content
689 Similar to add_after, but adds the content to the match.
692 ->from_html(q[<p>Hello </p>])
694 ->append_content("World")
697 ## <p>Hello World</p>
699 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
703 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
704 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
705 (via hash reference).
707 =head2 replace_content
709 Given a L<HTML::Zoom/select> result, replace the content with a string, array
710 or another L<HTML::Zoom> object.
713 ->select('title, #greeting')
714 ->replace_content('Hello world!');
718 For a given selection, repeat over transformations, typically for the purposes
719 of populating lists. Takes either an array of anonymous subroutines or a
720 zoomable object consisting of transformations.
722 Example of array reference style (when it doesn't matter that all iterations are
725 $zoom->select('table')->repeat([
729 $_->select('td')->replace_content($e);
734 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
735 collected elements), and with said result passed as parameter to subroutine.
737 You might want to use CodeStream when you don't have all elements upfront
739 $zoom->select('.contents')->repeat(sub {
740 HTML::Zoom::CodeStream->new({
742 while (my $line = $fh->getline) {
744 $_->select('.lno')->replace_content($fh->input_line_number)
745 ->select('.line')->replace_content($line)
753 In addition to common options as in L</collect>, it also supports:
757 =item repeat_between [SELECTOR]
759 Selects object to be repeated between items. In the case of array this object
760 is put between elements, in case of iterator it is put between results of
761 subsequent iterations, in the case of streamable it is put between events
764 See documentation for L</repeat_content>
768 =head2 repeat_content
770 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
771 this result to this iterator. Accepts the same options as L</repeat>.
773 Equivalent to using the C<content> option with L</repeat>.
780 $_->select('.name')->replace_content('Matt')
781 ->select('.age')->replace_content('26')
784 $_->select('.name')->replace_content('Mark')
785 ->select('.age')->replace_content('0x29')
788 $_->select('.name')->replace_content('Epitaph')
789 ->select('.age')->replace_content('<redacted>')
792 { repeat_between => '.between' }
802 See L<HTML::Zoom> for authors.
806 See L<HTML::Zoom> for the license.