1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Forwards all arguments to stream_from_code on the stream_utils object
# reached through this object's _zconfig, and returns its result.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Forwards all arguments to stream_from_array on the stream_utils object
# reached through this object's _zconfig, and returns its result.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Forwards all arguments to stream_from_proto on the stream_utils object
# reached through this object's _zconfig, and returns its result.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Forwards all arguments to flatten_stream_of_streams on the stream_utils
# object reached through this object's _zconfig, and returns its result.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short spelling of set_attribute: passes every argument through unchanged
# and returns whatever set_attribute returns.
sub set_attr {
  my ($self, @args) = @_;
  return $self->set_attribute(@args);
}
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Turns the caller's attribute arguments into a ($name, $value) pair.
# With more than one argument they are taken as (name, value); with a single
# argument it is dereferenced as a hash and its 'name' and 'value' entries
# are read. The returned value is passed through the parser's html_escape.
44 sub _parse_attribute_args {
# A lone hashref whose 'name' and 'value' are both true is the retired long
# form and is rejected outright.
47 die "Long form arg (name => 'class', value => 'x') is no longer supported"
48 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
49 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
50 return ($name, $self->_zconfig->parser->html_escape($value));
54 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: add_to_attribute with the attribute name fixed to
# 'class'. Remaining arguments are passed along untouched.
sub add_class {
  my $self = shift;
  return $self->add_to_attribute('class', @_);
}
# Removes class names from the matched element's class attribute.
#
# Arguments: zero or more class names; each argument may itself hold several
# names separated by whitespace.
#
# Returns: with no usable names (no arguments, or only references/undef) the
# result of remove_attribute('class'), i.e. the previous behaviour of dropping
# the whole attribute. Otherwise a filter coderef that takes an event hashref
# and returns either the same event (nothing to remove) or a modified copy.
#
# Previously every call forwarded to remove_attribute('class', @_), which
# only looks at its first argument, so remove_class('foo') wiped out every
# class on the element instead of just 'foo'.
#
# NOTE(review): names are compared literally against the stored attribute
# text; confirm whether stored values are HTML-escaped for unusual names.
sub remove_class {
  my ($self, @classes) = @_;

  my %unwanted = map { $_ => 1 }
                 map { split ' ', $_ }
                 grep { defined($_) && !ref($_) } @classes;

  # Nothing to remove by name: keep the historical whole-attribute removal.
  return $self->remove_attribute('class') unless %unwanted;

  return sub {
    my $evt   = $_[0];
    my $attrs = $evt->{attrs};
    return $evt unless $attrs && defined $attrs->{class};

    my @present = split ' ', $attrs->{class};
    my @kept    = grep { !$unwanted{$_} } @present;

    # None of the requested names were on the element: pass it through.
    return $evt if @kept == @present;

    # Work on a copy so the incoming event is never modified.
    my %new_attrs = %$attrs;
    my @name_list;
    if (@kept) {
      $new_attrs{class} = join ' ', @kept;
    }
    else {
      # Last class removed: drop the attribute rather than emit class="".
      delete $new_attrs{class};
      @name_list = (
        attr_names => [ grep { $_ ne 'class' } @{ $evt->{attr_names} || [] } ]
      );
    }

    # raw/raw_attrs are cleared in the same way remove_attribute clears them.
    return +{
      %$evt, raw => undef, raw_attrs => undef,
      attrs => \%new_attrs,
      @name_list,
    };
  };
}
# Convenience wrapper: set_attribute with the attribute name fixed to
# 'class'. Remaining arguments are passed along untouched.
sub set_class {
  my ($self, @rest) = @_;
  return $self->set_attribute(class => @rest);
}
# Convenience wrapper: set_attribute with the attribute name fixed to 'id'.
# Remaining arguments are passed along untouched.
sub set_id {
  my ($self, @rest) = @_;
  return $self->set_attribute(id => @rest);
}
# Adds a value to the named attribute of an event. When the attribute already
# exists the new value is joined onto the old one with a single space;
# otherwise the attribute is created and its name appended to attr_names.
65 sub add_to_attribute {
# $value comes back from _parse_attribute_args already HTML-escaped.
67 my ($name, $value) = $self->_parse_attribute_args(@_);
# $_[0] is the incoming event; $e records whether it already has the attribute.
69 my $a = (my $evt = $_[0])->{attrs};
70 my $e = exists $a->{$name};
# Build a new event hash from the old one with raw and raw_attrs set to undef
# (presumably so stale serialised text is not reused - TODO confirm).
71 +{ %$evt, raw => undef, raw_attrs => undef,
74 $name => join(' ', ($e ? $a->{$name} : ()), $value)
76 ($e # add to name list if not present
78 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Removes the named attribute from an event.
# The name may be given as a plain scalar or as a hashref with a 'name' key.
# Only the first argument after $self is read; any further ones are ignored.
83 sub remove_attribute {
84 my ($self, $args) = @_;
85 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
# $_[0] is the incoming event; one without the attribute is returned as is.
87 my $a = (my $evt = $_[0])->{attrs};
88 return $evt unless exists $a->{$name};
# Delete from a shallow copy so the incoming event's attrs hash is untouched.
89 $a = { %$a }; delete $a->{$name};
# New event hash: raw and raw_attrs set to undef, and the name filtered out
# of attr_names.
90 +{ %$evt, raw => undef, raw_attrs => undef,
92 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Rewrites, creates or deletes one attribute by running a caller-supplied
# coderef on its current value.
# Arguments are either (name, code) or a single hashref with 'name' and
# 'code' keys.
97 sub transform_attribute {
99 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
# Copies of the event's attribute hash and ordered name list, so the changes
# below do not touch the incoming event.
103 my %a = %{ $evt->{attrs} };
104 my @names = @{ $evt->{attr_names} };
# The coderef receives the current value, which is undef when the attribute
# is absent.
106 my $existed_before = exists $a{$name};
107 my $v = $code->( $a{$name} );
# An undefined result on an existing attribute means "delete"; a defined
# result on a missing attribute means "add".
108 my $deleted = $existed_before && ! defined $v;
109 my $added = ! $existed_before && defined $v;
# Drop the name from the ordered name list.
116 @names = grep $_ ne $name, @names;
# New event hash with raw and raw_attrs set to undef.
120 +{ %$evt, raw => undef, raw_attrs => undef,
123 ? (attr_names => \@names )
130 my ($self, $options) = @_;
131 my ($into, $passthrough, $content, $filter, $flush_before) =
132 @{$options}{qw(into passthrough content filter flush_before)};
134 my ($evt, $stream) = @_;
135 # We wipe the contents of @$into here so that other actions depending
136 # on this (such as a repeater) can be invoked multiple times easily.
137 # I -suspect- it's better for that state reset to be managed here; if it
138 # ever becomes painful the decision should be revisited
140 @$into = $content ? () : ($evt);
142 if ($evt->{is_in_place_close}) {
143 return $evt if $passthrough || $content;
146 my $name = $evt->{name};
148 my $_next = $content ? 'peek' : 'next';
151 $stream = do { local $_ = $stream; $filter->($stream) };
154 local $_ = $self->_stream_concat(
155 $self->_stream_from_array($evt),
160 $evt = $stream->next;
163 my $collector = $self->_stream_from_code(sub {
164 return unless $stream;
165 while (my ($evt) = $stream->$_next) {
166 $depth++ if ($evt->{type} eq 'OPEN');
167 $depth-- if ($evt->{type} eq 'CLOSE');
171 push(@$into, $evt) if $into;
172 return $evt if $passthrough;
175 push(@$into, $evt) if $into;
176 $stream->next if $content;
177 return $evt if $passthrough;
179 die "Never saw closing </${name}> before end of source";
182 if ($passthrough||$content) {
183 $evt = { %$evt, flush => 1 };
185 $evt = { type => 'EMPTY', flush => 1 };
188 return ($passthrough||$content||$flush_before)
189 ? [ $evt, $collector ]
# Shorthand for collect with the content option switched on.
# $options is an optional hashref; it is copied rather than modified, and a
# missing value is treated as an empty hash. Any 'content' entry supplied by
# the caller is overridden with 1.
194 sub collect_content {
195 my ($self, $options) = @_;
196 $self->collect({ %{$options||{}}, content => 1 })
200 my ($self, $events) = @_;
201 my $coll_proto = $self->collect({ passthrough => 1 });
203 my $emit = $self->_stream_from_proto($events);
204 my $coll = &$coll_proto;
206 if(ref $coll eq 'ARRAY') {
207 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
208 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
209 } elsif(ref $coll eq 'HASH') {
210 return [$emit, $coll];
212 return $self->_stream_concat($emit, $coll);
214 } else { return $emit }
219 my ($self, $events) = @_;
220 my $coll_proto = $self->collect({ passthrough => 1 });
223 my $emit = $self->_stream_from_proto($events);
224 my $coll = &$coll_proto;
225 return ref($coll) eq 'HASH' # single event, no collect
227 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
231 sub prepend_content {
232 my ($self, $events) = @_;
233 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
236 my $emit = $self->_stream_from_proto($events);
237 if ($evt->{is_in_place_close}) {
238 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
239 return [ $evt, $self->_stream_from_array(
240 $emit->next, { type => 'CLOSE', name => $evt->{name} }
243 my $coll = &$coll_proto;
244 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
249 my ($self, $events) = @_;
250 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
253 my $emit = $self->_stream_from_proto($events);
254 if ($evt->{is_in_place_close}) {
255 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
256 return [ $evt, $self->_stream_from_array(
257 $emit->next, { type => 'CLOSE', name => $evt->{name} }
260 my $coll = &$coll_proto;
261 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
266 my ($self, $replace_with, $options) = @_;
267 my $coll_proto = $self->collect($options);
269 my ($evt, $stream) = @_;
270 my $emit = $self->_stream_from_proto($replace_with);
271 my $coll = &$coll_proto;
272 # if we're replacing the contents of an in place close
273 # then we need to handle that here
274 if ($options->{content}
275 && ref($coll) eq 'HASH'
276 && $coll->{is_in_place_close}
278 my $close = $stream->next;
279 # shallow copy and nuke in place and raw (to force smart print)
280 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
281 $emit = $self->_stream_concat(
283 $self->_stream_from_array($close),
286 # For a straightforward replace operation we can, in fact, do the emit
287 # -before- the collect, and my first cut did so. However in order to
288 # use the captured content in generating the new content, we need
289 # the collect stage to happen first - and it seems highly unlikely
290 # that in normal operation the collect phase will take long enough
291 # for the difference to be noticeable
294 ? (ref $coll eq 'ARRAY' # [ event, stream ]
295 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
296 : (ref $coll eq 'HASH' # event or stream?
298 : $self->_stream_concat($coll, $emit))
# Shorthand for replace with the content option switched on.
# $replace_with is handed to replace unchanged. $options is an optional
# hashref; it is copied rather than modified, and a missing value is treated
# as an empty hash. Any 'content' entry supplied by the caller is overridden
# with 1.
305 sub replace_content {
306 my ($self, $replace_with, $options) = @_;
307 $self->replace($replace_with, { %{$options||{}}, content => 1 })
311 my ($self, $repeat_for, $options) = @_;
312 $options->{into} = \my @into;
314 my $repeat_between = delete $options->{repeat_between};
315 if ($repeat_between) {
316 $options->{filter} = sub {
317 $_->select($repeat_between)->collect({ into => \@between })
321 my $s = $self->_stream_from_proto($repeat_for);
322 # We have to test $repeat_between not @between here because
323 # at the point we're constructing our return stream @between
324 # hasn't been populated yet - but we can test @between in the
325 # map routine because it has been by then and that saves us doing
326 # the extra stream construction if we don't need it.
327 $self->_flatten_stream_of_streams(do {
328 if ($repeat_between) {
330 local $_ = $self->_stream_from_array(@into);
331 (@between && $s->peek)
332 ? $self->_stream_concat(
333 $_[0]->($_), $self->_stream_from_array(@between)
339 local $_ = $self->_stream_from_array(@into);
345 $self->replace($repeater, $options);
349 my ($self, $repeat_for, $options) = @_;
350 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
357 HTML::Zoom::FilterBuilder - Add Filters to a Stream
361 Create an L<HTML::Zoom> instance:
364 my $root = HTML::Zoom
368 <title>Default Title</title>
370 <body bad_attr='junk'>
376 Create a new attribute on the C<body> tag:
380 ->set_attribute(class=>'main');
382 Add an extra value to an existing attribute:
386 ->add_to_attribute(class=>'one-column');
388 Set the content of the C<title> tag:
392 ->replace_content('Hello World');
394 Set content from another L<HTML::Zoom> instance:
396 my $body = HTML::Zoom
400 <p id="p2">Is the Time</p>
406 ->replace_content($body);
408 Set an attribute on multiple matches:
412 ->set_attribute(class=>'para');
418 ->remove_attribute('bad_attr');
424 my $output = $root->to_html;
431 <title>Hello World</title>
433 <body class="main one-column"><div id="stuff">
434 <p class="para">Well Now</p>
435 <p id="p2" class="para">Is the Time</p>
443 is($output, $expect, 'Synopsis code works ok');
449 Given a L<HTML::Zoom> stream, provide methods to apply filters which
450 alter the content of that stream.
454 This class defines the following public API
458 Sets an attribute of a given name to a given value for all matching selections.
462 ->set_attribute(class=>'paragraph')
464 ->set_attribute({name=>'class', value=>'divider'});
467 Overrides existing values, if such exist. When multiple L</set_attribute>
468 calls are made against the same or overlapping selection sets, the final
471 =head2 add_to_attribute
473 Adds a value to an existing attribute, or creates one if the attribute does not
474 yet exist. You may call this method with either an Array or HashRef of Args.
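476 Here's the 'long form' HashRef (note that the current implementation rejects this form and dies with "Long form arg ... is no longer supported"; prefer the short form shown below):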
480 ->set_attribute(class=>'paragraph')
482 ->add_to_attribute({name=>'class', value=>'divider'});
484 And the exact same effect using the 'short form' Array:
488 ->set_attribute(class=>'paragraph')
490 ->add_to_attribute(class=>'divider');
492 Attributes with more than one value will have a dividing space.
494 =head2 remove_attribute
496 Removes an attribute and all its values.
500 ->set_attribute(class=>'paragraph')
502 ->remove_attribute('class');
504 Removes attributes from the original stream or events already added.
506 =head2 transform_attribute
508 Transforms (or creates or deletes) an attribute by running the passed
509 coderef on it. If the coderef returns nothing, the attribute is
514 ->transform_attribute( href => sub {
515 ( my $a = shift ) =~ s/localhost/example.com/;
522 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
523 optional common options as hash reference.
527 =item into [ARRAY REFERENCE]
529 Where to save collected events (selected elements).
531 $z1->select('#main-content')
532 ->collect({ into => \@body })
534 $z2->select('#main-content')
540 Run filter on collected elements (locally setting $_ to stream, and passing
541 stream as an argument to given code reference). Filtered stream would be
546 filter => sub { $_->select('.inner')->replace_content('bar!') },
550 It can be used to further filter selection. For example
554 filter => sub { $_->select('td') },
558 is equivalent to (not implemented yet) descendant selector combination, i.e.
562 =item passthrough [BOOLEAN]
564 Extract copy of elements; the stream is unchanged (it does not remove collected
565 elements). For example without 'passthrough'
567 HTML::Zoom->from_html('<foo><bar /></foo>')
569 ->collect({ content => 1 })
572 returns '<foo></foo>', while with C<passthrough> option
574 HTML::Zoom->from_html('<foo><bar /></foo>')
576 ->collect({ content => 1, passthrough => 1 })
579 returns '<foo><bar /></foo>'.
581 =item content [BOOLEAN]
583 Collect content of the element, and not the element itself.
587 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
592 would return '<p>foo</p>', while
594 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
596 ->collect({ content => 1 })
599 would return '<h1></h1><p>foo</p>'.
601 See also L</collect_content>.
603 =item flush_before [BOOLEAN]
605 Generate C<flush> event before collecting, to ensure that the HTML generated up
606 to the selected element being collected is flushed through to the browser. Usually
607 used in L</repeat> or L</repeat_content>.
611 =head2 collect_content
613 Collects contents of L<HTML::Zoom/select> result.
615 HTML::Zoom->from_file($foo)
616 ->select('#main-content')
617 ->collect_content({ into => \@foo_body })
620 ->replace_content(\@foo_body)
623 Equivalent to running L</collect> with C<content> option set.
627 Given a L<HTML::Zoom/select> result, add given content (which might be string,
628 array or another L<HTML::Zoom> object) before it.
631 ->select('input[name="foo"]')
632 ->add_before(\ '<span class="warning">required field</span>');
636 Like L</add_before>, only after L<HTML::Zoom/select> result.
642 You can add zoom events directly
646 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
648 =head2 prepend_content
650 Similar to add_before, but adds the content to the match.
653 ->from_html(q[<p>World</p>])
655 ->prepend_content("Hello ")
658 ## <p>Hello World</p>
660 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
662 =head2 append_content
664 Similar to add_after, but adds the content to the match.
667 ->from_html(q[<p>Hello </p>])
669 ->append_content("World")
672 ## <p>Hello World</p>
674 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
678 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
679 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
680 (via hash reference).
682 =head2 replace_content
684 Given a L<HTML::Zoom/select> result, replace the content with a string, array
685 or another L<HTML::Zoom> object.
688 ->select('title, #greeting')
689 ->replace_content('Hello world!');
693 For a given selection, repeat over transformations, typically for the purposes
694 of populating lists. Takes either an array of anonymous subroutines or a zoom-
695 able object consisting of transformation.
697 Example of array reference style (when it doesn't matter that all iterations are
700 $zoom->select('table')->repeat([
704 $_->select('td')->replace_content($e);
709 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
710 collected elements), and with said result passed as parameter to subroutine.
712 You might want to use CodeStream when you don't have all elements upfront
714 $zoom->select('.contents')->repeat(sub {
715 HTML::Zoom::CodeStream->new({
717 while (my $line = $fh->getline) {
719 $_->select('.lno')->replace_content($fh->input_line_number)
720 ->select('.line')->replace_content($line)
728 In addition to common options as in L</collect>, it also supports:
732 =item repeat_between [SELECTOR]
734 Selects object to be repeated between items. In the case of array this object
735 is put between elements, in case of iterator it is put between results of
736 subsequent iterations, in the case of streamable it is put between events
739 See documentation for L</repeat_content>
743 =head2 repeat_content
745 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
746 this result to this iterator. Accepts the same options as L</repeat>.
748 Equivalent to using C<content> option with L</repeat>.
755 $_->select('.name')->replace_content('Matt')
756 ->select('.age')->replace_content('26')
759 $_->select('.name')->replace_content('Mark')
760 ->select('.age')->replace_content('0x29')
763 $_->select('.name')->replace_content('Epitaph')
764 ->select('.age')->replace_content('<redacted>')
767 { repeat_between => '.between' }
777 See L<HTML::Zoom> for authors.
781 See L<HTML::Zoom> for the license.