1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Private helper: forwards all remaining arguments to stream_from_code on the
# stream_utils object reached through the invocant's _zconfig.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Private helper: forwards all remaining arguments to stream_from_array on the
# stream_utils object reached through the invocant's _zconfig.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Private helper: forwards all remaining arguments to stream_from_proto on the
# stream_utils object reached through the invocant's _zconfig.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Private helper: forwards all remaining arguments to
# flatten_stream_of_streams on the stream_utils object reached through the
# invocant's _zconfig.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
29 my ($name, $value) = $self->_parse_attribute_args(@_);
31 my $a = (my $evt = $_[0])->{attrs};
32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalises attribute arguments into a (name, value) pair.
# When more than one argument is present they are taken as (name, value);
# otherwise the single argument is treated as a hash reference and its
# 'name' and 'value' keys are used.
# Returns the name unchanged and the value after it has been passed through
# the configured parser's html_escape.
42 sub _parse_attribute_args {
44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
51 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Adds a value to the named attribute of an event.
# Arguments are normalised (and the value escaped) by _parse_attribute_args.
# The event is shallow-copied with raw and raw_attrs set to undef. If the
# attribute already exists the new value is joined onto the old one with a
# single space; otherwise the value stands alone and the name is appended to
# attr_names.
54 sub add_to_attribute {
56 my ($name, $value) = $self->_parse_attribute_args(@_);
58 my $a = (my $evt = $_[0])->{attrs};
# $e records whether the attribute was already present on the event.
59 my $e = exists $a->{$name};
60 +{ %$evt, raw => undef, raw_attrs => undef,
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
65 ($e # add to name list if not present
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Removes the named attribute from an event.
# $args is either the attribute name itself or a hash reference whose 'name'
# key holds it.
# An event that does not carry the attribute is returned unchanged. Otherwise
# the attrs hash is copied before the key is deleted, the event is
# shallow-copied with raw and raw_attrs set to undef, and the name is
# filtered out of attr_names.
72 sub remove_attribute {
73 my ($self, $args) = @_;
74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
76 my $a = (my $evt = $_[0])->{attrs};
77 return $evt unless exists $a->{$name};
# Copy first so the delete does not modify the incoming event's attrs hash.
78 $a = { %$a }; delete $a->{$name};
79 +{ %$evt, raw => undef, raw_attrs => undef,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Transforms the named attribute of an event by running a code reference on
# its current value.
# Arguments are either a (name, code) list or a single hash reference with
# 'name' and 'code' keys.
# The code reference receives the current attribute value (undef when the
# attribute is absent). The event's attrs and attr_names are copied before
# any change is made.
86 sub transform_attribute {
88 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
92 my %a = %{ $evt->{attrs} };
93 my @names = @{ $evt->{attr_names} };
95 my $existed_before = exists $a{$name};
96 my $v = $code->( $a{$name} );
# An undef result for an existing attribute counts as a deletion; a defined
# result for a missing attribute counts as an addition.
97 my $deleted = $existed_before && ! defined $v;
98 my $added = ! $existed_before && defined $v;
105 @names = grep $_ ne $name, @names;
109 +{ %$evt, raw => undef, raw_attrs => undef,
112 ? (attr_names => \@names )
119 my ($self, $options) = @_;
120 my ($into, $passthrough, $content, $filter, $flush_before) =
121 @{$options}{qw(into passthrough content filter flush_before)};
123 my ($evt, $stream) = @_;
124 # We wipe the contents of @$into here so that other actions depending
125 # on this (such as a repeater) can be invoked multiple times easily.
126 # I -suspect- it's better for that state reset to be managed here; if it
127 # ever becomes painful the decision should be revisited
129 @$into = $content ? () : ($evt);
131 if ($evt->{is_in_place_close}) {
132 return $evt if $passthrough || $content;
135 my $name = $evt->{name};
137 my $_next = $content ? 'peek' : 'next';
140 $stream = do { local $_ = $stream; $filter->($stream) };
143 local $_ = $self->_stream_concat(
144 $self->_stream_from_array($evt),
149 $evt = $stream->next;
152 my $collector = $self->_stream_from_code(sub {
153 return unless $stream;
154 while (my ($evt) = $stream->$_next) {
155 $depth++ if ($evt->{type} eq 'OPEN');
156 $depth-- if ($evt->{type} eq 'CLOSE');
160 push(@$into, $evt) if $into;
161 return $evt if $passthrough;
164 push(@$into, $evt) if $into;
165 $stream->next if $content;
166 return $evt if $passthrough;
168 die "Never saw closing </${name}> before end of source";
171 if ($passthrough||$content) {
172 $evt = { %$evt, flush => 1 };
174 $evt = { type => 'EMPTY', flush => 1 };
177 return ($passthrough||$content||$flush_before)
178 ? [ $evt, $collector ]
# Convenience wrapper around collect: copies the caller's options (an absent
# $options is treated as an empty hash) and forces content => 1.
183 sub collect_content {
184 my ($self, $options) = @_;
185 $self->collect({ %{$options||{}}, content => 1 })
189 my ($self, $events) = @_;
190 my $coll_proto = $self->collect({ passthrough => 1 });
192 my $emit = $self->_stream_from_proto($events);
193 my $coll = &$coll_proto;
195 if(ref $coll eq 'ARRAY') {
196 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
197 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
198 } elsif(ref $coll eq 'HASH') {
199 return [$emit, $coll];
201 return $self->_stream_concat($emit, $coll);
203 } else { return $emit }
208 my ($self, $events) = @_;
209 my $coll_proto = $self->collect({ passthrough => 1 });
212 my $emit = $self->_stream_from_proto($events);
213 my $coll = &$coll_proto;
214 return ref($coll) eq 'HASH' # single event, no collect
216 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Places $events ahead of the existing content of a matched element.
# A collect prototype is built with passthrough and content enabled, and
# $events is turned into a stream with _stream_from_proto.
# For an in-place-close event, the event is copied without its raw and
# is_in_place_close keys and returned together with a stream holding the
# emitted event followed by a synthetic CLOSE event of the same name.
# Otherwise the emitted stream is concatenated in front of the collected
# content stream.
220 sub prepend_content {
221 my ($self, $events) = @_;
222 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
225 my $emit = $self->_stream_from_proto($events);
226 if ($evt->{is_in_place_close}) {
227 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
228 return [ $evt, $self->_stream_from_array(
# NOTE(review): only the first event of $emit is pulled here; confirm that
# multi-event content is not expected for in-place-close elements.
229 $emit->next, { type => 'CLOSE', name => $evt->{name} }
232 my $coll = &$coll_proto;
233 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
238 my ($self, $events) = @_;
239 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
242 my $emit = $self->_stream_from_proto($events);
243 if ($evt->{is_in_place_close}) {
244 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
245 return [ $evt, $self->_stream_from_array(
246 $emit->next, { type => 'CLOSE', name => $evt->{name} }
249 my $coll = &$coll_proto;
250 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
255 my ($self, $replace_with, $options) = @_;
256 my $coll_proto = $self->collect($options);
258 my ($evt, $stream) = @_;
259 my $emit = $self->_stream_from_proto($replace_with);
260 my $coll = &$coll_proto;
261 # if we're replacing the contents of an in place close
262 # then we need to handle that here
263 if ($options->{content}
264 && ref($coll) eq 'HASH'
265 && $coll->{is_in_place_close}
267 my $close = $stream->next;
268 # shallow copy and nuke in place and raw (to force smart print)
269 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
270 $emit = $self->_stream_concat(
272 $self->_stream_from_array($close),
275 # For a straightforward replace operation we can, in fact, do the emit
276 # -before- the collect, and my first cut did so. However in order to
277 # use the captured content in generating the new content, we need
278 # the collect stage to happen first - and it seems highly unlikely
279 # that in normal operation the collect phase will take long enough
280 # for the difference to be noticeable
283 ? (ref $coll eq 'ARRAY' # [ event, stream ]
284 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
285 : (ref $coll eq 'HASH' # event or stream?
287 : $self->_stream_concat($coll, $emit))
# Convenience wrapper around replace: passes $replace_with through and copies
# the caller's options (an absent $options is treated as an empty hash),
# forcing content => 1.
294 sub replace_content {
295 my ($self, $replace_with, $options) = @_;
296 $self->replace($replace_with, { %{$options||{}}, content => 1 })
300 my ($self, $repeat_for, $options) = @_;
301 $options->{into} = \my @into;
303 my $repeat_between = delete $options->{repeat_between};
304 if ($repeat_between) {
305 $options->{filter} = sub {
306 $_->select($repeat_between)->collect({ into => \@between })
310 my $s = $self->_stream_from_proto($repeat_for);
311 # We have to test $repeat_between not @between here because
312 # at the point we're constructing our return stream @between
313 # hasn't been populated yet - but we can test @between in the
314 # map routine because it has been by then and that saves us doing
315 # the extra stream construction if we don't need it.
316 $self->_flatten_stream_of_streams(do {
317 if ($repeat_between) {
319 local $_ = $self->_stream_from_array(@into);
320 (@between && $s->peek)
321 ? $self->_stream_concat(
322 $_[0]->($_), $self->_stream_from_array(@between)
328 local $_ = $self->_stream_from_array(@into);
334 $self->replace($repeater, $options);
338 my ($self, $repeat_for, $options) = @_;
339 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
346 HTML::Zoom::FilterBuilder - Add Filters to a Stream
350 Create an L<HTML::Zoom> instance:
353 my $root = HTML::Zoom
357 <title>Default Title</title>
359 <body bad_attr='junk'>
365 Create a new attribute on the C<body> tag:
369 ->set_attribute(class=>'main');
371 Add an extra value to an existing attribute:
375 ->add_to_attribute(class=>'one-column');
377 Set the content of the C<title> tag:
381 ->replace_content('Hello World');
383 Set content from another L<HTML::Zoom> instance:
385 my $body = HTML::Zoom
389 <p id="p2">Is the Time</p>
395 ->replace_content($body);
397 Set an attribute on multiple matches:
401 ->set_attribute(class=>'para');
407 ->remove_attribute('bad_attr');
413 my $output = $root->to_html;
420 <title>Hello World</title>
422 <body class="main one-column"><div id="stuff">
423 <p class="para">Well Now</p>
424 <p id="p2" class="para">Is the Time</p>
432 is($output, $expect, 'Synopsis code works ok');
438 Given a L<HTML::Zoom> stream, provide methods to apply filters which
439 alter the content of that stream.
443 This class defines the following public API
447 Sets an attribute of a given name to a given value for all matching selections.
451 ->set_attribute(class=>'paragraph')
453 ->set_attribute({name=>'class', value=>'divider'});
456 Overrides existing values, if such exist. When multiple L</set_attribute>
457 calls are made against the same or overlapping selection sets, the final
460 =head2 add_to_attribute
462 Adds a value to an existing attribute, or creates one if the attribute does not
463 yet exist. You may call this method with either an Array or HashRef of Args.
465 Here's the 'long form' HashRef:
469 ->set_attribute(class=>'paragraph')
471 ->add_to_attribute({name=>'class', value=>'divider'});
473 And the exact same effect using the 'short form' Array:
477 ->set_attribute(class=>'paragraph')
479 ->add_to_attribute(class=>'divider');
481 Attributes with more than one value will have a dividing space.
483 =head2 remove_attribute
485 Removes an attribute and all its values.
489 ->set_attribute(class=>'paragraph')
491 ->remove_attribute('class');
493 Removes attributes from the original stream or events already added.
495 =head2 transform_attribute
497 Transforms (or creates or deletes) an attribute by running the passed
498 coderef on it. If the coderef returns nothing, the attribute is
503 ->transform_attribute( href => sub {
504 ( my $a = shift ) =~ s/localhost/example.com/;
511 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
512 optional common options as hash reference.
516 =item into [ARRAY REFERENCE]
518 Where to save collected events (selected elements).
520 $z1->select('#main-content')
521 ->collect({ into => \@body })
523 $z2->select('#main-content')
529 Run filter on collected elements (locally setting $_ to stream, and passing
530 stream as an argument to given code reference). Filtered stream would be
535 filter => sub { $_->select('.inner')->replace_content('bar!') },
539 It can be used to further filter selection. For example
543 filter => sub { $_->select('td') },
547 is equivalent to (not implemented yet) descendant selector combination, i.e.
551 =item passthrough [BOOLEAN]
553 Extract copy of elements; the stream is unchanged (it does not remove collected
554 elements). For example without 'passthrough'
556 HTML::Zoom->from_html('<foo><bar /></foo>')
558 ->collect({ content => 1 })
561 returns '<foo></foo>', while with C<passthrough> option
563 HTML::Zoom->from_html('<foo><bar /></foo>')
565 ->collect({ content => 1, passthrough => 1 })
568 returns '<foo><bar /></foo>'.
570 =item content [BOOLEAN]
572 Collect content of the element, and not the element itself.
576 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
581 would return '<p>foo</p>', while
583 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
585 ->collect({ content => 1 })
588 would return '<h1></h1><p>foo</p>'.
590 See also L</collect_content>.
592 =item flush_before [BOOLEAN]
594 Generate C<flush> event before collecting, to ensure that the HTML generated up
595 to selected element being collected is flushed through to the browser. Usually
596 used in L</repeat> or L</repeat_content>.
600 =head2 collect_content
602 Collects contents of L<HTML::Zoom/select> result.
604 HTML::Zoom->from_file($foo)
605 ->select('#main-content')
606 ->collect_content({ into => \@foo_body })
609 ->replace_content(\@foo_body)
612 Equivalent to running L</collect> with C<content> option set.
616 Given a L<HTML::Zoom/select> result, add given content (which might be string,
617 array or another L<HTML::Zoom> object) before it.
620 ->select('input[name="foo"]')
621 ->add_before(\ '<span class="warning">required field</span>');
625 Like L</add_before>, only after L<HTML::Zoom/select> result.
631 You can add zoom events directly
635 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
637 =head2 prepend_content
639 Similar to add_before, but adds the content to the match.
642 ->from_html(q[<p>World</p>])
644 ->prepend_content("Hello ")
647 ## <p>Hello World</p>
649 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
651 =head2 append_content
653 Similar to add_after, but adds the content to the match.
656 ->from_html(q[<p>Hello </p>])
658 ->append_content("World")
661 ## <p>Hello World</p>
663 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
667 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
668 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
669 (via hash reference).
671 =head2 replace_content
673 Given a L<HTML::Zoom/select> result, replace the content with a string, array
674 or another L<HTML::Zoom> object.
677 ->select('title, #greeting')
678 ->replace_content('Hello world!');
682 For a given selection, repeat over transformations, typically for the purposes
683 of populating lists. Takes either an array of anonymous subroutines or a zoom-
684 able object consisting of transformation.
686 Example of array reference style (when it doesn't matter that all iterations are
689 $zoom->select('table')->repeat([
693 $_->select('td')->replace_content($e);
698 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
699 collected elements), and with said result passed as parameter to subroutine.
701 You might want to use CodeStream when you don't have all elements upfront
703 $zoom->select('.contents')->repeat(sub {
704 HTML::Zoom::CodeStream->new({
706 while (my $line = $fh->getline) {
708 $_->select('.lno')->replace_content($fh->input_line_number)
709 ->select('.line')->replace_content($line)
717 In addition to common options as in L</collect>, it also supports:
721 =item repeat_between [SELECTOR]
723 Selects object to be repeated between items. In the case of array this object
724 is put between elements, in case of iterator it is put between results of
725 subsequent iterations, in the case of streamable it is put between events
728 See documentation for L</repeat_content>
732 =head2 repeat_content
734 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
735 this result to this iterator. Accepts the same options as L</repeat>.
737 Equivalent to using C<content> option with L</repeat>.
744 $_->select('.name')->replace_content('Matt')
745 ->select('.age')->replace_content('26')
748 $_->select('.name')->replace_content('Mark')
749 ->select('.age')->replace_content('0x29')
752 $_->select('.name')->replace_content('Epitaph')
753 ->select('.age')->replace_content('<redacted>')
756 { repeat_between => '.between' }
766 See L<HTML::Zoom> for authors.
770 See L<HTML::Zoom> for the license.