1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short alias for set_attribute: hands the caller's arguments on untouched.
27 sub set_attr { my $self = shift; return $self->set_attribute(@_); }
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
49 die "Long form arg (name => 'class', value => 'x') is no longer supported"
50 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
51 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
52 return ($name, $self->_zconfig->parser->html_escape($value));
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Shortcut for add_to_attribute with the attribute name fixed to 'class';
# the remaining arguments are passed along unchanged.
59 sub add_class { my $self = shift; return $self->add_to_attribute(class => @_) }
# Shortcut for remove_attribute with the attribute name fixed to 'class'.
# NOTE(review): remove_attribute in this file takes only one argument after
# the invocant and uses it as the attribute name, so any class names passed
# here appear to be ignored and the whole 'class' attribute is removed.
# Confirm whether removing a single class value was intended.
61 sub remove_class { my $self = shift; return $self->remove_attribute(class => @_) }
# Shortcut for set_attribute with the attribute name fixed to 'class';
# the remaining arguments are passed along unchanged.
63 sub set_class { my $self = shift; return $self->set_attribute(class => @_) }
# Shortcut for set_attribute with the attribute name fixed to 'id';
# the remaining arguments are passed along unchanged.
65 sub set_id { my $self = shift; return $self->set_attribute(id => @_) }
67 sub add_to_attribute {
69 my ($name, $value) = $self->_parse_attribute_args(@_);
71 my $a = (my $evt = $_[0])->{attrs};
72 my $e = exists $a->{$name};
73 +{ %$evt, raw => undef, raw_attrs => undef,
76 $name => join(' ', ($e ? $a->{$name} : ()), $value)
78 ($e # add to name list if not present
80 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
85 sub remove_attribute {
86 my ($self, $args) = @_;
87 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
89 my $a = (my $evt = $_[0])->{attrs};
90 return $evt unless exists $a->{$name};
91 $a = { %$a }; delete $a->{$name};
92 +{ %$evt, raw => undef, raw_attrs => undef,
94 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
99 sub transform_attribute {
101 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
105 my %a = %{ $evt->{attrs} };
106 my @names = @{ $evt->{attr_names} };
108 my $existed_before = exists $a{$name};
109 my $v = $code->( $a{$name} );
110 my $deleted = $existed_before && ! defined $v;
111 my $added = ! $existed_before && defined $v;
118 @names = grep $_ ne $name, @names;
122 +{ %$evt, raw => undef, raw_attrs => undef,
125 ? (attr_names => \@names )
132 my ($self, $options) = @_;
133 my ($into, $passthrough, $content, $filter, $flush_before) =
134 @{$options}{qw(into passthrough content filter flush_before)};
136 my ($evt, $stream) = @_;
137 # We wipe the contents of @$into here so that other actions depending
138 # on this (such as a repeater) can be invoked multiple times easily.
139 # I -suspect- it's better for that state reset to be managed here; if it
140 # ever becomes painful the decision should be revisited
142 @$into = $content ? () : ($evt);
144 if ($evt->{is_in_place_close}) {
145 return $evt if $passthrough || $content;
148 my $name = $evt->{name};
150 my $_next = $content ? 'peek' : 'next';
153 $stream = do { local $_ = $stream; $filter->($stream) };
156 local $_ = $self->_stream_concat(
157 $self->_stream_from_array($evt),
162 $evt = $stream->next;
165 my $collector = $self->_stream_from_code(sub {
166 return unless $stream;
167 while (my ($evt) = $stream->$_next) {
168 $depth++ if ($evt->{type} eq 'OPEN');
169 $depth-- if ($evt->{type} eq 'CLOSE');
173 push(@$into, $evt) if $into;
174 return $evt if $passthrough;
177 push(@$into, $evt) if $into;
178 $stream->next if $content;
179 return $evt if $passthrough;
181 die "Never saw closing </${name}> before end of source";
184 if ($passthrough||$content) {
185 $evt = { %$evt, flush => 1 };
187 $evt = { type => 'EMPTY', flush => 1 };
190 return ($passthrough||$content||$flush_before)
191 ? [ $evt, $collector ]
196 sub collect_content {
197 my ($self, $options) = @_;
198 $self->collect({ %{$options||{}}, content => 1 })
202 my ($self, $events) = @_;
203 my $coll_proto = $self->collect({ passthrough => 1 });
205 my $emit = $self->_stream_from_proto($events);
206 my $coll = &$coll_proto;
208 if(ref $coll eq 'ARRAY') {
209 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
210 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
211 } elsif(ref $coll eq 'HASH') {
212 return [$emit, $coll];
214 return $self->_stream_concat($emit, $coll);
216 } else { return $emit }
221 my ($self, $events) = @_;
222 my $coll_proto = $self->collect({ passthrough => 1 });
225 my $emit = $self->_stream_from_proto($events);
226 my $coll = &$coll_proto;
227 return ref($coll) eq 'HASH' # single event, no collect
229 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
233 sub prepend_content {
234 my ($self, $events) = @_;
235 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
238 my $emit = $self->_stream_from_proto($events);
239 if ($evt->{is_in_place_close}) {
240 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
241 return [ $evt, $self->_stream_from_array(
242 $emit->next, { type => 'CLOSE', name => $evt->{name} }
245 my $coll = &$coll_proto;
246 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
251 my ($self, $events) = @_;
252 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
255 my $emit = $self->_stream_from_proto($events);
256 if ($evt->{is_in_place_close}) {
257 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
258 return [ $evt, $self->_stream_from_array(
259 $emit->next, { type => 'CLOSE', name => $evt->{name} }
262 my $coll = &$coll_proto;
263 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
268 my ($self, $replace_with, $options) = @_;
269 my $coll_proto = $self->collect($options);
271 my ($evt, $stream) = @_;
272 my $emit = $self->_stream_from_proto($replace_with);
273 my $coll = &$coll_proto;
274 # if we're replacing the contents of an in place close
275 # then we need to handle that here
276 if ($options->{content}
277 && ref($coll) eq 'HASH'
278 && $coll->{is_in_place_close}
280 my $close = $stream->next;
281 # shallow copy and nuke in place and raw (to force smart print)
282 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
283 $emit = $self->_stream_concat(
285 $self->_stream_from_array($close),
288 # For a straightforward replace operation we can, in fact, do the emit
289 # -before- the collect, and my first cut did so. However in order to
290 # use the captured content in generating the new content, we need
291 # the collect stage to happen first - and it seems highly unlikely
292 # that in normal operation the collect phase will take long enough
293 # for the difference to be noticeable
296 ? (ref $coll eq 'ARRAY' # [ event, stream ]
297 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
298 : (ref $coll eq 'HASH' # event or stream?
300 : $self->_stream_concat($coll, $emit))
307 sub replace_content {
308 my ($self, $replace_with, $options) = @_;
309 $self->replace($replace_with, { %{$options||{}}, content => 1 })
313 my ($self, $repeat_for, $options) = @_;
314 $options->{into} = \my @into;
316 my $repeat_between = delete $options->{repeat_between};
317 if ($repeat_between) {
318 $options->{filter} = sub {
319 $_->select($repeat_between)->collect({ into => \@between })
323 my $s = $self->_stream_from_proto($repeat_for);
324 # We have to test $repeat_between not @between here because
325 # at the point we're constructing our return stream @between
326 # hasn't been populated yet - but we can test @between in the
327 # map routine because it has been by then and that saves us doing
328 # the extra stream construction if we don't need it.
329 $self->_flatten_stream_of_streams(do {
330 if ($repeat_between) {
332 local $_ = $self->_stream_from_array(@into);
333 (@between && $s->peek)
334 ? $self->_stream_concat(
335 $_[0]->($_), $self->_stream_from_array(@between)
341 local $_ = $self->_stream_from_array(@into);
347 $self->replace($repeater, $options);
351 my ($self, $repeat_for, $options) = @_;
352 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
359 HTML::Zoom::FilterBuilder - Add Filters to a Stream
363 Create an L<HTML::Zoom> instance:
366 my $root = HTML::Zoom
370 <title>Default Title</title>
372 <body bad_attr='junk'>
378 Create a new attribute on the C<body> tag:
382 ->set_attribute(class=>'main');
384 Add an extra value to an existing attribute:
388 ->add_to_attribute(class=>'one-column');
390 Set the content of the C<title> tag:
394 ->replace_content('Hello World');
396 Set content from another L<HTML::Zoom> instance:
398 my $body = HTML::Zoom
402 <p id="p2">Is the Time</p>
408 ->replace_content($body);
410 Set an attribute on multiple matches:
414 ->set_attribute(class=>'para');
420 ->remove_attribute('bad_attr');
426 my $output = $root->to_html;
433 <title>Hello World</title>
435 <body class="main one-column"><div id="stuff">
436 <p class="para">Well Now</p>
437 <p id="p2" class="para">Is the Time</p>
445 is($output, $expect, 'Synopsis code works ok');
451 Given a L<HTML::Zoom> stream, provide methods to apply filters which
452 alter the content of that stream.
456 This class defines the following public API
460 Sets an attribute of a given name to a given value for all matching selections.
464 ->set_attribute(class=>'paragraph')
466 ->set_attribute({name=>'class', value=>'divider'});
469 Overrides existing values, if such exist. When multiple L</set_attribute>
470 calls are made against the same or overlapping selection sets, the final
473 =head2 add_to_attribute
475 Adds a value to an existing attribute, or creates one if the attribute does not
476 yet exist. You may call this method with either an Array or HashRef of Args.
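478 Here's the 'long form' HashRef (B<note:> the argument parser in this module appears to die with "Long form arg ... is no longer supported" when given this form, so prefer the short form):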
482 ->set_attribute(class=>'paragraph')
484 ->add_to_attribute({name=>'class', value=>'divider'});
486 And the exact same effect using the 'short form' Array:
490 ->set_attribute(class=>'paragraph')
492 ->add_to_attribute(class=>'divider');
494 Attributes with more than one value will have a dividing space.
496 =head2 remove_attribute
498 Removes an attribute and all its values.
502 ->set_attribute(class=>'paragraph')
504 ->remove_attribute('class');
506 Removes attributes from the original stream or events already added.
508 =head2 transform_attribute
510 Transforms (or creates or deletes) an attribute by running the passed
511 coderef on it. If the coderef returns nothing, the attribute is
516 ->transform_attribute( href => sub {
517 ( my $a = shift ) =~ s/localhost/example.com/;
524 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
525 optional common options as hash reference.
529 =item into [ARRAY REFERENCE]
531 Where to save collected events (selected elements).
533 $z1->select('#main-content')
534 ->collect({ into => \@body })
536 $z2->select('#main-content')
542 Run filter on collected elements (locally setting $_ to stream, and passing
543 stream as an argument to given code reference). Filtered stream would be
548 filter => sub { $_->select('.inner')->replace_content('bar!') },
552 It can be used to further filter selection. For example
556 filter => sub { $_->select('td') },
560 is equivalent to (not implemented yet) descendant selector combination, i.e.
564 =item passthrough [BOOLEAN]
566 Extract copy of elements; the stream is unchanged (it does not remove collected
567 elements). For example without 'passthrough'
569 HTML::Zoom->from_html('<foo><bar /></foo>')
571 ->collect({ content => 1 })
574 returns '<foo></foo>', while with C<passthrough> option
576 HTML::Zoom->from_html('<foo><bar /></foo>')
578 ->collect({ content => 1, passthrough => 1 })
581 returns '<foo><bar /></foo>'.
583 =item content [BOOLEAN]
585 Collect content of the element, and not the element itself.
589 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
594 would return '<p>foo</p>', while
596 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
598 ->collect({ content => 1 })
601 would return '<h1></h1><p>foo</p>'.
603 See also L</collect_content>.
605 =item flush_before [BOOLEAN]
607 Generate C<flush> event before collecting, to ensure that the HTML generated up
608 to the selected element being collected is flushed through to the browser. Usually
609 used in L</repeat> or L</repeat_content>.
613 =head2 collect_content
615 Collects contents of L<HTML::Zoom/select> result.
617 HTML::Zoom->from_file($foo)
618 ->select('#main-content')
619 ->collect_content({ into => \@foo_body })
622 ->replace_content(\@foo_body)
625 Equivalent to running L</collect> with C<content> option set.
629 Given a L<HTML::Zoom/select> result, add given content (which might be string,
630 array or another L<HTML::Zoom> object) before it.
633 ->select('input[name="foo"]')
634 ->add_before(\ '<span class="warning">required field</span>');
638 Like L</add_before>, only after L<HTML::Zoom/select> result.
644 You can add zoom events directly
648 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
650 =head2 prepend_content
652 Similar to add_before, but adds the content to the match.
655 ->from_html(q[<p>World</p>])
657 ->prepend_content("Hello ")
660 ## <p>Hello World</p>
662 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
664 =head2 append_content
666 Similar to add_after, but adds the content to the match.
669 ->from_html(q[<p>Hello </p>])
671 ->append_content("World")
674 ## <p>Hello World</p>
676 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
680 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
681 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
682 (via hash reference).
684 =head2 replace_content
686 Given a L<HTML::Zoom/select> result, replace the content with a string, array
687 or another L<HTML::Zoom> object.
690 ->select('title, #greeting')
691 ->replace_content('Hello world!');
695 For a given selection, repeat over transformations, typically for the purposes
696 of populating lists. Takes either an array of anonymous subroutines or a
697 zoomable object consisting of transformations.
699 Example of array reference style (when it doesn't matter that all iterations are
702 $zoom->select('table')->repeat([
706 $_->select('td')->replace_content($e);
711 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
712 collected elements), and with said result passed as parameter to subroutine.
714 You might want to use CodeStream when you don't have all elements upfront
716 $zoom->select('.contents')->repeat(sub {
717 HTML::Zoom::CodeStream->new({
719 while (my $line = $fh->getline) {
721 $_->select('.lno')->replace_content($fh->input_line_number)
722 ->select('.line')->replace_content($line)
730 In addition to common options as in L</collect>, it also supports:
734 =item repeat_between [SELECTOR]
736 Selects object to be repeated between items. In the case of array this object
737 is put between elements, in case of iterator it is put between results of
738 subsequent iterations, in the case of streamable it is put between events
741 See documentation for L</repeat_content>
745 =head2 repeat_content
747 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
748 this result to this iterator. Accepts the same options as L</repeat>.
750 Equivalent to using the C<content> option with L</repeat>.
757 $_->select('.name')->replace_content('Matt')
758 ->select('.age')->replace_content('26')
761 $_->select('.name')->replace_content('Mark')
762 ->select('.age')->replace_content('0x29')
765 $_->select('.name')->replace_content('Epitaph')
766 ->select('.age')->replace_content('<redacted>')
769 { repeat_between => '.between' }
779 See L<HTML::Zoom> for authors.
783 See L<HTML::Zoom> for the license.