1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Thin delegate: takes the invocant off @_ and forwards the remaining
# arguments to stream_from_code on the stream_utils object reached through
# the invocant's _zconfig.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Thin delegate: takes the invocant off @_ and forwards the remaining
# arguments to stream_from_array on the stream_utils object reached through
# the invocant's _zconfig.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Thin delegate: takes the invocant off @_ and forwards the remaining
# arguments to stream_from_proto on the stream_utils object reached through
# the invocant's _zconfig.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Thin delegate: takes the invocant off @_ and forwards the remaining
# arguments to flatten_stream_of_streams on the stream_utils object reached
# through the invocant's _zconfig.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short alias for set_attribute: calls it on the invocant with all other
# arguments passed through unchanged.
27 sub set_attr { shift->set_attribute(@_); }
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise attribute arguments into a (name, value) pair.
#
# Accepts either a flat list (name => 'value') or a single hash reference
# with 'name' and 'value' keys. Returns the name together with the value
# after it has been passed through html_escape on the configured parser.
# NOTE(review): $self is used below but its assignment is not visible in
# this excerpt - presumably it is shifted off @_ first; confirm.
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
# Despite the "WARNING" wording this is raised with die, so a single hash
# reference whose 'name' and 'value' entries are both true is fatal. The
# hash-reference branch below is therefore only reached when at least one
# of those two entries is false or missing.
49 die "WARNING: Long form arg (name => 'class', value => 'x') is deprecated"
50 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
# More than one argument means the flat list form; otherwise the pair is
# pulled out of the hash reference with a slice.
51 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
# Only the value is escaped; the name is returned as given.
52 return ($name, $self->_zconfig->parser->html_escape($value));
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: add_to_attribute with the attribute name fixed to
# 'class'; the caller's arguments follow it.
59 sub add_class { shift->add_to_attribute('class',@_) }
# Convenience wrapper: remove_attribute with 'class' as its first argument.
# NOTE(review): remove_attribute unpacks only ($self, $args), so anything
# passed here (e.g. a single class name) appears to be ignored and the whole
# class attribute is removed - confirm that this is intended.
61 sub remove_class { shift->remove_attribute('class',@_) }
# Convenience wrapper: set_attribute with the attribute name fixed to
# 'class'; the caller's arguments follow it.
63 sub set_class { shift->set_attribute('class',@_) }
# Convenience wrapper: set_attribute with the attribute name fixed to 'id';
# the caller's arguments follow it.
65 sub set_id { shift->set_attribute('id',@_) }
# Append a value to the named attribute of an event, creating the attribute
# when it is absent. The (name, value) pair comes from
# _parse_attribute_args, which also escapes the value.
# NOTE(review): the lines between the ones shown (including the `sub {`
# that receives the event in $_[0]) are not visible in this excerpt.
67 sub add_to_attribute {
69 my ($name, $value) = $self->_parse_attribute_args(@_);
# $a is the event's current attrs hash; $e records whether the attribute
# is already present in it.
71 my $a = (my $evt = $_[0])->{attrs};
72 my $e = exists $a->{$name};
# A new event hash is built from a copy of the old one, with raw and
# raw_attrs set to undef in the copy.
73 +{ %$evt, raw => undef, raw_attrs => undef,
# The existing value (if there is one) and the new value are joined with a
# single space.
76 $name => join(' ', ($e ? $a->{$name} : ()), $value)
78 ($e # add to name list if not present
80 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Remove the named attribute from an event.
#
# $args is either the attribute name itself or a hash reference whose
# 'name' key holds it. Only these two arguments are read from @_.
85 sub remove_attribute {
86 my ($self, $args) = @_;
87 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
89 my $a = (my $evt = $_[0])->{attrs};
# Nothing to remove: hand the event back untouched.
90 return $evt unless exists $a->{$name};
# Delete from a shallow copy so the event's own attrs hash is left alone.
91 $a = { %$a }; delete $a->{$name};
# New event hash built from a copy, with raw and raw_attrs set to undef.
92 +{ %$evt, raw => undef, raw_attrs => undef,
# The name is filtered out of the attr_names list as well.
94 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Transform one attribute by running a code reference on its current value.
#
# Arguments are either a flat (name, code) list or a single hash reference
# with 'name' and 'code' keys.
99 sub transform_attribute {
101 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
# Work on copies of the event's attrs hash and attr_names list.
105 my %a = %{ $evt->{attrs} };
106 my @names = @{ $evt->{attr_names} };
108 my $existed_before = exists $a{$name};
# The callback receives the current value, which is undef when the
# attribute is absent.
109 my $v = $code->( $a{$name} );
# An undefined result for an existing attribute counts as a deletion; a
# defined result for a missing attribute counts as an addition.
110 my $deleted = $existed_before && ! defined $v;
111 my $added = ! $existed_before && defined $v;
118 @names = grep $_ ne $name, @names;
# New event hash built from a copy, with raw and raw_attrs set to undef.
122 +{ %$evt, raw => undef, raw_attrs => undef,
125 ? (attr_names => \@names )
132 my ($self, $options) = @_;
133 my ($into, $passthrough, $content, $filter, $flush_before) =
134 @{$options}{qw(into passthrough content filter flush_before)};
136 my ($evt, $stream) = @_;
137 # We wipe the contents of @$into here so that other actions depending
138 # on this (such as a repeater) can be invoked multiple times easily.
139 # I -suspect- it's better for that state reset to be managed here; if it
140 # ever becomes painful the decision should be revisited
142 @$into = $content ? () : ($evt);
144 if ($evt->{is_in_place_close}) {
145 return $evt if $passthrough || $content;
148 my $name = $evt->{name};
150 my $_next = $content ? 'peek' : 'next';
153 $stream = do { local $_ = $stream; $filter->($stream) };
156 local $_ = $self->_stream_concat(
157 $self->_stream_from_array($evt),
162 $evt = $stream->next;
165 my $collector = $self->_stream_from_code(sub {
166 return unless $stream;
167 while (my ($evt) = $stream->$_next) {
168 $depth++ if ($evt->{type} eq 'OPEN');
169 $depth-- if ($evt->{type} eq 'CLOSE');
173 push(@$into, $evt) if $into;
174 return $evt if $passthrough;
177 push(@$into, $evt) if $into;
178 $stream->next if $content;
179 return $evt if $passthrough;
181 die "Never saw closing </${name}> before end of source";
184 if ($passthrough||$content) {
185 $evt = { %$evt, flush => 1 };
187 $evt = { type => 'EMPTY', flush => 1 };
190 return ($passthrough||$content||$flush_before)
191 ? [ $evt, $collector ]
# Shorthand for collect with the content option forced to 1.
# The caller's options (an optional hash reference) are copied into a fresh
# hash first, so the hash passed in is not modified here.
196 sub collect_content {
197 my ($self, $options) = @_;
198 $self->collect({ %{$options||{}}, content => 1 })
202 my ($self, $events) = @_;
203 my $coll_proto = $self->collect({ passthrough => 1 });
205 my $emit = $self->_stream_from_proto($events);
206 my $coll = &$coll_proto;
208 if(ref $coll eq 'ARRAY') {
209 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
210 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
211 } elsif(ref $coll eq 'HASH') {
212 return [$emit, $coll];
214 return $self->_stream_concat($emit, $coll);
216 } else { return $emit }
221 my ($self, $events) = @_;
222 my $coll_proto = $self->collect({ passthrough => 1 });
225 my $emit = $self->_stream_from_proto($events);
226 my $coll = &$coll_proto;
227 return ref($coll) eq 'HASH' # single event, no collect
229 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Put the given events in front of the collected content of an element.
# NOTE(review): the `sub {` that receives $evt is not visible in this
# excerpt; the comments below describe only the lines shown.
233 sub prepend_content {
234 my ($self, $events) = @_;
# Collector configured with both passthrough and content set.
235 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
238 my $emit = $self->_stream_from_proto($events);
# In-place-close element: copy the event, drop raw and is_in_place_close
# from the copy, and follow it with an event taken from $emit plus an
# explicit CLOSE event for the same element name.
# NOTE(review): only a single $emit->next is used on this path, so it looks
# like just the first event of the new content is emitted - confirm.
239 if ($evt->{is_in_place_close}) {
240 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
241 return [ $evt, $self->_stream_from_array(
242 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# Otherwise run the collector with the current @_ (the &$coll_proto call
# form) and put the new content ahead of the collected stream.
245 my $coll = &$coll_proto;
246 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
251 my ($self, $events) = @_;
252 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
255 my $emit = $self->_stream_from_proto($events);
256 if ($evt->{is_in_place_close}) {
257 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
258 return [ $evt, $self->_stream_from_array(
259 $emit->next, { type => 'CLOSE', name => $evt->{name} }
262 my $coll = &$coll_proto;
263 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
268 my ($self, $replace_with, $options) = @_;
269 my $coll_proto = $self->collect($options);
271 my ($evt, $stream) = @_;
272 my $emit = $self->_stream_from_proto($replace_with);
273 my $coll = &$coll_proto;
274 # if we're replacing the contents of an in place close
275 # then we need to handle that here
276 if ($options->{content}
277 && ref($coll) eq 'HASH'
278 && $coll->{is_in_place_close}
280 my $close = $stream->next;
281 # shallow copy and nuke in place and raw (to force smart print)
282 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
283 $emit = $self->_stream_concat(
285 $self->_stream_from_array($close),
288 # For a straightforward replace operation we can, in fact, do the emit
289 # -before- the collect, and my first cut did so. However in order to
290 # use the captured content in generating the new content, we need
291 # the collect stage to happen first - and it seems highly unlikely
292 # that in normal operation the collect phase will take long enough
293 # for the difference to be noticeable
296 ? (ref $coll eq 'ARRAY' # [ event, stream ]
297 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
298 : (ref $coll eq 'HASH' # event or stream?
300 : $self->_stream_concat($coll, $emit))
# Shorthand for replace with the content option forced to 1.
# The caller's options (an optional hash reference) are copied into a fresh
# hash first, so the hash passed in is not modified here.
307 sub replace_content {
308 my ($self, $replace_with, $options) = @_;
309 $self->replace($replace_with, { %{$options||{}}, content => 1 })
313 my ($self, $repeat_for, $options) = @_;
314 $options->{into} = \my @into;
316 my $repeat_between = delete $options->{repeat_between};
317 if ($repeat_between) {
318 $options->{filter} = sub {
319 $_->select($repeat_between)->collect({ into => \@between })
323 my $s = $self->_stream_from_proto($repeat_for);
324 # We have to test $repeat_between not @between here because
325 # at the point we're constructing our return stream @between
326 # hasn't been populated yet - but we can test @between in the
327 # map routine because it has been by then and that saves us doing
328 # the extra stream construction if we don't need it.
329 $self->_flatten_stream_of_streams(do {
330 if ($repeat_between) {
332 local $_ = $self->_stream_from_array(@into);
333 (@between && $s->peek)
334 ? $self->_stream_concat(
335 $_[0]->($_), $self->_stream_from_array(@between)
341 local $_ = $self->_stream_from_array(@into);
347 $self->replace($repeater, $options);
351 my ($self, $repeat_for, $options) = @_;
352 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
359 HTML::Zoom::FilterBuilder - Add Filters to a Stream
363 Create an L<HTML::Zoom> instance:
366 my $root = HTML::Zoom
370 <title>Default Title</title>
372 <body bad_attr='junk'>
378 Create a new attribute on the C<body> tag:
382 ->set_attribute(class=>'main');
384 Add an extra value to an existing attribute:
388 ->add_to_attribute(class=>'one-column');
390 Set the content of the C<title> tag:
394 ->replace_content('Hello World');
396 Set content from another L<HTML::Zoom> instance:
398 my $body = HTML::Zoom
402 <p id="p2">Is the Time</p>
408 ->replace_content($body);
410 Set an attribute on multiple matches:
414 ->set_attribute(class=>'para');
420 ->remove_attribute('bad_attr');
426 my $output = $root->to_html;
433 <title>Hello World</title>
435 <body class="main one-column"><div id="stuff">
436 <p class="para">Well Now</p>
437 <p id="p2" class="para">Is the Time</p>
445 is($output, $expect, 'Synopsis code works ok');
451 Given a L<HTML::Zoom> stream, provide methods to apply filters which
452 alter the content of that stream.
456 This class defines the following public API
460 Sets an attribute of a given name to a given value for all matching selections.
464 ->set_attribute(class=>'paragraph')
466 ->set_attribute(class=>'divider');
468 Overrides existing values, if such exist. When multiple L</set_attribute>
469 calls are made against the same or overlapping selection sets, the final
472 =head2 add_to_attribute
474 Adds a value to an existing attribute, or creates one if the attribute does not
475 yet exist. You may call this method with either an Array or HashRef of Args.
479 ->set_attribute({class => 'paragraph', name => 'test'})
481 ->add_to_attribute(class=>'divider');
483 Attributes with more than one value will have a dividing space.
485 =head2 remove_attribute
487 Removes an attribute and all its values.
491 ->set_attribute(class=>'paragraph')
493 ->remove_attribute('class');
495 Removes attributes from the original stream or events already added.
497 =head2 transform_attribute
499 Transforms (or creates or deletes) an attribute by running the passed
500 coderef on it. If the coderef returns nothing, the attribute is
505 ->transform_attribute( href => sub {
506 ( my $a = shift ) =~ s/localhost/example.com/;
513 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
514 optional common options as a hash reference.
518 =item into [ARRAY REFERENCE]
520 Where to save collected events (selected elements).
522 $z1->select('#main-content')
523 ->collect({ into => \@body })
525 $z2->select('#main-content')
531 Run filter on collected elements (locally setting $_ to stream, and passing
532 stream as an argument to given code reference). Filtered stream would be
537 filter => sub { $_->select('.inner')->replace_content('bar!') },
541 It can be used to further filter selection. For example
545 filter => sub { $_->select('td') },
549 is equivalent to (not implemented yet) descendant selector combination, i.e.
553 =item passthrough [BOOLEAN]
555 Extract copy of elements; the stream is unchanged (it does not remove collected
556 elements). For example without 'passthrough'
558 HTML::Zoom->from_html('<foo><bar /></foo>')
560 ->collect({ content => 1 })
563 returns '<foo></foo>', while with C<passthrough> option
565 HTML::Zoom->from_html('<foo><bar /></foo>')
567 ->collect({ content => 1, passthrough => 1 })
570 returns '<foo><bar /></foo>'.
572 =item content [BOOLEAN]
574 Collect content of the element, and not the element itself.
578 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
583 would return '<p>foo</p>', while
585 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
587 ->collect({ content => 1 })
590 would return '<h1></h1><p>foo</p>'.
592 See also L</collect_content>.
594 =item flush_before [BOOLEAN]
596 Generate a C<flush> event before collecting, to ensure that the HTML generated up
597 to the selected element being collected is flushed through to the browser. Usually
598 used in L</repeat> or L</repeat_content>.
602 =head2 collect_content
604 Collects contents of L<HTML::Zoom/select> result.
606 HTML::Zoom->from_file($foo)
607 ->select('#main-content')
608 ->collect_content({ into => \@foo_body })
611 ->replace_content(\@foo_body)
614 Equivalent to running L</collect> with C<content> option set.
618 Given a L<HTML::Zoom/select> result, add given content (which might be string,
619 array or another L<HTML::Zoom> object) before it.
622 ->select('input[name="foo"]')
623 ->add_before(\ '<span class="warning">required field</span>');
627 Like L</add_before>, only after L<HTML::Zoom/select> result.
633 You can add zoom events directly
637 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
639 =head2 prepend_content
641 Similar to add_before, but adds the content to the match.
644 ->from_html(q[<p>World</p>])
646 ->prepend_content("Hello ")
649 ## <p>Hello World</p>
651 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
653 =head2 append_content
655 Similar to add_after, but adds the content to the match.
658 ->from_html(q[<p>Hello </p>])
660 ->append_content("World")
663 ## <p>Hello World</p>
665 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
669 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
670 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
671 (via hash reference).
673 =head2 replace_content
675 Given a L<HTML::Zoom/select> result, replace the content with a string, array
676 or another L<HTML::Zoom> object.
679 ->select('title, #greeting')
680 ->replace_content('Hello world!');
684 For a given selection, repeat over transformations, typically for the purposes
685 of populating lists. Takes either an array of anonymous subroutines or a zoom-
686 able object consisting of transformation.
688 Example of array reference style (when it doesn't matter that all iterations are
691 $zoom->select('table')->repeat([
695 $_->select('td')->replace_content($e);
700 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
701 collected elements), and with said result passed as parameter to subroutine.
703 You might want to use CodeStream when you don't have all elements upfront
705 $zoom->select('.contents')->repeat(sub {
706 HTML::Zoom::CodeStream->new({
708 while (my $line = $fh->getline) {
710 $_->select('.lno')->replace_content($fh->input_line_number)
711 ->select('.line')->replace_content($line)
719 In addition to common options as in L</collect>, it also supports:
723 =item repeat_between [SELECTOR]
725 Selects object to be repeated between items. In the case of array this object
726 is put between elements, in case of iterator it is put between results of
727 subsequent iterations, in the case of streamable it is put between events
730 See documentation for L</repeat_content>
734 =head2 repeat_content
736 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
737 this result to this iterator. Accepts the same options as L</repeat>.
739 Equivalent to using C<content> option with L</repeat>.
746 $_->select('.name')->replace_content('Matt')
747 ->select('.age')->replace_content('26')
750 $_->select('.name')->replace_content('Mark')
751 ->select('.age')->replace_content('0x29')
754 $_->select('.name')->replace_content('Epitaph')
755 ->select('.age')->replace_content('<redacted>')
758 { repeat_between => '.between' }
768 See L<HTML::Zoom> for authors.
772 See L<HTML::Zoom> for the license.