1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Delegates to stream_from_code on the stream_utils object reached through
# $self->_zconfig; every remaining argument is forwarded unchanged.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Delegates to stream_from_array on the stream_utils object reached through
# $self->_zconfig; every remaining argument is forwarded unchanged.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Delegates to stream_from_proto on the stream_utils object reached through
# $self->_zconfig; every remaining argument is forwarded unchanged.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
# Body of a delegating helper whose `sub` line is not visible in this view
# (presumably _stream_concat, which other methods in this file call -- TODO
# confirm). Forwards every remaining argument to stream_utils->stream_concat.
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Delegates to flatten_stream_of_streams on the stream_utils object reached
# through $self->_zconfig; every remaining argument is forwarded unchanged.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# Short alias: forwards the invocant and all arguments to set_attribute.
27 sub set_attr { shift->set_attribute(@_); }
# NOTE(review): the enclosing `sub` line is not visible in this view; this is
# presumably set_attribute, the method set_attr/set_class/set_id forward to.
# Normalise the arguments; the value comes back already passed through
# html_escape by _parse_attribute_args.
31 my ($name, $value) = $self->_parse_attribute_args(@_);
# $_[0] is treated as an event hash (presumably inside a per-event closure
# whose `sub {` line is not shown); $a refers to its attribute hash.
33 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute was already present on the event.
34 my $e = exists $a->{$name};
# Build a shallow copy of the event with the attribute set. raw and raw_attrs
# are blanked (presumably so the tag is re-serialised from attrs -- confirm).
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise attribute arguments into a (name, escaped value) pair.
#
# A single hash reference whose 'name' and 'value' entries are both true is
# rejected with die, even though the message text calls it a deprecation
# warning.
# Returns: ($name, $value), where $value has been passed through the
# configured parser's html_escape.
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
49 die "WARNING: Long form arg (name => 'class', value => 'x') is deprecated"
50 if(@_ == 1 && $_[0]->{'name'} && $_[0]->{'value'});
# With more than one argument, use them as (name, value); otherwise read both
# from the 'name' and 'value' keys of the single hash reference argument.
51 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
52 return ($name, $self->_zconfig->parser->html_escape($value));
# Body of a retired entry point (its `sub` line is not visible in this view):
# it dies unconditionally, pointing callers at add_to_attribute.
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Shortcut: add_to_attribute with the attribute name fixed to 'class'.
59 sub add_class { shift->add_to_attribute('class',@_) }
# Shortcut: remove_attribute with the attribute name fixed to 'class'.
# NOTE(review): remove_attribute as written in this file reads only its first
# argument as the attribute name, so a class value passed here looks unused
# and the whole class attribute is removed -- confirm that is intended.
61 sub remove_class { shift->remove_attribute('class',@_) }
# Shortcut: set_attribute with the attribute name fixed to 'class'.
63 sub set_class { shift->set_attribute('class',@_) }
# Shortcut: set_attribute with the attribute name fixed to 'id'.
65 sub set_id { shift->set_attribute('id',@_) }
# Append a value to an attribute, creating the attribute when it is absent.
# Arguments are normalised (and the value escaped) by _parse_attribute_args.
67 sub add_to_attribute {
69 my ($name, $value) = $self->_parse_attribute_args(@_);
# $_[0] is treated as an event hash (presumably inside a per-event closure
# whose `sub {` line is not shown); $a refers to its attribute hash.
71 my $a = (my $evt = $_[0])->{attrs};
72 my $e = exists $a->{$name};
# Result is a shallow copy of the event with raw and raw_attrs blanked.
73 +{ %$evt, raw => undef, raw_attrs => undef,
# The existing value (if any) and the new value are joined with one space.
76 $name => join(' ', ($e ? $a->{$name} : ()), $value)
78 ($e # add to name list if not present
80 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Remove an attribute from matching events.
# $args is either the attribute name itself or a hash reference carrying it
# under the 'name' key. Only this first argument is read.
85 sub remove_attribute {
86 my ($self, $args) = @_;
87 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
# $_[0] is treated as an event hash (presumably inside a per-event closure
# whose `sub {` line is not shown). Events lacking the attribute are
# returned untouched.
89 my $a = (my $evt = $_[0])->{attrs};
90 return $evt unless exists $a->{$name};
# Delete from a copy of the attribute hash so the incoming event's own attrs
# hash is left unmodified.
91 $a = { %$a }; delete $a->{$name};
92 +{ %$evt, raw => undef, raw_attrs => undef,
# Drop the name from the ordered attribute-name list as well.
94 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# Transform, create or delete one attribute through a caller-supplied coderef.
# Takes (name, code) as a list, or a single hash reference with 'name' and
# 'code' keys.
99 sub transform_attribute {
101 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
# Work on copies of the event's attribute hash and attribute-name list.
105 my %a = %{ $evt->{attrs} };
106 my @names = @{ $evt->{attr_names} };
# Call $code with the current value (undef when the attribute is absent) and
# classify the outcome: an undef result for an existing attribute counts as a
# deletion, a defined result for a missing attribute as an addition.
108 my $existed_before = exists $a{$name};
109 my $v = $code->( $a{$name} );
110 my $deleted = $existed_before && ! defined $v;
111 my $added = ! $existed_before && defined $v;
# Remove the attribute's name from the ordered name list.
118 @names = grep $_ ne $name, @names;
# Result is a shallow copy of the event with raw and raw_attrs blanked.
122 +{ %$evt, raw => undef, raw_attrs => undef,
125 ? (attr_names => \@names )
# Rewrite a match's text content through $code.
# $code reads its input from $_ and its result is converted to a stream with
# _stream_from_proto.
131 sub transform_content {
132 my ( $self, $code ) = @_;
# Filter built with replace_content; its replacement is produced by $code.
134 my $replace = $self->replace_content(
136 $self->_stream_from_proto($code->($_));
140 my ( $evt, $stream ) = @_;
# Pull the next item from the stream and inspect its type.
142 my $item_ref = $stream->next;
143 if ( $item_ref->{type} eq 'TEXT' ) {
# Localize $_ to the item's raw text for the duration of the replace call,
# so that $code can read it.
144 local $_ = $item_ref->{raw};
145 return $replace->($evt, $stream);
# Non-TEXT item: hand $evt and the stream to _stream_concat instead.
# NOTE(review): as far as the visible lines show, the item pulled via next
# above is not put back on this path -- confirm.
148 return $self->_stream_concat($evt, $stream);
# NOTE(review): the enclosing `sub` line is not visible in this view; other
# methods in this file call $self->collect with an options hash reference, so
# this is presumably collect().
# Options read from $options: into, passthrough, content, filter,
# flush_before.
154 my ($self, $options) = @_;
155 my ($into, $passthrough, $content, $filter, $flush_before) =
156 @{$options}{qw(into passthrough content filter flush_before)};
158 my ($evt, $stream) = @_;
159 # We wipe the contents of @$into here so that other actions depending
160 # on this (such as a repeater) can be invoked multiple times easily.
161 # I -suspect- it's better for that state reset to be managed here; if it
162 # ever becomes painful the decision should be revisited
# Reset: empty when collecting content only, otherwise seeded with the
# current event.
164 @$into = $content ? () : ($evt);
# An in-place-close event is returned as-is when passing through or when only
# its content was requested.
166 if ($evt->{is_in_place_close}) {
167 return $evt if $passthrough || $content;
170 my $name = $evt->{name};
# Name of the stream method used by the collector loop below: 'peek' in
# content mode, 'next' otherwise.
172 my $_next = $content ? 'peek' : 'next';
# Run the optional filter with $_ localized to the stream and the stream also
# passed as its argument; the filter's result replaces $stream.
175 $stream = do { local $_ = $stream; $filter->($stream) };
178 local $_ = $self->_stream_concat(
179 $self->_stream_from_array($evt),
184 $evt = $stream->next;
# Code-backed stream that reads events from $stream, adjusting $depth on
# OPEN and CLOSE events.
187 my $collector = $self->_stream_from_code(sub {
188 return unless $stream;
189 while (my ($evt) = $stream->$_next) {
190 $depth++ if ($evt->{type} eq 'OPEN');
191 $depth-- if ($evt->{type} eq 'CLOSE');
# Record the event when an 'into' array was supplied; emit it only in
# passthrough mode.
195 push(@$into, $evt) if $into;
196 return $evt if $passthrough;
199 push(@$into, $evt) if $into;
# In content mode the event was only peeked (see $_next); consume it now.
200 $stream->next if $content;
201 return $evt if $passthrough;
# The loop ended without the closing tag for $name being seen.
203 die "Never saw closing </${name}> before end of source";
# Either copy the event with a flush flag added, or substitute an EMPTY
# event carrying the flush flag.
206 if ($passthrough||$content) {
207 $evt = { %$evt, flush => 1 };
209 $evt = { type => 'EMPTY', flush => 1 };
# Return the leading event together with the collector stream when any of
# these options is set.
212 return ($passthrough||$content||$flush_before)
213 ? [ $evt, $collector ]
# Convenience wrapper: calls collect() with content => 1 layered over a
# shallow copy of the caller's options ($options may be omitted).
218 sub collect_content {
219 my ($self, $options) = @_;
220 $self->collect({ %{$options||{}}, content => 1 })
# NOTE(review): the enclosing `sub` line is not visible in this view; the new
# events are placed ahead of the collected element, which suggests this is
# add_before -- confirm.
224 my ($self, $events) = @_;
# Collector built with passthrough set.
225 my $coll_proto = $self->collect({ passthrough => 1 });
227 my $emit = $self->_stream_from_proto($events);
# `&$coll_proto` without parentheses calls the collector with the current @_.
228 my $coll = &$coll_proto;
# [ event, stream ] result: new events, then the leading event, then the rest.
230 if(ref $coll eq 'ARRAY') {
231 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
232 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
# Single event result: new events followed by that event.
233 } elsif(ref $coll eq 'HASH') {
234 return [$emit, $coll];
236 return $self->_stream_concat($emit, $coll);
# Nothing collected: only the new events are returned.
238 } else { return $emit }
# NOTE(review): the enclosing `sub` line is not visible in this view; the new
# events are placed after the collected element, which suggests this is
# add_after -- confirm.
243 my ($self, $events) = @_;
# Collector built with passthrough set.
244 my $coll_proto = $self->collect({ passthrough => 1 });
247 my $emit = $self->_stream_from_proto($events);
# `&$coll_proto` without parentheses calls the collector with the current @_.
248 my $coll = &$coll_proto;
249 return ref($coll) eq 'HASH' # single event, no collect
# Otherwise keep the leading event and append the new events after the
# collected stream.
251 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Insert $events at the start of the matched element's content.
255 sub prepend_content {
256 my ($self, $events) = @_;
# Collector limited to the element's content, with passthrough set.
257 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
260 my $emit = $self->_stream_from_proto($events);
# In-place-close event (presumably a self-closing tag -- confirm): strip raw
# and is_in_place_close from a copy, then follow it with the new content and
# an explicit CLOSE event.
261 if ($evt->{is_in_place_close}) {
262 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# NOTE(review): only the first event of $emit is taken here ($emit->next).
263 return [ $evt, $self->_stream_from_array(
264 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# `&$coll_proto` without parentheses calls the collector with the current @_.
267 my $coll = &$coll_proto;
# New events go in front of the collected content stream.
268 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
# NOTE(review): the enclosing `sub` line is not visible in this view; the new
# events are placed after the collected content, which suggests this is
# append_content -- confirm.
273 my ($self, $events) = @_;
# Collector limited to the element's content, with passthrough set.
274 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
277 my $emit = $self->_stream_from_proto($events);
# In-place-close event (presumably a self-closing tag -- confirm): strip raw
# and is_in_place_close from a copy, then follow it with the new content and
# an explicit CLOSE event.
278 if ($evt->{is_in_place_close}) {
279 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# NOTE(review): only the first event of $emit is taken here ($emit->next).
280 return [ $evt, $self->_stream_from_array(
281 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# `&$coll_proto` without parentheses calls the collector with the current @_.
284 my $coll = &$coll_proto;
# New events go after the collected content stream.
285 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# NOTE(review): the enclosing `sub` line is not visible in this view;
# replace_content() in this file calls $self->replace with this argument
# list, so this is presumably replace().
# $options is handed straight to collect().
290 my ($self, $replace_with, $options) = @_;
291 my $coll_proto = $self->collect($options);
293 my ($evt, $stream) = @_;
294 my $emit = $self->_stream_from_proto($replace_with);
# `&$coll_proto` without parentheses calls the collector with the current @_.
295 my $coll = &$coll_proto;
296 # if we're replacing the contents of an in place close
297 # then we need to handle that here
298 if ($options->{content}
299 && ref($coll) eq 'HASH'
300 && $coll->{is_in_place_close}
# Take the next event from the stream as the element's closing event.
302 my $close = $stream->next;
303 # shallow copy and nuke in place and raw (to force smart print)
304 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
305 $emit = $self->_stream_concat(
307 $self->_stream_from_array($close),
310 # For a straightforward replace operation we can, in fact, do the emit
311 # -before- the collect, and my first cut did so. However in order to
312 # use the captured content in generating the new content, we need
313 # the collect stage to happen first - and it seems highly unlikely
314 # that in normal operation the collect phase will take long enough
315 # for the difference to be noticeable
# Shape the result according to what the collector returned: an
# [ event, stream ] pair, a single event hash, or a stream.
318 ? (ref $coll eq 'ARRAY' # [ event, stream ]
319 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
320 : (ref $coll eq 'HASH' # event or stream?
322 : $self->_stream_concat($coll, $emit))
# Convenience wrapper: calls replace() with content => 1 layered over a
# shallow copy of the caller's options ($options may be omitted).
329 sub replace_content {
330 my ($self, $replace_with, $options) = @_;
331 $self->replace($replace_with, { %{$options||{}}, content => 1 })
# NOTE(review): the enclosing `sub` line is not visible in this view; a
# wrapper in this file calls $self->repeat with this argument list, so this is
# presumably repeat().
335 my ($self, $repeat_for, $options) = @_;
# NOTE(review): the next two statements write to the caller's $options hash
# (an 'into' entry is stored, 'repeat_between' is removed).
336 $options->{into} = \my @into;
338 my $repeat_between = delete $options->{repeat_between};
339 if ($repeat_between) {
# Filter that selects the repeat_between match and collects it into @between.
340 $options->{filter} = sub {
341 $_->select($repeat_between)->collect({ into => \@between })
345 my $s = $self->_stream_from_proto($repeat_for);
346 # We have to test $repeat_between not @between here because
347 # at the point we're constructing our return stream @between
348 # hasn't been populated yet - but we can test @between in the
349 # map routine because it has been by then and that saves us doing
350 # the extra stream construction if we don't need it.
351 $self->_flatten_stream_of_streams(do {
352 if ($repeat_between) {
# Each iteration sees $_ localized to a fresh stream over the collected events.
354 local $_ = $self->_stream_from_array(@into);
# Append the separator events only when some were collected and $s->peek
# reports a further item.
355 (@between && $s->peek)
356 ? $self->_stream_concat(
357 $_[0]->($_), $self->_stream_from_array(@between)
363 local $_ = $self->_stream_from_array(@into);
# Hand the repeater to replace() together with the adjusted options.
369 $self->replace($repeater, $options);
# NOTE(review): the enclosing `sub` line is not visible in this view;
# presumably repeat_content. Calls repeat() with content => 1 layered over a
# shallow copy of the caller's options ($options may be omitted).
373 my ($self, $repeat_for, $options) = @_;
374 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
381 HTML::Zoom::FilterBuilder - Add Filters to a Stream
385 Create an L<HTML::Zoom> instance:
388 my $root = HTML::Zoom
392 <title>Default Title</title>
394 <body bad_attr='junk'>
400 Create a new attribute on the C<body> tag:
404 ->set_attribute(class=>'main');
406 Add an extra value to an existing attribute:
410 ->add_to_attribute(class=>'one-column');
412 Set the content of the C<title> tag:
416 ->replace_content('Hello World');
418 Set content from another L<HTML::Zoom> instance:
420 my $body = HTML::Zoom
424 <p id="p2">Is the Time</p>
430 ->replace_content($body);
432 Set an attribute on multiple matches:
436 ->set_attribute(class=>'para');
442 ->remove_attribute('bad_attr');
448 my $output = $root->to_html;
455 <title>Hello World</title>
457 <body class="main one-column"><div id="stuff">
458 <p class="para">Well Now</p>
459 <p id="p2" class="para">Is the Time</p>
467 is($output, $expect, 'Synopsis code works ok');
473 Given a L<HTML::Zoom> stream, provide methods to apply filters which
474 alter the content of that stream.
478 This class defines the following public API
482 Sets an attribute of a given name to a given value for all matching selections.
486 ->set_attribute(class=>'paragraph')
488 ->set_attribute({name=>'class', value=>'divider'});
491 Overrides existing values, if such exist. When multiple L</set_attribute>
492 calls are made against the same or overlapping selection sets, the final
495 =head2 add_to_attribute
497 Adds a value to an existing attribute, or creates one if the attribute does not
498 yet exist. You may call this method with either an Array or HashRef of Args.
500 Here's the 'long form' HashRef:
504 ->set_attribute(class=>'paragraph')
506 ->add_to_attribute({name=>'class', value=>'divider'});
508 And the exact same effect using the 'short form' Array:
512 ->set_attribute(class=>'paragraph')
514 ->add_to_attribute(class=>'divider');
516 Attributes with more than one value will have a dividing space.
518 =head2 remove_attribute
520 Removes an attribute and all its values.
524 ->set_attribute(class=>'paragraph')
526 ->remove_attribute('class');
528 Removes attributes from the original stream or events already added.
530 =head2 transform_attribute
532 Transforms (or creates or deletes) an attribute by running the passed
533 coderef on it. If the coderef returns nothing, the attribute is
538 ->transform_attribute( href => sub {
539 ( my $a = shift ) =~ s/localhost/example.com/;
546 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
547 optional common options as hash reference.
551 =item into [ARRAY REFERENCE]
553 Where to save collected events (selected elements).
555 $z1->select('#main-content')
556 ->collect({ into => \@body })
558 $z2->select('#main-content')
564 Run filter on collected elements (locally setting $_ to stream, and passing
565 stream as an argument to given code reference). Filtered stream would be
570 filter => sub { $_->select('.inner')->replace_content('bar!') },
574 It can be used to further filter selection. For example
578 filter => sub { $_->select('td') },
582 is equivalent to (not implemented yet) descendant selector combination, i.e.
586 =item passthrough [BOOLEAN]
588 Extract copy of elements; the stream is unchanged (it does not remove collected
589 elements). For example without 'passthrough'
591 HTML::Zoom->from_html('<foo><bar /></foo>')
593 ->collect({ content => 1 })
596 returns '<foo></foo>', while with C<passthrough> option
598 HTML::Zoom->from_html('<foo><bar /></foo>')
600 ->collect({ content => 1, passthrough => 1 })
603 returns '<foo><bar /></foo>'.
605 =item content [BOOLEAN]
607 Collect content of the element, and not the element itself.
611 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
616 would return '<p>foo</p>', while
618 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
620 ->collect({ content => 1 })
623 would return '<h1></h1><p>foo</p>'.
625 See also L</collect_content>.
627 =item flush_before [BOOLEAN]
629 Generate a C<flush> event before collecting, to ensure that the HTML generated up
630 to the selected element being collected is flushed through to the browser. Usually
631 used in L</repeat> or L</repeat_content>.
635 =head2 collect_content
637 Collects contents of L<HTML::Zoom/select> result.
639 HTML::Zoom->from_file($foo)
640 ->select('#main-content')
641 ->collect_content({ into => \@foo_body })
644 ->replace_content(\@foo_body)
647 Equivalent to running L</collect> with C<content> option set.
651 Given a L<HTML::Zoom/select> result, add given content (which might be string,
652 array or another L<HTML::Zoom> object) before it.
655 ->select('input[name="foo"]')
656 ->add_before(\ '<span class="warning">required field</span>');
660 Like L</add_before>, only after L<HTML::Zoom/select> result.
666 You can add zoom events directly
670 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
672 =head2 prepend_content
674 Similar to add_before, but adds the content to the match.
677 ->from_html(q[<p>World</p>])
679 ->prepend_content("Hello ")
682 ## <p>Hello World</p>
684 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
686 =head2 append_content
688 Similar to add_after, but adds the content to the match.
691 ->from_html(q[<p>Hello </p>])
693 ->append_content("World")
696 ## <p>Hello World</p>
698 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
702 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
703 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
704 (via hash reference).
706 =head2 replace_content
708 Given a L<HTML::Zoom/select> result, replace the content with a string, array
709 or another L<HTML::Zoom> object.
712 ->select('title, #greeting')
713 ->replace_content('Hello world!');
715 =head2 transform_content
717 Given a L<HTML::Zoom/select> result, transform the content with a code
718 reference. This allows you, for example, to localize your template text
719 elements or do anything else with the node's text content.
725 "please click on: $_"
731 For a given selection, repeat over transformations, typically for the purposes
732 of populating lists. Takes either an array of anonymous subroutines or a zoom-
733 able object consisting of transformation.
735 Example of array reference style (when it doesn't matter that all iterations are
738 $zoom->select('table')->repeat([
742 $_->select('td')->replace_content($e);
747 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
748 collected elements), and with said result passed as parameter to subroutine.
750 You might want to use CodeStream when you don't have all elements upfront
752 $zoom->select('.contents')->repeat(sub {
753 HTML::Zoom::CodeStream->new({
755 while (my $line = $fh->getline) {
757 $_->select('.lno')->replace_content($fh->input_line_number)
758 ->select('.line')->replace_content($line)
766 In addition to common options as in L</collect>, it also supports:
770 =item repeat_between [SELECTOR]
772 Selects object to be repeated between items. In the case of array this object
773 is put between elements, in case of iterator it is put between results of
774 subsequent iterations, in the case of streamable it is put between events
777 See documentation for L</repeat_content>
781 =head2 repeat_content
783 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
784 this result to this iterator. Accepts the same options as L</repeat>.
786 Equivalent to using C<content> option with L</repeat>.
793 $_->select('.name')->replace_content('Matt')
794 ->select('.age')->replace_content('26')
797 $_->select('.name')->replace_content('Mark')
798 ->select('.age')->replace_content('0x29')
801 $_->select('.name')->replace_content('Epitaph')
802 ->select('.age')->replace_content('<redacted>')
805 { repeat_between => '.between' }
815 See L<HTML::Zoom> for authors.
819 See L<HTML::Zoom> for the license.