1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Delegate to stream_from_code on the stream-utilities object obtained via
# $self->_zconfig->stream_utils. All arguments are forwarded and the
# delegate's result is returned as-is.
sub _stream_from_code {
  my ($self, @args) = @_;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->stream_from_code(@args);
}
# Delegate to stream_from_array on the stream-utilities object obtained via
# $self->_zconfig->stream_utils. All arguments (the events to stream) are
# forwarded and the delegate's result is returned as-is.
sub _stream_from_array {
  my ($self, @events) = @_;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->stream_from_array(@events);
}
# Delegate to stream_from_proto on the stream-utilities object obtained via
# $self->_zconfig->stream_utils. All arguments are forwarded and the
# delegate's result is returned as-is.
sub _stream_from_proto {
  my ($self, @args) = @_;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->stream_from_proto(@args);
}
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Delegate to flatten_stream_of_streams on the stream-utilities object
# obtained via $self->_zconfig->stream_utils. All arguments are forwarded
# and the delegate's result is returned as-is.
sub _flatten_stream_of_streams {
  my ($self, @args) = @_;
  my $utils = $self->_zconfig->stream_utils;
  return $utils->flatten_stream_of_streams(@args);
}
# Short alias for set_attribute: every argument is forwarded and the
# result of set_attribute is returned.
sub set_attr {
  my ($self, @args) = @_;
  return $self->set_attribute(@args);
}
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise the arguments given to the attribute filters into a
# (name, value) pair.
#
# allow ->add_to_attribute(name => 'value')
#    or ->add_to_attribute({ name => 'name', value => 'value' })
#
# A single hash reference whose 'name' and 'value' entries are both true is
# the deprecated long form and makes this method die. The returned value
# has been passed through the configured parser's html_escape.
sub _parse_attribute_args {
  my ($self, @args) = @_;

  if (@args == 1 && $args[0]->{'name'} && $args[0]->{'value'}) {
    die "WARNING: Long form arg (name => 'class', value => 'x') is deprecated";
  }

  my ($name, $value);
  if (@args > 1) {
    ($name, $value) = @args;
  }
  else {
    ($name, $value) = @{ $args[0] }{qw(name value)};
  }

  my $escaped = $self->_zconfig->parser->html_escape($value);
  return ($name, $escaped);
}
56 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Convenience wrapper: calls add_to_attribute with the attribute name fixed
# to 'class', forwarding the remaining arguments.
sub add_class {
  my ($self, @values) = @_;
  return $self->add_to_attribute('class', @values);
}
# Convenience wrapper: calls remove_from_attribute with the attribute name
# fixed to 'class', forwarding the remaining arguments.
sub remove_class {
  my ($self, @values) = @_;
  return $self->remove_from_attribute('class', @values);
}
# Convenience wrapper: calls set_attribute with the attribute name fixed to
# 'class', forwarding the remaining arguments.
sub set_class {
  my ($self, @values) = @_;
  return $self->set_attribute('class', @values);
}
# Convenience wrapper: calls set_attribute with the attribute name fixed to
# 'id', forwarding the remaining arguments.
sub set_id {
  my ($self, @values) = @_;
  return $self->set_attribute('id', @values);
}
# Build a filter that appends a value to an attribute.
#
# Arguments are normalised by _parse_attribute_args into (name, value).
# Returns a code reference taking an event hash and returning a modified
# shallow copy of it:
#   * if the attribute already exists, the new value is joined to the
#     current one with a single space;
#   * otherwise the attribute is created and its name is appended to the
#     event's attr_names list.
# raw and raw_attrs are set to undef on the copy.
sub add_to_attribute {
  my $self = shift;
  my ($name, $value) = $self->_parse_attribute_args(@_);

  return sub {
    my ($evt) = @_;
    my $attrs = $evt->{attrs};

    my %changed = (%$evt, raw => undef, raw_attrs => undef);

    if (exists $attrs->{$name}) {
      $changed{attrs} = { %$attrs, $name => join(' ', $attrs->{$name}, $value) };
    }
    else {
      # add to name list if not present
      $changed{attrs}      = { %$attrs, $name => join(' ', $value) };
      $changed{attr_names} = [ @{ $evt->{attr_names} }, $name ];
    }

    return \%changed;
  };
}
# Build a filter that removes one value from a space-separated attribute
# (for example one class name from class="a b c").
#
# Arguments are normalised by _parse_attribute_args into (name, value).
# Returns a code reference taking an event hash:
#   * if the event has no such attribute, the event is returned unchanged;
#   * otherwise a shallow copy is returned in which every whitespace
#     separated token equal to the value has been dropped from the
#     attribute, and raw / raw_attrs are set to undef.
# The attribute itself is kept even when it ends up empty.
sub remove_from_attribute {
  my $self = shift;

  # _parse_attribute_args returns a two element list, so it has to be
  # unpacked in list context; a scalar assignment would keep only the value.
  my ($name, $value) = $self->_parse_attribute_args(@_);

  return sub {
    my ($evt) = @_;
    my $attrs = $evt->{attrs};
    return $evt unless exists $attrs->{$name};

    my $current = defined $attrs->{$name} ? $attrs->{$name} : '';

    #TODO needs to support multiple removes
    my @kept = grep { $_ ne $value } split ' ', $current;

    return +{
      %$evt, raw => undef, raw_attrs => undef,
      attrs => { %$attrs, $name => join(' ', @kept) },
    };
  };
}
# Build a filter that deletes an attribute from matching events.
#
# $args is either the attribute name itself or a hash reference carrying it
# under the 'name' key. Returns a code reference taking an event hash:
#   * if the event does not have the attribute, it is returned unchanged;
#   * otherwise a shallow copy is returned without the attribute, with the
#     name filtered out of attr_names and raw / raw_attrs set to undef.
sub remove_attribute {
  my ($self, $args) = @_;

  my $name = $args;
  $name = $args->{name} if ref($args) eq 'HASH';

  return sub {
    my ($evt) = @_;
    my $attrs = $evt->{attrs};
    return $evt unless exists $attrs->{$name};

    # Work on a copy so the incoming event's attrs hash is left alone.
    my %remaining = %$attrs;
    delete $remaining{$name};
    my @names = grep { $_ ne $name } @{ $evt->{attr_names} };

    return +{
      %$evt, raw => undef, raw_attrs => undef,
      attrs      => \%remaining,
      attr_names => \@names,
    };
  };
}
116 sub transform_attribute {
118 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
122 my %a = %{ $evt->{attrs} };
123 my @names = @{ $evt->{attr_names} };
125 my $existed_before = exists $a{$name};
126 my $v = $code->( $a{$name} );
127 my $deleted = $existed_before && ! defined $v;
128 my $added = ! $existed_before && defined $v;
135 @names = grep $_ ne $name, @names;
139 +{ %$evt, raw => undef, raw_attrs => undef,
142 ? (attr_names => \@names )
149 my ($self, $options) = @_;
150 my ($into, $passthrough, $content, $filter, $flush_before) =
151 @{$options}{qw(into passthrough content filter flush_before)};
153 my ($evt, $stream) = @_;
154 # We wipe the contents of @$into here so that other actions depending
155 # on this (such as a repeater) can be invoked multiple times easily.
156 # I -suspect- it's better for that state reset to be managed here; if it
157 # ever becomes painful the decision should be revisited
159 @$into = $content ? () : ($evt);
161 if ($evt->{is_in_place_close}) {
162 return $evt if $passthrough || $content;
165 my $name = $evt->{name};
167 my $_next = $content ? 'peek' : 'next';
170 $stream = do { local $_ = $stream; $filter->($stream) };
173 local $_ = $self->_stream_concat(
174 $self->_stream_from_array($evt),
179 $evt = $stream->next;
182 my $collector = $self->_stream_from_code(sub {
183 return unless $stream;
184 while (my ($evt) = $stream->$_next) {
185 $depth++ if ($evt->{type} eq 'OPEN');
186 $depth-- if ($evt->{type} eq 'CLOSE');
190 push(@$into, $evt) if $into;
191 return $evt if $passthrough;
194 push(@$into, $evt) if $into;
195 $stream->next if $content;
196 return $evt if $passthrough;
198 die "Never saw closing </${name}> before end of source";
201 if ($passthrough||$content) {
202 $evt = { %$evt, flush => 1 };
204 $evt = { type => 'EMPTY', flush => 1 };
207 return ($passthrough||$content||$flush_before)
208 ? [ $evt, $collector ]
# Same as collect, but with the 'content' option forced on. The caller's
# options hash (which may be omitted) is copied, not modified.
sub collect_content {
  my ($self, $options) = @_;
  my %opts = (%{ $options || {} }, content => 1);
  return $self->collect(\%opts);
}
219 my ($self, $events) = @_;
220 my $coll_proto = $self->collect({ passthrough => 1 });
222 my $emit = $self->_stream_from_proto($events);
223 my $coll = &$coll_proto;
225 if(ref $coll eq 'ARRAY') {
226 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
227 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
228 } elsif(ref $coll eq 'HASH') {
229 return [$emit, $coll];
231 return $self->_stream_concat($emit, $coll);
233 } else { return $emit }
238 my ($self, $events) = @_;
239 my $coll_proto = $self->collect({ passthrough => 1 });
242 my $emit = $self->_stream_from_proto($events);
243 my $coll = &$coll_proto;
244 return ref($coll) eq 'HASH' # single event, no collect
246 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Build a filter that inserts $events at the start of the matched element's
# content.
#
# $events is anything _stream_from_proto accepts. Returns a code reference
# that is called with the same arguments as a collect filter and returns
# [ opening event, stream of following events ].
#
# For an element written as an in-place close, the opening event is copied
# without its raw / is_in_place_close entries and is followed by the whole
# emitted content and then a synthetic CLOSE event. The complete $emit
# stream is concatenated rather than a single ->next result, so content
# made of several events is not truncated.
sub prepend_content {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1, content => 1 });

  return sub {
    my ($evt) = @_;
    my $emit = $self->_stream_from_proto($events);

    if ($evt->{is_in_place_close}) {
      $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
      my $close = $self->_stream_from_array(
        { type => 'CLOSE', name => $evt->{name} }
      );
      return [ $evt, $self->_stream_concat($emit, $close) ];
    }

    # Called with & and no parentheses on purpose: this hands the current
    # @_ (untouched above) to the collector.
    my $coll = &$coll_proto;
    return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
  };
}
# Build a filter that inserts $events at the end of the matched element's
# content.
#
# $events is anything _stream_from_proto accepts. Returns a code reference
# that is called with the same arguments as a collect filter and returns
# [ opening event, stream of following events ].
#
# For an element written as an in-place close, the opening event is copied
# without its raw / is_in_place_close entries and is followed by the whole
# emitted content and then a synthetic CLOSE event. The complete $emit
# stream is concatenated rather than a single ->next result, so content
# made of several events is not truncated.
sub append_content {
  my ($self, $events) = @_;
  my $coll_proto = $self->collect({ passthrough => 1, content => 1 });

  return sub {
    my ($evt) = @_;
    my $emit = $self->_stream_from_proto($events);

    if ($evt->{is_in_place_close}) {
      $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
      my $close = $self->_stream_from_array(
        { type => 'CLOSE', name => $evt->{name} }
      );
      return [ $evt, $self->_stream_concat($emit, $close) ];
    }

    # Called with & and no parentheses on purpose: this hands the current
    # @_ (untouched above) to the collector.
    my $coll = &$coll_proto;
    return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
  };
}
285 my ($self, $replace_with, $options) = @_;
286 my $coll_proto = $self->collect($options);
288 my ($evt, $stream) = @_;
289 my $emit = $self->_stream_from_proto($replace_with);
290 my $coll = &$coll_proto;
291 # if we're replacing the contents of an in place close
292 # then we need to handle that here
293 if ($options->{content}
294 && ref($coll) eq 'HASH'
295 && $coll->{is_in_place_close}
297 my $close = $stream->next;
298 # shallow copy and nuke in place and raw (to force smart print)
299 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
300 $emit = $self->_stream_concat(
302 $self->_stream_from_array($close),
305 # For a straightforward replace operation we can, in fact, do the emit
306 # -before- the collect, and my first cut did so. However in order to
307 # use the captured content in generating the new content, we need
308 # the collect stage to happen first - and it seems highly unlikely
309 # that in normal operation the collect phase will take long enough
310 # for the difference to be noticeable
313 ? (ref $coll eq 'ARRAY' # [ event, stream ]
314 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
315 : (ref $coll eq 'HASH' # event or stream?
317 : $self->_stream_concat($coll, $emit))
# Same as replace, but with the 'content' option forced on. The caller's
# options hash (which may be omitted) is copied, not modified.
sub replace_content {
  my ($self, $replace_with, $options) = @_;
  my %opts = (%{ $options || {} }, content => 1);
  return $self->replace($replace_with, \%opts);
}
330 my ($self, $repeat_for, $options) = @_;
331 $options->{into} = \my @into;
333 my $repeat_between = delete $options->{repeat_between};
334 if ($repeat_between) {
335 $options->{filter} = sub {
336 $_->select($repeat_between)->collect({ into => \@between })
340 my $s = $self->_stream_from_proto($repeat_for);
341 # We have to test $repeat_between not @between here because
342 # at the point we're constructing our return stream @between
343 # hasn't been populated yet - but we can test @between in the
344 # map routine because it has been by then and that saves us doing
345 # the extra stream construction if we don't need it.
346 $self->_flatten_stream_of_streams(do {
347 if ($repeat_between) {
349 local $_ = $self->_stream_from_array(@into);
350 (@between && $s->peek)
351 ? $self->_stream_concat(
352 $_[0]->($_), $self->_stream_from_array(@between)
358 local $_ = $self->_stream_from_array(@into);
364 $self->replace($repeater, $options);
368 my ($self, $repeat_for, $options) = @_;
369 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
376 HTML::Zoom::FilterBuilder - Add Filters to a Stream
380 Create an L<HTML::Zoom> instance:
383 my $root = HTML::Zoom
387 <title>Default Title</title>
389 <body bad_attr='junk'>
395 Create a new attribute on the C<body> tag:
399 ->set_attribute(class=>'main');
401 Add an extra value to an existing attribute:
405 ->add_to_attribute(class=>'one-column');
407 Set the content of the C<title> tag:
411 ->replace_content('Hello World');
413 Set content from another L<HTML::Zoom> instance:
415 my $body = HTML::Zoom
419 <p id="p2">Is the Time</p>
425 ->replace_content($body);
427 Set an attribute on multiple matches:
431 ->set_attribute(class=>'para');
437 ->remove_attribute('bad_attr');
443 my $output = $root->to_html;
450 <title>Hello World</title>
452 <body class="main one-column"><div id="stuff">
453 <p class="para">Well Now</p>
454 <p id="p2" class="para">Is the Time</p>
462 is($output, $expect, 'Synopsis code works ok');
468 Given a L<HTML::Zoom> stream, provide methods to apply filters which
469 alter the content of that stream.
473 This class defines the following public API
477 Sets an attribute of a given name to a given value for all matching selections.
481 ->set_attribute(class=>'paragraph')
483 ->set_attribute({class=>'paragraph', name=>'divider'});
485 Overrides existing values, if such exist. When multiple L</set_attribute>
486 calls are made against the same or overlapping selection sets, the final
489 =head2 add_to_attribute
491 Adds a value to an existing attribute, or creates one if the attribute does not
492 yet exist. You may call this method with either an Array or HashRef of Args.
496 ->set_attribute({class => 'paragraph', name => 'test'})
498 ->add_to_attribute(class=>'divider');
500 Attributes with more than one value will have a dividing space.
502 =head2 remove_attribute
504 Removes an attribute and all its values.
508 ->set_attribute(class=>'paragraph')
510 ->remove_attribute('class');
512 =head2 remove_from_attribute
514 Removes a value from an existing attribute.
518 ->set_attribute(class=>'paragraph lead')
520 ->remove_from_attribute('class' => 'lead');
522 Removes attributes from the original stream or events already added.
526 Add to a class attribute
530 Remove from a class attribute
532 =head2 transform_attribute
534 Transforms (or creates or deletes) an attribute by running the passed
535 coderef on it. If the coderef returns nothing, the attribute is
540 ->transform_attribute( href => sub {
541 ( my $a = shift ) =~ s/localhost/example.com/;
548 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
549 optional common options as hash reference.
553 =item into [ARRAY REFERENCE]
555 Where to save collected events (selected elements).
557 $z1->select('#main-content')
558 ->collect({ into => \@body })
560 $z2->select('#main-content')
566 Run filter on collected elements (locally setting $_ to stream, and passing
567 stream as an argument to given code reference). Filtered stream would be
572 filter => sub { $_->select('.inner')->replace_content('bar!') },
576 It can be used to further filter selection. For example
580 filter => sub { $_->select('td') },
584 is equivalent to (not implemented yet) descendant selector combination, i.e.
588 =item passthrough [BOOLEAN]
590 Extract copy of elements; the stream is unchanged (it does not remove collected
591 elements). For example without 'passthrough'
593 HTML::Zoom->from_html('<foo><bar /></foo>')
595 ->collect({ content => 1 })
598 returns '<foo></foo>', while with C<passthrough> option
600 HTML::Zoom->from_html('<foo><bar /></foo>')
602 ->collect({ content => 1, passthrough => 1 })
605 returns '<foo><bar /></foo>'.
607 =item content [BOOLEAN]
609 Collect content of the element, and not the element itself.
613 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
618 would return '<p>foo</p>', while
620 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
622 ->collect({ content => 1 })
625 would return '<h1></h1><p>foo</p>'.
627 See also L</collect_content>.
629 =item flush_before [BOOLEAN]
631 Generate C<flush> event before collecting, to ensure that the HTML generated up
632 to the selected element being collected is flushed through to the browser. Usually
633 used in L</repeat> or L</repeat_content>.
637 =head2 collect_content
639 Collects contents of L<HTML::Zoom/select> result.
641 HTML::Zoom->from_file($foo)
642 ->select('#main-content')
643 ->collect_content({ into => \@foo_body })
646 ->replace_content(\@foo_body)
649 Equivalent to running L</collect> with C<content> option set.
653 Given a L<HTML::Zoom/select> result, add given content (which might be string,
654 array or another L<HTML::Zoom> object) before it.
657 ->select('input[name="foo"]')
658 ->add_before(\ '<span class="warning">required field</span>');
662 Like L</add_before>, only after L<HTML::Zoom/select> result.
668 You can add zoom events directly
672 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
674 =head2 prepend_content
676 Similar to add_before, but adds the content to the match.
679 ->from_html(q[<p>World</p>])
681 ->prepend_content("Hello ")
684 ## <p>Hello World</p>
686 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
688 =head2 append_content
690 Similar to add_after, but adds the content to the match.
693 ->from_html(q[<p>Hello </p>])
695 ->append_content("World")
698 ## <p>Hello World</p>
700 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
704 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
705 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
706 (via hash reference).
708 =head2 replace_content
710 Given a L<HTML::Zoom/select> result, replace the content with a string, array
711 or another L<HTML::Zoom> object.
714 ->select('title, #greeting')
715 ->replace_content('Hello world!');
719 For a given selection, repeat over transformations, typically for the purposes
720 of populating lists. Takes either an array of anonymous subroutines or a zoom-
721 able object consisting of transformation.
723 Example of array reference style (when it doesn't matter that all iterations are
726 $zoom->select('table')->repeat([
730 $_->select('td')->replace_content($e);
735 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
736 collected elements), and with said result passed as parameter to subroutine.
738 You might want to use CodeStream when you don't have all elements upfront
740 $zoom->select('.contents')->repeat(sub {
741 HTML::Zoom::CodeStream->new({
743 while (my $line = $fh->getline) {
745 $_->select('.lno')->replace_content($fh->input_line_number)
746 ->select('.line')->replace_content($line)
754 In addition to common options as in L</collect>, it also supports:
758 =item repeat_between [SELECTOR]
760 Selects object to be repeated between items. In the case of array this object
761 is put between elements, in case of iterator it is put between results of
762 subsequent iterations, in the case of streamable it is put between events
765 See documentation for L</repeat_content>
769 =head2 repeat_content
771 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
772 this result to this iterator. Accepts the same options as L</repeat>.
774 Equivalent to using the C<content> option with L</repeat>.
781 $_->select('.name')->replace_content('Matt')
782 ->select('.age')->replace_content('26')
785 $_->select('.name')->replace_content('Mark')
786 ->select('.age')->replace_content('0x29')
789 $_->select('.name')->replace_content('Epitaph')
790 ->select('.age')->replace_content('<redacted>')
793 { repeat_between => '.between' }
803 See L<HTML::Zoom> for authors.
807 See L<HTML::Zoom> for the license.