1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
27 sub set_attr { my $self = shift; return $self->set_attribute(@_); } # short alias: hands every argument to set_attribute unchanged
31 my ($name, $value) = $self->_parse_attribute_args(@_);
33 my $a = (my $evt = $_[0])->{attrs};
34 my $e = exists $a->{$name};
35 +{ %$evt, raw => undef, raw_attrs => undef,
36 attrs => { %$a, $name => $value },
37 ($e # add to name list if not present
39 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
44 sub _parse_attribute_args {
46 # allow ->add_to_attribute(name => 'value')
47 # or ->add_to_attribute({ name => 'name', value => 'value' })
48 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
49 return ($name, $self->_zconfig->parser->html_escape($value));
53 die "renamed to add_to_attribute. killing this entirely for 1.0";
56 sub add_class { my $self = shift; return $self->add_to_attribute('class', @_); } # add_to_attribute with the attribute name fixed to 'class'
58 sub remove_class { my $self = shift; return $self->remove_attribute('class', @_); } # remove_attribute with 'class' as the attribute name; any further arguments are passed along as-is
60 sub set_class { my $self = shift; return $self->set_attribute('class', @_); } # set_attribute with the attribute name fixed to 'class'
62 sub set_id { my $self = shift; return $self->set_attribute('id', @_); } # set_attribute with the attribute name fixed to 'id'
64 sub add_to_attribute {
66 my ($name, $value) = $self->_parse_attribute_args(@_);
68 my $a = (my $evt = $_[0])->{attrs};
69 my $e = exists $a->{$name};
70 +{ %$evt, raw => undef, raw_attrs => undef,
73 $name => join(' ', ($e ? $a->{$name} : ()), $value)
75 ($e # add to name list if not present
77 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
82 sub remove_attribute {
83 my ($self, $args) = @_;
84 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
86 my $a = (my $evt = $_[0])->{attrs};
87 return $evt unless exists $a->{$name};
88 $a = { %$a }; delete $a->{$name};
89 +{ %$evt, raw => undef, raw_attrs => undef,
91 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
96 sub transform_attribute {
98 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
102 my %a = %{ $evt->{attrs} };
103 my @names = @{ $evt->{attr_names} };
105 my $existed_before = exists $a{$name};
106 my $v = $code->( $a{$name} );
107 my $deleted = $existed_before && ! defined $v;
108 my $added = ! $existed_before && defined $v;
115 @names = grep $_ ne $name, @names;
119 +{ %$evt, raw => undef, raw_attrs => undef,
122 ? (attr_names => \@names )
129 my ($self, $options) = @_;
130 my ($into, $passthrough, $content, $filter, $flush_before) =
131 @{$options}{qw(into passthrough content filter flush_before)};
133 my ($evt, $stream) = @_;
134 # We wipe the contents of @$into here so that other actions depending
135 # on this (such as a repeater) can be invoked multiple times easily.
136 # I -suspect- it's better for that state reset to be managed here; if it
137 # ever becomes painful the decision should be revisited
139 @$into = $content ? () : ($evt);
141 if ($evt->{is_in_place_close}) {
142 return $evt if $passthrough || $content;
145 my $name = $evt->{name};
147 my $_next = $content ? 'peek' : 'next';
150 $stream = do { local $_ = $stream; $filter->($stream) };
153 local $_ = $self->_stream_concat(
154 $self->_stream_from_array($evt),
159 $evt = $stream->next;
162 my $collector = $self->_stream_from_code(sub {
163 return unless $stream;
164 while (my ($evt) = $stream->$_next) {
165 $depth++ if ($evt->{type} eq 'OPEN');
166 $depth-- if ($evt->{type} eq 'CLOSE');
170 push(@$into, $evt) if $into;
171 return $evt if $passthrough;
174 push(@$into, $evt) if $into;
175 $stream->next if $content;
176 return $evt if $passthrough;
178 die "Never saw closing </${name}> before end of source";
181 if ($passthrough||$content) {
182 $evt = { %$evt, flush => 1 };
184 $evt = { type => 'EMPTY', flush => 1 };
187 return ($passthrough||$content||$flush_before)
188 ? [ $evt, $collector ]
193 sub collect_content {
194 my ($self, $options) = @_;
195 $self->collect({ %{$options||{}}, content => 1 })
199 my ($self, $events) = @_;
200 my $coll_proto = $self->collect({ passthrough => 1 });
202 my $emit = $self->_stream_from_proto($events);
203 my $coll = &$coll_proto;
205 if(ref $coll eq 'ARRAY') {
206 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
207 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
208 } elsif(ref $coll eq 'HASH') {
209 return [$emit, $coll];
211 return $self->_stream_concat($emit, $coll);
213 } else { return $emit }
218 my ($self, $events) = @_;
219 my $coll_proto = $self->collect({ passthrough => 1 });
222 my $emit = $self->_stream_from_proto($events);
223 my $coll = &$coll_proto;
224 return ref($coll) eq 'HASH' # single event, no collect
226 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
230 sub prepend_content {
231 my ($self, $events) = @_;
232 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
235 my $emit = $self->_stream_from_proto($events);
236 if ($evt->{is_in_place_close}) {
237 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
238 return [ $evt, $self->_stream_from_array(
239 $emit->next, { type => 'CLOSE', name => $evt->{name} }
242 my $coll = &$coll_proto;
243 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
248 my ($self, $events) = @_;
249 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
252 my $emit = $self->_stream_from_proto($events);
253 if ($evt->{is_in_place_close}) {
254 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
255 return [ $evt, $self->_stream_from_array(
256 $emit->next, { type => 'CLOSE', name => $evt->{name} }
259 my $coll = &$coll_proto;
260 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
265 my ($self, $replace_with, $options) = @_;
266 my $coll_proto = $self->collect($options);
268 my ($evt, $stream) = @_;
269 my $emit = $self->_stream_from_proto($replace_with);
270 my $coll = &$coll_proto;
271 # if we're replacing the contents of an in place close
272 # then we need to handle that here
273 if ($options->{content}
274 && ref($coll) eq 'HASH'
275 && $coll->{is_in_place_close}
277 my $close = $stream->next;
278 # shallow copy and nuke in place and raw (to force smart print)
279 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
280 $emit = $self->_stream_concat(
282 $self->_stream_from_array($close),
285 # For a straightforward replace operation we can, in fact, do the emit
286 # -before- the collect, and my first cut did so. However in order to
287 # use the captured content in generating the new content, we need
288 # the collect stage to happen first - and it seems highly unlikely
289 # that in normal operation the collect phase will take long enough
290 # for the difference to be noticeable
293 ? (ref $coll eq 'ARRAY' # [ event, stream ]
294 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
295 : (ref $coll eq 'HASH' # event or stream?
297 : $self->_stream_concat($coll, $emit))
304 sub replace_content {
305 my ($self, $replace_with, $options) = @_;
306 $self->replace($replace_with, { %{$options||{}}, content => 1 })
310 my ($self, $repeat_for, $options) = @_;
311 $options->{into} = \my @into;
313 my $repeat_between = delete $options->{repeat_between};
314 if ($repeat_between) {
315 $options->{filter} = sub {
316 $_->select($repeat_between)->collect({ into => \@between })
320 my $s = $self->_stream_from_proto($repeat_for);
321 # We have to test $repeat_between not @between here because
322 # at the point we're constructing our return stream @between
323 # hasn't been populated yet - but we can test @between in the
324 # map routine because it has been by then and that saves us doing
325 # the extra stream construction if we don't need it.
326 $self->_flatten_stream_of_streams(do {
327 if ($repeat_between) {
329 local $_ = $self->_stream_from_array(@into);
330 (@between && $s->peek)
331 ? $self->_stream_concat(
332 $_[0]->($_), $self->_stream_from_array(@between)
338 local $_ = $self->_stream_from_array(@into);
344 $self->replace($repeater, $options);
348 my ($self, $repeat_for, $options) = @_;
349 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
356 HTML::Zoom::FilterBuilder - Add Filters to a Stream
360 Create an L<HTML::Zoom> instance:
363 my $root = HTML::Zoom
367 <title>Default Title</title>
369 <body bad_attr='junk'>
375 Create a new attribute on the C<body> tag:
379 ->set_attribute(class=>'main');
381 Add an extra value to an existing attribute:
385 ->add_to_attribute(class=>'one-column');
387 Set the content of the C<title> tag:
391 ->replace_content('Hello World');
393 Set content from another L<HTML::Zoom> instance:
395 my $body = HTML::Zoom
399 <p id="p2">Is the Time</p>
405 ->replace_content($body);
407 Set an attribute on multiple matches:
411 ->set_attribute(class=>'para');
417 ->remove_attribute('bad_attr');
423 my $output = $root->to_html;
430 <title>Hello World</title>
432 <body class="main one-column"><div id="stuff">
433 <p class="para">Well Now</p>
434 <p id="p2" class="para">Is the Time</p>
442 is($output, $expect, 'Synopsis code works ok');
448 Given a L<HTML::Zoom> stream, provide methods to apply filters which
449 alter the content of that stream.
453 This class defines the following public API
457 Sets an attribute of a given name to a given value for all matching selections.
461 ->set_attribute(class=>'paragraph')
463 ->set_attribute({name=>'class', value=>'divider'});
466 Overrides existing values, if such exist. When multiple L</set_attribute>
467 calls are made against the same or overlapping selection sets, the final
470 =head2 add_to_attribute
472 Adds a value to an existing attribute, or creates one if the attribute does not
473 yet exist. You may call this method with either an Array or HashRef of Args.
475 Here's the 'long form' HashRef:
479 ->set_attribute(class=>'paragraph')
481 ->add_to_attribute({name=>'class', value=>'divider'});
483 And the exact same effect using the 'short form' Array:
487 ->set_attribute(class=>'paragraph')
489 ->add_to_attribute(class=>'divider');
491 Attributes with more than one value will have a dividing space.
493 =head2 remove_attribute
495 Removes an attribute and all its values.
499 ->set_attribute(class=>'paragraph')
501 ->remove_attribute('class');
503 Removes attributes from the original stream or events already added.
505 =head2 transform_attribute
507 Transforms (or creates or deletes) an attribute by running the passed
508 coderef on it. If the coderef returns nothing, the attribute is
513 ->transform_attribute( href => sub {
514 ( my $a = shift ) =~ s/localhost/example.com/;
521 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
522 optional common options as a hash reference.
526 =item into [ARRAY REFERENCE]
528 Where to save collected events (selected elements).
530 $z1->select('#main-content')
531 ->collect({ into => \@body })
533 $z2->select('#main-content')
539 Run filter on collected elements (locally setting $_ to stream, and passing
540 stream as an argument to given code reference). Filtered stream would be
545 filter => sub { $_->select('.inner')->replace_content('bar!') },
549 It can be used to further filter selection. For example
553 filter => sub { $_->select('td') },
557 is equivalent to (not implemented yet) descendant selector combination, i.e.
561 =item passthrough [BOOLEAN]
563 Extract copy of elements; the stream is unchanged (it does not remove collected
564 elements). For example without 'passthrough'
566 HTML::Zoom->from_html('<foo><bar /></foo>')
568 ->collect({ content => 1 })
571 returns '<foo></foo>', while with C<passthrough> option
573 HTML::Zoom->from_html('<foo><bar /></foo>')
575 ->collect({ content => 1, passthrough => 1 })
578 returns '<foo><bar /></foo>'.
580 =item content [BOOLEAN]
582 Collect content of the element, and not the element itself.
586 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
591 would return '<p>foo</p>', while
593 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
595 ->collect({ content => 1 })
598 would return '<h1></h1><p>foo</p>'.
600 See also L</collect_content>.
602 =item flush_before [BOOLEAN]
604 Generate C<flush> event before collecting, to ensure that the HTML generated up
605 to selected element being collected is flushed through to the browser. Usually
606 used in L</repeat> or L</repeat_content>.
610 =head2 collect_content
612 Collects contents of L<HTML::Zoom/select> result.
614 HTML::Zoom->from_file($foo)
615 ->select('#main-content')
616 ->collect_content({ into => \@foo_body })
619 ->replace_content(\@foo_body)
622 Equivalent to running L</collect> with C<content> option set.
626 Given a L<HTML::Zoom/select> result, add given content (which might be string,
627 array or another L<HTML::Zoom> object) before it.
630 ->select('input[name="foo"]')
631 ->add_before(\ '<span class="warning">required field</span>');
635 Like L</add_before>, only after L<HTML::Zoom/select> result.
641 You can add zoom events directly
645 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
647 =head2 prepend_content
649 Similar to add_before, but adds the content to the match.
652 ->from_html(q[<p>World</p>])
654 ->prepend_content("Hello ")
657 ## <p>Hello World</p>
659 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
661 =head2 append_content
663 Similar to add_after, but adds the content to the match.
666 ->from_html(q[<p>Hello </p>])
668 ->append_content("World")
671 ## <p>Hello World</p>
673 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
677 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
678 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
679 (via hash reference).
681 =head2 replace_content
683 Given a L<HTML::Zoom/select> result, replace the content with a string, array
684 or another L<HTML::Zoom> object.
687 ->select('title, #greeting')
688 ->replace_content('Hello world!');
692 For a given selection, repeat over transformations, typically for the purposes
693 of populating lists. Takes either an array of anonymous subroutines or a zoom-
694 able object consisting of transformations.
696 Example of array reference style (when it doesn't matter that all iterations are
699 $zoom->select('table')->repeat([
703 $_->select('td')->replace_content($e);
708 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
709 collected elements), and with said result passed as parameter to subroutine.
711 You might want to use CodeStream when you don't have all elements upfront
713 $zoom->select('.contents')->repeat(sub {
714 HTML::Zoom::CodeStream->new({
716 while (my $line = $fh->getline) {
718 $_->select('.lno')->replace_content($fh->input_line_number)
719 ->select('.line')->replace_content($line)
727 In addition to common options as in L</collect>, it also supports:
731 =item repeat_between [SELECTOR]
733 Selects object to be repeated between items. In the case of array this object
734 is put between elements, in case of iterator it is put between results of
735 subsequent iterations, in the case of streamable it is put between events
738 See documentation for L</repeat_content>
742 =head2 repeat_content
744 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
745 this result to this iterator. Accepts the same options as L</repeat>.
747 Equivalent to using the C<content> option with L</repeat>.
754 $_->select('.name')->replace_content('Matt')
755 ->select('.age')->replace_content('26')
758 $_->select('.name')->replace_content('Mark')
759 ->select('.age')->replace_content('0x29')
762 $_->select('.name')->replace_content('Epitaph')
763 ->select('.age')->replace_content('<redacted>')
766 { repeat_between => '.between' }
776 See L<HTML::Zoom> for authors.
780 See L<HTML::Zoom> for the license.