1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Private helper: forwards its arguments to the stream_from_code method of
# the stream_utils object reached through this object's _zconfig.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Private helper: forwards its arguments to the stream_from_array method of
# the stream_utils object reached through this object's _zconfig.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Private helper: forwards its arguments to the stream_from_proto method of
# the stream_utils object reached through this object's _zconfig.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Private helper: forwards its arguments to the flatten_stream_of_streams
# method of the stream_utils object reached through this object's _zconfig.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
29 my ($name, $value) = $self->_parse_attribute_args(@_);
31 my $a = (my $evt = $_[0])->{attrs};
32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise attribute arguments into a ($name, $value) pair. The value is
# run through the configured parser's html_escape before it is returned.
# NOTE(review): $self is used below but its declaration is not among the
# lines visible here; presumably it is shifted off @_ first - confirm.
42 sub _parse_attribute_args {
44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
# More than one argument: take the list itself as (name, value).
# Otherwise: treat the first argument as a hash reference and read its
# 'name' and 'value' keys.
46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
51 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Appends a value to the named attribute of an event, creating the
# attribute when it does not exist yet. Arguments are normalised (and the
# value escaped) by _parse_attribute_args.
# NOTE(review): the $_[0] read below is presumably the event handed to an
# inner closure whose "sub {" line is not visible here - confirm.
54 sub add_to_attribute {
56 my ($name, $value) = $self->_parse_attribute_args(@_);
58 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute was already present on the event.
59 my $e = exists $a->{$name};
# Build a new event hash from a copy of the old one; raw and raw_attrs are
# set to undef in the copy.
60 +{ %$evt, raw => undef, raw_attrs => undef,
# Existing value (if any) and the new value are joined with a single space.
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
65 ($e # add to name list if not present
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Removes the named attribute from an event.
# $args is either the attribute name itself or a hash reference whose
# 'name' key holds it.
# NOTE(review): the $_[0] read below is presumably the event handed to an
# inner closure whose "sub {" line is not visible here - confirm.
72 sub remove_attribute {
73 my ($self, $args) = @_;
74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
76 my $a = (my $evt = $_[0])->{attrs};
# Nothing to remove: hand the event back untouched.
77 return $evt unless exists $a->{$name};
# Shallow-copy the attrs hash first so the delete does not modify the
# hash referenced by the incoming event.
78 $a = { %$a }; delete $a->{$name};
# New event hash: copy of the old one with raw and raw_attrs set to undef
# and the attribute's name filtered out of attr_names.
79 +{ %$evt, raw => undef, raw_attrs => undef,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
87 my ($self, $options) = @_;
88 my ($into, $passthrough, $content, $filter, $flush_before) =
89 @{$options}{qw(into passthrough content filter flush_before)};
91 my ($evt, $stream) = @_;
92 # We wipe the contents of @$into here so that other actions depending
93 # on this (such as a repeater) can be invoked multiple times easily.
94 # I -suspect- it's better for that state reset to be managed here; if it
95 # ever becomes painful the decision should be revisited
97 @$into = $content ? () : ($evt);
99 if ($evt->{is_in_place_close}) {
100 return $evt if $passthrough || $content;
103 my $name = $evt->{name};
105 my $_next = $content ? 'peek' : 'next';
108 $stream = do { local $_ = $stream; $filter->($stream) };
111 local $_ = $self->_stream_concat(
112 $self->_stream_from_array($evt),
117 $evt = $stream->next;
120 my $collector = $self->_stream_from_code(sub {
121 return unless $stream;
122 while (my ($evt) = $stream->$_next) {
123 $depth++ if ($evt->{type} eq 'OPEN');
124 $depth-- if ($evt->{type} eq 'CLOSE');
128 push(@$into, $evt) if $into;
129 return $evt if $passthrough;
132 push(@$into, $evt) if $into;
133 $stream->next if $content;
134 return $evt if $passthrough;
136 die "Never saw closing </${name}> before end of source";
139 if ($passthrough||$content) {
140 $evt = { %$evt, flush => 1 };
142 $evt = { type => 'EMPTY', flush => 1 };
145 return ($passthrough||$content||$flush_before)
146 ? [ $evt, $collector ]
# Convenience wrapper around collect: calls it with a copy of the given
# options (an empty hash when $options is false) in which content is
# forced to 1.
151 sub collect_content {
152 my ($self, $options) = @_;
153 $self->collect({ %{$options||{}}, content => 1 })
157 my ($self, $events) = @_;
158 my $coll_proto = $self->collect({ passthrough => 1 });
160 my $emit = $self->_stream_from_proto($events);
161 my $coll = &$coll_proto;
163 if(ref $coll eq 'ARRAY') {
164 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
165 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
166 } elsif(ref $coll eq 'HASH') {
167 return [$emit, $coll];
169 return $self->_stream_concat($emit, $coll);
171 } else { return $emit }
176 my ($self, $events) = @_;
177 my $coll_proto = $self->collect({ passthrough => 1 });
180 my $emit = $self->_stream_from_proto($events);
181 my $coll = &$coll_proto;
182 return ref($coll) eq 'HASH' # single event, no collect
184 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
189 #$emit = $self->_stream_concat(
191 # $self->_stream_from_array($close),
# Places the stream built from $events in front of what the content
# collector yields for the matched element.
# NOTE(review): $evt is used below but its declaration (presumably inside
# an inner closure) is not among the lines visible here - confirm.
194 sub prepend_content {
195 my ($self, $events) = @_;
# Collector configured with both passthrough and content set.
196 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
199 my $emit = $self->_stream_from_proto($events);
# In-place-close event: return a copy of the event without its raw and
# is_in_place_close keys, followed by a stream holding one event taken
# from $emit and a CLOSE event carrying the same name.
200 if ($evt->{is_in_place_close}) {
201 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
202 return [ $evt, $self->_stream_from_array(
# NOTE(review): only a single ->next result of $emit is used here; if
# $events produces more than one event the rest appear to be dropped -
# confirm whether that is intended.
203 $emit->next, { type => 'CLOSE', name => $evt->{name} }
# Ampersand call with no argument list: the collector receives the
# current @_ unchanged.
206 my $coll = &$coll_proto;
# Keep the collector's first element, then emit the new events ahead of
# the collector's stream.
207 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
212 my ($self, $events) = @_;
213 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
216 my $emit = $self->_stream_from_proto($events);
217 if ($evt->{is_in_place_close}) {
218 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
219 return [ $evt, $self->_stream_from_array(
220 $emit->next, { type => 'CLOSE', name => $evt->{name} }
223 my $coll = &$coll_proto;
224 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
229 my ($self, $replace_with, $options) = @_;
230 my $coll_proto = $self->collect($options);
232 my ($evt, $stream) = @_;
233 my $emit = $self->_stream_from_proto($replace_with);
234 my $coll = &$coll_proto;
235 # if we're replacing the contents of an in place close
236 # then we need to handle that here
237 if ($options->{content}
238 && ref($coll) eq 'HASH'
239 && $coll->{is_in_place_close}
241 my $close = $stream->next;
242 # shallow copy and nuke in place and raw (to force smart print)
243 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
244 $emit = $self->_stream_concat(
246 $self->_stream_from_array($close),
249 # For a straightforward replace operation we can, in fact, do the emit
250 # -before- the collect, and my first cut did so. However in order to
251 # use the captured content in generating the new content, we need
252 # the collect stage to happen first - and it seems highly unlikely
253 # that in normal operation the collect phase will take long enough
254 # for the difference to be noticeable
257 ? (ref $coll eq 'ARRAY' # [ event, stream ]
258 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
259 : (ref $coll eq 'HASH' # event or stream?
261 : $self->_stream_concat($coll, $emit))
# Convenience wrapper around replace: calls it with $replace_with and a
# copy of the given options (an empty hash when $options is false) in
# which content is forced to 1.
268 sub replace_content {
269 my ($self, $replace_with, $options) = @_;
270 $self->replace($replace_with, { %{$options||{}}, content => 1 })
274 my ($self, $repeat_for, $options) = @_;
275 $options->{into} = \my @into;
277 my $repeat_between = delete $options->{repeat_between};
278 if ($repeat_between) {
279 $options->{filter} = sub {
280 $_->select($repeat_between)->collect({ into => \@between })
284 my $s = $self->_stream_from_proto($repeat_for);
285 # We have to test $repeat_between not @between here because
286 # at the point we're constructing our return stream @between
287 # hasn't been populated yet - but we can test @between in the
288 # map routine because it has been by then and that saves us doing
289 # the extra stream construction if we don't need it.
290 $self->_flatten_stream_of_streams(do {
291 if ($repeat_between) {
293 local $_ = $self->_stream_from_array(@into);
294 (@between && $s->peek)
295 ? $self->_stream_concat(
296 $_[0]->($_), $self->_stream_from_array(@between)
302 local $_ = $self->_stream_from_array(@into);
308 $self->replace($repeater, $options);
312 my ($self, $repeat_for, $options) = @_;
313 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
317 my ($self, $selector) = @_;
318 return HTML::Zoom::TransformBuilder->new({
319 zconfig => $self->_zconfig,
320 selector => $selector,
330 HTML::Zoom::FilterBuilder - Add Filters to a Stream
334 Create an L<HTML::Zoom> instance:
337 my $root = HTML::Zoom
341 <title>Default Title</title>
343 <body bad_attr='junk'>
349 Create a new attribute on the C<body> tag:
353 ->set_attribute(class=>'main');
355 Add an extra value to an existing attribute:
359 ->add_to_attribute(class=>'one-column');
361 Set the content of the C<title> tag:
365 ->replace_content('Hello World');
367 Set content from another L<HTML::Zoom> instance:
369 my $body = HTML::Zoom
373 <p id="p2">Is the Time</p>
379 ->replace_content($body);
381 Set an attribute on multiple matches:
385 ->set_attribute(class=>'para');
391 ->remove_attribute('bad_attr');
397 my $output = $root->to_html;
404 <title>Hello World</title>
406 <body class="main one-column"><div id="stuff">
407 <p class="para">Well Now</p>
408 <p id="p2" class="para">Is the Time</p>
416 is($output, $expect, 'Synopsis code works ok');
422 Given a L<HTML::Zoom> stream, provide methods to apply filters which
423 alter the content of that stream.
427 This class defines the following public API
431 Sets an attribute of a given name to a given value for all matching selections.
435 ->set_attribute(class=>'paragraph')
437 ->set_attribute({name=>'class', value=>'divider'});
440 Overrides existing values, if such exist. When multiple L</set_attribute>
441 calls are made against the same or overlapping selection sets, the final
444 =head2 add_to_attribute
446 Adds a value to an existing attribute, or creates one if the attribute does not
447 yet exist. You may call this method with either an Array or HashRef of Args.
449 Here's the 'long form' HashRef:
453 ->set_attribute(class=>'paragraph')
455 ->add_to_attribute({name=>'class', value=>'divider'});
457 And the exact same effect using the 'short form' Array:
461 ->set_attribute(class=>'paragraph')
463 ->add_to_attribute(class=>'divider');
465 Attributes with more than one value will have a dividing space.
467 =head2 remove_attribute
469 Removes an attribute and all its values.
473 ->set_attribute(class=>'paragraph')
475 ->remove_attribute('class');
477 Removes attributes from the original stream or events already added.
481 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
482 optional common options as a hash reference.
486 =item into [ARRAY REFERENCE]
488 Where to save collected events (selected elements).
490 $z1->select('#main-content')
491 ->collect({ into => \@body })
493 $z2->select('#main-content')
499 Run filter on collected elements (locally setting $_ to stream, and passing
500 stream as an argument to given code reference). Filtered stream would be
505 filter => sub { $_->select('.inner')->replace_content('bar!') },
509 It can be used to further filter selection. For example
513 filter => sub { $_->select('td') },
517 is equivalent to (not implemented yet) descendant selector combination, i.e.
521 =item passthrough [BOOLEAN]
523 Extract copy of elements; the stream is unchanged (it does not remove collected
524 elements). For example without 'passthrough'
526 HTML::Zoom->from_html('<foo><bar /></foo>')
528 ->collect({ content => 1 })
531 returns '<foo></foo>', while with C<passthrough> option
533 HTML::Zoom->from_html('<foo><bar /></foo>')
535 ->collect({ content => 1, passthrough => 1 })
538 returns '<foo><bar /></foo>'.
540 =item content [BOOLEAN]
542 Collect content of the element, and not the element itself.
546 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
551 would return '<p>foo</p>', while
553 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
555 ->collect({ content => 1 })
558 would return '<h1></h1><p>foo</p>'.
560 See also L</collect_content>.
562 =item flush_before [BOOLEAN]
564 Generate C<flush> event before collecting, to ensure that the HTML generated up
565 to the selected element being collected is flushed through to the browser. Usually
566 used in L</repeat> or L</repeat_content>.
570 =head2 collect_content
572 Collects contents of L<HTML::Zoom/select> result.
574 HTML::Zoom->from_file($foo)
575 ->select('#main-content')
576 ->collect_content({ into => \@foo_body })
579 ->replace_content(\@foo_body)
582 Equivalent to running L</collect> with C<content> option set.
586 Given a L<HTML::Zoom/select> result, add given content (which might be string,
587 array or another L<HTML::Zoom> object) before it.
590 ->select('input[name="foo"]')
591 ->add_before(\ '<span class="warning">required field</span>');
595 Like L</add_before>, only after L<HTML::Zoom/select> result.
601 You can add zoom events directly
605 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
607 =head2 prepend_content
609 Similar to add_before, but adds the content inside the matched element, before its existing content.
612 ->from_html(q[<p>World</p>])
614 ->prepend_content("Hello ")
617 ## <p>Hello World</p>
619 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
621 =head2 append_content
623 Similar to add_after, but adds the content inside the matched element, after its existing content.
626 ->from_html(q[<p>Hello </p>])
628 ->append_content("World")
631 ## <p>Hello World</p>
633 Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
637 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
638 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
639 (via hash reference).
641 =head2 replace_content
643 Given a L<HTML::Zoom/select> result, replace the content with a string, array
644 or another L<HTML::Zoom> object.
647 ->select('title, #greeting')
648 ->replace_content('Hello world!');
652 For a given selection, repeat over transformations, typically for the purposes
653 of populating lists. Takes either an array of anonymous subroutines or a
654 zoomable object consisting of transformations.
656 Example of array reference style (when it doesn't matter that all iterations are
659 $zoom->select('table')->repeat([
663 $_->select('td')->replace_content($e);
668 Subroutines would be run with $_ localized to result of L<HTML::Zoom/select> (of
669 collected elements), and with said result passed as parameter to subroutine.
671 You might want to use CodeStream when you don't have all elements upfront
673 $zoom->select('.contents')->repeat(sub {
674 HTML::Zoom::CodeStream->new({
676 while (my $line = $fh->getline) {
678 $_->select('.lno')->replace_content($fh->input_line_number)
679 ->select('.line')->replace_content($line)
687 In addition to common options as in L</collect>, it also supports:
691 =item repeat_between [SELECTOR]
693 Selects object to be repeated between items. In the case of array this object
694 is put between elements, in case of iterator it is put between results of
695 subsequent iterations, in the case of streamable it is put between events
698 See documentation for L</repeat_content>
702 =head2 repeat_content
704 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
705 this result to this iterator. Accepts the same options as L</repeat>.
707 Equivalent to using the C<content> option with L</repeat>.
714 $_->select('.name')->replace_content('Matt')
715 ->select('.age')->replace_content('26')
718 $_->select('.name')->replace_content('Mark')
719 ->select('.age')->replace_content('0x29')
722 $_->select('.name')->replace_content('Epitaph')
723 ->select('.age')->replace_content('<redacted>')
726 { repeat_between => '.between' }
736 See L<HTML::Zoom> for authors.
740 See L<HTML::Zoom> for the license.