1 package HTML::Zoom::FilterBuilder;
4 use warnings FATAL => 'all';
5 use base qw(HTML::Zoom::SubObject);
6 use HTML::Zoom::CodeStream;
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to the stream_from_code method of its _zconfig->stream_utils.
8 sub _stream_from_code {
9 shift->_zconfig->stream_utils->stream_from_code(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to the stream_from_array method of its _zconfig->stream_utils.
12 sub _stream_from_array {
13 shift->_zconfig->stream_utils->stream_from_array(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to the stream_from_proto method of its _zconfig->stream_utils.
16 sub _stream_from_proto {
17 shift->_zconfig->stream_utils->stream_from_proto(@_)
21 shift->_zconfig->stream_utils->stream_concat(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to the flatten_stream_of_streams method of its
# _zconfig->stream_utils.
24 sub _flatten_stream_of_streams {
25 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
30 my ($name, $value) = $self->_parse_attribute_args(@_);
32 my $a = (my $evt = $_[0])->{attrs};
33 my $e = exists $a->{$name};
34 +{ %$evt, raw => undef, raw_attrs => undef,
35 attrs => { %$a, $name => $value },
36 ($e # add to name list if not present
38 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalises the arguments accepted by the attribute filters into a
# (name, value) pair. The value is run through the configured parser's
# html_escape before being returned; the name is returned as given.
# Called by the attribute filter builders as $self->_parse_attribute_args(@_).
43 sub _parse_attribute_args {
45 # allow ->add_to_attribute(name => 'value')
46 # or ->add_to_attribute({ name => 'name', value => 'value' })
# More than one remaining argument is taken as a (name, value) list;
# a single argument is dereferenced as a hash with 'name' and 'value' keys.
47 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
48 return ($name, $self->_zconfig->parser->html_escape($value));
52 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Filter builder that adds a value to the named attribute of an event.
# The (name, value) pair comes from _parse_attribute_args, so the value has
# already been HTML-escaped by the time it is used here.
55 sub add_to_attribute {
57 my ($name, $value) = $self->_parse_attribute_args(@_);
59 my $a = (my $evt = $_[0])->{attrs};
# $e records whether the event already carries this attribute.
60 my $e = exists $a->{$name};
# A new event hash is built from a copy of $evt with raw and raw_attrs
# set to undef; $evt itself is not modified.
61 +{ %$evt, raw => undef, raw_attrs => undef,
# An existing value (if any) and the new value are joined with one space.
64 $name => join(' ', ($e ? $a->{$name} : ()), $value)
66 ($e # add to name list if not present
68 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Filter builder that removes the named attribute from an event.
# $args is either the attribute name itself or a hash reference whose
# 'name' key holds it.
73 sub remove_attribute {
74 my ($self, $args) = @_;
75 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
77 my $a = (my $evt = $_[0])->{attrs};
# Events that do not have the attribute are returned as they are.
78 return $evt unless exists $a->{$name};
# Delete from a shallow copy so the event's own attrs hash is left alone.
79 $a = { %$a }; delete $a->{$name};
80 +{ %$evt, raw => undef, raw_attrs => undef,
# Keep attr_names in step by filtering out the removed name.
82 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
88 my ($self, $options) = @_;
89 my ($into, $passthrough, $content, $filter, $flush_before) =
90 @{$options}{qw(into passthrough content filter flush_before)};
92 my ($evt, $stream) = @_;
93 # We wipe the contents of @$into here so that other actions depending
94 # on this (such as a repeater) can be invoked multiple times easily.
95 # I -suspect- it's better for that state reset to be managed here; if it
96 # ever becomes painful the decision should be revisited
98 @$into = $content ? () : ($evt);
100 if ($evt->{is_in_place_close}) {
101 return $evt if $passthrough || $content;
104 my $name = $evt->{name};
106 my $_next = $content ? 'peek' : 'next';
107 $stream = do { local $_ = $stream; $filter->($stream) } if $filter;
108 my $collector = $self->_stream_from_code(sub {
109 return unless $stream;
110 while (my ($evt) = $stream->$_next) {
111 $depth++ if ($evt->{type} eq 'OPEN');
112 $depth-- if ($evt->{type} eq 'CLOSE');
116 push(@$into, $evt) if $into;
117 return $evt if $passthrough;
120 push(@$into, $evt) if $into;
121 $stream->next if $content;
122 return $evt if $passthrough;
124 die "Never saw closing </${name}> before end of source";
127 if ($passthrough||$content) {
128 $evt = { %$evt, flush => 1 };
130 $evt = { type => 'EMPTY', flush => 1 };
133 return ($passthrough||$content||$flush_before)
134 ? [ $evt, $collector ]
# Convenience wrapper around collect: passes a copy of the caller's options
# (an empty hash when $options is false) with content => 1 added, which
# overrides any 'content' key the caller supplied.
139 sub collect_content {
140 my ($self, $options) = @_;
141 $self->collect({ %{$options||{}}, content => 1 })
145 my ($self, $events) = @_;
146 sub { return $self->_stream_from_array(@$events, $_[0]) };
150 my ($self, $events) = @_;
151 my $coll_proto = $self->collect({ passthrough => 1 });
154 my $emit = $self->_stream_from_array(@$events);
155 my $coll = &$coll_proto;
156 return ref($coll) eq 'HASH' # single event, no collect
158 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Filter builder that places the events in @$events directly after the
# matched event.
162 sub prepend_content {
163 my ($self, $events) = @_;
# For an event flagged is_in_place_close: work on a shallow copy with the
# raw and is_in_place_close keys removed, and return it together with a
# stream of the new events followed by a CLOSE event of the same name.
166 if ($evt->{is_in_place_close}) {
167 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
168 return [ $evt, $self->_stream_from_array(
169 @$events, { type => 'CLOSE', name => $evt->{name} }
# Otherwise return a stream of the matched event followed by the new events.
172 return $self->_stream_from_array($evt, @$events);
177 my ($self, $events) = @_;
178 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
181 if ($evt->{is_in_place_close}) {
182 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
183 return [ $evt, $self->_stream_from_array(
184 @$events, { type => 'CLOSE', name => $evt->{name} }
187 my $coll = &$coll_proto;
188 my $emit = $self->_stream_from_array(@$events);
189 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
194 my ($self, $replace_with, $options) = @_;
195 my $coll_proto = $self->collect($options);
197 my ($evt, $stream) = @_;
198 my $emit = $self->_stream_from_proto($replace_with);
199 my $coll = &$coll_proto;
200 # if we're replacing the contents of an in place close
201 # then we need to handle that here
202 if ($options->{content}
203 && ref($coll) eq 'HASH'
204 && $coll->{is_in_place_close}
206 my $close = $stream->next;
207 # shallow copy and nuke in place and raw (to force smart print)
208 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
209 $emit = $self->_stream_concat(
211 $self->_stream_from_array($close),
214 # For a straightforward replace operation we can, in fact, do the emit
215 # -before- the collect, and my first cut did so. However in order to
216 # use the captured content in generating the new content, we need
217 # the collect stage to happen first - and it seems highly unlikely
218 # that in normal operation the collect phase will take long enough
219 # for the difference to be noticeable
222 ? (ref $coll eq 'ARRAY' # [ event, stream ]
223 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
224 : (ref $coll eq 'HASH' # event or stream?
226 : $self->_stream_concat($coll, $emit))
# Convenience wrapper around replace: passes $replace_with through and a
# copy of the caller's options (an empty hash when $options is false) with
# content => 1 added, which overrides any 'content' key the caller supplied.
233 sub replace_content {
234 my ($self, $replace_with, $options) = @_;
235 $self->replace($replace_with, { %{$options||{}}, content => 1 })
239 my ($self, $repeat_for, $options) = @_;
240 $options->{into} = \my @into;
242 my $repeat_between = delete $options->{repeat_between};
243 if ($repeat_between) {
244 $options->{filter} = sub {
245 $_->select($repeat_between)->collect({ into => \@between })
249 my $s = $self->_stream_from_proto($repeat_for);
250 # We have to test $repeat_between not @between here because
251 # at the point we're constructing our return stream @between
252 # hasn't been populated yet - but we can test @between in the
253 # map routine because it has been by then and that saves us doing
254 # the extra stream construction if we don't need it.
255 $self->_flatten_stream_of_streams(do {
256 if ($repeat_between) {
258 local $_ = $self->_stream_from_array(@into);
259 (@between && $s->peek)
260 ? $self->_stream_concat(
261 $_[0]->($_), $self->_stream_from_array(@between)
267 local $_ = $self->_stream_from_array(@into);
273 $self->replace($repeater, $options);
277 my ($self, $repeat_for, $options) = @_;
278 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
285 HTML::Zoom::FilterBuilder - Add Filters to a Stream
289 Create an L<HTML::Zoom> instance:
292 my $root = HTML::Zoom
296 <title>Default Title</title>
298 <body bad_attr='junk'>
304 Create a new attribute on the C<body> tag:
308 ->set_attribute(class=>'main');
310 Add an extra value to an existing attribute:
314 ->add_to_attribute(class=>'one-column');
316 Set the content of the C<title> tag:
320 ->replace_content('Hello World');
322 Set content from another L<HTML::Zoom> instance:
324 my $body = HTML::Zoom
328 <p id="p2">Is the Time</p>
334 ->replace_content($body);
336 Set an attribute on multiple matches:
340 ->set_attribute(class=>'para');
346 ->remove_attribute('bad_attr');
352 my $output = $root->to_html;
359 <title>Hello World</title>
361 <body class="main one-column"><div id="stuff">
362 <p class="para">Well Now</p>
363 <p id="p2" class="para">Is the Time</p>
371 is($output, $expect, 'Synopsis code works ok');
377 Given a L<HTML::Zoom> stream, provide methods to apply filters which
378 alter the content of that stream.
382 This class defines the following public API
386 Sets an attribute of a given name to a given value for all matching selections.
390 ->set_attribute(class=>'paragraph')
392 ->set_attribute(name=>'class', value=>'divider');
395 Overrides existing values, if such exist. When multiple L</set_attribute>
396 calls are made against the same or overlapping selection sets, the final
399 =head2 add_to_attribute
401 Adds a value to an existing attribute, or creates one if the attribute does not
406 ->set_attribute(class=>'paragraph')
408 ->add_to_attribute(name=>'class', value=>'divider');
410 Attributes with more than one value will have a dividing space.
412 =head2 remove_attribute
414 Removes an attribute and all its values.
418 ->set_attribute(class=>'paragraph')
420 ->remove_attribute('class');
422 Removes attributes from the original stream or events already added.
426 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
427 optional common options as a hash reference.
431 =item into [ARRAY REFERENCE]
433 Where to save collected events (selected elements).
435 $z1->select('#main-content')
436 ->collect({ into => \@body })
438 $z2->select('#main-content')
444 Run a filter on the collected elements (locally setting $_ to the stream, and passing
445 the stream as an argument to the given code reference). The filtered stream would be
450 filter => sub { $_->select('.inner')->replace_content('bar!') },
454 It can be used to further filter selection. For example
458 filter => sub { $_->select('td') },
462 is equivalent to the (not yet implemented) descendant selector combination, i.e.
466 =item passthrough [BOOLEAN]
468 Extract copy of elements; the stream is unchanged (it does not remove collected
469 elements). For example without 'passthrough'
471 HTML::Zoom->from_html('<foo><bar /></foo>')
473 ->collect({ content => 1 })
476 returns '<foo></foo>', while with C<passthrough> option
478 HTML::Zoom->from_html('<foo><bar /></foo>')
480 ->collect({ content => 1, passthrough => 1 })
483 returns '<foo><bar /></foo>'.
485 =item content [BOOLEAN]
487 Collect content of the element, and not the element itself.
491 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
496 would return '<p>foo</p>', while
498 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
500 ->collect({ content => 1 })
503 would return '<h1></h1><p>foo</p>'.
505 See also L</collect_content>.
507 =item flush_before [BOOLEAN]
509 Generate C<flush> event before collecting, to ensure that the HTML generated up
510 to the selected element being collected is flushed through to the browser. Usually
511 used in L</repeat> or L</repeat_content>.
515 =head2 collect_content
517 Collects contents of L<HTML::Zoom/select> result.
519 HTML::Zoom->from_file($foo)
520 ->select('#main-content')
521 ->collect_content({ into => \@foo_body })
524 ->replace_content(\@foo_body)
527 Equivalent to running L</collect> with C<content> option set.
531 Given a L<HTML::Zoom/select> result, add the given content (which might be a string,
532 an array or another L<HTML::Zoom> object) before it.
535 ->select('input[name="foo"]')
536 ->add_before(\ '<span class="warning">required field</span>');
540 Like L</add_before>, but adds the content after the L<HTML::Zoom/select> result.
546 You can add zoom events directly
550 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
552 =head2 prepend_content
556 =head2 append_content
562 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
563 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
564 (via hash reference).
566 =head2 replace_content
568 Given a L<HTML::Zoom/select> result, replace the content with a string, array
569 or another L<HTML::Zoom> object.
572 ->select('title, #greeting')
573 ->replace_content('Hello world!');
577 $zoom->select('.item')->repeat(sub {
578 if (my $row = $db_thing->next) {
579 return sub { $_->select('.item-name')->replace_content($row->name) }
583 }, { flush_before => 1 });
585 Run I<$repeat_for>, which should be an iterator (code reference) returning
586 subroutines, a reference to an array of subroutines, or another zoom-able object
587 consisting of transformations. Those subroutines are run with $_
588 localized to the result of L<HTML::Zoom/select> (the collected elements), and with
589 that result passed as a parameter to the subroutine.
591 You might want to use an iterator when you don't have all elements upfront
593 $zoom = $zoom->select('.contents')->repeat(sub {
594 while (my $line = $fh->getline) {
596 $_->select('.lno')->replace_content($fh->input_line_number)
597 ->select('.line')->replace_content($line)
603 You might want to use an array reference if it doesn't matter that all iterations
606 $zoom->select('table')->repeat([
610 $_->select('td')->replace_content($e);
615 In addition to common options as in L</collect>, it also supports
619 =item repeat_between [SELECTOR]
621 Selects the object to be repeated between items. In the case of an array this object
622 is put between elements, in the case of an iterator it is put between results of
623 subsequent iterations, and in the case of a streamable it is put between events
626 See documentation for L</repeat_content>
630 =head2 repeat_content
632 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
633 this result to this iterator. Accepts the same options as L</repeat>.
635 Equivalent to using the C<content> option with L</repeat>.
642 $_->select('.name')->replace_content('Matt')
643 ->select('.age')->replace_content('26')
646 $_->select('.name')->replace_content('Mark')
647 ->select('.age')->replace_content('0x29')
650 $_->select('.name')->replace_content('Epitaph')
651 ->select('.age')->replace_content('<redacted>')
654 { repeat_between => '.between' }
664 See L<HTML::Zoom> for authors.
668 See L<HTML::Zoom> for the license.