1 package HTML::Zoom::FilterBuilder;
4 use base qw(HTML::Zoom::SubObject);
5 use HTML::Zoom::CodeStream;
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to stream_from_code on the stream_utils object reached through
# _zconfig. Within this file it is called with a single code reference.
7 sub _stream_from_code {
8 shift->_zconfig->stream_utils->stream_from_code(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to stream_from_array on the stream_utils object reached through
# _zconfig. Callers in this file pass a flat list of events.
11 sub _stream_from_array {
12 shift->_zconfig->stream_utils->stream_from_array(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to stream_from_proto on the stream_utils object reached through
# _zconfig.
15 sub _stream_from_proto {
16 shift->_zconfig->stream_utils->stream_from_proto(@_)
# Forwards all arguments after the invocant to stream_concat on the
# stream_utils object reached through _zconfig.
# NOTE(review): the enclosing sub header is not visible here; presumably this
# is the body of _stream_concat, which other methods in this file call.
20 shift->_zconfig->stream_utils->stream_concat(@_)
# Thin delegator: shifts the invocant off @_ and forwards the remaining
# arguments to flatten_stream_of_streams on the stream_utils object reached
# through _zconfig.
23 sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
# NOTE(review): the sub header is not visible here; presumably this is the
# body of set_attribute - confirm.
# Normalise the arguments into (name, value); the value comes back already
# HTML-escaped by _parse_attribute_args.
29 my ($name, $value) = $self->_parse_attribute_args(@_);
# $evt is the event handed to the handler, $a its attribute hash.
31 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute was already present on the event.
32 my $e = exists $a->{$name};
# Build a new event from a shallow copy: raw and raw_attrs are reset to
# undef and the named attribute is overwritten with the new value.
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Normalise attribute arguments into a (name, value) pair.
# Returns the name unchanged and the value passed through html_escape on the
# parser obtained from _zconfig.
42 sub _parse_attribute_args {
44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
# With more than one argument the first two are taken positionally;
# otherwise the single argument is treated as a hash reference and its
# name and value keys are read.
46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
# Deprecated entry point: always dies, pointing callers at add_to_attribute.
# NOTE(review): the enclosing sub header is not visible here.
51 die "renamed to add_to_attribute. killing this entirely for 1.0";
# Append a value to an attribute, creating the attribute when it is absent.
# Accepts the argument forms understood by _parse_attribute_args.
54 sub add_to_attribute {
# The value is already HTML-escaped when it comes back from the parser helper.
56 my ($name, $value) = $self->_parse_attribute_args(@_);
# $evt is the event handed to the handler, $a its attribute hash.
58 my $a = (my $evt = $_[0])->{attrs};
# Remember whether the attribute was already present on the event.
59 my $e = exists $a->{$name};
# Build a new event from a shallow copy with raw and raw_attrs reset to undef.
60 +{ %$evt, raw => undef, raw_attrs => undef,
# An existing value is kept and joined to the new one with a single space;
# when the attribute is absent the new value stands alone.
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
65 ($e # add to name list if not present
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
# Remove an attribute from matching events.
# $args is either the attribute name itself or a hash reference whose name
# key holds it.
72 sub remove_attribute {
73 my ($self, $args) = @_;
74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
# $evt is the event handed to the handler, $a its attribute hash.
76 my $a = (my $evt = $_[0])->{attrs};
# Nothing to do when the attribute is absent: hand the event back untouched.
77 return $evt unless exists $a->{$name};
# Work on a shallow copy so the original attribute hash is not modified.
78 $a = { %$a }; delete $a->{$name};
# Build a new event with raw and raw_attrs reset to undef ...
79 +{ %$evt, raw => undef, raw_attrs => undef,
# ... and the removed name filtered out of the ordered name list.
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
# NOTE(review): the sub header is not visible here; the option names below
# suggest this is the body of collect - confirm.
# $options is a hash reference; the five recognised keys are unpacked here.
87 my ($self, $options) = @_;
88 my ($into, $passthrough, $content, $filter, $flush_before) =
89 @{$options}{qw(into passthrough content filter flush_before)};
# Handler arguments: the matched event and the stream it was read from.
91 my ($evt, $stream) = @_;
92 # We wipe the contents of @$into here so that other actions depending
93 # on this (such as a repeater) can be invoked multiple times easily.
94 # I -suspect- it's better for that state reset to be managed here; if it
95 # ever becomes painful the decision should be revisited
# Reset @$into: empty when only the content is wanted, otherwise seeded with
# the matched event itself.
97 @$into = $content ? () : ($evt);
# An in-place close has nothing further to read; in passthrough or content
# mode the event is handed back as is.
99 if ($evt->{is_in_place_close}) {
100 return $evt if $passthrough || $content;
103 my $name = $evt->{name};
# Content mode looks at the next event with peek instead of consuming it;
# the explicit "$stream->next if $content" further down does the consuming.
105 my $_next = $content ? 'peek' : 'next';
# Run the caller's filter with $_ localised to the stream and the stream also
# passed as the first argument; its return value replaces $stream.
108 $stream = do { local $_ = $stream; $filter->($stream) };
111 local $_ = $self->_stream_concat(
112 $self->_stream_from_array($evt),
117 $evt = $stream->next;
# Stream backed by a code reference that walks the underlying stream.
120 my $collector = $self->_stream_from_code(sub {
121 return unless $stream;
122 while (my ($evt) = $stream->$_next) {
# OPEN events raise the nesting depth, CLOSE events lower it.
123 $depth++ if ($evt->{type} eq 'OPEN');
124 $depth-- if ($evt->{type} eq 'CLOSE');
# Record the event when a target array was supplied; emit it only in
# passthrough mode.
128 push(@$into, $evt) if $into;
129 return $evt if $passthrough;
132 push(@$into, $evt) if $into;
133 $stream->next if $content;
134 return $evt if $passthrough;
# The stream ran out before the element's closing tag was seen.
136 die "Never saw closing </${name}> before end of source";
# In passthrough or content mode the flush flag goes on a copy of the event.
139 if ($passthrough||$content) {
140 $evt = { %$evt, flush => 1 };
# Alternative flush carrier: a bare EMPTY event.
142 $evt = { type => 'EMPTY', flush => 1 };
# With passthrough, content or flush_before set, return the event together
# with the collector stream as a two-element array reference.
145 return ($passthrough||$content||$flush_before)
146 ? [ $evt, $collector ]
# Convenience wrapper: calls collect with content => 1 forced on.
# $options may be undef; it is shallow-copied into a new hash, so the
# caller's hash reference is not modified by this wrapper.
151 sub collect_content {
152 my ($self, $options) = @_;
153 $self->collect({ %{$options||{}}, content => 1 })
# NOTE(review): the sub header is not visible here; presumably this is the
# body of add_before - confirm.
# $events is an array reference of events.
157 my ($self, $events) = @_;
# Returns a handler producing an array-backed stream of the given events
# followed by the handler's first argument.
158 sub { return $self->_stream_from_array(@$events, $_[0]) };
# NOTE(review): the sub header is not visible here; presumably this is the
# body of add_after - confirm.
# $events is an array reference of events.
162 my ($self, $events) = @_;
# A passthrough collector is used so the selected events stay in the stream.
163 my $coll_proto = $self->collect({ passthrough => 1 });
166 my $emit = $self->_stream_from_array(@$events);
# &$coll_proto without parentheses calls the collector with the current @_.
167 my $coll = &$coll_proto;
168 return ref($coll) eq 'HASH' # single event, no collect
# Otherwise $coll is [ event, stream ]: append the new events after the
# collected stream.
170 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# Insert the given events directly after the matched event.
# $events is an array reference of events.
174 sub prepend_content {
175 my ($self, $events) = @_;
# An in-place close has no separate closing event in the stream.
178 if ($evt->{is_in_place_close}) {
# Work on a shallow copy with raw and is_in_place_close removed ...
179 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# ... then return it together with a stream holding the new events followed
# by a CLOSE event of the same name.
180 return [ $evt, $self->_stream_from_array(
181 @$events, { type => 'CLOSE', name => $evt->{name} }
# Ordinary case: the matched event followed by the new events.
184 return $self->_stream_from_array($evt, @$events);
# NOTE(review): the sub header is not visible here; presumably this is the
# body of append_content - confirm.
# $events is an array reference of events.
189 my ($self, $events) = @_;
# Collect the content in passthrough mode so it stays in the stream.
190 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
# An in-place close has no separate closing event in the stream.
193 if ($evt->{is_in_place_close}) {
# Work on a shallow copy with raw and is_in_place_close removed ...
194 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
# ... then return it together with a stream holding the new events followed
# by a CLOSE event of the same name.
195 return [ $evt, $self->_stream_from_array(
196 @$events, { type => 'CLOSE', name => $evt->{name} }
# &$coll_proto without parentheses calls the collector with the current @_.
199 my $coll = &$coll_proto;
200 my $emit = $self->_stream_from_array(@$events);
# Emit the new events after the collected stream.
201 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
# NOTE(review): the sub header is not visible here; replace_content calls
# $self->replace with this argument list, so presumably this is the body of
# replace - confirm.
# $options is handed to collect unchanged.
206 my ($self, $replace_with, $options) = @_;
207 my $coll_proto = $self->collect($options);
# Handler arguments: the matched event and the stream it was read from.
209 my ($evt, $stream) = @_;
# Turn the replacement into a stream.
210 my $emit = $self->_stream_from_proto($replace_with);
# &$coll_proto without parentheses calls the collector with the current @_.
211 my $coll = &$coll_proto;
212 # if we're replacing the contents of an in place close
213 # then we need to handle that here
214 if ($options->{content}
215 && ref($coll) eq 'HASH'
216 && $coll->{is_in_place_close}
# Take the next event from the stream to serve as the closing event.
218 my $close = $stream->next;
219 # shallow copy and nuke in place and raw (to force smart print)
220 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
# Extend the emitted stream so that it includes the closing event.
221 $emit = $self->_stream_concat(
223 $self->_stream_from_array($close),
226 # For a straightforward replace operation we can, in fact, do the emit
227 # -before- the collect, and my first cut did so. However in order to
228 # use the captured content in generating the new content, we need
229 # the collect stage to happen first - and it seems highly unlikely
230 # that in normal operation the collect phase will take long enough
231 # for the difference to be noticeable
# The result depends on what the collector returned: an [ event, stream ]
# pair keeps its event and has the replacement appended to its stream, while
# a bare stream is concatenated with the replacement.
234 ? (ref $coll eq 'ARRAY' # [ event, stream ]
235 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
236 : (ref $coll eq 'HASH' # event or stream?
238 : $self->_stream_concat($coll, $emit))
# Convenience wrapper: calls replace with content => 1 forced on.
# $options may be undef; it is shallow-copied into a new hash, so the
# caller's hash reference is not modified by this wrapper.
245 sub replace_content {
246 my ($self, $replace_with, $options) = @_;
247 $self->replace($replace_with, { %{$options||{}}, content => 1 })
# NOTE(review): the sub header is not visible here; presumably this is the
# body of repeat - confirm.
251 my ($self, $repeat_for, $options) = @_;
# NOTE(review): the next lines write to and delete from the caller's $options
# hash reference (into is set, repeat_between is removed, filter may be set),
# so a caller reusing the same hash will see those changes.
# Collected events land in the lexical @into.
252 $options->{into} = \my @into;
254 my $repeat_between = delete $options->{repeat_between};
# When a repeat_between selector is given, install a filter that collects the
# events matching that selector into @between.
255 if ($repeat_between) {
256 $options->{filter} = sub {
257 $_->select($repeat_between)->collect({ into => \@between })
# Turn the repeat specification into a stream.
261 my $s = $self->_stream_from_proto($repeat_for);
262 # We have to test $repeat_between not @between here because
263 # at the point we're constructing our return stream @between
264 # hasn't been populated yet - but we can test @between in the
265 # map routine because it has been by then and that saves us doing
266 # the extra stream construction if we don't need it.
267 $self->_flatten_stream_of_streams(do {
268 if ($repeat_between) {
# Each iteration runs with $_ localised to a fresh stream over @into.
270 local $_ = $self->_stream_from_array(@into);
# The between events are appended only when some were collected and $s->peek
# is true.
271 (@between && $s->peek)
272 ? $self->_stream_concat(
273 $_[0]->($_), $self->_stream_from_array(@between)
# Variant without repeat_between: the same localised stream over @into.
279 local $_ = $self->_stream_from_array(@into);
# Delegate to replace, passing the repeater and the (modified) options.
285 $self->replace($repeater, $options);
# NOTE(review): the sub header is not visible here; presumably this is the
# body of repeat_content - confirm.
# Calls repeat with content => 1 forced on. $options may be undef; it is
# shallow-copied first, so repeat works on the copy rather than on the
# caller's hash.
289 my ($self, $repeat_for, $options) = @_;
290 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
297 HTML::Zoom::FilterBuilder - Add Filters to a Stream
301 Create an L<HTML::Zoom> instance:
304 my $root = HTML::Zoom
308 <title>Default Title</title>
310 <body bad_attr='junk'>
316 Create a new attribute on the C<body> tag:
320 ->set_attribute(class=>'main');
322 Add an extra value to an existing attribute:
326 ->add_to_attribute(class=>'one-column');
328 Set the content of the C<title> tag:
332 ->replace_content('Hello World');
334 Set content from another L<HTML::Zoom> instance:
336 my $body = HTML::Zoom
340 <p id="p2">Is the Time</p>
346 ->replace_content($body);
348 Set an attribute on multiple matches:
352 ->set_attribute(class=>'para');
358 ->remove_attribute('bad_attr');
364 my $output = $root->to_html;
371 <title>Hello World</title>
373 <body class="main one-column"><div id="stuff">
374 <p class="para">Well Now</p>
375 <p id="p2" class="para">Is the Time</p>
383 is($output, $expect, 'Synopsis code works ok');
389 Given a L<HTML::Zoom> stream, provide methods to apply filters which
390 alter the content of that stream.
394 This class defines the following public API
398 Sets an attribute of a given name to a given value for all matching selections.
402 ->set_attribute(class=>'paragraph')
404 ->set_attribute({ name=>'class', value=>'divider' });
407 Overrides existing values, if such exist. When multiple L</set_attribute>
408 calls are made against the same or overlapping selection sets, the final
411 =head2 add_to_attribute
413 Adds a value to an existing attribute, or creates one if the attribute does not
418 ->set_attribute(class=>'paragraph')
420 ->add_to_attribute({ name=>'class', value=>'divider' });
422 Attributes with more than one value will have a dividing space.
424 =head2 remove_attribute
426 Removes an attribute and all its values.
430 ->set_attribute(class=>'paragraph')
432 ->remove_attribute('class');
434 Removes attributes from the original stream or events already added.
438 Collects and extracts results of L<HTML::Zoom/select>. It takes the following
439 optional common options as a hash reference.
443 =item into [ARRAY REFERENCE]
445 Where to save collected events (selected elements).
447 $z1->select('#main-content')
448 ->collect({ into => \@body })
450 $z2->select('#main-content')
456 Run filter on collected elements (locally setting $_ to stream, and passing
457 stream as an argument to given code reference). Filtered stream would be
462 filter => sub { $_->select('.inner')->replace_content('bar!') },
466 It can be used to further filter selection. For example
470 filter => sub { $_->select('td') },
474 is equivalent to (not implemented yet) descendant selector combination, i.e.
478 =item passthrough [BOOLEAN]
480 Extract a copy of the elements; the stream is unchanged (it does not remove the
481 collected elements). For example, without C<passthrough>
483 HTML::Zoom->from_html('<foo><bar /></foo>')
485 ->collect({ content => 1 })
488 returns '<foo></foo>', while with C<passthrough> option
490 HTML::Zoom->from_html('<foo><bar /></foo>')
492 ->collect({ content => 1, passthrough => 1 })
495 returns '<foo><bar /></foo>'.
497 =item content [BOOLEAN]
499 Collect content of the element, and not the element itself.
503 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
508 would return '<p>foo</p>', while
510 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
512 ->collect({ content => 1 })
515 would return '<h1></h1><p>foo</p>'.
517 See also L</collect_content>.
519 =item flush_before [BOOLEAN]
521 Generate a C<flush> event before collecting, to ensure that the HTML generated up
522 to the selected element being collected is flushed through to the browser. Usually
523 used in L</repeat> or L</repeat_content>.
527 =head2 collect_content
529 Collects contents of L<HTML::Zoom/select> result.
531 HTML::Zoom->from_file($foo)
532 ->select('#main-content')
533 ->collect_content({ into => \@foo_body })
536 ->replace_content(\@foo_body)
539 Equivalent to running L</collect> with C<content> option set.
543 Given a L<HTML::Zoom/select> result, add given content (which might be string,
544 array or another L<HTML::Zoom> object) before it.
547 ->select('input[name="foo"]')
548 ->add_before(\ '<span class="warning">required field</span>');
552 Like L</add_before>, only after L<HTML::Zoom/select> result.
558 You can add zoom events directly
562 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
564 =head2 prepend_content
568 =head2 append_content
574 Given a L<HTML::Zoom/select> result, replace it with a string, array or another
575 L<HTML::Zoom> object. It takes the same optional common options as L</collect>
576 (via hash reference).
578 =head2 replace_content
580 Given a L<HTML::Zoom/select> result, replace the content with a string, array
581 or another L<HTML::Zoom> object.
584 ->select('title, #greeting')
585 ->replace_content('Hello world!');
589 $zoom->select('.item')->repeat(sub {
590 if (my $row = $db_thing->next) {
591 return sub { $_->select('.item-name')->replace_content($row->name) }
595 }, { flush_before => 1 });
597 Run I<$repeat_for>, which should be an iterator (code reference) returning
598 subroutines, a reference to an array of subroutines, or another zoom-able object
599 consisting of transformations. Those subroutines are run with $_
600 localized to the result of L<HTML::Zoom/select> (the collected elements), and with
601 that result passed as a parameter to the subroutine.
603 You might want to use an iterator when you don't have all elements upfront
605 $zoom = $zoom->select('.contents')->repeat(sub {
606 while (my $line = $fh->getline) {
608 $_->select('.lno')->replace_content($fh->input_line_number)
609 ->select('.line')->replace_content($line)
615 You might want to use an array reference if it doesn't matter that all iterations
618 $zoom->select('table')->repeat([
622 $_->select('td')->replace_content($e);
627 In addition to common options as in L</collect>, it also supports
631 =item repeat_between [SELECTOR]
633 Selects the object to be repeated between items. In the case of an array this object
634 is put between elements, in the case of an iterator it is put between results of
635 subsequent iterations, in the case of a streamable it is put between events
638 See documentation for L</repeat_content>
642 =head2 repeat_content
644 Given a L<HTML::Zoom/select> result, run provided iterator passing content of
645 this result to this iterator. Accepts the same options as L</repeat>.
647 Equivalent to using the C<content> option with L</repeat>.
654 $_->select('.name')->replace_content('Matt')
655 ->select('.age')->replace_content('26')
658 $_->select('.name')->replace_content('Mark')
659 ->select('.age')->replace_content('0x29')
662 $_->select('.name')->replace_content('Epitaph')
663 ->select('.age')->replace_content('<redacted>')
666 { repeat_between => '.between' }
676 See L<HTML::Zoom> for authors.
680 See L<HTML::Zoom> for the license.