add strictures commit (out of order)
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / FilterBuilder.pm
CommitLineData
456a815d 1package HTML::Zoom::FilterBuilder;
2
1cf03540 3use strictures 1;
d80786d0 4use base qw(HTML::Zoom::SubObject);
456a815d 5use HTML::Zoom::CodeStream;
6
456a815d 7sub _stream_from_code {
d80786d0 8 shift->_zconfig->stream_utils->stream_from_code(@_)
456a815d 9}
10
11sub _stream_from_array {
d80786d0 12 shift->_zconfig->stream_utils->stream_from_array(@_)
456a815d 13}
14
3cdbc13f 15sub _stream_from_proto {
d80786d0 16 shift->_zconfig->stream_utils->stream_from_proto(@_)
3cdbc13f 17}
18
456a815d 19sub _stream_concat {
d80786d0 20 shift->_zconfig->stream_utils->stream_concat(@_)
456a815d 21}
22
6d0f20a6 23sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
25}
26
456a815d 27sub set_attribute {
1c4455ae 28 my $self = shift;
29 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 30 sub {
8f962884 31 my $a = (my $evt = $_[0])->{attrs};
456a815d 32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
36 ? ()
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
38 }
39 };
40}
41
1c4455ae 42sub _parse_attribute_args {
43 my $self = shift;
2daa653a 44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
1c4455ae 46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
48}
49
456a815d 50sub add_attribute {
2daa653a 51 die "renamed to add_to_attribute. killing this entirely for 1.0";
52}
53
54sub add_to_attribute {
1c4455ae 55 my $self = shift;
56 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 57 sub {
8f962884 58 my $a = (my $evt = $_[0])->{attrs};
456a815d 59 my $e = exists $a->{$name};
60 +{ %$evt, raw => undef, raw_attrs => undef,
61 attrs => {
62 %$a,
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
64 },
65 ($e # add to name list if not present
66 ? ()
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
68 }
69 };
70}
71
72sub remove_attribute {
73 my ($self, $args) = @_;
1c4455ae 74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
456a815d 75 sub {
8f962884 76 my $a = (my $evt = $_[0])->{attrs};
456a815d 77 return $evt unless exists $a->{$name};
78 $a = { %$a }; delete $a->{$name};
79 +{ %$evt, raw => undef, raw_attrs => undef,
80 attrs => $a,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
82 }
83 };
84}
85
76cecb10 86sub collect {
87 my ($self, $options) = @_;
1c4455ae 88 my ($into, $passthrough, $content, $filter, $flush_before) =
89 @{$options}{qw(into passthrough content filter flush_before)};
76cecb10 90 sub {
91 my ($evt, $stream) = @_;
b4d044eb 92 # We wipe the contents of @$into here so that other actions depending
93 # on this (such as a repeater) can be invoked multiple times easily.
94 # I -suspect- it's better for that state reset to be managed here; if it
95 # ever becomes painful the decision should be revisited
96 if ($into) {
865bb5d2 97 @$into = $content ? () : ($evt);
b4d044eb 98 }
76cecb10 99 if ($evt->{is_in_place_close}) {
865bb5d2 100 return $evt if $passthrough || $content;
76cecb10 101 return;
102 }
103 my $name = $evt->{name};
104 my $depth = 1;
865bb5d2 105 my $_next = $content ? 'peek' : 'next';
d80786d0 106 $stream = do { local $_ = $stream; $filter->($stream) } if $filter;
76cecb10 107 my $collector = $self->_stream_from_code(sub {
108 return unless $stream;
109 while (my ($evt) = $stream->$_next) {
110 $depth++ if ($evt->{type} eq 'OPEN');
111 $depth-- if ($evt->{type} eq 'CLOSE');
112 unless ($depth) {
113 undef $stream;
865bb5d2 114 return if $content;
76cecb10 115 push(@$into, $evt) if $into;
116 return $evt if $passthrough;
117 return;
118 }
119 push(@$into, $evt) if $into;
865bb5d2 120 $stream->next if $content;
76cecb10 121 return $evt if $passthrough;
122 }
123 die "Never saw closing </${name}> before end of source";
124 });
1c4455ae 125 if ($flush_before) {
126 if ($passthrough||$content) {
127 $evt = { %$evt, flush => 1 };
128 } else {
129 $evt = { type => 'EMPTY', flush => 1 };
130 }
131 }
132 return ($passthrough||$content||$flush_before)
133 ? [ $evt, $collector ]
134 : $collector;
76cecb10 135 };
136}
137
865bb5d2 138sub collect_content {
139 my ($self, $options) = @_;
140 $self->collect({ %{$options||{}}, content => 1 })
141}
142
456a815d 143sub add_before {
144 my ($self, $events) = @_;
8f962884 145 sub { return $self->_stream_from_array(@$events, $_[0]) };
456a815d 146}
147
148sub add_after {
149 my ($self, $events) = @_;
b616863d 150 my $coll_proto = $self->collect({ passthrough => 1 });
456a815d 151 sub {
8f962884 152 my ($evt) = @_;
456a815d 153 my $emit = $self->_stream_from_array(@$events);
b616863d 154 my $coll = &$coll_proto;
995bc8be 155 return ref($coll) eq 'HASH' # single event, no collect
156 ? [ $coll, $emit ]
157 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
456a815d 158 };
8f962884 159}
456a815d 160
865bb5d2 161sub prepend_content {
456a815d 162 my ($self, $events) = @_;
163 sub {
8f962884 164 my ($evt) = @_;
456a815d 165 if ($evt->{is_in_place_close}) {
166 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
167 return [ $evt, $self->_stream_from_array(
168 @$events, { type => 'CLOSE', name => $evt->{name} }
169 ) ];
170 }
171 return $self->_stream_from_array($evt, @$events);
172 };
173}
174
865bb5d2 175sub append_content {
8f962884 176 my ($self, $events) = @_;
865bb5d2 177 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
8f962884 178 sub {
179 my ($evt) = @_;
180 if ($evt->{is_in_place_close}) {
181 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
182 return [ $evt, $self->_stream_from_array(
183 @$events, { type => 'CLOSE', name => $evt->{name} }
184 ) ];
185 }
b616863d 186 my $coll = &$coll_proto;
8f962884 187 my $emit = $self->_stream_from_array(@$events);
188 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
189 };
190}
191
456a815d 192sub replace {
3cdbc13f 193 my ($self, $replace_with, $options) = @_;
b616863d 194 my $coll_proto = $self->collect($options);
456a815d 195 sub {
196 my ($evt, $stream) = @_;
3cdbc13f 197 my $emit = $self->_stream_from_proto($replace_with);
b616863d 198 my $coll = &$coll_proto;
a88c1c57 199 # if we're replacing the contents of an in place close
200 # then we need to handle that here
201 if ($options->{content}
202 && ref($coll) eq 'HASH'
ec687101 203 && $coll->{is_in_place_close}
a88c1c57 204 ) {
a88c1c57 205 my $close = $stream->next;
ec687101 206 # shallow copy and nuke in place and raw (to force smart print)
207 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
a88c1c57 208 $emit = $self->_stream_concat(
209 $emit,
210 $self->_stream_from_array($close),
211 );
212 }
451b3b30 213 # For a straightforward replace operation we can, in fact, do the emit
214 # -before- the collect, and my first cut did so. However in order to
215 # use the captured content in generating the new content, we need
216 # the collect stage to happen first - and it seems highly unlikely
217 # that in normal operation the collect phase will take long enough
218 # for the difference to be noticeable
11cc25dd 219 return
220 ($coll
a88c1c57 221 ? (ref $coll eq 'ARRAY' # [ event, stream ]
451b3b30 222 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
a88c1c57 223 : (ref $coll eq 'HASH' # event or stream?
224 ? [ $coll, $emit ]
225 : $self->_stream_concat($coll, $emit))
11cc25dd 226 )
227 : $emit
228 );
456a815d 229 };
230}
231
865bb5d2 232sub replace_content {
233 my ($self, $replace_with, $options) = @_;
234 $self->replace($replace_with, { %{$options||{}}, content => 1 })
235}
236
3cdbc13f 237sub repeat {
238 my ($self, $repeat_for, $options) = @_;
239 $options->{into} = \my @into;
f8ed299b 240 my @between;
241 my $repeat_between = delete $options->{repeat_between};
242 if ($repeat_between) {
f8ed299b 243 $options->{filter} = sub {
d80786d0 244 $_->select($repeat_between)->collect({ into => \@between })
f8ed299b 245 };
246 }
3cdbc13f 247 my $repeater = sub {
f8ed299b 248 my $s = $self->_stream_from_proto($repeat_for);
249 # We have to test $repeat_between not @between here because
250 # at the point we're constructing our return stream @between
251 # hasn't been populated yet - but we can test @between in the
252 # map routine because it has been by then and that saves us doing
253 # the extra stream construction if we don't need it.
6d0f20a6 254 $self->_flatten_stream_of_streams(do {
255 if ($repeat_between) {
256 $s->map(sub {
257 local $_ = $self->_stream_from_array(@into);
258 (@between && $s->peek)
259 ? $self->_stream_concat(
260 $_[0]->($_), $self->_stream_from_array(@between)
261 )
262 : $_[0]->($_)
263 })
264 } else {
265 $s->map(sub {
266 local $_ = $self->_stream_from_array(@into);
267 $_[0]->($_)
f8ed299b 268 })
6d0f20a6 269 }
270 })
3cdbc13f 271 };
272 $self->replace($repeater, $options);
273}
274
865bb5d2 275sub repeat_content {
276 my ($self, $repeat_for, $options) = @_;
277 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
278}
279
456a815d 2801;
556c8616 281
282=head1 NAME
283
284HTML::Zoom::FilterBuilder - Add Filters to a Stream
285
244252e7 286=head1 SYNOPSIS
287
a42917f6 288Create an L<HTML::Zoom> instance:
289
0d8f057e 290 use HTML::Zoom;
291 my $root = HTML::Zoom
292 ->from_html(<<MAIN);
293 <html>
294 <head>
295 <title>Default Title</title>
296 </head>
a42917f6 297 <body bad_attr='junk'>
0d8f057e 298 Default Content
299 </body>
300 </html>
301 MAIN
302
a42917f6 303Create a new attribute on the C<body> tag:
304
305 $root = $root
306 ->select('body')
307 ->set_attribute(class=>'main');
308
309Add a extra value to an existing attribute:
310
311 $root = $root
312 ->select('body')
313 ->add_to_attribute(class=>'one-column');
314
315Set the content of the C<title> tag:
316
317 $root = $root
318 ->select('title')
319 ->replace_content('Hello World');
320
321Set content from another L<HTML::Zoom> instance:
322
0d8f057e 323 my $body = HTML::Zoom
324 ->from_html(<<BODY);
325 <div id="stuff">
2daa653a 326 <p>Well Now</p>
f8ad684d 327 <p id="p2">Is the Time</p>
0d8f057e 328 </div>
329 BODY
330
a42917f6 331 $root = $root
f8ad684d 332 ->select('body')
a42917f6 333 ->replace_content($body);
334
335Set an attribute on multiple matches:
336
337 $root = $root
f8ad684d 338 ->select('p')
a42917f6 339 ->set_attribute(class=>'para');
340
341Remove an attribute:
342
343 $root = $root
344 ->select('body')
345 ->remove_attribute('bad_attr');
0d8f057e 346
347will produce:
348
349=begin testinfo
350
a42917f6 351 my $output = $root->to_html;
0d8f057e 352 my $expect = <<HTML;
353
354=end testinfo
355
356 <html>
357 <head>
358 <title>Hello World</title>
359 </head>
434a11c8 360 <body class="main one-column"><div id="stuff">
adb30a8a 361 <p class="para">Well Now</p>
a42917f6 362 <p id="p2" class="para">Is the Time</p>
0d8f057e 363 </div>
364 </body>
365 </html>
366
367=begin testinfo
368
369 HTML
370 is($output, $expect, 'Synopsis code works ok');
371
372=end testinfo
244252e7 373
556c8616 374=head1 DESCRIPTION
375
376Given a L<HTML::Zoom> stream, provide methods to apply filters which
377alter the content of that stream.
378
f6644c71 379=head1 METHODS
380
381This class defines the following public API
382
e225a4bd 383=head2 set_attribute
f6644c71 384
f8ad684d 385Sets an attribute of a given name to a given value for all matching selections.
386
387 $html_zoom
388 ->select('p')
389 ->set_attribute(class=>'paragraph')
390 ->select('div')
434a11c8 391 ->set_attribute(name=>'class', value=>'divider');
392
f8ad684d 393
394Overrides existing values, if such exist. When multiple L</set_attribute>
395calls are made against the same or overlapping selection sets, the final
396call wins.
f6644c71 397
e225a4bd 398=head2 add_to_attribute
f6644c71 399
434a11c8 400Adds a value to an existing attribute, or creates one if the attribute does not
401yet exist.
f6644c71 402
434a11c8 403 $html_zoom
404 ->select('p')
405 ->set_attribute(class=>'paragraph')
406 ->then
407 ->add_to_attribute(name=>'class', value=>'divider');
f6644c71 408
434a11c8 409Attributes with more than one value will have a dividing space.
410
e225a4bd 411=head2 remove_attribute
434a11c8 412
413Removes an attribute and all its values.
414
415 $html_zoom
416 ->select('p')
417 ->set_attribute(class=>'paragraph')
418 ->then
419 ->remove_attribute('class');
420
421Removes attributes from the original stream or events already added.
f6644c71 422
423=head2 collect
424
ac3acd87 425Collects and extracts results of L<HTML::Zoom/select>. It takes the following
426optional common options as hash reference.
427
428=over
429
430=item into [ARRAY REFERENCE]
431
432Where to save collected events (selected elements).
433
434 $z1->select('#main-content')
435 ->collect({ into => \@body })
436 ->run;
437 $z2->select('#main-content')
438 ->replace(\@body)
439 ->memoize;
440
441=item filter [CODE]
442
443Run filter on collected elements (locally setting $_ to stream, and passing
444stream as an argument to given code reference). Filtered stream would be
445returned.
446
447 $z->select('.outer')
448 ->collect({
449 filter => sub { $_->select('.inner')->replace_content('bar!') },
450 passthrough => 1,
451 })
452
453It can be used to further filter selection. For example
454
455 $z->select('tr')
456 ->collect({
457 filter => sub { $_->select('td') },
458 passthrough => 1,
459 })
460
461is equivalent to (not implemented yet) descendant selector combination, i.e.
462
463 $z->select('tr td')
464
465=item passthrough [BOOLEAN]
466
467Extract copy of elements; the stream is unchanged (it does not remove collected
468elements). For example without 'passthrough'
469
470 HTML::Zoom->from_html('<foo><bar /></foo>')
471 ->select('foo')
472 ->collect({ content => 1 })
473 ->to_html
474
475returns '<foo></foo>', while with C<passthrough> option
476
477 HTML::Zoom->from_html('<foo><bar /></foo>')
478 ->select('foo')
479 ->collect({ content => 1, passthough => 1 })
480 ->to_html
481
482returns '<foo><bar /></foo>'.
483
484=item content [BOOLEAN]
485
486Collect content of the element, and not the element itself.
487
488For example
489
490 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
491 ->select('h1')
492 ->collect
493 ->to_html
494
495would return '<p>foo</p>', while
496
497 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
498 ->select('h1')
499 ->collect({ content => 1 })
500 ->to_html
501
502would return '<h1></h1><p>foo</p>'.
503
504See also L</collect_content>.
505
506=item flush_before [BOOLEAN]
507
508Generate C<flush> event before collecting, to ensure that the HTML generated up
509to selected element being collected is flushed throught to the browser. Usually
510used in L</repeat> or L</repeat_content>.
511
512=back
f6644c71 513
514=head2 collect_content
515
ac3acd87 516Collects contents of L<HTML::Zoom/select> result.
517
518 HTML::Zoom->from_file($foo)
519 ->select('#main-content')
520 ->collect_content({ into => \@foo_body })
521 ->run;
522 $z->select('#foo')
523 ->replace_content(\@foo_body)
524 ->memoize;
525
526Equivalent to running L</collect> with C<content> option set.
f6644c71 527
528=head2 add_before
529
ac3acd87 530Given a L<HTML::Zoom/select> result, add given content (which might be string,
531array or another L<HTML::Zoom> object) before it.
532
533 $html_zoom
534 ->select('input[name="foo"]')
535 ->add_before(\ '<span class="warning">required field</span>');
f6644c71 536
537=head2 add_after
538
ac3acd87 539Like L</add_before>, only after L<HTML::Zoom/select> result.
540
541 $html_zoom
542 ->select('p')
543 ->add_after("\n\n");
544
545You can add zoom events directly
546
547 $html_zoom
548 ->select('p')
549 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
f6644c71 550
551=head2 prepend_content
552
553 TBD
554
555=head2 append_content
556
557 TBD
558
559=head2 replace
560
ac3acd87 561Given a L<HTML::Zoom/select> result, replace it with a string, array or another
562L<HTML::Zoom> object. It takes the same optional common options as L</collect>
563(via hash reference).
f6644c71 564
565=head2 replace_content
566
244252e7 567Given a L<HTML::Zoom/select> result, replace the content with a string, array
568or another L<HTML::Zoom> object.
f6644c71 569
ac3acd87 570 $html_zoom
571 ->select('title, #greeting')
572 ->replace_content('Hello world!');
573
f6644c71 574=head2 repeat
575
ac3acd87 576 $zoom->select('.item')->repeat(sub {
577 if (my $row = $db_thing->next) {
578 return sub { $_->select('.item-name')->replace_content($row->name) }
579 } else {
580 return
581 }
582 }, { flush_before => 1 });
583
584Run I<$repeat_for>, which should be iterator (code reference) returning
585subroutines, reference to array of subroutines, or other zoom-able object
586consisting of transformations. Those subroutines would be run with $_
587local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
588said result passed as parameter to subroutine.
589
590You might want to use iterator when you don't have all elements upfront
591
592 $zoom = $zoom->select('.contents')->repeat(sub {
593 while (my $line = $fh->getline) {
594 return sub {
595 $_->select('.lno')->replace_content($fh->input_line_number)
596 ->select('.line')->replace_content($line)
597 }
598 }
599 return
600 });
601
602You might want to use array reference if it doesn't matter that all iterations
603are pre-generated
604
605 $zoom->select('table')->repeat([
606 map {
607 my $elem = $_;
608 sub {
609 $_->select('td')->replace_content($e);
610 }
611 } @list
612 ]);
613
614In addition to common options as in L</collect>, it also supports
615
616=over
617
618=item repeat_between [SELECTOR]
619
620Selects object to be repeated between items. In the case of array this object
621is put between elements, in case of iterator it is put between results of
622subsequent iterations, in the case of streamable it is put between events
623(->to_stream->next).
624
625See documentation for L</repeat_content>
626
627=back
f6644c71 628
629=head2 repeat_content
630
ac3acd87 631Given a L<HTML::Zoom/select> result, run provided iterator passing content of
632this result to this iterator. Accepts the same options as L</repeat>.
633
634Equivalent to using C<contents> option with L</repeat>.
635
636 $html_zoom
637 ->select('#list')
638 ->repeat_content(
639 [
640 sub {
641 $_->select('.name')->replace_content('Matt')
642 ->select('.age')->replace_content('26')
643 },
644 sub {
645 $_->select('.name')->replace_content('Mark')
646 ->select('.age')->replace_content('0x29')
647 },
648 sub {
649 $_->select('.name')->replace_content('Epitaph')
650 ->select('.age')->replace_content('<redacted>')
651 },
652 ],
653 { repeat_between => '.between' }
654 );
655
f6644c71 656
556c8616 657=head1 ALSO SEE
658
659L<HTML::Zoom>
660
661=head1 AUTHORS
662
663See L<HTML::Zoom> for authors.
664
665=head1 LICENSE
666
667See L<HTML::Zoom> for the license.
668
669=cut
670