Merge branch 'master' of git://git.shadowcat.co.uk/catagits/HTML-Zoom
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / FilterBuilder.pm
CommitLineData
456a815d 1package HTML::Zoom::FilterBuilder;
2
456a815d 3use strict;
4use warnings FATAL => 'all';
d80786d0 5use base qw(HTML::Zoom::SubObject);
456a815d 6use HTML::Zoom::CodeStream;
7
456a815d 8sub _stream_from_code {
d80786d0 9 shift->_zconfig->stream_utils->stream_from_code(@_)
456a815d 10}
11
12sub _stream_from_array {
d80786d0 13 shift->_zconfig->stream_utils->stream_from_array(@_)
456a815d 14}
15
3cdbc13f 16sub _stream_from_proto {
d80786d0 17 shift->_zconfig->stream_utils->stream_from_proto(@_)
3cdbc13f 18}
19
456a815d 20sub _stream_concat {
d80786d0 21 shift->_zconfig->stream_utils->stream_concat(@_)
456a815d 22}
23
6d0f20a6 24sub _flatten_stream_of_streams {
25 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
26}
27
456a815d 28sub set_attribute {
1c4455ae 29 my $self = shift;
30 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 31 sub {
8f962884 32 my $a = (my $evt = $_[0])->{attrs};
456a815d 33 my $e = exists $a->{$name};
34 +{ %$evt, raw => undef, raw_attrs => undef,
35 attrs => { %$a, $name => $value },
36 ($e # add to name list if not present
37 ? ()
38 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
39 }
40 };
41}
42
1c4455ae 43sub _parse_attribute_args {
44 my $self = shift;
2daa653a 45 # allow ->add_to_attribute(name => 'value')
46 # or ->add_to_attribute({ name => 'name', value => 'value' })
1c4455ae 47 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
48 return ($name, $self->_zconfig->parser->html_escape($value));
49}
50
456a815d 51sub add_attribute {
2daa653a 52 die "renamed to add_to_attribute. killing this entirely for 1.0";
53}
54
55sub add_to_attribute {
1c4455ae 56 my $self = shift;
57 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 58 sub {
8f962884 59 my $a = (my $evt = $_[0])->{attrs};
456a815d 60 my $e = exists $a->{$name};
61 +{ %$evt, raw => undef, raw_attrs => undef,
62 attrs => {
63 %$a,
64 $name => join(' ', ($e ? $a->{$name} : ()), $value)
65 },
66 ($e # add to name list if not present
67 ? ()
68 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
69 }
70 };
71}
72
73sub remove_attribute {
74 my ($self, $args) = @_;
1c4455ae 75 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
456a815d 76 sub {
8f962884 77 my $a = (my $evt = $_[0])->{attrs};
456a815d 78 return $evt unless exists $a->{$name};
79 $a = { %$a }; delete $a->{$name};
80 +{ %$evt, raw => undef, raw_attrs => undef,
81 attrs => $a,
82 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
83 }
84 };
85}
86
76cecb10 87sub collect {
88 my ($self, $options) = @_;
1c4455ae 89 my ($into, $passthrough, $content, $filter, $flush_before) =
90 @{$options}{qw(into passthrough content filter flush_before)};
76cecb10 91 sub {
92 my ($evt, $stream) = @_;
b4d044eb 93 # We wipe the contents of @$into here so that other actions depending
94 # on this (such as a repeater) can be invoked multiple times easily.
95 # I -suspect- it's better for that state reset to be managed here; if it
96 # ever becomes painful the decision should be revisited
97 if ($into) {
865bb5d2 98 @$into = $content ? () : ($evt);
b4d044eb 99 }
76cecb10 100 if ($evt->{is_in_place_close}) {
865bb5d2 101 return $evt if $passthrough || $content;
76cecb10 102 return;
103 }
104 my $name = $evt->{name};
105 my $depth = 1;
865bb5d2 106 my $_next = $content ? 'peek' : 'next';
d80786d0 107 $stream = do { local $_ = $stream; $filter->($stream) } if $filter;
76cecb10 108 my $collector = $self->_stream_from_code(sub {
109 return unless $stream;
110 while (my ($evt) = $stream->$_next) {
111 $depth++ if ($evt->{type} eq 'OPEN');
112 $depth-- if ($evt->{type} eq 'CLOSE');
113 unless ($depth) {
114 undef $stream;
865bb5d2 115 return if $content;
76cecb10 116 push(@$into, $evt) if $into;
117 return $evt if $passthrough;
118 return;
119 }
120 push(@$into, $evt) if $into;
865bb5d2 121 $stream->next if $content;
76cecb10 122 return $evt if $passthrough;
123 }
124 die "Never saw closing </${name}> before end of source";
125 });
1c4455ae 126 if ($flush_before) {
127 if ($passthrough||$content) {
128 $evt = { %$evt, flush => 1 };
129 } else {
130 $evt = { type => 'EMPTY', flush => 1 };
131 }
132 }
133 return ($passthrough||$content||$flush_before)
134 ? [ $evt, $collector ]
135 : $collector;
76cecb10 136 };
137}
138
865bb5d2 139sub collect_content {
140 my ($self, $options) = @_;
141 $self->collect({ %{$options||{}}, content => 1 })
142}
143
456a815d 144sub add_before {
145 my ($self, $events) = @_;
8f962884 146 sub { return $self->_stream_from_array(@$events, $_[0]) };
456a815d 147}
148
149sub add_after {
150 my ($self, $events) = @_;
b616863d 151 my $coll_proto = $self->collect({ passthrough => 1 });
456a815d 152 sub {
8f962884 153 my ($evt) = @_;
456a815d 154 my $emit = $self->_stream_from_array(@$events);
b616863d 155 my $coll = &$coll_proto;
995bc8be 156 return ref($coll) eq 'HASH' # single event, no collect
157 ? [ $coll, $emit ]
158 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
456a815d 159 };
8f962884 160}
456a815d 161
865bb5d2 162sub prepend_content {
456a815d 163 my ($self, $events) = @_;
164 sub {
8f962884 165 my ($evt) = @_;
456a815d 166 if ($evt->{is_in_place_close}) {
167 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
168 return [ $evt, $self->_stream_from_array(
169 @$events, { type => 'CLOSE', name => $evt->{name} }
170 ) ];
171 }
172 return $self->_stream_from_array($evt, @$events);
173 };
174}
175
865bb5d2 176sub append_content {
8f962884 177 my ($self, $events) = @_;
865bb5d2 178 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
8f962884 179 sub {
180 my ($evt) = @_;
181 if ($evt->{is_in_place_close}) {
182 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
183 return [ $evt, $self->_stream_from_array(
184 @$events, { type => 'CLOSE', name => $evt->{name} }
185 ) ];
186 }
b616863d 187 my $coll = &$coll_proto;
8f962884 188 my $emit = $self->_stream_from_array(@$events);
189 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
190 };
191}
192
456a815d 193sub replace {
3cdbc13f 194 my ($self, $replace_with, $options) = @_;
b616863d 195 my $coll_proto = $self->collect($options);
456a815d 196 sub {
197 my ($evt, $stream) = @_;
3cdbc13f 198 my $emit = $self->_stream_from_proto($replace_with);
b616863d 199 my $coll = &$coll_proto;
a88c1c57 200 # if we're replacing the contents of an in place close
201 # then we need to handle that here
202 if ($options->{content}
203 && ref($coll) eq 'HASH'
ec687101 204 && $coll->{is_in_place_close}
a88c1c57 205 ) {
a88c1c57 206 my $close = $stream->next;
ec687101 207 # shallow copy and nuke in place and raw (to force smart print)
208 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
a88c1c57 209 $emit = $self->_stream_concat(
210 $emit,
211 $self->_stream_from_array($close),
212 );
213 }
451b3b30 214 # For a straightforward replace operation we can, in fact, do the emit
215 # -before- the collect, and my first cut did so. However in order to
216 # use the captured content in generating the new content, we need
217 # the collect stage to happen first - and it seems highly unlikely
218 # that in normal operation the collect phase will take long enough
219 # for the difference to be noticeable
11cc25dd 220 return
221 ($coll
a88c1c57 222 ? (ref $coll eq 'ARRAY' # [ event, stream ]
451b3b30 223 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
a88c1c57 224 : (ref $coll eq 'HASH' # event or stream?
225 ? [ $coll, $emit ]
226 : $self->_stream_concat($coll, $emit))
11cc25dd 227 )
228 : $emit
229 );
456a815d 230 };
231}
232
865bb5d2 233sub replace_content {
234 my ($self, $replace_with, $options) = @_;
235 $self->replace($replace_with, { %{$options||{}}, content => 1 })
236}
237
3cdbc13f 238sub repeat {
239 my ($self, $repeat_for, $options) = @_;
240 $options->{into} = \my @into;
f8ed299b 241 my @between;
242 my $repeat_between = delete $options->{repeat_between};
243 if ($repeat_between) {
f8ed299b 244 $options->{filter} = sub {
d80786d0 245 $_->select($repeat_between)->collect({ into => \@between })
f8ed299b 246 };
247 }
3cdbc13f 248 my $repeater = sub {
f8ed299b 249 my $s = $self->_stream_from_proto($repeat_for);
250 # We have to test $repeat_between not @between here because
251 # at the point we're constructing our return stream @between
252 # hasn't been populated yet - but we can test @between in the
253 # map routine because it has been by then and that saves us doing
254 # the extra stream construction if we don't need it.
6d0f20a6 255 $self->_flatten_stream_of_streams(do {
256 if ($repeat_between) {
257 $s->map(sub {
258 local $_ = $self->_stream_from_array(@into);
259 (@between && $s->peek)
260 ? $self->_stream_concat(
261 $_[0]->($_), $self->_stream_from_array(@between)
262 )
263 : $_[0]->($_)
264 })
265 } else {
266 $s->map(sub {
267 local $_ = $self->_stream_from_array(@into);
268 $_[0]->($_)
f8ed299b 269 })
6d0f20a6 270 }
271 })
3cdbc13f 272 };
273 $self->replace($repeater, $options);
274}
275
865bb5d2 276sub repeat_content {
277 my ($self, $repeat_for, $options) = @_;
278 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
279}
280
456a815d 2811;
556c8616 282
283=head1 NAME
284
285HTML::Zoom::FilterBuilder - Add Filters to a Stream
286
244252e7 287=head1 SYNOPSIS
288
a42917f6 289Create an L<HTML::Zoom> instance:
290
0d8f057e 291 use HTML::Zoom;
292 my $root = HTML::Zoom
293 ->from_html(<<MAIN);
294 <html>
295 <head>
296 <title>Default Title</title>
297 </head>
a42917f6 298 <body bad_attr='junk'>
0d8f057e 299 Default Content
300 </body>
301 </html>
302 MAIN
303
a42917f6 304Create a new attribute on the C<body> tag:
305
306 $root = $root
307 ->select('body')
308 ->set_attribute(class=>'main');
309
310Add a extra value to an existing attribute:
311
312 $root = $root
313 ->select('body')
314 ->add_to_attribute(class=>'one-column');
315
316Set the content of the C<title> tag:
317
318 $root = $root
319 ->select('title')
320 ->replace_content('Hello World');
321
322Set content from another L<HTML::Zoom> instance:
323
0d8f057e 324 my $body = HTML::Zoom
325 ->from_html(<<BODY);
326 <div id="stuff">
2daa653a 327 <p>Well Now</p>
f8ad684d 328 <p id="p2">Is the Time</p>
0d8f057e 329 </div>
330 BODY
331
a42917f6 332 $root = $root
f8ad684d 333 ->select('body')
a42917f6 334 ->replace_content($body);
335
336Set an attribute on multiple matches:
337
338 $root = $root
f8ad684d 339 ->select('p')
a42917f6 340 ->set_attribute(class=>'para');
341
342Remove an attribute:
343
344 $root = $root
345 ->select('body')
346 ->remove_attribute('bad_attr');
0d8f057e 347
348will produce:
349
350=begin testinfo
351
a42917f6 352 my $output = $root->to_html;
0d8f057e 353 my $expect = <<HTML;
354
355=end testinfo
356
357 <html>
358 <head>
359 <title>Hello World</title>
360 </head>
434a11c8 361 <body class="main one-column"><div id="stuff">
adb30a8a 362 <p class="para">Well Now</p>
a42917f6 363 <p id="p2" class="para">Is the Time</p>
0d8f057e 364 </div>
365 </body>
366 </html>
367
368=begin testinfo
369
370 HTML
371 is($output, $expect, 'Synopsis code works ok');
372
373=end testinfo
244252e7 374
556c8616 375=head1 DESCRIPTION
376
377Given a L<HTML::Zoom> stream, provide methods to apply filters which
378alter the content of that stream.
379
f6644c71 380=head1 METHODS
381
382This class defines the following public API
383
e225a4bd 384=head2 set_attribute
f6644c71 385
f8ad684d 386Sets an attribute of a given name to a given value for all matching selections.
387
388 $html_zoom
389 ->select('p')
390 ->set_attribute(class=>'paragraph')
391 ->select('div')
434a11c8 392 ->set_attribute(name=>'class', value=>'divider');
393
f8ad684d 394
395Overrides existing values, if such exist. When multiple L</set_attribute>
396calls are made against the same or overlapping selection sets, the final
397call wins.
f6644c71 398
e225a4bd 399=head2 add_to_attribute
f6644c71 400
434a11c8 401Adds a value to an existing attribute, or creates one if the attribute does not
402yet exist.
f6644c71 403
434a11c8 404 $html_zoom
405 ->select('p')
406 ->set_attribute(class=>'paragraph')
407 ->then
408 ->add_to_attribute(name=>'class', value=>'divider');
f6644c71 409
434a11c8 410Attributes with more than one value will have a dividing space.
411
e225a4bd 412=head2 remove_attribute
434a11c8 413
414Removes an attribute and all its values.
415
416 $html_zoom
417 ->select('p')
418 ->set_attribute(class=>'paragraph')
419 ->then
420 ->remove_attribute('class');
421
422Removes attributes from the original stream or events already added.
f6644c71 423
424=head2 collect
425
ac3acd87 426Collects and extracts results of L<HTML::Zoom/select>. It takes the following
427optional common options as hash reference.
428
429=over
430
431=item into [ARRAY REFERENCE]
432
433Where to save collected events (selected elements).
434
435 $z1->select('#main-content')
436 ->collect({ into => \@body })
437 ->run;
438 $z2->select('#main-content')
439 ->replace(\@body)
440 ->memoize;
441
442=item filter [CODE]
443
444Run filter on collected elements (locally setting $_ to stream, and passing
445stream as an argument to given code reference). Filtered stream would be
446returned.
447
448 $z->select('.outer')
449 ->collect({
450 filter => sub { $_->select('.inner')->replace_content('bar!') },
451 passthrough => 1,
452 })
453
454It can be used to further filter selection. For example
455
456 $z->select('tr')
457 ->collect({
458 filter => sub { $_->select('td') },
459 passthrough => 1,
460 })
461
462is equivalent to (not implemented yet) descendant selector combination, i.e.
463
464 $z->select('tr td')
465
466=item passthrough [BOOLEAN]
467
468Extract copy of elements; the stream is unchanged (it does not remove collected
469elements). For example without 'passthrough'
470
471 HTML::Zoom->from_html('<foo><bar /></foo>')
472 ->select('foo')
473 ->collect({ content => 1 })
474 ->to_html
475
476returns '<foo></foo>', while with C<passthrough> option
477
478 HTML::Zoom->from_html('<foo><bar /></foo>')
479 ->select('foo')
480 ->collect({ content => 1, passthough => 1 })
481 ->to_html
482
483returns '<foo><bar /></foo>'.
484
485=item content [BOOLEAN]
486
487Collect content of the element, and not the element itself.
488
489For example
490
491 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
492 ->select('h1')
493 ->collect
494 ->to_html
495
496would return '<p>foo</p>', while
497
498 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
499 ->select('h1')
500 ->collect({ content => 1 })
501 ->to_html
502
503would return '<h1></h1><p>foo</p>'.
504
505See also L</collect_content>.
506
507=item flush_before [BOOLEAN]
508
509Generate C<flush> event before collecting, to ensure that the HTML generated up
510to selected element being collected is flushed throught to the browser. Usually
511used in L</repeat> or L</repeat_content>.
512
513=back
f6644c71 514
515=head2 collect_content
516
ac3acd87 517Collects contents of L<HTML::Zoom/select> result.
518
519 HTML::Zoom->from_file($foo)
520 ->select('#main-content')
521 ->collect_content({ into => \@foo_body })
522 ->run;
523 $z->select('#foo')
524 ->replace_content(\@foo_body)
525 ->memoize;
526
527Equivalent to running L</collect> with C<content> option set.
f6644c71 528
529=head2 add_before
530
ac3acd87 531Given a L<HTML::Zoom/select> result, add given content (which might be string,
532array or another L<HTML::Zoom> object) before it.
533
534 $html_zoom
535 ->select('input[name="foo"]')
536 ->add_before(\ '<span class="warning">required field</span>');
f6644c71 537
538=head2 add_after
539
ac3acd87 540Like L</add_before>, only after L<HTML::Zoom/select> result.
541
542 $html_zoom
543 ->select('p')
544 ->add_after("\n\n");
545
546You can add zoom events directly
547
548 $html_zoom
549 ->select('p')
550 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
f6644c71 551
552=head2 prepend_content
553
554 TBD
555
556=head2 append_content
557
558 TBD
559
560=head2 replace
561
ac3acd87 562Given a L<HTML::Zoom/select> result, replace it with a string, array or another
563L<HTML::Zoom> object. It takes the same optional common options as L</collect>
564(via hash reference).
f6644c71 565
566=head2 replace_content
567
244252e7 568Given a L<HTML::Zoom/select> result, replace the content with a string, array
569or another L<HTML::Zoom> object.
f6644c71 570
ac3acd87 571 $html_zoom
572 ->select('title, #greeting')
573 ->replace_content('Hello world!');
574
f6644c71 575=head2 repeat
576
ac3acd87 577 $zoom->select('.item')->repeat(sub {
578 if (my $row = $db_thing->next) {
579 return sub { $_->select('.item-name')->replace_content($row->name) }
580 } else {
581 return
582 }
583 }, { flush_before => 1 });
584
585Run I<$repeat_for>, which should be iterator (code reference) returning
586subroutines, reference to array of subroutines, or other zoom-able object
587consisting of transformations. Those subroutines would be run with $_
588local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
589said result passed as parameter to subroutine.
590
591You might want to use iterator when you don't have all elements upfront
592
593 $zoom = $zoom->select('.contents')->repeat(sub {
594 while (my $line = $fh->getline) {
595 return sub {
596 $_->select('.lno')->replace_content($fh->input_line_number)
597 ->select('.line')->replace_content($line)
598 }
599 }
600 return
601 });
602
603You might want to use array reference if it doesn't matter that all iterations
604are pre-generated
605
606 $zoom->select('table')->repeat([
607 map {
608 my $elem = $_;
609 sub {
610 $_->select('td')->replace_content($e);
611 }
612 } @list
613 ]);
614
615In addition to common options as in L</collect>, it also supports
616
617=over
618
619=item repeat_between [SELECTOR]
620
621Selects object to be repeated between items. In the case of array this object
622is put between elements, in case of iterator it is put between results of
623subsequent iterations, in the case of streamable it is put between events
624(->to_stream->next).
625
626See documentation for L</repeat_content>
627
628=back
f6644c71 629
630=head2 repeat_content
631
ac3acd87 632Given a L<HTML::Zoom/select> result, run provided iterator passing content of
633this result to this iterator. Accepts the same options as L</repeat>.
634
635Equivalent to using C<contents> option with L</repeat>.
636
637 $html_zoom
638 ->select('#list')
639 ->repeat_content(
640 [
641 sub {
642 $_->select('.name')->replace_content('Matt')
643 ->select('.age')->replace_content('26')
644 },
645 sub {
646 $_->select('.name')->replace_content('Mark')
647 ->select('.age')->replace_content('0x29')
648 },
649 sub {
650 $_->select('.name')->replace_content('Epitaph')
651 ->select('.age')->replace_content('<redacted>')
652 },
653 ],
654 { repeat_between => '.between' }
655 );
656
f6644c71 657
556c8616 658=head1 ALSO SEE
659
660L<HTML::Zoom>
661
662=head1 AUTHORS
663
664See L<HTML::Zoom> for authors.
665
666=head1 LICENSE
667
668See L<HTML::Zoom> for the license.
669
670=cut
671