fixed add/before pre/app pend content stuff, added docs and test, also docs and test...
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / FilterBuilder.pm
CommitLineData
456a815d 1package HTML::Zoom::FilterBuilder;
2
1cf03540 3use strictures 1;
d80786d0 4use base qw(HTML::Zoom::SubObject);
456a815d 5use HTML::Zoom::CodeStream;
6
456a815d 7sub _stream_from_code {
d80786d0 8 shift->_zconfig->stream_utils->stream_from_code(@_)
456a815d 9}
10
11sub _stream_from_array {
d80786d0 12 shift->_zconfig->stream_utils->stream_from_array(@_)
456a815d 13}
14
3cdbc13f 15sub _stream_from_proto {
d80786d0 16 shift->_zconfig->stream_utils->stream_from_proto(@_)
3cdbc13f 17}
18
456a815d 19sub _stream_concat {
d80786d0 20 shift->_zconfig->stream_utils->stream_concat(@_)
456a815d 21}
22
6d0f20a6 23sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
25}
26
456a815d 27sub set_attribute {
1c4455ae 28 my $self = shift;
29 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 30 sub {
8f962884 31 my $a = (my $evt = $_[0])->{attrs};
456a815d 32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
36 ? ()
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
38 }
39 };
40}
41
1c4455ae 42sub _parse_attribute_args {
43 my $self = shift;
2daa653a 44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
1c4455ae 46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
48}
49
456a815d 50sub add_attribute {
2daa653a 51 die "renamed to add_to_attribute. killing this entirely for 1.0";
52}
53
54sub add_to_attribute {
1c4455ae 55 my $self = shift;
56 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 57 sub {
8f962884 58 my $a = (my $evt = $_[0])->{attrs};
456a815d 59 my $e = exists $a->{$name};
60 +{ %$evt, raw => undef, raw_attrs => undef,
61 attrs => {
62 %$a,
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
64 },
65 ($e # add to name list if not present
66 ? ()
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
68 }
69 };
70}
71
72sub remove_attribute {
73 my ($self, $args) = @_;
1c4455ae 74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
456a815d 75 sub {
8f962884 76 my $a = (my $evt = $_[0])->{attrs};
456a815d 77 return $evt unless exists $a->{$name};
78 $a = { %$a }; delete $a->{$name};
79 +{ %$evt, raw => undef, raw_attrs => undef,
80 attrs => $a,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
82 }
83 };
84}
85
76cecb10 86sub collect {
87 my ($self, $options) = @_;
1c4455ae 88 my ($into, $passthrough, $content, $filter, $flush_before) =
89 @{$options}{qw(into passthrough content filter flush_before)};
76cecb10 90 sub {
91 my ($evt, $stream) = @_;
b4d044eb 92 # We wipe the contents of @$into here so that other actions depending
93 # on this (such as a repeater) can be invoked multiple times easily.
94 # I -suspect- it's better for that state reset to be managed here; if it
95 # ever becomes painful the decision should be revisited
96 if ($into) {
865bb5d2 97 @$into = $content ? () : ($evt);
b4d044eb 98 }
76cecb10 99 if ($evt->{is_in_place_close}) {
865bb5d2 100 return $evt if $passthrough || $content;
76cecb10 101 return;
102 }
103 my $name = $evt->{name};
104 my $depth = 1;
865bb5d2 105 my $_next = $content ? 'peek' : 'next';
2abde91e 106 if ($filter) {
107 if ($content) {
108 $stream = do { local $_ = $stream; $filter->($stream) };
109 } else {
110 $stream = do {
111 local $_ = $self->_stream_concat(
112 $self->_stream_from_array($evt),
113 $stream,
114 );
115 $filter->($_);
116 };
117 $evt = $stream->next;
118 }
119 }
76cecb10 120 my $collector = $self->_stream_from_code(sub {
121 return unless $stream;
122 while (my ($evt) = $stream->$_next) {
123 $depth++ if ($evt->{type} eq 'OPEN');
124 $depth-- if ($evt->{type} eq 'CLOSE');
125 unless ($depth) {
126 undef $stream;
865bb5d2 127 return if $content;
76cecb10 128 push(@$into, $evt) if $into;
129 return $evt if $passthrough;
130 return;
131 }
132 push(@$into, $evt) if $into;
865bb5d2 133 $stream->next if $content;
76cecb10 134 return $evt if $passthrough;
135 }
136 die "Never saw closing </${name}> before end of source";
137 });
1c4455ae 138 if ($flush_before) {
139 if ($passthrough||$content) {
140 $evt = { %$evt, flush => 1 };
141 } else {
142 $evt = { type => 'EMPTY', flush => 1 };
143 }
144 }
145 return ($passthrough||$content||$flush_before)
146 ? [ $evt, $collector ]
147 : $collector;
76cecb10 148 };
149}
150
865bb5d2 151sub collect_content {
152 my ($self, $options) = @_;
153 $self->collect({ %{$options||{}}, content => 1 })
154}
155
456a815d 156sub add_before {
157 my ($self, $events) = @_;
b9c27a5a 158 my $coll_proto = $self->collect({ passthrough => 1 });
159 sub {
160 my $emit = $self->_stream_from_proto($events);
161 my $coll = &$coll_proto;
162 if($coll) {
163 if(ref $coll eq 'ARRAY') {
164 my $firstbit = $self->_stream_from_proto([$coll->[0]]);
165 return $self->_stream_concat($emit, $firstbit, $coll->[1]);
166 } elsif(ref $coll eq 'HASH') {
167 return [$emit, $coll];
168 } else {
169 return $self->_stream_concat($emit, $coll);
170 }
171 } else { return $emit }
172 }
456a815d 173}
174
175sub add_after {
176 my ($self, $events) = @_;
b616863d 177 my $coll_proto = $self->collect({ passthrough => 1 });
456a815d 178 sub {
8f962884 179 my ($evt) = @_;
b9c27a5a 180 my $emit = $self->_stream_from_proto($events);
b616863d 181 my $coll = &$coll_proto;
995bc8be 182 return ref($coll) eq 'HASH' # single event, no collect
183 ? [ $coll, $emit ]
184 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
456a815d 185 };
8f962884 186}
456a815d 187
b9c27a5a 188
189 #$emit = $self->_stream_concat(
190 # $emit,
191 # $self->_stream_from_array($close),
192 # );
193
865bb5d2 194sub prepend_content {
456a815d 195 my ($self, $events) = @_;
b9c27a5a 196 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
456a815d 197 sub {
8f962884 198 my ($evt) = @_;
b9c27a5a 199 my $emit = $self->_stream_from_proto($events);
456a815d 200 if ($evt->{is_in_place_close}) {
201 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
202 return [ $evt, $self->_stream_from_array(
b9c27a5a 203 $emit->next, { type => 'CLOSE', name => $evt->{name} }
456a815d 204 ) ];
205 }
b9c27a5a 206 my $coll = &$coll_proto;
207 return [ $coll->[0], $self->_stream_concat($emit, $coll->[1]) ];
456a815d 208 };
209}
210
865bb5d2 211sub append_content {
8f962884 212 my ($self, $events) = @_;
865bb5d2 213 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
8f962884 214 sub {
215 my ($evt) = @_;
b9c27a5a 216 my $emit = $self->_stream_from_proto($events);
8f962884 217 if ($evt->{is_in_place_close}) {
218 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
219 return [ $evt, $self->_stream_from_array(
b9c27a5a 220 $emit->next, { type => 'CLOSE', name => $evt->{name} }
8f962884 221 ) ];
222 }
b616863d 223 my $coll = &$coll_proto;
8f962884 224 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
225 };
226}
227
456a815d 228sub replace {
3cdbc13f 229 my ($self, $replace_with, $options) = @_;
b616863d 230 my $coll_proto = $self->collect($options);
456a815d 231 sub {
232 my ($evt, $stream) = @_;
3cdbc13f 233 my $emit = $self->_stream_from_proto($replace_with);
b616863d 234 my $coll = &$coll_proto;
a88c1c57 235 # if we're replacing the contents of an in place close
236 # then we need to handle that here
237 if ($options->{content}
238 && ref($coll) eq 'HASH'
ec687101 239 && $coll->{is_in_place_close}
a88c1c57 240 ) {
a88c1c57 241 my $close = $stream->next;
ec687101 242 # shallow copy and nuke in place and raw (to force smart print)
243 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
a88c1c57 244 $emit = $self->_stream_concat(
245 $emit,
246 $self->_stream_from_array($close),
247 );
248 }
451b3b30 249 # For a straightforward replace operation we can, in fact, do the emit
250 # -before- the collect, and my first cut did so. However in order to
251 # use the captured content in generating the new content, we need
252 # the collect stage to happen first - and it seems highly unlikely
253 # that in normal operation the collect phase will take long enough
254 # for the difference to be noticeable
11cc25dd 255 return
256 ($coll
a88c1c57 257 ? (ref $coll eq 'ARRAY' # [ event, stream ]
451b3b30 258 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
a88c1c57 259 : (ref $coll eq 'HASH' # event or stream?
260 ? [ $coll, $emit ]
261 : $self->_stream_concat($coll, $emit))
11cc25dd 262 )
263 : $emit
264 );
456a815d 265 };
266}
267
865bb5d2 268sub replace_content {
269 my ($self, $replace_with, $options) = @_;
270 $self->replace($replace_with, { %{$options||{}}, content => 1 })
271}
272
3cdbc13f 273sub repeat {
274 my ($self, $repeat_for, $options) = @_;
275 $options->{into} = \my @into;
f8ed299b 276 my @between;
277 my $repeat_between = delete $options->{repeat_between};
278 if ($repeat_between) {
f8ed299b 279 $options->{filter} = sub {
d80786d0 280 $_->select($repeat_between)->collect({ into => \@between })
f8ed299b 281 };
282 }
3cdbc13f 283 my $repeater = sub {
f8ed299b 284 my $s = $self->_stream_from_proto($repeat_for);
285 # We have to test $repeat_between not @between here because
286 # at the point we're constructing our return stream @between
287 # hasn't been populated yet - but we can test @between in the
288 # map routine because it has been by then and that saves us doing
289 # the extra stream construction if we don't need it.
6d0f20a6 290 $self->_flatten_stream_of_streams(do {
291 if ($repeat_between) {
292 $s->map(sub {
293 local $_ = $self->_stream_from_array(@into);
294 (@between && $s->peek)
295 ? $self->_stream_concat(
296 $_[0]->($_), $self->_stream_from_array(@between)
297 )
298 : $_[0]->($_)
299 })
300 } else {
301 $s->map(sub {
302 local $_ = $self->_stream_from_array(@into);
303 $_[0]->($_)
f8ed299b 304 })
6d0f20a6 305 }
306 })
3cdbc13f 307 };
308 $self->replace($repeater, $options);
309}
310
865bb5d2 311sub repeat_content {
312 my ($self, $repeat_for, $options) = @_;
313 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
314}
315
456a815d 3161;
556c8616 317
318=head1 NAME
319
320HTML::Zoom::FilterBuilder - Add Filters to a Stream
321
244252e7 322=head1 SYNOPSIS
323
a42917f6 324Create an L<HTML::Zoom> instance:
325
0d8f057e 326 use HTML::Zoom;
327 my $root = HTML::Zoom
328 ->from_html(<<MAIN);
329 <html>
330 <head>
331 <title>Default Title</title>
332 </head>
a42917f6 333 <body bad_attr='junk'>
0d8f057e 334 Default Content
335 </body>
336 </html>
337 MAIN
338
a42917f6 339Create a new attribute on the C<body> tag:
340
341 $root = $root
342 ->select('body')
343 ->set_attribute(class=>'main');
344
345Add a extra value to an existing attribute:
346
347 $root = $root
348 ->select('body')
349 ->add_to_attribute(class=>'one-column');
350
351Set the content of the C<title> tag:
352
353 $root = $root
354 ->select('title')
355 ->replace_content('Hello World');
356
357Set content from another L<HTML::Zoom> instance:
358
0d8f057e 359 my $body = HTML::Zoom
360 ->from_html(<<BODY);
361 <div id="stuff">
2daa653a 362 <p>Well Now</p>
f8ad684d 363 <p id="p2">Is the Time</p>
0d8f057e 364 </div>
365 BODY
366
a42917f6 367 $root = $root
f8ad684d 368 ->select('body')
a42917f6 369 ->replace_content($body);
370
371Set an attribute on multiple matches:
372
373 $root = $root
f8ad684d 374 ->select('p')
a42917f6 375 ->set_attribute(class=>'para');
376
377Remove an attribute:
378
379 $root = $root
380 ->select('body')
381 ->remove_attribute('bad_attr');
0d8f057e 382
383will produce:
384
385=begin testinfo
386
a42917f6 387 my $output = $root->to_html;
0d8f057e 388 my $expect = <<HTML;
389
390=end testinfo
391
392 <html>
393 <head>
394 <title>Hello World</title>
395 </head>
434a11c8 396 <body class="main one-column"><div id="stuff">
adb30a8a 397 <p class="para">Well Now</p>
a42917f6 398 <p id="p2" class="para">Is the Time</p>
0d8f057e 399 </div>
400 </body>
401 </html>
402
403=begin testinfo
404
405 HTML
406 is($output, $expect, 'Synopsis code works ok');
407
408=end testinfo
244252e7 409
556c8616 410=head1 DESCRIPTION
411
412Given a L<HTML::Zoom> stream, provide methods to apply filters which
413alter the content of that stream.
414
f6644c71 415=head1 METHODS
416
417This class defines the following public API
418
e225a4bd 419=head2 set_attribute
f6644c71 420
f8ad684d 421Sets an attribute of a given name to a given value for all matching selections.
422
423 $html_zoom
424 ->select('p')
425 ->set_attribute(class=>'paragraph')
426 ->select('div')
b9c27a5a 427 ->set_attribute({name=>'class', value=>'divider'});
434a11c8 428
f8ad684d 429
430Overrides existing values, if such exist. When multiple L</set_attribute>
431calls are made against the same or overlapping selection sets, the final
432call wins.
f6644c71 433
e225a4bd 434=head2 add_to_attribute
f6644c71 435
434a11c8 436Adds a value to an existing attribute, or creates one if the attribute does not
b9c27a5a 437yet exist. You may call this method with either an Array or HashRef of Args.
438
439Here's the 'long form' HashRef:
f6644c71 440
434a11c8 441 $html_zoom
442 ->select('p')
443 ->set_attribute(class=>'paragraph')
444 ->then
b9c27a5a 445 ->add_to_attribute({name=>'class', value=>'divider'});
446
447And the exact same effect using the 'short form' Array:
448
449 $html_zoom
450 ->select('p')
451 ->set_attribute(class=>'paragraph')
452 ->then
453 ->add_to_attribute(class=>'divider');
f6644c71 454
434a11c8 455Attributes with more than one value will have a dividing space.
456
e225a4bd 457=head2 remove_attribute
434a11c8 458
459Removes an attribute and all its values.
460
461 $html_zoom
462 ->select('p')
463 ->set_attribute(class=>'paragraph')
464 ->then
465 ->remove_attribute('class');
466
467Removes attributes from the original stream or events already added.
f6644c71 468
469=head2 collect
470
ac3acd87 471Collects and extracts results of L<HTML::Zoom/select>. It takes the following
472optional common options as hash reference.
473
474=over
475
476=item into [ARRAY REFERENCE]
477
478Where to save collected events (selected elements).
479
480 $z1->select('#main-content')
481 ->collect({ into => \@body })
482 ->run;
483 $z2->select('#main-content')
484 ->replace(\@body)
485 ->memoize;
486
487=item filter [CODE]
488
489Run filter on collected elements (locally setting $_ to stream, and passing
490stream as an argument to given code reference). Filtered stream would be
491returned.
492
493 $z->select('.outer')
494 ->collect({
495 filter => sub { $_->select('.inner')->replace_content('bar!') },
496 passthrough => 1,
497 })
498
499It can be used to further filter selection. For example
500
501 $z->select('tr')
502 ->collect({
503 filter => sub { $_->select('td') },
504 passthrough => 1,
505 })
506
507is equivalent to (not implemented yet) descendant selector combination, i.e.
508
509 $z->select('tr td')
510
511=item passthrough [BOOLEAN]
512
513Extract copy of elements; the stream is unchanged (it does not remove collected
514elements). For example without 'passthrough'
515
516 HTML::Zoom->from_html('<foo><bar /></foo>')
517 ->select('foo')
518 ->collect({ content => 1 })
519 ->to_html
520
521returns '<foo></foo>', while with C<passthrough> option
522
523 HTML::Zoom->from_html('<foo><bar /></foo>')
524 ->select('foo')
525 ->collect({ content => 1, passthough => 1 })
526 ->to_html
527
528returns '<foo><bar /></foo>'.
529
530=item content [BOOLEAN]
531
532Collect content of the element, and not the element itself.
533
534For example
535
536 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
537 ->select('h1')
538 ->collect
539 ->to_html
540
541would return '<p>foo</p>', while
542
543 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
544 ->select('h1')
545 ->collect({ content => 1 })
546 ->to_html
547
548would return '<h1></h1><p>foo</p>'.
549
550See also L</collect_content>.
551
552=item flush_before [BOOLEAN]
553
554Generate C<flush> event before collecting, to ensure that the HTML generated up
555to selected element being collected is flushed throught to the browser. Usually
556used in L</repeat> or L</repeat_content>.
557
558=back
f6644c71 559
560=head2 collect_content
561
ac3acd87 562Collects contents of L<HTML::Zoom/select> result.
563
564 HTML::Zoom->from_file($foo)
565 ->select('#main-content')
566 ->collect_content({ into => \@foo_body })
567 ->run;
568 $z->select('#foo')
569 ->replace_content(\@foo_body)
570 ->memoize;
571
572Equivalent to running L</collect> with C<content> option set.
f6644c71 573
574=head2 add_before
575
ac3acd87 576Given a L<HTML::Zoom/select> result, add given content (which might be string,
577array or another L<HTML::Zoom> object) before it.
578
579 $html_zoom
580 ->select('input[name="foo"]')
581 ->add_before(\ '<span class="warning">required field</span>');
f6644c71 582
583=head2 add_after
584
ac3acd87 585Like L</add_before>, only after L<HTML::Zoom/select> result.
586
587 $html_zoom
588 ->select('p')
589 ->add_after("\n\n");
590
591You can add zoom events directly
592
593 $html_zoom
594 ->select('p')
595 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
f6644c71 596
597=head2 prepend_content
598
b9c27a5a 599Similar to add_before, but adds the content to the match.
600
601 HTML::Zoom
602 ->from_html(q[<p>World</p>])
603 ->select('p')
604 ->prepend_content("Hello ")
605 ->to_html
606
607 ## <p>Hello World</p>
608
609Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
f6644c71 610
611=head2 append_content
612
b9c27a5a 613Similar to add_after, but adds the content to the match.
614
615 HTML::Zoom
616 ->from_html(q[<p>Hello </p>])
617 ->select('p')
618 ->prepend_content("World")
619 ->to_html
620
621 ## <p>Hello World</p>
622
623Acceptable values are strings, scalar refs and L<HTML::Zoom> objects
f6644c71 624
625=head2 replace
626
ac3acd87 627Given a L<HTML::Zoom/select> result, replace it with a string, array or another
628L<HTML::Zoom> object. It takes the same optional common options as L</collect>
629(via hash reference).
f6644c71 630
631=head2 replace_content
632
244252e7 633Given a L<HTML::Zoom/select> result, replace the content with a string, array
634or another L<HTML::Zoom> object.
f6644c71 635
ac3acd87 636 $html_zoom
637 ->select('title, #greeting')
638 ->replace_content('Hello world!');
639
f6644c71 640=head2 repeat
641
ac3acd87 642 $zoom->select('.item')->repeat(sub {
643 if (my $row = $db_thing->next) {
644 return sub { $_->select('.item-name')->replace_content($row->name) }
645 } else {
646 return
647 }
648 }, { flush_before => 1 });
649
650Run I<$repeat_for>, which should be iterator (code reference) returning
651subroutines, reference to array of subroutines, or other zoom-able object
652consisting of transformations. Those subroutines would be run with $_
653local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
654said result passed as parameter to subroutine.
655
656You might want to use iterator when you don't have all elements upfront
657
658 $zoom = $zoom->select('.contents')->repeat(sub {
659 while (my $line = $fh->getline) {
660 return sub {
661 $_->select('.lno')->replace_content($fh->input_line_number)
662 ->select('.line')->replace_content($line)
663 }
664 }
665 return
666 });
667
668You might want to use array reference if it doesn't matter that all iterations
669are pre-generated
670
671 $zoom->select('table')->repeat([
672 map {
673 my $elem = $_;
674 sub {
675 $_->select('td')->replace_content($e);
676 }
677 } @list
678 ]);
679
680In addition to common options as in L</collect>, it also supports
681
682=over
683
684=item repeat_between [SELECTOR]
685
686Selects object to be repeated between items. In the case of array this object
687is put between elements, in case of iterator it is put between results of
688subsequent iterations, in the case of streamable it is put between events
689(->to_stream->next).
690
691See documentation for L</repeat_content>
692
693=back
f6644c71 694
695=head2 repeat_content
696
ac3acd87 697Given a L<HTML::Zoom/select> result, run provided iterator passing content of
698this result to this iterator. Accepts the same options as L</repeat>.
699
700Equivalent to using C<contents> option with L</repeat>.
701
702 $html_zoom
703 ->select('#list')
704 ->repeat_content(
705 [
706 sub {
707 $_->select('.name')->replace_content('Matt')
708 ->select('.age')->replace_content('26')
709 },
710 sub {
711 $_->select('.name')->replace_content('Mark')
712 ->select('.age')->replace_content('0x29')
713 },
714 sub {
715 $_->select('.name')->replace_content('Epitaph')
716 ->select('.age')->replace_content('<redacted>')
717 },
718 ],
719 { repeat_between => '.between' }
720 );
721
f6644c71 722
556c8616 723=head1 ALSO SEE
724
725L<HTML::Zoom>
726
727=head1 AUTHORS
728
729See L<HTML::Zoom> for authors.
730
731=head1 LICENSE
732
733See L<HTML::Zoom> for the license.
734
735=cut
736