add transform_attribute, which runs a coderef on the value of an attribute, and can...
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / FilterBuilder.pm
CommitLineData
456a815d 1package HTML::Zoom::FilterBuilder;
2
1cf03540 3use strictures 1;
d80786d0 4use base qw(HTML::Zoom::SubObject);
456a815d 5use HTML::Zoom::CodeStream;
6
456a815d 7sub _stream_from_code {
d80786d0 8 shift->_zconfig->stream_utils->stream_from_code(@_)
456a815d 9}
10
11sub _stream_from_array {
d80786d0 12 shift->_zconfig->stream_utils->stream_from_array(@_)
456a815d 13}
14
3cdbc13f 15sub _stream_from_proto {
d80786d0 16 shift->_zconfig->stream_utils->stream_from_proto(@_)
3cdbc13f 17}
18
456a815d 19sub _stream_concat {
d80786d0 20 shift->_zconfig->stream_utils->stream_concat(@_)
456a815d 21}
22
6d0f20a6 23sub _flatten_stream_of_streams {
24 shift->_zconfig->stream_utils->flatten_stream_of_streams(@_)
25}
26
456a815d 27sub set_attribute {
1c4455ae 28 my $self = shift;
29 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 30 sub {
8f962884 31 my $a = (my $evt = $_[0])->{attrs};
456a815d 32 my $e = exists $a->{$name};
33 +{ %$evt, raw => undef, raw_attrs => undef,
34 attrs => { %$a, $name => $value },
35 ($e # add to name list if not present
36 ? ()
37 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
38 }
39 };
40}
41
1c4455ae 42sub _parse_attribute_args {
43 my $self = shift;
2daa653a 44 # allow ->add_to_attribute(name => 'value')
45 # or ->add_to_attribute({ name => 'name', value => 'value' })
1c4455ae 46 my ($name, $value) = @_ > 1 ? @_ : @{$_[0]}{qw(name value)};
47 return ($name, $self->_zconfig->parser->html_escape($value));
48}
49
456a815d 50sub add_attribute {
2daa653a 51 die "renamed to add_to_attribute. killing this entirely for 1.0";
52}
53
54sub add_to_attribute {
1c4455ae 55 my $self = shift;
56 my ($name, $value) = $self->_parse_attribute_args(@_);
456a815d 57 sub {
8f962884 58 my $a = (my $evt = $_[0])->{attrs};
456a815d 59 my $e = exists $a->{$name};
60 +{ %$evt, raw => undef, raw_attrs => undef,
61 attrs => {
62 %$a,
63 $name => join(' ', ($e ? $a->{$name} : ()), $value)
64 },
65 ($e # add to name list if not present
66 ? ()
67 : (attr_names => [ @{$evt->{attr_names}}, $name ]))
68 }
69 };
70}
71
72sub remove_attribute {
73 my ($self, $args) = @_;
1c4455ae 74 my $name = (ref($args) eq 'HASH') ? $args->{name} : $args;
456a815d 75 sub {
8f962884 76 my $a = (my $evt = $_[0])->{attrs};
456a815d 77 return $evt unless exists $a->{$name};
78 $a = { %$a }; delete $a->{$name};
79 +{ %$evt, raw => undef, raw_attrs => undef,
80 attrs => $a,
81 attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
82 }
83 };
84}
85
5cac799e 86sub transform_attribute {
87 my $self = shift;
88 my ( $name, $code ) = @_ > 1 ? @_ : @{$_[0]}{qw(name code)};
89
90 sub {
91 my $evt = $_[0];
92 my %a = %{ $evt->{attrs} };
93 my @names = @{ $evt->{attr_names} };
94
95 my $existed_before = exists $a{$name};
96 my $v = $code->( $a{$name} );
97 my $deleted = $existed_before && ! defined $v;
98 my $added = ! $existed_before && defined $v;
99 if( $added ) {
100 push @names, $name;
101 $a{$name} = $v;
102 }
103 elsif( $deleted ) {
104 delete $a{$name};
105 @names = grep $_ ne $name, @names;
106 } else {
107 $a{$name} = $v;
108 }
109 +{ %$evt, raw => undef, raw_attrs => undef,
110 attrs => \%a,
111 ( $deleted || $added
112 ? (attr_names => \@names )
113 : () )
114 }
115 };
116}
117
76cecb10 118sub collect {
119 my ($self, $options) = @_;
1c4455ae 120 my ($into, $passthrough, $content, $filter, $flush_before) =
121 @{$options}{qw(into passthrough content filter flush_before)};
76cecb10 122 sub {
123 my ($evt, $stream) = @_;
b4d044eb 124 # We wipe the contents of @$into here so that other actions depending
125 # on this (such as a repeater) can be invoked multiple times easily.
126 # I -suspect- it's better for that state reset to be managed here; if it
127 # ever becomes painful the decision should be revisited
128 if ($into) {
865bb5d2 129 @$into = $content ? () : ($evt);
b4d044eb 130 }
76cecb10 131 if ($evt->{is_in_place_close}) {
865bb5d2 132 return $evt if $passthrough || $content;
76cecb10 133 return;
134 }
135 my $name = $evt->{name};
136 my $depth = 1;
865bb5d2 137 my $_next = $content ? 'peek' : 'next';
2abde91e 138 if ($filter) {
139 if ($content) {
140 $stream = do { local $_ = $stream; $filter->($stream) };
141 } else {
142 $stream = do {
143 local $_ = $self->_stream_concat(
144 $self->_stream_from_array($evt),
145 $stream,
146 );
147 $filter->($_);
148 };
149 $evt = $stream->next;
150 }
151 }
76cecb10 152 my $collector = $self->_stream_from_code(sub {
153 return unless $stream;
154 while (my ($evt) = $stream->$_next) {
155 $depth++ if ($evt->{type} eq 'OPEN');
156 $depth-- if ($evt->{type} eq 'CLOSE');
157 unless ($depth) {
158 undef $stream;
865bb5d2 159 return if $content;
76cecb10 160 push(@$into, $evt) if $into;
161 return $evt if $passthrough;
162 return;
163 }
164 push(@$into, $evt) if $into;
865bb5d2 165 $stream->next if $content;
76cecb10 166 return $evt if $passthrough;
167 }
168 die "Never saw closing </${name}> before end of source";
169 });
1c4455ae 170 if ($flush_before) {
171 if ($passthrough||$content) {
172 $evt = { %$evt, flush => 1 };
173 } else {
174 $evt = { type => 'EMPTY', flush => 1 };
175 }
176 }
177 return ($passthrough||$content||$flush_before)
178 ? [ $evt, $collector ]
179 : $collector;
76cecb10 180 };
181}
182
865bb5d2 183sub collect_content {
184 my ($self, $options) = @_;
185 $self->collect({ %{$options||{}}, content => 1 })
186}
187
456a815d 188sub add_before {
189 my ($self, $events) = @_;
8f962884 190 sub { return $self->_stream_from_array(@$events, $_[0]) };
456a815d 191}
192
193sub add_after {
194 my ($self, $events) = @_;
b616863d 195 my $coll_proto = $self->collect({ passthrough => 1 });
456a815d 196 sub {
8f962884 197 my ($evt) = @_;
456a815d 198 my $emit = $self->_stream_from_array(@$events);
b616863d 199 my $coll = &$coll_proto;
995bc8be 200 return ref($coll) eq 'HASH' # single event, no collect
201 ? [ $coll, $emit ]
202 : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
456a815d 203 };
8f962884 204}
456a815d 205
865bb5d2 206sub prepend_content {
456a815d 207 my ($self, $events) = @_;
208 sub {
8f962884 209 my ($evt) = @_;
456a815d 210 if ($evt->{is_in_place_close}) {
211 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
212 return [ $evt, $self->_stream_from_array(
213 @$events, { type => 'CLOSE', name => $evt->{name} }
214 ) ];
215 }
216 return $self->_stream_from_array($evt, @$events);
217 };
218}
219
865bb5d2 220sub append_content {
8f962884 221 my ($self, $events) = @_;
865bb5d2 222 my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
8f962884 223 sub {
224 my ($evt) = @_;
225 if ($evt->{is_in_place_close}) {
226 $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
227 return [ $evt, $self->_stream_from_array(
228 @$events, { type => 'CLOSE', name => $evt->{name} }
229 ) ];
230 }
b616863d 231 my $coll = &$coll_proto;
8f962884 232 my $emit = $self->_stream_from_array(@$events);
233 return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
234 };
235}
236
456a815d 237sub replace {
3cdbc13f 238 my ($self, $replace_with, $options) = @_;
b616863d 239 my $coll_proto = $self->collect($options);
456a815d 240 sub {
241 my ($evt, $stream) = @_;
3cdbc13f 242 my $emit = $self->_stream_from_proto($replace_with);
b616863d 243 my $coll = &$coll_proto;
a88c1c57 244 # if we're replacing the contents of an in place close
245 # then we need to handle that here
246 if ($options->{content}
247 && ref($coll) eq 'HASH'
ec687101 248 && $coll->{is_in_place_close}
a88c1c57 249 ) {
a88c1c57 250 my $close = $stream->next;
ec687101 251 # shallow copy and nuke in place and raw (to force smart print)
252 $_ = { %$_ }, delete @{$_}{qw(is_in_place_close raw)} for ($coll, $close);
a88c1c57 253 $emit = $self->_stream_concat(
254 $emit,
255 $self->_stream_from_array($close),
256 );
257 }
451b3b30 258 # For a straightforward replace operation we can, in fact, do the emit
259 # -before- the collect, and my first cut did so. However in order to
260 # use the captured content in generating the new content, we need
261 # the collect stage to happen first - and it seems highly unlikely
262 # that in normal operation the collect phase will take long enough
263 # for the difference to be noticeable
11cc25dd 264 return
265 ($coll
a88c1c57 266 ? (ref $coll eq 'ARRAY' # [ event, stream ]
451b3b30 267 ? [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
a88c1c57 268 : (ref $coll eq 'HASH' # event or stream?
269 ? [ $coll, $emit ]
270 : $self->_stream_concat($coll, $emit))
11cc25dd 271 )
272 : $emit
273 );
456a815d 274 };
275}
276
865bb5d2 277sub replace_content {
278 my ($self, $replace_with, $options) = @_;
279 $self->replace($replace_with, { %{$options||{}}, content => 1 })
280}
281
3cdbc13f 282sub repeat {
283 my ($self, $repeat_for, $options) = @_;
284 $options->{into} = \my @into;
f8ed299b 285 my @between;
286 my $repeat_between = delete $options->{repeat_between};
287 if ($repeat_between) {
f8ed299b 288 $options->{filter} = sub {
d80786d0 289 $_->select($repeat_between)->collect({ into => \@between })
f8ed299b 290 };
291 }
3cdbc13f 292 my $repeater = sub {
f8ed299b 293 my $s = $self->_stream_from_proto($repeat_for);
294 # We have to test $repeat_between not @between here because
295 # at the point we're constructing our return stream @between
296 # hasn't been populated yet - but we can test @between in the
297 # map routine because it has been by then and that saves us doing
298 # the extra stream construction if we don't need it.
6d0f20a6 299 $self->_flatten_stream_of_streams(do {
300 if ($repeat_between) {
301 $s->map(sub {
302 local $_ = $self->_stream_from_array(@into);
303 (@between && $s->peek)
304 ? $self->_stream_concat(
305 $_[0]->($_), $self->_stream_from_array(@between)
306 )
307 : $_[0]->($_)
308 })
309 } else {
310 $s->map(sub {
311 local $_ = $self->_stream_from_array(@into);
312 $_[0]->($_)
f8ed299b 313 })
6d0f20a6 314 }
315 })
3cdbc13f 316 };
317 $self->replace($repeater, $options);
318}
319
865bb5d2 320sub repeat_content {
321 my ($self, $repeat_for, $options) = @_;
322 $self->repeat($repeat_for, { %{$options||{}}, content => 1 })
323}
324
456a815d 3251;
556c8616 326
327=head1 NAME
328
329HTML::Zoom::FilterBuilder - Add Filters to a Stream
330
244252e7 331=head1 SYNOPSIS
332
a42917f6 333Create an L<HTML::Zoom> instance:
334
0d8f057e 335 use HTML::Zoom;
336 my $root = HTML::Zoom
337 ->from_html(<<MAIN);
338 <html>
339 <head>
340 <title>Default Title</title>
341 </head>
a42917f6 342 <body bad_attr='junk'>
0d8f057e 343 Default Content
344 </body>
345 </html>
346 MAIN
347
a42917f6 348Create a new attribute on the C<body> tag:
349
350 $root = $root
351 ->select('body')
352 ->set_attribute(class=>'main');
353
354Add a extra value to an existing attribute:
355
356 $root = $root
357 ->select('body')
358 ->add_to_attribute(class=>'one-column');
359
360Set the content of the C<title> tag:
361
362 $root = $root
363 ->select('title')
364 ->replace_content('Hello World');
365
366Set content from another L<HTML::Zoom> instance:
367
0d8f057e 368 my $body = HTML::Zoom
369 ->from_html(<<BODY);
370 <div id="stuff">
2daa653a 371 <p>Well Now</p>
f8ad684d 372 <p id="p2">Is the Time</p>
0d8f057e 373 </div>
374 BODY
375
a42917f6 376 $root = $root
f8ad684d 377 ->select('body')
a42917f6 378 ->replace_content($body);
379
380Set an attribute on multiple matches:
381
382 $root = $root
f8ad684d 383 ->select('p')
a42917f6 384 ->set_attribute(class=>'para');
385
386Remove an attribute:
387
388 $root = $root
389 ->select('body')
390 ->remove_attribute('bad_attr');
0d8f057e 391
392will produce:
393
394=begin testinfo
395
a42917f6 396 my $output = $root->to_html;
0d8f057e 397 my $expect = <<HTML;
398
399=end testinfo
400
401 <html>
402 <head>
403 <title>Hello World</title>
404 </head>
434a11c8 405 <body class="main one-column"><div id="stuff">
adb30a8a 406 <p class="para">Well Now</p>
a42917f6 407 <p id="p2" class="para">Is the Time</p>
0d8f057e 408 </div>
409 </body>
410 </html>
411
412=begin testinfo
413
414 HTML
415 is($output, $expect, 'Synopsis code works ok');
416
417=end testinfo
244252e7 418
556c8616 419=head1 DESCRIPTION
420
421Given a L<HTML::Zoom> stream, provide methods to apply filters which
422alter the content of that stream.
423
f6644c71 424=head1 METHODS
425
426This class defines the following public API
427
e225a4bd 428=head2 set_attribute
f6644c71 429
f8ad684d 430Sets an attribute of a given name to a given value for all matching selections.
431
432 $html_zoom
433 ->select('p')
434 ->set_attribute(class=>'paragraph')
435 ->select('div')
434a11c8 436 ->set_attribute(name=>'class', value=>'divider');
437
f8ad684d 438
439Overrides existing values, if such exist. When multiple L</set_attribute>
440calls are made against the same or overlapping selection sets, the final
441call wins.
f6644c71 442
e225a4bd 443=head2 add_to_attribute
f6644c71 444
434a11c8 445Adds a value to an existing attribute, or creates one if the attribute does not
446yet exist.
f6644c71 447
434a11c8 448 $html_zoom
449 ->select('p')
450 ->set_attribute(class=>'paragraph')
451 ->then
452 ->add_to_attribute(name=>'class', value=>'divider');
f6644c71 453
434a11c8 454Attributes with more than one value will have a dividing space.
455
e225a4bd 456=head2 remove_attribute
434a11c8 457
458Removes an attribute and all its values.
459
460 $html_zoom
461 ->select('p')
462 ->set_attribute(class=>'paragraph')
463 ->then
464 ->remove_attribute('class');
465
466Removes attributes from the original stream or events already added.
f6644c71 467
5cac799e 468=head2 transform_attribute
469
470Transforms (or creates or deletes) an attribute by running the passed
471coderef on it. If the coderef returns nothing, the attribute is
472removed.
473
474 $html_zoom
475 ->select('a')
476 ->transform_attribute( href => sub {
477 ( my $a = shift ) =~ s/localhost/example.com/;
478 return $a;
479 },
480 );
481
f6644c71 482=head2 collect
483
ac3acd87 484Collects and extracts results of L<HTML::Zoom/select>. It takes the following
485optional common options as hash reference.
486
487=over
488
489=item into [ARRAY REFERENCE]
490
491Where to save collected events (selected elements).
492
493 $z1->select('#main-content')
494 ->collect({ into => \@body })
495 ->run;
496 $z2->select('#main-content')
497 ->replace(\@body)
498 ->memoize;
499
500=item filter [CODE]
501
502Run filter on collected elements (locally setting $_ to stream, and passing
503stream as an argument to given code reference). Filtered stream would be
504returned.
505
506 $z->select('.outer')
507 ->collect({
508 filter => sub { $_->select('.inner')->replace_content('bar!') },
509 passthrough => 1,
510 })
511
512It can be used to further filter selection. For example
513
514 $z->select('tr')
515 ->collect({
516 filter => sub { $_->select('td') },
517 passthrough => 1,
518 })
519
520is equivalent to (not implemented yet) descendant selector combination, i.e.
521
522 $z->select('tr td')
523
524=item passthrough [BOOLEAN]
525
526Extract copy of elements; the stream is unchanged (it does not remove collected
527elements). For example without 'passthrough'
528
529 HTML::Zoom->from_html('<foo><bar /></foo>')
530 ->select('foo')
531 ->collect({ content => 1 })
532 ->to_html
533
534returns '<foo></foo>', while with C<passthrough> option
535
536 HTML::Zoom->from_html('<foo><bar /></foo>')
537 ->select('foo')
538 ->collect({ content => 1, passthough => 1 })
539 ->to_html
540
541returns '<foo><bar /></foo>'.
542
543=item content [BOOLEAN]
544
545Collect content of the element, and not the element itself.
546
547For example
548
549 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
550 ->select('h1')
551 ->collect
552 ->to_html
553
554would return '<p>foo</p>', while
555
556 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
557 ->select('h1')
558 ->collect({ content => 1 })
559 ->to_html
560
561would return '<h1></h1><p>foo</p>'.
562
563See also L</collect_content>.
564
565=item flush_before [BOOLEAN]
566
567Generate C<flush> event before collecting, to ensure that the HTML generated up
568to selected element being collected is flushed throught to the browser. Usually
569used in L</repeat> or L</repeat_content>.
570
571=back
f6644c71 572
573=head2 collect_content
574
ac3acd87 575Collects contents of L<HTML::Zoom/select> result.
576
577 HTML::Zoom->from_file($foo)
578 ->select('#main-content')
579 ->collect_content({ into => \@foo_body })
580 ->run;
581 $z->select('#foo')
582 ->replace_content(\@foo_body)
583 ->memoize;
584
585Equivalent to running L</collect> with C<content> option set.
f6644c71 586
587=head2 add_before
588
ac3acd87 589Given a L<HTML::Zoom/select> result, add given content (which might be string,
590array or another L<HTML::Zoom> object) before it.
591
592 $html_zoom
593 ->select('input[name="foo"]')
594 ->add_before(\ '<span class="warning">required field</span>');
f6644c71 595
596=head2 add_after
597
ac3acd87 598Like L</add_before>, only after L<HTML::Zoom/select> result.
599
600 $html_zoom
601 ->select('p')
602 ->add_after("\n\n");
603
604You can add zoom events directly
605
606 $html_zoom
607 ->select('p')
608 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
f6644c71 609
610=head2 prepend_content
611
612 TBD
613
614=head2 append_content
615
616 TBD
617
618=head2 replace
619
ac3acd87 620Given a L<HTML::Zoom/select> result, replace it with a string, array or another
621L<HTML::Zoom> object. It takes the same optional common options as L</collect>
622(via hash reference).
f6644c71 623
624=head2 replace_content
625
244252e7 626Given a L<HTML::Zoom/select> result, replace the content with a string, array
627or another L<HTML::Zoom> object.
f6644c71 628
ac3acd87 629 $html_zoom
630 ->select('title, #greeting')
631 ->replace_content('Hello world!');
632
f6644c71 633=head2 repeat
634
ac3acd87 635 $zoom->select('.item')->repeat(sub {
636 if (my $row = $db_thing->next) {
637 return sub { $_->select('.item-name')->replace_content($row->name) }
638 } else {
639 return
640 }
641 }, { flush_before => 1 });
642
643Run I<$repeat_for>, which should be iterator (code reference) returning
644subroutines, reference to array of subroutines, or other zoom-able object
645consisting of transformations. Those subroutines would be run with $_
646local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
647said result passed as parameter to subroutine.
648
649You might want to use iterator when you don't have all elements upfront
650
651 $zoom = $zoom->select('.contents')->repeat(sub {
652 while (my $line = $fh->getline) {
653 return sub {
654 $_->select('.lno')->replace_content($fh->input_line_number)
655 ->select('.line')->replace_content($line)
656 }
657 }
658 return
659 });
660
661You might want to use array reference if it doesn't matter that all iterations
662are pre-generated
663
664 $zoom->select('table')->repeat([
665 map {
666 my $elem = $_;
667 sub {
668 $_->select('td')->replace_content($e);
669 }
670 } @list
671 ]);
672
673In addition to common options as in L</collect>, it also supports
674
675=over
676
677=item repeat_between [SELECTOR]
678
679Selects object to be repeated between items. In the case of array this object
680is put between elements, in case of iterator it is put between results of
681subsequent iterations, in the case of streamable it is put between events
682(->to_stream->next).
683
684See documentation for L</repeat_content>
685
686=back
f6644c71 687
688=head2 repeat_content
689
ac3acd87 690Given a L<HTML::Zoom/select> result, run provided iterator passing content of
691this result to this iterator. Accepts the same options as L</repeat>.
692
693Equivalent to using C<contents> option with L</repeat>.
694
695 $html_zoom
696 ->select('#list')
697 ->repeat_content(
698 [
699 sub {
700 $_->select('.name')->replace_content('Matt')
701 ->select('.age')->replace_content('26')
702 },
703 sub {
704 $_->select('.name')->replace_content('Mark')
705 ->select('.age')->replace_content('0x29')
706 },
707 sub {
708 $_->select('.name')->replace_content('Epitaph')
709 ->select('.age')->replace_content('<redacted>')
710 },
711 ],
712 { repeat_between => '.between' }
713 );
714
f6644c71 715
556c8616 716=head1 ALSO SEE
717
718L<HTML::Zoom>
719
720=head1 AUTHORS
721
722See L<HTML::Zoom> for authors.
723
724=head1 LICENSE
725
726See L<HTML::Zoom> for the license.
727
728=cut
729