reformat Changes
[catagits/HTML-Zoom.git] / lib / HTML / Zoom / FilterBuilder.pm
CommitLineData
456a815d 1package HTML::Zoom::FilterBuilder;
2
1cf03540 3use strictures 1;
d80786d0 4use base qw(HTML::Zoom::SubObject);
456a815d 5use HTML::Zoom::CodeStream;
6
# Build a stream from a code reference by delegating to the stream_from_code
# method of the stream utilities object reachable through our zoom config.
# All arguments after the invocant are forwarded untouched.
sub _stream_from_code {
    my ($self, @args) = @_;
    return $self->_zconfig->stream_utils->stream_from_code(@args);
}
10
# Build a stream from a list of events by delegating to the stream_from_array
# method of the stream utilities object reachable through our zoom config.
# All arguments after the invocant are forwarded untouched.
sub _stream_from_array {
    my ($self, @args) = @_;
    return $self->_zconfig->stream_utils->stream_from_array(@args);
}
14
# Turn a "proto" value into a stream by delegating to the stream_from_proto
# method of the stream utilities object reachable through our zoom config.
# Which kinds of proto are accepted is decided by that method, not here.
sub _stream_from_proto {
    my ($self, @args) = @_;
    return $self->_zconfig->stream_utils->stream_from_proto(@args);
}
18
# Concatenate streams by delegating to the stream_concat method of the stream
# utilities object reachable through our zoom config. All arguments after the
# invocant are forwarded untouched.
sub _stream_concat {
    my ($self, @args) = @_;
    return $self->_zconfig->stream_utils->stream_concat(@args);
}
22
# Flatten a stream of streams by delegating to the flatten_stream_of_streams
# method of the stream utilities object reachable through our zoom config.
# All arguments after the invocant are forwarded untouched.
sub _flatten_stream_of_streams {
    my ($self, @args) = @_;
    return $self->_zconfig->stream_utils->flatten_stream_of_streams(@args);
}
26
# Build a filter that sets one attribute on the event it is handed.
#
# Arguments are whatever _parse_attribute_args accepts; it yields the
# attribute name and the value to store.
#
# The returned code reference takes an event hash and returns a NEW hash (the
# input is not modified) in which:
#   * attrs has $name set to $value, replacing any previous value;
#   * raw and raw_attrs are set to undef;
#   * attr_names gains $name at the end, but only when the attribute was not
#     already present.
sub set_attribute {
    my ($self, @args) = @_;
    my ($name, $value) = $self->_parse_attribute_args(@args);
    return sub {
        my ($evt) = @_;
        my $attrs = $evt->{attrs};

        # Must be decided before the attribute hash is copied below.
        my $already_there = exists $attrs->{$name};

        my %updated = (
            %$evt,
            raw       => undef,
            raw_attrs => undef,
            attrs     => { %$attrs, $name => $value },
        );

        # add to name list if not present
        $updated{attr_names} = [ @{ $evt->{attr_names} }, $name ]
            unless $already_there;

        return \%updated;
    };
}
41
# Normalise the arguments of the attribute filters into (name, value).
#
# Two calling conventions are recognised:
#   ->add_to_attribute(name => 'value')                         # flat pair
#   ->add_to_attribute({ name => 'name', value => 'value' })    # hash ref
# More than one argument selects the flat form (only the first two are used);
# a single argument is treated as a hash reference with "name" and "value"
# keys.
#
# Returns the name followed by the value as passed through the configured
# parser's html_escape method.
sub _parse_attribute_args {
    my ($self, @args) = @_;

    my ($name, $value);
    if (@args > 1) {
        ($name, $value) = @args;
    }
    else {
        ($name, $value) = @{ $args[0] }{qw(name value)};
    }

    return ($name, $self->_zconfig->parser->html_escape($value));
}
49
# Former name of add_to_attribute. Kept only so that old callers get an
# explicit error pointing at the new name; it always throws.
sub add_attribute {
    my $complaint = "renamed to add_to_attribute. killing this entirely for 1.0";
    die $complaint;
}
53
# Build a filter that appends a value to an attribute on the event it is
# handed, creating the attribute when it does not exist yet.
#
# Arguments are whatever _parse_attribute_args accepts; it yields the
# attribute name and the value to add.
#
# The returned code reference takes an event hash and returns a NEW hash (the
# input is not modified) in which:
#   * attrs->{$name} is the old value and the new value joined by a single
#     space, or just the new value when there was no old one;
#   * raw and raw_attrs are set to undef;
#   * attr_names gains $name at the end, but only when the attribute was not
#     already present.
sub add_to_attribute {
    my ($self, @args) = @_;
    my ($name, $value) = $self->_parse_attribute_args(@args);
    return sub {
        my ($evt) = @_;
        my $attrs = $evt->{attrs};

        # Must be decided before the attribute hash is copied below.
        my $already_there = exists $attrs->{$name};
        my @pieces = $already_there ? ($attrs->{$name}, $value) : ($value);

        my %updated = (
            %$evt,
            raw       => undef,
            raw_attrs => undef,
            attrs     => { %$attrs, $name => join(' ', @pieces) },
        );

        # add to name list if not present
        $updated{attr_names} = [ @{ $evt->{attr_names} }, $name ]
            unless $already_there;

        return \%updated;
    };
}
71
# Build a filter that removes one attribute from the event it is handed.
#
# The attribute may be given either as a plain name or as a hash reference
# with a "name" key.
#
# The returned code reference takes an event hash. When the attribute is not
# present the very same event is returned. Otherwise a NEW hash is returned
# (the input is not modified) with the attribute dropped from both attrs and
# attr_names, and with raw and raw_attrs set to undef.
sub remove_attribute {
    my ($self, $args) = @_;
    my $name = ref($args) eq 'HASH' ? $args->{name} : $args;
    return sub {
        my ($evt) = @_;
        my $attrs = $evt->{attrs};
        return $evt unless exists $attrs->{$name};

        my %remaining = %$attrs;
        delete $remaining{$name};
        my @names_left = grep { $_ ne $name } @{ $evt->{attr_names} };

        return +{
            %$evt,
            raw        => undef,
            raw_attrs  => undef,
            attrs      => \%remaining,
            attr_names => \@names_left,
        };
    };
}
85
# Build a filter that consumes the matched element (or only its content) from
# the stream, optionally recording and/or re-emitting what it consumed.
#
# $options is an optional hash reference with these keys:
#   into         - array reference that receives the collected events
#   passthrough  - when true, collected events are also emitted downstream
#   content      - when true, operate on the element's content only; the
#                  opening event is emitted and the closing event is left in
#                  the stream
#   filter       - code reference run on the stream before collecting, with
#                  $_ set to that stream and the stream passed as argument
#   flush_before - when true, the first emitted event carries flush => 1
#
# The returned code reference takes ($evt, $stream) and returns either an
# event, a collector stream, an array reference [ $evt, $collector ], or
# nothing, depending on the options and on whether $evt is an in-place close.
# The collector dies if the stream ends before the matching close is seen.
sub collect {
    my ($self, $options) = @_;
    my ($into, $passthrough, $content, $filter, $flush_before)
        = @{$options}{qw(into passthrough content filter flush_before)};
    return sub {
        my ($evt, $stream) = @_;

        # We wipe the contents of @$into here so that other actions depending
        # on this (such as a repeater) can be invoked multiple times easily.
        # I -suspect- it's better for that state reset to be managed here; if
        # it ever becomes painful the decision should be revisited
        @$into = $content ? () : ($evt) if $into;

        # An in-place close has no content to walk: emit it or swallow it.
        if ($evt->{is_in_place_close}) {
            return ($passthrough || $content) ? $evt : ();
        }

        my $name   = $evt->{name};
        my $depth  = 1;
        # In content mode we only look at the next event, so that the final
        # close can be left in the stream for whoever comes after us.
        my $reader = $content ? 'peek' : 'next';

        if ($filter) {
            if ($content) {
                local $_ = $stream;
                $stream = $filter->($stream);
            }
            else {
                # The filter has to see the opening event too, so glue it back
                # on the front, filter, then take the (possibly changed)
                # opening event off again.
                local $_ = $self->_stream_concat(
                    $self->_stream_from_array($evt),
                    $stream,
                );
                $stream = $filter->($_);
                $evt = $stream->next;
            }
        }

        my $collector = $self->_stream_from_code(sub {
            # Once the matching close has been handled we are exhausted.
            return unless $stream;
            while (my ($inner) = $stream->$reader) {
                if ($inner->{type} eq 'OPEN') {
                    $depth++;
                }
                elsif ($inner->{type} eq 'CLOSE') {
                    $depth--;
                }
                if ($depth == 0) {
                    undef $stream;
                    # Content mode: the close stays in the stream, unrecorded.
                    return if $content;
                    push @$into, $inner if $into;
                    return $inner if $passthrough;
                    return;
                }
                push @$into, $inner if $into;
                # We only peeked above, so actually consume the event now.
                $stream->next if $content;
                return $inner if $passthrough;
            }
            die "Never saw closing </${name}> before end of source";
        });

        if ($flush_before) {
            $evt = ($passthrough || $content)
                ? { %$evt, flush => 1 }
                : { type => 'EMPTY', flush => 1 };
        }

        return $collector
            unless $passthrough || $content || $flush_before;
        return [ $evt, $collector ];
    };
}
150
# Shorthand for collect() with the "content" option forced on.
# $options is an optional hash reference; it is copied, never modified.
sub collect_content {
    my ($self, $options) = @_;
    my %with_content = (%{ $options || {} }, content => 1);
    return $self->collect(\%with_content);
}
155
# Build a filter that emits extra content immediately before the matched
# event.
#
# $events is either an array reference of zoom events (or undef, meaning "no
# extra events"), or any other value understood by _stream_from_proto - the
# same helper replace() uses for its replacement content. Previously anything
# but an array reference died on the array dereference.
#
# The returned code reference takes the matched event and returns a stream
# producing the extra content followed by that event.
sub add_before {
    my ($self, $events) = @_;
    my $is_event_list = !defined($events) || ref($events) eq 'ARRAY';
    return sub {
        my ($evt) = @_;

        return $self->_stream_from_array(@{ $events || [] }, $evt)
            if $is_event_list;

        # The proto stream is built per match, as replace() does, so every
        # matched element gets its own copy of the content.
        return $self->_stream_concat(
            $self->_stream_from_proto($events),
            $self->_stream_from_array($evt),
        );
    };
}
160
# Build a filter that emits extra content immediately after the matched
# element.
#
# $events is either an array reference of zoom events (or undef, meaning "no
# extra events"), or any other value understood by _stream_from_proto - the
# same helper replace() uses for its replacement content. Previously anything
# but an array reference died on the array dereference.
#
# The matched element is passed through unchanged by a passthrough collect();
# the extra content is appended once that collection is finished.
sub add_after {
    my ($self, $events) = @_;
    my $coll_proto = $self->collect({ passthrough => 1 });
    my $is_event_list = !defined($events) || ref($events) eq 'ARRAY';
    return sub {
        my $emit = $is_event_list
            ? $self->_stream_from_array(@{ $events || [] })
            : $self->_stream_from_proto($events);

        # Hand our own arguments (event and stream) on to the collector.
        my $coll = $coll_proto->(@_);

        return ref($coll) eq 'HASH'    # single event, no collect
            ? [ $coll, $emit ]
            : [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
    };
}
456a815d 173
# Build a filter that inserts events right after the matched opening event,
# i.e. at the start of the element's content.
#
# $events is an array reference of zoom events.
#
# The returned code reference takes the matched event. For an ordinary
# opening event it returns a stream of that event followed by @$events. For
# an in-place close (an element with no separate content) it returns
# [ $open, $stream ] where $open is a copy of the event without its raw and
# is_in_place_close keys and $stream yields @$events followed by a freshly
# made CLOSE event of the same name.
sub prepend_content {
    my ($self, $events) = @_;
    return sub {
        my ($evt) = @_;

        return $self->_stream_from_array($evt, @$events)
            unless $evt->{is_in_place_close};

        # Shallow copy so the caller's event keeps its flags.
        my %open = %$evt;
        delete @open{qw(raw is_in_place_close)};
        my $close = { type => 'CLOSE', name => $open{name} };

        return [ \%open, $self->_stream_from_array(@$events, $close) ];
    };
}
187
# Build a filter that inserts events at the end of the matched element's
# content, just before its closing event.
#
# $events is an array reference of zoom events.
#
# The returned code reference takes ($evt, $stream). For an in-place close it
# returns [ $open, $stream ] where $open is a copy of the event without its
# raw and is_in_place_close keys and $stream yields @$events followed by a
# freshly made CLOSE event of the same name. Otherwise the existing content
# is passed through by a content-mode collect() and @$events are emitted
# after it.
sub append_content {
    my ($self, $events) = @_;
    my $coll_proto = $self->collect({ passthrough => 1, content => 1 });
    return sub {
        my ($evt) = @_;

        if ($evt->{is_in_place_close}) {
            # Shallow copy so the caller's event keeps its flags.
            my %open = %$evt;
            delete @open{qw(raw is_in_place_close)};
            my $close = { type => 'CLOSE', name => $open{name} };
            return [ \%open, $self->_stream_from_array(@$events, $close) ];
        }

        # Hand our own arguments (event and stream) on to the collector.
        my $coll = $coll_proto->(@_);
        my $emit = $self->_stream_from_array(@$events);
        return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ];
    };
}
204
# Build a filter that replaces the matched element (or, with the "content"
# option, only its content) with new content.
#
# $replace_with is handed to _stream_from_proto to obtain the replacement
# stream; $options is the optional hash reference understood by collect().
#
# The returned code reference takes ($evt, $stream) and returns, depending on
# what the collector produced: the replacement stream alone, an array
# reference [ $event, $stream ], or a concatenated stream.
sub replace {
    my ($self, $replace_with, $options) = @_;
    my $coll_proto = $self->collect($options);
    return sub {
        my (undef, $stream) = @_;
        my $emit = $self->_stream_from_proto($replace_with);
        # Hand our own arguments (event and stream) on to the collector.
        my $coll = $coll_proto->(@_);

        # if we're replacing the contents of an in place close
        # then we need to handle that here
        if (   $options->{content}
            && ref($coll) eq 'HASH'
            && $coll->{is_in_place_close})
        {
            my $close = $stream->next;
            # shallow copy and nuke in place and raw (to force smart print)
            for my $event ($coll, $close) {
                $event = { %$event };
                delete @{$event}{qw(is_in_place_close raw)};
            }
            $emit = $self->_stream_concat(
                $emit,
                $self->_stream_from_array($close),
            );
        }

        # For a straightforward replace operation we can, in fact, do the emit
        # -before- the collect, and my first cut did so. However in order to
        # use the captured content in generating the new content, we need
        # the collect stage to happen first - and it seems highly unlikely
        # that in normal operation the collect phase will take long enough
        # for the difference to be noticeable
        return $emit unless $coll;

        # [ event, stream ]
        return [ $coll->[0], $self->_stream_concat($coll->[1], $emit) ]
            if ref $coll eq 'ARRAY';

        # a lone event ...
        return [ $coll, $emit ] if ref $coll eq 'HASH';

        # ... or a stream
        return $self->_stream_concat($coll, $emit);
    };
}
244
# Shorthand for replace() with the "content" option forced on.
# $options is an optional hash reference; it is copied, never modified.
sub replace_content {
    my ($self, $replace_with, $options) = @_;
    my %with_content = (%{ $options || {} }, content => 1);
    return $self->replace($replace_with, \%with_content);
}
249
# Build a filter that replaces the matched element with one transformed copy
# of it per item produced by $repeat_for.
#
# $repeat_for is handed to _stream_from_proto; each item it yields is called
# with a stream of the collected events, both as its argument and as $_.
#
# $options is an optional hash reference accepting the collect() options plus
#   repeat_between - selector of an element that is cut out of the collected
#                    events and re-emitted between consecutive repetitions.
# The "into" option is always overridden, and "filter" is overridden when
# repeat_between is given.
#
# The caller's $options hash is never modified: we work on a shallow copy.
# Writing into the caller's hash (as this used to do) meant that reusing one
# options hash for a second repeat() silently lost repeat_between and kept a
# filter bound to the first call's state.
sub repeat {
    my ($self, $repeat_for, $options) = @_;
    my %opts = %{ $options || {} };
    $opts{into} = \my @into;
    my @between;
    my $repeat_between = delete $opts{repeat_between};
    if ($repeat_between) {
        $opts{filter} = sub {
            $_->select($repeat_between)->collect({ into => \@between })
        };
    }
    my $repeater = sub {
        my $s = $self->_stream_from_proto($repeat_for);
        # We have to test $repeat_between not @between here because
        # at the point we're constructing our return stream @between
        # hasn't been populated yet - but we can test @between in the
        # map routine because it has been by then and that saves us doing
        # the extra stream construction if we don't need it.
        $self->_flatten_stream_of_streams(do {
            if ($repeat_between) {
                $s->map(sub {
                    local $_ = $self->_stream_from_array(@into);
                    # Only add the separator when another item follows.
                    (@between && $s->peek)
                        ? $self->_stream_concat(
                              $_[0]->($_), $self->_stream_from_array(@between)
                          )
                        : $_[0]->($_)
                })
            } else {
                $s->map(sub {
                    local $_ = $self->_stream_from_array(@into);
                    $_[0]->($_)
                })
            }
        })
    };
    return $self->replace($repeater, \%opts);
}
287
# Shorthand for repeat() with the "content" option forced on.
# $options is an optional hash reference; it is copied, never modified.
sub repeat_content {
    my ($self, $repeat_for, $options) = @_;
    my %with_content = (%{ $options || {} }, content => 1);
    return $self->repeat($repeat_for, \%with_content);
}
292
456a815d 2931;
556c8616 294
295=head1 NAME
296
297HTML::Zoom::FilterBuilder - Add Filters to a Stream
298
244252e7 299=head1 SYNOPSIS
300
a42917f6 301Create an L<HTML::Zoom> instance:
302
0d8f057e 303 use HTML::Zoom;
304 my $root = HTML::Zoom
305 ->from_html(<<MAIN);
306 <html>
307 <head>
308 <title>Default Title</title>
309 </head>
a42917f6 310 <body bad_attr='junk'>
0d8f057e 311 Default Content
312 </body>
313 </html>
314 MAIN
315
a42917f6 316Create a new attribute on the C<body> tag:
317
318 $root = $root
319 ->select('body')
320 ->set_attribute(class=>'main');
321
322Add an extra value to an existing attribute:
323
324 $root = $root
325 ->select('body')
326 ->add_to_attribute(class=>'one-column');
327
328Set the content of the C<title> tag:
329
330 $root = $root
331 ->select('title')
332 ->replace_content('Hello World');
333
334Set content from another L<HTML::Zoom> instance:
335
0d8f057e 336 my $body = HTML::Zoom
337 ->from_html(<<BODY);
338 <div id="stuff">
2daa653a 339 <p>Well Now</p>
f8ad684d 340 <p id="p2">Is the Time</p>
0d8f057e 341 </div>
342 BODY
343
a42917f6 344 $root = $root
f8ad684d 345 ->select('body')
a42917f6 346 ->replace_content($body);
347
348Set an attribute on multiple matches:
349
350 $root = $root
f8ad684d 351 ->select('p')
a42917f6 352 ->set_attribute(class=>'para');
353
354Remove an attribute:
355
356 $root = $root
357 ->select('body')
358 ->remove_attribute('bad_attr');
0d8f057e 359
360will produce:
361
362=begin testinfo
363
a42917f6 364 my $output = $root->to_html;
0d8f057e 365 my $expect = <<HTML;
366
367=end testinfo
368
369 <html>
370 <head>
371 <title>Hello World</title>
372 </head>
434a11c8 373 <body class="main one-column"><div id="stuff">
adb30a8a 374 <p class="para">Well Now</p>
a42917f6 375 <p id="p2" class="para">Is the Time</p>
0d8f057e 376 </div>
377 </body>
378 </html>
379
380=begin testinfo
381
382 HTML
383 is($output, $expect, 'Synopsis code works ok');
384
385=end testinfo
244252e7 386
556c8616 387=head1 DESCRIPTION
388
389Given a L<HTML::Zoom> stream, provide methods to apply filters which
390alter the content of that stream.
391
f6644c71 392=head1 METHODS
393
394This class defines the following public API
395
e225a4bd 396=head2 set_attribute
f6644c71 397
f8ad684d 398Sets an attribute of a given name to a given value for all matching selections.
399
400 $html_zoom
401 ->select('p')
402 ->set_attribute(class=>'paragraph')
403 ->select('div')
434a11c8 404 ->set_attribute({ name=>'class', value=>'divider' });
405
f8ad684d 406
407Overrides existing values, if such exist. When multiple L</set_attribute>
408calls are made against the same or overlapping selection sets, the final
409call wins.
f6644c71 410
e225a4bd 411=head2 add_to_attribute
f6644c71 412
434a11c8 413Adds a value to an existing attribute, or creates one if the attribute does not
414yet exist.
f6644c71 415
434a11c8 416 $html_zoom
417 ->select('p')
418 ->set_attribute(class=>'paragraph')
419 ->then
420 ->add_to_attribute({ name=>'class', value=>'divider' });
f6644c71 421
434a11c8 422Attributes with more than one value will have a dividing space.
423
e225a4bd 424=head2 remove_attribute
434a11c8 425
426Removes an attribute and all its values.
427
428 $html_zoom
429 ->select('p')
430 ->set_attribute(class=>'paragraph')
431 ->then
432 ->remove_attribute('class');
433
434Removes attributes from the original stream or events already added.
f6644c71 435
436=head2 collect
437
ac3acd87 438Collects and extracts results of L<HTML::Zoom/select>. It takes the following
439optional common options as hash reference.
440
441=over
442
443=item into [ARRAY REFERENCE]
444
445Where to save collected events (selected elements).
446
447 $z1->select('#main-content')
448 ->collect({ into => \@body })
449 ->run;
450 $z2->select('#main-content')
451 ->replace(\@body)
452 ->memoize;
453
454=item filter [CODE]
455
456Run filter on collected elements (locally setting $_ to stream, and passing
457stream as an argument to given code reference). Filtered stream would be
458returned.
459
460 $z->select('.outer')
461 ->collect({
462 filter => sub { $_->select('.inner')->replace_content('bar!') },
463 passthrough => 1,
464 })
465
466It can be used to further filter selection. For example
467
468 $z->select('tr')
469 ->collect({
470 filter => sub { $_->select('td') },
471 passthrough => 1,
472 })
473
474is equivalent to (not implemented yet) descendant selector combination, i.e.
475
476 $z->select('tr td')
477
478=item passthrough [BOOLEAN]
479
480Extract copy of elements; the stream is unchanged (it does not remove collected
481elements). For example without 'passthrough'
482
483 HTML::Zoom->from_html('<foo><bar /></foo>')
484 ->select('foo')
485 ->collect({ content => 1 })
486 ->to_html
487
488returns '<foo></foo>', while with C<passthrough> option
489
490 HTML::Zoom->from_html('<foo><bar /></foo>')
491 ->select('foo')
492 ->collect({ content => 1, passthrough => 1 })
493 ->to_html
494
495returns '<foo><bar /></foo>'.
496
497=item content [BOOLEAN]
498
499Collect content of the element, and not the element itself.
500
501For example
502
503 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
504 ->select('h1')
505 ->collect
506 ->to_html
507
508would return '<p>foo</p>', while
509
510 HTML::Zoom->from_html('<h1>Title</h1><p>foo</p>')
511 ->select('h1')
512 ->collect({ content => 1 })
513 ->to_html
514
515would return '<h1></h1><p>foo</p>'.
516
517See also L</collect_content>.
518
519=item flush_before [BOOLEAN]
520
521Generate C<flush> event before collecting, to ensure that the HTML generated up
522to selected element being collected is flushed through to the browser. Usually
523used in L</repeat> or L</repeat_content>.
524
525=back
f6644c71 526
527=head2 collect_content
528
ac3acd87 529Collects contents of L<HTML::Zoom/select> result.
530
531 HTML::Zoom->from_file($foo)
532 ->select('#main-content')
533 ->collect_content({ into => \@foo_body })
534 ->run;
535 $z->select('#foo')
536 ->replace_content(\@foo_body)
537 ->memoize;
538
539Equivalent to running L</collect> with C<content> option set.
f6644c71 540
541=head2 add_before
542
ac3acd87 543Given a L<HTML::Zoom/select> result, add given content (which might be string,
544array or another L<HTML::Zoom> object) before it.
545
546 $html_zoom
547 ->select('input[name="foo"]')
548 ->add_before(\ '<span class="warning">required field</span>');
f6644c71 549
550=head2 add_after
551
ac3acd87 552Like L</add_before>, only after L<HTML::Zoom/select> result.
553
554 $html_zoom
555 ->select('p')
556 ->add_after("\n\n");
557
558You can add zoom events directly
559
560 $html_zoom
561 ->select('p')
562 ->add_after([ { type => 'TEXT', raw => 'O HAI' } ]);
f6644c71 563
564=head2 prepend_content
565
566 TBD
567
568=head2 append_content
569
570 TBD
571
572=head2 replace
573
ac3acd87 574Given a L<HTML::Zoom/select> result, replace it with a string, array or another
575L<HTML::Zoom> object. It takes the same optional common options as L</collect>
576(via hash reference).
f6644c71 577
578=head2 replace_content
579
244252e7 580Given a L<HTML::Zoom/select> result, replace the content with a string, array
581or another L<HTML::Zoom> object.
f6644c71 582
ac3acd87 583 $html_zoom
584 ->select('title, #greeting')
585 ->replace_content('Hello world!');
586
f6644c71 587=head2 repeat
588
ac3acd87 589 $zoom->select('.item')->repeat(sub {
590 if (my $row = $db_thing->next) {
591 return sub { $_->select('.item-name')->replace_content($row->name) }
592 } else {
593 return
594 }
595 }, { flush_before => 1 });
596
597Run I<$repeat_for>, which should be iterator (code reference) returning
598subroutines, reference to array of subroutines, or other zoom-able object
599consisting of transformations. Those subroutines would be run with $_
600local-ized to result of L<HTML::Zoom/select> (of collected elements), and with
601said result passed as parameter to subroutine.
602
603You might want to use iterator when you don't have all elements upfront
604
605 $zoom = $zoom->select('.contents')->repeat(sub {
606 while (my $line = $fh->getline) {
607 return sub {
608 $_->select('.lno')->replace_content($fh->input_line_number)
609 ->select('.line')->replace_content($line)
610 }
611 }
612 return
613 });
614
615You might want to use array reference if it doesn't matter that all iterations
616are pre-generated
617
618 $zoom->select('table')->repeat([
619 map {
620 my $elem = $_;
621 sub {
622 $_->select('td')->replace_content($elem);
623 }
624 } @list
625 ]);
626
627In addition to common options as in L</collect>, it also supports
628
629=over
630
631=item repeat_between [SELECTOR]
632
633Selects object to be repeated between items. In the case of array this object
634is put between elements, in case of iterator it is put between results of
635subsequent iterations, in the case of streamable it is put between events
636(->to_stream->next).
637
638See documentation for L</repeat_content>
639
640=back
f6644c71 641
642=head2 repeat_content
643
ac3acd87 644Given a L<HTML::Zoom/select> result, run provided iterator passing content of
645this result to this iterator. Accepts the same options as L</repeat>.
646
647Equivalent to using C<content> option with L</repeat>.
648
649 $html_zoom
650 ->select('#list')
651 ->repeat_content(
652 [
653 sub {
654 $_->select('.name')->replace_content('Matt')
655 ->select('.age')->replace_content('26')
656 },
657 sub {
658 $_->select('.name')->replace_content('Mark')
659 ->select('.age')->replace_content('0x29')
660 },
661 sub {
662 $_->select('.name')->replace_content('Epitaph')
663 ->select('.age')->replace_content('<redacted>')
664 },
665 ],
666 { repeat_between => '.between' }
667 );
668
f6644c71 669
556c8616 670=head1 ALSO SEE
671
672L<HTML::Zoom>
673
674=head1 AUTHORS
675
676See L<HTML::Zoom> for authors.
677
678=head1 LICENSE
679
680See L<HTML::Zoom> for the license.
681
682=cut
683