add transform_attribute, which runs a coderef on the value of an attribute, and can...
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
1cf03540 3use strictures 1;
d80786d0 4
5use HTML::Zoom::ZConfig;
bf5a23d0 6use HTML::Zoom::ReadFH;
655965b3 7use HTML::Zoom::Transform;
eeeb0921 8use HTML::Zoom::TransformBuilder;
d80786d0 9
f107bef7 10our $VERSION = '0.009004';
7af7362d 11
12$VERSION = eval $VERSION;
13
# Constructor.
#
# Takes an optional hashref of arguments. Its 'zconfig' entry (or an empty
# hashref when that entry is false or missing) is passed to
# HTML::Zoom::ZConfig->new, and the result is stored under the 'zconfig'
# key of the freshly blessed object.
sub new {
  my $class = shift;
  my $args  = shift;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
20
# Accessor for the 'zconfig' slot. Goes through _self_or_new, so it may be
# invoked on either an instance or a class name.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
22
# Lets methods double as constructors: a reference invocant is returned
# untouched, anything else is treated as a class name and ->new is called
# on it with no arguments.
sub _self_or_new {
  my ($proto) = @_;
  return $proto if ref($proto);
  return $proto->new;
}
26
# Builds a new object of the invocant's class whose contents are a shallow
# copy of the invocant overlaid with the key/value pairs of the hashref
# given as second argument. The invocant itself is not modified.
sub _with {
  my ($self, $extra) = @_;
  my %merged = (%$self, %$extra);
  return bless(\%merged, ref($self));
}
30
# Returns a copy of the zoom object (or of a new one, when called on a
# class) with 'initial_events' set to the supplied value.
sub from_events {
  my ($proto, $events) = @_;
  return $proto->_self_or_new->_with({ initial_events => $events });
}
37
# Runs the given HTML through the zconfig's parser (html_to_events) and
# feeds the outcome to from_events.
sub from_html {
  my ($proto, $html) = @_;
  my $self   = $proto->_self_or_new;
  my $parser = $self->zconfig->parser;
  return $self->from_events($parser->html_to_events($html));
}
42
# Slurps the named file and passes its contents to from_html.
#
# The file is opened with the three-argument form of open, so the name is
# always treated as a literal path. The previous implementation went
# through @ARGV and the magic <> operator, which interprets names such as
# "cmd |" as a command to run and "-" as STDIN, and merely warned when the
# file could not be opened.
#
# Dies if the file cannot be opened for reading.
sub from_file {
  my $self = shift->_self_or_new;
  my $filename = shift;
  open(my $fh, '<', $filename)
    or die "Couldn't open ${filename} for reading: $!";
  # Undefined $/ puts readline into slurp mode for this one read.
  my $html = do { local $/; <$fh> };
  close($fh);
  $self->from_html($html);
}
48
# Builds the event stream: starts from a stream over 'initial_events'
# (created by the zconfig's stream_utils) and lets every stored transform
# wrap it in turn via apply_to_stream, in the order they were added.
#
# Dies when no initial events have been set.
sub to_stream {
  my ($self) = @_;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $stream = $self->zconfig->stream_utils->stream_from_array(@$events);
  foreach my $transform (@{ $self->{transforms} || [] }) {
    $stream = $transform->apply_to_stream($stream);
  }
  return $stream;
}
58
# Hands the zoom object to HTML::Zoom::ReadFH->from_zoom and returns
# whatever that produces.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
62
# Builds the stream and collects it, via stream_utils->stream_to_array,
# into a new array reference.
sub to_events {
  my ($self) = @_;
  my $sutils = $self->zconfig->stream_utils;
  my @events = $sutils->stream_to_array($self->to_stream);
  return \@events;
}
67
# Runs the transforms purely for their side effects: the events are
# generated through to_events and thrown away. Returns nothing.
sub run {
  my ($self) = @_;
  $self->to_events;
  return;
}
73
# Invokes $code as a method on the zoom object with $_ localised to that
# same object, and returns whatever $code returns.
sub apply {
  my $self = shift;
  my $code = shift;
  local $_ = $self;
  return $self->$code();
}
79
# Conditional variant of apply: when $predicate is true, $code is invoked
# as a method on the zoom object with $_ localised to it and its result is
# returned; otherwise the object itself is returned and $code is not run.
sub apply_if {
  my ($self, $predicate, $code) = @_;
  return $self unless $predicate;
  local $_ = $self;
  return $self->$code();
}
90
# Builds the stream and passes it to the zconfig producer's
# html_from_stream, returning the producer's result.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
95
# Renders this zoom to HTML and uses that HTML as the source of a new
# object of the same class. The current object is passed to ->new as the
# argument hashref, so its 'zconfig' entry is what the constructor sees.
sub memoize {
  my ($self) = @_;
  my $class = ref($self);
  my $fresh = $class->new($self);
  return $fresh->from_html($self->to_html);
}
100
# Returns a copy of the zoom object whose 'transforms' list is the existing
# list (if any) followed by the given transform. A new array is built, so
# the invocant's own list is left alone.
sub with_transform {
  my ($proto, $transform) = @_;
  my $self = $proto->_self_or_new;
  my @transforms = (@{ $self->{transforms} || [] }, $transform);
  return $self->_with({ transforms => \@transforms });
}
eeeb0921 111
# Wraps a selector and a single filter in an HTML::Zoom::Transform (sharing
# this object's zconfig) and adds it through with_transform.
sub with_filter {
  my ($proto, $selector, $filter) = @_;
  my $self = $proto->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    filters  => [ $filter ],
  );
  return $self->with_transform(HTML::Zoom::Transform->new(\%spec));
}
d80786d0 123
# Starts a selection: returns an HTML::Zoom::TransformBuilder constructed
# with this object's zconfig, the given selector, and this object as
# 'proto'.
sub select {
  my ($proto, $selector) = @_;
  my $self = $proto->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    proto    => $self,
  );
  return HTML::Zoom::TransformBuilder->new(\%spec);
}
133
134# There's a bug waiting to happen here: if you do something like
135#
136# $zoom->select('.foo')
1c4455ae 137# ->remove_attribute(class => 'foo')
d80786d0 138# ->then
139# ->well_anything_really
140#
141# the second action won't execute because it doesn't match anymore.
142# Ideally instead we'd merge the match subs but that's more complex to
143# implement so I'm deferring it for the moment.
144
# Re-selects using the selector of the most recently added transform, so
# that another action can be chained onto the same selector.
#
# Dies when the object has no 'transforms' entry.
sub then {
  my ($self) = @_;
  my $transforms = $self->{transforms}
    or die "Can't call ->then without a previous transform";
  return $self->select($transforms->[-1]->selector);
}
151
1521;
153
154=head1 NAME
155
156HTML::Zoom - selector based streaming template engine
157
158=head1 SYNOPSIS
159
160 use HTML::Zoom;
161
162 my $template = <<HTML;
163 <html>
164 <head>
165 <title>Hello people</title>
166 </head>
167 <body>
168 <h1 id="greeting">Placeholder</h1>
169 <div id="list">
170 <span>
171 <p>Name: <span class="name">Bob</span></p>
172 <p>Age: <span class="age">23</span></p>
173 </span>
174 <hr class="between" />
175 </div>
176 </body>
177 </html>
178 HTML
179
180 my $output = HTML::Zoom
181 ->from_html($template)
182 ->select('title, #greeting')->replace_content('Hello world & dog!')
183 ->select('#list')->repeat_content(
184 [
185 sub {
186 $_->select('.name')->replace_content('Matt')
187 ->select('.age')->replace_content('26')
188 },
189 sub {
190 $_->select('.name')->replace_content('Mark')
191 ->select('.age')->replace_content('0x29')
192 },
193 sub {
194 $_->select('.name')->replace_content('Epitaph')
195 ->select('.age')->replace_content('<redacted>')
196 },
197 ],
198 { repeat_between => '.between' }
199 )
200 ->to_html;
201
202will produce:
203
204=begin testinfo
205
206 my $expect = <<HTML;
207
208=end testinfo
209
210 <html>
211 <head>
212 <title>Hello world &amp; dog!</title>
213 </head>
214 <body>
215 <h1 id="greeting">Hello world &amp; dog!</h1>
216 <div id="list">
217 <span>
218 <p>Name: <span class="name">Matt</span></p>
219 <p>Age: <span class="age">26</span></p>
220 </span>
221 <hr class="between" />
222 <span>
223 <p>Name: <span class="name">Mark</span></p>
224 <p>Age: <span class="age">0x29</span></p>
225 </span>
226 <hr class="between" />
227 <span>
228 <p>Name: <span class="name">Epitaph</span></p>
229 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
230 </span>
231
232 </div>
233 </body>
234 </html>
235
236=begin testinfo
237
238 HTML
239 is($output, $expect, 'Synopsis code works ok');
240
241=end testinfo
242
1c4455ae 243=head1 DANGER WILL ROBINSON
244
245This is a 0.9 release. That means that I'm fairly happy the API isn't going
246to change in surprising and upsetting ways before 1.0 and a real compatibility
247freeze. But it also means that if it turns out there's a mistake the size of
248a politician's ego in the API design that I haven't spotted yet there may be
249a bit of breakage between here and 1.0. Hopefully not though. Appendages
250crossed and all that.
251
252Worse still, the rest of the distribution isn't documented yet. I'm sorry.
253I suck. But lots of people have been asking me to ship this, docs or no, so
254having got this class itself at least somewhat documented I figured now was
255a good time to cut a first real release.
256
257=head1 DESCRIPTION
258
259HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
260CSS selector based semantic templating engine for HTML and HTML-like
261document formats.
262
263Which is, on the whole, a bit of a mouthful. So let me step back a moment
264and explain why you care enough to understand what I mean:
265
266=head2 JQUERY ENVY
267
268HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
269piece of data into a document by doing:
270
  $('.username').text(username);
272
273In HTML::Zoom one can write
274
275 $zoom->select('.username')->replace_content($username);
276
277which is, I hope, almost as clear, hampered only by the fact that Zoom can't
278assume a global document and therefore has nothing quite so simple as the
279$() function to get the initial selection.
280
281L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
282specification, and will continue to track that rather than the W3C standards
for the foreseeable future on grounds of pragmatism. Also on grounds of their
284spec is written in EN_US rather than EN_W3C, and I read the former much better.
285
286I am happy to admit that it's very, very much a subset at the moment - see the
287L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
288and more to be supported over time as we need it and patch it in.
289
290=head2 CLEAN TEMPLATES
291
292HTML::Zoom is the cure for messy templates. How many times have you looked at
293templates like this:
294
295 <form action="/somewhere">
296 [% FOREACH field IN fields %]
297 <label for="[% field.id %]">[% field.label %]</label>
298 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
299 [% END %]
300 </form>
301
302and despaired of the fact that neither the HTML structure nor the logic are
303remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
304cleanly:
305
306 <form class="myform" action="/somewhere">
307 <label />
308 <input />
309 </form>
310
311 $zoom->select('.myform')->repeat_content([
312 map { my $field = $_; sub {
313
314 $_->select('label')
2daa653a 315 ->add_to_attribute( for => $field->{id} )
1c4455ae 316 ->then
317 ->replace_content( $field->{label} )
318
319 ->select('input')
2daa653a 320 ->add_to_attribute( name => $field->{name} )
1c4455ae 321 ->then
2daa653a 322 ->add_to_attribute( type => $field->{type} )
1c4455ae 323 ->then
2daa653a 324 ->add_to_attribute( value => $field->{value} )
1c4455ae 325
326 } } @fields
327 ]);
328
329This is, admittedly, very much not shorter. However, it makes it extremely
330clear what's happening and therefore less hassle to maintain. Especially
331because it allows the designer to fiddle with the HTML without cutting
332himself on sharp ELSE clauses, and the developer to add available data to
333the template without getting angle bracket cuts on sensitive parts.
334
335Better still, HTML::Zoom knows that it's inserting content into HTML and
336can escape it for you - the example template should really have been:
337
338 <form action="/somewhere">
339 [% FOREACH field IN fields %]
340 <label for="[% field.id | html %]">[% field.label | html %]</label>
341 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
342 [% END %]
343 </form>
344
345and frankly I'll take slightly more code any day over *that* crawling horror.
346
347(addendum: I pick on L<Template Toolkit|Template> here specifically because
348it's the template system I hate the least - for text templating, I don't
349honestly think I'll ever like anything except the next version of Template
350Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
351
352=head2 PUTTING THE FUN INTO FUNCTIONAL
353
354The principle of HTML::Zoom is to provide a reusable, functional container
355object that lets you build up a set of transforms to be applied; every method
356call you make on a zoom object returns a new object, so it's safe to do so
357on one somebody else gave you without worrying about altering state (with
358the notable exception of ->next for stream objects, which I'll come to later).
359
360So:
361
362 my $z2 = $z1->select('.name')->replace_content($name);
363
364 my $z3 = $z2->select('.title')->replace_content('Ms.');
365
366each time produces a new Zoom object. If you want to package up a set of
367transforms to re-use, HTML::Zoom provides an 'apply' method:
368
369 my $add_name = sub { $_->select('.name')->replace_content($name) };
370
371 my $same_as_z2 = $z1->apply($add_name);
372
373=head2 LAZINESS IS A VIRTUE
374
375HTML::Zoom does its best to defer doing anything until it's absolutely
376required. The only point at which it descends into state is when you force
377it to create a stream, directly by:
378
c9e76777 379 my $stream = $zoom->to_stream;
1c4455ae 380
381 while (my $evt = $stream->next) {
382 # handle zoom event here
383 }
384
385or indirectly via:
386
387 my $final_html = $zoom->to_html;
388
389 my $fh = $zoom->to_fh;
390
391 while (my $chunk = $fh->getline) {
392 ...
393 }
394
395Better still, the $fh returned doesn't create its stream until the first
396call to getline, which means that until you call that and force it to be
397stateful you can get back to the original stateless Zoom object via:
398
399 my $zoom = $fh->to_zoom;
400
401which is exceedingly handy for filtering L<Plack> PSGI responses, among other
402things.
403
404Because HTML::Zoom doesn't try and evaluate everything up front, you can
405generally put things together in whatever order is most appropriate. This
406means that:
407
408 my $start = HTML::Zoom->from_html($html);
409
410 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
411
412and:
413
414 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
415
416 my $zoom = $start->from_html($html);
417
418will produce equivalent final $zoom objects, thus proving that there can be
419more than one way to do it without one of them being a
420L<bait and switch|Switch>.
421
422=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
423
424HTML::Zoom's execution always happens in terms of streams under the hood
425- that is, the basic pattern for doing anything is -
426
  my $stream = get_stream_from_somewhere;
428
429 while (my ($evt) = $stream->next) {
430 # do something with the event
431 }
432
433More importantly, all selectors and filters are also built as stream
434operations, so a selector and filter pair is effectively:
435
436 sub next {
437 my ($self) = @_;
438 my $next_evt = $self->parent_stream->next;
439 if ($self->selector_matches($next_evt)) {
440 return $self->apply_filter_to($next_evt);
441 } else {
442 return $next_evt;
443 }
444 }
445
446Internally, things are marginally more complicated than that, but not enough
447that you as a user should normally need to care.
448
449In fact, an HTML::Zoom object is mostly just a container for the relevant
450information from which to build the final stream that does the real work. A
451stream built from a Zoom object is a stream of events from parsing the
452initial HTML, wrapped in a filter stream per selector/filter pair provided
453as described above.
454
455The upshot of this is that the application of filters works just as well on
456streams as on the original Zoom object - in fact, when you run a
L<HTML::Zoom::FilterBuilder/repeat_content> operation your subroutines are applied to the stream for
458that element of the repeat, rather than constructing a new zoom per repeat
459element as well.
460
461More concretely:
462
463 $_->select('div')->replace_content('I AM A DIV!');
464
465works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
466shares sufficient of the implementation that you can generally forget the
467difference - barring the fact that a stream already has state attached so
468things like to_fh are no longer available.
469
470=head2 POP! GOES THE WEASEL
471
472... and by Weasel, I mean layout.
473
474HTML::Zoom's filehandle object supports an additional event key, 'flush',
475that is transparent to the rest of the system but indicates to the filehandle
476object to end a getline operation at that point and return the HTML so far.
477
478This means that in an environment where streaming output is available, such
479as a number of the L<Plack> PSGI handlers, you can add the flush key to an
480event in order to ensure that the HTML generated so far is flushed through
481to the browser right now. This can be especially useful if you know you're
482about to call a web service or a potentially slow database query or similar
483to ensure that at least the header/layout of your page renders now, improving
484perceived user responsiveness while your application waits around for the
485data it needs.
486
487This is currently exposed by the 'flush_before' option to the collect filter,
488which incidentally also underlies the replace and repeat filters, so to
489indicate we want this behaviour to happen before a query is executed we can
490write something like:
491
492 $zoom->select('.item')->repeat(sub {
493 if (my $row = $db_thing->next) {
494 return sub { $_->select('.item-name')->replace_content($row->name) }
495 } else {
496 return
497 }
498 }, { flush_before => 1 });
499
500which should have the desired effect given a sufficiently lazy $db_thing (for
501example a L<DBIx::Class::ResultSet> object).
502
503=head2 A FISTFUL OF OBJECTS
504
505At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
506whose purpose is to hang on to the various bits and pieces that things need
507so that there's a common way of accessing shared functionality.
508
509Were I a computer scientist I would probably call this an "Inversion of
510Control" object - which you'd be welcome to google to learn more about, or
511you can just imagine a computer scientist being suspended upside down over
512a pit. Either way works for me, I'm a pure maths grad.
513
514The ZConfig object hangs on to one each of the following for you:
515
516=over 4
517
518=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
519
520=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
521
522=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
523
524=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
525
526=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
527
528=back
529
530In theory you could replace any of these with anything you like, but in
531practice you're probably best restricting yourself to subclasses, or at
532least things that manage to look like the original if you squint a bit.
533
534If you do something more clever than that, or find yourself overriding things
535in your ZConfig a lot, please please tell us about it via one of the means
536mentioned under L</SUPPORT>.
537
538=head2 SEMANTIC DIDACTIC
539
540Some will argue that overloading CSS selectors to do data stuff is a terrible
541idea, and possibly even a step towards the "Concrete Javascript" pattern
542(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
543where it keeps reminding me of the late, great Tony Hart's plasticine friend).
544
545To which I say, "eh", "meh", and possibly also "feh". If it really upsets
546you, either use extra classes for this (and remove them afterwards) or
547use special fake elements or, well, honestly, just use something different.
548L<Template::Semantic> provides a similar idea to zoom except using XPath
549and XML::LibXML transforms rather than a lightweight streaming approach -
550maybe you'd like that better. Or maybe you really did want
551L<Template Toolkit|Template> after all. It is still damn good at what it does,
552after all.
553
554So far, however, I've found that for new sites the designers I'm working with
555generally want to produce nice semantic HTML with classes that represent the
556nature of the data rather than the structure of the layout, so sharing them
557as a common interface works really well for us.
558
559In the absence of any evidence that overloading CSS selectors has killed
560children or unexpectedly set fire to grandmothers - and given microformats
561have been around for a while there's been plenty of opportunity for
octogenarian combustion - I'd suggest you give it a try and see if you like it.
563
564=head2 GET THEE TO A SUMMARY!
565
566Erm. Well.
567
568HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
569CSS selector based semantic templating engine for HTML and HTML-like
570document formats.
571
572But I said that already. Although hopefully by now you have some idea what I
573meant when I said it. If you didn't have any idea the first time. I mean, I'm
574not trying to call you stupid or anything. Just saying that maybe it wasn't
575totally obvious without the explanation. Or something.
576
577Er.
578
579Maybe we should just move on to the method docs.
580
581=head1 METHODS
582
583=head2 new
584
585 my $zoom = HTML::Zoom->new;
586
587 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
588
589Create a new empty Zoom object. You can optionally pass an
590L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
591the default components.
592
593This method isn't often used directly since several other methods can also
act as constructors, notably L</select> and L</from_html>.
595
596=head2 zconfig
597
598 my $zconfig = $zoom->zconfig;
599
600Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
601shouldn't usually need to call this yourself.
602
603=head2 from_html
604
605 my $zoom = HTML::Zoom->from_html($html);
606
607 my $z2 = $z1->from_html($html);
608
609Parses the HTML using the current zconfig's parser object and returns a new
610zoom instance with that as the source HTML to be transformed.
611
612=head2 from_file
613
614 my $zoom = HTML::Zoom->from_file($file);
615
616 my $z2 = $z1->from_file($file);
617
618Convenience method - slurps the contents of $file and calls from_html with it.
619
620=head2 to_stream
621
622 my $stream = $zoom->to_stream;
623
  while (my ($evt) = $stream->next) {
    ...
  }
626
627Creates a stream, starting with a stream of the events from the HTML supplied
628via L</from_html> and then wrapping it in turn with each selector+filter pair
629that have been applied to the zoom object.
630
631=head2 to_fh
632
633 my $fh = $zoom->to_fh;
634
635 call_something_expecting_a_filehandle($fh);
636
637Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
638time its getline method is called and then return all HTML up to the next
639event with 'flush' set.
640
641You can pass this filehandle to compliant PSGI handlers (and probably most
642web frameworks).
643
644=head2 run
645
646 $zoom->run;
647
648Runs the zoom object's transforms without doing anything with the results.
649
650Normally used to get side effects of a zoom run - for example when using
651L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
652
653=head2 apply
654
  my $z2 = $z1->apply(sub {
    $_->select('div')->replace_content('I AM A DIV!')
  });
658
659Sets $_ to the zoom object and then runs the provided code. Basically syntax
660sugar, the following is entirely equivalent:
661
  my $sub = sub {
    shift->select('div')->replace_content('I AM A DIV!')
  };
665
666 my $z2 = $sub->($z1);
667
668=head2 to_html
669
670 my $html = $zoom->to_html;
671
672Runs the zoom processing and returns the resulting HTML.
673
674=head2 memoize
675
676 my $z2 = $z1->memoize;
677
678Creates a new zoom whose source HTML is the results of the original zoom's
679processing. Effectively syntax sugar for:
680
681 my $z2 = HTML::Zoom->from_html($z1->to_html);
682
683but preserves your L<HTML::Zoom::ZConfig> object.
684
685=head2 with_filter
686
687 my $zoom = HTML::Zoom->with_filter(
688 'div', $filter_builder->replace_content('I AM A DIV!')
689 );
690
691 my $z2 = $z1->with_filter(
692 'div', $filter_builder->replace_content('I AM A DIV!')
693 );
694
695Lower level interface than L</select> to adding filters to your zoom object.
696
697In normal usage, you probably don't need to call this yourself.
698
699=head2 select
700
701 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
702
703 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
704
97192b02 705Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
1c4455ae 706on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
707
708In normal usage you should generally always put the pair of method calls
709together; the intermediary object isn't designed or expected to stick around.
710
711=head2 then
712
2daa653a 713 my $z2 = $z1->select('div')->add_to_attribute(class => 'spoon')
1c4455ae 714 ->then
715 ->replace_content('I AM A DIV!');
716
717Re-runs the previous select to allow you to chain actions together on the
718same selector.
719
f107bef7 720=head1 AUTHOR
45b4cea1 721
f107bef7 722mst - Matt S. Trout (cpan:MSTROUT) <mst@shadowcat.co.uk>
45b4cea1 723
f107bef7 724=head1 CONTRIBUTORS
45b4cea1 725
f107bef7 726Oliver Charles
727
728Jakub Nareski
729
730Simon Elliot
731
732Joe Highton
733
734John Napiorkowski
735
5cac799e 736Robert Buels
737
f107bef7 738=head1 COPYRIGHT
739
740Copyright (c) 2010-2011 the HTML::Zoom L</AUTHOR> and L</CONTRIBUTORS>
741as listed above.
45b4cea1 742
743=head1 LICENSE
744
745This library is free software, you can redistribute it and/or modify
746it under the same terms as Perl itself.
747
d80786d0 748=cut
45b4cea1 749