move to TransformBuilder in Zoom
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
bf5a23d0 7use HTML::Zoom::ReadFH;
655965b3 8use HTML::Zoom::Transform;
eeeb0921 9use HTML::Zoom::TransformBuilder;
d80786d0 10
# Constructor. Takes an optional hashref of arguments; its 'zconfig' entry
# (or an empty hashref when that entry is false/absent) is passed to
# HTML::Zoom::ZConfig->new and the result is stored under the 'zconfig' key
# of the newly blessed object.
sub new {
  my ($class, $args) = @_;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
17
# Accessor for the value stored under this object's 'zconfig' key. The
# invocant goes through _self_or_new first, so calling this on a class name
# reads the zconfig of a freshly constructed instance.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
19
# Lets methods double as constructors: a reference invocant is returned
# unchanged, while a non-reference (class name) invocant is replaced by the
# result of calling ->new on it with no arguments.
sub _self_or_new {
  my $invocant = $_[0];
  return $invocant if ref($invocant);
  return $invocant->new;
}
23
# Returns a new object of the invocant's class whose fields are a shallow
# copy of the invocant's fields overlaid with those of the supplied hashref
# (keys in the hashref win). The invocant itself is not modified.
sub _with {
  my ($self, $overrides) = @_;
  my %merged = (%$self, %$overrides);
  return bless(\%merged, ref($self));
}
27
# Parses the given HTML with the zconfig's parser (html_to_events) and
# returns a copy of this object, made via _with, that has the parser's
# result stored as 'initial_events'. Usable as a class or object method.
sub from_html {
  my $self = shift->_self_or_new;
  my $parser = $self->zconfig->parser;
  return $self->_with({
    # After the shift above, $_[0] is the HTML argument.
    initial_events => $parser->html_to_events($_[0]),
  });
}
34
# Reads the entire contents of $filename and passes them to from_html,
# returning whatever from_html returns. Usable as a class or object method.
#
# The file is opened with three-argument open so the name is always treated
# as a literal path. The magic <> read this replaces goes through
# two-argument open, which interprets names such as "cmd |", "-" or ">file"
# specially.
#
# Dies if no filename is supplied or if the file cannot be opened.
sub from_file {
  my $self = shift->_self_or_new;
  my $filename = shift;
  die "No filename supplied to from_file" unless defined $filename;
  open(my $fh, '<', $filename)
    or die "Unable to open $filename for reading: $!";
  # Undefining $/ for the duration of the read slurps the file in one go.
  my $html = do { local $/; <$fh> };
  close($fh);
  $self->from_html($html);
}
40
# Builds the stream for this object: starts from a stream over the stored
# 'initial_events' (via the zconfig's stream_utils) and then passes it
# through apply_to_stream of each stored transform, in order. Dies if no
# initial events have been set.
sub to_stream {
  my ($self) = @_;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $stream = $self->zconfig->stream_utils->stream_from_array(@$events);
  for (@{ $self->{transforms} || [] }) {
    $stream = $_->apply_to_stream($stream);
  }
  return $stream;
}
50
# Wraps this object in a filehandle-like object by handing it to
# HTML::Zoom::ReadFH->from_zoom and returning the result.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
54
# Builds the stream with to_stream and drains it through the stream_utils'
# stream_to_array, discarding the result. Always returns nothing (empty
# list / undef).
sub run {
  my ($self) = @_;
  my $stream_utils = $self->zconfig->stream_utils;
  # Called purely for its effect of consuming the stream.
  $stream_utils->stream_to_array($self->to_stream);
  return;
}
60
# Invokes $code as a method on this object with $_ temporarily set to the
# object, and returns whatever $code returns. $_ is restored afterwards.
sub apply {
  my $self = shift;
  my $code = shift;
  local $_ = $self;
  return $self->$code();
}
66
# Builds the stream with to_stream and returns the result of passing it to
# the zconfig producer's html_from_stream.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
71
# Renders this object with to_html and feeds the result to from_html on a
# new instance of the same class. The new instance is constructed with this
# object as its argument hashref, so new() sees this object's 'zconfig'.
sub memoize {
  my ($self) = @_;
  my $fresh = ref($self)->new($self);
  return $fresh->from_html($self->to_html);
}
76
# Returns a copy of this object (via _with) whose 'transforms' list is the
# existing list, if any, with $transform appended. Usable as a class or
# object method.
sub with_transform {
  my $self = shift->_self_or_new;
  my ($transform) = @_;
  my @existing = @{ $self->{transforms} || [] };
  return $self->_with({ transforms => [ @existing, $transform ] });
}
eeeb0921 87
# Builds an HTML::Zoom::Transform from this object's zconfig, the given
# selector and a one-element filter list, then returns the result of adding
# it with with_transform. Usable as a class or object method.
sub with_filter {
  my $self = shift->_self_or_new;
  my ($selector, $filter) = @_;
  my %transform_args = (
    zconfig => $self->zconfig,
    selector => $selector,
    filters => [ $filter ],
  );
  return $self->with_transform(HTML::Zoom::Transform->new(\%transform_args));
}
d80786d0 99
# Starts a selector-based transform: returns a new
# HTML::Zoom::TransformBuilder constructed with this object's zconfig, the
# given selector, and this object as 'proto'. Usable as a class or object
# method.
sub select {
  my $self = shift->_self_or_new;
  my ($selector) = @_;
  my %builder_args = (
    zconfig => $self->zconfig,
    selector => $selector,
    proto => $self,
  );
  return HTML::Zoom::TransformBuilder->new(\%builder_args);
}
109
# There's a bug waiting to happen here: if you do something like
#
#   $zoom->select('.foo')
#        ->remove_attribute(class => 'foo')
#        ->then
#        ->well_anything_really
#
# the second action won't execute because it doesn't match anymore.
# Ideally instead we'd merge the match subs but that's more complex to
# implement so I'm deferring it for the moment.

# Re-issues ->select using the selector of the most recently added
# transform, so another action can be chained onto the same selector.
# Returns whatever ->select returns.
#
# Dies if there is no previous transform. That covers both a missing
# 'transforms' entry and an empty list, which would otherwise fail later
# with an unhelpful "Can't call method on an undefined value" error.
sub then {
  my $self = shift;
  my $transforms = $self->{transforms};
  die "Can't call ->then without a previous transform"
    unless $transforms && @$transforms;
  $self->select($transforms->[-1]->selector);
}
127
1281;
129
130=head1 NAME
131
132HTML::Zoom - selector based streaming template engine
133
134=head1 SYNOPSIS
135
136 use HTML::Zoom;
137
138 my $template = <<HTML;
139 <html>
140 <head>
141 <title>Hello people</title>
142 </head>
143 <body>
144 <h1 id="greeting">Placeholder</h1>
145 <div id="list">
146 <span>
147 <p>Name: <span class="name">Bob</span></p>
148 <p>Age: <span class="age">23</span></p>
149 </span>
150 <hr class="between" />
151 </div>
152 </body>
153 </html>
154 HTML
155
156 my $output = HTML::Zoom
157 ->from_html($template)
158 ->select('title, #greeting')->replace_content('Hello world & dog!')
159 ->select('#list')->repeat_content(
160 [
161 sub {
162 $_->select('.name')->replace_content('Matt')
163 ->select('.age')->replace_content('26')
164 },
165 sub {
166 $_->select('.name')->replace_content('Mark')
167 ->select('.age')->replace_content('0x29')
168 },
169 sub {
170 $_->select('.name')->replace_content('Epitaph')
171 ->select('.age')->replace_content('<redacted>')
172 },
173 ],
174 { repeat_between => '.between' }
175 )
176 ->to_html;
177
178will produce:
179
180=begin testinfo
181
182 my $expect = <<HTML;
183
184=end testinfo
185
186 <html>
187 <head>
188 <title>Hello world &amp; dog!</title>
189 </head>
190 <body>
191 <h1 id="greeting">Hello world &amp; dog!</h1>
192 <div id="list">
193 <span>
194 <p>Name: <span class="name">Matt</span></p>
195 <p>Age: <span class="age">26</span></p>
196 </span>
197 <hr class="between" />
198 <span>
199 <p>Name: <span class="name">Mark</span></p>
200 <p>Age: <span class="age">0x29</span></p>
201 </span>
202 <hr class="between" />
203 <span>
204 <p>Name: <span class="name">Epitaph</span></p>
205 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
206 </span>
207
208 </div>
209 </body>
210 </html>
211
212=begin testinfo
213
214 HTML
215 is($output, $expect, 'Synopsis code works ok');
216
217=end testinfo
218
1c4455ae 219=head1 DANGER WILL ROBINSON
220
221This is a 0.9 release. That means that I'm fairly happy the API isn't going
222to change in surprising and upsetting ways before 1.0 and a real compatibility
223freeze. But it also means that if it turns out there's a mistake the size of
224a politician's ego in the API design that I haven't spotted yet there may be
225a bit of breakage between here and 1.0. Hopefully not though. Appendages
226crossed and all that.
227
228Worse still, the rest of the distribution isn't documented yet. I'm sorry.
229I suck. But lots of people have been asking me to ship this, docs or no, so
230having got this class itself at least somewhat documented I figured now was
231a good time to cut a first real release.
232
233=head1 DESCRIPTION
234
235HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
236CSS selector based semantic templating engine for HTML and HTML-like
237document formats.
238
239Which is, on the whole, a bit of a mouthful. So let me step back a moment
240and explain why you care enough to understand what I mean:
241
242=head2 JQUERY ENVY
243
244HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
245piece of data into a document by doing:
246
247 $('.username').text(username);
248
249In HTML::Zoom one can write
250
251 $zoom->select('.username')->replace_content($username);
252
253which is, I hope, almost as clear, hampered only by the fact that Zoom can't
254assume a global document and therefore has nothing quite so simple as the
255$() function to get the initial selection.
256
257L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
258specification, and will continue to track that rather than the W3C standards
259for the foreseeable future on grounds of pragmatism. Also on the grounds that their
260spec is written in EN_US rather than EN_W3C, and I read the former much better.
261
262I am happy to admit that it's very, very much a subset at the moment - see the
263L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
264and more to be supported over time as we need it and patch it in.
265
266=head2 CLEAN TEMPLATES
267
268HTML::Zoom is the cure for messy templates. How many times have you looked at
269templates like this:
270
271 <form action="/somewhere">
272 [% FOREACH field IN fields %]
273 <label for="[% field.id %]">[% field.label %]</label>
274 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
275 [% END %]
276 </form>
277
278and despaired of the fact that neither the HTML structure nor the logic are
279remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
280cleanly:
281
282 <form class="myform" action="/somewhere">
283 <label />
284 <input />
285 </form>
286
287 $zoom->select('.myform')->repeat_content([
288 map { my $field = $_; sub {
289
290 $_->select('label')
291 ->add_attribute( for => $field->{id} )
292 ->then
293 ->replace_content( $field->{label} )
294
295 ->select('input')
296 ->add_attribute( name => $field->{name} )
297 ->then
298 ->add_attribute( type => $field->{type} )
299 ->then
300 ->add_attribute( value => $field->{value} )
301
302 } } @fields
303 ]);
304
305This is, admittedly, very much not shorter. However, it makes it extremely
306clear what's happening and therefore less hassle to maintain. Especially
307because it allows the designer to fiddle with the HTML without cutting
308himself on sharp ELSE clauses, and the developer to add available data to
309the template without getting angle bracket cuts on sensitive parts.
310
311Better still, HTML::Zoom knows that it's inserting content into HTML and
312can escape it for you - the example template should really have been:
313
314 <form action="/somewhere">
315 [% FOREACH field IN fields %]
316 <label for="[% field.id | html %]">[% field.label | html %]</label>
317 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
318 [% END %]
319 </form>
320
321and frankly I'll take slightly more code any day over *that* crawling horror.
322
323(addendum: I pick on L<Template Toolkit|Template> here specifically because
324it's the template system I hate the least - for text templating, I don't
325honestly think I'll ever like anything except the next version of Template
326Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
327
328=head2 PUTTING THE FUN INTO FUNCTIONAL
329
330The principle of HTML::Zoom is to provide a reusable, functional container
331object that lets you build up a set of transforms to be applied; every method
332call you make on a zoom object returns a new object, so it's safe to do so
333on one somebody else gave you without worrying about altering state (with
334the notable exception of ->next for stream objects, which I'll come to later).
335
336So:
337
338 my $z2 = $z1->select('.name')->replace_content($name);
339
340 my $z3 = $z2->select('.title')->replace_content('Ms.');
341
342each time produces a new Zoom object. If you want to package up a set of
343transforms to re-use, HTML::Zoom provides an 'apply' method:
344
345 my $add_name = sub { $_->select('.name')->replace_content($name) };
346
347 my $same_as_z2 = $z1->apply($add_name);
348
349=head2 LAZINESS IS A VIRTUE
350
351HTML::Zoom does its best to defer doing anything until it's absolutely
352required. The only point at which it descends into state is when you force
353it to create a stream, directly by:
354
c9e76777 355 my $stream = $zoom->to_stream;
1c4455ae 356
357 while (my ($evt) = $stream->next) {
358 # handle zoom event here
359 }
360
361or indirectly via:
362
363 my $final_html = $zoom->to_html;
364
365 my $fh = $zoom->to_fh;
366
367 while (my $chunk = $fh->getline) {
368 ...
369 }
370
371Better still, the $fh returned doesn't create its stream until the first
372call to getline, which means that until you call that and force it to be
373stateful you can get back to the original stateless Zoom object via:
374
375 my $zoom = $fh->to_zoom;
376
377which is exceedingly handy for filtering L<Plack> PSGI responses, among other
378things.
379
380Because HTML::Zoom doesn't try and evaluate everything up front, you can
381generally put things together in whatever order is most appropriate. This
382means that:
383
384 my $start = HTML::Zoom->from_html($html);
385
386 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
387
388and:
389
390 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
391
392 my $zoom = $start->from_html($html);
393
394will produce equivalent final $zoom objects, thus proving that there can be
395more than one way to do it without one of them being a
396L<bait and switch|Switch>.
397
398=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
399
400HTML::Zoom's execution always happens in terms of streams under the hood
401- that is, the basic pattern for doing anything is -
402
403 my $stream = get_stream_from_somewhere;
404
405 while (my ($evt) = $stream->next) {
406 # do something with the event
407 }
408
409More importantly, all selectors and filters are also built as stream
410operations, so a selector and filter pair is effectively:
411
412 sub next {
413 my ($self) = @_;
414 my $next_evt = $self->parent_stream->next;
415 if ($self->selector_matches($next_evt)) {
416 return $self->apply_filter_to($next_evt);
417 } else {
418 return $next_evt;
419 }
420 }
421
422Internally, things are marginally more complicated than that, but not enough
423that you as a user should normally need to care.
424
425In fact, an HTML::Zoom object is mostly just a container for the relevant
426information from which to build the final stream that does the real work. A
427stream built from a Zoom object is a stream of events from parsing the
428initial HTML, wrapped in a filter stream per selector/filter pair provided
429as described above.
430
431The upshot of this is that the application of filters works just as well on
432streams as on the original Zoom object - in fact, when you run a
433L<repeat_content|HTML::Zoom::FilterBuilder/repeat_content> operation your subroutines are applied to the stream for
434that element of the repeat, rather than constructing a new zoom per repeat
435element as well.
436
437More concretely:
438
439 $_->select('div')->replace_content('I AM A DIV!');
440
441works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
442shares sufficient of the implementation that you can generally forget the
443difference - barring the fact that a stream already has state attached so
444things like to_fh are no longer available.
445
446=head2 POP! GOES THE WEASEL
447
448... and by Weasel, I mean layout.
449
450HTML::Zoom's filehandle object supports an additional event key, 'flush',
451that is transparent to the rest of the system but indicates to the filehandle
452object to end a getline operation at that point and return the HTML so far.
453
454This means that in an environment where streaming output is available, such
455as a number of the L<Plack> PSGI handlers, you can add the flush key to an
456event in order to ensure that the HTML generated so far is flushed through
457to the browser right now. This can be especially useful if you know you're
458about to call a web service or a potentially slow database query or similar
459to ensure that at least the header/layout of your page renders now, improving
460perceived user responsiveness while your application waits around for the
461data it needs.
462
463This is currently exposed by the 'flush_before' option to the collect filter,
464which incidentally also underlies the replace and repeat filters, so to
465indicate we want this behaviour to happen before a query is executed we can
466write something like:
467
468 $zoom->select('.item')->repeat(sub {
469 if (my $row = $db_thing->next) {
470 return sub { $_->select('.item-name')->replace_content($row->name) }
471 } else {
472 return
473 }
474 }, { flush_before => 1 });
475
476which should have the desired effect given a sufficiently lazy $db_thing (for
477example a L<DBIx::Class::ResultSet> object).
478
479=head2 A FISTFUL OF OBJECTS
480
481At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
482whose purpose is to hang on to the various bits and pieces that things need
483so that there's a common way of accessing shared functionality.
484
485Were I a computer scientist I would probably call this an "Inversion of
486Control" object - which you'd be welcome to google to learn more about, or
487you can just imagine a computer scientist being suspended upside down over
488a pit. Either way works for me, I'm a pure maths grad.
489
490The ZConfig object hangs on to one each of the following for you:
491
492=over 4
493
494=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
495
496=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
497
498=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
499
500=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
501
502=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
503
504=back
505
506In theory you could replace any of these with anything you like, but in
507practice you're probably best restricting yourself to subclasses, or at
508least things that manage to look like the original if you squint a bit.
509
510If you do something more clever than that, or find yourself overriding things
511in your ZConfig a lot, please please tell us about it via one of the means
512mentioned under L</SUPPORT>.
513
514=head2 SEMANTIC DIDACTIC
515
516Some will argue that overloading CSS selectors to do data stuff is a terrible
517idea, and possibly even a step towards the "Concrete Javascript" pattern
518(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
519where it keeps reminding me of the late, great Tony Hart's plasticine friend).
520
521To which I say, "eh", "meh", and possibly also "feh". If it really upsets
522you, either use extra classes for this (and remove them afterwards) or
523use special fake elements or, well, honestly, just use something different.
524L<Template::Semantic> provides a similar idea to zoom except using XPath
525and XML::LibXML transforms rather than a lightweight streaming approach -
526maybe you'd like that better. Or maybe you really did want
527L<Template Toolkit|Template> after all. It is still damn good at what it does,
528after all.
529
530So far, however, I've found that for new sites the designers I'm working with
531generally want to produce nice semantic HTML with classes that represent the
532nature of the data rather than the structure of the layout, so sharing them
533as a common interface works really well for us.
534
535In the absence of any evidence that overloading CSS selectors has killed
536children or unexpectedly set fire to grandmothers - and given microformats
537have been around for a while there's been plenty of opportunity for
538octogenarian combustion - I'd suggest you give it a try and see if you like it.
539
540=head2 GET THEE TO A SUMMARY!
541
542Erm. Well.
543
544HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
545CSS selector based semantic templating engine for HTML and HTML-like
546document formats.
547
548But I said that already. Although hopefully by now you have some idea what I
549meant when I said it. If you didn't have any idea the first time. I mean, I'm
550not trying to call you stupid or anything. Just saying that maybe it wasn't
551totally obvious without the explanation. Or something.
552
553Er.
554
555Maybe we should just move on to the method docs.
556
557=head1 METHODS
558
559=head2 new
560
561 my $zoom = HTML::Zoom->new;
562
563 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
564
565Create a new empty Zoom object. You can optionally pass an
566L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
567the default components.
568
569This method isn't often used directly since several other methods can also
570act as constructors, notably L</select> and L</from_html>.
571
572=head2 zconfig
573
574 my $zconfig = $zoom->zconfig;
575
576Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
577shouldn't usually need to call this yourself.
578
579=head2 from_html
580
581 my $zoom = HTML::Zoom->from_html($html);
582
583 my $z2 = $z1->from_html($html);
584
585Parses the HTML using the current zconfig's parser object and returns a new
586zoom instance with that as the source HTML to be transformed.
587
588=head2 from_file
589
590 my $zoom = HTML::Zoom->from_file($file);
591
592 my $z2 = $z1->from_file($file);
593
594Convenience method - slurps the contents of $file and calls from_html with it.
595
596=head2 to_stream
597
598 my $stream = $zoom->to_stream;
599
600 while (my ($evt) = $stream->next) { ... }
602
603Creates a stream, starting with a stream of the events from the HTML supplied
604via L</from_html> and then wrapping it in turn with each selector+filter pair
605that have been applied to the zoom object.
606
607=head2 to_fh
608
609 my $fh = $zoom->to_fh;
610
611 call_something_expecting_a_filehandle($fh);
612
613Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
614time its getline method is called and then return all HTML up to the next
615event with 'flush' set.
616
617You can pass this filehandle to compliant PSGI handlers (and probably most
618web frameworks).
619
620=head2 run
621
622 $zoom->run;
623
624Runs the zoom object's transforms without doing anything with the results.
625
626Normally used to get side effects of a zoom run - for example when using
627L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
628
629=head2 apply
630
631 my $z2 = $z1->apply(sub {
632 $_->select('div')->replace_content('I AM A DIV!')
633 });
634
635Sets $_ to the zoom object and then runs the provided code. Basically syntax
636sugar, the following is entirely equivalent:
637
638 my $sub = sub {
639 shift->select('div')->replace_content('I AM A DIV!')
640 };
641
642 my $z2 = $sub->($z1);
643
644=head2 to_html
645
646 my $html = $zoom->to_html;
647
648Runs the zoom processing and returns the resulting HTML.
649
650=head2 memoize
651
652 my $z2 = $z1->memoize;
653
654Creates a new zoom whose source HTML is the results of the original zoom's
655processing. Effectively syntax sugar for:
656
657 my $z2 = HTML::Zoom->from_html($z1->to_html);
658
659but preserves your L<HTML::Zoom::ZConfig> object.
660
661=head2 with_filter
662
663 my $zoom = HTML::Zoom->with_filter(
664 'div', $filter_builder->replace_content('I AM A DIV!')
665 );
666
667 my $z2 = $z1->with_filter(
668 'div', $filter_builder->replace_content('I AM A DIV!')
669 );
670
671Lower level interface than L</select> to adding filters to your zoom object.
672
673In normal usage, you probably don't need to call this yourself.
674
675=head2 select
676
677 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
678
679 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
680
681Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
682on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
683
684In normal usage you should generally always put the pair of method calls
685together; the intermediary object isn't designed or expected to stick around.
686
687=head2 then
688
689 my $z2 = $z1->select('div')->add_attribute(class => 'spoon')
690 ->then
691 ->replace_content('I AM A DIV!');
692
693Re-runs the previous select to allow you to chain actions together on the
694same selector.
695
d80786d0 696=cut