first cut at docs for Zoom.pm
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
7use HTML::Zoom::MatchWithoutFilter;
bf5a23d0 8use HTML::Zoom::ReadFH;
d80786d0 9
# Constructor.
#
# Takes an optional hashref of arguments. The only key read here is
# 'zconfig'; its value (or an empty hashref when it is missing or false) is
# passed to HTML::Zoom::ZConfig->new and the result is stored under the
# 'zconfig' key of the new object.
sub new {
  my ($class, $args) = @_;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
16
# Accessor for the stored zconfig. May be called on the class name as well
# as on an instance, in which case a fresh object is built first.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
18
# Lets methods be called either on an instance or on the class name: a
# reference invocant is returned as-is, anything else has ->new called on it.
sub _self_or_new {
  my ($invocant) = @_;
  return $invocant if ref($invocant);
  return $invocant->new;
}
22
# Returns a new object of the same class as the invocant, holding a shallow
# copy of the invocant's keys overlaid with the keys from the supplied
# hashref. The invocant itself is not modified.
sub _with {
  my ($self, $overrides) = @_;
  my %merged = (%$self, %$overrides);
  return bless(\%merged, ref($self));
}
26
# Hands the HTML string to the configured parser's html_to_events and returns
# a new zoom object with the result stored as its initial_events.
# Callable on the class or on an instance.
sub from_html {
  my $self = shift->_self_or_new;
  my $parser = $self->zconfig->parser;
  # The parser call stays inside the list so it still runs in list context.
  my %source = (initial_events => $parser->html_to_events($_[0]));
  return $self->_with(\%source);
}
33
# Slurps the contents of the named file and passes them to from_html.
# Callable on the class or on an instance.
#
# Arguments: $filename - path of the file to read.
# Returns:   whatever from_html returns for the file's contents.
# Dies:      if the file cannot be opened for reading.
#
# The file is opened with three-argument open rather than the magic <>
# operator: <> performs a two-argument open on each @ARGV entry, so a
# filename containing characters such as '|' or '>' would be interpreted
# (running a command or clobbering a file) instead of being opened literally.
sub from_file {
  my $self = shift->_self_or_new;
  my $filename = shift;
  open(my $fh, '<', $filename)
    or die "Unable to open ${filename} for reading: $!";
  # Undefined $/ puts the read into slurp mode; scalar context guarantees
  # from_html always receives exactly one argument, even for an empty file.
  my $html = do { local $/; <$fh> };
  close($fh);
  return $self->from_html($html);
}
39
# Builds the stream for this zoom object: a stream over the stored
# initial_events, wrapped once per stored filter spec, in the order the
# specs were added. Dies if no initial_events have been set.
sub to_stream {
  my $self = shift;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $sutils = $self->zconfig->stream_utils;
  my $stream = $sutils->stream_from_array(@$events);
  my @filter_specs = @{ $self->{filters} || [] };
  for my $spec (@filter_specs) {
    # Each wrap takes the stream built so far, so later filters see the
    # output of earlier ones.
    $stream = $sutils->wrap_with_filter($stream, @$spec);
  }
  return $stream;
}
51
# Wraps this zoom object in an HTML::Zoom::ReadFH via its from_zoom
# constructor and returns the result.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
55
# Builds the stream and passes it to stream_utils' stream_to_array,
# discarding the result. Always returns nothing (empty list / undef).
sub run {
  my $self = shift;
  my $sutils = $self->zconfig->stream_utils;
  $sutils->stream_to_array($self->to_stream);
  return;
}
61
# Invokes $code as a method on the invocant with $_ localised to the
# invocant for the duration of the call, and returns whatever $code returns.
sub apply {
  my $self = shift;
  my $code = shift;
  local $_ = $self;
  return $self->$code;
}
67
# Builds the stream and hands it to the configured producer's
# html_from_stream, returning that call's result.
sub to_html {
  my $self = shift;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
72
# Returns a new zoom object whose source HTML is the rendered output of this
# one. The new object is constructed with the current object as its
# arguments hashref, so the existing zconfig entry is what the constructor
# sees; the stored filters are not carried over.
sub memoize {
  my $self = shift;
  my $class = ref($self);
  my $fresh = $class->new($self);
  return $fresh->from_html($self->to_html);
}
77
# Returns a new zoom object with one more [ match, filter ] pair appended to
# its filter list. $selector is run through parse_selector first.
# Callable on the class or on an instance; the invocant's own filter list is
# left untouched.
sub with_filter {
  my ($invocant, $selector, $filter) = @_;
  my $self = $invocant->_self_or_new;
  my $match = $self->parse_selector($selector);
  my @filters = @{ $self->{filters} || [] };
  push(@filters, [ $match, $filter ]);
  return $self->_with({ filters => \@filters });
}
86
# Parses $selector and returns the result of
# HTML::Zoom::MatchWithoutFilter->construct, which is given this zoom object,
# the parsed match and the configured filter builder.
# Callable on the class or on an instance.
sub select {
  my ($invocant, $selector) = @_;
  my $self = $invocant->_self_or_new;
  my $match = $self->parse_selector($selector);
  # filter_builder is fetched inside the list so it keeps list context.
  my @construct_args = ($self, $match, $self->zconfig->filter_builder);
  return HTML::Zoom::MatchWithoutFilter->construct(@construct_args);
}
95
96# There's a bug waiting to happen here: if you do something like
97#
98# $zoom->select('.foo')
1c4455ae 99# ->remove_attribute(class => 'foo')
d80786d0 100# ->then
101# ->well_anything_really
102#
103# the second action won't execute because it doesn't match anymore.
104# Ideally instead we'd merge the match subs but that's more complex to
105# implement so I'm deferring it for the moment.
106
# Starts a new select using the match from the most recently added filter
# pair, so another action can be chained onto the same selector.
# Dies if no filters have been added yet.
sub then {
  my $self = shift;
  my $filters = $self->{filters}
    or die "Can't call ->then without a previous filter";
  my $last_match = $filters->[-1][0];
  return $self->select($last_match);
}
113
# Turns a selector into a match. A reference is assumed to be a match
# already and is returned unchanged; anything else is handed to the
# configured selector parser's parse_selector.
sub parse_selector {
  my ($self, $selector) = @_;
  return ref($selector)
    ? $selector
    : $self->zconfig->selector_parser->parse_selector($selector);
}
119
1201;
121
122=head1 NAME
123
124HTML::Zoom - selector based streaming template engine
125
126=head1 SYNOPSIS
127
128 use HTML::Zoom;
129
130 my $template = <<HTML;
131 <html>
132 <head>
133 <title>Hello people</title>
134 </head>
135 <body>
136 <h1 id="greeting">Placeholder</h1>
137 <div id="list">
138 <span>
139 <p>Name: <span class="name">Bob</span></p>
140 <p>Age: <span class="age">23</span></p>
141 </span>
142 <hr class="between" />
143 </div>
144 </body>
145 </html>
146 HTML
147
148 my $output = HTML::Zoom
149 ->from_html($template)
150 ->select('title, #greeting')->replace_content('Hello world & dog!')
151 ->select('#list')->repeat_content(
152 [
153 sub {
154 $_->select('.name')->replace_content('Matt')
155 ->select('.age')->replace_content('26')
156 },
157 sub {
158 $_->select('.name')->replace_content('Mark')
159 ->select('.age')->replace_content('0x29')
160 },
161 sub {
162 $_->select('.name')->replace_content('Epitaph')
163 ->select('.age')->replace_content('<redacted>')
164 },
165 ],
166 { repeat_between => '.between' }
167 )
168 ->to_html;
169
170will produce:
171
172=begin testinfo
173
174 my $expect = <<HTML;
175
176=end testinfo
177
178 <html>
179 <head>
180 <title>Hello world &amp; dog!</title>
181 </head>
182 <body>
183 <h1 id="greeting">Hello world &amp; dog!</h1>
184 <div id="list">
185 <span>
186 <p>Name: <span class="name">Matt</span></p>
187 <p>Age: <span class="age">26</span></p>
188 </span>
189 <hr class="between" />
190 <span>
191 <p>Name: <span class="name">Mark</span></p>
192 <p>Age: <span class="age">0x29</span></p>
193 </span>
194 <hr class="between" />
195 <span>
196 <p>Name: <span class="name">Epitaph</span></p>
197 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
198 </span>
199
200 </div>
201 </body>
202 </html>
203
204=begin testinfo
205
206 HTML
207 is($output, $expect, 'Synopsis code works ok');
208
209=end testinfo
210
1c4455ae 211=head1 DANGER WILL ROBINSON
212
213This is a 0.9 release. That means that I'm fairly happy the API isn't going
214to change in surprising and upsetting ways before 1.0 and a real compatibility
215freeze. But it also means that if it turns out there's a mistake the size of
216a politician's ego in the API design that I haven't spotted yet there may be
217a bit of breakage between here and 1.0. Hopefully not though. Appendages
218crossed and all that.
219
220Worse still, the rest of the distribution isn't documented yet. I'm sorry.
221I suck. But lots of people have been asking me to ship this, docs or no, so
222having got this class itself at least somewhat documented I figured now was
223a good time to cut a first real release.
224
225=head1 DESCRIPTION
226
227HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
228CSS selector based semantic templating engine for HTML and HTML-like
229document formats.
230
231Which is, on the whole, a bit of a mouthful. So let me step back a moment
232and explain why you care enough to understand what I mean:
233
234=head2 JQUERY ENVY
235
236HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
237piece of data into a document by doing:
238
239 $('.username').text(username);
240
241In HTML::Zoom one can write
242
243 $zoom->select('.username')->replace_content($username);
244
245which is, I hope, almost as clear, hampered only by the fact that Zoom can't
246assume a global document and therefore has nothing quite so simple as the
247$() function to get the initial selection.
248
249L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
250specification, and will continue to track that rather than the W3C standards
251for the foreseeable future on grounds of pragmatism. Also on the grounds that
252their spec is written in EN_US rather than EN_W3C, and I read the former much better.
253
254I am happy to admit that it's very, very much a subset at the moment - see the
255L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
256and more to be supported over time as we need it and patch it in.
257
258=head2 CLEAN TEMPLATES
259
260HTML::Zoom is the cure for messy templates. How many times have you looked at
261templates like this:
262
263 <form action="/somewhere">
264 [% FOREACH field IN fields %]
265 <label for="[% field.id %]">[% field.label %]</label>
266 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
267 [% END %]
268 </form>
269
270and despaired of the fact that neither the HTML structure nor the logic are
271remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
272cleanly:
273
274 <form class="myform" action="/somewhere">
275 <label />
276 <input />
277 </form>
278
279 $zoom->select('.myform')->repeat_content([
280 map { my $field = $_; sub {
281
282 $_->select('label')
283 ->add_attribute( for => $field->{id} )
284 ->then
285 ->replace_content( $field->{label} )
286
287 ->select('input')
288 ->add_attribute( name => $field->{name} )
289 ->then
290 ->add_attribute( type => $field->{type} )
291 ->then
292 ->add_attribute( value => $field->{value} )
293
294 } } @fields
295 ]);
296
297This is, admittedly, very much not shorter. However, it makes it extremely
298clear what's happening and therefore less hassle to maintain. Especially
299because it allows the designer to fiddle with the HTML without cutting
300himself on sharp ELSE clauses, and the developer to add available data to
301the template without getting angle bracket cuts on sensitive parts.
302
303Better still, HTML::Zoom knows that it's inserting content into HTML and
304can escape it for you - the example template should really have been:
305
306 <form action="/somewhere">
307 [% FOREACH field IN fields %]
308 <label for="[% field.id | html %]">[% field.label | html %]</label>
309 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
310 [% END %]
311 </form>
312
313and frankly I'll take slightly more code any day over *that* crawling horror.
314
315(addendum: I pick on L<Template Toolkit|Template> here specifically because
316it's the template system I hate the least - for text templating, I don't
317honestly think I'll ever like anything except the next version of Template
318Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
319
320=head2 PUTTING THE FUN INTO FUNCTIONAL
321
322The principle of HTML::Zoom is to provide a reusable, functional container
323object that lets you build up a set of transforms to be applied; every method
324call you make on a zoom object returns a new object, so it's safe to do so
325on one somebody else gave you without worrying about altering state (with
326the notable exception of ->next for stream objects, which I'll come to later).
327
328So:
329
330 my $z2 = $z1->select('.name')->replace_content($name);
331
332 my $z3 = $z2->select('.title')->replace_content('Ms.');
333
334each time produces a new Zoom object. If you want to package up a set of
335transforms to re-use, HTML::Zoom provides an 'apply' method:
336
337 my $add_name = sub { $_->select('.name')->replace_content($name) };
338
339 my $same_as_z2 = $z1->apply($add_name);
340
341=head2 LAZINESS IS A VIRTUE
342
343HTML::Zoom does its best to defer doing anything until it's absolutely
344required. The only point at which it descends into state is when you force
345it to create a stream, directly by:
346
347 my $stream = $zoom->to_stream;
348
349 while (my ($evt) = $stream->next) {
350 # handle zoom event here
351 }
352
353or indirectly via:
354
355 my $final_html = $zoom->to_html;
356
357 my $fh = $zoom->to_fh;
358
359 while (my $chunk = $fh->getline) {
360 ...
361 }
362
363Better still, the $fh returned doesn't create its stream until the first
364call to getline, which means that until you call that and force it to be
365stateful you can get back to the original stateless Zoom object via:
366
367 my $zoom = $fh->to_zoom;
368
369which is exceedingly handy for filtering L<Plack> PSGI responses, among other
370things.
371
372Because HTML::Zoom doesn't try and evaluate everything up front, you can
373generally put things together in whatever order is most appropriate. This
374means that:
375
376 my $start = HTML::Zoom->from_html($html);
377
378 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
379
380and:
381
382 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
383
384 my $zoom = $start->from_html($html);
385
386will produce equivalent final $zoom objects, thus proving that there can be
387more than one way to do it without one of them being a
388L<bait and switch|Switch>.
389
390=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
391
392HTML::Zoom's execution always happens in terms of streams under the hood
393- that is, the basic pattern for doing anything is -
394
395 my $stream = get_stream_from_somewhere;
396
397 while (my ($evt) = $stream->next) {
398 # do something with the event
399 }
400
401More importantly, all selectors and filters are also built as stream
402operations, so a selector and filter pair is effectively:
403
404 sub next {
405 my ($self) = @_;
406 my $next_evt = $self->parent_stream->next;
407 if ($self->selector_matches($next_evt)) {
408 return $self->apply_filter_to($next_evt);
409 } else {
410 return $next_evt;
411 }
412 }
413
414Internally, things are marginally more complicated than that, but not enough
415that you as a user should normally need to care.
416
417In fact, an HTML::Zoom object is mostly just a container for the relevant
418information from which to build the final stream that does the real work. A
419stream built from a Zoom object is a stream of events from parsing the
420initial HTML, wrapped in a filter stream per selector/filter pair provided
421as described above.
422
423The upshot of this is that the application of filters works just as well on
424streams as on the original Zoom object - in fact, when you run a
425L<repeat_content|HTML::Zoom::FilterBuilder/repeat_content> operation your subroutines are applied to the stream for
426that element of the repeat, rather than constructing a new zoom per repeat
427element as well.
428
429More concretely:
430
431 $_->select('div')->replace_content('I AM A DIV!');
432
433works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
434shares sufficient of the implementation that you can generally forget the
435difference - barring the fact that a stream already has state attached so
436things like to_fh are no longer available.
437
438=head2 POP! GOES THE WEASEL
439
440... and by Weasel, I mean layout.
441
442HTML::Zoom's filehandle object supports an additional event key, 'flush',
443that is transparent to the rest of the system but indicates to the filehandle
444object to end a getline operation at that point and return the HTML so far.
445
446This means that in an environment where streaming output is available, such
447as a number of the L<Plack> PSGI handlers, you can add the flush key to an
448event in order to ensure that the HTML generated so far is flushed through
449to the browser right now. This can be especially useful if you know you're
450about to call a web service or a potentially slow database query or similar
451to ensure that at least the header/layout of your page renders now, improving
452perceived user responsiveness while your application waits around for the
453data it needs.
454
455This is currently exposed by the 'flush_before' option to the collect filter,
456which incidentally also underlies the replace and repeat filters, so to
457indicate we want this behaviour to happen before a query is executed we can
458write something like:
459
460 $zoom->select('.item')->repeat(sub {
461 if (my $row = $db_thing->next) {
462 return sub { $_->select('.item-name')->replace_content($row->name) }
463 } else {
464 return
465 }
466 }, { flush_before => 1 });
467
468which should have the desired effect given a sufficiently lazy $db_thing (for
469example a L<DBIx::Class::ResultSet> object).
470
471=head2 A FISTFUL OF OBJECTS
472
473At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
474whose purpose is to hang on to the various bits and pieces that things need
475so that there's a common way of accessing shared functionality.
476
477Were I a computer scientist I would probably call this an "Inversion of
478Control" object - which you'd be welcome to google to learn more about, or
479you can just imagine a computer scientist being suspended upside down over
480a pit. Either way works for me, I'm a pure maths grad.
481
482The ZConfig object hangs on to one each of the following for you:
483
484=over 4
485
486=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
487
488=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
489
490=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
491
492=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
493
494=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
495
496=back
497
498In theory you could replace any of these with anything you like, but in
499practice you're probably best restricting yourself to subclasses, or at
500least things that manage to look like the original if you squint a bit.
501
502If you do something more clever than that, or find yourself overriding things
503in your ZConfig a lot, please please tell us about it via one of the means
504mentioned under L</SUPPORT>.
505
506=head2 SEMANTIC DIDACTIC
507
508Some will argue that overloading CSS selectors to do data stuff is a terrible
509idea, and possibly even a step towards the "Concrete Javascript" pattern
510(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
511where it keeps reminding me of the late, great Tony Hart's plasticine friend).
512
513To which I say, "eh", "meh", and possibly also "feh". If it really upsets
514you, either use extra classes for this (and remove them afterwards) or
515use special fake elements or, well, honestly, just use something different.
516L<Template::Semantic> provides a similar idea to zoom except using XPath
517and XML::LibXML transforms rather than a lightweight streaming approach -
518maybe you'd like that better. Or maybe you really did want
519L<Template Toolkit|Template> after all. It is still damn good at what it does,
520after all.
521
522So far, however, I've found that for new sites the designers I'm working with
523generally want to produce nice semantic HTML with classes that represent the
524nature of the data rather than the structure of the layout, so sharing them
525as a common interface works really well for us.
526
527In the absence of any evidence that overloading CSS selectors has killed
528children or unexpectedly set fire to grandmothers - and given microformats
529have been around for a while there's been plenty of opportunity for
530octogenarian combustion - I'd suggest you give it a try and see if you like it.
531
532=head2 GET THEE TO A SUMMARY!
533
534Erm. Well.
535
536HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
537CSS selector based semantic templating engine for HTML and HTML-like
538document formats.
539
540But I said that already. Although hopefully by now you have some idea what I
541meant when I said it. If you didn't have any idea the first time. I mean, I'm
542not trying to call you stupid or anything. Just saying that maybe it wasn't
543totally obvious without the explanation. Or something.
544
545Er.
546
547Maybe we should just move on to the method docs.
548
549=head1 METHODS
550
551=head2 new
552
553 my $zoom = HTML::Zoom->new;
554
555 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
556
557Create a new empty Zoom object. You can optionally pass an
558L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
559the default components.
560
561This method isn't often used directly since several other methods can also
562act as constructors, notably L</select> and L</from_html>.
563
564=head2 zconfig
565
566 my $zconfig = $zoom->zconfig;
567
568Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
569shouldn't usually need to call this yourself.
570
571=head2 from_html
572
573 my $zoom = HTML::Zoom->from_html($html);
574
575 my $z2 = $z1->from_html($html);
576
577Parses the HTML using the current zconfig's parser object and returns a new
578zoom instance with that as the source HTML to be transformed.
579
580=head2 from_file
581
582 my $zoom = HTML::Zoom->from_file($file);
583
584 my $z2 = $z1->from_file($file);
585
586Convenience method - slurps the contents of $file and calls from_html with it.
587
588=head2 to_stream
589
590 my $stream = $zoom->to_stream;
591
592 while (my ($evt) = $stream->next) {
593 ...
 }
594
595Creates a stream, starting with a stream of the events from the HTML supplied
596via L</from_html> and then wrapping it in turn with each selector+filter pair
597that have been applied to the zoom object.
598
599=head2 to_fh
600
601 my $fh = $zoom->to_fh;
602
603 call_something_expecting_a_filehandle($fh);
604
605Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
606time its getline method is called and then return all HTML up to the next
607event with 'flush' set.
608
609You can pass this filehandle to compliant PSGI handlers (and probably most
610web frameworks).
611
612=head2 run
613
614 $zoom->run;
615
616Runs the zoom object's transforms without doing anything with the results.
617
618Normally used to get side effects of a zoom run - for example when using
619L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
620
621=head2 apply
622
623 my $z2 = $z1->apply(sub {
624 $_->select('div')->replace_content('I AM A DIV!')
625 });
626
627Sets $_ to the zoom object and then runs the provided code. Basically syntax
628sugar, the following is entirely equivalent:
629
630 my $sub = sub {
631 shift->select('div')->replace_content('I AM A DIV!')
632 };
633
634 my $z2 = $sub->($z1);
635
636=head2 to_html
637
638 my $html = $zoom->to_html;
639
640Runs the zoom processing and returns the resulting HTML.
641
642=head2 memoize
643
644 my $z2 = $z1->memoize;
645
646Creates a new zoom whose source HTML is the results of the original zoom's
647processing. Effectively syntax sugar for:
648
649 my $z2 = HTML::Zoom->from_html($z1->to_html);
650
651but preserves your L<HTML::Zoom::ZConfig> object.
652
653=head2 with_filter
654
655 my $zoom = HTML::Zoom->with_filter(
656 'div', $filter_builder->replace_content('I AM A DIV!')
657 );
658
659 my $z2 = $z1->with_filter(
660 'div', $filter_builder->replace_content('I AM A DIV!')
661 );
662
663Lower level interface than L</select> to adding filters to your zoom object.
664
665In normal usage, you probably don't need to call this yourself.
666
667=head2 select
668
669 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
670
671 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
672
673Returns an intermediary object of the class L<HTML::Zoom::MatchWithoutFilter>
674on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
675
676In normal usage you should generally always put the pair of method calls
677together; the intermediary object isn't designed or expected to stick around.
678
679=head2 then
680
681 my $z2 = $z1->select('div')->add_attribute(class => 'spoon')
682 ->then
683 ->replace_content('I AM A DIV!');
684
685Re-runs the previous select to allow you to chain actions together on the
686same selector.
687
688=head2 parse_selector
689
690 my $matcher = $zoom->parse_selector('div');
691
692Used by L</select> and L</with_filter> to invoke the current
693L<HTML::Zoom::SelectorParser> object to create a matcher object (currently
694a coderef but this is an implementation detail) for that selector.
695
696In normal usage, you probably don't need to call this yourself.
d80786d0 697
698=cut