removed accidentally added whitespace
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
bf5a23d0 7use HTML::Zoom::ReadFH;
655965b3 8use HTML::Zoom::Transform;
eeeb0921 9use HTML::Zoom::TransformBuilder;
d80786d0 10
# Constructor: builds a zoom object whose only initial state is its zconfig.
#
# Arguments: the class name and an optional hashref of arguments. The value
# of $args->{zconfig} (or an empty hashref when it is absent or false) is
# handed to HTML::Zoom::ZConfig->new, and the result is stored under the
# 'zconfig' key.
# Returns: the new object, blessed into $class.
sub new {
  my ($class, $args) = @_;
  $args ||= {};    # calling ->new with no arguments is the common case
  my $new = {
    zconfig => HTML::Zoom::ZConfig->new($args->{zconfig} || {}),
  };
  return bless($new, $class);
}
17
# Accessor for the value stored under this object's 'zconfig' key.
# The invocant is first passed through _self_or_new, so calling this on the
# class name yields the zconfig of a freshly constructed object.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
19
# Lets a method be called on either an instance or the class name:
# a reference invocant is returned untouched, anything else is treated as a
# class name and replaced by the result of calling ->new on it.
sub _self_or_new {
  my ($invocant) = @_;
  return $invocant if ref($invocant);
  return $invocant->new;
}
23
# Functional update: returns a NEW object, blessed into the same class as
# $self, whose fields are a shallow copy of $self's fields overlaid with the
# key/value pairs of the $overrides hashref. $self itself is not modified.
# A missing $overrides is treated as "no changes".
sub _with {
  my ($self, $overrides) = @_;
  return bless({ %$self, %{ $overrides || {} } }, ref($self));
}
27
# Returns a copy of the zoom object (via _with) whose 'initial_events' field
# is set to the first argument. to_stream later dereferences that field as
# an array, so it is expected to be an arrayref of events.
# Usable on an instance or on the class name (see _self_or_new).
sub from_events {
  my $self = shift->_self_or_new;
  my ($events) = @_;
  return $self->_with({ initial_events => $events });
}
34
# Parses $html with the zconfig's parser (->parser->html_to_events) and
# returns the result of from_events on what the parser produced.
# Usable on an instance or on the class name (see _self_or_new).
sub from_html {
  my $self = shift->_self_or_new;
  my ($html) = @_;
  # List assignment keeps the parser call in list context and takes its
  # first return value, which is what from_events consumes.
  my ($events) = $self->zconfig->parser->html_to_events($html);
  return $self->from_events($events);
}
39
# Convenience constructor: slurps the whole of $filename and passes the
# contents to from_html.
# Usable on an instance or on the class name (see _self_or_new).
#
# The file is opened with three-argument open on a lexical handle so that
# the name is always treated as a plain path. (Slurping through @ARGV and
# the <> operator would apply Perl's "magic" two-argument open, under which
# a name such as "cmd |" runs a command, and would also disturb the global
# ARGV handle.)
#
# Dies if the file cannot be opened for reading.
sub from_file {
  my $self = shift->_self_or_new;
  my ($filename) = @_;
  open(my $fh, '<', $filename)
    or die "Couldn't open ${filename} for reading: $!";
  my $html = do { local $/; <$fh> };    # undef $/ => read everything at once
  close($fh);
  return $self->from_html($html);
}
45
# Builds the event stream for this zoom object: a stream over the stored
# 'initial_events' (created by the zconfig's stream_utils), passed in turn
# through apply_to_stream of every entry in 'transforms', in order.
# Returns the resulting stream.
# Dies if no initial events have been set.
sub to_stream {
  my $self = shift;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $stream = $self->zconfig->stream_utils->stream_from_array(@$events);
  # Each transform receives the stream built so far and returns its
  # replacement.
  foreach my $transform (@{ $self->{transforms} || [] }) {
    $stream = $transform->apply_to_stream($stream);
  }
  return $stream;
}
55
# Returns whatever HTML::Zoom::ReadFH->from_zoom produces for this zoom
# object (documented in this module's POD as a filehandle-like reader).
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
59
# Runs the zoom (via to_stream) and collects everything the zconfig's
# stream_utils->stream_to_array returns for that stream.
# Returns: an arrayref of the collected events.
sub to_events {
  my ($self) = @_;
  my $sutils = $self->zconfig->stream_utils;
  my @events = $sutils->stream_to_array($self->to_stream);
  return \@events;
}
64
# Runs the zoom to completion (by calling to_events) and discards the
# result; only the side effects of the run are of interest.
# Returns nothing (empty list / undef).
sub run {
  my ($self) = @_;
  $self->to_events;
  return;
}
70
# Invokes $code as a method on the zoom object ($self->$code), with $_
# localised to the zoom object for the duration of the call so the code can
# be written in terms of either $_ or its first argument.
# Returns whatever $code returns.
sub apply {
  my ($self, $code) = @_;
  local $_ = $self;
  return $self->$code;
}
76
# Runs the zoom (via to_stream) and hands the stream to the zconfig's
# producer (->producer->html_from_stream).
# Returns whatever the producer returns for that stream.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
81
# Returns a new zoom whose source is the rendered output of this one:
# a fresh object of the same class is constructed and then fed
# $self->to_html through from_html.
# $self is passed to ->new as its argument hashref, so the constructor
# reads the 'zconfig' entry from the existing object.
sub memoize {
  my ($self) = @_;
  my $fresh = ref($self)->new($self);
  return $fresh->from_html($self->to_html);
}
86
# Returns a copy of the zoom object (via _with) whose 'transforms' list is
# the existing list with $transform appended. A new array is built, so the
# invocant's own list is left untouched.
# Usable on an instance or on the class name (see _self_or_new).
sub with_transform {
  my $self = shift->_self_or_new;
  my ($transform) = @_;
  my @transforms = (@{ $self->{transforms} || [] }, $transform);
  return $self->_with({ transforms => \@transforms });
}
eeeb0921 97
# Lower-level counterpart of select: wraps a single selector/filter pair in
# an HTML::Zoom::Transform (constructed with this object's zconfig, the
# selector, and a one-element filter list) and adds it via with_transform.
# Usable on an instance or on the class name (see _self_or_new).
sub with_filter {
  my $self = shift->_self_or_new;
  my ($selector, $filter) = @_;
  my $transform = HTML::Zoom::Transform->new({
    zconfig  => $self->zconfig,
    selector => $selector,
    filters  => [ $filter ],
  });
  return $self->with_transform($transform);
}
d80786d0 109
# Starts a selector/filter pair: returns an HTML::Zoom::TransformBuilder
# constructed with this object's zconfig, the given selector, and the zoom
# object itself as 'proto'.
# Usable on an instance or on the class name (see _self_or_new).
sub select {
  my $self = shift->_self_or_new;
  my ($selector) = @_;
  my %builder_args = (
    zconfig  => $self->zconfig,
    selector => $selector,
    proto    => $self,
  );
  return HTML::Zoom::TransformBuilder->new(\%builder_args);
}
119
# There's a bug waiting to happen here: if you do something like
#
#   $zoom->select('.foo')
#        ->remove_attribute(class => 'foo')
#        ->then
#        ->well_anything_really
#
# the second action won't execute because the selector doesn't match
# anymore. Ideally we'd merge the match subs instead, but that's more
# complex to implement so it is deferred for the moment.

# Re-selects using the selector of the most recently added transform, so
# that another action can be chained onto the same selection.
# Returns whatever select returns for that selector.
# Dies if there is no previous transform - including when called on the
# class name or on an object whose transform list is empty.
sub then {
  my ($self) = @_;
  my $transforms = ref($self) ? $self->{transforms} : undef;
  die "Can't call ->then without a previous transform"
    unless $transforms && @$transforms;
  return $self->select($transforms->[-1]->selector);
}
137
1381;
139
140=head1 NAME
141
142HTML::Zoom - selector based streaming template engine
143
144=head1 SYNOPSIS
145
146 use HTML::Zoom;
147
148 my $template = <<HTML;
149 <html>
150 <head>
151 <title>Hello people</title>
152 </head>
153 <body>
154 <h1 id="greeting">Placeholder</h1>
155 <div id="list">
156 <span>
157 <p>Name: <span class="name">Bob</span></p>
158 <p>Age: <span class="age">23</span></p>
159 </span>
160 <hr class="between" />
161 </div>
162 </body>
163 </html>
164 HTML
165
166 my $output = HTML::Zoom
167 ->from_html($template)
168 ->select('title, #greeting')->replace_content('Hello world & dog!')
169 ->select('#list')->repeat_content(
170 [
171 sub {
172 $_->select('.name')->replace_content('Matt')
173 ->select('.age')->replace_content('26')
174 },
175 sub {
176 $_->select('.name')->replace_content('Mark')
177 ->select('.age')->replace_content('0x29')
178 },
179 sub {
180 $_->select('.name')->replace_content('Epitaph')
181 ->select('.age')->replace_content('<redacted>')
182 },
183 ],
184 { repeat_between => '.between' }
185 )
186 ->to_html;
187
188will produce:
189
190=begin testinfo
191
192 my $expect = <<HTML;
193
194=end testinfo
195
196 <html>
197 <head>
198 <title>Hello world &amp; dog!</title>
199 </head>
200 <body>
201 <h1 id="greeting">Hello world &amp; dog!</h1>
202 <div id="list">
203 <span>
204 <p>Name: <span class="name">Matt</span></p>
205 <p>Age: <span class="age">26</span></p>
206 </span>
207 <hr class="between" />
208 <span>
209 <p>Name: <span class="name">Mark</span></p>
210 <p>Age: <span class="age">0x29</span></p>
211 </span>
212 <hr class="between" />
213 <span>
214 <p>Name: <span class="name">Epitaph</span></p>
215 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
216 </span>
217
218 </div>
219 </body>
220 </html>
221
222=begin testinfo
223
224 HTML
225 is($output, $expect, 'Synopsis code works ok');
226
227=end testinfo
228
1c4455ae 229=head1 DANGER WILL ROBINSON
230
231This is a 0.9 release. That means that I'm fairly happy the API isn't going
232to change in surprising and upsetting ways before 1.0 and a real compatibility
233freeze. But it also means that if it turns out there's a mistake the size of
234a politician's ego in the API design that I haven't spotted yet there may be
235a bit of breakage between here and 1.0. Hopefully not though. Appendages
236crossed and all that.
237
238Worse still, the rest of the distribution isn't documented yet. I'm sorry.
239I suck. But lots of people have been asking me to ship this, docs or no, so
240having got this class itself at least somewhat documented I figured now was
241a good time to cut a first real release.
242
243=head1 DESCRIPTION
244
245HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
246CSS selector based semantic templating engine for HTML and HTML-like
247document formats.
248
249Which is, on the whole, a bit of a mouthful. So let me step back a moment
250and explain why you care enough to understand what I mean:
251
252=head2 JQUERY ENVY
253
254HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
255piece of data into a document by doing:
256
257 $('.username').text(username);
258
259In HTML::Zoom one can write
260
261 $zoom->select('.username')->replace_content($username);
262
263which is, I hope, almost as clear, hampered only by the fact that Zoom can't
264assume a global document and therefore has nothing quite so simple as the
265$() function to get the initial selection.
266
267L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
268specification, and will continue to track that rather than the W3C standards
269for the foreseeable future on grounds of pragmatism. Also on the grounds that their
270spec is written in EN_US rather than EN_W3C, and I read the former much better.
271
272I am happy to admit that it's very, very much a subset at the moment - see the
273L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
274and more to be supported over time as we need it and patch it in.
275
276=head2 CLEAN TEMPLATES
277
278HTML::Zoom is the cure for messy templates. How many times have you looked at
279templates like this:
280
281 <form action="/somewhere">
282 [% FOREACH field IN fields %]
283 <label for="[% field.id %]">[% field.label %]</label>
284 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
285 [% END %]
286 </form>
287
288and despaired of the fact that neither the HTML structure nor the logic are
289remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
290cleanly:
291
292 <form class="myform" action="/somewhere">
293 <label />
294 <input />
295 </form>
296
297 $zoom->select('.myform')->repeat_content([
298 map { my $field = $_; sub {
299
300 $_->select('label')
301 ->add_attribute( for => $field->{id} )
302 ->then
303 ->replace_content( $field->{label} )
304
305 ->select('input')
306 ->add_attribute( name => $field->{name} )
307 ->then
308 ->add_attribute( type => $field->{type} )
309 ->then
310 ->add_attribute( value => $field->{value} )
311
312 } } @fields
313 ]);
314
315This is, admittedly, very much not shorter. However, it makes it extremely
316clear what's happening and therefore less hassle to maintain. Especially
317because it allows the designer to fiddle with the HTML without cutting
318himself on sharp ELSE clauses, and the developer to add available data to
319the template without getting angle bracket cuts on sensitive parts.
320
321Better still, HTML::Zoom knows that it's inserting content into HTML and
322can escape it for you - the example template should really have been:
323
324 <form action="/somewhere">
325 [% FOREACH field IN fields %]
326 <label for="[% field.id | html %]">[% field.label | html %]</label>
327 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
328 [% END %]
329 </form>
330
331and frankly I'll take slightly more code any day over *that* crawling horror.
332
333(addendum: I pick on L<Template Toolkit|Template> here specifically because
334it's the template system I hate the least - for text templating, I don't
335honestly think I'll ever like anything except the next version of Template
336Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
337
338=head2 PUTTING THE FUN INTO FUNCTIONAL
339
340The principle of HTML::Zoom is to provide a reusable, functional container
341object that lets you build up a set of transforms to be applied; every method
342call you make on a zoom object returns a new object, so it's safe to do so
343on one somebody else gave you without worrying about altering state (with
344the notable exception of ->next for stream objects, which I'll come to later).
345
346So:
347
348 my $z2 = $z1->select('.name')->replace_content($name);
349
350 my $z3 = $z2->select('.title')->replace_content('Ms.');
351
352each time produces a new Zoom object. If you want to package up a set of
353transforms to re-use, HTML::Zoom provides an 'apply' method:
354
355 my $add_name = sub { $_->select('.name')->replace_content($name) };
356
357 my $same_as_z2 = $z1->apply($add_name);
358
359=head2 LAZINESS IS A VIRTUE
360
361HTML::Zoom does its best to defer doing anything until it's absolutely
362required. The only point at which it descends into state is when you force
363it to create a stream, directly by:
364
c9e76777 365 my $stream = $zoom->to_stream;
1c4455ae 366
367 while (my $evt = $stream->next) {
368 # handle zoom event here
369 }
370
371or indirectly via:
372
373 my $final_html = $zoom->to_html;
374
375 my $fh = $zoom->to_fh;
376
377 while (my $chunk = $fh->getline) {
378 ...
379 }
380
381Better still, the $fh returned doesn't create its stream until the first
382call to getline, which means that until you call that and force it to be
383stateful you can get back to the original stateless Zoom object via:
384
385 my $zoom = $fh->to_zoom;
386
387which is exceedingly handy for filtering L<Plack> PSGI responses, among other
388things.
389
390Because HTML::Zoom doesn't try and evaluate everything up front, you can
391generally put things together in whatever order is most appropriate. This
392means that:
393
394 my $start = HTML::Zoom->from_html($html);
395
396 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
397
398and:
399
400 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
401
402 my $zoom = $start->from_html($html);
403
404will produce equivalent final $zoom objects, thus proving that there can be
405more than one way to do it without one of them being a
406L<bait and switch|Switch>.
407
408=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
409
410HTML::Zoom's execution always happens in terms of streams under the hood
411- that is, the basic pattern for doing anything is -
412
413 my $stream = get_stream_from_somewhere;
414
415 while (my ($evt) = $stream->next) {
416 # do something with the event
417 }
418
419More importantly, all selectors and filters are also built as stream
420operations, so a selector and filter pair is effectively:
421
422 sub next {
423 my ($self) = @_;
424 my $next_evt = $self->parent_stream->next;
425 if ($self->selector_matches($next_evt)) {
426 return $self->apply_filter_to($next_evt);
427 } else {
428 return $next_evt;
429 }
430 }
431
432Internally, things are marginally more complicated than that, but not enough
433that you as a user should normally need to care.
434
435In fact, an HTML::Zoom object is mostly just a container for the relevant
436information from which to build the final stream that does the real work. A
437stream built from a Zoom object is a stream of events from parsing the
438initial HTML, wrapped in a filter stream per selector/filter pair provided
439as described above.
440
441The upshot of this is that the application of filters works just as well on
442streams as on the original Zoom object - in fact, when you run a
443L<HTML::Zoom::FilterBuilder/repeat_content> operation your subroutines are applied to the stream for
444that element of the repeat, rather than constructing a new zoom per repeat
445element as well.
446
447More concretely:
448
449 $_->select('div')->replace_content('I AM A DIV!');
450
451works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
452shares sufficient of the implementation that you can generally forget the
453difference - barring the fact that a stream already has state attached so
454things like to_fh are no longer available.
455
456=head2 POP! GOES THE WEASEL
457
458... and by Weasel, I mean layout.
459
460HTML::Zoom's filehandle object supports an additional event key, 'flush',
461that is transparent to the rest of the system but indicates to the filehandle
462object to end a getline operation at that point and return the HTML so far.
463
464This means that in an environment where streaming output is available, such
465as a number of the L<Plack> PSGI handlers, you can add the flush key to an
466event in order to ensure that the HTML generated so far is flushed through
467to the browser right now. This can be especially useful if you know you're
468about to call a web service or a potentially slow database query or similar
469to ensure that at least the header/layout of your page renders now, improving
470perceived user responsiveness while your application waits around for the
471data it needs.
472
473This is currently exposed by the 'flush_before' option to the collect filter,
474which incidentally also underlies the replace and repeat filters, so to
475indicate we want this behaviour to happen before a query is executed we can
476write something like:
477
478 $zoom->select('.item')->repeat(sub {
479 if (my $row = $db_thing->next) {
480 return sub { $_->select('.item-name')->replace_content($row->name) }
481 } else {
482 return
483 }
484 }, { flush_before => 1 });
485
486which should have the desired effect given a sufficiently lazy $db_thing (for
487example a L<DBIx::Class::ResultSet> object).
488
489=head2 A FISTFUL OF OBJECTS
490
491At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
492whose purpose is to hang on to the various bits and pieces that things need
493so that there's a common way of accessing shared functionality.
494
495Were I a computer scientist I would probably call this an "Inversion of
496Control" object - which you'd be welcome to google to learn more about, or
497you can just imagine a computer scientist being suspended upside down over
498a pit. Either way works for me, I'm a pure maths grad.
499
500The ZConfig object hangs on to one each of the following for you:
501
502=over 4
503
504=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
505
506=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
507
508=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
509
510=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
511
512=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
513
514=back
515
516In theory you could replace any of these with anything you like, but in
517practice you're probably best restricting yourself to subclasses, or at
518least things that manage to look like the original if you squint a bit.
519
520If you do something more clever than that, or find yourself overriding things
521in your ZConfig a lot, please please tell us about it via one of the means
522mentioned under L</SUPPORT>.
523
524=head2 SEMANTIC DIDACTIC
525
526Some will argue that overloading CSS selectors to do data stuff is a terrible
527idea, and possibly even a step towards the "Concrete Javascript" pattern
528(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
529where it keeps reminding me of the late, great Tony Hart's plasticine friend).
530
531To which I say, "eh", "meh", and possibly also "feh". If it really upsets
532you, either use extra classes for this (and remove them afterwards) or
533use special fake elements or, well, honestly, just use something different.
534L<Template::Semantic> provides a similar idea to zoom except using XPath
535and XML::LibXML transforms rather than a lightweight streaming approach -
536maybe you'd like that better. Or maybe you really did want
537L<Template Toolkit|Template> after all. It is still damn good at what it does,
538after all.
539
540So far, however, I've found that for new sites the designers I'm working with
541generally want to produce nice semantic HTML with classes that represent the
542nature of the data rather than the structure of the layout, so sharing them
543as a common interface works really well for us.
544
545In the absence of any evidence that overloading CSS selectors has killed
546children or unexpectedly set fire to grandmothers - and given microformats
547have been around for a while there's been plenty of opportunity for
548octogenarian combustion - I'd suggest you give it a try and see if you like it.
549
550=head2 GET THEE TO A SUMMARY!
551
552Erm. Well.
553
554HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
555CSS selector based semantic templating engine for HTML and HTML-like
556document formats.
557
558But I said that already. Although hopefully by now you have some idea what I
559meant when I said it. If you didn't have any idea the first time. I mean, I'm
560not trying to call you stupid or anything. Just saying that maybe it wasn't
561totally obvious without the explanation. Or something.
562
563Er.
564
565Maybe we should just move on to the method docs.
566
567=head1 METHODS
568
569=head2 new
570
571 my $zoom = HTML::Zoom->new;
572
573 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
574
575Create a new empty Zoom object. You can optionally pass an
576L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
577the default components.
578
579This method isn't often used directly since several other methods can also
580act as constructors, notably L</select> and L</from_html>.
581
582=head2 zconfig
583
584 my $zconfig = $zoom->zconfig;
585
586Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
587shouldn't usually need to call this yourself.
588
589=head2 from_html
590
591 my $zoom = HTML::Zoom->from_html($html);
592
593 my $z2 = $z1->from_html($html);
594
595Parses the HTML using the current zconfig's parser object and returns a new
596zoom instance with that as the source HTML to be transformed.
597
598=head2 from_file
599
600 my $zoom = HTML::Zoom->from_file($file);
601
602 my $z2 = $z1->from_file($file);
603
604Convenience method - slurps the contents of $file and calls from_html with it.
605
606=head2 to_stream
607
608 my $stream = $zoom->to_stream;
609
610 while (my ($evt) = $stream->next) {
611 ...
612
613Creates a stream, starting with a stream of the events from the HTML supplied
614via L</from_html> and then wrapping it in turn with each selector+filter pair
615that have been applied to the zoom object.
616
617=head2 to_fh
618
619 my $fh = $zoom->to_fh;
620
621 call_something_expecting_a_filehandle($fh);
622
623Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
624time its getline method is called and then return all HTML up to the next
625event with 'flush' set.
626
627You can pass this filehandle to compliant PSGI handlers (and probably most
628web frameworks).
629
630=head2 run
631
632 $zoom->run;
633
634Runs the zoom object's transforms without doing anything with the results.
635
636Normally used to get side effects of a zoom run - for example when using
637L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
638
639=head2 apply
640
641 my $z2 = $z1->apply(sub {
642 $_->select('div')->replace_content('I AM A DIV!')
643 });
644
645Sets $_ to the zoom object and then runs the provided code. Basically syntax
646sugar, the following is entirely equivalent:
647
648 my $sub = sub {
649 shift->select('div')->replace_content('I AM A DIV!')
650 };
651
652 my $z2 = $sub->($z1);
653
654=head2 to_html
655
656 my $html = $zoom->to_html;
657
658Runs the zoom processing and returns the resulting HTML.
659
660=head2 memoize
661
662 my $z2 = $z1->memoize;
663
664Creates a new zoom whose source HTML is the results of the original zoom's
665processing. Effectively syntax sugar for:
666
667 my $z2 = HTML::Zoom->from_html($z1->to_html);
668
669but preserves your L<HTML::Zoom::ZConfig> object.
670
671=head2 with_filter
672
673 my $zoom = HTML::Zoom->with_filter(
674 'div', $filter_builder->replace_content('I AM A DIV!')
675 );
676
677 my $z2 = $z1->with_filter(
678 'div', $filter_builder->replace_content('I AM A DIV!')
679 );
680
681Lower level interface than L</select> to adding filters to your zoom object.
682
683In normal usage, you probably don't need to call this yourself.
684
685=head2 select
686
687 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
688
689 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
690
97192b02 691Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
1c4455ae 692on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
693
694In normal usage you should generally always put the pair of method calls
695together; the intermediary object isn't designed or expected to stick around.
696
697=head2 then
698
699 my $z2 = $z1->select('div')->add_attribute(class => 'spoon')
700 ->then
701 ->replace_content('I AM A DIV!');
702
703Re-runs the previous select to allow you to chain actions together on the
704same selector.
705
45b4cea1 706=head1 AUTHORS
707
708=over
709
710=item * Matt S. Trout
711
712=back
713
714=head1 LICENSE
715
716This library is free software, you can redistribute it and/or modify
717it under the same terms as Perl itself.
718
d80786d0 719=cut
45b4cea1 720