Merge branch 'master' of git://git.shadowcat.co.uk/catagits/HTML-Zoom
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
bf5a23d0 7use HTML::Zoom::ReadFH;
655965b3 8use HTML::Zoom::Transform;
eeeb0921 9use HTML::Zoom::TransformBuilder;
d80786d0 10
7af7362d 11our $VERSION = '0.009001';
12
13$VERSION = eval $VERSION;
14
# Constructor: builds an empty Zoom object.
#
# $args is an optional hashref. Its 'zconfig' entry (or an empty hashref when
# that entry is false/absent) is passed to HTML::Zoom::ZConfig->new, and the
# resulting object is stored under the new object's 'zconfig' key.
#
# Returns the new object blessed into $class.
sub new {
  my ($class, $args) = @_;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
21
# Accessor for the object's 'zconfig' entry. Goes through _self_or_new, so
# when invoked on a class name a fresh instance is created first.
sub zconfig {
  my ($proto) = @_;
  my $self = $proto->_self_or_new;
  return $self->{zconfig};
}
23
# Lets methods double as constructors: an invocant that is already a
# reference is returned unchanged; anything else (a class name) has ->new
# called on it and the result is returned.
sub _self_or_new {
  my ($invocant) = @_;
  return $invocant if ref($invocant);
  return $invocant->new;
}
27
# Non-mutating update: returns a new object of the same class as $self whose
# contents are a shallow copy of $self's hash with the key/value pairs of
# $overrides layered on top (keys in $overrides win). $self is not modified.
sub _with {
  my ($self, $overrides) = @_;
  my %merged = (%$self, %$overrides);
  return bless(\%merged, ref($self));
}
31
# Returns a copy of the zoom object (created on demand when called on a class
# name) whose 'initial_events' entry is set to $events. to_stream later
# dereferences that entry as an array.
sub from_events {
  my ($proto, $events) = @_;
  my $self = $proto->_self_or_new;
  return $self->_with({ initial_events => $events });
}
38
# Parses $html with the zconfig's parser (html_to_events) and passes the
# result to from_events, returning whatever from_events returns. Creates a
# fresh instance first when called on a class name.
sub from_html {
  my ($proto, $html) = @_;
  my $self   = $proto->_self_or_new;
  my $parser = $self->zconfig->parser;
  return $self->from_events($parser->html_to_events($html));
}
43
# Convenience constructor: reads the entire contents of $filename and hands
# them to from_html, returning its result.
#
# The file is opened with three-argument open, so $filename is always treated
# as a literal path. (Slurping via @ARGV and the <> operator would apply
# magic two-argument open semantics, where a name such as "cmd |" runs a
# command and "-" reads STDIN.)
#
# Dies if no filename is supplied, or if the file cannot be opened or read.
sub from_file {
  my ($proto, $filename) = @_;
  my $self = $proto->_self_or_new;
  die "No filename supplied to from_file"
    unless defined $filename;
  open(my $fh, '<', $filename)
    or die "Can't open $filename for reading: $!";
  # Undefine the record separator locally so one read returns the whole file
  # (an empty file yields the empty string rather than nothing).
  my $html = do { local $/; <$fh> };
  die "Can't read from $filename: $!"
    unless defined $html;
  close($fh);
  return $self->from_html($html);
}
49
# Builds the event stream for this zoom object: a stream is created from the
# stored 'initial_events' via the zconfig's stream_utils, then passed through
# each stored transform's apply_to_stream in the order the transforms were
# added. Returns the resulting stream.
#
# Dies if no initial events have been set.
sub to_stream {
  my ($self) = @_;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $stream = $self->zconfig->stream_utils->stream_from_array(@$events);
  # Each transform wraps the stream produced by the previous one.
  for (@{ $self->{transforms} || [] }) {
    $stream = $_->apply_to_stream($stream);
  }
  return $stream;
}
59
# Returns the result of HTML::Zoom::ReadFH->from_zoom for this zoom object.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
63
# Builds the stream (see to_stream), converts it to a list with the zconfig's
# stream_utils->stream_to_array, and returns that list as an array reference.
sub to_events {
  my ($self) = @_;
  my $sutils = $self->zconfig->stream_utils;
  my @events = $sutils->stream_to_array($self->to_stream);
  return \@events;
}
68
# Runs the whole pipeline by calling to_events and discarding the result.
# Always returns nothing (empty list / undef).
sub run {
  my ($self) = @_;
  $self->to_events;
  return;
}
74
# Invokes $code as a method on the zoom object, with $_ temporarily set to
# that same object for the duration of the call. Returns whatever $code
# returns.
sub apply {
  my ($self, $code) = @_;
  local $_ = $self;
  return $self->$code();
}
80
# Conditional form of apply: when $predicate is false the zoom object is
# returned unchanged; otherwise $code is invoked as a method on it with $_
# temporarily set to the object, and its result is returned.
sub apply_if {
  my ($self, $predicate, $code) = @_;
  return $self unless $predicate;
  local $_ = $self;
  return $self->$code();
}
91
# Builds the stream (see to_stream) and passes it to the zconfig's
# producer->html_from_stream, returning that method's result.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
96
# Renders this zoom object to HTML and returns a new zoom object, of the same
# class, built from that HTML. The new object is constructed by passing
# $self to ->new as its args (so ->new sees $self's 'zconfig' entry) and
# then calling from_html on it.
sub memoize {
  my ($self) = @_;
  my $fresh = ref($self)->new($self);
  return $fresh->from_html($self->to_html);
}
101
# Returns a copy of the zoom object (created on demand when called on a class
# name) whose 'transforms' list is the existing list with $transform appended.
# The original object's list is not modified.
sub with_transform {
  my ($proto, $transform) = @_;
  my $self = $proto->_self_or_new;
  my @transforms = (@{ $self->{transforms} || [] }, $transform);
  return $self->_with({ transforms => \@transforms });
}
eeeb0921 112
# Wraps a single selector/filter pair in an HTML::Zoom::Transform (sharing
# this object's zconfig) and adds it via with_transform, returning the
# result of that call. Creates a fresh instance first when called on a class
# name.
sub with_filter {
  my ($proto, $selector, $filter) = @_;
  my $self = $proto->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    filters  => [ $filter ],
  );
  return $self->with_transform(HTML::Zoom::Transform->new(\%spec));
}
d80786d0 124
# Returns a new HTML::Zoom::TransformBuilder constructed with this object's
# zconfig, the given $selector, and this object as 'proto'. Creates a fresh
# instance first when called on a class name.
sub select {
  my ($proto, $selector) = @_;
  my $self = $proto->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    proto    => $self,
  );
  return HTML::Zoom::TransformBuilder->new(\%spec);
}
134
135# There's a bug waiting to happen here: if you do something like
136#
137# $zoom->select('.foo')
1c4455ae 138# ->remove_attribute(class => 'foo')
d80786d0 139# ->then
140# ->well_anything_really
141#
142# the second action won't execute because it doesn't match anymore.
143# Ideally instead we'd merge the match subs but that's more complex to
144# implement so I'm deferring it for the moment.
145
# Re-selects using the selector of the most recently added transform, so
# another action can be chained onto the same selection. Returns whatever
# select returns for that selector.
#
# Dies if the object has no 'transforms' entry.
sub then {
  my ($self) = @_;
  my $transforms = $self->{transforms}
    or die "Can't call ->then without a previous transform";
  my $previous = $transforms->[-1];
  return $self->select($previous->selector);
}
152
1531;
154
155=head1 NAME
156
157HTML::Zoom - selector based streaming template engine
158
159=head1 SYNOPSIS
160
161 use HTML::Zoom;
162
163 my $template = <<HTML;
164 <html>
165 <head>
166 <title>Hello people</title>
167 </head>
168 <body>
169 <h1 id="greeting">Placeholder</h1>
170 <div id="list">
171 <span>
172 <p>Name: <span class="name">Bob</span></p>
173 <p>Age: <span class="age">23</span></p>
174 </span>
175 <hr class="between" />
176 </div>
177 </body>
178 </html>
179 HTML
180
181 my $output = HTML::Zoom
182 ->from_html($template)
183 ->select('title, #greeting')->replace_content('Hello world & dog!')
184 ->select('#list')->repeat_content(
185 [
186 sub {
187 $_->select('.name')->replace_content('Matt')
188 ->select('.age')->replace_content('26')
189 },
190 sub {
191 $_->select('.name')->replace_content('Mark')
192 ->select('.age')->replace_content('0x29')
193 },
194 sub {
195 $_->select('.name')->replace_content('Epitaph')
196 ->select('.age')->replace_content('<redacted>')
197 },
198 ],
199 { repeat_between => '.between' }
200 )
201 ->to_html;
202
203will produce:
204
205=begin testinfo
206
207 my $expect = <<HTML;
208
209=end testinfo
210
211 <html>
212 <head>
213 <title>Hello world &amp; dog!</title>
214 </head>
215 <body>
216 <h1 id="greeting">Hello world &amp; dog!</h1>
217 <div id="list">
218 <span>
219 <p>Name: <span class="name">Matt</span></p>
220 <p>Age: <span class="age">26</span></p>
221 </span>
222 <hr class="between" />
223 <span>
224 <p>Name: <span class="name">Mark</span></p>
225 <p>Age: <span class="age">0x29</span></p>
226 </span>
227 <hr class="between" />
228 <span>
229 <p>Name: <span class="name">Epitaph</span></p>
230 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
231 </span>
232
233 </div>
234 </body>
235 </html>
236
237=begin testinfo
238
239 HTML
240 is($output, $expect, 'Synopsis code works ok');
241
242=end testinfo
243
1c4455ae 244=head1 DANGER WILL ROBINSON
245
246This is a 0.9 release. That means that I'm fairly happy the API isn't going
247to change in surprising and upsetting ways before 1.0 and a real compatibility
248freeze. But it also means that if it turns out there's a mistake the size of
249a politician's ego in the API design that I haven't spotted yet there may be
250a bit of breakage between here and 1.0. Hopefully not though. Appendages
251crossed and all that.
252
253Worse still, the rest of the distribution isn't documented yet. I'm sorry.
254I suck. But lots of people have been asking me to ship this, docs or no, so
255having got this class itself at least somewhat documented I figured now was
256a good time to cut a first real release.
257
258=head1 DESCRIPTION
259
260HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
261CSS selector based semantic templating engine for HTML and HTML-like
262document formats.
263
264Which is, on the whole, a bit of a mouthful. So let me step back a moment
265and explain why you care enough to understand what I mean:
266
267=head2 JQUERY ENVY
268
269HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
270piece of data into a document by doing:
271
272 $('.username').replaceAll(username);
273
274In HTML::Zoom one can write
275
276 $zoom->select('.username')->replace_content($username);
277
278which is, I hope, almost as clear, hampered only by the fact that Zoom can't
279assume a global document and therefore has nothing quite so simple as the
280$() function to get the initial selection.
281
L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
specification, and will continue to track that rather than the W3C standards
for the foreseeable future on grounds of pragmatism. Also on the grounds that
their spec is written in EN_US rather than EN_W3C, and I read the former much
better.
286
287I am happy to admit that it's very, very much a subset at the moment - see the
288L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
289and more to be supported over time as we need it and patch it in.
290
291=head2 CLEAN TEMPLATES
292
293HTML::Zoom is the cure for messy templates. How many times have you looked at
294templates like this:
295
296 <form action="/somewhere">
297 [% FOREACH field IN fields %]
298 <label for="[% field.id %]">[% field.label %]</label>
299 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
300 [% END %]
301 </form>
302
303and despaired of the fact that neither the HTML structure nor the logic are
304remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
305cleanly:
306
307 <form class="myform" action="/somewhere">
308 <label />
309 <input />
310 </form>
311
312 $zoom->select('.myform')->repeat_content([
313 map { my $field = $_; sub {
314
315 $_->select('label')
2daa653a 316 ->add_to_attribute( for => $field->{id} )
1c4455ae 317 ->then
318 ->replace_content( $field->{label} )
319
320 ->select('input')
2daa653a 321 ->add_to_attribute( name => $field->{name} )
1c4455ae 322 ->then
2daa653a 323 ->add_to_attribute( type => $field->{type} )
1c4455ae 324 ->then
2daa653a 325 ->add_to_attribute( value => $field->{value} )
1c4455ae 326
327 } } @fields
328 ]);
329
330This is, admittedly, very much not shorter. However, it makes it extremely
331clear what's happening and therefore less hassle to maintain. Especially
332because it allows the designer to fiddle with the HTML without cutting
333himself on sharp ELSE clauses, and the developer to add available data to
334the template without getting angle bracket cuts on sensitive parts.
335
336Better still, HTML::Zoom knows that it's inserting content into HTML and
337can escape it for you - the example template should really have been:
338
339 <form action="/somewhere">
340 [% FOREACH field IN fields %]
341 <label for="[% field.id | html %]">[% field.label | html %]</label>
342 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
343 [% END %]
344 </form>
345
346and frankly I'll take slightly more code any day over *that* crawling horror.
347
348(addendum: I pick on L<Template Toolkit|Template> here specifically because
349it's the template system I hate the least - for text templating, I don't
350honestly think I'll ever like anything except the next version of Template
351Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
352
353=head2 PUTTING THE FUN INTO FUNCTIONAL
354
355The principle of HTML::Zoom is to provide a reusable, functional container
356object that lets you build up a set of transforms to be applied; every method
357call you make on a zoom object returns a new object, so it's safe to do so
358on one somebody else gave you without worrying about altering state (with
359the notable exception of ->next for stream objects, which I'll come to later).
360
361So:
362
363 my $z2 = $z1->select('.name')->replace_content($name);
364
365 my $z3 = $z2->select('.title')->replace_content('Ms.');
366
367each time produces a new Zoom object. If you want to package up a set of
368transforms to re-use, HTML::Zoom provides an 'apply' method:
369
370 my $add_name = sub { $_->select('.name')->replace_content($name) };
371
372 my $same_as_z2 = $z1->apply($add_name);
373
374=head2 LAZINESS IS A VIRTUE
375
376HTML::Zoom does its best to defer doing anything until it's absolutely
377required. The only point at which it descends into state is when you force
378it to create a stream, directly by:
379
c9e76777 380 my $stream = $zoom->to_stream;
1c4455ae 381
382 while (my $evt = $stream->next) {
383 # handle zoom event here
384 }
385
386or indirectly via:
387
388 my $final_html = $zoom->to_html;
389
390 my $fh = $zoom->to_fh;
391
392 while (my $chunk = $fh->getline) {
393 ...
394 }
395
396Better still, the $fh returned doesn't create its stream until the first
397call to getline, which means that until you call that and force it to be
398stateful you can get back to the original stateless Zoom object via:
399
400 my $zoom = $fh->to_zoom;
401
402which is exceedingly handy for filtering L<Plack> PSGI responses, among other
403things.
404
405Because HTML::Zoom doesn't try and evaluate everything up front, you can
406generally put things together in whatever order is most appropriate. This
407means that:
408
409 my $start = HTML::Zoom->from_html($html);
410
411 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
412
413and:
414
415 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
416
417 my $zoom = $start->from_html($html);
418
419will produce equivalent final $zoom objects, thus proving that there can be
420more than one way to do it without one of them being a
421L<bait and switch|Switch>.
422
423=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
424
425HTML::Zoom's execution always happens in terms of streams under the hood
426- that is, the basic pattern for doing anything is -
427
  my $stream = get_stream_from_somewhere;

  while (my ($evt) = $stream->next) {
    # do something with the event
  }
433
434More importantly, all selectors and filters are also built as stream
435operations, so a selector and filter pair is effectively:
436
437 sub next {
438 my ($self) = @_;
439 my $next_evt = $self->parent_stream->next;
440 if ($self->selector_matches($next_evt)) {
441 return $self->apply_filter_to($next_evt);
442 } else {
443 return $next_evt;
444 }
445 }
446
447Internally, things are marginally more complicated than that, but not enough
448that you as a user should normally need to care.
449
450In fact, an HTML::Zoom object is mostly just a container for the relevant
451information from which to build the final stream that does the real work. A
452stream built from a Zoom object is a stream of events from parsing the
453initial HTML, wrapped in a filter stream per selector/filter pair provided
454as described above.
455
The upshot of this is that the application of filters works just as well on
streams as on the original Zoom object - in fact, when you run a
L<repeat_content|HTML::Zoom::FilterBuilder/repeat_content> operation your
subroutines are applied to the stream for that element of the repeat, rather
than constructing a new zoom per repeat element as well.
461
462More concretely:
463
464 $_->select('div')->replace_content('I AM A DIV!');
465
466works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
467shares sufficient of the implementation that you can generally forget the
468difference - barring the fact that a stream already has state attached so
469things like to_fh are no longer available.
470
471=head2 POP! GOES THE WEASEL
472
473... and by Weasel, I mean layout.
474
475HTML::Zoom's filehandle object supports an additional event key, 'flush',
476that is transparent to the rest of the system but indicates to the filehandle
477object to end a getline operation at that point and return the HTML so far.
478
479This means that in an environment where streaming output is available, such
480as a number of the L<Plack> PSGI handlers, you can add the flush key to an
481event in order to ensure that the HTML generated so far is flushed through
482to the browser right now. This can be especially useful if you know you're
483about to call a web service or a potentially slow database query or similar
484to ensure that at least the header/layout of your page renders now, improving
485perceived user responsiveness while your application waits around for the
486data it needs.
487
488This is currently exposed by the 'flush_before' option to the collect filter,
489which incidentally also underlies the replace and repeat filters, so to
490indicate we want this behaviour to happen before a query is executed we can
491write something like:
492
493 $zoom->select('.item')->repeat(sub {
494 if (my $row = $db_thing->next) {
495 return sub { $_->select('.item-name')->replace_content($row->name) }
496 } else {
497 return
498 }
499 }, { flush_before => 1 });
500
501which should have the desired effect given a sufficiently lazy $db_thing (for
502example a L<DBIx::Class::ResultSet> object).
503
504=head2 A FISTFUL OF OBJECTS
505
506At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
507whose purpose is to hang on to the various bits and pieces that things need
508so that there's a common way of accessing shared functionality.
509
510Were I a computer scientist I would probably call this an "Inversion of
511Control" object - which you'd be welcome to google to learn more about, or
512you can just imagine a computer scientist being suspended upside down over
513a pit. Either way works for me, I'm a pure maths grad.
514
515The ZConfig object hangs on to one each of the following for you:
516
517=over 4
518
519=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
520
521=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
522
523=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
524
525=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
526
527=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
528
529=back
530
531In theory you could replace any of these with anything you like, but in
532practice you're probably best restricting yourself to subclasses, or at
533least things that manage to look like the original if you squint a bit.
534
535If you do something more clever than that, or find yourself overriding things
536in your ZConfig a lot, please please tell us about it via one of the means
537mentioned under L</SUPPORT>.
538
539=head2 SEMANTIC DIDACTIC
540
541Some will argue that overloading CSS selectors to do data stuff is a terrible
542idea, and possibly even a step towards the "Concrete Javascript" pattern
543(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
544where it keeps reminding me of the late, great Tony Hart's plasticine friend).
545
546To which I say, "eh", "meh", and possibly also "feh". If it really upsets
547you, either use extra classes for this (and remove them afterwards) or
548use special fake elements or, well, honestly, just use something different.
549L<Template::Semantic> provides a similar idea to zoom except using XPath
550and XML::LibXML transforms rather than a lightweight streaming approach -
551maybe you'd like that better. Or maybe you really did want
552L<Template Toolkit|Template> after all. It is still damn good at what it does,
553after all.
554
555So far, however, I've found that for new sites the designers I'm working with
556generally want to produce nice semantic HTML with classes that represent the
557nature of the data rather than the structure of the layout, so sharing them
558as a common interface works really well for us.
559
In the absence of any evidence that overloading CSS selectors has killed
children or unexpectedly set fire to grandmothers - and given microformats
have been around for a while there's been plenty of opportunity for
octogenarian combustion - I'd suggest you give it a try and see if you like it.
564
565=head2 GET THEE TO A SUMMARY!
566
567Erm. Well.
568
569HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
570CSS selector based semantic templating engine for HTML and HTML-like
571document formats.
572
573But I said that already. Although hopefully by now you have some idea what I
574meant when I said it. If you didn't have any idea the first time. I mean, I'm
575not trying to call you stupid or anything. Just saying that maybe it wasn't
576totally obvious without the explanation. Or something.
577
578Er.
579
580Maybe we should just move on to the method docs.
581
582=head1 METHODS
583
584=head2 new
585
586 my $zoom = HTML::Zoom->new;
587
588 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
589
590Create a new empty Zoom object. You can optionally pass an
591L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
592the default components.
593
This method isn't often used directly since several other methods can also
act as constructors, notably L</select> and L</from_html>.
596
597=head2 zconfig
598
599 my $zconfig = $zoom->zconfig;
600
601Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
602shouldn't usually need to call this yourself.
603
604=head2 from_html
605
606 my $zoom = HTML::Zoom->from_html($html);
607
608 my $z2 = $z1->from_html($html);
609
610Parses the HTML using the current zconfig's parser object and returns a new
611zoom instance with that as the source HTML to be transformed.
612
613=head2 from_file
614
615 my $zoom = HTML::Zoom->from_file($file);
616
617 my $z2 = $z1->from_file($file);
618
619Convenience method - slurps the contents of $file and calls from_html with it.
620
621=head2 to_stream
622
  my $stream = $zoom->to_stream;

  while (my ($evt) = $stream->next) {
    ...
  }
627
628Creates a stream, starting with a stream of the events from the HTML supplied
629via L</from_html> and then wrapping it in turn with each selector+filter pair
630that have been applied to the zoom object.
631
632=head2 to_fh
633
634 my $fh = $zoom->to_fh;
635
636 call_something_expecting_a_filehandle($fh);
637
638Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
639time its getline method is called and then return all HTML up to the next
640event with 'flush' set.
641
642You can pass this filehandle to compliant PSGI handlers (and probably most
643web frameworks).
644
645=head2 run
646
647 $zoom->run;
648
649Runs the zoom object's transforms without doing anything with the results.
650
651Normally used to get side effects of a zoom run - for example when using
652L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
653
654=head2 apply
655
  my $z2 = $z1->apply(sub {
    $_->select('div')->replace_content('I AM A DIV!')
  });
659
660Sets $_ to the zoom object and then runs the provided code. Basically syntax
661sugar, the following is entirely equivalent:
662
  my $sub = sub {
    shift->select('div')->replace_content('I AM A DIV!')
  };
666
667 my $z2 = $sub->($z1);
668
669=head2 to_html
670
671 my $html = $zoom->to_html;
672
673Runs the zoom processing and returns the resulting HTML.
674
675=head2 memoize
676
677 my $z2 = $z1->memoize;
678
679Creates a new zoom whose source HTML is the results of the original zoom's
680processing. Effectively syntax sugar for:
681
682 my $z2 = HTML::Zoom->from_html($z1->to_html);
683
684but preserves your L<HTML::Zoom::ZConfig> object.
685
686=head2 with_filter
687
688 my $zoom = HTML::Zoom->with_filter(
689 'div', $filter_builder->replace_content('I AM A DIV!')
690 );
691
692 my $z2 = $z1->with_filter(
693 'div', $filter_builder->replace_content('I AM A DIV!')
694 );
695
696Lower level interface than L</select> to adding filters to your zoom object.
697
698In normal usage, you probably don't need to call this yourself.
699
700=head2 select
701
702 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
703
704 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
705
97192b02 706Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
1c4455ae 707on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
708
709In normal usage you should generally always put the pair of method calls
710together; the intermediary object isn't designed or expected to stick around.
711
712=head2 then
713
2daa653a 714 my $z2 = $z1->select('div')->add_to_attribute(class => 'spoon')
1c4455ae 715 ->then
716 ->replace_content('I AM A DIV!');
717
718Re-runs the previous select to allow you to chain actions together on the
719same selector.
720
45b4cea1 721=head1 AUTHORS
722
723=over
724
725=item * Matt S. Trout
726
727=back
728
729=head1 LICENSE
730
731This library is free software, you can redistribute it and/or modify
732it under the same terms as Perl itself.
733
d80786d0 734=cut
45b4cea1 735