add VERSION to Zoom.pM
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
bf5a23d0 7use HTML::Zoom::ReadFH;
655965b3 8use HTML::Zoom::Transform;
eeeb0921 9use HTML::Zoom::TransformBuilder;
d80786d0 10
7af7362d 11our $VERSION = '0.009001';
12
13$VERSION = eval $VERSION;
14
# Constructor. Accepts an optional hashref of arguments; its 'zconfig' entry
# (or an empty hashref when that entry is false/absent) is handed to
# HTML::Zoom::ZConfig->new, and the result is stored under the 'zconfig' key
# of the newly blessed object.
sub new {
  my $class = shift;
  my $args = shift;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
21
# Returns the value stored under the 'zconfig' key. Usable as a class method
# too: the invocant is first normalised through _self_or_new.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
23
# Normalises the invocant: a reference (an existing object) is returned
# unchanged, anything else is treated as a class name and instantiated by
# calling ->new on it with no arguments.
sub _self_or_new {
  return $_[0] if ref($_[0]);
  return $_[0]->new;
}
27
# Returns a new object blessed into the invocant's class. Its fields are a
# shallow copy of the invocant's fields overlaid with the key/value pairs of
# the hashref given as second argument; the invocant itself is left untouched.
sub _with {
  my ($proto, $overrides) = @_;
  my %fields = (%$proto, %$overrides);
  return bless(\%fields, ref($proto));
}
31
# Returns a copy of the zoom (or a fresh one when called on the class) whose
# 'initial_events' field is set to the value passed as first argument.
sub from_events {
  my ($invocant, $events) = @_;
  my $self = $invocant->_self_or_new;
  return $self->_with({ initial_events => $events });
}
38
# Converts the given HTML into events with the zconfig's parser
# (html_to_events) and returns the result of passing them to from_events.
sub from_html {
  my ($invocant, $html) = @_;
  my $self = $invocant->_self_or_new;
  my $parser = $self->zconfig->parser;
  return $self->from_events($parser->html_to_events($html));
}
43
# Builds a zoom from the contents of a file.
#
# $filename is opened read-only with three-argument open, so it is always
# treated as a literal path. The previous slurp through @ARGV and <> used
# Perl's magic open, which would run a command for a name such as "cmd |"
# and read STDIN for "-". The whole file is read in one go and passed to
# from_html, whose result is returned.
#
# Dies if no filename is given or if the file cannot be opened; the open
# failure message includes the filename and the OS error.
sub from_file {
  my $self = shift->_self_or_new;
  my $filename = shift;
  die "No filename supplied to from_file" unless defined $filename;
  open(my $fh, '<', $filename)
    or die "Can't open $filename for reading: $!";
  # Undefine the input record separator locally so a single read returns
  # the complete file.
  my $html = do { local $/; <$fh> };
  close($fh);
  $self->from_html($html);
}
49
# Builds the event stream for this zoom: starts from a stream over the stored
# initial events (via the zconfig's stream_utils) and feeds it through each
# stored transform's apply_to_stream in order, returning the final stream.
# Dies if no initial events have been set.
sub to_stream {
  my ($self) = @_;
  unless ($self->{initial_events}) {
    die "No events to build from - forgot to call from_html?";
  }
  my $stream = $self->zconfig->stream_utils->stream_from_array(
    @{ $self->{initial_events} }
  );
  foreach (@{ $self->{transforms} || [] }) {
    $stream = $_->apply_to_stream($stream);
  }
  return $stream;
}
59
# Hands this zoom to HTML::Zoom::ReadFH->from_zoom and returns the result.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
63
# Builds the stream for this zoom and returns an array reference holding
# whatever the zconfig's stream_utils->stream_to_array yields for it.
sub to_events {
  my ($self) = @_;
  my $utils = $self->zconfig->stream_utils;
  my @events = $utils->stream_to_array($self->to_stream);
  return \@events;
}
68
# Calls to_events and throws the result away; returns nothing (an empty list
# in list context).
sub run {
  my ($self) = @_;
  $self->to_events;
  return;
}
74
# Invokes $code as a method on the zoom object with $_ localised to that same
# object, and returns whatever $code returns.
sub apply {
  my $self = shift;
  my $code = shift;
  local $_ = $self;
  return $self->$code;
}
80
# Builds the stream for this zoom and returns what the zconfig's producer
# generates from it via html_from_stream.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
85
# Creates a new object of the same class (passing the current object to ->new
# as its arguments) and loads it, via from_html, with the HTML produced by
# this zoom's to_html.
sub memoize {
  my ($self) = @_;
  my $fresh = ref($self)->new($self);
  return $fresh->from_html($self->to_html);
}
90
# Returns a copy of the zoom (or a fresh one when called on the class) whose
# 'transforms' list is the existing list with $transform appended. The
# existing list is copied, not modified.
sub with_transform {
  my ($invocant, $transform) = @_;
  my $self = $invocant->_self_or_new;
  my @chain = @{ $self->{transforms} || [] };
  push @chain, $transform;
  return $self->_with({ transforms => \@chain });
}
eeeb0921 101
# Wraps a selector and a single filter in an HTML::Zoom::Transform (sharing
# this zoom's zconfig) and returns the result of adding it via with_transform.
sub with_filter {
  my ($invocant, $selector, $filter) = @_;
  my $self = $invocant->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    filters  => [ $filter ],
  );
  return $self->with_transform(HTML::Zoom::Transform->new(\%spec));
}
d80786d0 113
# Returns an HTML::Zoom::TransformBuilder constructed with this zoom's
# zconfig, the given selector, and the zoom itself as 'proto'.
sub select {
  my ($invocant, $selector) = @_;
  my $self = $invocant->_self_or_new;
  my %spec = (
    zconfig  => $self->zconfig,
    selector => $selector,
    proto    => $self,
  );
  return HTML::Zoom::TransformBuilder->new(\%spec);
}
123
124# There's a bug waiting to happen here: if you do something like
125#
126# $zoom->select('.foo')
1c4455ae 127# ->remove_attribute(class => 'foo')
d80786d0 128# ->then
129# ->well_anything_really
130#
131# the second action won't execute because it doesn't match anymore.
132# Ideally instead we'd merge the match subs but that's more complex to
133# implement so I'm deferring it for the moment.
134
# Calls select again with the selector of the most recently added transform.
# Dies if the zoom has no 'transforms' entry.
sub then {
  my ($self) = @_;
  my $transforms = $self->{transforms}
    or die "Can't call ->then without a previous transform";
  return $self->select($transforms->[-1]->selector);
}
141
1421;
143
144=head1 NAME
145
146HTML::Zoom - selector based streaming template engine
147
148=head1 SYNOPSIS
149
150 use HTML::Zoom;
151
152 my $template = <<HTML;
153 <html>
154 <head>
155 <title>Hello people</title>
156 </head>
157 <body>
158 <h1 id="greeting">Placeholder</h1>
159 <div id="list">
160 <span>
161 <p>Name: <span class="name">Bob</span></p>
162 <p>Age: <span class="age">23</span></p>
163 </span>
164 <hr class="between" />
165 </div>
166 </body>
167 </html>
168 HTML
169
170 my $output = HTML::Zoom
171 ->from_html($template)
172 ->select('title, #greeting')->replace_content('Hello world & dog!')
173 ->select('#list')->repeat_content(
174 [
175 sub {
176 $_->select('.name')->replace_content('Matt')
177 ->select('.age')->replace_content('26')
178 },
179 sub {
180 $_->select('.name')->replace_content('Mark')
181 ->select('.age')->replace_content('0x29')
182 },
183 sub {
184 $_->select('.name')->replace_content('Epitaph')
185 ->select('.age')->replace_content('<redacted>')
186 },
187 ],
188 { repeat_between => '.between' }
189 )
190 ->to_html;
191
192will produce:
193
194=begin testinfo
195
196 my $expect = <<HTML;
197
198=end testinfo
199
200 <html>
201 <head>
202 <title>Hello world &amp; dog!</title>
203 </head>
204 <body>
205 <h1 id="greeting">Hello world &amp; dog!</h1>
206 <div id="list">
207 <span>
208 <p>Name: <span class="name">Matt</span></p>
209 <p>Age: <span class="age">26</span></p>
210 </span>
211 <hr class="between" />
212 <span>
213 <p>Name: <span class="name">Mark</span></p>
214 <p>Age: <span class="age">0x29</span></p>
215 </span>
216 <hr class="between" />
217 <span>
218 <p>Name: <span class="name">Epitaph</span></p>
219 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
220 </span>
221
222 </div>
223 </body>
224 </html>
225
226=begin testinfo
227
228 HTML
229 is($output, $expect, 'Synopsis code works ok');
230
231=end testinfo
232
1c4455ae 233=head1 DANGER WILL ROBINSON
234
235This is a 0.9 release. That means that I'm fairly happy the API isn't going
236to change in surprising and upsetting ways before 1.0 and a real compatibility
237freeze. But it also means that if it turns out there's a mistake the size of
238a politician's ego in the API design that I haven't spotted yet there may be
239a bit of breakage between here and 1.0. Hopefully not though. Appendages
240crossed and all that.
241
242Worse still, the rest of the distribution isn't documented yet. I'm sorry.
243I suck. But lots of people have been asking me to ship this, docs or no, so
244having got this class itself at least somewhat documented I figured now was
245a good time to cut a first real release.
246
247=head1 DESCRIPTION
248
249HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
250CSS selector based semantic templating engine for HTML and HTML-like
251document formats.
252
253Which is, on the whole, a bit of a mouthful. So let me step back a moment
254and explain why you care enough to understand what I mean:
255
256=head2 JQUERY ENVY
257
258HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
259piece of data into a document by doing:
260
261 $('.username').replaceAll(username);
262
263In HTML::Zoom one can write
264
265 $zoom->select('.username')->replace_content($username);
266
267which is, I hope, almost as clear, hampered only by the fact that Zoom can't
268assume a global document and therefore has nothing quite so simple as the
269$() function to get the initial selection.
270
271L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
272specification, and will continue to track that rather than the W3C standards
for the foreseeable future on grounds of pragmatism. Also on the grounds that
their spec is written in EN_US rather than EN_W3C, and I read the former much
better.
275
276I am happy to admit that it's very, very much a subset at the moment - see the
277L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
278and more to be supported over time as we need it and patch it in.
279
280=head2 CLEAN TEMPLATES
281
282HTML::Zoom is the cure for messy templates. How many times have you looked at
283templates like this:
284
285 <form action="/somewhere">
286 [% FOREACH field IN fields %]
287 <label for="[% field.id %]">[% field.label %]</label>
288 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
289 [% END %]
290 </form>
291
292and despaired of the fact that neither the HTML structure nor the logic are
293remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
294cleanly:
295
296 <form class="myform" action="/somewhere">
297 <label />
298 <input />
299 </form>
300
301 $zoom->select('.myform')->repeat_content([
302 map { my $field = $_; sub {
303
304 $_->select('label')
2daa653a 305 ->add_to_attribute( for => $field->{id} )
1c4455ae 306 ->then
307 ->replace_content( $field->{label} )
308
309 ->select('input')
2daa653a 310 ->add_to_attribute( name => $field->{name} )
1c4455ae 311 ->then
2daa653a 312 ->add_to_attribute( type => $field->{type} )
1c4455ae 313 ->then
2daa653a 314 ->add_to_attribute( value => $field->{value} )
1c4455ae 315
316 } } @fields
317 ]);
318
319This is, admittedly, very much not shorter. However, it makes it extremely
320clear what's happening and therefore less hassle to maintain. Especially
321because it allows the designer to fiddle with the HTML without cutting
322himself on sharp ELSE clauses, and the developer to add available data to
323the template without getting angle bracket cuts on sensitive parts.
324
325Better still, HTML::Zoom knows that it's inserting content into HTML and
326can escape it for you - the example template should really have been:
327
328 <form action="/somewhere">
329 [% FOREACH field IN fields %]
330 <label for="[% field.id | html %]">[% field.label | html %]</label>
331 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
332 [% END %]
333 </form>
334
335and frankly I'll take slightly more code any day over *that* crawling horror.
336
337(addendum: I pick on L<Template Toolkit|Template> here specifically because
338it's the template system I hate the least - for text templating, I don't
339honestly think I'll ever like anything except the next version of Template
340Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
341
342=head2 PUTTING THE FUN INTO FUNCTIONAL
343
344The principle of HTML::Zoom is to provide a reusable, functional container
345object that lets you build up a set of transforms to be applied; every method
346call you make on a zoom object returns a new object, so it's safe to do so
347on one somebody else gave you without worrying about altering state (with
348the notable exception of ->next for stream objects, which I'll come to later).
349
350So:
351
352 my $z2 = $z1->select('.name')->replace_content($name);
353
354 my $z3 = $z2->select('.title')->replace_content('Ms.');
355
356each time produces a new Zoom object. If you want to package up a set of
357transforms to re-use, HTML::Zoom provides an 'apply' method:
358
359 my $add_name = sub { $_->select('.name')->replace_content($name) };
360
361 my $same_as_z2 = $z1->apply($add_name);
362
363=head2 LAZINESS IS A VIRTUE
364
365HTML::Zoom does its best to defer doing anything until it's absolutely
366required. The only point at which it descends into state is when you force
367it to create a stream, directly by:
368
c9e76777 369 my $stream = $zoom->to_stream;
1c4455ae 370
371 while (my $evt = $stream->next) {
372 # handle zoom event here
373 }
374
375or indirectly via:
376
377 my $final_html = $zoom->to_html;
378
379 my $fh = $zoom->to_fh;
380
381 while (my $chunk = $fh->getline) {
382 ...
383 }
384
385Better still, the $fh returned doesn't create its stream until the first
386call to getline, which means that until you call that and force it to be
387stateful you can get back to the original stateless Zoom object via:
388
389 my $zoom = $fh->to_zoom;
390
391which is exceedingly handy for filtering L<Plack> PSGI responses, among other
392things.
393
394Because HTML::Zoom doesn't try and evaluate everything up front, you can
395generally put things together in whatever order is most appropriate. This
396means that:
397
398 my $start = HTML::Zoom->from_html($html);
399
400 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
401
402and:
403
404 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
405
406 my $zoom = $start->from_html($html);
407
408will produce equivalent final $zoom objects, thus proving that there can be
409more than one way to do it without one of them being a
410L<bait and switch|Switch>.
411
412=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
413
414HTML::Zoom's execution always happens in terms of streams under the hood
415- that is, the basic pattern for doing anything is -
416
  my $stream = get_stream_from_somewhere;
418
419 while (my ($evt) = $stream->next) {
420 # do something with the event
421 }
422
423More importantly, all selectors and filters are also built as stream
424operations, so a selector and filter pair is effectively:
425
426 sub next {
427 my ($self) = @_;
428 my $next_evt = $self->parent_stream->next;
429 if ($self->selector_matches($next_evt)) {
430 return $self->apply_filter_to($next_evt);
431 } else {
432 return $next_evt;
433 }
434 }
435
436Internally, things are marginally more complicated than that, but not enough
437that you as a user should normally need to care.
438
439In fact, an HTML::Zoom object is mostly just a container for the relevant
440information from which to build the final stream that does the real work. A
441stream built from a Zoom object is a stream of events from parsing the
442initial HTML, wrapped in a filter stream per selector/filter pair provided
443as described above.
444
445The upshot of this is that the application of filters works just as well on
446streams as on the original Zoom object - in fact, when you run a
L<HTML::Zoom::FilterBuilder/repeat_content> operation your subroutines are applied to the stream for
448that element of the repeat, rather than constructing a new zoom per repeat
449element as well.
450
451More concretely:
452
453 $_->select('div')->replace_content('I AM A DIV!');
454
455works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
456shares sufficient of the implementation that you can generally forget the
457difference - barring the fact that a stream already has state attached so
458things like to_fh are no longer available.
459
460=head2 POP! GOES THE WEASEL
461
462... and by Weasel, I mean layout.
463
464HTML::Zoom's filehandle object supports an additional event key, 'flush',
465that is transparent to the rest of the system but indicates to the filehandle
466object to end a getline operation at that point and return the HTML so far.
467
468This means that in an environment where streaming output is available, such
469as a number of the L<Plack> PSGI handlers, you can add the flush key to an
470event in order to ensure that the HTML generated so far is flushed through
471to the browser right now. This can be especially useful if you know you're
472about to call a web service or a potentially slow database query or similar
473to ensure that at least the header/layout of your page renders now, improving
474perceived user responsiveness while your application waits around for the
475data it needs.
476
477This is currently exposed by the 'flush_before' option to the collect filter,
478which incidentally also underlies the replace and repeat filters, so to
479indicate we want this behaviour to happen before a query is executed we can
480write something like:
481
482 $zoom->select('.item')->repeat(sub {
483 if (my $row = $db_thing->next) {
484 return sub { $_->select('.item-name')->replace_content($row->name) }
485 } else {
486 return
487 }
488 }, { flush_before => 1 });
489
490which should have the desired effect given a sufficiently lazy $db_thing (for
491example a L<DBIx::Class::ResultSet> object).
492
493=head2 A FISTFUL OF OBJECTS
494
495At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
496whose purpose is to hang on to the various bits and pieces that things need
497so that there's a common way of accessing shared functionality.
498
499Were I a computer scientist I would probably call this an "Inversion of
500Control" object - which you'd be welcome to google to learn more about, or
501you can just imagine a computer scientist being suspended upside down over
502a pit. Either way works for me, I'm a pure maths grad.
503
504The ZConfig object hangs on to one each of the following for you:
505
506=over 4
507
508=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
509
510=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
511
512=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
513
514=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
515
516=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
517
518=back
519
520In theory you could replace any of these with anything you like, but in
521practice you're probably best restricting yourself to subclasses, or at
522least things that manage to look like the original if you squint a bit.
523
524If you do something more clever than that, or find yourself overriding things
525in your ZConfig a lot, please please tell us about it via one of the means
526mentioned under L</SUPPORT>.
527
528=head2 SEMANTIC DIDACTIC
529
530Some will argue that overloading CSS selectors to do data stuff is a terrible
531idea, and possibly even a step towards the "Concrete Javascript" pattern
532(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
533where it keeps reminding me of the late, great Tony Hart's plasticine friend).
534
535To which I say, "eh", "meh", and possibly also "feh". If it really upsets
536you, either use extra classes for this (and remove them afterwards) or
537use special fake elements or, well, honestly, just use something different.
538L<Template::Semantic> provides a similar idea to zoom except using XPath
539and XML::LibXML transforms rather than a lightweight streaming approach -
540maybe you'd like that better. Or maybe you really did want
541L<Template Toolkit|Template> after all. It is still damn good at what it does,
542after all.
543
544So far, however, I've found that for new sites the designers I'm working with
545generally want to produce nice semantic HTML with classes that represent the
546nature of the data rather than the structure of the layout, so sharing them
547as a common interface works really well for us.
548
549In the absence of any evidence that overloading CSS selectors has killed
550children or unexpectedly set fire to grandmothers - and given microformats
551have been around for a while there's been plenty of opportunity for
octogenarian combustion - I'd suggest you give it a try and see if you like it.
553
554=head2 GET THEE TO A SUMMARY!
555
556Erm. Well.
557
558HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
559CSS selector based semantic templating engine for HTML and HTML-like
560document formats.
561
562But I said that already. Although hopefully by now you have some idea what I
563meant when I said it. If you didn't have any idea the first time. I mean, I'm
564not trying to call you stupid or anything. Just saying that maybe it wasn't
565totally obvious without the explanation. Or something.
566
567Er.
568
569Maybe we should just move on to the method docs.
570
571=head1 METHODS
572
573=head2 new
574
575 my $zoom = HTML::Zoom->new;
576
577 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
578
579Create a new empty Zoom object. You can optionally pass an
580L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
581the default components.
582
This method isn't often used directly since several other methods can also
act as constructors, notably L</select> and L</from_html>.
585
586=head2 zconfig
587
588 my $zconfig = $zoom->zconfig;
589
590Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
591shouldn't usually need to call this yourself.
592
593=head2 from_html
594
595 my $zoom = HTML::Zoom->from_html($html);
596
597 my $z2 = $z1->from_html($html);
598
599Parses the HTML using the current zconfig's parser object and returns a new
600zoom instance with that as the source HTML to be transformed.
601
602=head2 from_file
603
604 my $zoom = HTML::Zoom->from_file($file);
605
606 my $z2 = $z1->from_file($file);
607
608Convenience method - slurps the contents of $file and calls from_html with it.
609
610=head2 to_stream
611
612 my $stream = $zoom->to_stream;
613
  while (my ($evt) = $stream->next) {
    ...
  }
616
617Creates a stream, starting with a stream of the events from the HTML supplied
618via L</from_html> and then wrapping it in turn with each selector+filter pair
619that have been applied to the zoom object.
620
621=head2 to_fh
622
623 my $fh = $zoom->to_fh;
624
625 call_something_expecting_a_filehandle($fh);
626
627Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
628time its getline method is called and then return all HTML up to the next
629event with 'flush' set.
630
631You can pass this filehandle to compliant PSGI handlers (and probably most
632web frameworks).
633
634=head2 run
635
636 $zoom->run;
637
638Runs the zoom object's transforms without doing anything with the results.
639
640Normally used to get side effects of a zoom run - for example when using
641L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
642
643=head2 apply
644
  my $z2 = $z1->apply(sub {
    $_->select('div')->replace_content('I AM A DIV!')
  });
648
649Sets $_ to the zoom object and then runs the provided code. Basically syntax
650sugar, the following is entirely equivalent:
651
  my $sub = sub {
    shift->select('div')->replace_content('I AM A DIV!')
  };
655
656 my $z2 = $sub->($z1);
657
658=head2 to_html
659
660 my $html = $zoom->to_html;
661
662Runs the zoom processing and returns the resulting HTML.
663
664=head2 memoize
665
666 my $z2 = $z1->memoize;
667
668Creates a new zoom whose source HTML is the results of the original zoom's
669processing. Effectively syntax sugar for:
670
671 my $z2 = HTML::Zoom->from_html($z1->to_html);
672
673but preserves your L<HTML::Zoom::ZConfig> object.
674
675=head2 with_filter
676
677 my $zoom = HTML::Zoom->with_filter(
678 'div', $filter_builder->replace_content('I AM A DIV!')
679 );
680
681 my $z2 = $z1->with_filter(
682 'div', $filter_builder->replace_content('I AM A DIV!')
683 );
684
685Lower level interface than L</select> to adding filters to your zoom object.
686
687In normal usage, you probably don't need to call this yourself.
688
689=head2 select
690
691 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
692
693 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
694
97192b02 695Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
1c4455ae 696on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
697
698In normal usage you should generally always put the pair of method calls
699together; the intermediary object isn't designed or expected to stick around.
700
701=head2 then
702
2daa653a 703 my $z2 = $z1->select('div')->add_to_attribute(class => 'spoon')
1c4455ae 704 ->then
705 ->replace_content('I AM A DIV!');
706
707Re-runs the previous select to allow you to chain actions together on the
708same selector.
709
45b4cea1 710=head1 AUTHORS
711
712=over
713
714=item * Matt S. Trout
715
716=back
717
718=head1 LICENSE
719
720This library is free software, you can redistribute it and/or modify
721it under the same terms as Perl itself.
722
d80786d0 723=cut
45b4cea1 724