5 use HTML::Zoom::ZConfig;
6 use HTML::Zoom::ReadFH;
7 use HTML::Zoom::Transform;
8 use HTML::Zoom::TransformBuilder;
10 our $VERSION = '0.009004';
12 $VERSION = eval $VERSION;
15 my ($class, $args) = @_;
17 $new->{zconfig} = HTML::Zoom::ZConfig->new($args->{zconfig}||{});
21 sub zconfig { shift->_self_or_new->{zconfig} }
24 ref($_[0]) ? $_[0] : $_[0]->new
28 bless({ %{$_[0]}, %{$_[1]} }, ref($_[0]));
32 my $self = shift->_self_or_new;
34 initial_events => shift,
39 my $self = shift->_self_or_new;
40 $self->from_events($self->zconfig->parser->html_to_events($_[0]))
44 my $self = shift->_self_or_new;
46 $self->from_html(do { local (@ARGV, $/) = ($filename); <> });
51 die "No events to build from - forgot to call from_html?"
52 unless $self->{initial_events};
53 my $sutils = $self->zconfig->stream_utils;
54 my $stream = $sutils->stream_from_array(@{$self->{initial_events}});
55 $stream = $_->apply_to_stream($stream) for @{$self->{transforms}||[]};
60 HTML::Zoom::ReadFH->from_zoom(shift);
65 [ $self->zconfig->stream_utils->stream_to_array($self->to_stream) ];
75 my ($self, $code) = @_;
81 my ($self, $predicate, $code) = @_;
93 $self->zconfig->producer->html_from_stream($self->to_stream);
98 ref($self)->new($self)->from_html($self->to_html);
102 my $self = shift->_self_or_new;
103 my ($transform) = @_;
106 @{$self->{transforms}||[]},
113 my $self = shift->_self_or_new;
114 my ($selector, $filter) = @_;
115 $self->with_transform(
116 HTML::Zoom::Transform->new({
117 zconfig => $self->zconfig,
118 selector => $selector,
119 filters => [ $filter ]
125 my $self = shift->_self_or_new;
127 return HTML::Zoom::TransformBuilder->new({
128 zconfig => $self->zconfig,
129 selector => $selector,
134 # There's a bug waiting to happen here: if you do something like
136 # $zoom->select('.foo')
137 # ->remove_attribute(class => 'foo')
139 # ->well_anything_really
141 # the second action won't execute because it doesn't match anymore.
142 # Ideally instead we'd merge the match subs but that's more complex to
143 # implement so I'm deferring it for the moment.
147 die "Can't call ->then without a previous transform"
148 unless $self->{transforms};
149 $self->select($self->{transforms}->[-1]->selector);
152 ## mst: well I'm thinking if basically
153 ## mst: $zoom->$whatever($selector => @args)
154 ## mst: becomes $zoom->select($selector)->$whatever(@args)
158 my %selection_args = @_;
159 my $meth = our $AUTOLOAD;
161 while( my($selector, $args) = each %selection_args) {
162 $self = $self->select($selector)->$meth($args);
170 HTML::Zoom - selector based streaming template engine
176 my $template = <<HTML;
179 <title>Hello people</title>
182 <h1 id="greeting">Placeholder</h1>
185 <p>Name: <span class="name">Bob</span></p>
186 <p>Age: <span class="age">23</span></p>
188 <hr class="between" />
194 my $output = HTML::Zoom
195 ->from_html($template)
196 ->select('title, #greeting')->replace_content('Hello world & dog!')
197 ->select('#list')->repeat_content(
200 $_->select('.name')->replace_content('Matt')
201 ->select('.age')->replace_content('26')
204 $_->select('.name')->replace_content('Mark')
205 ->select('.age')->replace_content('0x29')
208 $_->select('.name')->replace_content('Epitaph')
209 ->select('.age')->replace_content('<redacted>')
212 { repeat_between => '.between' }
226 <title>Hello world & dog!</title>
229 <h1 id="greeting">Hello world & dog!</h1>
232 <p>Name: <span class="name">Matt</span></p>
233 <p>Age: <span class="age">26</span></p>
235 <hr class="between" />
237 <p>Name: <span class="name">Mark</span></p>
238 <p>Age: <span class="age">0x29</span></p>
240 <hr class="between" />
242 <p>Name: <span class="name">Epitaph</span></p>
243 <p>Age: <span class="age"><redacted></span></p>
253 is($output, $expect, 'Synopsis code works ok');
257 =head1 DANGER WILL ROBINSON
259 This is a 0.9 release. That means that I'm fairly happy the API isn't going
260 to change in surprising and upsetting ways before 1.0 and a real compatibility
261 freeze. But it also means that if it turns out there's a mistake the size of
262 a politician's ego in the API design that I haven't spotted yet there may be
263 a bit of breakage between here and 1.0. Hopefully not though. Appendages
264 crossed and all that.
266 Worse still, the rest of the distribution isn't documented yet. I'm sorry.
267 I suck. But lots of people have been asking me to ship this, docs or no, so
268 having got this class itself at least somewhat documented I figured now was
269 a good time to cut a first real release.
273 HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
274 CSS selector based semantic templating engine for HTML and HTML-like
277 Which is, on the whole, a bit of a mouthful. So let me step back a moment
278 and explain why you care enough to understand what I mean:
282 HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
283 piece of data into a document by doing:
285 $('.username').text(username);
287 In HTML::Zoom one can write
289 $zoom->select('.username')->replace_content($username);
291 which is, I hope, almost as clear, hampered only by the fact that Zoom can't
292 assume a global document and therefore has nothing quite so simple as the
293 $() function to get the initial selection.
295 L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
296 specification, and will continue to track that rather than the W3C standards
297 for the foreseeable future on grounds of pragmatism. Also on the grounds that their
298 spec is written in EN_US rather than EN_W3C, and I read the former much better.
300 I am happy to admit that it's very, very much a subset at the moment - see the
301 L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
302 and more to be supported over time as we need it and patch it in.
304 =head2 CLEAN TEMPLATES
306 HTML::Zoom is the cure for messy templates. How many times have you looked at
309 <form action="/somewhere">
310 [% FOREACH field IN fields %]
311 <label for="[% field.id %]">[% field.label %]</label>
312 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
316 and despaired of the fact that neither the HTML structure nor the logic are
317 remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
320 <form class="myform" action="/somewhere">
325 $zoom->select('.myform')->repeat_content([
326 map { my $field = $_; sub {
329 ->add_to_attribute( for => $field->{id} )
331 ->replace_content( $field->{label} )
334 ->add_to_attribute( name => $field->{name} )
336 ->add_to_attribute( type => $field->{type} )
338 ->add_to_attribute( value => $field->{value} )
343 This is, admittedly, very much not shorter. However, it makes it extremely
344 clear what's happening and therefore less hassle to maintain. Especially
345 because it allows the designer to fiddle with the HTML without cutting
346 himself on sharp ELSE clauses, and the developer to add available data to
347 the template without getting angle bracket cuts on sensitive parts.
349 Better still, HTML::Zoom knows that it's inserting content into HTML and
350 can escape it for you - the example template should really have been:
352 <form action="/somewhere">
353 [% FOREACH field IN fields %]
354 <label for="[% field.id | html %]">[% field.label | html %]</label>
355 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
359 and frankly I'll take slightly more code any day over *that* crawling horror.
361 (addendum: I pick on L<Template Toolkit|Template> here specifically because
362 it's the template system I hate the least - for text templating, I don't
363 honestly think I'll ever like anything except the next version of Template
364 Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
366 =head2 PUTTING THE FUN INTO FUNCTIONAL
368 The principle of HTML::Zoom is to provide a reusable, functional container
369 object that lets you build up a set of transforms to be applied; every method
370 call you make on a zoom object returns a new object, so it's safe to do so
371 on one somebody else gave you without worrying about altering state (with
372 the notable exception of ->next for stream objects, which I'll come to later).
376 my $z2 = $z1->select('.name')->replace_content($name);
378 my $z3 = $z2->select('.title')->replace_content('Ms.');
380 each time produces a new Zoom object. If you want to package up a set of
381 transforms to re-use, HTML::Zoom provides an 'apply' method:
383 my $add_name = sub { $_->select('.name')->replace_content($name) };
385 my $same_as_z2 = $z1->apply($add_name);
387 =head2 LAZINESS IS A VIRTUE
389 HTML::Zoom does its best to defer doing anything until it's absolutely
390 required. The only point at which it descends into state is when you force
391 it to create a stream, directly by:
393 my $stream = $zoom->to_stream;
395 while (my $evt = $stream->next) {
396 # handle zoom event here
401 my $final_html = $zoom->to_html;
403 my $fh = $zoom->to_fh;
405 while (my $chunk = $fh->getline) {
409 Better still, the $fh returned doesn't create its stream until the first
410 call to getline, which means that until you call that and force it to be
411 stateful you can get back to the original stateless Zoom object via:
413 my $zoom = $fh->to_zoom;
415 which is exceedingly handy for filtering L<Plack> PSGI responses, among other
418 Because HTML::Zoom doesn't try and evaluate everything up front, you can
419 generally put things together in whatever order is most appropriate. This
422 my $start = HTML::Zoom->from_html($html);
424 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
428 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
430 my $zoom = $start->from_html($html);
432 will produce equivalent final $zoom objects, thus proving that there can be
433 more than one way to do it without one of them being a
434 L<bait and switch|Switch>.
436 =head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
438 HTML::Zoom's execution always happens in terms of streams under the hood
439 - that is, the basic pattern for doing anything is -
441 my $stream = get_stream_from_somewhere
443 while (my ($evt) = $stream->next) {
444 # do something with the event
447 More importantly, all selectors and filters are also built as stream
448 operations, so a selector and filter pair is effectively:
452 my $next_evt = $self->parent_stream->next;
453 if ($self->selector_matches($next_evt)) {
454 return $self->apply_filter_to($next_evt);
460 Internally, things are marginally more complicated than that, but not enough
461 that you as a user should normally need to care.
463 In fact, an HTML::Zoom object is mostly just a container for the relevant
464 information from which to build the final stream that does the real work. A
465 stream built from a Zoom object is a stream of events from parsing the
466 initial HTML, wrapped in a filter stream per selector/filter pair provided
469 The upshot of this is that the application of filters works just as well on
470 streams as on the original Zoom object - in fact, when you run a
471 L</repeat_content> operation your subroutines are applied to the stream for
472 that element of the repeat, rather than constructing a new zoom per repeat
477 $_->select('div')->replace_content('I AM A DIV!');
479 works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
480 shares sufficient of the implementation that you can generally forget the
481 difference - barring the fact that a stream already has state attached so
482 things like to_fh are no longer available.
484 =head2 POP! GOES THE WEASEL
486 ... and by Weasel, I mean layout.
488 HTML::Zoom's filehandle object supports an additional event key, 'flush',
489 that is transparent to the rest of the system but indicates to the filehandle
490 object to end a getline operation at that point and return the HTML so far.
492 This means that in an environment where streaming output is available, such
493 as a number of the L<Plack> PSGI handlers, you can add the flush key to an
494 event in order to ensure that the HTML generated so far is flushed through
495 to the browser right now. This can be especially useful if you know you're
496 about to call a web service or a potentially slow database query or similar
497 to ensure that at least the header/layout of your page renders now, improving
498 perceived user responsiveness while your application waits around for the
501 This is currently exposed by the 'flush_before' option to the collect filter,
502 which incidentally also underlies the replace and repeat filters, so to
503 indicate we want this behaviour to happen before a query is executed we can
504 write something like:
506 $zoom->select('.item')->repeat(sub {
507 if (my $row = $db_thing->next) {
508 return sub { $_->select('.item-name')->replace_content($row->name) }
512 }, { flush_before => 1 });
514 which should have the desired effect given a sufficiently lazy $db_thing (for
515 example a L<DBIx::Class::ResultSet> object).
517 =head2 A FISTFUL OF OBJECTS
519 At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
520 whose purpose is to hang on to the various bits and pieces that things need
521 so that there's a common way of accessing shared functionality.
523 Were I a computer scientist I would probably call this an "Inversion of
524 Control" object - which you'd be welcome to google to learn more about, or
525 you can just imagine a computer scientist being suspended upside down over
526 a pit. Either way works for me, I'm a pure maths grad.
528 The ZConfig object hangs on to one each of the following for you:
532 =item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
534 =item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
536 =item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
538 =item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
540 =item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
544 In theory you could replace any of these with anything you like, but in
545 practice you're probably best restricting yourself to subclasses, or at
546 least things that manage to look like the original if you squint a bit.
548 If you do something more clever than that, or find yourself overriding things
549 in your ZConfig a lot, please please tell us about it via one of the means
550 mentioned under L</SUPPORT>.
552 =head2 SEMANTIC DIDACTIC
554 Some will argue that overloading CSS selectors to do data stuff is a terrible
555 idea, and possibly even a step towards the "Concrete Javascript" pattern
556 (which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
557 where it keeps reminding me of the late, great Tony Hart's plasticine friend).
559 To which I say, "eh", "meh", and possibly also "feh". If it really upsets
560 you, either use extra classes for this (and remove them afterwards) or
561 use special fake elements or, well, honestly, just use something different.
562 L<Template::Semantic> provides a similar idea to zoom except using XPath
563 and XML::LibXML transforms rather than a lightweight streaming approach -
564 maybe you'd like that better. Or maybe you really did want
565 L<Template Toolkit|Template> after all. It is still damn good at what it does,
568 So far, however, I've found that for new sites the designers I'm working with
569 generally want to produce nice semantic HTML with classes that represent the
570 nature of the data rather than the structure of the layout, so sharing them
571 as a common interface works really well for us.
573 In the absence of any evidence that overloading CSS selectors has killed
574 children or unexpectedly set fire to grandmothers - and given microformats
575 have been around for a while there's been plenty of opportunity for
576 octogenarian combustion - I'd suggest you give it a try and see if you like it.
578 =head2 GET THEE TO A SUMMARY!
582 HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
583 CSS selector based semantic templating engine for HTML and HTML-like
586 But I said that already. Although hopefully by now you have some idea what I
587 meant when I said it. If you didn't have any idea the first time. I mean, I'm
588 not trying to call you stupid or anything. Just saying that maybe it wasn't
589 totally obvious without the explanation. Or something.
593 Maybe we should just move on to the method docs.
599 my $zoom = HTML::Zoom->new;
601 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
603 Create a new empty Zoom object. You can optionally pass an
604 L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
605 the default components.
607 This method isn't often used directly since several other methods can also
608 act as constructors, notably L</select> and L</from_html>.
612 my $zconfig = $zoom->zconfig;
614 Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
615 shouldn't usually need to call this yourself.
619 my $zoom = HTML::Zoom->from_html($html);
621 my $z2 = $z1->from_html($html);
623 Parses the HTML using the current zconfig's parser object and returns a new
624 zoom instance with that as the source HTML to be transformed.
628 my $zoom = HTML::Zoom->from_file($file);
630 my $z2 = $z1->from_file($file);
632 Convenience method - slurps the contents of $file and calls from_html with it.
636 my $stream = $zoom->to_stream;
638 while (my ($evt) = $stream->next) {
641 Creates a stream, starting with a stream of the events from the HTML supplied
642 via L</from_html> and then wrapping it in turn with each selector+filter pair
643 that has been applied to the zoom object.
647 my $fh = $zoom->to_fh;
649 call_something_expecting_a_filehandle($fh);
651 Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
652 time its getline method is called and then return all HTML up to the next
653 event with 'flush' set.
655 You can pass this filehandle to compliant PSGI handlers (and probably most
662 Runs the zoom object's transforms without doing anything with the results.
664 Normally used to get side effects of a zoom run - for example when using
665 L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
669 my $z2 = $z1->apply(sub {
670 $_->select('div')->replace_content('I AM A DIV!') })
673 Sets $_ to the zoom object and then runs the provided code. Basically syntax
674 sugar, the following is entirely equivalent:
677 shift->select('div')->replace_content('I AM A DIV!') })
680 my $z2 = $sub->($z1);
684 my $html = $zoom->to_html;
686 Runs the zoom processing and returns the resulting HTML.
690 my $z2 = $z1->memoize;
692 Creates a new zoom whose source HTML is the result of the original zoom's
693 processing. Effectively syntax sugar for:
695 my $z2 = HTML::Zoom->from_html($z1->to_html);
697 but preserves your L<HTML::Zoom::ZConfig> object.
701 my $zoom = HTML::Zoom->with_filter(
702 'div', $filter_builder->replace_content('I AM A DIV!')
705 my $z2 = $z1->with_filter(
706 'div', $filter_builder->replace_content('I AM A DIV!')
709 Lower level interface than L</select> to adding filters to your zoom object.
711 In normal usage, you probably don't need to call this yourself.
715 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
717 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
719 Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
720 on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
722 In normal usage you should generally always put the pair of method calls
723 together; the intermediary object isn't designed or expected to stick around.
727 my $z2 = $z1->select('div')->add_to_attribute(class => 'spoon')
729 ->replace_content('I AM A DIV!');
731 Re-runs the previous select to allow you to chain actions together on the
736 mst - Matt S. Trout (cpan:MSTROUT) <mst@shadowcat.co.uk>
752 Copyright (c) 2010-2011 the HTML::Zoom L</AUTHOR> and L</CONTRIBUTORS>
757 This library is free software, you can redistribute it and/or modify
758 it under the same terms as Perl itself.