5 use HTML::Zoom::ZConfig;
6 use HTML::Zoom::ReadFH;
7 use HTML::Zoom::Transform;
8 use HTML::Zoom::TransformBuilder;
11 our $VERSION = '0.009006';
13 $VERSION = eval $VERSION; # numify the quoted version string (conventional idiom; presumably kept so "_" dev-release versions would still compare numerically)
16 my ($class, $args) = @_;
18 $new->{zconfig} = HTML::Zoom::ZConfig->new($args->{zconfig}||{});
22 sub zconfig { shift->_self_or_new->{zconfig} }
25 ref($_[0]) ? $_[0] : $_[0]->new
29 bless({ %{$_[0]}, %{$_[1]} }, ref($_[0]));
33 my $self = shift->_self_or_new;
35 initial_events => shift,
40 my $self = shift->_self_or_new;
41 $self->from_events($self->zconfig->parser->html_to_events($_[0]))
45 my $self = shift->_self_or_new;
47 $self->from_html(do { local (@ARGV, $/) = ($filename); <> });
52 die "No events to build from - forgot to call from_html?"
53 unless $self->{initial_events};
54 my $sutils = $self->zconfig->stream_utils;
55 my $stream = $sutils->stream_from_array(@{$self->{initial_events}});
56 $stream = $_->apply_to_stream($stream) for @{$self->{transforms}||[]};
61 HTML::Zoom::ReadFH->from_zoom(shift);
66 [ $self->zconfig->stream_utils->stream_to_array($self->to_stream) ];
76 my ($self, $code) = @_;
82 my ($self, $predicate, $code) = @_;
94 $self->zconfig->producer->html_from_stream($self->to_stream);
99 ref($self)->new($self)->from_html($self->to_html);
103 my $self = shift->_self_or_new;
104 my ($transform) = @_;
107 @{$self->{transforms}||[]},
114 my $self = shift->_self_or_new;
115 my ($selector, $filter) = @_;
116 $self->with_transform(
117 HTML::Zoom::Transform->new({
118 zconfig => $self->zconfig,
119 selector => $selector,
120 filters => [ $filter ]
126 my $self = shift->_self_or_new;
128 return HTML::Zoom::TransformBuilder->new({
129 zconfig => $self->zconfig,
130 selector => $selector,
135 # There's a bug waiting to happen here: if you do something like
137 # $zoom->select('.foo')
138 # ->remove_attribute(class => 'foo')
140 # ->well_anything_really
142 # the second action won't execute because it doesn't match anymore.
143 # Ideally instead we'd merge the match subs but that's more complex to
144 # implement so I'm deferring it for the moment.
148 die "Can't call ->then without a previous transform"
149 unless $self->{transforms};
150 $self->select($self->{transforms}->[-1]->selector);
154 my ($self, $selector, @args) = @_;
155 my $sel = $self->select($selector);
156 my $meth = our $AUTOLOAD;
158 if (ref($selector) eq 'HASH') {
160 $ret = $ret->_do($_, $meth, @{$selector->{$_}}) for keys %$selector;
163 $self->_do($selector, $meth, @args);
168 my ($self, $selector, $meth, @args) = @_;
169 my $sel = $self->select($selector);
170 if( my $cr = $sel->_zconfig->filter_builder->can($meth)) {
171 return $sel->$meth(@args);
173 die "We can't do $meth on ->select('$selector')";
183 HTML::Zoom - selector based streaming template engine
189 my $template = <<HTML;
192 <title>Hello people</title>
195 <h1 id="greeting">Placeholder</h1>
198 <p>Name: <span class="name">Bob</span></p>
199 <p>Age: <span class="age">23</span></p>
201 <hr class="between" />
207 my $output = HTML::Zoom
208 ->from_html($template)
209 ->select('title, #greeting')->replace_content('Hello world & dog!')
210 ->select('#list')->repeat_content(
213 $_->select('.name')->replace_content('Matt')
214 ->select('.age')->replace_content('26')
217 $_->select('.name')->replace_content('Mark')
218 ->select('.age')->replace_content('0x29')
221 $_->select('.name')->replace_content('Epitaph')
222 ->select('.age')->replace_content('<redacted>')
225 { repeat_between => '.between' }
239 <title>Hello world &amp; dog!</title>
242 <h1 id="greeting">Hello world &amp; dog!</h1>
245 <p>Name: <span class="name">Matt</span></p>
246 <p>Age: <span class="age">26</span></p>
248 <hr class="between" />
250 <p>Name: <span class="name">Mark</span></p>
251 <p>Age: <span class="age">0x29</span></p>
253 <hr class="between" />
255 <p>Name: <span class="name">Epitaph</span></p>
256 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
266 is($output, $expect, 'Synopsis code works ok');
270 =head1 DANGER WILL ROBINSON
272 This is a 0.9 release. That means that I'm fairly happy the API isn't going
273 to change in surprising and upsetting ways before 1.0 and a real compatibility
274 freeze. But it also means that if it turns out there's a mistake the size of
275 a politician's ego in the API design that I haven't spotted yet there may be
276 a bit of breakage between here and 1.0. Hopefully not though. Appendages
277 crossed and all that.
279 Worse still, the rest of the distribution isn't documented yet. I'm sorry.
280 I suck. But lots of people have been asking me to ship this, docs or no, so
281 having got this class itself at least somewhat documented I figured now was
282 a good time to cut a first real release.
286 HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
287 CSS selector based semantic templating engine for HTML and HTML-like
290 Which is, on the whole, a bit of a mouthful. So let me step back a moment
291 and explain why you care enough to understand what I mean:
295 HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
296 piece of data into a document by doing:
298 $('.username').replaceAll(username);
300 In HTML::Zoom one can write
302 $zoom->select('.username')->replace_content($username);
304 which is, I hope, almost as clear, hampered only by the fact that Zoom can't
305 assume a global document and therefore has nothing quite so simple as the
306 $() function to get the initial selection.
308 L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
309 specification, and will continue to track that rather than the W3C standards
310 for the foreseeable future on grounds of pragmatism. Also on the grounds that their
311 spec is written in EN_US rather than EN_W3C, and I read the former much better.
313 I am happy to admit that it's very, very much a subset at the moment - see the
314 L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
315 and more to be supported over time as we need it and patch it in.
317 =head2 CLEAN TEMPLATES
319 HTML::Zoom is the cure for messy templates. How many times have you looked at
322 <form action="/somewhere">
323 [% FOREACH field IN fields %]
324 <label for="[% field.id %]">[% field.label %]</label>
325 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
329 and despaired of the fact that neither the HTML structure nor the logic are
330 remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
333 <form class="myform" action="/somewhere">
338 $zoom->select('.myform')->repeat_content([
339 map { my $field = $_; sub {
342 ->add_to_attribute( for => $field->{id} )
344 ->replace_content( $field->{label} )
347 ->add_to_attribute( name => $field->{name} )
349 ->add_to_attribute( type => $field->{type} )
351 ->add_to_attribute( value => $field->{value} )
356 This is, admittedly, very much not shorter. However, it makes it extremely
357 clear what's happening and therefore less hassle to maintain. Especially
358 because it allows the designer to fiddle with the HTML without cutting
359 himself on sharp ELSE clauses, and the developer to add available data to
360 the template without getting angle bracket cuts on sensitive parts.
362 Better still, HTML::Zoom knows that it's inserting content into HTML and
363 can escape it for you - the example template should really have been:
365 <form action="/somewhere">
366 [% FOREACH field IN fields %]
367 <label for="[% field.id | html %]">[% field.label | html %]</label>
368 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
372 and frankly I'll take slightly more code any day over *that* crawling horror.
374 (addendum: I pick on L<Template Toolkit|Template> here specifically because
375 it's the template system I hate the least - for text templating, I don't
376 honestly think I'll ever like anything except the next version of Template
377 Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
379 =head2 PUTTING THE FUN INTO FUNCTIONAL
381 The principle of HTML::Zoom is to provide a reusable, functional container
382 object that lets you build up a set of transforms to be applied; every method
383 call you make on a zoom object returns a new object, so it's safe to do so
384 on one somebody else gave you without worrying about altering state (with
385 the notable exception of ->next for stream objects, which I'll come to later).
389 my $z2 = $z1->select('.name')->replace_content($name);
391 my $z3 = $z2->select('.title')->replace_content('Ms.');
393 each time produces a new Zoom object. If you want to package up a set of
394 transforms to re-use, HTML::Zoom provides an 'apply' method:
396 my $add_name = sub { $_->select('.name')->replace_content($name) };
398 my $same_as_z2 = $z1->apply($add_name);
400 =head2 LAZINESS IS A VIRTUE
402 HTML::Zoom does its best to defer doing anything until it's absolutely
403 required. The only point at which it descends into state is when you force
404 it to create a stream, directly by:
406 my $stream = $zoom->to_stream;
408 while (my $evt = $stream->next) {
409 # handle zoom event here
414 my $final_html = $zoom->to_html;
416 my $fh = $zoom->to_fh;
418 while (my $chunk = $fh->getline) {
422 Better still, the $fh returned doesn't create its stream until the first
423 call to getline, which means that until you call that and force it to be
424 stateful you can get back to the original stateless Zoom object via:
426 my $zoom = $fh->to_zoom;
428 which is exceedingly handy for filtering L<Plack> PSGI responses, among other
431 Because HTML::Zoom doesn't try and evaluate everything up front, you can
432 generally put things together in whatever order is most appropriate. This
435 my $start = HTML::Zoom->from_html($html);
437 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
441 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
443 my $zoom = $start->from_html($html);
445 will produce equivalent final $zoom objects, thus proving that there can be
446 more than one way to do it without one of them being a
447 L<bait and switch|Switch>.
449 =head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
451 HTML::Zoom's execution always happens in terms of streams under the hood
452 - that is, the basic pattern for doing anything is -
454 my $stream = get_stream_from_somewhere
456 while (my ($evt) = $stream->next) {
457 # do something with the event
460 More importantly, all selectors and filters are also built as stream
461 operations, so a selector and filter pair is effectively:
465 my $next_evt = $self->parent_stream->next;
466 if ($self->selector_matches($next_evt)) {
467 return $self->apply_filter_to($next_evt);
473 Internally, things are marginally more complicated than that, but not enough
474 that you as a user should normally need to care.
476 In fact, an HTML::Zoom object is mostly just a container for the relevant
477 information from which to build the final stream that does the real work. A
478 stream built from a Zoom object is a stream of events from parsing the
479 initial HTML, wrapped in a filter stream per selector/filter pair provided
482 The upshot of this is that the application of filters works just as well on
483 streams as on the original Zoom object - in fact, when you run a
484 L</repeat_content> operation your subroutines are applied to the stream for
485 that element of the repeat, rather than constructing a new zoom per repeat
490 $_->select('div')->replace_content('I AM A DIV!');
492 works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
493 shares sufficient of the implementation that you can generally forget the
494 difference - barring the fact that a stream already has state attached so
495 things like to_fh are no longer available.
497 =head2 POP! GOES THE WEASEL
499 ... and by Weasel, I mean layout.
501 HTML::Zoom's filehandle object supports an additional event key, 'flush',
502 that is transparent to the rest of the system but indicates to the filehandle
503 object to end a getline operation at that point and return the HTML so far.
505 This means that in an environment where streaming output is available, such
506 as a number of the L<Plack> PSGI handlers, you can add the flush key to an
507 event in order to ensure that the HTML generated so far is flushed through
508 to the browser right now. This can be especially useful if you know you're
509 about to call a web service or a potentially slow database query or similar
510 to ensure that at least the header/layout of your page renders now, improving
511 perceived user responsiveness while your application waits around for the
514 This is currently exposed by the 'flush_before' option to the collect filter,
515 which incidentally also underlies the replace and repeat filters, so to
516 indicate we want this behaviour to happen before a query is executed we can
517 write something like:
519 $zoom->select('.item')->repeat(sub {
520 if (my $row = $db_thing->next) {
521 return sub { $_->select('.item-name')->replace_content($row->name) }
525 }, { flush_before => 1 });
527 which should have the desired effect given a sufficiently lazy $db_thing (for
528 example a L<DBIx::Class::ResultSet> object).
530 =head2 A FISTFUL OF OBJECTS
532 At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
533 whose purpose is to hang on to the various bits and pieces that things need
534 so that there's a common way of accessing shared functionality.
536 Were I a computer scientist I would probably call this an "Inversion of
537 Control" object - which you'd be welcome to google to learn more about, or
538 you can just imagine a computer scientist being suspended upside down over
539 a pit. Either way works for me, I'm a pure maths grad.
541 The ZConfig object hangs on to one each of the following for you:
545 =item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
547 =item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
549 =item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
551 =item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
553 =item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
557 In theory you could replace any of these with anything you like, but in
558 practice you're probably best restricting yourself to subclasses, or at
559 least things that manage to look like the original if you squint a bit.
561 If you do something more clever than that, or find yourself overriding things
562 in your ZConfig a lot, please please tell us about it via one of the means
563 mentioned under L</SUPPORT>.
565 =head2 SEMANTIC DIDACTIC
567 Some will argue that overloading CSS selectors to do data stuff is a terrible
568 idea, and possibly even a step towards the "Concrete Javascript" pattern
569 (which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
570 where it keeps reminding me of the late, great Tony Hart's plasticine friend).
572 To which I say, "eh", "meh", and possibly also "feh". If it really upsets
573 you, either use extra classes for this (and remove them afterwards) or
574 use special fake elements or, well, honestly, just use something different.
575 L<Template::Semantic> provides a similar idea to zoom except using XPath
576 and XML::LibXML transforms rather than a lightweight streaming approach -
577 maybe you'd like that better. Or maybe you really did want
578 L<Template Toolkit|Template> after all. It is still damn good at what it does,
581 So far, however, I've found that for new sites the designers I'm working with
582 generally want to produce nice semantic HTML with classes that represent the
583 nature of the data rather than the structure of the layout, so sharing them
584 as a common interface works really well for us.
586 In the absence of any evidence that overloading CSS selectors has killed
587 children or unexpectedly set fire to grandmothers - and given microformats
588 have been around for a while there's been plenty of opportunity for
589 octogenarian combustion - I'd suggest you give it a try and see if you like it.
591 =head2 GET THEE TO A SUMMARY!
595 HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
596 CSS selector based semantic templating engine for HTML and HTML-like
599 But I said that already. Although hopefully by now you have some idea what I
600 meant when I said it. If you didn't have any idea the first time. I mean, I'm
601 not trying to call you stupid or anything. Just saying that maybe it wasn't
602 totally obvious without the explanation. Or something.
606 Maybe we should just move on to the method docs.
612 my $zoom = HTML::Zoom->new;
614 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
616 Create a new empty Zoom object. You can optionally pass an
617 L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
618 the default components.
620 This method isn't often used directly since several other methods can also
621 act as constructors, notably L</select> and L</from_html>.
625 my $zconfig = $zoom->zconfig;
627 Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
628 shouldn't usually need to call this yourself.
632 my $zoom = HTML::Zoom->from_html($html);
634 my $z2 = $z1->from_html($html);
636 Parses the HTML using the current zconfig's parser object and returns a new
637 zoom instance with that as the source HTML to be transformed.
641 my $zoom = HTML::Zoom->from_file($file);
643 my $z2 = $z1->from_file($file);
645 Convenience method - slurps the contents of $file and calls from_html with it.
649 my $zoom = HTML::Zoom->from_events($evt);
651 Create a new Zoom object from collected events
655 my $stream = $zoom->to_stream;
657 while (my ($evt) = $stream->next) {
660 Creates a stream, starting with a stream of the events from the HTML supplied
661 via L</from_html> and then wrapping it in turn with each selector+filter pair
662 that has been applied to the zoom object.
666 my $fh = $zoom->to_fh;
668 call_something_expecting_a_filehandle($fh);
670 Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
671 time its getline method is called and then return all HTML up to the next
672 event with 'flush' set.
674 You can pass this filehandle to compliant PSGI handlers (and probably most
681 Runs the zoom object's transforms without doing anything with the results.
683 Normally used to get side effects of a zoom run - for example when using
684 L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
688 my $z2 = $z1->apply(sub {
689 $_->select('div')->replace_content('I AM A DIV!') })
692 Sets $_ to the zoom object and then runs the provided code. Basically syntax
693 sugar, the following is entirely equivalent:
696 shift->select('div')->replace_content('I AM A DIV!') })
699 my $z2 = $sub->($z1);
703 my $z2 = $z1->apply_if($cond, sub {
704 $_->select('div')->replace_content('I AM A DIV!') })
707 Like ->apply, but will only run the transform if $cond is true.
711 my $html = $zoom->to_html;
713 Runs the zoom processing and returns the resulting HTML.
717 my $z2 = $z1->memoize;
719 Creates a new zoom whose source HTML is the results of the original zoom's
720 processing. Effectively syntax sugar for:
722 my $z2 = HTML::Zoom->from_html($z1->to_html);
724 but preserves your L<HTML::Zoom::ZConfig> object.
728 my $zoom = HTML::Zoom->with_filter(
729 'div', $filter_builder->replace_content('I AM A DIV!')
732 my $z2 = $z1->with_filter(
733 'div', $filter_builder->replace_content('I AM A DIV!')
736 Lower level interface than L</select> to adding filters to your zoom object.
738 In normal usage, you probably don't need to call this yourself.
742 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
744 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
746 Returns an intermediary object of the class L<HTML::Zoom::TransformBuilder>
747 on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
749 In normal usage you should generally always put the pair of method calls
750 together; the intermediary object isn't designed or expected to stick around.
754 my $z2 = $z1->select('div')->add_to_attribute(class => 'spoon')
756 ->replace_content('I AM A DIV!');
758 Re-runs the previous select to allow you to chain actions together on the
761 =head1 AUTOLOAD METHODS
763 L<HTML::Zoom> AUTOLOADS methods against L</select> so that you can reduce a
764 certain amount of boilerplate typing. This allows you to replace:
766 $z->select('div')->replace_content("Hello World");
770 $z->replace_content(div => "Hello World");
772 Besides saving a few keystrokes per invocation, you may feel this looks neater
773 in your code and increases understanding.
777 mst - Matt S. Trout (cpan:MSTROUT) <mst@shadowcat.co.uk>
797 Copyright (c) 2010-2011 the HTML::Zoom L</AUTHOR> and L</CONTRIBUTORS>
802 This library is free software, you can redistribute it and/or modify
803 it under the same terms as Perl itself.