factor out application of a match+filter to a stream into a Transform object
[catagits/HTML-Zoom.git] / lib / HTML / Zoom.pm
CommitLineData
d80786d0 1package HTML::Zoom;
2
3use strict;
4use warnings FATAL => 'all';
5
6use HTML::Zoom::ZConfig;
7use HTML::Zoom::MatchWithoutFilter;
bf5a23d0 8use HTML::Zoom::ReadFH;
655965b3 9use HTML::Zoom::Transform;
d80786d0 10
# Constructor: builds an empty zoom object.
#
# $args is an optional hashref. Its 'zconfig' entry (an empty hashref when
# absent or false) is passed to HTML::Zoom::ZConfig->new and the result is
# stored under the object's 'zconfig' key.
#
# Returns the new hashref blessed into $class.
sub new {
  my ($class, $args) = @_;
  my $zconfig = HTML::Zoom::ZConfig->new($args->{zconfig} || {});
  return bless({ zconfig => $zconfig }, $class);
}
17
# Accessor for the 'zconfig' slot. Works as both a class and an instance
# method: the invocant is first normalised through _self_or_new.
sub zconfig {
  my $self = shift->_self_or_new;
  return $self->{zconfig};
}
19
# Lets methods double as constructors: an object invocant is returned as-is,
# while a class-name invocant gets a fresh instance via ->new (no arguments).
sub _self_or_new {
  my ($invocant) = @_;
  return $invocant if ref($invocant);
  return $invocant->new;
}
23
# Non-destructive update: returns a new object of the same class as $self
# whose keys are a shallow copy of $self's, overlaid with the key/value
# pairs from the $overrides hashref. $self itself is left untouched.
sub _with {
  my ($self, $overrides) = @_;
  my %merged = (%$self, %$overrides);
  return bless(\%merged, ref($self));
}
27
# Returns a copy of the zoom object (or a fresh one when called on the
# class) whose 'initial_events' slot holds the result of running the
# configured parser's html_to_events over the supplied HTML string.
sub from_html {
  my $self = shift->_self_or_new;
  # After the shift above, $_[0] is the HTML argument.
  my %source = (
    initial_events => $self->zconfig->parser->html_to_events($_[0]),
  );
  return $self->_with(\%source);
}
34
# Convenience constructor: slurps the contents of $filename and hands them
# to from_html, returning whatever from_html returns.
#
# The file is opened with an explicit three-argument open, so $filename is
# always treated as a literal path. Names such as "cmd |" or "-" are not
# given any special meaning. Dies with the OS error if the file cannot be
# opened, rather than parsing nothing.
sub from_file {
  my $self = shift->_self_or_new;
  my $filename = shift;
  open(my $fh, '<', $filename)
    or die "Can't open ${filename} for reading: $!";
  # Undefine the record separator locally so a single read returns the
  # whole file.
  my $html = do { local $/; <$fh> };
  close($fh);
  $self->from_html($html);
}
40
# Builds the stream that performs the actual transformation.
#
# Starts from a stream over the stored 'initial_events' and wraps it, in
# order, with one HTML::Zoom::Transform per entry in 'filters' (each entry
# being a [ selector, filter ] pair). Dies if no initial events have been
# set. Returns the outermost stream.
sub to_stream {
  my $self = shift;
  my $events = $self->{initial_events}
    or die "No events to build from - forgot to call from_html?";
  my $stream = $self->zconfig->stream_utils->stream_from_array(@$events);
  for my $pair (@{ $self->{filters} || [] }) {
    my ($match, $filter) = @$pair;
    my $transform = HTML::Zoom::Transform->new({
      selector => $match,
      filters => [ $filter ],
      zconfig => $self->zconfig,
    });
    $stream = $transform->apply_to_stream($stream);
  }
  return $stream;
}
57
# Wraps this zoom object in a filehandle-like reader by delegating to
# HTML::Zoom::ReadFH->from_zoom, and returns the result.
sub to_fh {
  my ($self) = @_;
  return HTML::Zoom::ReadFH->from_zoom($self);
}
61
# Executes the transforms purely for their side effects: the stream from
# to_stream is passed to stream_to_array and the result is discarded.
# Always returns nothing (undef / empty list).
sub run {
  my ($self) = @_;
  my $sutils = $self->zconfig->stream_utils;
  $sutils->stream_to_array($self->to_stream);
  return;
}
67
# Runs $code as a method on the zoom object with $_ localised to that same
# object, and returns whatever $code returns. The caller's $_ is restored
# on exit.
sub apply {
  my $self = shift;
  my $code = shift;
  local $_ = $self;
  return $self->$code();
}
73
# Runs the zoom processing and renders the result: the stream from
# to_stream is passed to the configured producer's html_from_stream, whose
# return value is returned.
sub to_html {
  my ($self) = @_;
  my $producer = $self->zconfig->producer;
  return $producer->html_from_stream($self->to_stream);
}
78
# Produces a new zoom whose source HTML is the rendered output of this one.
#
# The current object is passed to ->new as the args hashref, so new reads
# its 'zconfig' entry from it. The fresh object is then loaded with this
# object's to_html output via from_html.
sub memoize {
  my ($self) = @_;
  my $fresh = ref($self)->new($self);
  return $fresh->from_html($self->to_html);
}
83
# Lower-level way of attaching a filter. Parses $selector into a match,
# then returns a copy of the zoom object whose 'filters' list is the
# existing list with a [ match, filter ] pair appended. The original
# object's list is not modified.
sub with_filter {
  my $self = shift->_self_or_new;
  my ($selector, $filter) = @_;
  my $match = $self->parse_selector($selector);
  my @filters = (@{ $self->{filters} || [] }, [ $match, $filter ]);
  return $self->_with({ filters => \@filters });
}
92
# Starts a select/filter pair. Parses $selector into a match and returns
# the result of HTML::Zoom::MatchWithoutFilter->construct, which is given
# the zoom object, the match and the configured filter builder.
sub select {
  my $self = shift->_self_or_new;
  my $selector = shift;
  my $match = $self->parse_selector($selector);
  return HTML::Zoom::MatchWithoutFilter->construct(
    $self,
    $match,
    $self->zconfig->filter_builder,
  );
}
101
102# There's a bug waiting to happen here: if you do something like
103#
104# $zoom->select('.foo')
1c4455ae 105# ->remove_attribute(class => 'foo')
d80786d0 106# ->then
107# ->well_anything_really
108#
109# the second action won't execute because it doesn't match anymore.
110# Ideally instead we'd merge the match subs but that's more complex to
111# implement so I'm deferring it for the moment.
112
# Re-selects using the match of the most recently added filter, so another
# action can be chained onto the same selector. Dies if no filter has been
# added yet. Returns whatever select returns.
sub then {
  my ($self) = @_;
  my $filters = $self->{filters}
    or die "Can't call ->then without a previous filter";
  return $self->select($filters->[-1][0]);
}
119
# Turns a selector into a matcher. A reference is assumed to be a matcher
# already and is returned unchanged; anything else is passed to the
# configured selector parser's parse_selector.
sub parse_selector {
  my ($self, $selector) = @_;
  if (ref($selector)) {
    return $selector;
  }
  return $self->zconfig->selector_parser->parse_selector($selector);
}
125
1261;
127
128=head1 NAME
129
130HTML::Zoom - selector based streaming template engine
131
132=head1 SYNOPSIS
133
134 use HTML::Zoom;
135
136 my $template = <<HTML;
137 <html>
138 <head>
139 <title>Hello people</title>
140 </head>
141 <body>
142 <h1 id="greeting">Placeholder</h1>
143 <div id="list">
144 <span>
145 <p>Name: <span class="name">Bob</span></p>
146 <p>Age: <span class="age">23</span></p>
147 </span>
148 <hr class="between" />
149 </div>
150 </body>
151 </html>
152 HTML
153
154 my $output = HTML::Zoom
155 ->from_html($template)
156 ->select('title, #greeting')->replace_content('Hello world & dog!')
157 ->select('#list')->repeat_content(
158 [
159 sub {
160 $_->select('.name')->replace_content('Matt')
161 ->select('.age')->replace_content('26')
162 },
163 sub {
164 $_->select('.name')->replace_content('Mark')
165 ->select('.age')->replace_content('0x29')
166 },
167 sub {
168 $_->select('.name')->replace_content('Epitaph')
169 ->select('.age')->replace_content('<redacted>')
170 },
171 ],
172 { repeat_between => '.between' }
173 )
174 ->to_html;
175
176will produce:
177
178=begin testinfo
179
180 my $expect = <<HTML;
181
182=end testinfo
183
184 <html>
185 <head>
186 <title>Hello world &amp; dog!</title>
187 </head>
188 <body>
189 <h1 id="greeting">Hello world &amp; dog!</h1>
190 <div id="list">
191 <span>
192 <p>Name: <span class="name">Matt</span></p>
193 <p>Age: <span class="age">26</span></p>
194 </span>
195 <hr class="between" />
196 <span>
197 <p>Name: <span class="name">Mark</span></p>
198 <p>Age: <span class="age">0x29</span></p>
199 </span>
200 <hr class="between" />
201 <span>
202 <p>Name: <span class="name">Epitaph</span></p>
203 <p>Age: <span class="age">&lt;redacted&gt;</span></p>
204 </span>
205
206 </div>
207 </body>
208 </html>
209
210=begin testinfo
211
212 HTML
213 is($output, $expect, 'Synopsis code works ok');
214
215=end testinfo
216
1c4455ae 217=head1 DANGER WILL ROBINSON
218
219This is a 0.9 release. That means that I'm fairly happy the API isn't going
220to change in surprising and upsetting ways before 1.0 and a real compatibility
221freeze. But it also means that if it turns out there's a mistake the size of
222a politician's ego in the API design that I haven't spotted yet there may be
223a bit of breakage between here and 1.0. Hopefully not though. Appendages
224crossed and all that.
225
226Worse still, the rest of the distribution isn't documented yet. I'm sorry.
227I suck. But lots of people have been asking me to ship this, docs or no, so
228having got this class itself at least somewhat documented I figured now was
229a good time to cut a first real release.
230
231=head1 DESCRIPTION
232
233HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
234CSS selector based semantic templating engine for HTML and HTML-like
235document formats.
236
237Which is, on the whole, a bit of a mouthful. So let me step back a moment
238and explain why you care enough to understand what I mean:
239
240=head2 JQUERY ENVY
241
242HTML::Zoom is the cure for JQuery envy. When your javascript guy pushes a
243piece of data into a document by doing:
244
245 $('.username').replaceAll(username);
246
247In HTML::Zoom one can write
248
249 $zoom->select('.username')->replace_content($username);
250
251which is, I hope, almost as clear, hampered only by the fact that Zoom can't
252assume a global document and therefore has nothing quite so simple as the
253$() function to get the initial selection.
254
255L<HTML::Zoom::SelectorParser> implements a subset of the JQuery selector
256specification, and will continue to track that rather than the W3C standards
257for the foreseeable future on grounds of pragmatism. Also on the grounds that their
258spec is written in EN_US rather than EN_W3C, and I read the former much better.
259
260I am happy to admit that it's very, very much a subset at the moment - see the
261L<HTML::Zoom::SelectorParser> POD for what's currently there, and expect more
262and more to be supported over time as we need it and patch it in.
263
264=head2 CLEAN TEMPLATES
265
266HTML::Zoom is the cure for messy templates. How many times have you looked at
267templates like this:
268
269 <form action="/somewhere">
270 [% FOREACH field IN fields %]
271 <label for="[% field.id %]">[% field.label %]</label>
272 <input name="[% field.name %]" type="[% field.type %]" value="[% field.value %]" />
273 [% END %]
274 </form>
275
276and despaired of the fact that neither the HTML structure nor the logic are
277remotely easy to read? Fortunately, with HTML::Zoom we can separate the two
278cleanly:
279
280 <form class="myform" action="/somewhere">
281 <label />
282 <input />
283 </form>
284
285 $zoom->select('.myform')->repeat_content([
286 map { my $field = $_; sub {
287
288 $_->select('label')
289 ->add_attribute( for => $field->{id} )
290 ->then
291 ->replace_content( $field->{label} )
292
293 ->select('input')
294 ->add_attribute( name => $field->{name} )
295 ->then
296 ->add_attribute( type => $field->{type} )
297 ->then
298 ->add_attribute( value => $field->{value} )
299
300 } } @fields
301 ]);
302
303This is, admittedly, very much not shorter. However, it makes it extremely
304clear what's happening and therefore less hassle to maintain. Especially
305because it allows the designer to fiddle with the HTML without cutting
306himself on sharp ELSE clauses, and the developer to add available data to
307the template without getting angle bracket cuts on sensitive parts.
308
309Better still, HTML::Zoom knows that it's inserting content into HTML and
310can escape it for you - the example template should really have been:
311
312 <form action="/somewhere">
313 [% FOREACH field IN fields %]
314 <label for="[% field.id | html %]">[% field.label | html %]</label>
315 <input name="[% field.name | html %]" type="[% field.type | html %]" value="[% field.value | html %]" />
316 [% END %]
317 </form>
318
319and frankly I'll take slightly more code any day over *that* crawling horror.
320
321(addendum: I pick on L<Template Toolkit|Template> here specifically because
322it's the template system I hate the least - for text templating, I don't
323honestly think I'll ever like anything except the next version of Template
324Toolkit better - but HTML isn't text. Zoom knows that. Do you?)
325
326=head2 PUTTING THE FUN INTO FUNCTIONAL
327
328The principle of HTML::Zoom is to provide a reusable, functional container
329object that lets you build up a set of transforms to be applied; every method
330call you make on a zoom object returns a new object, so it's safe to do so
331on one somebody else gave you without worrying about altering state (with
332the notable exception of ->next for stream objects, which I'll come to later).
333
334So:
335
336 my $z2 = $z1->select('.name')->replace_content($name);
337
338 my $z3 = $z2->select('.title')->replace_content('Ms.');
339
340each time produces a new Zoom object. If you want to package up a set of
341transforms to re-use, HTML::Zoom provides an 'apply' method:
342
343 my $add_name = sub { $_->select('.name')->replace_content($name) };
344
345 my $same_as_z2 = $z1->apply($add_name);
346
347=head2 LAZINESS IS A VIRTUE
348
349HTML::Zoom does its best to defer doing anything until it's absolutely
350required. The only point at which it descends into state is when you force
351it to create a stream, directly by:
352
c9e76777 353 my $stream = $zoom->to_stream;
1c4455ae 354
355 while (my $evt = $stream->next) {
356 # handle zoom event here
357 }
358
359or indirectly via:
360
361 my $final_html = $zoom->to_html;
362
363 my $fh = $zoom->to_fh;
364
365 while (my $chunk = $fh->getline) {
366 ...
367 }
368
369Better still, the $fh returned doesn't create its stream until the first
370call to getline, which means that until you call that and force it to be
371stateful you can get back to the original stateless Zoom object via:
372
373 my $zoom = $fh->to_zoom;
374
375which is exceedingly handy for filtering L<Plack> PSGI responses, among other
376things.
377
378Because HTML::Zoom doesn't try and evaluate everything up front, you can
379generally put things together in whatever order is most appropriate. This
380means that:
381
382 my $start = HTML::Zoom->from_html($html);
383
384 my $zoom = $start->select('div')->replace_content('THIS IS A DIV!');
385
386and:
387
388 my $start = HTML::Zoom->select('div')->replace_content('THIS IS A DIV!');
389
390 my $zoom = $start->from_html($html);
391
392will produce equivalent final $zoom objects, thus proving that there can be
393more than one way to do it without one of them being a
394L<bait and switch|Switch>.
395
396=head2 STOCKTON TO DARLINGTON UNDER STREAM POWER
397
398HTML::Zoom's execution always happens in terms of streams under the hood
399- that is, the basic pattern for doing anything is -
400
401 my $stream = get_stream_from_somewhere;
402
403 while (my ($evt) = $stream->next) {
404 # do something with the event
405 }
406
407More importantly, all selectors and filters are also built as stream
408operations, so a selector and filter pair is effectively:
409
410 sub next {
411 my ($self) = @_;
412 my $next_evt = $self->parent_stream->next;
413 if ($self->selector_matches($next_evt)) {
414 return $self->apply_filter_to($next_evt);
415 } else {
416 return $next_evt;
417 }
418 }
419
420Internally, things are marginally more complicated than that, but not enough
421that you as a user should normally need to care.
422
423In fact, an HTML::Zoom object is mostly just a container for the relevant
424information from which to build the final stream that does the real work. A
425stream built from a Zoom object is a stream of events from parsing the
426initial HTML, wrapped in a filter stream per selector/filter pair provided
427as described above.
428
429The upshot of this is that the application of filters works just as well on
430streams as on the original Zoom object - in fact, when you run a
431L</repeat_content> operation your subroutines are applied to the stream for
432that element of the repeat, rather than constructing a new zoom per repeat
433element as well.
434
435More concretely:
436
437 $_->select('div')->replace_content('I AM A DIV!');
438
439works on both HTML::Zoom objects themselves and HTML::Zoom stream objects and
440shares sufficient of the implementation that you can generally forget the
441difference - barring the fact that a stream already has state attached so
442things like to_fh are no longer available.
443
444=head2 POP! GOES THE WEASEL
445
446... and by Weasel, I mean layout.
447
448HTML::Zoom's filehandle object supports an additional event key, 'flush',
449that is transparent to the rest of the system but indicates to the filehandle
450object to end a getline operation at that point and return the HTML so far.
451
452This means that in an environment where streaming output is available, such
453as a number of the L<Plack> PSGI handlers, you can add the flush key to an
454event in order to ensure that the HTML generated so far is flushed through
455to the browser right now. This can be especially useful if you know you're
456about to call a web service or a potentially slow database query or similar
457to ensure that at least the header/layout of your page renders now, improving
458perceived user responsiveness while your application waits around for the
459data it needs.
460
461This is currently exposed by the 'flush_before' option to the collect filter,
462which incidentally also underlies the replace and repeat filters, so to
463indicate we want this behaviour to happen before a query is executed we can
464write something like:
465
466 $zoom->select('.item')->repeat(sub {
467 if (my $row = $db_thing->next) {
468 return sub { $_->select('.item-name')->replace_content($row->name) }
469 } else {
470 return
471 }
472 }, { flush_before => 1 });
473
474which should have the desired effect given a sufficiently lazy $db_thing (for
475example a L<DBIx::Class::ResultSet> object).
476
477=head2 A FISTFUL OF OBJECTS
478
479At the core of an HTML::Zoom system lurks an L<HTML::Zoom::ZConfig> object,
480whose purpose is to hang on to the various bits and pieces that things need
481so that there's a common way of accessing shared functionality.
482
483Were I a computer scientist I would probably call this an "Inversion of
484Control" object - which you'd be welcome to google to learn more about, or
485you can just imagine a computer scientist being suspended upside down over
486a pit. Either way works for me, I'm a pure maths grad.
487
488The ZConfig object hangs on to one each of the following for you:
489
490=over 4
491
492=item * An HTML parser, normally L<HTML::Zoom::Parser::BuiltIn>
493
494=item * An HTML producer (emitter), normally L<HTML::Zoom::Producer::BuiltIn>
495
496=item * An object to build event filters, normally L<HTML::Zoom::FilterBuilder>
497
498=item * An object to parse CSS selectors, normally L<HTML::Zoom::SelectorParser>
499
500=item * An object to build streams, normally L<HTML::Zoom::StreamUtils>
501
502=back
503
504In theory you could replace any of these with anything you like, but in
505practice you're probably best restricting yourself to subclasses, or at
506least things that manage to look like the original if you squint a bit.
507
508If you do something more clever than that, or find yourself overriding things
509in your ZConfig a lot, please please tell us about it via one of the means
510mentioned under L</SUPPORT>.
511
512=head2 SEMANTIC DIDACTIC
513
514Some will argue that overloading CSS selectors to do data stuff is a terrible
515idea, and possibly even a step towards the "Concrete Javascript" pattern
516(which I abhor) or Smalltalk's Morphic (which I ignore, except for the part
517where it keeps reminding me of the late, great Tony Hart's plasticine friend).
518
519To which I say, "eh", "meh", and possibly also "feh". If it really upsets
520you, either use extra classes for this (and remove them afterwards) or
521use special fake elements or, well, honestly, just use something different.
522L<Template::Semantic> provides a similar idea to zoom except using XPath
523and XML::LibXML transforms rather than a lightweight streaming approach -
524maybe you'd like that better. Or maybe you really did want
525L<Template Toolkit|Template> after all. It is still damn good at what it does,
526after all.
527
528So far, however, I've found that for new sites the designers I'm working with
529generally want to produce nice semantic HTML with classes that represent the
530nature of the data rather than the structure of the layout, so sharing them
531as a common interface works really well for us.
532
533In the absence of any evidence that overloading CSS selectors has killed
534children or unexpectedly set fire to grandmothers - and given microformats
535have been around for a while there's been plenty of opportunity for
536octogenarian combustion - I'd suggest you give it a try and see if you like it.
537
538=head2 GET THEE TO A SUMMARY!
539
540Erm. Well.
541
542HTML::Zoom is a lazy, stream oriented, streaming capable, mostly functional,
543CSS selector based semantic templating engine for HTML and HTML-like
544document formats.
545
546But I said that already. Although hopefully by now you have some idea what I
547meant when I said it. If you didn't have any idea the first time. I mean, I'm
548not trying to call you stupid or anything. Just saying that maybe it wasn't
549totally obvious without the explanation. Or something.
550
551Er.
552
553Maybe we should just move on to the method docs.
554
555=head1 METHODS
556
557=head2 new
558
559 my $zoom = HTML::Zoom->new;
560
561 my $zoom = HTML::Zoom->new({ zconfig => $zconfig });
562
563Create a new empty Zoom object. You can optionally pass an
564L<HTML::Zoom::ZConfig> instance if you're trying to override one or more of
565the default components.
566
567This method isn't often used directly since several other methods can also
568act as constructors, notably L</select> and L</from_html>.
569
570=head2 zconfig
571
572 my $zconfig = $zoom->zconfig;
573
574Retrieve the L<HTML::Zoom::ZConfig> instance used by this Zoom object. You
575shouldn't usually need to call this yourself.
576
577=head2 from_html
578
579 my $zoom = HTML::Zoom->from_html($html);
580
581 my $z2 = $z1->from_html($html);
582
583Parses the HTML using the current zconfig's parser object and returns a new
584zoom instance with that as the source HTML to be transformed.
585
586=head2 from_file
587
588 my $zoom = HTML::Zoom->from_file($file);
589
590 my $z2 = $z1->from_file($file);
591
592Convenience method - slurps the contents of $file and calls from_html with it.
593
594=head2 to_stream
595
596 my $stream = $zoom->to_stream;
597
598 while (my ($evt) = $stream->next) {
599 ...
600
601Creates a stream, starting with a stream of the events from the HTML supplied
602via L</from_html> and then wrapping it in turn with each selector+filter pair
603that have been applied to the zoom object.
604
605=head2 to_fh
606
607 my $fh = $zoom->to_fh;
608
609 call_something_expecting_a_filehandle($fh);
610
611Returns an L<HTML::Zoom::ReadFH> instance that will create a stream the first
612time its getline method is called and then return all HTML up to the next
613event with 'flush' set.
614
615You can pass this filehandle to compliant PSGI handlers (and probably most
616web frameworks).
617
618=head2 run
619
620 $zoom->run;
621
622Runs the zoom object's transforms without doing anything with the results.
623
624Normally used to get side effects of a zoom run - for example when using
625L<HTML::Zoom::FilterBuilder/collect> to slurp events for scraping or layout.
626
627=head2 apply
628
629 my $z2 = $z1->apply(sub {
630 $_->select('div')->replace_content('I AM A DIV!')
631 });
632
633Sets $_ to the zoom object and then runs the provided code. Basically syntax
634sugar, the following is entirely equivalent:
635
636 my $sub = sub {
637 shift->select('div')->replace_content('I AM A DIV!')
638 };
639
640 my $z2 = $sub->($z1);
641
642=head2 to_html
643
644 my $html = $zoom->to_html;
645
646Runs the zoom processing and returns the resulting HTML.
647
648=head2 memoize
649
650 my $z2 = $z1->memoize;
651
652Creates a new zoom whose source HTML is the results of the original zoom's
653processing. Effectively syntax sugar for:
654
655 my $z2 = HTML::Zoom->from_html($z1->to_html);
656
657but preserves your L<HTML::Zoom::ZConfig> object.
658
659=head2 with_filter
660
661 my $zoom = HTML::Zoom->with_filter(
662 'div', $filter_builder->replace_content('I AM A DIV!')
663 );
664
665 my $z2 = $z1->with_filter(
666 'div', $filter_builder->replace_content('I AM A DIV!')
667 );
668
669Lower level interface than L</select> to adding filters to your zoom object.
670
671In normal usage, you probably don't need to call this yourself.
672
673=head2 select
674
675 my $zoom = HTML::Zoom->select('div')->replace_content('I AM A DIV!');
676
677 my $z2 = $z1->select('div')->replace_content('I AM A DIV!');
678
679Returns an intermediary object of the class L<HTML::Zoom::MatchWithoutFilter>
680on which methods of your L<HTML::Zoom::FilterBuilder> object can be called.
681
682In normal usage you should generally always put the pair of method calls
683together; the intermediary object isn't designed or expected to stick around.
684
685=head2 then
686
687 my $z2 = $z1->select('div')->add_attribute(class => 'spoon')
688 ->then
689 ->replace_content('I AM A DIV!');
690
691Re-runs the previous select to allow you to chain actions together on the
692same selector.
693
694=head2 parse_selector
695
696 my $matcher = $zoom->parse_selector('div');
697
698Used by L</select> and L</with_filter> to invoke the current
699L<HTML::Zoom::SelectorParser> object to create a matcher object (currently
700a coderef but this is an implementation detail) for that selector.
701
702In normal usage, you probably don't need to call this yourself.
d80786d0 703
704=cut