--- /dev/null
+package HTML::Zoom::CodeStream;
+
+use strict;
+use warnings FATAL => 'all';
+
+sub from_array {
+ my ($class, @array) = @_;
+ $class->new({ code => sub {
+ return unless @array;
+ return shift @array;
+ }});
+}
+
+sub new {
+ my ($class, $args) = @_;
+ bless({ _code => $args->{code} }, $class);
+}
+
+sub next {
+ $_[0]->{_code}->()
+}
+
+1;
+
--- /dev/null
+package HTML::Zoom::FilterBuilder;
+
+use Devel::Dwarn;
+
+use strict;
+use warnings FATAL => 'all';
+use HTML::Zoom::CodeStream;
+
+sub new { bless({}, shift) }
+
+sub _stream_from_code {
+ HTML::Zoom::CodeStream->new({ code => $_[1] })
+}
+
+sub _stream_from_array {
+ shift; # lose $self
+ HTML::Zoom::CodeStream->from_array(@_)
+}
+
+sub _stream_concat {
+ shift; # lose $self
+ my @streams = @_;
+ my $cur_stream = shift(@streams) or die "No streams passed";
+ HTML::Zoom::CodeStream->new({
+ code => sub {
+ return unless $cur_stream;
+ my $evt;
+ until (($evt) = $cur_stream->next) {
+ return unless $cur_stream = shift(@streams);
+ }
+ return $evt;
+ }
+ });
+}
+
+sub set_attribute {
+ my ($self, $args) = @_;
+ my ($name, $value) = @{$args}{qw(name value)};
+ sub {
+ my $a = (my $evt = shift)->{attrs};
+ my $e = exists $a->{$name};
+ +{ %$evt, raw => undef, raw_attrs => undef,
+ attrs => { %$a, $name => $value },
+ ($e # add to name list if not present
+ ? ()
+ : (attr_names => [ @{$evt->{attr_names}}, $name ]))
+ }
+ };
+}
+
+sub add_attribute {
+ my ($self, $args) = @_;
+ my ($name, $value) = @{$args}{qw(name value)};
+ sub {
+ my $a = (my $evt = shift)->{attrs};
+ my $e = exists $a->{$name};
+ +{ %$evt, raw => undef, raw_attrs => undef,
+ attrs => {
+ %$a,
+ $name => join(' ', ($e ? $a->{$name} : ()), $value)
+ },
+ ($e # add to name list if not present
+ ? ()
+ : (attr_names => [ @{$evt->{attr_names}}, $name ]))
+ }
+ };
+}
+
+sub remove_attribute {
+ my ($self, $args) = @_;
+ my $name = $args->{name};
+ sub {
+ my $a = (my $evt = shift)->{attrs};
+ return $evt unless exists $a->{$name};
+ $a = { %$a }; delete $a->{$name};
+ +{ %$evt, raw => undef, raw_attrs => undef,
+ attrs => $a,
+ attr_names => [ grep $_ ne $name, @{$evt->{attr_names}} ]
+ }
+ };
+}
+
+sub add_before {
+ my ($self, $events) = @_;
+ sub { return $self->_stream_from_array(@$events, shift) };
+}
+
+sub add_after {
+ my ($self, $events) = @_;
+ sub {
+ my ($evt, $stream) = @_;
+ my $emit = $self->_stream_from_array(@$events);
+ if ($evt->{is_in_place_close}) {
+ return [ $evt, $emit ];
+ }
+ my ($filtered_evt, $coll) = @{$self->collect(undef, 1)->(@_)};
+ return [ $filtered_evt, $self->_stream_concat($coll, $emit) ];
+ };
+}
+
+sub prepend_inside {
+ my ($self, $events) = @_;
+ sub {
+ my $evt = shift;
+ if ($evt->{is_in_place_close}) {
+ $evt = { %$evt }; delete @{$evt}{qw(raw is_in_place_close)};
+ return [ $evt, $self->_stream_from_array(
+ @$events, { type => 'CLOSE', name => $evt->{name} }
+ ) ];
+ }
+ return $self->_stream_from_array($evt, @$events);
+ };
+}
+
+sub replace {
+ my ($self, $events) = @_;
+ sub {
+ my ($evt, $stream) = @_;
+ my $emit = $self->_stream_from_array(@$events);
+ if ($evt->{is_in_place_close}) {
+ return $emit
+ }
+ return $self->_stream_concat($emit, $self->collect->(@_));
+ };
+}
+
+sub collect {
+ my ($self, $into, $passthrough) = @_;
+ sub {
+ my ($evt, $stream) = @_;
+ push(@$into, $evt) if $into;
+ if ($evt->{is_in_place_close}) {
+ return $evt if $passthrough;
+ return;
+ }
+ my $name = $evt->{name};
+ my $depth = 1;
+ my $collector = $self->_stream_from_code(sub {
+ return unless $stream;
+ while (my ($evt) = $stream->next) {
+ $depth++ if ($evt->{type} eq 'OPEN');
+ $depth-- if ($evt->{type} eq 'CLOSE');
+ unless ($depth) {
+ undef $stream;
+ return $evt if $passthrough;
+ return;
+ }
+ push(@$into, $evt) if $into;
+ return $evt if $passthrough;
+ }
+ die "Never saw closing </${name}> before end of source";
+ });
+ return $passthrough ? [ $evt, $collector ] : $collector;
+ };
+}
+
+1;
--- /dev/null
+package HTML::Zoom::FilterStream;
+
+use strict;
+use warnings FATAL => 'all';
+
+sub new {
+ my ($class, $args) = @_;
+ bless(
+ {
+ _stream => $args->{stream},
+ _match => $args->{match},
+ _filter => $args->{filter},
+ },
+ $class
+ );
+}
+
+sub next {
+ my ($self) = @_;
+
+ # if our main stream is already gone then we can short-circuit
+ # straight out - there's no way for an alternate stream to be there
+
+ return unless $self->{_stream};
+
+ # if we have an alternate stream (provided by a filter call resulting
+ # from a match on the main stream) then we want to read from that until
+ # it's gone - we're still effectively "in the match" but this is the
+ # point at which that fact is abstracted away from downstream consumers
+
+ if (my $alt = $self->{_alt_stream}) {
+
+ if (my ($evt) = $alt->next) {
+ return $evt;
+ }
+
+ # once the alternate stream is exhausted we can throw it away so future
+ # requests fall straight through to the main stream
+
+ delete $self->{_alt_stream};
+ }
+
+ # if there's no alternate stream currently, process the main stream
+
+ while (my ($evt) = $self->{_stream}->next) {
+
+ # don't match this event? return it immediately
+
+ return $evt unless $evt->{type} eq 'OPEN' and $self->{_match}->($evt);
+
+ # run our filter routine against the current event
+
+ my ($res) = $self->{_filter}->($evt, $self->{_stream});
+
+ # if the result is just an event, we can return that now
+
+ return $res if ref($res) eq 'HASH';
+
+ # if no result at all, jump back to the top of the loop to get the
+ # next event and try again - the filter has eaten this one
+
+ next unless defined $res;
+
+ # ARRAY means a pair of [ $evt, $new_stream ]
+
+ if (ref($res) eq 'ARRAY') {
+ $self->{_alt_stream} = $res->[1];
+ return $res->[0];
+ }
+
+ # the filter returned a stream - if it contains something return the
+ # first entry and stash it as the new alternate stream
+
+ if (my ($new_evt) = $res->next) {
+ $self->{_alt_stream} = $res;
+ return $new_evt;
+ }
+
+ # we got a new alternate stream but it turned out to be empty
+ # - this will happens for e.g. with an in place close (<foo />) that's
+ # being removed. In that case, we fall off to loop back round and try
+ # the next event from our main stream
+ }
+
+ # main stream exhausted so throw it away so we hit the short circuit
+ # at the top and return nothing to indicate to our caller we're done
+
+ delete $self->{_stream};
+ return;
+}
+
+1;
--- /dev/null
+package HTML::Zoom::Parser::BuiltIn;
+
+use strict;
+use warnings FATAL => 'all';
+
+use HTML::Zoom::CodeStream;
+
+sub html_to_events {
+ my ($class, $text) = @_;
+ my @events;
+ _hacky_tag_parser($text => sub { push @events, $_[0] });
+ return \@events;
+}
+
+sub html_to_stream {
+ my ($class, $text) = @_;
+ return HTML::Zoom::CodeStream->from_array(@{$class->html_to_events($text)});
+}
+
+sub _hacky_tag_parser {
+ my ($text, $handler) = @_;
+ while (
+ $text =~ m{
+ (
+ (?:[^<]*) < (?:
+ ( / )? ( [^/!<>\s"'=]+ )
+ ( (?:"[^"]*"|'[^']*'|[^"'<>])+? )?
+ |
+ (!-- .*? -- | ![^\-] .*? )
+ ) (\s*/\s*)? >
+ )
+ ([^<]*)
+ }sxg
+ ) {
+ my ($whole, $is_close, $tag_name, $attributes, $is_comment,
+ $in_place_close, $content)
+ = ($1, $2, $3, $4, $5, $6, $7, $8);
+ next if defined $is_comment;
+ $tag_name =~ tr/A-Z/a-z/;
+ if ($is_close) {
+ $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole });
+ } else {
+ $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/;
+ $handler->({
+ type => 'OPEN',
+ name => $tag_name,
+ is_in_place_close => $in_place_close,
+ _hacky_attribute_parser($attributes),
+ raw_attrs => $attributes||'',
+ raw => $whole,
+ });
+ if ($in_place_close) {
+ $handler->({
+ type => 'CLOSE', name => $tag_name, raw => '',
+ is_in_place_close => 1
+ });
+ }
+ }
+ if (length $content) {
+ $handler->({ type => 'TEXT', raw => $content });
+ }
+ }
+}
+
+sub _hacky_attribute_parser {
+ my ($attr_text) = @_;
+ my (%attrs, @attr_names);
+ while (
+ $attr_text =~ m{
+ ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))?
+ }sgx
+ ) {
+ my $key = $1;
+ my $test = $2;
+ my $val = ( $3 ? $4 : ( $5 ? $6 : $7 ));
+ my $lckey = lc($key);
+ if ($test) {
+ $attrs{$lckey} = _simple_unescape($val);
+ } else {
+ $attrs{$lckey} = $lckey;
+ }
+ push(@attr_names, $lckey);
+ }
+ (attrs => \%attrs, attr_names => \@attr_names);
+}
+
+sub _simple_unescape {
+ my $str = shift;
+ $str =~ s/"/"/g;
+ $str =~ s/</</g;
+ $str =~ s/>/>/g;
+ $str =~ s/&/&/g;
+ $str;
+}
+
+sub _simple_escape {
+ my $str = shift;
+ $str =~ s/&/&/g;
+ $str =~ s/"/"/g;
+ $str =~ s/</</g;
+ $str =~ s/>/>/g;
+ $str;
+}
+
+1;
--- /dev/null
+package HTML::Zoom::Producer::BuiltIn;
+
+use strict;
+use warnings FATAL => 'all';
+
+sub html_from_stream {
+ my ($class, $stream) = @_;
+ my $html;
+ while (my ($evt) = $stream->next) { $html .= $class->_event_to_html($evt) }
+ return $html;
+}
+
+sub _event_to_html {
+ my ($self, $evt) = @_;
+ # big expression
+ if (defined $evt->{raw}) {
+ $evt->{raw}
+ } elsif ($evt->{type} eq 'OPEN') {
+ '<'
+ .$evt->{name}
+ .(defined $evt->{raw_attrs}
+ ? $evt->{raw_attrs}
+ : do {
+ my @names = @{$evt->{attr_names}};
+ @names
+ ? join(' ', '', map qq{${_}="${\$evt->{attrs}{$_}}"}, @names)
+ : ''
+ }
+ )
+ .($evt->{is_in_place_close} ? ' /' : '')
+ .'>'
+ } elsif ($evt->{type} eq 'CLOSE') {
+ '</'.$evt->{name}.'>'
+ } elsif ($evt->{type} eq 'EMPTY') {
+ ''
+ } else {
+ die "No raw value in event and no special handling for type ".$evt->{type};
+ }
+}
+
+1;
--- /dev/null
+package HTML::Zoom::SelectorParser;
+
+use strict;
+use warnings FATAL => 'all';
+use Carp qw(confess);
+
+my $sel_char = '-\w_';
+my $sel_re = qr/([$sel_char]+)/;
+
+sub new { bless({}, shift) }
+
+sub _raw_parse_simple_selector {
+ for ($_[1]) { # same pos() as outside
+
+ # '*' - match anything
+
+ /\G\*/gc and
+ return sub { 1 };
+
+ # 'element' - match on tag name
+
+ /\G$sel_re/gc and
+ return do {
+ my $name = $1;
+ sub { $_[0]->{name} && $_[0]->{name} eq $name }
+ };
+
+ # '#id' - match on id attribute
+
+ /\G#$sel_re/gc and
+ return do {
+ my $id = $1;
+ sub { $_[0]->{attrs}{id} && $_[0]->{attrs}{id} eq $id }
+ };
+
+ # '.class1.class2' - match on intersection of classes
+
+ /\G((?:\.$sel_re)+)/gc and
+ return do {
+ my $cls = $1; $cls =~ s/^\.//;
+ my @cl = split(/\./, $cls);
+ sub {
+ $_[0]->{attrs}{class}
+ && !grep $_[0]->{attrs}{class} !~ /\b$_\b/, @cl
+ }
+ };
+
+ confess "Couldn't parse $_ as starting with simple selector";
+ }
+}
+
+sub parse_selector {
+ my $self = $_[0];
+ my $sel = $_[1]; # my pos() only please
+ die "No selector provided" unless $sel;
+ local *_;
+ for ($sel) {
+ my @sub;
+ PARSE: { do {
+ push(@sub, $self->_raw_parse_simple_selector($_));
+ last PARSE if (pos == length);
+ /\G\s*,\s*/gc or confess "Selectors not comma separated";
+ } until (pos == length) };
+ return $sub[0] if (@sub == 1);
+ return sub {
+ foreach my $inner (@sub) {
+ if (my $r = $inner->(@_)) { return $r }
+ }
+ };
+ }
+}
+
+
+1;
--- /dev/null
+use strict;
+use warnings FATAL => 'all';
+use Test::More;
+
+use HTML::Zoom::Parser::BuiltIn;
+use HTML::Zoom::Producer::BuiltIn;
+use HTML::Zoom::SelectorParser;
+use HTML::Zoom::FilterBuilder;
+use HTML::Zoom::FilterStream;
+
+my $tmpl = <<END;
+<body>
+ <div class="main">
+ <span class="hilight name">Bob</span>
+ <span class="career">Builder</span>
+ <hr />
+ </div>
+</body>
+END
+
+sub src_stream { HTML::Zoom::Parser::BuiltIn->html_to_stream($tmpl); }
+
+sub html_sink { HTML::Zoom::Producer::BuiltIn->html_from_stream($_[0]) }
+
+my $fb = HTML::Zoom::FilterBuilder->new;
+
+my $sp = HTML::Zoom::SelectorParser->new;
+
+sub filter {
+ my ($stream, $sel, $cb) = @_;
+ return HTML::Zoom::FilterStream->new({
+ stream => $stream,
+ match => $sp->parse_selector($sel),
+ filter => do { local $_ = $fb; $cb->($fb) }
+ });
+}
+
+sub run_for (&;$) {
+ my $cb = shift;
+ (html_sink
+ (filter
+ src_stream,
+ (shift or '.main'),
+ $cb
+ )
+ )
+}
+
+(my $expect = $tmpl) =~ s/(?=<div)/O HAI/;
+
+my $ohai = [ { type => 'TEXT', raw => 'O HAI' } ];
+
+is(
+ run_for { $_->add_before($ohai) },
+ $expect,
+ 'add_before ok'
+);
+
+($expect = $tmpl) =~ s/(?<=<\/div>)/O HAI/;
+
+is(
+ run_for { $_->add_after($ohai) },
+ $expect,
+ 'add_after ok'
+);
+
+($expect = $tmpl) =~ s/(?<=class="main">)/O HAI/;
+
+is(
+ run_for { $_->prepend_inside($ohai) },
+ $expect,
+ 'prepend_inside ok'
+);
+
+($expect = $tmpl) =~ s/<hr \/>/<hr>O HAI<\/hr>/;
+
+is(
+ (run_for { $_->prepend_inside($ohai) } 'hr'),
+ $expect,
+ 'prepend_inside ok with in place close'
+);
+
+is(
+ run_for { $_->replace($ohai) },
+'<body>
+ O HAI
+</body>
+',
+ 'replace ok'
+);
+
+done_testing;