X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDOM%2FTiny.pm;h=6ccba6f10e05416bfdef8a9d75233d7652d70809;hb=eb912094365336d4af3aede041fc636ef8bc7607;hp=5c9a1360a75a8da8e610a2f6d4c5753ac7687535;hpb=7218d584be520047d85245f35ec5edc337ac9e13;p=catagits%2FDOM-Tiny.git diff --git a/lib/DOM/Tiny.pm b/lib/DOM/Tiny.pm index 5c9a136..6ccba6f 100644 --- a/lib/DOM/Tiny.pm +++ b/lib/DOM/Tiny.pm @@ -2,389 +2,9 @@ package DOM::Tiny; use strict; use warnings; +use base qw(Mojo::DOM58); -use overload - '@{}' => sub { shift->child_nodes }, - '%{}' => sub { shift->attr }, - bool => sub {1}, - '""' => sub { shift->to_string }, - fallback => 1; - -use Carp 'croak'; -use DOM::Tiny::_Collection; -use DOM::Tiny::_CSS; -use DOM::Tiny::_HTML; -use Scalar::Util qw(blessed weaken); - -our $VERSION = '0.003'; - -sub new { - my $class = shift; - my $self = bless \DOM::Tiny::_HTML->new, ref $class || $class; - return @_ ? $self->parse(@_) : $self; -} - -sub TO_JSON { shift->_delegate('render') } - -sub all_text { shift->_all_text(1, @_) } - -sub ancestors { _select($_[0]->_collect($_[0]->_ancestors), $_[1]) } - -sub append { shift->_add(1, @_) } -sub append_content { shift->_content(1, 0, @_) } - -sub at { - my $self = shift; - return undef unless my $result = $self->_css->select_one(@_); - return $self->_build($result, $self->xml); -} - -sub attr { - my $self = shift; - - # Hash - my $tree = $self->tree; - my $attrs = $tree->[0] ne 'tag' ? {} : $tree->[2]; - return $attrs unless @_; - - # Get - return $attrs->{$_[0]} unless @_ > 1 || ref $_[0]; - - # Set - my $values = ref $_[0] ? $_[0] : {@_}; - @$attrs{keys %$values} = values %$values; - - return $self; -} - -sub child_nodes { $_[0]->_collect(_nodes($_[0]->tree)) } - -sub children { _select($_[0]->_collect(_nodes($_[0]->tree, 1)), $_[1]) } - -sub content { - my $self = shift; - - my $type = $self->type; - if ($type eq 'root' || $type eq 'tag') { - return $self->_content(0, 1, @_) if @_; - my $html = DOM::Tiny::_HTML->new(xml => $self->xml); - return join '', map { $html->tree($_)->render } _nodes($self->tree); - } - - return $self->tree->[1] unless @_; - $self->tree->[1] = shift; - return $self; -} - -sub descendant_nodes { $_[0]->_collect(_all(_nodes($_[0]->tree))) } - -sub find { $_[0]->_collect(@{$_[0]->_css->select($_[1])}) } - -sub following { _select($_[0]->_collect(@{$_[0]->_siblings(1)->[1]}), $_[1]) } -sub following_nodes { $_[0]->_collect(@{$_[0]->_siblings->[1]}) } - -sub matches { shift->_css->matches(@_) } - -sub namespace { - my $self = shift; - - return undef if (my $tree = $self->tree)->[0] ne 'tag'; - - # Extract namespace prefix and search parents - my $ns = $tree->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef; - for my $node ($tree, $self->_ancestors) { - - # Namespace for prefix - my $attrs = $node->[2]; - if ($ns) { $_ eq $ns and return $attrs->{$_} for keys %$attrs } - - # Namespace attribute - elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} } - } - - return undef; -} - -sub next { $_[0]->_maybe($_[0]->_siblings(1, 0)->[1]) } -sub next_node { $_[0]->_maybe($_[0]->_siblings(0, 0)->[1]) } - -sub parent { - my $self = shift; - return undef if $self->tree->[0] eq 'root'; - return $self->_build($self->_parent, $self->xml); -} - -sub parse { shift->_delegate(parse => @_) } - -sub preceding { _select($_[0]->_collect(@{$_[0]->_siblings(1)->[0]}), $_[1]) } -sub preceding_nodes { $_[0]->_collect(@{$_[0]->_siblings->[0]}) } - -sub prepend { shift->_add(0, @_) } -sub prepend_content { shift->_content(0, 0, @_) } - -sub previous { $_[0]->_maybe($_[0]->_siblings(1, -1)->[0]) } -sub previous_node { $_[0]->_maybe($_[0]->_siblings(0, -1)->[0]) } - -sub remove { shift->replace('') } - -sub replace { - my ($self, $new) = @_; - return $self->parse($new) if (my $tree = $self->tree)->[0] eq 'root'; - return $self->_replace($self->_parent, $tree, _nodes($self->_parse($new))); -} - -sub root { - my $self = shift; - return $self unless my $tree = $self->_ancestors(1); - return $self->_build($tree, $self->xml); -} - -sub strip { - my $self = shift; - return $self if (my $tree = $self->tree)->[0] ne 'tag'; - return $self->_replace($tree->[3], $tree, _nodes($tree)); -} - -sub tag { - my ($self, $tag) = @_; - return undef if (my $tree = $self->tree)->[0] ne 'tag'; - return $tree->[1] unless $tag; - $tree->[1] = $tag; - return $self; -} - -sub tap { DOM::Tiny::_Collection::tap(@_) } - -sub text { shift->_all_text(0, @_) } - -sub to_string { shift->_delegate('render') } - -sub tree { shift->_delegate(tree => @_) } - -sub type { shift->tree->[0] } - -sub val { - my $self = shift; - - # "option" - return defined($self->{value}) ? $self->{value} : $self->text - if (my $tag = $self->tag) eq 'option'; - - # "textarea", "input" or "button" - return $tag eq 'textarea' ? $self->text : $self->{value} if $tag ne 'select'; - - # "select" - my $v = $self->find('option:checked')->map('val'); - return exists $self->{multiple} ? $v->size ? $v->to_array : undef : $v->last; -} - -sub wrap { shift->_wrap(0, @_) } -sub wrap_content { shift->_wrap(1, @_) } - -sub xml { shift->_delegate(xml => @_) } - -sub _add { - my ($self, $offset, $new) = @_; - - return $self if (my $tree = $self->tree)->[0] eq 'root'; - - my $parent = $self->_parent; - splice @$parent, _offset($parent, $tree) + $offset, 0, - _link($parent, _nodes($self->_parse($new))); - - return $self; -} - -sub _all { - map { $_->[0] eq 'tag' ? ($_, _all(_nodes($_))) : ($_) } @_; -} - -sub _all_text { - my ($self, $recurse, $trim) = @_; - - # Detect "pre" tag - my $tree = $self->tree; - $trim = 1 unless defined $trim; - map { $_->[1] eq 'pre' and $trim = 0 } $self->_ancestors, $tree - if $trim && $tree->[0] ne 'root'; - - return _text([_nodes($tree)], $recurse, $trim); -} - -sub _ancestors { - my ($self, $root) = @_; - - return () unless my $tree = $self->_parent; - my @ancestors; - do { push @ancestors, $tree } - while ($tree->[0] eq 'tag') && ($tree = $tree->[3]); - return $root ? $ancestors[-1] : @ancestors[0 .. $#ancestors - 1]; -} - -sub _build { shift->new->tree(shift)->xml(shift) } - -sub _collect { - my $self = shift; - my $xml = $self->xml; - return DOM::Tiny::_Collection->new(map { $self->_build($_, $xml) } @_); -} - -sub _content { - my ($self, $start, $offset, $new) = @_; - - my $tree = $self->tree; - unless ($tree->[0] eq 'root' || $tree->[0] eq 'tag') { - my $old = $self->content; - return $self->content($start ? "$old$new" : "$new$old"); - } - - $start = $start ? ($#$tree + 1) : _start($tree); - $offset = $offset ? $#$tree : 0; - splice @$tree, $start, $offset, _link($tree, _nodes($self->_parse($new))); - - return $self; -} - -sub _css { DOM::Tiny::_CSS->new(tree => shift->tree) } - -sub _delegate { - my ($self, $method) = (shift, shift); - return $$self->$method unless @_; - $$self->$method(@_); - return $self; -} - -sub _link { - my ($parent, @children) = @_; - - # Link parent to children - for my $node (@children) { - my $offset = $node->[0] eq 'tag' ? 3 : 2; - $node->[$offset] = $parent; - weaken $node->[$offset]; - } - - return @children; -} - -sub _maybe { $_[1] ? $_[0]->_build($_[1], $_[0]->xml) : undef } - -sub _nodes { - return () unless my $tree = shift; - my @nodes = @$tree[_start($tree) .. $#$tree]; - return shift() ? grep { $_->[0] eq 'tag' } @nodes : @nodes; -} - -sub _offset { - my ($parent, $child) = @_; - my $i = _start($parent); - $_ eq $child ? last : $i++ for @$parent[$i .. $#$parent]; - return $i; -} - -sub _parent { $_[0]->tree->[$_[0]->type eq 'tag' ? 3 : 2] } - -sub _parse { DOM::Tiny::_HTML->new(xml => shift->xml)->parse(shift)->tree } - -sub _replace { - my ($self, $parent, $child, @nodes) = @_; - splice @$parent, _offset($parent, $child), 1, _link($parent, @nodes); - return $self->parent; -} - -sub _select { - my ($collection, $selector) = @_; - return $collection unless $selector; - return $collection->new(grep { $_->matches($selector) } @$collection); -} - -sub _siblings { - my ($self, $tags, $i) = @_; - - return [] unless my $parent = $self->parent; - - my $tree = $self->tree; - my (@before, @after, $match); - for my $node (_nodes($parent->tree)) { - ++$match and next if !$match && $node eq $tree; - next if $tags && $node->[0] ne 'tag'; - $match ? push @after, $node : push @before, $node; - } - - return defined $i ? [$before[$i], $after[$i]] : [\@before, \@after]; -} - -sub _squish { - my $str = shift; - $str =~ s/^\s+//; - $str =~ s/\s+$//; - $str =~ s/\s+/ /g; - return $str; -} - -sub _start { $_[0][0] eq 'root' ? 1 : 4 } - -sub _text { - my ($nodes, $recurse, $trim) = @_; - - # Merge successive text nodes - my $i = 0; - while (my $next = $nodes->[$i + 1]) { - ++$i and next unless $nodes->[$i][0] eq 'text' && $next->[0] eq 'text'; - splice @$nodes, $i, 2, ['text', $nodes->[$i][1] . $next->[1]]; - } - - my $text = ''; - for my $node (@$nodes) { - my $type = $node->[0]; - - # Text - my $chunk = ''; - if ($type eq 'text') { $chunk = $trim ? _squish $node->[1] : $node->[1] } - - # CDATA or raw text - elsif ($type eq 'cdata' || $type eq 'raw') { $chunk = $node->[1] } - - # Nested tag - elsif ($type eq 'tag' && $recurse) { - no warnings 'recursion'; - $chunk = _text([_nodes($node)], 1, $node->[1] eq 'pre' ? 0 : $trim); - } - - # Add leading whitespace if punctuation allows it - $chunk = " $chunk" if $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/; - - # Trim whitespace blocks - $text .= $chunk if $chunk =~ /\S+/ || !$trim; - } - - return $text; -} - -sub _wrap { - my ($self, $content, $new) = @_; - - $content = 1 if (my $tree = $self->tree)->[0] eq 'root'; - $content = 0 if $tree->[0] ne 'root' && $tree->[0] ne 'tag'; - - # Find innermost tag - my $current; - my $first = $new = $self->_parse($new); - $current = $first while $first = (_nodes($first, 1))[0]; - return $self unless $current; - - # Wrap content - if ($content) { - push @$current, _link($current, _nodes($tree)); - splice @$tree, _start($tree), $#$tree, _link($tree, _nodes($new)); - return $self; - } - - # Wrap element - $self->_replace($self->_parent, $tree, _nodes($new)); - push @$current, _link($current, $tree); - return $self; -} +our $VERSION = '0.004'; 1; @@ -392,1121 +12,20 @@ sub _wrap { =head1 NAME -DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors - -=head1 SYNOPSIS - - use DOM::Tiny; - - # Parse - my $dom = DOM::Tiny->new('
Test
123
456
'); - $dom->find(':not(p)')->map('strip'); - - # Render - say "$dom"; - -=head1 DESCRIPTION - -LHi!
'); - say $dom->at('p[id]')->text; - -If XML processing instructions are found, the parser will automatically switch -into XML mode and everything becomes case-sensitive. - - # XML semantics - my $dom = DOM::Tiny->new('Hi!
'); - say $dom->at('P[ID]')->text; - -XML detection can also be disabled with the L"xml"> method. - - # Force XML semantics - my $dom = DOM::Tiny->new->xml(1)->parse('Hi!
'); - say $dom->at('P[ID]')->text; - - # Force HTML semantics - my $dom = DOM::Tiny->new->xml(0)->parse('Hi!
'); - say $dom->at('p[id]')->text; - -=head1 SELECTORS - -Lbar
baz\nbar
baz\nI ⥠DOM::Tiny!
'); - -Append HTML/XML fragment to this node. +DOM::Tiny - This is an empty subclass, you wanted Mojo::DOM58 - # "Test 123
" - $dom->parse('Test
')->at('p') - ->child_nodes->first->append(' 123')->root; +Development continues under the name LI ⥠DOM::Tiny!
'); - -Append HTML/XML fragment (for CTest123
" - $dom->parse('Test
')->at('p')->append_content('123')->root; - -=head2 at - - my $result = $dom->at('div ~ p'); - -Find first descendant element of this element matching the CSS selector and -return it as a L123
" - $dom->parse('Test123
')->at('p')->child_nodes->first->remove; - - # "" - $dom->parse('123')->child_nodes->first; - - # " Test " - $dom->parse('123')->child_nodes->last->content; - -=head2 children - - my $collection = $dom->children; - my $collection = $dom->children('div ~ p'); - -Find all child elements of this element matching the CSS selector and return a -LI ⥠DOM::Tiny!
'); - -Return this node's content or replace it with HTML/XML fragment (for C123
" - $dom->parse('Test
')->at('p')->content('123')->root; - - # "123
" - $dom->parse('123
') - ->descendant_nodes->grep(sub { $_->type eq 'comment' }) - ->map('remove')->first; - - # "testtest
" - $dom->parse('123456
') - ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) - ->map(content => 'test')->first->root; - -=head2 find - - my $collection = $dom->find('div ~ p'); - -Find all descendant elements of this element matching the CSS selector and -return a LA
C')->at('p')->following_nodes->last->content; - -=head2 matches - - my $bool = $dom->matches('div ~ p'); - -Check if this element matches the CSS selector. All selectors listed in -L"SELECTORS"> are supported. - - # True - $dom->parse('A
')->at('p')->matches('.a'); - $dom->parse('A
')->at('p')->matches('p[class]'); - - # False - $dom->parse('A
')->at('p')->matches('.b'); - $dom->parse('A
')->at('p')->matches('p[id]'); - -=head2 namespace - - my $namespace = $dom->namespace; - -Find this element's namespace or return C123456
') - ->at('b')->next_node->next_node; - - # " Test " - $dom->parse('123456
') - ->at('b')->next_node->content; - -=head2 parent - - my $parent = $dom->parent; - -Return LC
')->at('p')->preceding_nodes->first->content; - -=head2 prepend - - $dom = $dom->prepend('I ⥠DOM::Tiny!
'); - -Prepend HTML/XML fragment to this node. - - # "Test 123
" - $dom->parse('123
') - ->at('p')->child_nodes->first->prepend('Test ')->root; - -=head2 prepend_content - - $dom = $dom->prepend_content('I ⥠DOM::Tiny!
'); - -Prepend HTML/XML fragment (for C123Test
" - $dom->parse('Test
')->at('p')->prepend_content('123')->root; - -=head2 previous - - my $sibling = $dom->previous; - -Return L123456
') - ->at('b')->previous_node->previous_node; - - # " Test " - $dom->parse('123456
') - ->at('b')->previous_node->content; - -=head2 remove - - my $parent = $dom->remove; - -Remove this node and return L"root"> (for C456
" - $dom->parse('123456
') - ->at('p')->child_nodes->first->remove->root; - -=head2 replace - - my $parent = $dom->replace('123
" - $dom->parse('Test
') - ->at('p')->child_nodes->[0]->replace('123')->root; - -=head2 root - - my $root = $dom->root; - -Return Lbar
baz\nbar
baz\nTest
')->type; - - # "tag" - $dom->parse('Test
')->at('p')->type; - - # "text" - $dom->parse('Test
')->at('p')->child_nodes->first->type; - -=head2 val - - my $value = $dom->val; - -Extract value from form element (such as C