X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDOM%2FTiny.pm;h=960c4801adbd6fb5d5825a15ac869e4d7ce707f4;hb=63873d67e780fce26d459e74222f627ccebc9392;hp=699ffd94e496389b37027ae88782fd0e1d8502e7;hpb=5a70ee9d2295cf74db3b0e2476e906cdcaee1ff3;p=catagits%2FDOM-Tiny.git
diff --git a/lib/DOM/Tiny.pm b/lib/DOM/Tiny.pm
index 699ffd9..960c480 100644
--- a/lib/DOM/Tiny.pm
+++ b/lib/DOM/Tiny.pm
@@ -11,19 +11,21 @@ use overload
fallback => 1;
use Carp 'croak';
-use DOM::Tiny::Collection;
-use DOM::Tiny::CSS;
-use DOM::Tiny::HTML;
+use DOM::Tiny::_Collection;
+use DOM::Tiny::_CSS;
+use DOM::Tiny::_HTML;
use Scalar::Util qw(blessed weaken);
our $VERSION = '0.001';
sub new {
my $class = shift;
- my $self = bless \DOM::Tiny::HTML->new, ref $class || $class;
+ my $self = bless \DOM::Tiny::_HTML->new, ref $class || $class;
return @_ ? $self->parse(@_) : $self;
}
+sub TO_JSON { shift->_delegate('render') }
+
sub all_text { shift->_all_text(1, @_) }
sub ancestors { _select($_[0]->_collect($_[0]->_ancestors), $_[1]) }
@@ -65,7 +67,7 @@ sub content {
my $type = $self->type;
if ($type eq 'root' || $type eq 'tag') {
return $self->_content(0, 1, @_) if @_;
- my $html = DOM::Tiny::HTML->new(xml => $self->xml);
+ my $html = DOM::Tiny::_HTML->new(xml => $self->xml);
return join '', map { $html->tree($_)->render } _nodes($self->tree);
}
@@ -151,7 +153,7 @@ sub tag {
return $self;
}
-sub tap { shift->DOM::Tiny::Collection::tap(@_) }
+sub tap { shift->DOM::Tiny::_Collection::tap(@_) }
sub text { shift->_all_text(0, @_) }
@@ -223,7 +225,7 @@ sub _build { shift->new->tree(shift)->xml(shift) }
sub _collect {
my $self = shift;
my $xml = $self->xml;
- return DOM::Tiny::Collection->new(map { $self->_build($_, $xml) } @_);
+ return DOM::Tiny::_Collection->new(map { $self->_build($_, $xml) } @_);
}
sub _content {
@@ -242,7 +244,7 @@ sub _content {
return $self;
}
-sub _css { DOM::Tiny::CSS->new(tree => shift->tree) }
+sub _css { DOM::Tiny::_CSS->new(tree => shift->tree) }
sub _delegate {
my ($self, $method) = (shift, shift);
@@ -281,7 +283,7 @@ sub _offset {
sub _parent { $_[0]->tree->[$_[0]->type eq 'tag' ? 3 : 2] }
-sub _parse { DOM::Tiny::HTML->new(xml => shift->xml)->parse(shift)->tree }
+sub _parse { DOM::Tiny::_HTML->new(xml => shift->xml)->parse(shift)->tree }
sub _replace {
my ($self, $parent, $child, @nodes) = @_;
@@ -420,10 +422,11 @@ DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
=head1 DESCRIPTION
-L is a minimalistic and relaxed pure-perl HTML/XML DOM parser with
-support for the L and
-L based on L. It
-will even try to interpret broken HTML and XML, so you should not use it for
+L is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
+on L. It supports the L
+and L, and
+matching based on L. It will
+even try to interpret broken HTML and XML, so you should not use it for
validation.
=head1 NODES AND ELEMENTS
@@ -478,6 +481,260 @@ XML detection can also be disabled with the L"xml"> method.
my $dom = DOM::Tiny->new->xml(0)->parse('Hi!
');
say $dom->at('p[id]')->text;
+=head1 SELECTORS
+
+L uses a CSS selector engine based on L. All CSS
+selectors that make sense for a standalone parser are supported.
+
+=head2 *
+
+Any element.
+
+ my $all = $dom->find('*');
+
+=head2 E
+
+An element of type C.
+
+ my $title = $dom->at('title');
+
+=head2 E[foo]
+
+An C element with a C attribute.
+
+ my $links = $dom->find('a[href]');
+
+=head2 E[foo="bar"]
+
+An C element whose C attribute value is exactly equal to C.
+
+ my $case_sensitive = $dom->find('input[type="hidden"]');
+ my $case_sensitive = $dom->find('input[type=hidden]');
+
+=head2 E[foo="bar" i]
+
+An C element whose C attribute value is exactly equal to any
+(ASCII-range) case-permutation of C. Note that this selector is
+EXPERIMENTAL and might change without warning!
+
+ my $case_insensitive = $dom->find('input[type="hidden" i]');
+ my $case_insensitive = $dom->find('input[type=hidden i]');
+ my $case_insensitive = $dom->find('input[class~="foo" i]');
+
+This selector is part of
+L, which is still a work
+in progress.
+
+=head2 E[foo~="bar"]
+
+An C element whose C attribute value is a list of whitespace-separated
+values, one of which is exactly equal to C.
+
+ my $foo = $dom->find('input[class~="foo"]');
+ my $foo = $dom->find('input[class~=foo]');
+
+=head2 E[foo^="bar"]
+
+An C element whose C attribute value begins exactly with the string
+C.
+
+ my $begins_with = $dom->find('input[name^="f"]');
+ my $begins_with = $dom->find('input[name^=f]');
+
+=head2 E[foo$="bar"]
+
+An C element whose C attribute value ends exactly with the string
+C.
+
+ my $ends_with = $dom->find('input[name$="o"]');
+ my $ends_with = $dom->find('input[name$=o]');
+
+=head2 E[foo*="bar"]
+
+An C element whose C attribute value contains the substring C.
+
+ my $contains = $dom->find('input[name*="fo"]');
+ my $contains = $dom->find('input[name*=fo]');
+
+=head2 E:root
+
+An C element, root of the document.
+
+ my $root = $dom->at(':root');
+
+=head2 E:nth-child(n)
+
+An C element, the C child of its parent.
+
+ my $third = $dom->find('div:nth-child(3)');
+ my $odd = $dom->find('div:nth-child(odd)');
+ my $even = $dom->find('div:nth-child(even)');
+ my $top3 = $dom->find('div:nth-child(-n+3)');
+
+=head2 E:nth-last-child(n)
+
+An C element, the C child of its parent, counting from the last one.
+
+ my $third = $dom->find('div:nth-last-child(3)');
+ my $odd = $dom->find('div:nth-last-child(odd)');
+ my $even = $dom->find('div:nth-last-child(even)');
+ my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
+
+=head2 E:nth-of-type(n)
+
+An C element, the C sibling of its type.
+
+ my $third = $dom->find('div:nth-of-type(3)');
+ my $odd = $dom->find('div:nth-of-type(odd)');
+ my $even = $dom->find('div:nth-of-type(even)');
+ my $top3 = $dom->find('div:nth-of-type(-n+3)');
+
+=head2 E:nth-last-of-type(n)
+
+An C element, the C sibling of its type, counting from the last one.
+
+ my $third = $dom->find('div:nth-last-of-type(3)');
+ my $odd = $dom->find('div:nth-last-of-type(odd)');
+ my $even = $dom->find('div:nth-last-of-type(even)');
+ my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
+
+=head2 E:first-child
+
+An C element, first child of its parent.
+
+ my $first = $dom->find('div p:first-child');
+
+=head2 E:last-child
+
+An C element, last child of its parent.
+
+ my $last = $dom->find('div p:last-child');
+
+=head2 E:first-of-type
+
+An C element, first sibling of its type.
+
+ my $first = $dom->find('div p:first-of-type');
+
+=head2 E:last-of-type
+
+An C element, last sibling of its type.
+
+ my $last = $dom->find('div p:last-of-type');
+
+=head2 E:only-child
+
+An C element, only child of its parent.
+
+ my $lonely = $dom->find('div p:only-child');
+
+=head2 E:only-of-type
+
+An C element, only sibling of its type.
+
+ my $lonely = $dom->find('div p:only-of-type');
+
+=head2 E:empty
+
+An C element that has no children (including text nodes).
+
+ my $empty = $dom->find(':empty');
+
+=head2 E:checked
+
+A user interface element C which is checked (for instance a radio-button or
+checkbox).
+
+ my $input = $dom->find(':checked');
+
+=head2 E.warning
+
+An C element whose class is "warning".
+
+ my $warning = $dom->find('div.warning');
+
+=head2 E#myid
+
+An C element with C equal to "myid".
+
+ my $foo = $dom->at('div#foo');
+
+=head2 E:not(s)
+
+An C element that does not match simple selector C.
+
+ my $others = $dom->find('div p:not(:first-child)');
+
+=head2 E F
+
+An C element descendant of an C element.
+
+ my $headlines = $dom->find('div h1');
+
+=head2 E E<gt> F
+
+An C<F> element child of an C<E> element.
+
+ my $headlines = $dom->find('html > body > div > h1');
+
+=head2 E + F
+
+An C element immediately preceded by an C element.
+
+ my $second = $dom->find('h1 + h2');
+
+=head2 E ~ F
+
+An C element preceded by an C element.
+
+ my $second = $dom->find('h1 ~ h2');
+
+=head2 E, F, G
+
+Elements of type C, C and C.
+
+ my $headlines = $dom->find('h1, h2, h3');
+
+=head2 E[foo=bar][bar=baz]
+
+An C element whose attributes match all following attribute selectors.
+
+ my $links = $dom->find('a[foo^=b][foo$=ar]');
+
+=head1 OPERATORS
+
+L overloads the following operators.
+
+=head2 array
+
+ my @nodes = @$dom;
+
+Alias for L</"child_nodes">.
+
+ # ""
+ $dom->parse('123')->[0];
+
+=head2 bool
+
+ my $bool = !!$dom;
+
+Always true.
+
+=head2 hash
+
+ my %attrs = %$dom;
+
+Alias for L</"attr">.
+
+ # "test"
+ $dom->parse('Test
')->at('div')->{id};
+
+=head2 stringify
+
+ my $str = "$dom";
+
+Alias for L</"to_string">.
+
=head1 METHODS
L implements the following methods.
@@ -510,8 +767,8 @@ whitespace trimming is enabled by default.
my $collection = $dom->ancestors('div ~ p');
Find all ancestor elements of this node matching the CSS selector and return a
-L object containing these elements as L
-objects. All selectors from L are supported.
+L containing these elements as L
+objects. All selectors listed in L</"SELECTORS"> are supported.
# List tag names of ancestor elements
say $dom->ancestors->map('tag')->join("\n");
@@ -554,7 +811,7 @@ node's content.
Find first descendant element of this element matching the CSS selector and
return it as a L object or return C if none could be found.
-All selectors from L are supported.
+All selectors listed in L</"SELECTORS"> are supported.
# Find first element with "svg" namespace definition
my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
@@ -581,7 +838,7 @@ This element's attributes.
my $collection = $dom->child_nodes;
-Return a L object containing all child nodes of this
+Return a L containing all child nodes of this
element as L objects.
# "123
"
@@ -599,8 +856,8 @@ element as L objects.
my $collection = $dom->children('div ~ p');
Find all child elements of this element matching the CSS selector and return a
-L object containing these elements as L
-objects. All selectors from L are supported.
+L containing these elements as L
+objects. All selectors listed in L</"SELECTORS"> are supported.
# Show tag name of random child element
say $dom->children->shuffle->first->tag;
@@ -636,7 +893,7 @@ and C nodes) or raw content.
my $collection = $dom->descendant_nodes;
-Return a L object containing all descendant nodes of
+Return a L containing all descendant nodes of
this element as L objects.
# "123
"
@@ -654,9 +911,8 @@ this element as L objects.
my $collection = $dom->find('div ~ p');
Find all descendant elements of this element matching the CSS selector and
-return a L object containing these elements as
-L objects. All selectors from L are
-supported.
+return a L containing these elements as
+L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
# Find a specific element and extract information
my $id = $dom->find('div')->[23]{id};
@@ -676,8 +932,8 @@ supported.
my $collection = $dom->following('div ~ p');
Find all sibling elements after this node matching the CSS selector and return
-a L object containing these elements as L
-objects. All selectors from L are supported.
+a L containing these elements as L
+objects. All selectors listed in L</"SELECTORS"> are supported.
# List tags of sibling elements after this node
say $dom->following->map('tag')->join("\n");
@@ -686,7 +942,7 @@ objects. All selectors from L are supported.
my $collection = $dom->following_nodes;
-Return a L object containing all sibling nodes after
+Return a L containing all sibling nodes after
this node as L objects.
# "C"
@@ -696,8 +952,8 @@ this node as L objects.
my $bool = $dom->matches('div ~ p');
-Check if this element matches the CSS selector. All selectors from
-L are supported.
+Check if this element matches the CSS selector. All selectors listed in
+L</"SELECTORS"> are supported.
# True
$dom->parse('A
')->at('p')->matches('.a');
@@ -755,7 +1011,7 @@ no parent.
$dom = $dom->parse('I ♥ DOM::Tiny!');
-Parse HTML/XML fragment with L.
+Parse HTML/XML fragment.
# Parse XML
my $dom = DOM::Tiny->new->xml(1)->parse($xml);
@@ -766,8 +1022,8 @@ Parse HTML/XML fragment with L.
my $collection = $dom->preceding('div ~ p');
Find all sibling elements before this node matching the CSS selector and return
-a L object containing these elements as L
-objects. All selectors from L are supported.
+a L containing these elements as L
+objects. All selectors listed in L</"SELECTORS"> are supported.
# List tags of sibling elements before this node
say $dom->preceding->map('tag')->join("\n");
@@ -776,8 +1032,8 @@ objects. All selectors from L are supported.
my $collection = $dom->preceding_nodes;
-Return a L object containing all sibling nodes before
-this node as L objects.
+Return a L containing all sibling nodes
+before this node as L objects.
# "A"
$dom->parse('AC
')->at('p')->preceding_nodes->first->content;
@@ -1026,39 +1282,216 @@ children of the first innermost element.
Disable HTML semantics in parser and activate case-sensitivity, defaults to
auto detection based on processing instructions.
-=head1 OPERATORS
+=head1 COLLECTION METHODS
-L overloads the following operators.
+Some L methods return an array-based collection object based on
+L, which can either be accessed directly as an array
+reference, or with the following methods.
-=head2 array
+ # Chain methods
+ $collection->map(sub { ucfirst })->shuffle->each(sub {
+ my ($word, $num) = @_;
+ say "$num: $word";
+ });
- my @nodes = @$dom;
+ # Access array directly to manipulate collection
+ $collection->[23] += 100;
+ say for @$collection;
-Alias for L"child_nodes">.
+=head2 compact
- # ""
- $dom->parse('123')->[0];
+ my $new = $collection->compact;
-=head2 bool
+Create a new collection with all elements that are defined and not an empty
+string.
- my $bool = !!$dom;
+ # $collection contains (0, 1, undef, 2, '', 3)
+ $collection->compact->join(', '); # "0, 1, 2, 3"
-Always true.
+=head2 each
-=head2 hash
+ my @elements = $collection->each;
+ $collection = $collection->each(sub {...});
- my %attrs = %$dom;
+Evaluate callback for each element in collection or return all elements as a
+list if none has been provided. The element will be the first argument passed
+to the callback and is also available as C<$_>.
-Alias for L"attr">.
+ # Make a numbered list
+ $collection->each(sub {
+ my ($e, $num) = @_;
+ say "$num: $e";
+ });
- # "test"
- $dom->parse('Test
')->at('div')->{id};
+=head2 first
-=head2 stringify
+ my $first = $collection->first;
+ my $first = $collection->first(qr/foo/);
+ my $first = $collection->first(sub {...});
+ my $first = $collection->first($method);
+ my $first = $collection->first($method, @args);
- my $str = "$dom";
+Evaluate regular expression/callback for, or call method on, each element in
+collection and return the first one that matched the regular expression, or for
+which the callback/method returned true. The element will be the first argument
+passed to the callback and is also available as C<$_>.
-Alias for L"to_string">.
+ # Longer version
+ my $first = $collection->first(sub { $_->$method(@args) });
+
+ # Find first value that contains the word "dom"
+ my $interesting = $collection->first(qr/dom/i);
+
+ # Find first value that is greater than 5
+ my $greater = $collection->first(sub { $_ > 5 });
+
+=head2 flatten
+
+ my $new = $collection->flatten;
+
+Flatten nested collections/arrays recursively and create a new collection with
+all elements.
+
+ # $collection contains (1, [2, [3, 4], 5, [6]], 7)
+ $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
+
+=head2 grep
+
+ my $new = $collection->grep(qr/foo/);
+ my $new = $collection->grep(sub {...});
+ my $new = $collection->grep($method);
+ my $new = $collection->grep($method, @args);
+
+Evaluate regular expression/callback for, or call method on, each element in
+collection and create a new collection with all elements that matched the
+regular expression, or for which the callback/method returned true. The element
+will be the first argument passed to the callback and is also available as
+C<$_>.
+
+ # Longer version
+ my $new = $collection->grep(sub { $_->$method(@args) });
+
+ # Find all values that contain the word "dom"
+ my $interesting = $collection->grep(qr/dom/i);
+
+ # Find all values that are greater than 5
+ my $greater = $collection->grep(sub { $_ > 5 });
+
+=head2 join
+
+  my $string = $collection->join;
+  my $string = $collection->join("\n");
+
+Turn collection into string.
+
+ # Join all values with commas
+ $collection->join(', ');
+
+=head2 last
+
+ my $last = $collection->last;
+
+Return the last element in collection.
+
+=head2 map
+
+ my $new = $collection->map(sub {...});
+ my $new = $collection->map($method);
+ my $new = $collection->map($method, @args);
+
+Evaluate callback for, or call method on, each element in collection and create
+a new collection from the results. The element will be the first argument
+passed to the callback and is also available as C<$_>.
+
+ # Longer version
+ my $new = $collection->map(sub { $_->$method(@args) });
+
+ # Append the word "dom" to all values
+ my $domified = $collection->map(sub { $_ . 'dom' });
+
+=head2 reduce
+
+ my $result = $collection->reduce(sub {...});
+ my $result = $collection->reduce(sub {...}, $initial);
+
+Reduce elements in collection with callback, the first element will be used as
+initial value if none has been provided.
+
+ # Calculate the sum of all values
+ my $sum = $collection->reduce(sub { $a + $b });
+
+ # Count how often each value occurs in collection
+ my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
+
+=head2 reverse
+
+ my $new = $collection->reverse;
+
+Create a new collection with all elements in reverse order.
+
+=head2 slice
+
+ my $new = $collection->slice(4 .. 7);
+
+Create a new collection with all selected elements.
+
+ # $collection contains ('A', 'B', 'C', 'D', 'E')
+ $collection->slice(1, 2, 4)->join(' '); # "B C E"
+
+=head2 shuffle
+
+ my $new = $collection->shuffle;
+
+Create a new collection with all elements in random order.
+
+=head2 size
+
+ my $size = $collection->size;
+
+Number of elements in collection.
+
+=head2 sort
+
+ my $new = $collection->sort;
+ my $new = $collection->sort(sub {...});
+
+Sort elements based on return value of callback and create a new collection
+from the results.
+
+ # Sort values case-insensitive
+ my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
+
+=head2 tap
+
+ $collection = $collection->tap(sub {...});
+
+Equivalent to L.
+
+=head2 to_array
+
+ my $array = $collection->to_array;
+
+Turn collection into array reference.
+
+=head2 uniq
+
+ my $new = $collection->uniq;
+ my $new = $collection->uniq(sub {...});
+ my $new = $collection->uniq($method);
+ my $new = $collection->uniq($method, @args);
+
+Create a new collection without duplicate elements, using the string
+representation of either the elements or the return value of the
+callback/method.
+
+ # Longer version
+ my $new = $collection->uniq(sub { $_->$method(@args) });
+
+ # $collection contains ('foo', 'bar', 'bar', 'baz')
+ $collection->uniq->join(' '); # "foo bar baz"
+
+ # $collection contains ([1, 2], [2, 1], [3, 2])
+ $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
=head1 BUGS
@@ -1079,3 +1512,7 @@ This is free software, licensed under:
=head1 SEE ALSO
L, L, L, L, L
+
+=for Pod::Coverage TO_JSON
+
+=cut