From: Matt S Trout Date: Sun, 10 Apr 2016 15:09:18 +0000 (+0000) Subject: v0.004 X-Git-Tag: v0.004^0 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FDOM-Tiny.git;a=commitdiff_plain;h=9cc55beb4593a694c6e07231bbea6f3973ff6148 v0.004 - Murderize dist due to replacement by Mojo::DOM58 --- diff --git a/Changes b/Changes index 9688117..2f929b1 100644 --- a/Changes +++ b/Changes @@ -1,4 +1,6 @@ {{$NEXT}} + +0.004 2016-04-10 15:09:07 UTC - Murderize dist due to replacement by Mojo::DOM58 0.003 2016-03-19 18:35:22 UTC diff --git a/META.json b/META.json index 331a418..ce3461f 100644 --- a/META.json +++ b/META.json @@ -1,10 +1,10 @@ { - "abstract" : "Minimalistic HTML/XML DOM parser with CSS selectors", + "abstract" : "This is an empty subclass, you wanted Mojo::DOM58", "author" : [ "Dan Book " ], "dynamic_config" : 0, - "generated_by" : "Dist::Zilla version 5.043, CPAN::Meta::Converter version 2.150005", + "generated_by" : "Dist::Zilla version 5.043, CPAN::Meta::Converter version 2.150001", "license" : [ "artistic_2" ], @@ -46,6 +46,7 @@ "Carp" : "0", "Exporter" : "0", "List::Util" : "0", + "Mojo::DOM58" : "0", "Scalar::Util" : "0", "perl" : "5.008001" } diff --git a/Makefile.PL b/Makefile.PL index 86a2247..03df229 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -7,7 +7,7 @@ use 5.008001; use ExtUtils::MakeMaker; my %WriteMakefileArgs = ( - "ABSTRACT" => "Minimalistic HTML/XML DOM parser with CSS selectors", + "ABSTRACT" => "This is an empty subclass, you wanted Mojo::DOM58", "AUTHOR" => "Dan Book ", "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => 0 @@ -20,6 +20,7 @@ my %WriteMakefileArgs = ( "Carp" => 0, "Exporter" => 0, "List::Util" => 0, + "Mojo::DOM58" => 0, "Scalar::Util" => 0 }, "TEST_REQUIRES" => { @@ -40,6 +41,7 @@ my %FallbackPrereqs = ( "Exporter" => 0, "JSON::PP" => 0, "List::Util" => 0, + "Mojo::DOM58" => 0, "Scalar::Util" => 0, "Test::More" => "0.88" ); diff --git a/README.pod b/README.pod index fc667fe..352a408 100644 --- a/README.pod +++ 
b/README.pod @@ -4,1121 +4,14 @@ =head1 NAME -DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors +DOM::Tiny - This is an empty subclass, you wanted Mojo::DOM58 -=head1 SYNOPSIS +=head1 IT'S DEAD, JIM. - use DOM::Tiny; +Development continues under the name L. - # Parse - my $dom = DOM::Tiny->new('

Test

123

'); - - # Find - say $dom->at('#b')->text; - say $dom->find('p')->map('text')->join("\n"); - say $dom->find('[id]')->map(attr => 'id')->join("\n"); - - # Iterate - $dom->find('p[id]')->reverse->each(sub { say $_->{id} }); - - # Loop - for my $e ($dom->find('p[id]')->each) { - say $e->{id}, ':', $e->text; - } - - # Modify - $dom->find('div p')->last->append('

456

'); - $dom->find(':not(p)')->map('strip'); - - # Render - say "$dom"; - -=head1 DESCRIPTION - -L is a minimalistic and relaxed pure-perl HTML/XML DOM parser based -on L. It supports the L -and L, and -matching based on L. It will -even try to interpret broken HTML and XML, so you should not use it for -validation. - -=head1 NODES AND ELEMENTS - -When we parse an HTML/XML fragment, it gets turned into a tree of nodes. - - - - Hello - World! - - -There are currently eight different kinds of nodes, C, C, -C, C, C, C, C and C. Elements are nodes of -the type C. - - root - |- doctype (html) - +- tag (html) - |- tag (head) - | +- tag (title) - | +- raw (Hello) - +- tag (body) - +- text (World!) - -While all node types are represented as L objects, some methods like -L and L only apply to elements. - -=head1 CASE-SENSITIVITY - -L defaults to HTML semantics, that means all tags and attribute -names are lowercased and selectors need to be lowercase as well. - - # HTML semantics - my $dom = DOM::Tiny->new('

Hi!

'); - say $dom->at('p[id]')->text; - -If an XML declaration is found, the parser will automatically switch into XML -mode and everything becomes case-sensitive. - - # XML semantics - my $dom = DOM::Tiny->new('

Hi!

'); - say $dom->at('P[ID]')->text; - -XML detection can also be disabled with the L method. - - # Force XML semantics - my $dom = DOM::Tiny->new->xml(1)->parse('

Hi!

'); - say $dom->at('P[ID]')->text; - - # Force HTML semantics - my $dom = DOM::Tiny->new->xml(0)->parse('

Hi!

'); - say $dom->at('p[id]')->text; - -=head1 SELECTORS - -L uses a CSS selector engine based on L. All CSS -selectors that make sense for a standalone parser are supported. - -=over - -=item Z<>* - -Any element. - - my $all = $dom->find('*'); - -=item E - -An element of type C. - - my $title = $dom->at('title'); - -=item E[foo] - -An C element with a C attribute. - - my $links = $dom->find('a[href]'); - -=item E[foo="bar"] - -An C element whose C attribute value is exactly equal to C. - - my $case_sensitive = $dom->find('input[type="hidden"]'); - my $case_sensitive = $dom->find('input[type=hidden]'); - -=item E[foo="bar" i] - -An C element whose C attribute value is exactly equal to any -(ASCII-range) case-permutation of C. Note that this selector is -EXPERIMENTAL and might change without warning! - - my $case_insensitive = $dom->find('input[type="hidden" i]'); - my $case_insensitive = $dom->find('input[type=hidden i]'); - my $case_insensitive = $dom->find('input[class~="foo" i]'); - -This selector is part of -L, which is still a work -in progress. - -=item E[foo~="bar"] - -An C element whose C attribute value is a list of whitespace-separated -values, one of which is exactly equal to C. - - my $foo = $dom->find('input[class~="foo"]'); - my $foo = $dom->find('input[class~=foo]'); - -=item E[foo^="bar"] - -An C element whose C attribute value begins exactly with the string -C. - - my $begins_with = $dom->find('input[name^="f"]'); - my $begins_with = $dom->find('input[name^=f]'); - -=item E[foo$="bar"] - -An C element whose C attribute value ends exactly with the string -C. - - my $ends_with = $dom->find('input[name$="o"]'); - my $ends_with = $dom->find('input[name$=o]'); - -=item E[foo*="bar"] - -An C element whose C attribute value contains the substring C. - - my $contains = $dom->find('input[name*="fo"]'); - my $contains = $dom->find('input[name*=fo]'); - -=item E:root - -An C element, root of the document. 
- - my $root = $dom->at(':root'); - -=item E:nth-child(n) - -An C element, the C child of its parent. - - my $third = $dom->find('div:nth-child(3)'); - my $odd = $dom->find('div:nth-child(odd)'); - my $even = $dom->find('div:nth-child(even)'); - my $top3 = $dom->find('div:nth-child(-n+3)'); - -=item E:nth-last-child(n) - -An C element, the C child of its parent, counting from the last one. - - my $third = $dom->find('div:nth-last-child(3)'); - my $odd = $dom->find('div:nth-last-child(odd)'); - my $even = $dom->find('div:nth-last-child(even)'); - my $bottom3 = $dom->find('div:nth-last-child(-n+3)'); - -=item E:nth-of-type(n) - -An C element, the C sibling of its type. - - my $third = $dom->find('div:nth-of-type(3)'); - my $odd = $dom->find('div:nth-of-type(odd)'); - my $even = $dom->find('div:nth-of-type(even)'); - my $top3 = $dom->find('div:nth-of-type(-n+3)'); - -=item E:nth-last-of-type(n) - -An C element, the C sibling of its type, counting from the last one. - - my $third = $dom->find('div:nth-last-of-type(3)'); - my $odd = $dom->find('div:nth-last-of-type(odd)'); - my $even = $dom->find('div:nth-last-of-type(even)'); - my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)'); - -=item E:first-child - -An C element, first child of its parent. - - my $first = $dom->find('div p:first-child'); - -=item E:last-child - -An C element, last child of its parent. - - my $last = $dom->find('div p:last-child'); - -=item E:first-of-type - -An C element, first sibling of its type. - - my $first = $dom->find('div p:first-of-type'); - -=item E:last-of-type - -An C element, last sibling of its type. - - my $last = $dom->find('div p:last-of-type'); - -=item E:only-child - -An C element, only child of its parent. - - my $lonely = $dom->find('div p:only-child'); - -=item E:only-of-type - -An C element, only sibling of its type. - - my $lonely = $dom->find('div p:only-of-type'); - -=item E:empty - -An C element that has no children (including text nodes). 
- - my $empty = $dom->find(':empty'); - -=item E:checked - -A user interface element C which is checked (for instance a radio-button or -checkbox). - - my $input = $dom->find(':checked'); - -=item E.warning - -An C element whose class is "warning". - - my $warning = $dom->find('div.warning'); - -=item E#myid - -An C element with C equal to "myid". - - my $foo = $dom->at('div#foo'); - -=item E:not(s) - -An C element that does not match simple selector C. - - my $others = $dom->find('div p:not(:first-child)'); - -=item E F - -An C element descendant of an C element. - - my $headlines = $dom->find('div h1'); - -=item E E F - -An C element child of an C element. - - my $headlines = $dom->find('html > body > div > h1'); - -=item E + F - -An C element immediately preceded by an C element. - - my $second = $dom->find('h1 + h2'); - -=item E ~ F - -An C element preceded by an C element. - - my $second = $dom->find('h1 ~ h2'); - -=item E, F, G - -Elements of type C, C and C. - - my $headlines = $dom->find('h1, h2, h3'); - -=item E[foo=bar][bar=baz] - -An C element whose attributes match all following attribute selectors. - - my $links = $dom->find('a[foo^=b][foo$=ar]'); - -=back - -=head1 OPERATORS - -L overloads the following operators. - -=head2 array - - my @nodes = @$dom; - -Alias for L. - - # "" - $dom->parse('123')->[0]; - -=head2 bool - - my $bool = !!$dom; - -Always true. - -=head2 hash - - my %attrs = %$dom; - -Alias for L. - - # "test" - $dom->parse('
Test
')->at('div')->{id}; - -=head2 stringify - - my $str = "$dom"; - -Alias for L. - -=head1 METHODS - -L implements the following methods. - -=head2 new - - my $dom = DOM::Tiny->new; - my $dom = DOM::Tiny->new('I ♥ DOM::Tiny!'); - -Construct a new scalar-based L object and L HTML/XML -fragment if necessary. - -=head2 all_text - - my $trimmed = $dom->all_text; - my $untrimmed = $dom->all_text(0); - -Extract text content from all descendant nodes of this element, smart -whitespace trimming is enabled by default. - - # "foo bar baz" - $dom->parse("
foo\n

bar

baz\n
")->at('div')->all_text; - - # "foo\nbarbaz\n" - $dom->parse("
foo\n

bar

baz\n
")->at('div')->all_text(0); - -=head2 ancestors - - my $collection = $dom->ancestors; - my $collection = $dom->ancestors('div ~ p'); - -Find all ancestor elements of this node matching the CSS selector and return a -L containing these elements as L -objects. All selectors listed in L are supported. - - # List tag names of ancestor elements - say $dom->ancestors->map('tag')->join("\n"); - -=head2 append - - $dom = $dom->append('

I ♥ DOM::Tiny!

'); - -Append HTML/XML fragment to this node (for all node types other than C). - - # "

Test

123

" - $dom->parse('

Test

') - ->at('h1')->append('

123

')->root; - - # "

Test 123

" - $dom->parse('

Test

')->at('p') - ->child_nodes->first->append(' 123')->root; - -=head2 append_content - - $dom = $dom->append_content('

I ♥ DOM::Tiny!

'); - -Append HTML/XML fragment (for C and C nodes) or raw content to this -node's content. - - # "

Test123

" - $dom->parse('

Test

') - ->at('h1')->append_content('123')->root; - - # "
" - $dom->parse('
') - ->child_nodes->first->append_content('123 ')->root; - - # "

Test123

" - $dom->parse('

Test

')->at('p')->append_content('123')->root; - -=head2 at - - my $result = $dom->at('div ~ p'); - -Find first descendant element of this element matching the CSS selector and -return it as a L object, or C if none could be found. All -selectors listed in L are supported. - - # Find first element with "svg" namespace definition - my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'}; - -=head2 attr - - my $hash = $dom->attr; - my $foo = $dom->attr('foo'); - $dom = $dom->attr({foo => 'bar'}); - $dom = $dom->attr(foo => 'bar'); - -This element's attributes. - - # Remove an attribute - delete $dom->attr->{id}; - - # Attribute without value - $dom->attr(selected => undef); - - # List id attributes - say $dom->find('*')->map(attr => 'id')->compact->join("\n"); - -=head2 child_nodes - - my $collection = $dom->child_nodes; - -Return a L containing all child nodes of this -element as L objects. - - # "

123

" - $dom->parse('

Test123

')->at('p')->child_nodes->first->remove; - - # "" - $dom->parse('123')->child_nodes->first; - - # " Test " - $dom->parse('123')->child_nodes->last->content; - -=head2 children - - my $collection = $dom->children; - my $collection = $dom->children('div ~ p'); - -Find all child elements of this element matching the CSS selector and return a -L containing these elements as L -objects. All selectors listed in L are supported. - - # Show tag name of random child element - say $dom->children->shuffle->first->tag; - -=head2 content - - my $str = $dom->content; - $dom = $dom->content('

I ♥ DOM::Tiny!

'); - -Return this node's content or replace it with HTML/XML fragment (for C -and C nodes) or raw content. - - # "Test" - $dom->parse('
Test
')->at('div')->content; - - # "

123

" - $dom->parse('

Test

')->at('h1')->content('123')->root; - - # "

123

" - $dom->parse('

Test

')->at('p')->content('123')->root; - - # "

" - $dom->parse('

Test

')->at('h1')->content('')->root; - - # " Test " - $dom->parse('
')->child_nodes->first->content; - - # "
456
" - $dom->parse('
456
') - ->at('div')->child_nodes->first->content(' 123 ')->root; - -=head2 descendant_nodes - - my $collection = $dom->descendant_nodes; - -Return a L containing all descendant nodes of -this element as L objects. - - # "

123

" - $dom->parse('

123

') - ->descendant_nodes->grep(sub { $_->type eq 'comment' }) - ->map('remove')->first; - - # "

testtest

" - $dom->parse('

123456

') - ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) - ->map(content => 'test')->first->root; - -=head2 find - - my $collection = $dom->find('div ~ p'); - -Find all descendant elements of this element matching the CSS selector and -return a L containing these elements as -L objects. All selectors listed in L are supported. - - # Find a specific element and extract information - my $id = $dom->find('div')->[23]{id}; - - # Extract information from multiple elements - my @headers = $dom->find('h1, h2, h3')->map('text')->each; - - # Count all the different tags - my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {}); - - # Find elements with a class that contains dots - my @divs = $dom->find('div.foo\.bar')->each; - -=head2 following - - my $collection = $dom->following; - my $collection = $dom->following('div ~ p'); - -Find all sibling elements after this node matching the CSS selector and return -a L containing these elements as L -objects. All selectors listed in L are supported. - - # List tags of sibling elements after this node - say $dom->following->map('tag')->join("\n"); - -=head2 following_nodes - - my $collection = $dom->following_nodes; - -Return a L containing all sibling nodes after -this node as L objects. - - # "C" - $dom->parse('

A

C')->at('p')->following_nodes->last->content; - -=head2 matches - - my $bool = $dom->matches('div ~ p'); - -Check if this element matches the CSS selector. All selectors listed in -L are supported. - - # True - $dom->parse('

A

')->at('p')->matches('.a'); - $dom->parse('

A

')->at('p')->matches('p[class]'); - - # False - $dom->parse('

A

')->at('p')->matches('.b'); - $dom->parse('

A

')->at('p')->matches('p[id]'); - -=head2 namespace - - my $namespace = $dom->namespace; - -Find this element's namespace, or return C if none could be found. - - # Find namespace for an element with namespace prefix - my $namespace = $dom->at('svg > svg\:circle')->namespace; - - # Find namespace for an element that may or may not have a namespace prefix - my $namespace = $dom->at('svg > circle')->namespace; - -=head2 next - - my $sibling = $dom->next; - -Return L object for next sibling element, or C if there are -no more siblings. - - # "

123

" - $dom->parse('

Test

123

')->at('h1')->next; - -=head2 next_node - - my $sibling = $dom->next_node; - -Return L object for next sibling node, or C if there are no -more siblings. - - # "456" - $dom->parse('

123456

') - ->at('b')->next_node->next_node; - - # " Test " - $dom->parse('

123456

') - ->at('b')->next_node->content; - -=head2 parent - - my $parent = $dom->parent; - -Return L object for parent of this node, or C if this node -has no parent. - - # "Test" - $dom->parse('

Test

')->at('i')->parent; - -=head2 parse - - $dom = $dom->parse('I ♥ DOM::Tiny!'); - -Parse HTML/XML fragment. - - # Parse XML - my $dom = DOM::Tiny->new->xml(1)->parse('I ♥ DOM::Tiny!'); - -=head2 preceding - - my $collection = $dom->preceding; - my $collection = $dom->preceding('div ~ p'); - -Find all sibling elements before this node matching the CSS selector and return -a L containing these elements as L -objects. All selectors listed in L are supported. - - # List tags of sibling elements before this node - say $dom->preceding->map('tag')->join("\n"); - -=head2 preceding_nodes - - my $collection = $dom->preceding_nodes; - -Return a L containing all sibling nodes -before this node as L objects. - - # "A" - $dom->parse('A

C

')->at('p')->preceding_nodes->first->content; - -=head2 prepend - - $dom = $dom->prepend('

I ♥ DOM::Tiny!

'); - -Prepend HTML/XML fragment to this node (for all node types other than C). - - # "

Test

123

" - $dom->parse('

123

') - ->at('h2')->prepend('

Test

')->root; - - # "

Test 123

" - $dom->parse('

123

') - ->at('p')->child_nodes->first->prepend('Test ')->root; - -=head2 prepend_content - - $dom = $dom->prepend_content('

I ♥ DOM::Tiny!

'); - -Prepend HTML/XML fragment (for C and C nodes) or raw content to this -node's content. - - # "

Test123

" - $dom->parse('

123

') - ->at('h2')->prepend_content('Test')->root; - - # "
" - $dom->parse('
') - ->child_nodes->first->prepend_content(' Test')->root; - - # "

123Test

" - $dom->parse('

Test

')->at('p')->prepend_content('123')->root; - -=head2 previous - - my $sibling = $dom->previous; - -Return L object for previous sibling element, or C if there -are no more siblings. - - # "

Test

" - $dom->parse('

Test

123

')->at('h2')->previous; - -=head2 previous_node - - my $sibling = $dom->previous_node; - -Return L object for previous sibling node, or C if there are -no more siblings. - - # "123" - $dom->parse('

123456

') - ->at('b')->previous_node->previous_node; - - # " Test " - $dom->parse('

123456

') - ->at('b')->previous_node->content; - -=head2 remove - - my $parent = $dom->remove; - -Remove this node and return L (for C nodes) or L. - - # "
" - $dom->parse('

Test

')->at('h1')->remove; - - # "

456

" - $dom->parse('

123456

') - ->at('p')->child_nodes->first->remove->root; - -=head2 replace - - my $parent = $dom->replace('
I ♥ DOM::Tiny!
'); - -Replace this node with HTML/XML fragment and return L (for C -nodes) or L. - - # "

123

" - $dom->parse('

Test

')->at('h1')->replace('

123

'); - - # "

123

" - $dom->parse('

Test

') - ->at('p')->child_nodes->[0]->replace('123')->root; - -=head2 root - - my $root = $dom->root; - -Return L object for C node. - -=head2 strip - - my $parent = $dom->strip; - -Remove this element while preserving its content and return L. - - # "
Test
" - $dom->parse('

Test

')->at('h1')->strip; - -=head2 tag - - my $tag = $dom->tag; - $dom = $dom->tag('div'); - -This element's tag name. - - # List tag names of child elements - say $dom->children->map('tag')->join("\n"); - -=head2 tap - - $dom = $dom->tap(sub {...}); - -Equivalent to L. - -=head2 text - - my $trimmed = $dom->text; - my $untrimmed = $dom->text(0); - -Extract text content from this element only (not including child elements), -smart whitespace trimming is enabled by default. - - # "foo baz" - $dom->parse("
foo\n

bar

baz\n
")->at('div')->text; - - # "foo\nbaz\n" - $dom->parse("
foo\n

bar

baz\n
")->at('div')->text(0); - -=head2 to_string - - my $str = $dom->to_string; - -Render this node and its content to HTML/XML. - - # "Test" - $dom->parse('
Test
')->at('div b')->to_string; - -=head2 tree - - my $tree = $dom->tree; - $dom = $dom->tree(['root']); - -Document Object Model. Note that this structure should only be used very -carefully since it is very dynamic. - -=head2 type - - my $type = $dom->type; - -This node's type, usually C, C, C, C, C, -C, C or C. - - # "cdata" - $dom->parse('')->child_nodes->first->type; - - # "comment" - $dom->parse('')->child_nodes->first->type; - - # "doctype" - $dom->parse('')->child_nodes->first->type; - - # "pi" - $dom->parse('')->child_nodes->first->type; - - # "raw" - $dom->parse('Test')->at('title')->child_nodes->first->type; - - # "root" - $dom->parse('

Test

')->type; - - # "tag" - $dom->parse('

Test

')->at('p')->type; - - # "text" - $dom->parse('

Test

')->at('p')->child_nodes->first->type; - -=head2 val - - my $value = $dom->val; - -Extract value from form element (such as C