X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=README.pod;h=352a4089ac00321c4549cf7a3fb6e4632e7a3335;hb=39b3bd8c0a238f7dae32daffd2166783486cf64b;hp=2f6ea69c8e41d8472386324eeab8bf31905a6514;hpb=d6512b506041e5f51cb53585efc6823ec5f3b109;p=catagits%2FDOM-Tiny.git diff --git a/README.pod b/README.pod index 2f6ea69..352a408 100644 --- a/README.pod +++ b/README.pod @@ -4,686 +4,34 @@ =head1 NAME -DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors +DOM::Tiny - This is an empty subclass, you wanted Mojo::DOM58 -=head1 SYNOPSIS +=head1 IT'S DEAD, JIM. - use DOM::Tiny; +Development continues under the name L. - # Parse - my $dom = DOM::Tiny->new('

Test

123

'); +This is an empty subclass thereof to avoid defecating on existing users +from a great height, but you should still update your code. - # Find - say $dom->at('#b')->text; - say $dom->find('p')->map('text')->join("\n"); - say $dom->find('[id]')->map(attr => 'id')->join("\n"); - - # Iterate - $dom->find('p[id]')->reverse->each(sub { say $_->{id} }); - - # Loop - for my $e ($dom->find('p[id]')->each) { - say $e->{id}, ':', $e->text; - } - - # Modify - $dom->find('div p')->last->append('

456

'); - $dom->find(':not(p)')->map('strip'); - - # Render - say "$dom"; - -=head1 DESCRIPTION - -L is a minimalistic and relaxed HTML/XML DOM parser with CSS -selector support based on L. It will even try to interpret broken -HTML and XML, so you should not use it for validation. - -=head1 NODES AND ELEMENTS - -When we parse an HTML/XML fragment, it gets turned into a tree of nodes. - - - - Hello - World! - - -There are currently eight different kinds of nodes, C, C, -C, C, C, C, C and C. Elements are nodes of -the type C. - - root - |- doctype (html) - +- tag (html) - |- tag (head) - | +- tag (title) - | +- raw (Hello) - +- tag (body) - +- text (World!) - -While all node types are represented as L objects, some methods like -L and L only apply to elements. - -=head1 CASE-SENSITIVITY - -L defaults to HTML semantics, that means all tags and attribute -names are lowercased and selectors need to be lowercase as well. - - # HTML semantics - my $dom = DOM::Tiny->new('

Hi!

'); - say $dom->at('p[id]')->text; - -If XML processing instructions are found, the parser will automatically switch -into XML mode and everything becomes case-sensitive. - - # XML semantics - my $dom = DOM::Tiny->new('

Hi!

'); - say $dom->at('P[ID]')->text; - -XML detection can also be disabled with the L method. - - # Force XML semantics - my $dom = DOM::Tiny->new->xml(1)->parse('

Hi!

'); - say $dom->at('P[ID]')->text; - - # Force HTML semantics - my $dom = DOM::Tiny->new->xml(0)->parse('

Hi!

'); - say $dom->at('p[id]')->text; - -=head1 METHODS - -L implements the following methods. - -=head2 all_text - - my $trimmed = $dom->all_text; - my $untrimmed = $dom->all_text(0); - -Extract text content from all descendant nodes of this element, smart -whitespace trimming is enabled by default. - - # "foo bar baz" - $dom->parse("

foo\n

bar

baz\n

")->at('div')->all_text; - - # "foo\nbarbaz\n" - $dom->parse("

foo\n

bar

baz\n

")->at('div')->all_text(0); - -=head2 ancestors - - my $collection = $dom->ancestors; - my $collection = $dom->ancestors('div ~ p'); - -Find all ancestor elements of this node matching the CSS selector and return a -L object containing these elements as L -objects. All selectors from L are supported. - - # List tag names of ancestor elements - say $dom->ancestors->map('tag')->join("\n"); - -=head2 append - - $dom = $dom->append('

I ♥ DOM::Tiny!

'); - -Append HTML/XML fragment to this node. - - # "

Test

123

" - $dom->parse('

Test

') - ->at('h1')->append('

123

')->root; - - # "

Test 123

" - $dom->parse('

Test

')->at('p') - ->child_nodes->first->append(' 123')->root; - -=head2 append_content - - $dom = $dom->append_content('

I ♥ DOM::Tiny!

'); - -Append HTML/XML fragment (for C and C nodes) or raw content to this -node's content. - - # "

Test123

" - $dom->parse('

Test

') - ->at('h1')->append_content('123')->root; - - # "
" - $dom->parse('
') - ->child_nodes->first->append_content('123 ')->root; - - # "

Test123

" - $dom->parse('

Test

')->at('p')->append_content('123')->root; - -=head2 at - - my $result = $dom->at('div ~ p'); - -Find first descendant element of this element matching the CSS selector and -return it as a L object or return C if none could be found. -All selectors from L are supported. - - # Find first element with "svg" namespace definition - my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'}; - -=head2 attr - - my $hash = $dom->attr; - my $foo = $dom->attr('foo'); - $dom = $dom->attr({foo => 'bar'}); - $dom = $dom->attr(foo => 'bar'); - -This element's attributes. - - # Remove an attribute - delete $dom->attr->{id}; - - # Attribute without value - $dom->attr(selected => undef); - - # List id attributes - say $dom->find('*')->map(attr => 'id')->compact->join("\n"); - -=head2 child_nodes - - my $collection = $dom->child_nodes; - -Return a L object containing all child nodes of this -element as L objects. - - # "

123

" - $dom->parse('

Test123

')->at('p')->child_nodes->first->remove; - - # "" - $dom->parse('123')->child_nodes->first; - - # " Test " - $dom->parse('123')->child_nodes->last->content; - -=head2 children - - my $collection = $dom->children; - my $collection = $dom->children('div ~ p'); - -Find all child elements of this element matching the CSS selector and return a -L object containing these elements as L -objects. All selectors from L are supported. - - # Show tag name of random child element - say $dom->children->shuffle->first->tag; - -=head2 content - - my $str = $dom->content; - $dom = $dom->content('

I ♥ DOM::Tiny!

'); - -Return this node's content or replace it with HTML/XML fragment (for C -and C nodes) or raw content. - - # "Test" - $dom->parse('

Test

')->at('div')->content; - - # "

123

" - $dom->parse('

Test

')->at('h1')->content('123')->root; - - # "

123

" - $dom->parse('

Test

')->at('p')->content('123')->root; - - # "

" - $dom->parse('

Test

')->at('h1')->content('')->root; - - # " Test " - $dom->parse('
')->child_nodes->first->content; - - # "

456

" - $dom->parse('

456

') - ->at('div')->child_nodes->first->content(' 123 ')->root; - -=head2 descendant_nodes - - my $collection = $dom->descendant_nodes; - -Return a L object containing all descendant nodes of -this element as L objects. - - # "

123

" - $dom->parse('

123

') - ->descendant_nodes->grep(sub { $_->type eq 'comment' }) - ->map('remove')->first; - - # "

testtest

" - $dom->parse('

123456

') - ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) - ->map(content => 'test')->first->root; - -=head2 find - - my $collection = $dom->find('div ~ p'); - -Find all descendant elements of this element matching the CSS selector and -return a L object containing these elements as -L objects. All selectors from L are -supported. - - # Find a specific element and extract information - my $id = $dom->find('div')->[23]{id}; - - # Extract information from multiple elements - my @headers = $dom->find('h1, h2, h3')->map('text')->each; - - # Count all the different tags - my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {}); - - # Find elements with a class that contains dots - my @divs = $dom->find('div.foo\.bar')->each; - -=head2 following - - my $collection = $dom->following; - my $collection = $dom->following('div ~ p'); - -Find all sibling elements after this node matching the CSS selector and return -a L object containing these elements as L -objects. All selectors from L are supported. - - # List tags of sibling elements after this node - say $dom->following->map('tag')->join("\n"); - -=head2 following_nodes - - my $collection = $dom->following_nodes; - -Return a L object containing all sibling nodes after -this node as L objects. - - # "C" - $dom->parse('

C')->at('p')->following_nodes->last->content; - -=head2 matches - - my $bool = $dom->matches('div ~ p'); - -Check if this element matches the CSS selector. All selectors from -L are supported. - - # True - $dom->parse('

')->at('p')->matches('.a'); - $dom->parse('

')->at('p')->matches('p[class]'); - - # False - $dom->parse('

')->at('p')->matches('.b'); - $dom->parse('

')->at('p')->matches('p[id]'); - -=head2 namespace - - my $namespace = $dom->namespace; - -Find this element's namespace or return C if none could be found. - - # Find namespace for an element with namespace prefix - my $namespace = $dom->at('svg > svg\:circle')->namespace; - - # Find namespace for an element that may or may not have a namespace prefix - my $namespace = $dom->at('svg > circle')->namespace; - -=head2 new - - my $dom = DOM::Tiny->new; - my $dom = DOM::Tiny->new('I â¥ DOM::Tiny!'); - -Construct a new scalar-based L object and L HTML/XML -fragment if necessary. - -=head2 next - - my $sibling = $dom->next; - -Return L object for next sibling element or C if there are no -more siblings. - - # "

123

" - $dom->parse('

Test

123

')->at('h1')->next; - -=head2 next_node - - my $sibling = $dom->next_node; - -Return L object for next sibling node or C if there are no -more siblings. - - # "456" - $dom->parse('

123456

') - ->at('b')->next_node->next_node; - - # " Test " - $dom->parse('

123456

') - ->at('b')->next_node->content; - -=head2 parent - - my $parent = $dom->parent; - -Return L object for parent of this node or C if this node has -no parent. - -=head2 parse - - $dom = $dom->parse('I â¥ DOM::Tiny!'); - -Parse HTML/XML fragment with L. - - # Parse XML - my $dom = DOM::Tiny->new->xml(1)->parse($xml); - -=head2 preceding - - my $collection = $dom->preceding; - my $collection = $dom->preceding('div ~ p'); - -Find all sibling elements before this node matching the CSS selector and return -a L object containing these elements as L -objects. All selectors from L are supported. - - # List tags of sibling elements before this node - say $dom->preceding->map('tag')->join("\n"); - -=head2 preceding_nodes - - my $collection = $dom->preceding_nodes; - -Return a L object containing all sibling nodes before -this node as L objects. - - # "A" - $dom->parse('A

')->at('p')->preceding_nodes->first->content; - -=head2 prepend - - $dom = $dom->prepend('

I ♥ DOM::Tiny!

'); - -Prepend HTML/XML fragment to this node. - - # "

Test

123

" - $dom->parse('

123

') - ->at('h2')->prepend('

Test

')->root; - - # "

Test 123

" - $dom->parse('

123

') - ->at('p')->child_nodes->first->prepend('Test ')->root; - -=head2 prepend_content - - $dom = $dom->prepend_content('

I ♥ DOM::Tiny!

'); - -Prepend HTML/XML fragment (for C and C nodes) or raw content to this -node's content. - - # "

Test123

" - $dom->parse('

123

') - ->at('h2')->prepend_content('Test')->root; - - # "
" - $dom->parse('
') - ->child_nodes->first->prepend_content(' Test')->root; - - # "

123Test

" - $dom->parse('

Test

')->at('p')->prepend_content('123')->root; - -=head2 previous - - my $sibling = $dom->previous; - -Return L object for previous sibling element or C if there -are no more siblings. - - # "

Test

" - $dom->parse('

Test

123

')->at('h2')->previous; - -=head2 previous_node - - my $sibling = $dom->previous_node; - -Return L object for previous sibling node or C if there are -no more siblings. - - # "123" - $dom->parse('

123456

') - ->at('b')->previous_node->previous_node; - - # " Test " - $dom->parse('

123456

') - ->at('b')->previous_node->content; - -=head2 remove - - my $parent = $dom->remove; - -Remove this node and return L (for C nodes) or L. - - # "

" - $dom->parse('

Test

')->at('h1')->remove; - - # "

456

" - $dom->parse('

123456

') - ->at('p')->child_nodes->first->remove->root; - -=head2 replace - - my $parent = $dom->replace('

I ♥ DOM::Tiny!

'); - -Replace this node with HTML/XML fragment and return L (for C -nodes) or L. - - # "

123

" - $dom->parse('

Test

')->at('h1')->replace('

123

'); - - # "

123

" - $dom->parse('

Test

') - ->at('p')->child_nodes->[0]->replace('123')->root; - -=head2 root - - my $root = $dom->root; - -Return L object for C node. - -=head2 strip - - my $parent = $dom->strip; - -Remove this element while preserving its content and return L. - - # "

Test

" - $dom->parse('

Test

')->at('h1')->strip; - -=head2 tag - - my $tag = $dom->tag; - $dom = $dom->tag('div'); - -This element's tag name. - - # List tag names of child elements - say $dom->children->map('tag')->join("\n"); - -=head2 tap - - $dom = $dom->tap(sub {...}); - -Alias for L. - -=head2 text - - my $trimmed = $dom->text; - my $untrimmed = $dom->text(0); - -Extract text content from this element only (not including child elements), -smart whitespace trimming is enabled by default. - - # "foo baz" - $dom->parse("

foo\n

bar

baz\n

")->at('div')->text; - - # "foo\nbaz\n" - $dom->parse("

foo\n

bar

baz\n

")->at('div')->text(0); - -=head2 to_string - - my $str = $dom->to_string; - -Render this node and its content to HTML/XML. - - # "Test" - $dom->parse('

Test

')->at('div b')->to_string; - -=head2 tree - - my $tree = $dom->tree; - $dom = $dom->tree(['root']); - -Document Object Model. Note that this structure should only be used very -carefully since it is very dynamic. - -=head2 type - - my $type = $dom->type; - -This node's type, usually C, C, C, C, C, -C, C or C. - - # "cdata" - $dom->parse('')->child_nodes->first->type; - - # "comment" - $dom->parse('')->child_nodes->first->type; - - # "doctype" - $dom->parse('')->child_nodes->first->type; - - # "pi" - $dom->parse('')->child_nodes->first->type; - - # "raw" - $dom->parse('Test')->at('title')->child_nodes->first->type; - - # "root" - $dom->parse('

Test

')->type; - - # "tag" - $dom->parse('

Test

')->at('p')->type; - - # "text" - $dom->parse('

Test

')->at('p')->child_nodes->first->type; - -=head2 val - - my $value = $dom->val; - -Extract value from form element (such as C