X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=README.pod;fp=README.pod;h=2f6ea69c8e41d8472386324eeab8bf31905a6514;hb=d6512b506041e5f51cb53585efc6823ec5f3b109;hp=0000000000000000000000000000000000000000;hpb=a292be342745c12aa805e7aca023756ad824f5d4;p=catagits%2FDOM-Tiny.git
diff --git a/README.pod b/README.pod
new file mode 100644
index 0000000..2f6ea69
--- /dev/null
+++ b/README.pod
@@ -0,0 +1,696 @@
+=pod
+
+=encoding utf8
+
+=head1 NAME
+
+DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
+
+=head1 SYNOPSIS
+
+ use DOM::Tiny;
+
+ # Parse
+ my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
+
+ # Find
+ say $dom->at('#b')->text;
+ say $dom->find('p')->map('text')->join("\n");
+ say $dom->find('[id]')->map(attr => 'id')->join("\n");
+
+ # Iterate
+ $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
+
+ # Loop
+ for my $e ($dom->find('p[id]')->each) {
+ say $e->{id}, ':', $e->text;
+ }
+
+ # Modify
+ $dom->find('div p')->last->append('<p id="c">456</p>');
+ $dom->find(':not(p)')->map('strip');
+
+ # Render
+ say "$dom";
+
+=head1 DESCRIPTION
+
+L<DOM::Tiny> is a minimalistic and relaxed HTML/XML DOM parser with CSS
+selector support based on L<Mojo::DOM>. It will even try to interpret broken
+HTML and XML, so you should not use it for validation.
+
+=head1 NODES AND ELEMENTS
+
+When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
+
+ <!DOCTYPE html>
+ <html>
+   <head><title>Hello</title></head>
+   <body>World!</body>
+ </html>
+
+There are currently eight different kinds of nodes, C<cdata>, C<comment>,
+C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
+the type C<tag>.
+
+ root
+ |- doctype (html)
+ +- tag (html)
+    |- tag (head)
+    |  +- tag (title)
+    |     +- raw (Hello)
+    +- tag (body)
+       +- text (World!)
+
+While all node types are represented as L<DOM::Tiny> objects, some methods like
+L</"attr"> and L</"namespace"> only apply to elements.
+
+=head1 CASE-SENSITIVITY
+
+L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
+names are lowercased and selectors need to be lowercase as well.
+
+ # HTML semantics
+ my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
+ say $dom->at('p[id]')->text;
+
+If XML processing instructions are found, the parser will automatically switch
+into XML mode and everything becomes case-sensitive.
+
+ # XML semantics
+ my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
+ say $dom->at('P[ID]')->text;
+
+XML detection can also be disabled with the L</"xml"> method.
+
+ # Force XML semantics
+ my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
+ say $dom->at('P[ID]')->text;
+
+ # Force HTML semantics
+ my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
+ say $dom->at('p[id]')->text;
+
+=head1 METHODS
+
+L<DOM::Tiny> implements the following methods.
+
+=head2 all_text
+
+ my $trimmed = $dom->all_text;
+ my $untrimmed = $dom->all_text(0);
+
+Extract text content from all descendant nodes of this element, smart
+whitespace trimming is enabled by default.
+
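+ # "foo bar baz"
+ $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
+
+ # "foo\nbarbaz\n"
+ $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);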
+
+=head2 ancestors
+
+ my $collection = $dom->ancestors;
+ my $collection = $dom->ancestors('div ~ p');
+
+Find all ancestor elements of this node matching the CSS selector and return a
+L object containing these elements as L
+objects. All selectors from L are supported.
+
+ # List tag names of ancestor elements
+ say $dom->ancestors->map('tag')->join("\n");
+
+=head2 append
+
+ $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
+
+Append HTML/XML fragment to this node.
+
+ # "
')->at('p')->append_content('123')->root;
+
+=head2 at
+
+ my $result = $dom->at('div ~ p');
+
+Find first descendant element of this element matching the CSS selector and
+return it as a L<DOM::Tiny> object or return C<undef> if none could be found.
+All selectors from L are supported.
+
+ # Find first element with "svg" namespace definition
+ my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
+
+=head2 attr
+
+ my $hash = $dom->attr;
+ my $foo = $dom->attr('foo');
+ $dom = $dom->attr({foo => 'bar'});
+ $dom = $dom->attr(foo => 'bar');
+
+This element's attributes.
+
+ # Remove an attribute
+ delete $dom->attr->{id};
+
+ # Attribute without value
+ $dom->attr(selected => undef);
+
+ # List id attributes
+ say $dom->find('*')->map(attr => 'id')->compact->join("\n");
+
+=head2 child_nodes
+
+ my $collection = $dom->child_nodes;
+
+Return a L object containing all child nodes of this
+element as L objects.
+
+ # "<p><b>123</b></p>"
+ $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
+
+ # "<!DOCTYPE html>"
+ $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
+
+ # " Test "
+ $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
+
+=head2 children
+
+ my $collection = $dom->children;
+ my $collection = $dom->children('div ~ p');
+
+Find all child elements of this element matching the CSS selector and return a
+L object containing these elements as L
+objects. All selectors from L are supported.
+
+ # Show tag name of random child element
+ say $dom->children->shuffle->first->tag;
+
+=head2 content
+
+ my $str = $dom->content;
+ $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
+
+Return this node's content or replace it with HTML/XML fragment (for C<root>
+and C<tag> nodes) or raw content.
+
+ # "Test"
+ $dom->parse('
')
+ ->at('div')->child_nodes->first->content(' 123 ')->root;
+
+=head2 descendant_nodes
+
+ my $collection = $dom->descendant_nodes;
+
+Return a L object containing all descendant nodes of
+this element as L objects.
+
+ # "
')
+ ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
+ ->map(content => 'test')->first->root;
+
+=head2 find
+
+ my $collection = $dom->find('div ~ p');
+
+Find all descendant elements of this element matching the CSS selector and
+return a L object containing these elements as
+L objects. All selectors from L are
+supported.
+
+ # Find a specific element and extract information
+ my $id = $dom->find('div')->[23]{id};
+
+ # Extract information from multiple elements
+ my @headers = $dom->find('h1, h2, h3')->map('text')->each;
+
+ # Count all the different tags
+ my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
+
+ # Find elements with a class that contains dots
+ my @divs = $dom->find('div.foo\.bar')->each;
+
+=head2 following
+
+ my $collection = $dom->following;
+ my $collection = $dom->following('div ~ p');
+
+Find all sibling elements after this node matching the CSS selector and return
+a L object containing these elements as L
+objects. All selectors from L are supported.
+
+ # List tags of sibling elements after this node
+ say $dom->following->map('tag')->join("\n");
+
+=head2 following_nodes
+
+ my $collection = $dom->following_nodes;
+
+Return a L object containing all sibling nodes after
+this node as L objects.
+
+ # "C"
+ $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
+
+=head2 matches
+
+ my $bool = $dom->matches('div ~ p');
+
+Check if this element matches the CSS selector. All selectors from
+L are supported.
+
+ # True
+ $dom->parse('
')->at('p')->matches('p[id]');
+
+=head2 namespace
+
+ my $namespace = $dom->namespace;
+
+Find this element's namespace or return C<undef> if none could be found.
+
+ # Find namespace for an element with namespace prefix
+ my $namespace = $dom->at('svg > svg\:circle')->namespace;
+
+ # Find namespace for an element that may or may not have a namespace prefix
+ my $namespace = $dom->at('svg > circle')->namespace;
+
+=head2 new
+
+ my $dom = DOM::Tiny->new;
+ my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
+
+Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
+fragment if necessary.
+
+=head2 next
+
+ my $sibling = $dom->next;
+
+Return L<DOM::Tiny> object for next sibling element or C<undef> if there are no
+more siblings.
+
+ # "<h2>123</h2>"
+ $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
+
+=head2 next_node
+
+ my $sibling = $dom->next_node;
+
+Return L<DOM::Tiny> object for next sibling node or C<undef> if there are no
+more siblings.
+
+ # "456"
+ $dom->parse('
')
+ ->at('b')->next_node->content;
+
+=head2 parent
+
+ my $parent = $dom->parent;
+
+Return L<DOM::Tiny> object for parent of this node or C<undef> if this node has
+no parent.
+
+=head2 parse
+
+ $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
+
+Parse HTML/XML fragment with L.
+
+ # Parse XML
+ my $dom = DOM::Tiny->new->xml(1)->parse($xml);
+
+=head2 preceding
+
+ my $collection = $dom->preceding;
+ my $collection = $dom->preceding('div ~ p');
+
+Find all sibling elements before this node matching the CSS selector and return
+a L object containing these elements as L
+objects. All selectors from L are supported.
+
+ # List tags of sibling elements before this node
+ say $dom->preceding->map('tag')->join("\n");
+
+=head2 preceding_nodes
+
+ my $collection = $dom->preceding_nodes;
+
+Return a L object containing all sibling nodes before
+this node as L objects.
+
+ # "A"
+ $dom->parse('A
')->at('p')->prepend_content('123')->root;
+
+=head2 previous
+
+ my $sibling = $dom->previous;
+
+Return L<DOM::Tiny> object for previous sibling element or C<undef> if there
+are no more siblings.
+
+ # "<h1>Test</h1>"
+ $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
+
+=head2 previous_node
+
+ my $sibling = $dom->previous_node;
+
+Return L<DOM::Tiny> object for previous sibling node or C<undef> if there are
+no more siblings.
+
+ # "123"
+ $dom->parse('
');
+
+Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
+nodes) or L</"parent">.
+
+ # "<div><h2>123</h2></div>"
+ $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
+
+ # "<p><b>123</b></p>"
+ $dom->parse('<p>Test</p>')
+ ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
+
+=head2 root
+
+ my $root = $dom->root;
+
+Return L<DOM::Tiny> object for C<root> node.
+
+=head2 strip
+
+ my $parent = $dom->strip;
+
+Remove this element while preserving its content and return L</"parent">.
+
+ # "<div>Test</div>"
+ $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
+
+=head2 tag
+
+ my $tag = $dom->tag;
+ $dom = $dom->tag('div');
+
+This element's tag name.
+
+ # List tag names of child elements
+ say $dom->children->map('tag')->join("\n");
+
+=head2 tap
+
+ $dom = $dom->tap(sub {...});
+
+Alias for L.
+
+=head2 text
+
+ my $trimmed = $dom->text;
+ my $untrimmed = $dom->text(0);
+
+Extract text content from this element only (not including child elements),
+smart whitespace trimming is enabled by default.
+
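+ # "foo baz"
+ $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
+
+ # "foo\nbaz\n"
+ $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);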
+
+=head2 to_string
+
+ my $str = $dom->to_string;
+
+Render this node and its content to HTML/XML.
+
+ # "<b>Test</b>"
+ $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
+
+=head2 tree
+
+ my $tree = $dom->tree;
+ $dom = $dom->tree(['root']);
+
+Document Object Model. Note that this structure should only be used very
+carefully since it is very dynamic.
+
+=head2 type
+
+ my $type = $dom->type;
+
+This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
+C<root>, C<tag> or C<text>.
+
+ # "cdata"
+ $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
+
+ # "comment"
+ $dom->parse('<!-- Test -->')->child_nodes->first->type;
+
+ # "doctype"
+ $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
+
+ # "pi"
+ $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
+
+ # "raw"
+ $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
+
+ # "root"
+ $dom->parse('<p>Test</p>')->type;
+
+ # "tag"
+ $dom->parse('<p>Test</p>')->at('p')->type;
+
+ # "text"
+ $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
+
+=head2 val
+
+ my $value = $dom->val;
+
+Extract value from form element (such as C