X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=README.pod;h=29e6d4715813af545cfa6cf3bf07a498318a4f9b;hb=eb9737f29128e8d0c85fe9d2983abc937c9f3787;hp=3c86b613d3d7da53d6e90d05f0f21ca327f3521c;hpb=5a70ee9d2295cf74db3b0e2476e906cdcaee1ff3;p=catagits%2FDOM-Tiny.git
diff --git a/README.pod b/README.pod
index 3c86b61..29e6d47 100644
--- a/README.pod
+++ b/README.pod
@@ -35,10 +35,11 @@ DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
=head1 DESCRIPTION
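-L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser with
-support for the L<HTML Living Standard|https://html.spec.whatwg.org/> and
-L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/> based on L<Mojo::DOM>. It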
-will even try to interpret broken HTML and XML, so you should not use it for
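+L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
+on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
+and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
+matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will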
+even try to interpret broken HTML and XML, so you should not use it for
validation.
=head1 NODES AND ELEMENTS
@@ -76,8 +77,8 @@ names are lowercased and selectors need to be lowercase as well.
my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
say $dom->at('p[id]')->text;
-If XML processing instructions are found, the parser will automatically switch
-into XML mode and everything becomes case-sensitive.
+If an XML declaration is found, the parser will automatically switch into XML
+mode and everything becomes case-sensitive.
# XML semantics
my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
@@ -93,6 +94,264 @@ XML detection can also be disabled with the L</"xml"> method.
my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
say $dom->at('p[id]')->text;
+=head1 SELECTORS
+
+L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
+selectors that make sense for a standalone parser are supported.
+
+=over
+
+=item Z<>*
+
+Any element.
+
+ my $all = $dom->find('*');
+
+=item E
+
+An element of type C<E>.
+
+ my $title = $dom->at('title');
+
+=item E[foo]
+
+An C<E> element with a C<foo> attribute.
+
+ my $links = $dom->find('a[href]');
+
+=item E[foo="bar"]
+
+An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
+
+ my $case_sensitive = $dom->find('input[type="hidden"]');
+ my $case_sensitive = $dom->find('input[type=hidden]');
+
+=item E[foo="bar" i]
+
+An C<E> element whose C<foo> attribute value is exactly equal to any
+(ASCII-range) case-permutation of C<bar>. Note that this selector is
+EXPERIMENTAL and might change without warning!
+
+ my $case_insensitive = $dom->find('input[type="hidden" i]');
+ my $case_insensitive = $dom->find('input[type=hidden i]');
+ my $case_insensitive = $dom->find('input[class~="foo" i]');
+
+This selector is part of
+L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
+in progress.
+
+=item E[foo~="bar"]
+
+An C<E> element whose C<foo> attribute value is a list of whitespace-separated
+values, one of which is exactly equal to C<bar>.
+
+ my $foo = $dom->find('input[class~="foo"]');
+ my $foo = $dom->find('input[class~=foo]');
+
+=item E[foo^="bar"]
+
+An C<E> element whose C<foo> attribute value begins exactly with the string
+C<bar>.
+
+ my $begins_with = $dom->find('input[name^="f"]');
+ my $begins_with = $dom->find('input[name^=f]');
+
+=item E[foo$="bar"]
+
+An C<E> element whose C<foo> attribute value ends exactly with the string
+C<bar>.
+
+ my $ends_with = $dom->find('input[name$="o"]');
+ my $ends_with = $dom->find('input[name$=o]');
+
+=item E[foo*="bar"]
+
+An C<E> element whose C<foo> attribute value contains the substring C<bar>.
+
+ my $contains = $dom->find('input[name*="fo"]');
+ my $contains = $dom->find('input[name*=fo]');
+
+=item E:root
+
+An C<E> element, root of the document.
+
+ my $root = $dom->at(':root');
+
+=item E:nth-child(n)
+
+An C<E> element, the C<n-th> child of its parent.
+
+ my $third = $dom->find('div:nth-child(3)');
+ my $odd = $dom->find('div:nth-child(odd)');
+ my $even = $dom->find('div:nth-child(even)');
+ my $top3 = $dom->find('div:nth-child(-n+3)');
+
+=item E:nth-last-child(n)
+
+An C<E> element, the C<n-th> child of its parent, counting from the last one.
+
+ my $third = $dom->find('div:nth-last-child(3)');
+ my $odd = $dom->find('div:nth-last-child(odd)');
+ my $even = $dom->find('div:nth-last-child(even)');
+ my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
+
+=item E:nth-of-type(n)
+
+An C<E> element, the C<n-th> sibling of its type.
+
+ my $third = $dom->find('div:nth-of-type(3)');
+ my $odd = $dom->find('div:nth-of-type(odd)');
+ my $even = $dom->find('div:nth-of-type(even)');
+ my $top3 = $dom->find('div:nth-of-type(-n+3)');
+
+=item E:nth-last-of-type(n)
+
+An C<E> element, the C<n-th> sibling of its type, counting from the last one.
+
+ my $third = $dom->find('div:nth-last-of-type(3)');
+ my $odd = $dom->find('div:nth-last-of-type(odd)');
+ my $even = $dom->find('div:nth-last-of-type(even)');
+ my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
+
+=item E:first-child
+
+An C<E> element, first child of its parent.
+
+ my $first = $dom->find('div p:first-child');
+
+=item E:last-child
+
+An C<E> element, last child of its parent.
+
+ my $last = $dom->find('div p:last-child');
+
+=item E:first-of-type
+
+An C<E> element, first sibling of its type.
+
+ my $first = $dom->find('div p:first-of-type');
+
+=item E:last-of-type
+
+An C<E> element, last sibling of its type.
+
+ my $last = $dom->find('div p:last-of-type');
+
+=item E:only-child
+
+An C<E> element, only child of its parent.
+
+ my $lonely = $dom->find('div p:only-child');
+
+=item E:only-of-type
+
+An C<E> element, only sibling of its type.
+
+ my $lonely = $dom->find('div p:only-of-type');
+
+=item E:empty
+
+An C<E> element that has no children (including text nodes).
+
+ my $empty = $dom->find(':empty');
+
+=item E:checked
+
+A user interface element C<E> which is checked (for instance a radio-button or
+checkbox).
+
+ my $input = $dom->find(':checked');
+
+=item E.warning
+
+An C<E> element whose class is "warning".
+
+ my $warning = $dom->find('div.warning');
+
+=item E#myid
+
+An C<E> element with C<ID> equal to "myid".
+
+ my $foo = $dom->at('div#foo');
+
+=item E:not(s)
+
+An C<E> element that does not match simple selector C<s>.
+
+ my $others = $dom->find('div p:not(:first-child)');
+
+=item E F
+
+An C<F> element descendant of an C<E> element.
+
+ my $headlines = $dom->find('div h1');
+
+=item E E<gt> F
+
+An C<F> element child of an C<E> element.
+
+ my $headlines = $dom->find('html > body > div > h1');
+
+=item E + F
+
+An C<F> element immediately preceded by an C<E> element.
+
+ my $second = $dom->find('h1 + h2');
+
+=item E ~ F
+
+An C<F> element preceded by an C<E> element.
+
+ my $second = $dom->find('h1 ~ h2');
+
+=item E, F, G
+
+Elements of type C<E>, C<F> and C<G>.
+
+ my $headlines = $dom->find('h1, h2, h3');
+
+=item E[foo=bar][bar=baz]
+
+An C<E> element whose attributes match all following attribute selectors.
+
+ my $links = $dom->find('a[foo^=b][foo$=ar]');
+
+=back
+
+=head1 OPERATORS
+
+L<DOM::Tiny> overloads the following operators.
+
+=head2 array
+
+ my @nodes = @$dom;
+
+Alias for L</"child_nodes">.
+
+ # "<!-- Test -->"
+ $dom->parse('<!-- Test --><b>123</b>')->[0];
+
+=head2 bool
+
+ my $bool = !!$dom;
+
+Always true.
+
+=head2 hash
+
+ my %attrs = %$dom;
+
+Alias for L</"attr">.
+
+ # "test"
+ $dom->parse('