ported fixes from Mojolicious
[catagits/DOM-Tiny.git] / README.pod
CommitLineData
d6512b50 1=pod
2
3=encoding utf8
4
5=head1 NAME
6
7DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
8
9=head1 SYNOPSIS
10
11 use DOM::Tiny;
12
13 # Parse
14 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
15
16 # Find
17 say $dom->at('#b')->text;
18 say $dom->find('p')->map('text')->join("\n");
19 say $dom->find('[id]')->map(attr => 'id')->join("\n");
20
21 # Iterate
22 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
23
24 # Loop
25 for my $e ($dom->find('p[id]')->each) {
26 say $e->{id}, ':', $e->text;
27 }
28
29 # Modify
30 $dom->find('div p')->last->append('<p id="c">456</p>');
31 $dom->find(':not(p)')->map('strip');
32
33 # Render
34 say "$dom";
35
36=head1 DESCRIPTION
37
9a5f1e3f 38L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
39on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
40and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
41matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
42even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 43validation.
d6512b50 44
45=head1 NODES AND ELEMENTS
46
47When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
48
49 <!DOCTYPE html>
50 <html>
51 <head><title>Hello</title></head>
52 <body>World!</body>
53 </html>
54
55There are currently eight different kinds of nodes, C<cdata>, C<comment>,
56C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
57the type C<tag>.
58
59 root
60 |- doctype (html)
61 +- tag (html)
62 |- tag (head)
63 | +- tag (title)
64 | +- raw (Hello)
65 +- tag (body)
66 +- text (World!)
67
68While all node types are represented as L<DOM::Tiny> objects, some methods like
69L</"attr"> and L</"namespace"> only apply to elements.
70
71=head1 CASE-SENSITIVITY
72
73L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
74names are lowercased and selectors need to be lowercase as well.
75
76 # HTML semantics
77 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
78 say $dom->at('p[id]')->text;
79
eb9737f2 80If an XML declaration is found, the parser will automatically switch into XML
81mode and everything becomes case-sensitive.
d6512b50 82
83 # XML semantics
84 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
85 say $dom->at('P[ID]')->text;
86
87XML detection can also be disabled with the L</"xml"> method.
88
89 # Force XML semantics
90 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93 # Force HTML semantics
94 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
95 say $dom->at('p[id]')->text;
96
63873d67 97=head1 SELECTORS
98
99L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
100selectors that make sense for a standalone parser are supported.
101
03eb5521 102=over
103
104=item Z<>*
63873d67 105
106Any element.
107
108 my $all = $dom->find('*');
109
03eb5521 110=item E
63873d67 111
112An element of type C<E>.
113
114 my $title = $dom->at('title');
115
03eb5521 116=item E[foo]
63873d67 117
118An C<E> element with a C<foo> attribute.
119
120 my $links = $dom->find('a[href]');
121
03eb5521 122=item E[foo="bar"]
63873d67 123
124An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
125
126 my $case_sensitive = $dom->find('input[type="hidden"]');
127 my $case_sensitive = $dom->find('input[type=hidden]');
128
03eb5521 129=item E[foo="bar" i]
63873d67 130
131An C<E> element whose C<foo> attribute value is exactly equal to any
132(ASCII-range) case-permutation of C<bar>. Note that this selector is
133EXPERIMENTAL and might change without warning!
134
135 my $case_insensitive = $dom->find('input[type="hidden" i]');
136 my $case_insensitive = $dom->find('input[type=hidden i]');
137 my $case_insensitive = $dom->find('input[class~="foo" i]');
138
139This selector is part of
140L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
141in progress.
142
03eb5521 143=item E[foo~="bar"]
63873d67 144
145An C<E> element whose C<foo> attribute value is a list of whitespace-separated
146values, one of which is exactly equal to C<bar>.
147
148 my $foo = $dom->find('input[class~="foo"]');
149 my $foo = $dom->find('input[class~=foo]');
150
03eb5521 151=item E[foo^="bar"]
63873d67 152
153An C<E> element whose C<foo> attribute value begins exactly with the string
154C<bar>.
155
156 my $begins_with = $dom->find('input[name^="f"]');
157 my $begins_with = $dom->find('input[name^=f]');
158
03eb5521 159=item E[foo$="bar"]
63873d67 160
161An C<E> element whose C<foo> attribute value ends exactly with the string
162C<bar>.
163
164 my $ends_with = $dom->find('input[name$="o"]');
165 my $ends_with = $dom->find('input[name$=o]');
166
03eb5521 167=item E[foo*="bar"]
63873d67 168
169An C<E> element whose C<foo> attribute value contains the substring C<bar>.
170
171 my $contains = $dom->find('input[name*="fo"]');
172 my $contains = $dom->find('input[name*=fo]');
173
03eb5521 174=item E:root
63873d67 175
176An C<E> element, root of the document.
177
178 my $root = $dom->at(':root');
179
03eb5521 180=item E:nth-child(n)
63873d67 181
182An C<E> element, the C<n-th> child of its parent.
183
184 my $third = $dom->find('div:nth-child(3)');
185 my $odd = $dom->find('div:nth-child(odd)');
186 my $even = $dom->find('div:nth-child(even)');
187 my $top3 = $dom->find('div:nth-child(-n+3)');
188
03eb5521 189=item E:nth-last-child(n)
63873d67 190
191An C<E> element, the C<n-th> child of its parent, counting from the last one.
192
193 my $third = $dom->find('div:nth-last-child(3)');
194 my $odd = $dom->find('div:nth-last-child(odd)');
195 my $even = $dom->find('div:nth-last-child(even)');
196 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
197
03eb5521 198=item E:nth-of-type(n)
63873d67 199
200An C<E> element, the C<n-th> sibling of its type.
201
202 my $third = $dom->find('div:nth-of-type(3)');
203 my $odd = $dom->find('div:nth-of-type(odd)');
204 my $even = $dom->find('div:nth-of-type(even)');
205 my $top3 = $dom->find('div:nth-of-type(-n+3)');
206
03eb5521 207=item E:nth-last-of-type(n)
63873d67 208
209An C<E> element, the C<n-th> sibling of its type, counting from the last one.
210
211 my $third = $dom->find('div:nth-last-of-type(3)');
212 my $odd = $dom->find('div:nth-last-of-type(odd)');
213 my $even = $dom->find('div:nth-last-of-type(even)');
214 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
215
03eb5521 216=item E:first-child
63873d67 217
218An C<E> element, first child of its parent.
219
220 my $first = $dom->find('div p:first-child');
221
03eb5521 222=item E:last-child
63873d67 223
224An C<E> element, last child of its parent.
225
226 my $last = $dom->find('div p:last-child');
227
03eb5521 228=item E:first-of-type
63873d67 229
230An C<E> element, first sibling of its type.
231
232 my $first = $dom->find('div p:first-of-type');
233
03eb5521 234=item E:last-of-type
63873d67 235
236An C<E> element, last sibling of its type.
237
238 my $last = $dom->find('div p:last-of-type');
239
03eb5521 240=item E:only-child
63873d67 241
242An C<E> element, only child of its parent.
243
244 my $lonely = $dom->find('div p:only-child');
245
03eb5521 246=item E:only-of-type
63873d67 247
248An C<E> element, only sibling of its type.
249
250 my $lonely = $dom->find('div p:only-of-type');
251
03eb5521 252=item E:empty
63873d67 253
254An C<E> element that has no children (including text nodes).
255
256 my $empty = $dom->find(':empty');
257
03eb5521 258=item E:checked
63873d67 259
260A user interface element C<E> which is checked (for instance a radio-button or
261checkbox).
262
263 my $input = $dom->find(':checked');
264
03eb5521 265=item E.warning
63873d67 266
267An C<E> element whose class is "warning".
268
269 my $warning = $dom->find('div.warning');
270
03eb5521 271=item E#myid
63873d67 272
273An C<E> element with C<ID> equal to "myid".
274
275 my $foo = $dom->at('div#foo');
276
03eb5521 277=item E:not(s)
63873d67 278
279An C<E> element that does not match simple selector C<s>.
280
281 my $others = $dom->find('div p:not(:first-child)');
282
03eb5521 283=item E F
63873d67 284
285An C<F> element descendant of an C<E> element.
286
287 my $headlines = $dom->find('div h1');
288
03eb5521 289=item E E<gt> F
63873d67 290
291An C<F> element child of an C<E> element.
292
293 my $headlines = $dom->find('html > body > div > h1');
294
03eb5521 295=item E + F
63873d67 296
297An C<F> element immediately preceded by an C<E> element.
298
299 my $second = $dom->find('h1 + h2');
300
03eb5521 301=item E ~ F
63873d67 302
303An C<F> element preceded by an C<E> element.
304
305 my $second = $dom->find('h1 ~ h2');
306
03eb5521 307=item E, F, G
63873d67 308
309Elements of type C<E>, C<F> and C<G>.
310
311 my $headlines = $dom->find('h1, h2, h3');
312
03eb5521 313=item E[foo=bar][bar=baz]
63873d67 314
315An C<E> element whose attributes match all following attribute selectors.
316
317 my $links = $dom->find('a[foo^=b][foo$=ar]');
318
03eb5521 319=back
320
9a5f1e3f 321=head1 OPERATORS
322
323L<DOM::Tiny> overloads the following operators.
324
325=head2 array
326
327 my @nodes = @$dom;
328
329Alias for L</"child_nodes">.
330
331 # "<!-- Test -->"
332 $dom->parse('<!-- Test --><b>123</b>')->[0];
333
334=head2 bool
335
336 my $bool = !!$dom;
337
338Always true.
339
340=head2 hash
341
342 my %attrs = %$dom;
343
344Alias for L</"attr">.
345
346 # "test"
347 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
348
349=head2 stringify
350
351 my $str = "$dom";
352
353Alias for L</"to_string">.
354
d6512b50 355=head1 METHODS
356
357L<DOM::Tiny> implements the following methods.
358
3793c28f 359=head2 new
360
361 my $dom = DOM::Tiny->new;
362 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
363
364Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
365fragment if necessary.
366
d6512b50 367=head2 all_text
368
369 my $trimmed = $dom->all_text;
370 my $untrimmed = $dom->all_text(0);
371
372Extract text content from all descendant nodes of this element, smart
373whitespace trimming is enabled by default.
374
375 # "foo bar baz"
376 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
377
378 # "foo\nbarbaz\n"
379 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
380
381=head2 ancestors
382
383 my $collection = $dom->ancestors;
384 my $collection = $dom->ancestors('div ~ p');
385
386Find all ancestor elements of this node matching the CSS selector and return a
8563f527 387L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 388objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 389
390 # List tag names of ancestor elements
391 say $dom->ancestors->map('tag')->join("\n");
392
393=head2 append
394
395 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
396
c7cad649 397Append HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 398
399 # "<div><h1>Test</h1><h2>123</h2></div>"
400 $dom->parse('<div><h1>Test</h1></div>')
401 ->at('h1')->append('<h2>123</h2>')->root;
402
403 # "<p>Test 123</p>"
404 $dom->parse('<p>Test</p>')->at('p')
405 ->child_nodes->first->append(' 123')->root;
406
407=head2 append_content
408
409 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
410
411Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
412node's content.
413
414 # "<div><h1>Test123</h1></div>"
415 $dom->parse('<div><h1>Test</h1></div>')
416 ->at('h1')->append_content('123')->root;
417
418 # "<!-- Test 123 --><br>"
419 $dom->parse('<!-- Test --><br>')
420 ->child_nodes->first->append_content('123 ')->root;
421
422 # "<p>Test<i>123</i></p>"
423 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
424
425=head2 at
426
427 my $result = $dom->at('div ~ p');
428
429Find first descendant element of this element matching the CSS selector and
eb9737f2 430return it as a L<DOM::Tiny> object, or C<undef> if none could be found. All
431selectors listed in L</"SELECTORS"> are supported.
d6512b50 432
433 # Find first element with "svg" namespace definition
434 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
435
436=head2 attr
437
438 my $hash = $dom->attr;
439 my $foo = $dom->attr('foo');
440 $dom = $dom->attr({foo => 'bar'});
441 $dom = $dom->attr(foo => 'bar');
442
443This element's attributes.
444
445 # Remove an attribute
446 delete $dom->attr->{id};
447
448 # Attribute without value
449 $dom->attr(selected => undef);
450
451 # List id attributes
452 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
453
454=head2 child_nodes
455
456 my $collection = $dom->child_nodes;
457
8563f527 458Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 459element as L<DOM::Tiny> objects.
460
461 # "<p><b>123</b></p>"
462 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
463
464 # "<!DOCTYPE html>"
465 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
466
467 # " Test "
468 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
469
470=head2 children
471
472 my $collection = $dom->children;
473 my $collection = $dom->children('div ~ p');
474
475Find all child elements of this element matching the CSS selector and return a
8563f527 476L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 477objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 478
479 # Show tag name of random child element
480 say $dom->children->shuffle->first->tag;
481
482=head2 content
483
484 my $str = $dom->content;
485 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
486
487Return this node's content or replace it with HTML/XML fragment (for C<root>
488and C<tag> nodes) or raw content.
489
490 # "<b>Test</b>"
491 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
492
493 # "<div><h1>123</h1></div>"
494 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
495
496 # "<p><i>123</i></p>"
497 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
498
499 # "<div><h1></h1></div>"
500 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
501
502 # " Test "
503 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
504
505 # "<div><!-- 123 -->456</div>"
506 $dom->parse('<div><!-- Test -->456</div>')
507 ->at('div')->child_nodes->first->content(' 123 ')->root;
508
509=head2 descendant_nodes
510
511 my $collection = $dom->descendant_nodes;
512
8563f527 513Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 514this element as L<DOM::Tiny> objects.
515
516 # "<p><b>123</b></p>"
517 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
518 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
519 ->map('remove')->first;
520
521 # "<p><b>test</b>test</p>"
522 $dom->parse('<p><b>123</b>456</p>')
523 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
524 ->map(content => 'test')->first->root;
525
526=head2 find
527
528 my $collection = $dom->find('div ~ p');
529
530Find all descendant elements of this element matching the CSS selector and
8563f527 531return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 532L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 533
534 # Find a specific element and extract information
535 my $id = $dom->find('div')->[23]{id};
536
537 # Extract information from multiple elements
538 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
539
540 # Count all the different tags
541 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
542
543 # Find elements with a class that contains dots
544 my @divs = $dom->find('div.foo\.bar')->each;
545
546=head2 following
547
548 my $collection = $dom->following;
549 my $collection = $dom->following('div ~ p');
550
551Find all sibling elements after this node matching the CSS selector and return
8563f527 552a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 553objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 554
555 # List tags of sibling elements after this node
556 say $dom->following->map('tag')->join("\n");
557
558=head2 following_nodes
559
560 my $collection = $dom->following_nodes;
561
8563f527 562Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 563this node as L<DOM::Tiny> objects.
564
565 # "C"
566 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
567
568=head2 matches
569
570 my $bool = $dom->matches('div ~ p');
571
9a5f1e3f 572Check if this element matches the CSS selector. All selectors listed in
573L</"SELECTORS"> are supported.
d6512b50 574
575 # True
576 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
577 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
578
579 # False
580 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
581 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
582
583=head2 namespace
584
585 my $namespace = $dom->namespace;
586
eb9737f2 587Find this element's namespace, or return C<undef> if none could be found.
d6512b50 588
589 # Find namespace for an element with namespace prefix
590 my $namespace = $dom->at('svg > svg\:circle')->namespace;
591
592 # Find namespace for an element that may or may not have a namespace prefix
593 my $namespace = $dom->at('svg > circle')->namespace;
594
d6512b50 595=head2 next
596
597 my $sibling = $dom->next;
598
eb9737f2 599Return L<DOM::Tiny> object for next sibling element, or C<undef> if there are
600no more siblings.
d6512b50 601
602 # "<h2>123</h2>"
603 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
604
605=head2 next_node
606
607 my $sibling = $dom->next_node;
608
eb9737f2 609Return L<DOM::Tiny> object for next sibling node, or C<undef> if there are no
d6512b50 610more siblings.
611
612 # "456"
613 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
614 ->at('b')->next_node->next_node;
615
616 # " Test "
617 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
618 ->at('b')->next_node->content;
619
620=head2 parent
621
622 my $parent = $dom->parent;
623
eb9737f2 624Return L<DOM::Tiny> object for parent of this node, or C<undef> if this node
625has no parent.
626
627 # "<b><i>Test</i></b>"
628 $dom->parse('<p><b><i>Test</i></b></p>')->at('i')->parent;
d6512b50 629
630=head2 parse
631
632 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
633
9a5f1e3f 634Parse HTML/XML fragment.
d6512b50 635
636 # Parse XML
eb9737f2 637 my $dom = DOM::Tiny->new->xml(1)->parse('<foo>I ♥ DOM::Tiny!</foo>');
d6512b50 638
639=head2 preceding
640
641 my $collection = $dom->preceding;
642 my $collection = $dom->preceding('div ~ p');
643
644Find all sibling elements before this node matching the CSS selector and return
8563f527 645a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 646objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 647
648 # List tags of sibling elements before this node
649 say $dom->preceding->map('tag')->join("\n");
650
651=head2 preceding_nodes
652
653 my $collection = $dom->preceding_nodes;
654
8563f527 655Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
656before this node as L<DOM::Tiny> objects.
d6512b50 657
658 # "A"
659 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
660
661=head2 prepend
662
663 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
664
c7cad649 665Prepend HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 666
667 # "<div><h1>Test</h1><h2>123</h2></div>"
668 $dom->parse('<div><h2>123</h2></div>')
669 ->at('h2')->prepend('<h1>Test</h1>')->root;
670
671 # "<p>Test 123</p>"
672 $dom->parse('<p>123</p>')
673 ->at('p')->child_nodes->first->prepend('Test ')->root;
674
675=head2 prepend_content
676
677 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
678
679Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
680node's content.
681
682 # "<div><h2>Test123</h2></div>"
683 $dom->parse('<div><h2>123</h2></div>')
684 ->at('h2')->prepend_content('Test')->root;
685
686 # "<!-- Test 123 --><br>"
687 $dom->parse('<!-- 123 --><br>')
688 ->child_nodes->first->prepend_content(' Test')->root;
689
690 # "<p><i>123</i>Test</p>"
691 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
692
693=head2 previous
694
695 my $sibling = $dom->previous;
696
eb9737f2 697Return L<DOM::Tiny> object for previous sibling element, or C<undef> if there
d6512b50 698are no more siblings.
699
700 # "<h1>Test</h1>"
701 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
702
703=head2 previous_node
704
705 my $sibling = $dom->previous_node;
706
eb9737f2 707Return L<DOM::Tiny> object for previous sibling node, or C<undef> if there are
d6512b50 708no more siblings.
709
710 # "123"
711 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
712 ->at('b')->previous_node->previous_node;
713
714 # " Test "
715 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
716 ->at('b')->previous_node->content;
717
718=head2 remove
719
720 my $parent = $dom->remove;
721
722Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
723
724 # "<div></div>"
725 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
726
727 # "<p><b>456</b></p>"
728 $dom->parse('<p>123<b>456</b></p>')
729 ->at('p')->child_nodes->first->remove->root;
730
731=head2 replace
732
733 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
734
735Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
736nodes) or L</"parent">.
737
738 # "<div><h2>123</h2></div>"
739 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
740
741 # "<p><b>123</b></p>"
742 $dom->parse('<p>Test</p>')
743 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
744
745=head2 root
746
747 my $root = $dom->root;
748
749Return L<DOM::Tiny> object for C<root> node.
750
751=head2 strip
752
753 my $parent = $dom->strip;
754
755Remove this element while preserving its content and return L</"parent">.
756
757 # "<div>Test</div>"
758 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
759
760=head2 tag
761
762 my $tag = $dom->tag;
763 $dom = $dom->tag('div');
764
765This element's tag name.
766
767 # List tag names of child elements
768 say $dom->children->map('tag')->join("\n");
769
770=head2 tap
771
772 $dom = $dom->tap(sub {...});
773
e99ef07d 774Equivalent to L<Mojo::Base/"tap">.
d6512b50 775
776=head2 text
777
778 my $trimmed = $dom->text;
779 my $untrimmed = $dom->text(0);
780
781Extract text content from this element only (not including child elements),
782smart whitespace trimming is enabled by default.
783
784 # "foo baz"
785 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
786
787 # "foo\nbaz\n"
788 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
789
790=head2 to_string
791
792 my $str = $dom->to_string;
793
794Render this node and its content to HTML/XML.
795
796 # "<b>Test</b>"
797 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
798
799=head2 tree
800
801 my $tree = $dom->tree;
802 $dom = $dom->tree(['root']);
803
804Document Object Model. Note that this structure should only be used very
805carefully since it is very dynamic.
806
807=head2 type
808
809 my $type = $dom->type;
810
811This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
812C<root>, C<tag> or C<text>.
813
814 # "cdata"
815 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
816
817 # "comment"
818 $dom->parse('<!-- Test -->')->child_nodes->first->type;
819
820 # "doctype"
821 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
822
823 # "pi"
824 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
825
826 # "raw"
827 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
828
829 # "root"
830 $dom->parse('<p>Test</p>')->type;
831
832 # "tag"
833 $dom->parse('<p>Test</p>')->at('p')->type;
834
835 # "text"
836 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
837
838=head2 val
839
840 my $value = $dom->val;
841
842Extract value from form element (such as C<button>, C<input>, C<option>,
eb9737f2 843C<select> and C<textarea>), or return C<undef> if this element has no value. In
d6512b50 844the case of C<select> with C<multiple> attribute, find C<option> elements with
eb9737f2 845C<selected> attribute and return an array reference with all values, or
846C<undef> if none could be found.
d6512b50 847
848 # "a"
4b5e2513 849 $dom->parse('<input name=test value=a>')->at('input')->val;
d6512b50 850
851 # "b"
852 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
853
854 # "c"
855 $dom->parse('<option value="c">Test</option>')->at('option')->val;
856
857 # "d"
858 $dom->parse('<select><option selected>d</option></select>')
859 ->at('select')->val;
860
861 # "e"
862 $dom->parse('<select multiple><option selected>e</option></select>')
863 ->at('select')->val->[0];
864
4b5e2513 865 # "on"
866 $dom->parse('<input name=test type=checkbox>')->at('input')->val;
867
d6512b50 868=head2 wrap
869
870 $dom = $dom->wrap('<div></div>');
871
c7cad649 872Wrap HTML/XML fragment around this node (for all node types other than C<root>),
873placing it as the last child of the first innermost element.
d6512b50 874
875 # "<p>123<b>Test</b></p>"
876 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
877
878 # "<div><p><b>Test</b></p>123</div>"
879 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
880
881 # "<p><b>Test</b></p><p>123</p>"
882 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
883
884 # "<p><b>Test</b></p>"
885 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
886
887=head2 wrap_content
888
889 $dom = $dom->wrap_content('<div></div>');
890
c7cad649 891Wrap HTML/XML fragment around this node's content (for C<root> and C<tag>
892nodes), placing it as the last children of the first innermost element.
d6512b50 893
894 # "<p><b>123Test</b></p>"
895 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
896
897 # "<p><b>Test</b></p><p>123</p>"
898 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
899
900=head2 xml
901
902 my $bool = $dom->xml;
903 $dom = $dom->xml($bool);
904
905Disable HTML semantics in parser and activate case-sensitivity, defaults to
eb9737f2 906auto detection based on XML declarations.
d6512b50 907
78ba4051 908=head1 COLLECTION METHODS
909
9a5f1e3f 910Some L<DOM::Tiny> methods return an array-based collection object based on
911L<Mojo::Collection>, which can either be accessed directly as an array
912reference, or with the following methods.
78ba4051 913
914 # Chain methods
915 $collection->map(sub { ucfirst })->shuffle->each(sub {
916 my ($word, $num) = @_;
917 say "$num: $word";
918 });
919
920 # Access array directly to manipulate collection
921 $collection->[23] += 100;
922 say for @$collection;
923
924=head2 compact
925
926 my $new = $collection->compact;
927
928Create a new collection with all elements that are defined and not an empty
929string.
930
931 # $collection contains (0, 1, undef, 2, '', 3)
932 $collection->compact->join(', '); # "0, 1, 2, 3"
933
934=head2 each
935
936 my @elements = $collection->each;
937 $collection = $collection->each(sub {...});
938
939Evaluate callback for each element in collection or return all elements as a
940list if none has been provided. The element will be the first argument passed
941to the callback and is also available as C<$_>.
942
943 # Make a numbered list
944 $collection->each(sub {
945 my ($e, $num) = @_;
946 say "$num: $e";
947 });
948
949=head2 first
950
951 my $first = $collection->first;
952 my $first = $collection->first(qr/foo/);
953 my $first = $collection->first(sub {...});
954 my $first = $collection->first($method);
955 my $first = $collection->first($method, @args);
956
957Evaluate regular expression/callback for, or call method on, each element in
958collection and return the first one that matched the regular expression, or for
959which the callback/method returned true. The element will be the first argument
960passed to the callback and is also available as C<$_>.
961
962 # Longer version
963 my $first = $collection->first(sub { $_->$method(@args) });
964
c7bc4d31 965 # Find first value that contains the word "tiny"
966 my $interesting = $collection->first(qr/tiny/i);
78ba4051 967
968 # Find first value that is greater than 5
969 my $greater = $collection->first(sub { $_ > 5 });
970
971=head2 flatten
972
973 my $new = $collection->flatten;
974
975Flatten nested collections/arrays recursively and create a new collection with
976all elements.
977
978 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
979 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
980
981=head2 grep
982
983 my $new = $collection->grep(qr/foo/);
984 my $new = $collection->grep(sub {...});
985 my $new = $collection->grep($method);
986 my $new = $collection->grep($method, @args);
987
988Evaluate regular expression/callback for, or call method on, each element in
989collection and create a new collection with all elements that matched the
990regular expression, or for which the callback/method returned true. The element
991will be the first argument passed to the callback and is also available as
992C<$_>.
993
994 # Longer version
995 my $new = $collection->grep(sub { $_->$method(@args) });
996
c7bc4d31 997 # Find all values that contain the word "tiny"
998 my $interesting = $collection->grep(qr/tiny/i);
78ba4051 999
1000 # Find all values that are greater than 5
1001 my $greater = $collection->grep(sub { $_ > 5 });
1002
1003=head2 join
1004
1005 my $string = $collection->join;
1006 my $string = $collection->join("\n");
1007
1008Turn collection into string.
1009
1010 # Join all values with commas
1011 $collection->join(', ');
1012
1013=head2 last
1014
1015 my $last = $collection->last;
1016
1017Return the last element in collection.
1018
1019=head2 map
1020
1021 my $new = $collection->map(sub {...});
1022 my $new = $collection->map($method);
1023 my $new = $collection->map($method, @args);
1024
1025Evaluate callback for, or call method on, each element in collection and create
1026a new collection from the results. The element will be the first argument
1027passed to the callback and is also available as C<$_>.
1028
1029 # Longer version
1030 my $new = $collection->map(sub { $_->$method(@args) });
1031
c7bc4d31 1032 # Append the word "tiny" to all values
1033 my $domified = $collection->map(sub { $_ . 'tiny' });
78ba4051 1034
1035=head2 reduce
1036
1037 my $result = $collection->reduce(sub {...});
1038 my $result = $collection->reduce(sub {...}, $initial);
1039
1040Reduce elements in collection with callback, the first element will be used as
1041initial value if none has been provided.
1042
1043 # Calculate the sum of all values
1044 my $sum = $collection->reduce(sub { $a + $b });
1045
1046 # Count how often each value occurs in collection
1047 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1048
1049=head2 reverse
1050
1051 my $new = $collection->reverse;
1052
1053Create a new collection with all elements in reverse order.
1054
1055=head2 slice
1056
1057 my $new = $collection->slice(4 .. 7);
1058
1059Create a new collection with all selected elements.
1060
1061 # $collection contains ('A', 'B', 'C', 'D', 'E')
1062 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1063
1064=head2 shuffle
1065
1066 my $new = $collection->shuffle;
1067
1068Create a new collection with all elements in random order.
1069
1070=head2 size
1071
1072 my $size = $collection->size;
1073
1074Number of elements in collection.
1075
1076=head2 sort
1077
1078 my $new = $collection->sort;
1079 my $new = $collection->sort(sub {...});
1080
1081Sort elements based on return value of callback and create a new collection
1082from the results.
1083
1084 # Sort values case-insensitive
1085 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1086
1087=head2 tap
1088
1089 $collection = $collection->tap(sub {...});
1090
1091Equivalent to L<Mojo::Base/"tap">.
1092
1093=head2 to_array
1094
1095 my $array = $collection->to_array;
1096
1097Turn collection into array reference.
1098
1099=head2 uniq
1100
1101 my $new = $collection->uniq;
1102 my $new = $collection->uniq(sub {...});
1103 my $new = $collection->uniq($method);
1104 my $new = $collection->uniq($method, @args);
1105
1106Create a new collection without duplicate elements, using the string
1107representation of either the elements or the return value of the
1108callback/method.
1109
1110 # Longer version
1111 my $new = $collection->uniq(sub { $_->$method(@args) });
1112
1113 # $collection contains ('foo', 'bar', 'bar', 'baz')
1114 $collection->uniq->join(' '); # "foo bar baz"
1115
1116 # $collection contains ([1, 2], [2, 1], [3, 2])
1117 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1118
d6512b50 1119=head1 BUGS
1120
1121Report any issues on the public bugtracker.
1122
1123=head1 AUTHOR
1124
1125Dan Book <dbook@cpan.org>
1126
9ba11a91 1127Code and tests adapted from L<Mojo::DOM>, a lightweight DOM parser by the L<Mojolicious> team.
7218d584 1128
2d9f5165 1129=head1 CONTRIBUTORS
1130
1131=over
1132
1133=item Matt S Trout (mst)
1134
1135=back
1136
d6512b50 1137=head1 COPYRIGHT AND LICENSE
1138
9ba11a91 1139Copyright (c) 2008-2015 Sebastian Riedel.
1140
1141Copyright (c) 2015 L</"AUTHOR"> and L</"CONTRIBUTORS"> for adaptation to standalone format.
d6512b50 1142
1143This is free software, licensed under:
1144
1145 The Artistic License 2.0 (GPL Compatible)
1146
1147=head1 SEE ALSO
1148
31877452 1149L<Mojo::DOM>, L<HTML::TreeBuilder>, L<XML::LibXML>, L<XML::Twig>, L<XML::Smart>
d6512b50 1150
9a5f1e3f 1151=for Pod::Coverage TO_JSON
1152
d6512b50 1153=cut