rip DOM::Tiny code out and replace with a use base of Mojo::DOM58
[catagits/DOM-Tiny.git] / lib / DOM / Tiny.pm
CommitLineData
a292be34 1package DOM::Tiny;
2
3use strict;
4use warnings;
c0f0ce89 5use base qw(Mojo::DOM58);
d6512b50 6
a292be34 71;
8
d6512b50 9=encoding utf8
10
a292be34 11=head1 NAME
12
d6512b50 13DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
a292be34 14
15=head1 SYNOPSIS
16
d6512b50 17 use DOM::Tiny;
18
19 # Parse
20 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
21
22 # Find
23 say $dom->at('#b')->text;
24 say $dom->find('p')->map('text')->join("\n");
25 say $dom->find('[id]')->map(attr => 'id')->join("\n");
26
27 # Iterate
28 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
29
30 # Loop
31 for my $e ($dom->find('p[id]')->each) {
32 say $e->{id}, ':', $e->text;
33 }
34
35 # Modify
36 $dom->find('div p')->last->append('<p id="c">456</p>');
37 $dom->find(':not(p)')->map('strip');
38
39 # Render
40 say "$dom";
41
a292be34 42=head1 DESCRIPTION
43
9a5f1e3f 44L<DOM::Tiny> is a minimalistic and relaxed pure-Perl HTML/XML DOM parser based
45on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
46and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
47matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
48even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 49validation.
d6512b50 50
51=head1 NODES AND ELEMENTS
52
53When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
54
55 <!DOCTYPE html>
56 <html>
57 <head><title>Hello</title></head>
58 <body>World!</body>
59 </html>
60
61There are currently eight different kinds of nodes, C<cdata>, C<comment>,
62C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
63the type C<tag>.
64
65 root
66 |- doctype (html)
67 +- tag (html)
68 |- tag (head)
69 | +- tag (title)
70 | +- raw (Hello)
71 +- tag (body)
72 +- text (World!)
73
74While all node types are represented as L<DOM::Tiny> objects, some methods like
75L</"attr"> and L</"namespace"> only apply to elements.
76
77=head1 CASE-SENSITIVITY
78
79L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
80names are lowercased and selectors need to be lowercase as well.
81
82 # HTML semantics
83 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
84 say $dom->at('p[id]')->text;
85
eb9737f2 86If an XML declaration is found, the parser will automatically switch into XML
87mode and everything becomes case-sensitive.
d6512b50 88
89 # XML semantics
90 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93XML detection can also be disabled with the L</"xml"> method.
94
95 # Force XML semantics
96 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
97 say $dom->at('P[ID]')->text;
98
99 # Force HTML semantics
100 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
101 say $dom->at('p[id]')->text;
102
63873d67 103=head1 SELECTORS
104
105L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
106selectors that make sense for a standalone parser are supported.
107
03eb5521 108=over
109
110=item Z<>*
63873d67 111
112Any element.
113
114 my $all = $dom->find('*');
115
03eb5521 116=item E
63873d67 117
118An element of type C<E>.
119
120 my $title = $dom->at('title');
121
03eb5521 122=item E[foo]
63873d67 123
124An C<E> element with a C<foo> attribute.
125
126 my $links = $dom->find('a[href]');
127
03eb5521 128=item E[foo="bar"]
63873d67 129
130An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
131
132 my $case_sensitive = $dom->find('input[type="hidden"]');
133 my $case_sensitive = $dom->find('input[type=hidden]');
134
03eb5521 135=item E[foo="bar" i]
63873d67 136
137An C<E> element whose C<foo> attribute value is exactly equal to any
138(ASCII-range) case-permutation of C<bar>. Note that this selector is
139EXPERIMENTAL and might change without warning!
140
141 my $case_insensitive = $dom->find('input[type="hidden" i]');
142 my $case_insensitive = $dom->find('input[type=hidden i]');
143 my $case_insensitive = $dom->find('input[class~="foo" i]');
144
145This selector is part of
146L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
147in progress.
148
03eb5521 149=item E[foo~="bar"]
63873d67 150
151An C<E> element whose C<foo> attribute value is a list of whitespace-separated
152values, one of which is exactly equal to C<bar>.
153
154 my $foo = $dom->find('input[class~="foo"]');
155 my $foo = $dom->find('input[class~=foo]');
156
03eb5521 157=item E[foo^="bar"]
63873d67 158
159An C<E> element whose C<foo> attribute value begins exactly with the string
160C<bar>.
161
162 my $begins_with = $dom->find('input[name^="f"]');
163 my $begins_with = $dom->find('input[name^=f]');
164
03eb5521 165=item E[foo$="bar"]
63873d67 166
167An C<E> element whose C<foo> attribute value ends exactly with the string
168C<bar>.
169
170 my $ends_with = $dom->find('input[name$="o"]');
171 my $ends_with = $dom->find('input[name$=o]');
172
03eb5521 173=item E[foo*="bar"]
63873d67 174
175An C<E> element whose C<foo> attribute value contains the substring C<bar>.
176
177 my $contains = $dom->find('input[name*="fo"]');
178 my $contains = $dom->find('input[name*=fo]');
179
03eb5521 180=item E:root
63873d67 181
182An C<E> element, root of the document.
183
184 my $root = $dom->at(':root');
185
03eb5521 186=item E:nth-child(n)
63873d67 187
188An C<E> element, the C<n-th> child of its parent.
189
190 my $third = $dom->find('div:nth-child(3)');
191 my $odd = $dom->find('div:nth-child(odd)');
192 my $even = $dom->find('div:nth-child(even)');
193 my $top3 = $dom->find('div:nth-child(-n+3)');
194
03eb5521 195=item E:nth-last-child(n)
63873d67 196
197An C<E> element, the C<n-th> child of its parent, counting from the last one.
198
199 my $third = $dom->find('div:nth-last-child(3)');
200 my $odd = $dom->find('div:nth-last-child(odd)');
201 my $even = $dom->find('div:nth-last-child(even)');
202 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
203
03eb5521 204=item E:nth-of-type(n)
63873d67 205
206An C<E> element, the C<n-th> sibling of its type.
207
208 my $third = $dom->find('div:nth-of-type(3)');
209 my $odd = $dom->find('div:nth-of-type(odd)');
210 my $even = $dom->find('div:nth-of-type(even)');
211 my $top3 = $dom->find('div:nth-of-type(-n+3)');
212
03eb5521 213=item E:nth-last-of-type(n)
63873d67 214
215An C<E> element, the C<n-th> sibling of its type, counting from the last one.
216
217 my $third = $dom->find('div:nth-last-of-type(3)');
218 my $odd = $dom->find('div:nth-last-of-type(odd)');
219 my $even = $dom->find('div:nth-last-of-type(even)');
220 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
221
03eb5521 222=item E:first-child
63873d67 223
224An C<E> element, first child of its parent.
225
226 my $first = $dom->find('div p:first-child');
227
03eb5521 228=item E:last-child
63873d67 229
230An C<E> element, last child of its parent.
231
232 my $last = $dom->find('div p:last-child');
233
03eb5521 234=item E:first-of-type
63873d67 235
236An C<E> element, first sibling of its type.
237
238 my $first = $dom->find('div p:first-of-type');
239
03eb5521 240=item E:last-of-type
63873d67 241
242An C<E> element, last sibling of its type.
243
244 my $last = $dom->find('div p:last-of-type');
245
03eb5521 246=item E:only-child
63873d67 247
248An C<E> element, only child of its parent.
249
250 my $lonely = $dom->find('div p:only-child');
251
03eb5521 252=item E:only-of-type
63873d67 253
254An C<E> element, only sibling of its type.
255
256 my $lonely = $dom->find('div p:only-of-type');
257
03eb5521 258=item E:empty
63873d67 259
260An C<E> element that has no children (including text nodes).
261
262 my $empty = $dom->find(':empty');
263
03eb5521 264=item E:checked
63873d67 265
266A user interface element C<E> which is checked (for instance a radio-button or
267checkbox).
268
269 my $input = $dom->find(':checked');
270
03eb5521 271=item E.warning
63873d67 272
273An C<E> element whose class is "warning".
274
275 my $warning = $dom->find('div.warning');
276
03eb5521 277=item E#myid
63873d67 278
279An C<E> element with C<ID> equal to "myid".
280
281 my $foo = $dom->at('div#foo');
282
03eb5521 283=item E:not(s)
63873d67 284
285An C<E> element that does not match simple selector C<s>.
286
287 my $others = $dom->find('div p:not(:first-child)');
288
03eb5521 289=item E F
63873d67 290
291An C<F> element descendant of an C<E> element.
292
293 my $headlines = $dom->find('div h1');
294
03eb5521 295=item E E<gt> F
63873d67 296
297An C<F> element child of an C<E> element.
298
299 my $headlines = $dom->find('html > body > div > h1');
300
03eb5521 301=item E + F
63873d67 302
303An C<F> element immediately preceded by an C<E> element.
304
305 my $second = $dom->find('h1 + h2');
306
03eb5521 307=item E ~ F
63873d67 308
309An C<F> element preceded by an C<E> element.
310
311 my $second = $dom->find('h1 ~ h2');
312
03eb5521 313=item E, F, G
63873d67 314
315Elements of type C<E>, C<F> and C<G>.
316
317 my $headlines = $dom->find('h1, h2, h3');
318
03eb5521 319=item E[foo=bar][bar=baz]
63873d67 320
321An C<E> element whose attributes match all following attribute selectors.
322
323 my $links = $dom->find('a[foo^=b][foo$=ar]');
324
03eb5521 325=back
326
9a5f1e3f 327=head1 OPERATORS
328
329L<DOM::Tiny> overloads the following operators.
330
331=head2 array
332
333 my @nodes = @$dom;
334
335Alias for L</"child_nodes">.
336
337 # "<!-- Test -->"
338 $dom->parse('<!-- Test --><b>123</b>')->[0];
339
340=head2 bool
341
342 my $bool = !!$dom;
343
344Always true.
345
346=head2 hash
347
348 my %attrs = %$dom;
349
350Alias for L</"attr">.
351
352 # "test"
353 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
354
355=head2 stringify
356
357 my $str = "$dom";
358
359Alias for L</"to_string">.
360
d6512b50 361=head1 METHODS
362
363L<DOM::Tiny> implements the following methods.
364
3793c28f 365=head2 new
366
367 my $dom = DOM::Tiny->new;
368 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
369
370Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
371fragment if necessary.
372
d6512b50 373=head2 all_text
374
375 my $trimmed = $dom->all_text;
376 my $untrimmed = $dom->all_text(0);
377
378Extract text content from all descendant nodes of this element, smart
379whitespace trimming is enabled by default.
380
381 # "foo bar baz"
382 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
383
384 # "foo\nbarbaz\n"
385 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
386
387=head2 ancestors
388
389 my $collection = $dom->ancestors;
390 my $collection = $dom->ancestors('div ~ p');
391
392Find all ancestor elements of this node matching the CSS selector and return a
8563f527 393L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 394objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 395
396 # List tag names of ancestor elements
397 say $dom->ancestors->map('tag')->join("\n");
398
399=head2 append
400
401 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
402
c7cad649 403Append HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 404
405 # "<div><h1>Test</h1><h2>123</h2></div>"
406 $dom->parse('<div><h1>Test</h1></div>')
407 ->at('h1')->append('<h2>123</h2>')->root;
408
409 # "<p>Test 123</p>"
410 $dom->parse('<p>Test</p>')->at('p')
411 ->child_nodes->first->append(' 123')->root;
412
413=head2 append_content
414
415 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
416
417Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
418node's content.
419
420 # "<div><h1>Test123</h1></div>"
421 $dom->parse('<div><h1>Test</h1></div>')
422 ->at('h1')->append_content('123')->root;
423
424 # "<!-- Test 123 --><br>"
425 $dom->parse('<!-- Test --><br>')
426 ->child_nodes->first->append_content('123 ')->root;
427
428 # "<p>Test<i>123</i></p>"
429 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
430
431=head2 at
432
433 my $result = $dom->at('div ~ p');
434
435Find first descendant element of this element matching the CSS selector and
eb9737f2 436return it as a L<DOM::Tiny> object, or C<undef> if none could be found. All
437selectors listed in L</"SELECTORS"> are supported.
d6512b50 438
439 # Find first element with "svg" namespace definition
440 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
441
442=head2 attr
443
444 my $hash = $dom->attr;
445 my $foo = $dom->attr('foo');
446 $dom = $dom->attr({foo => 'bar'});
447 $dom = $dom->attr(foo => 'bar');
448
449This element's attributes.
450
451 # Remove an attribute
452 delete $dom->attr->{id};
453
454 # Attribute without value
455 $dom->attr(selected => undef);
456
457 # List id attributes
458 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
459
460=head2 child_nodes
461
462 my $collection = $dom->child_nodes;
463
8563f527 464Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 465element as L<DOM::Tiny> objects.
466
467 # "<p><b>123</b></p>"
468 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
469
470 # "<!DOCTYPE html>"
471 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
472
473 # " Test "
474 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
475
476=head2 children
477
478 my $collection = $dom->children;
479 my $collection = $dom->children('div ~ p');
480
481Find all child elements of this element matching the CSS selector and return a
8563f527 482L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 483objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 484
485 # Show tag name of random child element
486 say $dom->children->shuffle->first->tag;
487
488=head2 content
489
490 my $str = $dom->content;
491 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
492
493Return this node's content or replace it with HTML/XML fragment (for C<root>
494and C<tag> nodes) or raw content.
495
496 # "<b>Test</b>"
497 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
498
499 # "<div><h1>123</h1></div>"
500 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
501
502 # "<p><i>123</i></p>"
503 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
504
505 # "<div><h1></h1></div>"
506 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
507
508 # " Test "
509 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
510
511 # "<div><!-- 123 -->456</div>"
512 $dom->parse('<div><!-- Test -->456</div>')
513 ->at('div')->child_nodes->first->content(' 123 ')->root;
514
515=head2 descendant_nodes
516
517 my $collection = $dom->descendant_nodes;
518
8563f527 519Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 520this element as L<DOM::Tiny> objects.
521
522 # "<p><b>123</b></p>"
523 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
524 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
525 ->map('remove')->first;
526
527 # "<p><b>test</b>test</p>"
528 $dom->parse('<p><b>123</b>456</p>')
529 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
530 ->map(content => 'test')->first->root;
531
532=head2 find
533
534 my $collection = $dom->find('div ~ p');
535
536Find all descendant elements of this element matching the CSS selector and
8563f527 537return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 538L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 539
540 # Find a specific element and extract information
541 my $id = $dom->find('div')->[23]{id};
542
543 # Extract information from multiple elements
544 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
545
546 # Count all the different tags
547 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
548
549 # Find elements with a class that contains dots
550 my @divs = $dom->find('div.foo\.bar')->each;
551
552=head2 following
553
554 my $collection = $dom->following;
555 my $collection = $dom->following('div ~ p');
556
557Find all sibling elements after this node matching the CSS selector and return
8563f527 558a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 559objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 560
561 # List tags of sibling elements after this node
562 say $dom->following->map('tag')->join("\n");
563
564=head2 following_nodes
565
566 my $collection = $dom->following_nodes;
567
8563f527 568Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 569this node as L<DOM::Tiny> objects.
570
571 # "C"
572 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
573
574=head2 matches
575
576 my $bool = $dom->matches('div ~ p');
577
9a5f1e3f 578Check if this element matches the CSS selector. All selectors listed in
579L</"SELECTORS"> are supported.
d6512b50 580
581 # True
582 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
583 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
584
585 # False
586 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
587 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
588
589=head2 namespace
590
591 my $namespace = $dom->namespace;
592
eb9737f2 593Find this element's namespace, or return C<undef> if none could be found.
d6512b50 594
595 # Find namespace for an element with namespace prefix
596 my $namespace = $dom->at('svg > svg\:circle')->namespace;
597
598 # Find namespace for an element that may or may not have a namespace prefix
599 my $namespace = $dom->at('svg > circle')->namespace;
600
d6512b50 601=head2 next
602
603 my $sibling = $dom->next;
604
eb9737f2 605Return L<DOM::Tiny> object for next sibling element, or C<undef> if there are
606no more siblings.
d6512b50 607
608 # "<h2>123</h2>"
609 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
610
611=head2 next_node
612
613 my $sibling = $dom->next_node;
614
eb9737f2 615Return L<DOM::Tiny> object for next sibling node, or C<undef> if there are no
d6512b50 616more siblings.
617
618 # "456"
619 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
620 ->at('b')->next_node->next_node;
621
622 # " Test "
623 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
624 ->at('b')->next_node->content;
625
626=head2 parent
627
628 my $parent = $dom->parent;
629
eb9737f2 630Return L<DOM::Tiny> object for parent of this node, or C<undef> if this node
631has no parent.
632
633 # "<b><i>Test</i></b>"
634 $dom->parse('<p><b><i>Test</i></b></p>')->at('i')->parent;
d6512b50 635
636=head2 parse
637
638 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
639
9a5f1e3f 640Parse HTML/XML fragment.
d6512b50 641
642 # Parse XML
eb9737f2 643 my $dom = DOM::Tiny->new->xml(1)->parse('<foo>I ♥ DOM::Tiny!</foo>');
d6512b50 644
645=head2 preceding
646
647 my $collection = $dom->preceding;
648 my $collection = $dom->preceding('div ~ p');
649
650Find all sibling elements before this node matching the CSS selector and return
8563f527 651a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 652objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 653
654 # List tags of sibling elements before this node
655 say $dom->preceding->map('tag')->join("\n");
656
657=head2 preceding_nodes
658
659 my $collection = $dom->preceding_nodes;
660
8563f527 661Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
662before this node as L<DOM::Tiny> objects.
d6512b50 663
664 # "A"
665 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
666
667=head2 prepend
668
669 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
670
c7cad649 671Prepend HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 672
673 # "<div><h1>Test</h1><h2>123</h2></div>"
674 $dom->parse('<div><h2>123</h2></div>')
675 ->at('h2')->prepend('<h1>Test</h1>')->root;
676
677 # "<p>Test 123</p>"
678 $dom->parse('<p>123</p>')
679 ->at('p')->child_nodes->first->prepend('Test ')->root;
680
681=head2 prepend_content
682
683 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
684
685Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
686node's content.
687
688 # "<div><h2>Test123</h2></div>"
689 $dom->parse('<div><h2>123</h2></div>')
690 ->at('h2')->prepend_content('Test')->root;
691
692 # "<!-- Test 123 --><br>"
693 $dom->parse('<!-- 123 --><br>')
694 ->child_nodes->first->prepend_content(' Test')->root;
695
696 # "<p><i>123</i>Test</p>"
697 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
698
699=head2 previous
700
701 my $sibling = $dom->previous;
702
eb9737f2 703Return L<DOM::Tiny> object for previous sibling element, or C<undef> if there
d6512b50 704are no more siblings.
705
706 # "<h1>Test</h1>"
707 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
708
709=head2 previous_node
710
711 my $sibling = $dom->previous_node;
712
eb9737f2 713Return L<DOM::Tiny> object for previous sibling node, or C<undef> if there are
d6512b50 714no more siblings.
715
716 # "123"
717 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
718 ->at('b')->previous_node->previous_node;
719
720 # " Test "
721 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
722 ->at('b')->previous_node->content;
723
724=head2 remove
725
726 my $parent = $dom->remove;
727
728Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
729
730 # "<div></div>"
731 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
732
733 # "<p><b>456</b></p>"
734 $dom->parse('<p>123<b>456</b></p>')
735 ->at('p')->child_nodes->first->remove->root;
736
737=head2 replace
738
739 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
740
741Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
742nodes) or L</"parent">.
743
744 # "<div><h2>123</h2></div>"
745 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
746
747 # "<p><b>123</b></p>"
748 $dom->parse('<p>Test</p>')
749 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
750
751=head2 root
752
753 my $root = $dom->root;
754
755Return L<DOM::Tiny> object for C<root> node.
756
757=head2 strip
758
759 my $parent = $dom->strip;
760
761Remove this element while preserving its content and return L</"parent">.
762
763 # "<div>Test</div>"
764 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
765
766=head2 tag
767
768 my $tag = $dom->tag;
769 $dom = $dom->tag('div');
770
771This element's tag name.
772
773 # List tag names of child elements
774 say $dom->children->map('tag')->join("\n");
775
776=head2 tap
777
778 $dom = $dom->tap(sub {...});
779
e99ef07d 780Equivalent to L<Mojo::Base/"tap">.
d6512b50 781
782=head2 text
783
784 my $trimmed = $dom->text;
785 my $untrimmed = $dom->text(0);
786
787Extract text content from this element only (not including child elements),
788smart whitespace trimming is enabled by default.
789
790 # "foo baz"
791 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
792
793 # "foo\nbaz\n"
794 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
795
796=head2 to_string
797
798 my $str = $dom->to_string;
799
800Render this node and its content to HTML/XML.
801
802 # "<b>Test</b>"
803 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
804
805=head2 tree
806
807 my $tree = $dom->tree;
808 $dom = $dom->tree(['root']);
809
810Document Object Model. Note that this structure should only be used very
811carefully since it is very dynamic.
812
813=head2 type
814
815 my $type = $dom->type;
816
817This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
818C<root>, C<tag> or C<text>.
819
820 # "cdata"
821 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
822
823 # "comment"
824 $dom->parse('<!-- Test -->')->child_nodes->first->type;
825
826 # "doctype"
827 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
828
829 # "pi"
830 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
831
832 # "raw"
833 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
834
835 # "root"
836 $dom->parse('<p>Test</p>')->type;
837
838 # "tag"
839 $dom->parse('<p>Test</p>')->at('p')->type;
840
841 # "text"
842 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
843
844=head2 val
845
846 my $value = $dom->val;
847
848Extract value from form element (such as C<button>, C<input>, C<option>,
eb9737f2 849C<select> and C<textarea>), or return C<undef> if this element has no value. In
d6512b50 850the case of C<select> with C<multiple> attribute, find C<option> elements with
eb9737f2 851C<selected> attribute and return an array reference with all values, or
852C<undef> if none could be found.
d6512b50 853
854 # "a"
4b5e2513 855 $dom->parse('<input name=test value=a>')->at('input')->val;
d6512b50 856
857 # "b"
858 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
859
860 # "c"
861 $dom->parse('<option value="c">Test</option>')->at('option')->val;
862
863 # "d"
864 $dom->parse('<select><option selected>d</option></select>')
865 ->at('select')->val;
866
867 # "e"
868 $dom->parse('<select multiple><option selected>e</option></select>')
869 ->at('select')->val->[0];
870
4b5e2513 871 # "on"
872 $dom->parse('<input name=test type=checkbox>')->at('input')->val;
873
d6512b50 874=head2 wrap
875
876 $dom = $dom->wrap('<div></div>');
877
c7cad649 878Wrap HTML/XML fragment around this node (for all node types other than C<root>),
879placing it as the last child of the first innermost element.
d6512b50 880
881 # "<p>123<b>Test</b></p>"
882 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
883
884 # "<div><p><b>Test</b></p>123</div>"
885 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
886
887 # "<p><b>Test</b></p><p>123</p>"
888 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
889
890 # "<p><b>Test</b></p>"
891 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
892
893=head2 wrap_content
894
895 $dom = $dom->wrap_content('<div></div>');
896
c7cad649 897Wrap HTML/XML fragment around this node's content (for C<root> and C<tag>
898nodes), placing it as the last children of the first innermost element.
d6512b50 899
900 # "<p><b>123Test</b></p>"
901 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
902
903 # "<p><b>Test</b></p><p>123</p>"
904 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
905
906=head2 xml
907
908 my $bool = $dom->xml;
909 $dom = $dom->xml($bool);
910
911Disable HTML semantics in parser and activate case-sensitivity, defaults to
eb9737f2 912auto detection based on XML declarations.
d6512b50 913
78ba4051 914=head1 COLLECTION METHODS
915
9a5f1e3f 916Some L<DOM::Tiny> methods return an array-based collection object based on
917L<Mojo::Collection>, which can either be accessed directly as an array
918reference, or with the following methods.
78ba4051 919
920 # Chain methods
921 $collection->map(sub { ucfirst })->shuffle->each(sub {
922 my ($word, $num) = @_;
923 say "$num: $word";
924 });
925
926 # Access array directly to manipulate collection
927 $collection->[23] += 100;
928 say for @$collection;
929
930=head2 compact
931
932 my $new = $collection->compact;
933
934Create a new collection with all elements that are defined and not an empty
935string.
936
937 # $collection contains (0, 1, undef, 2, '', 3)
938 $collection->compact->join(', '); # "0, 1, 2, 3"
939
940=head2 each
941
942 my @elements = $collection->each;
943 $collection = $collection->each(sub {...});
944
945Evaluate callback for each element in collection or return all elements as a
946list if none has been provided. The element will be the first argument passed
947to the callback and is also available as C<$_>.
948
949 # Make a numbered list
950 $collection->each(sub {
951 my ($e, $num) = @_;
952 say "$num: $e";
953 });
954
955=head2 first
956
957 my $first = $collection->first;
958 my $first = $collection->first(qr/foo/);
959 my $first = $collection->first(sub {...});
960 my $first = $collection->first($method);
961 my $first = $collection->first($method, @args);
962
963Evaluate regular expression/callback for, or call method on, each element in
964collection and return the first one that matched the regular expression, or for
965which the callback/method returned true. The element will be the first argument
966passed to the callback and is also available as C<$_>.
967
968 # Longer version
969 my $first = $collection->first(sub { $_->$method(@args) });
970
c7bc4d31 971 # Find first value that contains the word "tiny"
972 my $interesting = $collection->first(qr/tiny/i);
78ba4051 973
974 # Find first value that is greater than 5
975 my $greater = $collection->first(sub { $_ > 5 });
976
977=head2 flatten
978
979 my $new = $collection->flatten;
980
981Flatten nested collections/arrays recursively and create a new collection with
982all elements.
983
984 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
985 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
986
987=head2 grep
988
989 my $new = $collection->grep(qr/foo/);
990 my $new = $collection->grep(sub {...});
991 my $new = $collection->grep($method);
992 my $new = $collection->grep($method, @args);
993
994Evaluate regular expression/callback for, or call method on, each element in
995collection and create a new collection with all elements that matched the
996regular expression, or for which the callback/method returned true. The element
997will be the first argument passed to the callback and is also available as
998C<$_>.
999
1000 # Longer version
1001 my $new = $collection->grep(sub { $_->$method(@args) });
1002
c7bc4d31 1003 # Find all values that contain the word "tiny"
1004 my $interesting = $collection->grep(qr/tiny/i);
78ba4051 1005
1006 # Find all values that are greater than 5
1007 my $greater = $collection->grep(sub { $_ > 5 });
1008
1009=head2 join
1010
1011 my $stream = $collection->join;
1012 my $stream = $collection->join("\n");
1013
1014Turn collection into string.
1015
1016 # Join all values with commas
1017 $collection->join(', ');
1018
1019=head2 last
1020
1021 my $last = $collection->last;
1022
1023Return the last element in collection.
1024
1025=head2 map
1026
1027 my $new = $collection->map(sub {...});
1028 my $new = $collection->map($method);
1029 my $new = $collection->map($method, @args);
1030
1031Evaluate callback for, or call method on, each element in collection and create
1032a new collection from the results. The element will be the first argument
1033passed to the callback and is also available as C<$_>.
1034
1035 # Longer version
1036 my $new = $collection->map(sub { $_->$method(@args) });
1037
c7bc4d31 1038 # Append the word "tiny" to all values
1039 my $domified = $collection->map(sub { $_ . 'tiny' });
78ba4051 1040
1041=head2 reduce
1042
1043 my $result = $collection->reduce(sub {...});
1044 my $result = $collection->reduce(sub {...}, $initial);
1045
1046Reduce elements in collection with callback, the first element will be used as
1047initial value if none has been provided.
1048
1049 # Calculate the sum of all values
1050 my $sum = $collection->reduce(sub { $a + $b });
1051
1052 # Count how often each value occurs in collection
1053 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1054
1055=head2 reverse
1056
1057 my $new = $collection->reverse;
1058
1059Create a new collection with all elements in reverse order.
1060
1061=head2 slice
1062
1063 my $new = $collection->slice(4 .. 7);
1064
1065Create a new collection with all selected elements.
1066
1067 # $collection contains ('A', 'B', 'C', 'D', 'E')
1068 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1069
1070=head2 shuffle
1071
1072 my $new = $collection->shuffle;
1073
1074Create a new collection with all elements in random order.
1075
1076=head2 size
1077
1078 my $size = $collection->size;
1079
1080Number of elements in collection.
1081
1082=head2 sort
1083
1084 my $new = $collection->sort;
1085 my $new = $collection->sort(sub {...});
1086
1087Sort elements based on return value of callback and create a new collection
1088from the results.
1089
1090 # Sort values case-insensitively
1091 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1092
1093=head2 tap
1094
1095 $collection = $collection->tap(sub {...});
1096
1097Equivalent to L<Mojo::Base/"tap">.
1098
1099=head2 to_array
1100
1101 my $array = $collection->to_array;
1102
1103Turn collection into array reference.
1104
1105=head2 uniq
1106
1107 my $new = $collection->uniq;
1108 my $new = $collection->uniq(sub {...});
1109 my $new = $collection->uniq($method);
1110 my $new = $collection->uniq($method, @args);
1111
1112Create a new collection without duplicate elements, using the string
1113representation of either the elements or the return value of the
1114callback/method.
1115
1116 # Longer version
1117 my $new = $collection->uniq(sub { $_->$method(@args) });
1118
1119 # $collection contains ('foo', 'bar', 'bar', 'baz')
1120 $collection->uniq->join(' '); # "foo bar baz"
1121
1122 # $collection contains ([1, 2], [2, 1], [3, 2])
1123 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1124
a292be34 1125=head1 BUGS
1126
1127Report any issues on the public bugtracker.
1128
1129=head1 AUTHOR
1130
1131Dan Book <dbook@cpan.org>
1132
9ba11a91 1133Code and tests adapted from L<Mojo::DOM>, a lightweight DOM parser by the L<Mojolicious> team.
7218d584 1134
2d9f5165 1135=head1 CONTRIBUTORS
1136
1137=over
1138
1139=item Matt S Trout (mst)
1140
1141=back
1142
a292be34 1143=head1 COPYRIGHT AND LICENSE
1144
9ba11a91 1145Copyright (c) 2008-2015 Sebastian Riedel.
1146
1147Copyright (c) 2015 L</"AUTHOR"> and L</"CONTRIBUTORS"> for adaptation to standalone format.
a292be34 1148
1149This is free software, licensed under:
1150
1151 The Artistic License 2.0 (GPL Compatible)
1152
1153=head1 SEE ALSO
1154
31877452 1155L<Mojo::DOM>, L<HTML::TreeBuilder>, L<XML::LibXML>, L<XML::Twig>, L<XML::Smart>
9a5f1e3f 1156
1157=for Pod::Coverage TO_JSON
1158
1159=cut