merge wrap fixes from Mojolicious 6.34
[catagits/DOM-Tiny.git] / README.pod
CommitLineData
d6512b50 1=pod
2
3=encoding utf8
4
5=head1 NAME
6
7DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
8
9=head1 SYNOPSIS
10
11 use DOM::Tiny;
12
13 # Parse
14 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
15
16 # Find
17 say $dom->at('#b')->text;
18 say $dom->find('p')->map('text')->join("\n");
19 say $dom->find('[id]')->map(attr => 'id')->join("\n");
20
21 # Iterate
22 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
23
24 # Loop
25 for my $e ($dom->find('p[id]')->each) {
26 say $e->{id}, ':', $e->text;
27 }
28
29 # Modify
30 $dom->find('div p')->last->append('<p id="c">456</p>');
31 $dom->find(':not(p)')->map('strip');
32
33 # Render
34 say "$dom";
35
36=head1 DESCRIPTION
37
9a5f1e3f 38L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
39on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
40and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
41matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
42even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 43validation.
d6512b50 44
45=head1 NODES AND ELEMENTS
46
47When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
48
49 <!DOCTYPE html>
50 <html>
51 <head><title>Hello</title></head>
52 <body>World!</body>
53 </html>
54
55There are currently eight different kinds of nodes, C<cdata>, C<comment>,
56C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
57the type C<tag>.
58
59 root
60 |- doctype (html)
61 +- tag (html)
62    |- tag (head)
63    |  +- tag (title)
64    |     +- raw (Hello)
65    +- tag (body)
66       +- text (World!)
67
68While all node types are represented as L<DOM::Tiny> objects, some methods like
69L</"attr"> and L</"namespace"> only apply to elements.
70
71=head1 CASE-SENSITIVITY
72
73L<DOM::Tiny> defaults to HTML semantics, which means all tag and attribute
74names are lowercased and selectors need to be lowercase as well.
75
76 # HTML semantics
77 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
78 say $dom->at('p[id]')->text;
79
eb9737f2 80If an XML declaration is found, the parser will automatically switch into XML
81mode and everything becomes case-sensitive.
d6512b50 82
83 # XML semantics
84 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
85 say $dom->at('P[ID]')->text;
86
87XML detection can also be disabled with the L</"xml"> method.
88
89 # Force XML semantics
90 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93 # Force HTML semantics
94 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
95 say $dom->at('p[id]')->text;
96
63873d67 97=head1 SELECTORS
98
99L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
100selectors that make sense for a standalone parser are supported.
101
03eb5521 102=over
103
104=item Z<>*
63873d67 105
106Any element.
107
108 my $all = $dom->find('*');
109
03eb5521 110=item E
63873d67 111
112An element of type C<E>.
113
114 my $title = $dom->at('title');
115
03eb5521 116=item E[foo]
63873d67 117
118An C<E> element with a C<foo> attribute.
119
120 my $links = $dom->find('a[href]');
121
03eb5521 122=item E[foo="bar"]
63873d67 123
124An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
125
126 my $case_sensitive = $dom->find('input[type="hidden"]');
127 my $case_sensitive = $dom->find('input[type=hidden]');
128
03eb5521 129=item E[foo="bar" i]
63873d67 130
131An C<E> element whose C<foo> attribute value is exactly equal to any
132(ASCII-range) case-permutation of C<bar>. Note that this selector is
133EXPERIMENTAL and might change without warning!
134
135 my $case_insensitive = $dom->find('input[type="hidden" i]');
136 my $case_insensitive = $dom->find('input[type=hidden i]');
137 my $case_insensitive = $dom->find('input[class~="foo" i]');
138
139This selector is part of
140L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
141in progress.
142
03eb5521 143=item E[foo~="bar"]
63873d67 144
145An C<E> element whose C<foo> attribute value is a list of whitespace-separated
146values, one of which is exactly equal to C<bar>.
147
148 my $foo = $dom->find('input[class~="foo"]');
149 my $foo = $dom->find('input[class~=foo]');
150
03eb5521 151=item E[foo^="bar"]
63873d67 152
153An C<E> element whose C<foo> attribute value begins exactly with the string
154C<bar>.
155
156 my $begins_with = $dom->find('input[name^="f"]');
157 my $begins_with = $dom->find('input[name^=f]');
158
03eb5521 159=item E[foo$="bar"]
63873d67 160
161An C<E> element whose C<foo> attribute value ends exactly with the string
162C<bar>.
163
164 my $ends_with = $dom->find('input[name$="o"]');
165 my $ends_with = $dom->find('input[name$=o]');
166
03eb5521 167=item E[foo*="bar"]
63873d67 168
169An C<E> element whose C<foo> attribute value contains the substring C<bar>.
170
171 my $contains = $dom->find('input[name*="fo"]');
172 my $contains = $dom->find('input[name*=fo]');
173
03eb5521 174=item E:root
63873d67 175
176An C<E> element, root of the document.
177
178 my $root = $dom->at(':root');
179
03eb5521 180=item E:nth-child(n)
63873d67 181
182An C<E> element, the C<n-th> child of its parent.
183
184 my $third = $dom->find('div:nth-child(3)');
185 my $odd = $dom->find('div:nth-child(odd)');
186 my $even = $dom->find('div:nth-child(even)');
187 my $top3 = $dom->find('div:nth-child(-n+3)');
188
03eb5521 189=item E:nth-last-child(n)
63873d67 190
191An C<E> element, the C<n-th> child of its parent, counting from the last one.
192
193 my $third = $dom->find('div:nth-last-child(3)');
194 my $odd = $dom->find('div:nth-last-child(odd)');
195 my $even = $dom->find('div:nth-last-child(even)');
196 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
197
03eb5521 198=item E:nth-of-type(n)
63873d67 199
200An C<E> element, the C<n-th> sibling of its type.
201
202 my $third = $dom->find('div:nth-of-type(3)');
203 my $odd = $dom->find('div:nth-of-type(odd)');
204 my $even = $dom->find('div:nth-of-type(even)');
205 my $top3 = $dom->find('div:nth-of-type(-n+3)');
206
03eb5521 207=item E:nth-last-of-type(n)
63873d67 208
209An C<E> element, the C<n-th> sibling of its type, counting from the last one.
210
211 my $third = $dom->find('div:nth-last-of-type(3)');
212 my $odd = $dom->find('div:nth-last-of-type(odd)');
213 my $even = $dom->find('div:nth-last-of-type(even)');
214 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
215
03eb5521 216=item E:first-child
63873d67 217
218An C<E> element, first child of its parent.
219
220 my $first = $dom->find('div p:first-child');
221
03eb5521 222=item E:last-child
63873d67 223
224An C<E> element, last child of its parent.
225
226 my $last = $dom->find('div p:last-child');
227
03eb5521 228=item E:first-of-type
63873d67 229
230An C<E> element, first sibling of its type.
231
232 my $first = $dom->find('div p:first-of-type');
233
03eb5521 234=item E:last-of-type
63873d67 235
236An C<E> element, last sibling of its type.
237
238 my $last = $dom->find('div p:last-of-type');
239
03eb5521 240=item E:only-child
63873d67 241
242An C<E> element, only child of its parent.
243
244 my $lonely = $dom->find('div p:only-child');
245
03eb5521 246=item E:only-of-type
63873d67 247
248An C<E> element, only sibling of its type.
249
250 my $lonely = $dom->find('div p:only-of-type');
251
03eb5521 252=item E:empty
63873d67 253
254An C<E> element that has no children (including text nodes).
255
256 my $empty = $dom->find(':empty');
257
03eb5521 258=item E:checked
63873d67 259
260A user interface element C<E> which is checked (for instance a radio-button or
261checkbox).
262
263 my $input = $dom->find(':checked');
264
03eb5521 265=item E.warning
63873d67 266
267An C<E> element whose class is "warning".
268
269 my $warning = $dom->find('div.warning');
270
03eb5521 271=item E#myid
63873d67 272
273An C<E> element with C<ID> equal to "myid".
274
275 my $foo = $dom->at('div#foo');
276
03eb5521 277=item E:not(s)
63873d67 278
279An C<E> element that does not match simple selector C<s>.
280
281 my $others = $dom->find('div p:not(:first-child)');
282
03eb5521 283=item E F
63873d67 284
285An C<F> element descendant of an C<E> element.
286
287 my $headlines = $dom->find('div h1');
288
03eb5521 289=item E E<gt> F
63873d67 290
291An C<F> element child of an C<E> element.
292
293 my $headlines = $dom->find('html > body > div > h1');
294
03eb5521 295=item E + F
63873d67 296
297An C<F> element immediately preceded by an C<E> element.
298
299 my $second = $dom->find('h1 + h2');
300
03eb5521 301=item E ~ F
63873d67 302
303An C<F> element preceded by an C<E> element.
304
305 my $second = $dom->find('h1 ~ h2');
306
03eb5521 307=item E, F, G
63873d67 308
309Elements of type C<E>, C<F> and C<G>.
310
311 my $headlines = $dom->find('h1, h2, h3');
312
03eb5521 313=item E[foo=bar][bar=baz]
63873d67 314
315An C<E> element whose attributes match all following attribute selectors.
316
317 my $links = $dom->find('a[foo^=b][foo$=ar]');
318
03eb5521 319=back
320
9a5f1e3f 321=head1 OPERATORS
322
323L<DOM::Tiny> overloads the following operators.
324
325=head2 array
326
327 my @nodes = @$dom;
328
329Alias for L</"child_nodes">.
330
331 # "<!-- Test -->"
332 $dom->parse('<!-- Test --><b>123</b>')->[0];
333
334=head2 bool
335
336 my $bool = !!$dom;
337
338Always true.
339
340=head2 hash
341
342 my %attrs = %$dom;
343
344Alias for L</"attr">.
345
346 # "test"
347 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
348
349=head2 stringify
350
351 my $str = "$dom";
352
353Alias for L</"to_string">.
354
d6512b50 355=head1 METHODS
356
357L<DOM::Tiny> implements the following methods.
358
3793c28f 359=head2 new
360
361 my $dom = DOM::Tiny->new;
362 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
363
364Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
365fragment if necessary.
366
d6512b50 367=head2 all_text
368
369 my $trimmed = $dom->all_text;
370 my $untrimmed = $dom->all_text(0);
371
372Extract text content from all descendant nodes of this element, smart
373whitespace trimming is enabled by default.
374
375 # "foo bar baz"
376 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
377
378 # "foo\nbarbaz\n"
379 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
380
381=head2 ancestors
382
383 my $collection = $dom->ancestors;
384 my $collection = $dom->ancestors('div ~ p');
385
386Find all ancestor elements of this node matching the CSS selector and return a
8563f527 387L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 388objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 389
390 # List tag names of ancestor elements
391 say $dom->ancestors->map('tag')->join("\n");
392
393=head2 append
394
395 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
396
c7cad649 397Append HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 398
399 # "<div><h1>Test</h1><h2>123</h2></div>"
400 $dom->parse('<div><h1>Test</h1></div>')
401 ->at('h1')->append('<h2>123</h2>')->root;
402
403 # "<p>Test 123</p>"
404 $dom->parse('<p>Test</p>')->at('p')
405 ->child_nodes->first->append(' 123')->root;
406
407=head2 append_content
408
409 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
410
411Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
412node's content.
413
414 # "<div><h1>Test123</h1></div>"
415 $dom->parse('<div><h1>Test</h1></div>')
416 ->at('h1')->append_content('123')->root;
417
418 # "<!-- Test 123 --><br>"
419 $dom->parse('<!-- Test --><br>')
420 ->child_nodes->first->append_content('123 ')->root;
421
422 # "<p>Test<i>123</i></p>"
423 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
424
425=head2 at
426
427 my $result = $dom->at('div ~ p');
428
429Find first descendant element of this element matching the CSS selector and
eb9737f2 430return it as a L<DOM::Tiny> object, or C<undef> if none could be found. All
431selectors listed in L</"SELECTORS"> are supported.
d6512b50 432
433 # Find first element with "svg" namespace definition
434 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
435
436=head2 attr
437
438 my $hash = $dom->attr;
439 my $foo = $dom->attr('foo');
440 $dom = $dom->attr({foo => 'bar'});
441 $dom = $dom->attr(foo => 'bar');
442
443This element's attributes.
444
445 # Remove an attribute
446 delete $dom->attr->{id};
447
448 # Attribute without value
449 $dom->attr(selected => undef);
450
451 # List id attributes
452 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
453
454=head2 child_nodes
455
456 my $collection = $dom->child_nodes;
457
8563f527 458Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 459element as L<DOM::Tiny> objects.
460
461 # "<p><b>123</b></p>"
462 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
463
464 # "<!DOCTYPE html>"
465 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
466
467 # " Test "
468 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
469
470=head2 children
471
472 my $collection = $dom->children;
473 my $collection = $dom->children('div ~ p');
474
475Find all child elements of this element matching the CSS selector and return a
8563f527 476L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 477objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 478
479 # Show tag name of random child element
480 say $dom->children->shuffle->first->tag;
481
482=head2 content
483
484 my $str = $dom->content;
485 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
486
487Return this node's content or replace it with HTML/XML fragment (for C<root>
488and C<tag> nodes) or raw content.
489
490 # "<b>Test</b>"
491 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
492
493 # "<div><h1>123</h1></div>"
494 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
495
496 # "<p><i>123</i></p>"
497 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
498
499 # "<div><h1></h1></div>"
500 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
501
502 # " Test "
503 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
504
505 # "<div><!-- 123 -->456</div>"
506 $dom->parse('<div><!-- Test -->456</div>')
507 ->at('div')->child_nodes->first->content(' 123 ')->root;
508
509=head2 descendant_nodes
510
511 my $collection = $dom->descendant_nodes;
512
8563f527 513Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 514this element as L<DOM::Tiny> objects.
515
516 # "<p><b>123</b></p>"
517 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
518 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
519 ->map('remove')->first;
520
521 # "<p><b>test</b>test</p>"
522 $dom->parse('<p><b>123</b>456</p>')
523 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
524 ->map(content => 'test')->first->root;
525
526=head2 find
527
528 my $collection = $dom->find('div ~ p');
529
530Find all descendant elements of this element matching the CSS selector and
8563f527 531return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 532L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 533
534 # Find a specific element and extract information
535 my $id = $dom->find('div')->[23]{id};
536
537 # Extract information from multiple elements
538 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
539
540 # Count all the different tags
541 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
542
543 # Find elements with a class that contains dots
544 my @divs = $dom->find('div.foo\.bar')->each;
545
546=head2 following
547
548 my $collection = $dom->following;
549 my $collection = $dom->following('div ~ p');
550
551Find all sibling elements after this node matching the CSS selector and return
8563f527 552a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 553objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 554
555 # List tags of sibling elements after this node
556 say $dom->following->map('tag')->join("\n");
557
558=head2 following_nodes
559
560 my $collection = $dom->following_nodes;
561
8563f527 562Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 563this node as L<DOM::Tiny> objects.
564
565 # "C"
566 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
567
568=head2 matches
569
570 my $bool = $dom->matches('div ~ p');
571
9a5f1e3f 572Check if this element matches the CSS selector. All selectors listed in
573L</"SELECTORS"> are supported.
d6512b50 574
575 # True
576 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
577 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
578
579 # False
580 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
581 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
582
583=head2 namespace
584
585 my $namespace = $dom->namespace;
586
eb9737f2 587Find this element's namespace, or return C<undef> if none could be found.
d6512b50 588
589 # Find namespace for an element with namespace prefix
590 my $namespace = $dom->at('svg > svg\:circle')->namespace;
591
592 # Find namespace for an element that may or may not have a namespace prefix
593 my $namespace = $dom->at('svg > circle')->namespace;
594
d6512b50 595=head2 next
596
597 my $sibling = $dom->next;
598
eb9737f2 599Return L<DOM::Tiny> object for next sibling element, or C<undef> if there are
600no more siblings.
d6512b50 601
602 # "<h2>123</h2>"
603 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
604
605=head2 next_node
606
607 my $sibling = $dom->next_node;
608
eb9737f2 609Return L<DOM::Tiny> object for next sibling node, or C<undef> if there are no
d6512b50 610more siblings.
611
612 # "456"
613 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
614 ->at('b')->next_node->next_node;
615
616 # " Test "
617 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
618 ->at('b')->next_node->content;
619
620=head2 parent
621
622 my $parent = $dom->parent;
623
eb9737f2 624Return L<DOM::Tiny> object for parent of this node, or C<undef> if this node
625has no parent.
626
627 # "<b><i>Test</i></b>"
628 $dom->parse('<p><b><i>Test</i></b></p>')->at('i')->parent;
d6512b50 629
630=head2 parse
631
632 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
633
9a5f1e3f 634Parse HTML/XML fragment.
d6512b50 635
636 # Parse XML
eb9737f2 637 my $dom = DOM::Tiny->new->xml(1)->parse('<foo>I ♥ DOM::Tiny!</foo>');
d6512b50 638
639=head2 preceding
640
641 my $collection = $dom->preceding;
642 my $collection = $dom->preceding('div ~ p');
643
644Find all sibling elements before this node matching the CSS selector and return
8563f527 645a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 646objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 647
648 # List tags of sibling elements before this node
649 say $dom->preceding->map('tag')->join("\n");
650
651=head2 preceding_nodes
652
653 my $collection = $dom->preceding_nodes;
654
8563f527 655Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
656before this node as L<DOM::Tiny> objects.
d6512b50 657
658 # "A"
659 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
660
661=head2 prepend
662
663 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
664
c7cad649 665Prepend HTML/XML fragment to this node (for all node types other than C<root>).
d6512b50 666
667 # "<div><h1>Test</h1><h2>123</h2></div>"
668 $dom->parse('<div><h2>123</h2></div>')
669 ->at('h2')->prepend('<h1>Test</h1>')->root;
670
671 # "<p>Test 123</p>"
672 $dom->parse('<p>123</p>')
673 ->at('p')->child_nodes->first->prepend('Test ')->root;
674
675=head2 prepend_content
676
677 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
678
679Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
680node's content.
681
682 # "<div><h2>Test123</h2></div>"
683 $dom->parse('<div><h2>123</h2></div>')
684 ->at('h2')->prepend_content('Test')->root;
685
686 # "<!-- Test 123 --><br>"
687 $dom->parse('<!-- 123 --><br>')
688 ->child_nodes->first->prepend_content(' Test')->root;
689
690 # "<p><i>123</i>Test</p>"
691 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
692
693=head2 previous
694
695 my $sibling = $dom->previous;
696
eb9737f2 697Return L<DOM::Tiny> object for previous sibling element, or C<undef> if there
d6512b50 698are no more siblings.
699
700 # "<h1>Test</h1>"
701 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
702
703=head2 previous_node
704
705 my $sibling = $dom->previous_node;
706
eb9737f2 707Return L<DOM::Tiny> object for previous sibling node, or C<undef> if there are
d6512b50 708no more siblings.
709
710 # "123"
711 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
712 ->at('b')->previous_node->previous_node;
713
714 # " Test "
715 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
716 ->at('b')->previous_node->content;
717
718=head2 remove
719
720 my $parent = $dom->remove;
721
722Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
723
724 # "<div></div>"
725 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
726
727 # "<p><b>456</b></p>"
728 $dom->parse('<p>123<b>456</b></p>')
729 ->at('p')->child_nodes->first->remove->root;
730
731=head2 replace
732
733 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
734
735Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
736nodes) or L</"parent">.
737
738 # "<div><h2>123</h2></div>"
739 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
740
741 # "<p><b>123</b></p>"
742 $dom->parse('<p>Test</p>')
743 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
744
745=head2 root
746
747 my $root = $dom->root;
748
749Return L<DOM::Tiny> object for C<root> node.
750
751=head2 strip
752
753 my $parent = $dom->strip;
754
755Remove this element while preserving its content and return L</"parent">.
756
757 # "<div>Test</div>"
758 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
759
760=head2 tag
761
762 my $tag = $dom->tag;
763 $dom = $dom->tag('div');
764
765This element's tag name.
766
767 # List tag names of child elements
768 say $dom->children->map('tag')->join("\n");
769
770=head2 tap
771
772 $dom = $dom->tap(sub {...});
773
e99ef07d 774Equivalent to L<Mojo::Base/"tap">.
d6512b50 775
776=head2 text
777
778 my $trimmed = $dom->text;
779 my $untrimmed = $dom->text(0);
780
781Extract text content from this element only (not including child elements),
782smart whitespace trimming is enabled by default.
783
784 # "foo baz"
785 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
786
787 # "foo\nbaz\n"
788 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
789
790=head2 to_string
791
792 my $str = $dom->to_string;
793
794Render this node and its content to HTML/XML.
795
796 # "<b>Test</b>"
797 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
798
799=head2 tree
800
801 my $tree = $dom->tree;
802 $dom = $dom->tree(['root']);
803
804Document Object Model. Note that this structure should only be used very
805carefully since it is very dynamic.
806
807=head2 type
808
809 my $type = $dom->type;
810
811This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
812C<root>, C<tag> or C<text>.
813
814 # "cdata"
815 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
816
817 # "comment"
818 $dom->parse('<!-- Test -->')->child_nodes->first->type;
819
820 # "doctype"
821 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
822
823 # "pi"
824 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
825
826 # "raw"
827 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
828
829 # "root"
830 $dom->parse('<p>Test</p>')->type;
831
832 # "tag"
833 $dom->parse('<p>Test</p>')->at('p')->type;
834
835 # "text"
836 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
837
838=head2 val
839
840 my $value = $dom->val;
841
842Extract value from form element (such as C<button>, C<input>, C<option>,
eb9737f2 843C<select> and C<textarea>), or return C<undef> if this element has no value. In
d6512b50 844the case of C<select> with C<multiple> attribute, find C<option> elements with
eb9737f2 845C<selected> attribute and return an array reference with all values, or
846C<undef> if none could be found.
d6512b50 847
848 # "a"
849 $dom->parse('<input name="test" value="a">')->at('input')->val;
850
851 # "b"
852 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
853
854 # "c"
855 $dom->parse('<option value="c">Test</option>')->at('option')->val;
856
857 # "d"
858 $dom->parse('<select><option selected>d</option></select>')
859 ->at('select')->val;
860
861 # "e"
862 $dom->parse('<select multiple><option selected>e</option></select>')
863 ->at('select')->val->[0];
864
865=head2 wrap
866
867 $dom = $dom->wrap('<div></div>');
868
c7cad649 869Wrap HTML/XML fragment around this node (for all node types other than C<root>),
870placing it as the last child of the first innermost element.
d6512b50 871
872 # "<p>123<b>Test</b></p>"
873 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
874
875 # "<div><p><b>Test</b></p>123</div>"
876 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
877
878 # "<p><b>Test</b></p><p>123</p>"
879 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
880
881 # "<p><b>Test</b></p>"
882 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
883
884=head2 wrap_content
885
886 $dom = $dom->wrap_content('<div></div>');
887
c7cad649 888Wrap HTML/XML fragment around this node's content (for C<root> and C<tag>
889nodes), placing it as the last children of the first innermost element.
d6512b50 890
891 # "<p><b>123Test</b></p>"
892 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
893
894 # "<p><b>Test</b></p><p>123</p>"
895 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
896
897=head2 xml
898
899 my $bool = $dom->xml;
900 $dom = $dom->xml($bool);
901
902Disable HTML semantics in parser and activate case-sensitivity, defaults to
eb9737f2 903auto detection based on XML declarations.
d6512b50 904
78ba4051 905=head1 COLLECTION METHODS
906
9a5f1e3f 907Some L<DOM::Tiny> methods return an array-based collection object based on
908L<Mojo::Collection>, which can either be accessed directly as an array
909reference, or with the following methods.
78ba4051 910
911 # Chain methods
912 $collection->map(sub { ucfirst })->shuffle->each(sub {
913 my ($word, $num) = @_;
914 say "$num: $word";
915 });
916
917 # Access array directly to manipulate collection
918 $collection->[23] += 100;
919 say for @$collection;
920
921=head2 compact
922
923 my $new = $collection->compact;
924
925Create a new collection with all elements that are defined and not an empty
926string.
927
928 # $collection contains (0, 1, undef, 2, '', 3)
929 $collection->compact->join(', '); # "0, 1, 2, 3"
930
931=head2 each
932
933 my @elements = $collection->each;
934 $collection = $collection->each(sub {...});
935
936Evaluate callback for each element in collection or return all elements as a
937list if none has been provided. The element will be the first argument passed
938to the callback and is also available as C<$_>.
939
940 # Make a numbered list
941 $collection->each(sub {
942 my ($e, $num) = @_;
943 say "$num: $e";
944 });
945
946=head2 first
947
948 my $first = $collection->first;
949 my $first = $collection->first(qr/foo/);
950 my $first = $collection->first(sub {...});
951 my $first = $collection->first($method);
952 my $first = $collection->first($method, @args);
953
954Evaluate regular expression/callback for, or call method on, each element in
955collection and return the first one that matched the regular expression, or for
956which the callback/method returned true. The element will be the first argument
957passed to the callback and is also available as C<$_>.
958
959 # Longer version
960 my $first = $collection->first(sub { $_->$method(@args) });
961
c7bc4d31 962 # Find first value that contains the word "tiny"
963 my $interesting = $collection->first(qr/tiny/i);
78ba4051 964
965 # Find first value that is greater than 5
966 my $greater = $collection->first(sub { $_ > 5 });
967
968=head2 flatten
969
970 my $new = $collection->flatten;
971
972Flatten nested collections/arrays recursively and create a new collection with
973all elements.
974
975 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
976 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
977
978=head2 grep
979
980 my $new = $collection->grep(qr/foo/);
981 my $new = $collection->grep(sub {...});
982 my $new = $collection->grep($method);
983 my $new = $collection->grep($method, @args);
984
985Evaluate regular expression/callback for, or call method on, each element in
986collection and create a new collection with all elements that matched the
987regular expression, or for which the callback/method returned true. The element
988will be the first argument passed to the callback and is also available as
989C<$_>.
990
991 # Longer version
992 my $new = $collection->grep(sub { $_->$method(@args) });
993
c7bc4d31 994 # Find all values that contain the word "tiny"
995 my $interesting = $collection->grep(qr/tiny/i);
78ba4051 996
997 # Find all values that are greater than 5
998 my $greater = $collection->grep(sub { $_ > 5 });
999
1000=head2 join
1001
1002 my $stream = $collection->join;
1003 my $stream = $collection->join("\n");
1004
1005Turn collection into string.
1006
1007 # Join all values with commas
1008 $collection->join(', ');
1009
1010=head2 last
1011
1012 my $last = $collection->last;
1013
1014Return the last element in collection.
1015
1016=head2 map
1017
1018 my $new = $collection->map(sub {...});
1019 my $new = $collection->map($method);
1020 my $new = $collection->map($method, @args);
1021
1022Evaluate callback for, or call method on, each element in collection and create
1023a new collection from the results. The element will be the first argument
1024passed to the callback and is also available as C<$_>.
1025
1026 # Longer version
1027 my $new = $collection->map(sub { $_->$method(@args) });
1028
c7bc4d31 1029 # Append the word "tiny" to all values
1030 my $domified = $collection->map(sub { $_ . 'tiny' });
78ba4051 1031
1032=head2 reduce
1033
1034 my $result = $collection->reduce(sub {...});
1035 my $result = $collection->reduce(sub {...}, $initial);
1036
1037Reduce elements in collection with callback, the first element will be used as
1038initial value if none has been provided.
1039
1040 # Calculate the sum of all values
1041 my $sum = $collection->reduce(sub { $a + $b });
1042
1043 # Count how often each value occurs in collection
1044 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1045
1046=head2 reverse
1047
1048 my $new = $collection->reverse;
1049
1050Create a new collection with all elements in reverse order.
1051
1052=head2 slice
1053
1054 my $new = $collection->slice(4 .. 7);
1055
1056Create a new collection with all selected elements.
1057
1058 # $collection contains ('A', 'B', 'C', 'D', 'E')
1059 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1060
1061=head2 shuffle
1062
1063 my $new = $collection->shuffle;
1064
1065Create a new collection with all elements in random order.
1066
1067=head2 size
1068
1069 my $size = $collection->size;
1070
1071Number of elements in collection.
1072
1073=head2 sort
1074
1075 my $new = $collection->sort;
1076 my $new = $collection->sort(sub {...});
1077
1078Sort elements based on return value of callback and create a new collection
1079from the results.
1080
1081 # Sort values case-insensitive
1082 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1083
1084=head2 tap
1085
1086 $collection = $collection->tap(sub {...});
1087
1088Equivalent to L<Mojo::Base/"tap">.
1089
1090=head2 to_array
1091
1092 my $array = $collection->to_array;
1093
1094Turn collection into array reference.
1095
1096=head2 uniq
1097
1098 my $new = $collection->uniq;
1099 my $new = $collection->uniq(sub {...});
1100 my $new = $collection->uniq($method);
1101 my $new = $collection->uniq($method, @args);
1102
1103Create a new collection without duplicate elements, using the string
1104representation of either the elements or the return value of the
1105callback/method.
1106
1107 # Longer version
1108 my $new = $collection->uniq(sub { $_->$method(@args) });
1109
1110 # $collection contains ('foo', 'bar', 'bar', 'baz')
1111 $collection->uniq->join(' '); # "foo bar baz"
1112
1113 # $collection contains ([1, 2], [2, 1], [3, 2])
1114 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1115
d6512b50 1116=head1 BUGS
1117
1118Report any issues on the public bugtracker.
1119
1120=head1 AUTHOR
1121
1122Dan Book <dbook@cpan.org>
1123
9ba11a91 1124Code and tests adapted from L<Mojo::DOM>, a lightweight DOM parser by the L<Mojolicious> team.
7218d584 1125
2d9f5165 1126=head1 CONTRIBUTORS
1127
1128=over
1129
1130=item Matt S Trout (mst)
1131
1132=back
1133
d6512b50 1134=head1 COPYRIGHT AND LICENSE
1135
9ba11a91 1136Copyright (c) 2008-2015 Sebastian Riedel.
1137
1138Copyright (c) 2015 L</"AUTHOR"> and L</"CONTRIBUTORS"> for adaptation to standalone format.
d6512b50 1139
1140This is free software, licensed under:
1141
1142 The Artistic License 2.0 (GPL Compatible)
1143
1144=head1 SEE ALSO
1145
31877452 1146L<Mojo::DOM>, L<HTML::TreeBuilder>, L<XML::LibXML>, L<XML::Twig>, L<XML::Smart>
d6512b50 1147
9a5f1e3f 1148=for Pod::Coverage TO_JSON
1149
d6512b50 1150=cut