better example text
[catagits/DOM-Tiny.git] / README.pod
CommitLineData
d6512b50 1=pod
2
3=encoding utf8
4
5=head1 NAME
6
7DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
8
9=head1 SYNOPSIS
10
11 use DOM::Tiny;
12
13 # Parse
14 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
15
16 # Find
17 say $dom->at('#b')->text;
18 say $dom->find('p')->map('text')->join("\n");
19 say $dom->find('[id]')->map(attr => 'id')->join("\n");
20
21 # Iterate
22 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
23
24 # Loop
25 for my $e ($dom->find('p[id]')->each) {
26 say $e->{id}, ':', $e->text;
27 }
28
29 # Modify
30 $dom->find('div p')->last->append('<p id="c">456</p>');
31 $dom->find(':not(p)')->map('strip');
32
33 # Render
34 say "$dom";
35
36=head1 DESCRIPTION
37
9a5f1e3f 38L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
39on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
40and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
41matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
42even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 43validation.
d6512b50 44
45=head1 NODES AND ELEMENTS
46
47When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
48
49 <!DOCTYPE html>
50 <html>
51 <head><title>Hello</title></head>
52 <body>World!</body>
53 </html>
54
55There are currently eight different kinds of nodes, C<cdata>, C<comment>,
56C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
57the type C<tag>.
58
59 root
60 |- doctype (html)
61 +- tag (html)
62 |- tag (head)
63 | +- tag (title)
64 | +- raw (Hello)
65 +- tag (body)
66 +- text (World!)
67
68While all node types are represented as L<DOM::Tiny> objects, some methods like
69L</"attr"> and L</"namespace"> only apply to elements.
70
71=head1 CASE-SENSITIVITY
72
73L<DOM::Tiny> defaults to HTML semantics, which means all tags and attribute
74names are lowercased and selectors need to be lowercase as well.
75
76 # HTML semantics
77 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
78 say $dom->at('p[id]')->text;
79
80If XML processing instructions are found, the parser will automatically switch
81into XML mode and everything becomes case-sensitive.
82
83 # XML semantics
84 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
85 say $dom->at('P[ID]')->text;
86
87XML detection can also be disabled with the L</"xml"> method.
88
89 # Force XML semantics
90 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93 # Force HTML semantics
94 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
95 say $dom->at('p[id]')->text;
96
63873d67 97=head1 SELECTORS
98
99L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
100selectors that make sense for a standalone parser are supported.
101
102=head2 *
103
104Any element.
105
106 my $all = $dom->find('*');
107
108=head2 E
109
110An element of type C<E>.
111
112 my $title = $dom->at('title');
113
114=head2 E[foo]
115
116An C<E> element with a C<foo> attribute.
117
118 my $links = $dom->find('a[href]');
119
120=head2 E[foo="bar"]
121
122An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
123
124 my $case_sensitive = $dom->find('input[type="hidden"]');
125 my $case_sensitive = $dom->find('input[type=hidden]');
126
127=head2 E[foo="bar" i]
128
129An C<E> element whose C<foo> attribute value is exactly equal to any
130(ASCII-range) case-permutation of C<bar>. Note that this selector is
131EXPERIMENTAL and might change without warning!
132
133 my $case_insensitive = $dom->find('input[type="hidden" i]');
134 my $case_insensitive = $dom->find('input[type=hidden i]');
135 my $case_insensitive = $dom->find('input[class~="foo" i]');
136
137This selector is part of
138L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
139in progress.
140
141=head2 E[foo~="bar"]
142
143An C<E> element whose C<foo> attribute value is a list of whitespace-separated
144values, one of which is exactly equal to C<bar>.
145
146 my $foo = $dom->find('input[class~="foo"]');
147 my $foo = $dom->find('input[class~=foo]');
148
149=head2 E[foo^="bar"]
150
151An C<E> element whose C<foo> attribute value begins exactly with the string
152C<bar>.
153
154 my $begins_with = $dom->find('input[name^="f"]');
155 my $begins_with = $dom->find('input[name^=f]');
156
157=head2 E[foo$="bar"]
158
159An C<E> element whose C<foo> attribute value ends exactly with the string
160C<bar>.
161
162 my $ends_with = $dom->find('input[name$="o"]');
163 my $ends_with = $dom->find('input[name$=o]');
164
165=head2 E[foo*="bar"]
166
167An C<E> element whose C<foo> attribute value contains the substring C<bar>.
168
169 my $contains = $dom->find('input[name*="fo"]');
170 my $contains = $dom->find('input[name*=fo]');
171
172=head2 E:root
173
174An C<E> element, root of the document.
175
176 my $root = $dom->at(':root');
177
178=head2 E:nth-child(n)
179
180An C<E> element, the C<n-th> child of its parent.
181
182 my $third = $dom->find('div:nth-child(3)');
183 my $odd = $dom->find('div:nth-child(odd)');
184 my $even = $dom->find('div:nth-child(even)');
185 my $top3 = $dom->find('div:nth-child(-n+3)');
186
187=head2 E:nth-last-child(n)
188
189An C<E> element, the C<n-th> child of its parent, counting from the last one.
190
191 my $third = $dom->find('div:nth-last-child(3)');
192 my $odd = $dom->find('div:nth-last-child(odd)');
193 my $even = $dom->find('div:nth-last-child(even)');
194 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
195
196=head2 E:nth-of-type(n)
197
198An C<E> element, the C<n-th> sibling of its type.
199
200 my $third = $dom->find('div:nth-of-type(3)');
201 my $odd = $dom->find('div:nth-of-type(odd)');
202 my $even = $dom->find('div:nth-of-type(even)');
203 my $top3 = $dom->find('div:nth-of-type(-n+3)');
204
205=head2 E:nth-last-of-type(n)
206
207An C<E> element, the C<n-th> sibling of its type, counting from the last one.
208
209 my $third = $dom->find('div:nth-last-of-type(3)');
210 my $odd = $dom->find('div:nth-last-of-type(odd)');
211 my $even = $dom->find('div:nth-last-of-type(even)');
212 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
213
214=head2 E:first-child
215
216An C<E> element, first child of its parent.
217
218 my $first = $dom->find('div p:first-child');
219
220=head2 E:last-child
221
222An C<E> element, last child of its parent.
223
224 my $last = $dom->find('div p:last-child');
225
226=head2 E:first-of-type
227
228An C<E> element, first sibling of its type.
229
230 my $first = $dom->find('div p:first-of-type');
231
232=head2 E:last-of-type
233
234An C<E> element, last sibling of its type.
235
236 my $last = $dom->find('div p:last-of-type');
237
238=head2 E:only-child
239
240An C<E> element, only child of its parent.
241
242 my $lonely = $dom->find('div p:only-child');
243
244=head2 E:only-of-type
245
246An C<E> element, only sibling of its type.
247
248 my $lonely = $dom->find('div p:only-of-type');
249
250=head2 E:empty
251
252An C<E> element that has no children (including text nodes).
253
254 my $empty = $dom->find(':empty');
255
256=head2 E:checked
257
258A user interface element C<E> which is checked (for instance a radio-button or
259checkbox).
260
261 my $input = $dom->find(':checked');
262
263=head2 E.warning
264
265An C<E> element whose class is "warning".
266
267 my $warning = $dom->find('div.warning');
268
269=head2 E#myid
270
271An C<E> element with C<ID> equal to "myid".
272
273 my $foo = $dom->at('div#foo');
274
275=head2 E:not(s)
276
277An C<E> element that does not match simple selector C<s>.
278
279 my $others = $dom->find('div p:not(:first-child)');
280
281=head2 E F
282
283An C<F> element descendant of an C<E> element.
284
285 my $headlines = $dom->find('div h1');
286
287=head2 E E<gt> F
288
289An C<F> element child of an C<E> element.
290
291 my $headlines = $dom->find('html > body > div > h1');
292
293=head2 E + F
294
295An C<F> element immediately preceded by an C<E> element.
296
297 my $second = $dom->find('h1 + h2');
298
299=head2 E ~ F
300
301An C<F> element preceded by an C<E> element.
302
303 my $second = $dom->find('h1 ~ h2');
304
305=head2 E, F, G
306
307Elements of type C<E>, C<F> and C<G>.
308
309 my $headlines = $dom->find('h1, h2, h3');
310
311=head2 E[foo=bar][bar=baz]
312
313An C<E> element whose attributes match all following attribute selectors.
314
315 my $links = $dom->find('a[foo^=b][foo$=ar]');
316
9a5f1e3f 317=head1 OPERATORS
318
319L<DOM::Tiny> overloads the following operators.
320
321=head2 array
322
323 my @nodes = @$dom;
324
325Alias for L</"child_nodes">.
326
327 # "<!-- Test -->"
328 $dom->parse('<!-- Test --><b>123</b>')->[0];
329
330=head2 bool
331
332 my $bool = !!$dom;
333
334Always true.
335
336=head2 hash
337
338 my %attrs = %$dom;
339
340Alias for L</"attr">.
341
342 # "test"
343 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
344
345=head2 stringify
346
347 my $str = "$dom";
348
349Alias for L</"to_string">.
350
d6512b50 351=head1 METHODS
352
353L<DOM::Tiny> implements the following methods.
354
3793c28f 355=head2 new
356
357 my $dom = DOM::Tiny->new;
358 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
359
360Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
361fragment if necessary.
362
d6512b50 363=head2 all_text
364
365 my $trimmed = $dom->all_text;
366 my $untrimmed = $dom->all_text(0);
367
368Extract text content from all descendant nodes of this element, smart
369whitespace trimming is enabled by default.
370
371 # "foo bar baz"
372 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
373
374 # "foo\nbarbaz\n"
375 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
376
377=head2 ancestors
378
379 my $collection = $dom->ancestors;
380 my $collection = $dom->ancestors('div ~ p');
381
382Find all ancestor elements of this node matching the CSS selector and return a
8563f527 383L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 384objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 385
386 # List tag names of ancestor elements
387 say $dom->ancestors->map('tag')->join("\n");
388
389=head2 append
390
391 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
392
393Append HTML/XML fragment to this node.
394
395 # "<div><h1>Test</h1><h2>123</h2></div>"
396 $dom->parse('<div><h1>Test</h1></div>')
397 ->at('h1')->append('<h2>123</h2>')->root;
398
399 # "<p>Test 123</p>"
400 $dom->parse('<p>Test</p>')->at('p')
401 ->child_nodes->first->append(' 123')->root;
402
403=head2 append_content
404
405 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
406
407Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
408node's content.
409
410 # "<div><h1>Test123</h1></div>"
411 $dom->parse('<div><h1>Test</h1></div>')
412 ->at('h1')->append_content('123')->root;
413
414 # "<!-- Test 123 --><br>"
415 $dom->parse('<!-- Test --><br>')
416 ->child_nodes->first->append_content('123 ')->root;
417
418 # "<p>Test<i>123</i></p>"
419 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
420
421=head2 at
422
423 my $result = $dom->at('div ~ p');
424
425Find first descendant element of this element matching the CSS selector and
426return it as a L<DOM::Tiny> object or return C<undef> if none could be found.
9a5f1e3f 427All selectors listed in L</"SELECTORS"> are supported.
d6512b50 428
429 # Find first element with "svg" namespace definition
430 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
431
432=head2 attr
433
434 my $hash = $dom->attr;
435 my $foo = $dom->attr('foo');
436 $dom = $dom->attr({foo => 'bar'});
437 $dom = $dom->attr(foo => 'bar');
438
439This element's attributes.
440
441 # Remove an attribute
442 delete $dom->attr->{id};
443
444 # Attribute without value
445 $dom->attr(selected => undef);
446
447 # List id attributes
448 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
449
450=head2 child_nodes
451
452 my $collection = $dom->child_nodes;
453
8563f527 454Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 455element as L<DOM::Tiny> objects.
456
457 # "<p><b>123</b></p>"
458 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
459
460 # "<!DOCTYPE html>"
461 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
462
463 # " Test "
464 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
465
466=head2 children
467
468 my $collection = $dom->children;
469 my $collection = $dom->children('div ~ p');
470
471Find all child elements of this element matching the CSS selector and return a
8563f527 472L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 473objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 474
475 # Show tag name of random child element
476 say $dom->children->shuffle->first->tag;
477
478=head2 content
479
480 my $str = $dom->content;
481 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
482
483Return this node's content or replace it with HTML/XML fragment (for C<root>
484and C<tag> nodes) or raw content.
485
486 # "<b>Test</b>"
487 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
488
489 # "<div><h1>123</h1></div>"
490 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
491
492 # "<p><i>123</i></p>"
493 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
494
495 # "<div><h1></h1></div>"
496 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
497
498 # " Test "
499 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
500
501 # "<div><!-- 123 -->456</div>"
502 $dom->parse('<div><!-- Test -->456</div>')
503 ->at('div')->child_nodes->first->content(' 123 ')->root;
504
505=head2 descendant_nodes
506
507 my $collection = $dom->descendant_nodes;
508
8563f527 509Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 510this element as L<DOM::Tiny> objects.
511
512 # "<p><b>123</b></p>"
513 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
514 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
515 ->map('remove')->first;
516
517 # "<p><b>test</b>test</p>"
518 $dom->parse('<p><b>123</b>456</p>')
519 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
520 ->map(content => 'test')->first->root;
521
522=head2 find
523
524 my $collection = $dom->find('div ~ p');
525
526Find all descendant elements of this element matching the CSS selector and
8563f527 527return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 528L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 529
530 # Find a specific element and extract information
531 my $id = $dom->find('div')->[23]{id};
532
533 # Extract information from multiple elements
534 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
535
536 # Count all the different tags
537 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
538
539 # Find elements with a class that contains dots
540 my @divs = $dom->find('div.foo\.bar')->each;
541
542=head2 following
543
544 my $collection = $dom->following;
545 my $collection = $dom->following('div ~ p');
546
547Find all sibling elements after this node matching the CSS selector and return
8563f527 548a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 549objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 550
551 # List tags of sibling elements after this node
552 say $dom->following->map('tag')->join("\n");
553
554=head2 following_nodes
555
556 my $collection = $dom->following_nodes;
557
8563f527 558Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 559this node as L<DOM::Tiny> objects.
560
561 # "C"
562 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
563
564=head2 matches
565
566 my $bool = $dom->matches('div ~ p');
567
9a5f1e3f 568Check if this element matches the CSS selector. All selectors listed in
569L</"SELECTORS"> are supported.
d6512b50 570
571 # True
572 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
573 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
574
575 # False
576 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
577 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
578
579=head2 namespace
580
581 my $namespace = $dom->namespace;
582
583Find this element's namespace or return C<undef> if none could be found.
584
585 # Find namespace for an element with namespace prefix
586 my $namespace = $dom->at('svg > svg\:circle')->namespace;
587
588 # Find namespace for an element that may or may not have a namespace prefix
589 my $namespace = $dom->at('svg > circle')->namespace;
590
d6512b50 591=head2 next
592
593 my $sibling = $dom->next;
594
595Return L<DOM::Tiny> object for next sibling element or C<undef> if there are no
596more siblings.
597
598 # "<h2>123</h2>"
599 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
600
601=head2 next_node
602
603 my $sibling = $dom->next_node;
604
605Return L<DOM::Tiny> object for next sibling node or C<undef> if there are no
606more siblings.
607
608 # "456"
609 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
610 ->at('b')->next_node->next_node;
611
612 # " Test "
613 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
614 ->at('b')->next_node->content;
615
616=head2 parent
617
618 my $parent = $dom->parent;
619
620Return L<DOM::Tiny> object for parent of this node or C<undef> if this node has
621no parent.
622
623=head2 parse
624
625 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
626
9a5f1e3f 627Parse HTML/XML fragment.
d6512b50 628
629 # Parse XML
630 my $dom = DOM::Tiny->new->xml(1)->parse($xml);
631
632=head2 preceding
633
634 my $collection = $dom->preceding;
635 my $collection = $dom->preceding('div ~ p');
636
637Find all sibling elements before this node matching the CSS selector and return
8563f527 638a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 639objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 640
641 # List tags of sibling elements before this node
642 say $dom->preceding->map('tag')->join("\n");
643
644=head2 preceding_nodes
645
646 my $collection = $dom->preceding_nodes;
647
8563f527 648Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
649before this node as L<DOM::Tiny> objects.
d6512b50 650
651 # "A"
652 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
653
654=head2 prepend
655
656 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
657
658Prepend HTML/XML fragment to this node.
659
660 # "<div><h1>Test</h1><h2>123</h2></div>"
661 $dom->parse('<div><h2>123</h2></div>')
662 ->at('h2')->prepend('<h1>Test</h1>')->root;
663
664 # "<p>Test 123</p>"
665 $dom->parse('<p>123</p>')
666 ->at('p')->child_nodes->first->prepend('Test ')->root;
667
668=head2 prepend_content
669
670 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
671
672Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
673node's content.
674
675 # "<div><h2>Test123</h2></div>"
676 $dom->parse('<div><h2>123</h2></div>')
677 ->at('h2')->prepend_content('Test')->root;
678
679 # "<!-- Test 123 --><br>"
680 $dom->parse('<!-- 123 --><br>')
681 ->child_nodes->first->prepend_content(' Test')->root;
682
683 # "<p><i>123</i>Test</p>"
684 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
685
686=head2 previous
687
688 my $sibling = $dom->previous;
689
690Return L<DOM::Tiny> object for previous sibling element or C<undef> if there
691are no more siblings.
692
693 # "<h1>Test</h1>"
694 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
695
696=head2 previous_node
697
698 my $sibling = $dom->previous_node;
699
700Return L<DOM::Tiny> object for previous sibling node or C<undef> if there are
701no more siblings.
702
703 # "123"
704 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
705 ->at('b')->previous_node->previous_node;
706
707 # " Test "
708 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
709 ->at('b')->previous_node->content;
710
711=head2 remove
712
713 my $parent = $dom->remove;
714
715Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
716
717 # "<div></div>"
718 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
719
720 # "<p><b>456</b></p>"
721 $dom->parse('<p>123<b>456</b></p>')
722 ->at('p')->child_nodes->first->remove->root;
723
724=head2 replace
725
726 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
727
728Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
729nodes) or L</"parent">.
730
731 # "<div><h2>123</h2></div>"
732 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
733
734 # "<p><b>123</b></p>"
735 $dom->parse('<p>Test</p>')
736 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
737
738=head2 root
739
740 my $root = $dom->root;
741
742Return L<DOM::Tiny> object for C<root> node.
743
744=head2 strip
745
746 my $parent = $dom->strip;
747
748Remove this element while preserving its content and return L</"parent">.
749
750 # "<div>Test</div>"
751 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
752
753=head2 tag
754
755 my $tag = $dom->tag;
756 $dom = $dom->tag('div');
757
758This element's tag name.
759
760 # List tag names of child elements
761 say $dom->children->map('tag')->join("\n");
762
763=head2 tap
764
765 $dom = $dom->tap(sub {...});
766
e99ef07d 767Equivalent to L<Mojo::Base/"tap">.
d6512b50 768
769=head2 text
770
771 my $trimmed = $dom->text;
772 my $untrimmed = $dom->text(0);
773
774Extract text content from this element only (not including child elements),
775smart whitespace trimming is enabled by default.
776
777 # "foo baz"
778 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
779
780 # "foo\nbaz\n"
781 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
782
783=head2 to_string
784
785 my $str = $dom->to_string;
786
787Render this node and its content to HTML/XML.
788
789 # "<b>Test</b>"
790 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
791
792=head2 tree
793
794 my $tree = $dom->tree;
795 $dom = $dom->tree(['root']);
796
797Document Object Model. Note that this structure should only be used very
798carefully since it is very dynamic.
799
800=head2 type
801
802 my $type = $dom->type;
803
804This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
805C<root>, C<tag> or C<text>.
806
807 # "cdata"
808 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
809
810 # "comment"
811 $dom->parse('<!-- Test -->')->child_nodes->first->type;
812
813 # "doctype"
814 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
815
816 # "pi"
817 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
818
819 # "raw"
820 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
821
822 # "root"
823 $dom->parse('<p>Test</p>')->type;
824
825 # "tag"
826 $dom->parse('<p>Test</p>')->at('p')->type;
827
828 # "text"
829 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
830
831=head2 val
832
833 my $value = $dom->val;
834
835Extract value from form element (such as C<button>, C<input>, C<option>,
836C<select> and C<textarea>) or return C<undef> if this element has no value. In
837the case of C<select> with C<multiple> attribute, find C<option> elements with
838C<selected> attribute and return an array reference with all values or C<undef>
839if none could be found.
840
841 # "a"
842 $dom->parse('<input name="test" value="a">')->at('input')->val;
843
844 # "b"
845 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
846
847 # "c"
848 $dom->parse('<option value="c">Test</option>')->at('option')->val;
849
850 # "d"
851 $dom->parse('<select><option selected>d</option></select>')
852 ->at('select')->val;
853
854 # "e"
855 $dom->parse('<select multiple><option selected>e</option></select>')
856 ->at('select')->val->[0];
857
858=head2 wrap
859
860 $dom = $dom->wrap('<div></div>');
861
862Wrap HTML/XML fragment around this node, placing it as the last child of the
863first innermost element.
864
865 # "<p>123<b>Test</b></p>"
866 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
867
868 # "<div><p><b>Test</b></p>123</div>"
869 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
870
871 # "<p><b>Test</b></p><p>123</p>"
872 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
873
874 # "<p><b>Test</b></p>"
875 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
876
877=head2 wrap_content
878
879 $dom = $dom->wrap_content('<div></div>');
880
881Wrap HTML/XML fragment around this node's content, placing it as the last
882children of the first innermost element.
883
884 # "<p><b>123Test</b></p>"
885 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
886
887 # "<p><b>Test</b></p><p>123</p>"
888 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
889
890=head2 xml
891
892 my $bool = $dom->xml;
893 $dom = $dom->xml($bool);
894
895Disable HTML semantics in parser and activate case-sensitivity, defaults to
896auto detection based on processing instructions.
897
78ba4051 898=head1 COLLECTION METHODS
899
9a5f1e3f 900Some L<DOM::Tiny> methods return an array-based collection object based on
901L<Mojo::Collection>, which can either be accessed directly as an array
902reference, or with the following methods.
78ba4051 903
904 # Chain methods
905 $collection->map(sub { ucfirst })->shuffle->each(sub {
906 my ($word, $num) = @_;
907 say "$num: $word";
908 });
909
910 # Access array directly to manipulate collection
911 $collection->[23] += 100;
912 say for @$collection;
913
914=head2 compact
915
916 my $new = $collection->compact;
917
918Create a new collection with all elements that are defined and not an empty
919string.
920
921 # $collection contains (0, 1, undef, 2, '', 3)
922 $collection->compact->join(', '); # "0, 1, 2, 3"
923
924=head2 each
925
926 my @elements = $collection->each;
927 $collection = $collection->each(sub {...});
928
929Evaluate callback for each element in collection or return all elements as a
930list if none has been provided. The element will be the first argument passed
931to the callback and is also available as C<$_>.
932
933 # Make a numbered list
934 $collection->each(sub {
935 my ($e, $num) = @_;
936 say "$num: $e";
937 });
938
939=head2 first
940
941 my $first = $collection->first;
942 my $first = $collection->first(qr/foo/);
943 my $first = $collection->first(sub {...});
944 my $first = $collection->first($method);
945 my $first = $collection->first($method, @args);
946
947Evaluate regular expression/callback for, or call method on, each element in
948collection and return the first one that matched the regular expression, or for
949which the callback/method returned true. The element will be the first argument
950passed to the callback and is also available as C<$_>.
951
952 # Longer version
953 my $first = $collection->first(sub { $_->$method(@args) });
954
c7bc4d31 955 # Find first value that contains the word "tiny"
956 my $interesting = $collection->first(qr/tiny/i);
78ba4051 957
958 # Find first value that is greater than 5
959 my $greater = $collection->first(sub { $_ > 5 });
960
961=head2 flatten
962
963 my $new = $collection->flatten;
964
965Flatten nested collections/arrays recursively and create a new collection with
966all elements.
967
968 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
969 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
970
971=head2 grep
972
973 my $new = $collection->grep(qr/foo/);
974 my $new = $collection->grep(sub {...});
975 my $new = $collection->grep($method);
976 my $new = $collection->grep($method, @args);
977
978Evaluate regular expression/callback for, or call method on, each element in
979collection and create a new collection with all elements that matched the
980regular expression, or for which the callback/method returned true. The element
981will be the first argument passed to the callback and is also available as
982C<$_>.
983
984 # Longer version
985 my $new = $collection->grep(sub { $_->$method(@args) });
986
c7bc4d31 987 # Find all values that contain the word "tiny"
988 my $interesting = $collection->grep(qr/tiny/i);
78ba4051 989
990 # Find all values that are greater than 5
991 my $greater = $collection->grep(sub { $_ > 5 });
992
993=head2 join
994
995 my $stream = $collection->join;
995 my $string = $collection->join;
996 my $string = $collection->join("\n");
998Turn collection into string.
999
1000 # Join all values with commas
1001 $collection->join(', ');
1002
1003=head2 last
1004
1005 my $last = $collection->last;
1006
1007Return the last element in collection.
1008
1009=head2 map
1010
1011 my $new = $collection->map(sub {...});
1012 my $new = $collection->map($method);
1013 my $new = $collection->map($method, @args);
1014
1015Evaluate callback for, or call method on, each element in collection and create
1016a new collection from the results. The element will be the first argument
1017passed to the callback and is also available as C<$_>.
1018
1019 # Longer version
1020 my $new = $collection->map(sub { $_->$method(@args) });
1021
c7bc4d31 1022 # Append the word "tiny" to all values
1023 my $domified = $collection->map(sub { $_ . 'tiny' });
78ba4051 1024
1025=head2 reduce
1026
1027 my $result = $collection->reduce(sub {...});
1028 my $result = $collection->reduce(sub {...}, $initial);
1029
1030Reduce elements in collection with callback, the first element will be used as
1031initial value if none has been provided.
1032
1033 # Calculate the sum of all values
1034 my $sum = $collection->reduce(sub { $a + $b });
1035
1036 # Count how often each value occurs in collection
1037 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1038
1039=head2 reverse
1040
1041 my $new = $collection->reverse;
1042
1043Create a new collection with all elements in reverse order.
1044
1045=head2 slice
1046
1047 my $new = $collection->slice(4 .. 7);
1048
1049Create a new collection with all selected elements.
1050
1051 # $collection contains ('A', 'B', 'C', 'D', 'E')
1052 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1053
1054=head2 shuffle
1055
1056 my $new = $collection->shuffle;
1057
1058Create a new collection with all elements in random order.
1059
1060=head2 size
1061
1062 my $size = $collection->size;
1063
1064Number of elements in collection.
1065
1066=head2 sort
1067
1068 my $new = $collection->sort;
1069 my $new = $collection->sort(sub {...});
1070
1071Sort elements based on return value of callback and create a new collection
1072from the results.
1073
1074 # Sort values case-insensitive
1075 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1076
1077=head2 tap
1078
1079 $collection = $collection->tap(sub {...});
1080
1081Equivalent to L<Mojo::Base/"tap">.
1082
1083=head2 to_array
1084
1085 my $array = $collection->to_array;
1086
1087Turn collection into array reference.
1088
1089=head2 uniq
1090
1091 my $new = $collection->uniq;
1092 my $new = $collection->uniq(sub {...});
1093 my $new = $collection->uniq($method);
1094 my $new = $collection->uniq($method, @args);
1095
1096Create a new collection without duplicate elements, using the string
1097representation of either the elements or the return value of the
1098callback/method.
1099
1100 # Longer version
1101 my $new = $collection->uniq(sub { $_->$method(@args) });
1102
1103 # $collection contains ('foo', 'bar', 'bar', 'baz')
1104 $collection->uniq->join(' '); # "foo bar baz"
1105
1106 # $collection contains ([1, 2], [2, 1], [3, 2])
1107 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1108
d6512b50 1109=head1 BUGS
1110
1111Report any issues on the public bugtracker.
1112
1113=head1 AUTHOR
1114
1115Dan Book <dbook@cpan.org>
1116
1117=head1 COPYRIGHT AND LICENSE
1118
1119This software is Copyright (c) 2015 by Dan Book.
1120
1121This is free software, licensed under:
1122
1123 The Artistic License 2.0 (GPL Compatible)
1124
1125=head1 SEE ALSO
1126
31877452 1127L<Mojo::DOM>, L<HTML::TreeBuilder>, L<XML::LibXML>, L<XML::Twig>, L<XML::Smart>
d6512b50 1128
9a5f1e3f 1129=for Pod::Coverage TO_JSON
1130
d6512b50 1131=cut