perl 5.8 support (mst)
[catagits/DOM-Tiny.git] / README.pod
CommitLineData
d6512b50 1=pod
2
3=encoding utf8
4
5=head1 NAME
6
7DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
8
9=head1 SYNOPSIS
10
11 use DOM::Tiny;
12
13 # Parse
14 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
15
16 # Find
17 say $dom->at('#b')->text;
18 say $dom->find('p')->map('text')->join("\n");
19 say $dom->find('[id]')->map(attr => 'id')->join("\n");
20
21 # Iterate
22 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
23
24 # Loop
25 for my $e ($dom->find('p[id]')->each) {
26 say $e->{id}, ':', $e->text;
27 }
28
29 # Modify
30 $dom->find('div p')->last->append('<p id="c">456</p>');
31 $dom->find(':not(p)')->map('strip');
32
33 # Render
34 say "$dom";
35
36=head1 DESCRIPTION
37
9a5f1e3f 38L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
39on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
40and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
41matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
42even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 43validation.
d6512b50 44
45=head1 NODES AND ELEMENTS
46
47When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
48
49 <!DOCTYPE html>
50 <html>
51 <head><title>Hello</title></head>
52 <body>World!</body>
53 </html>
54
55There are currently eight different kinds of nodes, C<cdata>, C<comment>,
56C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
57the type C<tag>.
58
59 root
60 |- doctype (html)
61 +- tag (html)
62 |- tag (head)
63 | +- tag (title)
64 | +- raw (Hello)
65 +- tag (body)
66 +- text (World!)
67
68While all node types are represented as L<DOM::Tiny> objects, some methods like
69L</"attr"> and L</"namespace"> only apply to elements.
70
71=head1 CASE-SENSITIVITY
72
73L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
74names are lowercased and selectors need to be lowercase as well.
75
76 # HTML semantics
77 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
78 say $dom->at('p[id]')->text;
79
80If XML processing instructions are found, the parser will automatically switch
81into XML mode and everything becomes case-sensitive.
82
83 # XML semantics
84 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
85 say $dom->at('P[ID]')->text;
86
87XML detection can also be disabled with the L</"xml"> method.
88
89 # Force XML semantics
90 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93 # Force HTML semantics
94 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
95 say $dom->at('p[id]')->text;
96
63873d67 97=head1 SELECTORS
98
99L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
100selectors that make sense for a standalone parser are supported.
101
03eb5521 102=over
103
104=item Z<>*
63873d67 105
106Any element.
107
108 my $all = $dom->find('*');
109
03eb5521 110=item E
63873d67 111
112An element of type C<E>.
113
114 my $title = $dom->at('title');
115
03eb5521 116=item E[foo]
63873d67 117
118An C<E> element with a C<foo> attribute.
119
120 my $links = $dom->find('a[href]');
121
03eb5521 122=item E[foo="bar"]
63873d67 123
124An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
125
126 my $case_sensitive = $dom->find('input[type="hidden"]');
127 my $case_sensitive = $dom->find('input[type=hidden]');
128
03eb5521 129=item E[foo="bar" i]
63873d67 130
131An C<E> element whose C<foo> attribute value is exactly equal to any
132(ASCII-range) case-permutation of C<bar>. Note that this selector is
133EXPERIMENTAL and might change without warning!
134
135 my $case_insensitive = $dom->find('input[type="hidden" i]');
136 my $case_insensitive = $dom->find('input[type=hidden i]');
137 my $case_insensitive = $dom->find('input[class~="foo" i]');
138
139This selector is part of
140L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
141in progress.
142
03eb5521 143=item E[foo~="bar"]
63873d67 144
145An C<E> element whose C<foo> attribute value is a list of whitespace-separated
146values, one of which is exactly equal to C<bar>.
147
148 my $foo = $dom->find('input[class~="foo"]');
149 my $foo = $dom->find('input[class~=foo]');
150
03eb5521 151=item E[foo^="bar"]
63873d67 152
153An C<E> element whose C<foo> attribute value begins exactly with the string
154C<bar>.
155
156 my $begins_with = $dom->find('input[name^="f"]');
157 my $begins_with = $dom->find('input[name^=f]');
158
03eb5521 159=item E[foo$="bar"]
63873d67 160
161An C<E> element whose C<foo> attribute value ends exactly with the string
162C<bar>.
163
164 my $ends_with = $dom->find('input[name$="o"]');
165 my $ends_with = $dom->find('input[name$=o]');
166
03eb5521 167=item E[foo*="bar"]
63873d67 168
169An C<E> element whose C<foo> attribute value contains the substring C<bar>.
170
171 my $contains = $dom->find('input[name*="fo"]');
172 my $contains = $dom->find('input[name*=fo]');
173
03eb5521 174=item E:root
63873d67 175
176An C<E> element, root of the document.
177
178 my $root = $dom->at(':root');
179
03eb5521 180=item E:nth-child(n)
63873d67 181
182An C<E> element, the C<n-th> child of its parent.
183
184 my $third = $dom->find('div:nth-child(3)');
185 my $odd = $dom->find('div:nth-child(odd)');
186 my $even = $dom->find('div:nth-child(even)');
187 my $top3 = $dom->find('div:nth-child(-n+3)');
188
03eb5521 189=item E:nth-last-child(n)
63873d67 190
191An C<E> element, the C<n-th> child of its parent, counting from the last one.
192
193 my $third = $dom->find('div:nth-last-child(3)');
194 my $odd = $dom->find('div:nth-last-child(odd)');
195 my $even = $dom->find('div:nth-last-child(even)');
196 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
197
03eb5521 198=item E:nth-of-type(n)
63873d67 199
200An C<E> element, the C<n-th> sibling of its type.
201
202 my $third = $dom->find('div:nth-of-type(3)');
203 my $odd = $dom->find('div:nth-of-type(odd)');
204 my $even = $dom->find('div:nth-of-type(even)');
205 my $top3 = $dom->find('div:nth-of-type(-n+3)');
206
03eb5521 207=item E:nth-last-of-type(n)
63873d67 208
209An C<E> element, the C<n-th> sibling of its type, counting from the last one.
210
211 my $third = $dom->find('div:nth-last-of-type(3)');
212 my $odd = $dom->find('div:nth-last-of-type(odd)');
213 my $even = $dom->find('div:nth-last-of-type(even)');
214 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
215
03eb5521 216=item E:first-child
63873d67 217
218An C<E> element, first child of its parent.
219
220 my $first = $dom->find('div p:first-child');
221
03eb5521 222=item E:last-child
63873d67 223
224An C<E> element, last child of its parent.
225
226 my $last = $dom->find('div p:last-child');
227
03eb5521 228=item E:first-of-type
63873d67 229
230An C<E> element, first sibling of its type.
231
232 my $first = $dom->find('div p:first-of-type');
233
03eb5521 234=item E:last-of-type
63873d67 235
236An C<E> element, last sibling of its type.
237
238 my $last = $dom->find('div p:last-of-type');
239
03eb5521 240=item E:only-child
63873d67 241
242An C<E> element, only child of its parent.
243
244 my $lonely = $dom->find('div p:only-child');
245
03eb5521 246=item E:only-of-type
63873d67 247
248An C<E> element, only sibling of its type.
249
250 my $lonely = $dom->find('div p:only-of-type');
251
03eb5521 252=item E:empty
63873d67 253
254An C<E> element that has no children (including text nodes).
255
256 my $empty = $dom->find(':empty');
257
03eb5521 258=item E:checked
63873d67 259
260A user interface element C<E> which is checked (for instance a radio-button or
261checkbox).
262
263 my $input = $dom->find(':checked');
264
03eb5521 265=item E.warning
63873d67 266
267An C<E> element whose class is "warning".
268
269 my $warning = $dom->find('div.warning');
270
03eb5521 271=item E#myid
63873d67 272
273An C<E> element with C<ID> equal to "myid".
274
275 my $foo = $dom->at('div#foo');
276
03eb5521 277=item E:not(s)
63873d67 278
279An C<E> element that does not match simple selector C<s>.
280
281 my $others = $dom->find('div p:not(:first-child)');
282
03eb5521 283=item E F
63873d67 284
285An C<F> element descendant of an C<E> element.
286
287 my $headlines = $dom->find('div h1');
288
03eb5521 289=item E E<gt> F
63873d67 290
291An C<F> element child of an C<E> element.
292
293 my $headlines = $dom->find('html > body > div > h1');
294
03eb5521 295=item E + F
63873d67 296
297An C<F> element immediately preceded by an C<E> element.
298
299 my $second = $dom->find('h1 + h2');
300
03eb5521 301=item E ~ F
63873d67 302
303An C<F> element preceded by an C<E> element.
304
305 my $second = $dom->find('h1 ~ h2');
306
03eb5521 307=item E, F, G
63873d67 308
309Elements of type C<E>, C<F> and C<G>.
310
311 my $headlines = $dom->find('h1, h2, h3');
312
03eb5521 313=item E[foo=bar][bar=baz]
63873d67 314
315An C<E> element whose attributes match all following attribute selectors.
316
317 my $links = $dom->find('a[foo^=b][foo$=ar]');
318
03eb5521 319=back
320
9a5f1e3f 321=head1 OPERATORS
322
323L<DOM::Tiny> overloads the following operators.
324
325=head2 array
326
327 my @nodes = @$dom;
328
329Alias for L</"child_nodes">.
330
331 # "<!-- Test -->"
332 $dom->parse('<!-- Test --><b>123</b>')->[0];
333
334=head2 bool
335
336 my $bool = !!$dom;
337
338Always true.
339
340=head2 hash
341
342 my %attrs = %$dom;
343
344Alias for L</"attr">.
345
346 # "test"
347 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
348
349=head2 stringify
350
351 my $str = "$dom";
352
353Alias for L</"to_string">.
354
d6512b50 355=head1 METHODS
356
357L<DOM::Tiny> implements the following methods.
358
3793c28f 359=head2 new
360
361 my $dom = DOM::Tiny->new;
362 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
363
364Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
365fragment if necessary.
366
d6512b50 367=head2 all_text
368
369 my $trimmed = $dom->all_text;
370 my $untrimmed = $dom->all_text(0);
371
372Extract text content from all descendant nodes of this element, smart
373whitespace trimming is enabled by default.
374
375 # "foo bar baz"
376 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
377
378 # "foo\nbarbaz\n"
379 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
380
381=head2 ancestors
382
383 my $collection = $dom->ancestors;
384 my $collection = $dom->ancestors('div ~ p');
385
386Find all ancestor elements of this node matching the CSS selector and return a
8563f527 387L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 388objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 389
390 # List tag names of ancestor elements
391 say $dom->ancestors->map('tag')->join("\n");
392
393=head2 append
394
395 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
396
397Append HTML/XML fragment to this node.
398
399 # "<div><h1>Test</h1><h2>123</h2></div>"
400 $dom->parse('<div><h1>Test</h1></div>')
401 ->at('h1')->append('<h2>123</h2>')->root;
402
403 # "<p>Test 123</p>"
404 $dom->parse('<p>Test</p>')->at('p')
405 ->child_nodes->first->append(' 123')->root;
406
407=head2 append_content
408
409 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
410
411Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
412node's content.
413
414 # "<div><h1>Test123</h1></div>"
415 $dom->parse('<div><h1>Test</h1></div>')
416 ->at('h1')->append_content('123')->root;
417
418 # "<!-- Test 123 --><br>"
419 $dom->parse('<!-- Test --><br>')
420 ->child_nodes->first->append_content('123 ')->root;
421
422 # "<p>Test<i>123</i></p>"
423 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
424
425=head2 at
426
427 my $result = $dom->at('div ~ p');
428
429Find first descendant element of this element matching the CSS selector and
430return it as a L<DOM::Tiny> object or return C<undef> if none could be found.
9a5f1e3f 431All selectors listed in L</"SELECTORS"> are supported.
d6512b50 432
433 # Find first element with "svg" namespace definition
434 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
435
436=head2 attr
437
438 my $hash = $dom->attr;
439 my $foo = $dom->attr('foo');
440 $dom = $dom->attr({foo => 'bar'});
441 $dom = $dom->attr(foo => 'bar');
442
443This element's attributes.
444
445 # Remove an attribute
446 delete $dom->attr->{id};
447
448 # Attribute without value
449 $dom->attr(selected => undef);
450
451 # List id attributes
452 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
453
454=head2 child_nodes
455
456 my $collection = $dom->child_nodes;
457
8563f527 458Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 459element as L<DOM::Tiny> objects.
460
461 # "<p><b>123</b></p>"
462 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
463
464 # "<!DOCTYPE html>"
465 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
466
467 # " Test "
468 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
469
470=head2 children
471
472 my $collection = $dom->children;
473 my $collection = $dom->children('div ~ p');
474
475Find all child elements of this element matching the CSS selector and return a
8563f527 476L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 477objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 478
479 # Show tag name of random child element
480 say $dom->children->shuffle->first->tag;
481
482=head2 content
483
484 my $str = $dom->content;
485 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
486
487Return this node's content or replace it with HTML/XML fragment (for C<root>
488and C<tag> nodes) or raw content.
489
490 # "<b>Test</b>"
491 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
492
493 # "<div><h1>123</h1></div>"
494 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
495
496 # "<p><i>123</i></p>"
497 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
498
499 # "<div><h1></h1></div>"
500 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
501
502 # " Test "
503 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
504
505 # "<div><!-- 123 -->456</div>"
506 $dom->parse('<div><!-- Test -->456</div>')
507 ->at('div')->child_nodes->first->content(' 123 ')->root;
508
509=head2 descendant_nodes
510
511 my $collection = $dom->descendant_nodes;
512
8563f527 513Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 514this element as L<DOM::Tiny> objects.
515
516 # "<p><b>123</b></p>"
517 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
518 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
519 ->map('remove')->first;
520
521 # "<p><b>test</b>test</p>"
522 $dom->parse('<p><b>123</b>456</p>')
523 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
524 ->map(content => 'test')->first->root;
525
526=head2 find
527
528 my $collection = $dom->find('div ~ p');
529
530Find all descendant elements of this element matching the CSS selector and
8563f527 531return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 532L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 533
534 # Find a specific element and extract information
535 my $id = $dom->find('div')->[23]{id};
536
537 # Extract information from multiple elements
538 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
539
540 # Count all the different tags
541 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
542
543 # Find elements with a class that contains dots
544 my @divs = $dom->find('div.foo\.bar')->each;
545
546=head2 following
547
548 my $collection = $dom->following;
549 my $collection = $dom->following('div ~ p');
550
551Find all sibling elements after this node matching the CSS selector and return
8563f527 552a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 553objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 554
555 # List tags of sibling elements after this node
556 say $dom->following->map('tag')->join("\n");
557
558=head2 following_nodes
559
560 my $collection = $dom->following_nodes;
561
8563f527 562Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 563this node as L<DOM::Tiny> objects.
564
565 # "C"
566 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
567
568=head2 matches
569
570 my $bool = $dom->matches('div ~ p');
571
9a5f1e3f 572Check if this element matches the CSS selector. All selectors listed in
573L</"SELECTORS"> are supported.
d6512b50 574
575 # True
576 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
577 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
578
579 # False
580 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
581 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
582
583=head2 namespace
584
585 my $namespace = $dom->namespace;
586
587Find this element's namespace or return C<undef> if none could be found.
588
589 # Find namespace for an element with namespace prefix
590 my $namespace = $dom->at('svg > svg\:circle')->namespace;
591
592 # Find namespace for an element that may or may not have a namespace prefix
593 my $namespace = $dom->at('svg > circle')->namespace;
594
d6512b50 595=head2 next
596
597 my $sibling = $dom->next;
598
599Return L<DOM::Tiny> object for next sibling element or C<undef> if there are no
600more siblings.
601
602 # "<h2>123</h2>"
603 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
604
605=head2 next_node
606
607 my $sibling = $dom->next_node;
608
609Return L<DOM::Tiny> object for next sibling node or C<undef> if there are no
610more siblings.
611
612 # "456"
613 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
614 ->at('b')->next_node->next_node;
615
616 # " Test "
617 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
618 ->at('b')->next_node->content;
619
620=head2 parent
621
622 my $parent = $dom->parent;
623
624Return L<DOM::Tiny> object for parent of this node or C<undef> if this node has
625no parent.
626
627=head2 parse
628
629 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
630
9a5f1e3f 631Parse HTML/XML fragment.
d6512b50 632
633 # Parse XML
634 my $dom = DOM::Tiny->new->xml(1)->parse($xml);
635
636=head2 preceding
637
638 my $collection = $dom->preceding;
639 my $collection = $dom->preceding('div ~ p');
640
641Find all sibling elements before this node matching the CSS selector and return
8563f527 642a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 643objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 644
645 # List tags of sibling elements before this node
646 say $dom->preceding->map('tag')->join("\n");
647
648=head2 preceding_nodes
649
650 my $collection = $dom->preceding_nodes;
651
8563f527 652Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
653before this node as L<DOM::Tiny> objects.
d6512b50 654
655 # "A"
656 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
657
658=head2 prepend
659
660 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
661
662Prepend HTML/XML fragment to this node.
663
664 # "<div><h1>Test</h1><h2>123</h2></div>"
665 $dom->parse('<div><h2>123</h2></div>')
666 ->at('h2')->prepend('<h1>Test</h1>')->root;
667
668 # "<p>Test 123</p>"
669 $dom->parse('<p>123</p>')
670 ->at('p')->child_nodes->first->prepend('Test ')->root;
671
672=head2 prepend_content
673
674 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
675
676Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
677node's content.
678
679 # "<div><h2>Test123</h2></div>"
680 $dom->parse('<div><h2>123</h2></div>')
681 ->at('h2')->prepend_content('Test')->root;
682
683 # "<!-- Test 123 --><br>"
684 $dom->parse('<!-- 123 --><br>')
685 ->child_nodes->first->prepend_content(' Test')->root;
686
687 # "<p><i>123</i>Test</p>"
688 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
689
690=head2 previous
691
692 my $sibling = $dom->previous;
693
694Return L<DOM::Tiny> object for previous sibling element or C<undef> if there
695are no more siblings.
696
697 # "<h1>Test</h1>"
698 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
699
700=head2 previous_node
701
702 my $sibling = $dom->previous_node;
703
704Return L<DOM::Tiny> object for previous sibling node or C<undef> if there are
705no more siblings.
706
707 # "123"
708 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
709 ->at('b')->previous_node->previous_node;
710
711 # " Test "
712 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
713 ->at('b')->previous_node->content;
714
715=head2 remove
716
717 my $parent = $dom->remove;
718
719Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
720
721 # "<div></div>"
722 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
723
724 # "<p><b>456</b></p>"
725 $dom->parse('<p>123<b>456</b></p>')
726 ->at('p')->child_nodes->first->remove->root;
727
728=head2 replace
729
730 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
731
732Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
733nodes) or L</"parent">.
734
735 # "<div><h2>123</h2></div>"
736 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
737
738 # "<p><b>123</b></p>"
739 $dom->parse('<p>Test</p>')
740 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
741
742=head2 root
743
744 my $root = $dom->root;
745
746Return L<DOM::Tiny> object for C<root> node.
747
748=head2 strip
749
750 my $parent = $dom->strip;
751
752Remove this element while preserving its content and return L</"parent">.
753
754 # "<div>Test</div>"
755 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
756
757=head2 tag
758
759 my $tag = $dom->tag;
760 $dom = $dom->tag('div');
761
762This element's tag name.
763
764 # List tag names of child elements
765 say $dom->children->map('tag')->join("\n");
766
767=head2 tap
768
769 $dom = $dom->tap(sub {...});
770
e99ef07d 771Equivalent to L<Mojo::Base/"tap">.
d6512b50 772
773=head2 text
774
775 my $trimmed = $dom->text;
776 my $untrimmed = $dom->text(0);
777
778Extract text content from this element only (not including child elements),
779smart whitespace trimming is enabled by default.
780
781 # "foo baz"
782 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
783
784 # "foo\nbaz\n"
785 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
786
787=head2 to_string
788
789 my $str = $dom->to_string;
790
791Render this node and its content to HTML/XML.
792
793 # "<b>Test</b>"
794 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
795
796=head2 tree
797
798 my $tree = $dom->tree;
799 $dom = $dom->tree(['root']);
800
801Document Object Model. Note that this structure should only be used very
802carefully since it is very dynamic.
803
804=head2 type
805
806 my $type = $dom->type;
807
808This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
809C<root>, C<tag> or C<text>.
810
811 # "cdata"
812 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
813
814 # "comment"
815 $dom->parse('<!-- Test -->')->child_nodes->first->type;
816
817 # "doctype"
818 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
819
820 # "pi"
821 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
822
823 # "raw"
824 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
825
826 # "root"
827 $dom->parse('<p>Test</p>')->type;
828
829 # "tag"
830 $dom->parse('<p>Test</p>')->at('p')->type;
831
832 # "text"
833 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
834
835=head2 val
836
837 my $value = $dom->val;
838
839Extract value from form element (such as C<button>, C<input>, C<option>,
840C<select> and C<textarea>) or return C<undef> if this element has no value. In
841the case of C<select> with C<multiple> attribute, find C<option> elements with
842C<selected> attribute and return an array reference with all values or C<undef>
843if none could be found.
844
845 # "a"
846 $dom->parse('<input name="test" value="a">')->at('input')->val;
847
848 # "b"
849 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
850
851 # "c"
852 $dom->parse('<option value="c">Test</option>')->at('option')->val;
853
854 # "d"
855 $dom->parse('<select><option selected>d</option></select>')
856 ->at('select')->val;
857
858 # "e"
859 $dom->parse('<select multiple><option selected>e</option></select>')
860 ->at('select')->val->[0];
861
862=head2 wrap
863
864 $dom = $dom->wrap('<div></div>');
865
866Wrap HTML/XML fragment around this node, placing it as the last child of the
867first innermost element.
868
869 # "<p>123<b>Test</b></p>"
870 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
871
872 # "<div><p><b>Test</b></p>123</div>"
873 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
874
875 # "<p><b>Test</b></p><p>123</p>"
876 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
877
878 # "<p><b>Test</b></p>"
879 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
880
881=head2 wrap_content
882
883 $dom = $dom->wrap_content('<div></div>');
884
885Wrap HTML/XML fragment around this node's content, placing that content as the
886last children of the first innermost element.
887
888 # "<p><b>123Test</b></p>"
889 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
890
891 # "<p><b>Test</b></p><p>123</p>"
892 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
893
894=head2 xml
895
896 my $bool = $dom->xml;
897 $dom = $dom->xml($bool);
898
899Disable HTML semantics in parser and activate case-sensitivity, defaults to
900auto detection based on processing instructions.
901
78ba4051 902=head1 COLLECTION METHODS
903
9a5f1e3f 904Some L<DOM::Tiny> methods return an array-based collection object based on
905L<Mojo::Collection>, which can either be accessed directly as an array
906reference, or with the following methods.
78ba4051 907
908 # Chain methods
909 $collection->map(sub { ucfirst })->shuffle->each(sub {
910 my ($word, $num) = @_;
911 say "$num: $word";
912 });
913
914 # Access array directly to manipulate collection
915 $collection->[23] += 100;
916 say for @$collection;
917
918=head2 compact
919
920 my $new = $collection->compact;
921
922Create a new collection with all elements that are defined and not an empty
923string.
924
925 # $collection contains (0, 1, undef, 2, '', 3)
926 $collection->compact->join(', '); # "0, 1, 2, 3"
927
928=head2 each
929
930 my @elements = $collection->each;
931 $collection = $collection->each(sub {...});
932
933Evaluate callback for each element in collection or return all elements as a
934list if none has been provided. The element will be the first argument passed
935to the callback and is also available as C<$_>.
936
937 # Make a numbered list
938 $collection->each(sub {
939 my ($e, $num) = @_;
940 say "$num: $e";
941 });
942
943=head2 first
944
945 my $first = $collection->first;
946 my $first = $collection->first(qr/foo/);
947 my $first = $collection->first(sub {...});
948 my $first = $collection->first($method);
949 my $first = $collection->first($method, @args);
950
951Evaluate regular expression/callback for, or call method on, each element in
952collection and return the first one that matched the regular expression, or for
953which the callback/method returned true. The element will be the first argument
954passed to the callback and is also available as C<$_>.
955
956 # Longer version
957 my $first = $collection->first(sub { $_->$method(@args) });
958
c7bc4d31 959 # Find first value that contains the word "tiny"
960 my $interesting = $collection->first(qr/tiny/i);
78ba4051 961
962 # Find first value that is greater than 5
963 my $greater = $collection->first(sub { $_ > 5 });
964
965=head2 flatten
966
967 my $new = $collection->flatten;
968
969Flatten nested collections/arrays recursively and create a new collection with
970all elements.
971
972 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
973 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
974
975=head2 grep
976
977 my $new = $collection->grep(qr/foo/);
978 my $new = $collection->grep(sub {...});
979 my $new = $collection->grep($method);
980 my $new = $collection->grep($method, @args);
981
982Evaluate regular expression/callback for, or call method on, each element in
983collection and create a new collection with all elements that matched the
984regular expression, or for which the callback/method returned true. The element
985will be the first argument passed to the callback and is also available as
986C<$_>.
987
988 # Longer version
989 my $new = $collection->grep(sub { $_->$method(@args) });
990
c7bc4d31 991 # Find all values that contain the word "tiny"
992 my $interesting = $collection->grep(qr/tiny/i);
78ba4051 993
994 # Find all values that are greater than 5
995 my $greater = $collection->grep(sub { $_ > 5 });
996
997=head2 join
998
999 my $string = $collection->join;
1000 my $string = $collection->join("\n");
1001
1002Turn collection into string.
1003
1004 # Join all values with commas
1005 $collection->join(', ');
1006
1007=head2 last
1008
1009 my $last = $collection->last;
1010
1011Return the last element in collection.
1012
1013=head2 map
1014
1015 my $new = $collection->map(sub {...});
1016 my $new = $collection->map($method);
1017 my $new = $collection->map($method, @args);
1018
1019Evaluate callback for, or call method on, each element in collection and create
1020a new collection from the results. The element will be the first argument
1021passed to the callback and is also available as C<$_>.
1022
1023 # Longer version
1024 my $new = $collection->map(sub { $_->$method(@args) });
1025
c7bc4d31 1026 # Append the word "tiny" to all values
1027 my $domified = $collection->map(sub { $_ . 'tiny' });
78ba4051 1028
1029=head2 reduce
1030
1031 my $result = $collection->reduce(sub {...});
1032 my $result = $collection->reduce(sub {...}, $initial);
1033
1034Reduce elements in collection with callback, the first element will be used as
1035initial value if none has been provided.
1036
1037 # Calculate the sum of all values
1038 my $sum = $collection->reduce(sub { $a + $b });
1039
1040 # Count how often each value occurs in collection
1041 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1042
1043=head2 reverse
1044
1045 my $new = $collection->reverse;
1046
1047Create a new collection with all elements in reverse order.
1048
1049=head2 slice
1050
1051 my $new = $collection->slice(4 .. 7);
1052
1053Create a new collection with all selected elements.
1054
1055 # $collection contains ('A', 'B', 'C', 'D', 'E')
1056 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1057
1058=head2 shuffle
1059
1060 my $new = $collection->shuffle;
1061
1062Create a new collection with all elements in random order.
1063
1064=head2 size
1065
1066 my $size = $collection->size;
1067
1068Number of elements in collection.
1069
1070=head2 sort
1071
1072 my $new = $collection->sort;
1073 my $new = $collection->sort(sub {...});
1074
1075Sort elements based on return value of callback and create a new collection
1076from the results.
1077
1078 # Sort values case-insensitive
1079 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1080
1081=head2 tap
1082
1083 $collection = $collection->tap(sub {...});
1084
1085Equivalent to L<Mojo::Base/"tap">.
1086
1087=head2 to_array
1088
1089 my $array = $collection->to_array;
1090
1091Turn collection into array reference.
1092
1093=head2 uniq
1094
1095 my $new = $collection->uniq;
1096 my $new = $collection->uniq(sub {...});
1097 my $new = $collection->uniq($method);
1098 my $new = $collection->uniq($method, @args);
1099
1100Create a new collection without duplicate elements, using the string
1101representation of either the elements or the return value of the
1102callback/method.
1103
1104 # Longer version
1105 my $new = $collection->uniq(sub { $_->$method(@args) });
1106
1107 # $collection contains ('foo', 'bar', 'bar', 'baz')
1108 $collection->uniq->join(' '); # "foo bar baz"
1109
1110 # $collection contains ([1, 2], [2, 1], [3, 2])
1111 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1112
d6512b50 1113=head1 BUGS
1114
1115Report any issues on the public bugtracker.
1116
1117=head1 AUTHOR
1118
1119Dan Book <dbook@cpan.org>
1120
2d9f5165 1121=head1 CONTRIBUTORS
1122
1123=over
1124
1125=item Matt S Trout (mst)
1126
1127=back
1128
d6512b50 1129=head1 COPYRIGHT AND LICENSE
1130
1131This software is Copyright (c) 2015 by Dan Book.
1132
1133This is free software, licensed under:
1134
1135 The Artistic License 2.0 (GPL Compatible)
1136
1137=head1 SEE ALSO
1138
31877452 1139L<Mojo::DOM>, L<HTML::TreeBuilder>, L<XML::LibXML>, L<XML::Twig>, L<XML::Smart>
d6512b50 1140
9a5f1e3f 1141=for Pod::Coverage TO_JSON
1142
d6512b50 1143=cut