make DOM::Tiny::HTML and DOM::Tiny::CSS private also
[catagits/DOM-Tiny.git] / README.pod
CommitLineData
d6512b50 1=pod
2
3=encoding utf8
4
5=head1 NAME
6
7DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
8
9=head1 SYNOPSIS
10
11 use DOM::Tiny;
12
13 # Parse
14 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
15
16 # Find
17 say $dom->at('#b')->text;
18 say $dom->find('p')->map('text')->join("\n");
19 say $dom->find('[id]')->map(attr => 'id')->join("\n");
20
21 # Iterate
22 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
23
24 # Loop
25 for my $e ($dom->find('p[id]')->each) {
26   say $e->{id}, ':', $e->text;
27 }
28
29 # Modify
30 $dom->find('div p')->last->append('<p id="c">456</p>');
31 $dom->find(':not(p)')->map('strip');
32
33 # Render
34 say "$dom";
35
36=head1 DESCRIPTION
37
9a5f1e3f 38L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based
39on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/>
40and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and
41matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will
42even try to interpret broken HTML and XML, so you should not use it for
5a70ee9d 43validation.
d6512b50 44
45=head1 NODES AND ELEMENTS
46
47When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
48
49 <!DOCTYPE html>
50 <html>
51 <head><title>Hello</title></head>
52 <body>World!</body>
53 </html>
54
55There are currently eight different kinds of nodes, C<cdata>, C<comment>,
56C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
57the type C<tag>.
58
59 root
60 |- doctype (html)
61 +- tag (html)
62    |- tag (head)
63    |  +- tag (title)
64    |     +- raw (Hello)
65    +- tag (body)
66       +- text (World!)
67
68While all node types are represented as L<DOM::Tiny> objects, some methods like
69L</"attr"> and L</"namespace"> only apply to elements.
70
71=head1 CASE-SENSITIVITY
72
73L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
74names are lowercased and selectors need to be lowercase as well.
75
76 # HTML semantics
77 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
78 say $dom->at('p[id]')->text;
79
80If XML processing instructions are found, the parser will automatically switch
81into XML mode and everything becomes case-sensitive.
82
83 # XML semantics
84 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
85 say $dom->at('P[ID]')->text;
86
87XML detection can also be disabled with the L</"xml"> method.
88
89 # Force XML semantics
90 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
91 say $dom->at('P[ID]')->text;
92
93 # Force HTML semantics
94 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
95 say $dom->at('p[id]')->text;
96
9a5f1e3f 97=head1 OPERATORS
98
99L<DOM::Tiny> overloads the following operators.
100
101=head2 array
102
103 my @nodes = @$dom;
104
105Alias for L</"child_nodes">.
106
107 # "<!-- Test -->"
108 $dom->parse('<!-- Test --><b>123</b>')->[0];
109
110=head2 bool
111
112 my $bool = !!$dom;
113
114Always true.
115
116=head2 hash
117
118 my %attrs = %$dom;
119
120Alias for L</"attr">.
121
122 # "test"
123 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
124
125=head2 stringify
126
127 my $str = "$dom";
128
129Alias for L</"to_string">.
130
d6512b50 131=head1 METHODS
132
133L<DOM::Tiny> implements the following methods.
134
3793c28f 135=head2 new
136
137 my $dom = DOM::Tiny->new;
138 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
139
140Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
141fragment if necessary.
142
d6512b50 143=head2 all_text
144
145 my $trimmed = $dom->all_text;
146 my $untrimmed = $dom->all_text(0);
147
148Extract text content from all descendant nodes of this element, smart
149whitespace trimming is enabled by default.
150
151 # "foo bar baz"
152 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
153
154 # "foo\nbarbaz\n"
155 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
156
157=head2 ancestors
158
159 my $collection = $dom->ancestors;
160 my $collection = $dom->ancestors('div ~ p');
161
162Find all ancestor elements of this node matching the CSS selector and return a
8563f527 163L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 164objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 165
166 # List tag names of ancestor elements
167 say $dom->ancestors->map('tag')->join("\n");
168
169=head2 append
170
171 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
172
173Append HTML/XML fragment to this node.
174
175 # "<div><h1>Test</h1><h2>123</h2></div>"
176 $dom->parse('<div><h1>Test</h1></div>')
177 ->at('h1')->append('<h2>123</h2>')->root;
178
179 # "<p>Test 123</p>"
180 $dom->parse('<p>Test</p>')->at('p')
181 ->child_nodes->first->append(' 123')->root;
182
183=head2 append_content
184
185 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
186
187Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
188node's content.
189
190 # "<div><h1>Test123</h1></div>"
191 $dom->parse('<div><h1>Test</h1></div>')
192 ->at('h1')->append_content('123')->root;
193
194 # "<!-- Test 123 --><br>"
195 $dom->parse('<!-- Test --><br>')
196 ->child_nodes->first->append_content('123 ')->root;
197
198 # "<p>Test<i>123</i></p>"
199 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
200
201=head2 at
202
203 my $result = $dom->at('div ~ p');
204
205Find first descendant element of this element matching the CSS selector and
206return it as a L<DOM::Tiny> object or return C<undef> if none could be found.
9a5f1e3f 207All selectors listed in L</"SELECTORS"> are supported.
d6512b50 208
209 # Find first element with "svg" namespace definition
210 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
211
212=head2 attr
213
214 my $hash = $dom->attr;
215 my $foo = $dom->attr('foo');
216 $dom = $dom->attr({foo => 'bar'});
217 $dom = $dom->attr(foo => 'bar');
218
219This element's attributes.
220
221 # Remove an attribute
222 delete $dom->attr->{id};
223
224 # Attribute without value
225 $dom->attr(selected => undef);
226
227 # List id attributes
228 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
229
230=head2 child_nodes
231
232 my $collection = $dom->child_nodes;
233
8563f527 234Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 235element as L<DOM::Tiny> objects.
236
237 # "<p><b>123</b></p>"
238 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
239
240 # "<!DOCTYPE html>"
241 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
242
243 # " Test "
244 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
245
246=head2 children
247
248 my $collection = $dom->children;
249 my $collection = $dom->children('div ~ p');
250
251Find all child elements of this element matching the CSS selector and return a
8563f527 252L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 253objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 254
255 # Show tag name of random child element
256 say $dom->children->shuffle->first->tag;
257
258=head2 content
259
260 my $str = $dom->content;
261 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
262
263Return this node's content or replace it with HTML/XML fragment (for C<root>
264and C<tag> nodes) or raw content.
265
266 # "<b>Test</b>"
267 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
268
269 # "<div><h1>123</h1></div>"
270 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
271
272 # "<p><i>123</i></p>"
273 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
274
275 # "<div><h1></h1></div>"
276 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
277
278 # " Test "
279 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
280
281 # "<div><!-- 123 -->456</div>"
282 $dom->parse('<div><!-- Test -->456</div>')
283 ->at('div')->child_nodes->first->content(' 123 ')->root;
284
285=head2 descendant_nodes
286
287 my $collection = $dom->descendant_nodes;
288
8563f527 289Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 290this element as L<DOM::Tiny> objects.
291
292 # "<p><b>123</b></p>"
293 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
294 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
295 ->map('remove')->first;
296
297 # "<p><b>test</b>test</p>"
298 $dom->parse('<p><b>123</b>456</p>')
299 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
300 ->map(content => 'test')->first->root;
301
302=head2 find
303
304 my $collection = $dom->find('div ~ p');
305
306Find all descendant elements of this element matching the CSS selector and
8563f527 307return a L<collection|/"COLLECTION METHODS"> containing these elements as
9a5f1e3f 308L<DOM::Tiny> objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 309
310 # Find a specific element and extract information
311 my $id = $dom->find('div')->[23]{id};
312
313 # Extract information from multiple elements
314 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
315
316 # Count all the different tags
317 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
318
319 # Find elements with a class that contains dots
320 my @divs = $dom->find('div.foo\.bar')->each;
321
322=head2 following
323
324 my $collection = $dom->following;
325 my $collection = $dom->following('div ~ p');
326
327Find all sibling elements after this node matching the CSS selector and return
8563f527 328a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 329objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 330
331 # List tags of sibling elements after this node
332 say $dom->following->map('tag')->join("\n");
333
334=head2 following_nodes
335
336 my $collection = $dom->following_nodes;
337
8563f527 338Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 339this node as L<DOM::Tiny> objects.
340
341 # "C"
342 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
343
344=head2 matches
345
346 my $bool = $dom->matches('div ~ p');
347
9a5f1e3f 348Check if this element matches the CSS selector. All selectors listed in
349L</"SELECTORS"> are supported.
d6512b50 350
351 # True
352 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
353 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
354
355 # False
356 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
357 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
358
359=head2 namespace
360
361 my $namespace = $dom->namespace;
362
363Find this element's namespace or return C<undef> if none could be found.
364
365 # Find namespace for an element with namespace prefix
366 my $namespace = $dom->at('svg > svg\:circle')->namespace;
367
368 # Find namespace for an element that may or may not have a namespace prefix
369 my $namespace = $dom->at('svg > circle')->namespace;
370
d6512b50 371=head2 next
372
373 my $sibling = $dom->next;
374
375Return L<DOM::Tiny> object for next sibling element or C<undef> if there are no
376more siblings.
377
378 # "<h2>123</h2>"
379 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
380
381=head2 next_node
382
383 my $sibling = $dom->next_node;
384
385Return L<DOM::Tiny> object for next sibling node or C<undef> if there are no
386more siblings.
387
388 # "456"
389 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
390 ->at('b')->next_node->next_node;
391
392 # " Test "
393 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
394 ->at('b')->next_node->content;
395
396=head2 parent
397
398 my $parent = $dom->parent;
399
400Return L<DOM::Tiny> object for parent of this node or C<undef> if this node has
401no parent.
402
403=head2 parse
404
405 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
406
9a5f1e3f 407Parse HTML/XML fragment.
d6512b50 408
409 # Parse XML
410 my $dom = DOM::Tiny->new->xml(1)->parse($xml);
411
412=head2 preceding
413
414 my $collection = $dom->preceding;
415 my $collection = $dom->preceding('div ~ p');
416
417Find all sibling elements before this node matching the CSS selector and return
8563f527 418a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
9a5f1e3f 419objects. All selectors listed in L</"SELECTORS"> are supported.
d6512b50 420
421 # List tags of sibling elements before this node
422 say $dom->preceding->map('tag')->join("\n");
423
424=head2 preceding_nodes
425
426 my $collection = $dom->preceding_nodes;
427
8563f527 428Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
429before this node as L<DOM::Tiny> objects.
d6512b50 430
431 # "A"
432 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
433
434=head2 prepend
435
436 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
437
438Prepend HTML/XML fragment to this node.
439
440 # "<div><h1>Test</h1><h2>123</h2></div>"
441 $dom->parse('<div><h2>123</h2></div>')
442 ->at('h2')->prepend('<h1>Test</h1>')->root;
443
444 # "<p>Test 123</p>"
445 $dom->parse('<p>123</p>')
446 ->at('p')->child_nodes->first->prepend('Test ')->root;
447
448=head2 prepend_content
449
450 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
451
452Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
453node's content.
454
455 # "<div><h2>Test123</h2></div>"
456 $dom->parse('<div><h2>123</h2></div>')
457 ->at('h2')->prepend_content('Test')->root;
458
459 # "<!-- Test 123 --><br>"
460 $dom->parse('<!-- 123 --><br>')
461 ->child_nodes->first->prepend_content(' Test')->root;
462
463 # "<p><i>123</i>Test</p>"
464 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
465
466=head2 previous
467
468 my $sibling = $dom->previous;
469
470Return L<DOM::Tiny> object for previous sibling element or C<undef> if there
471are no more siblings.
472
473 # "<h1>Test</h1>"
474 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
475
476=head2 previous_node
477
478 my $sibling = $dom->previous_node;
479
480Return L<DOM::Tiny> object for previous sibling node or C<undef> if there are
481no more siblings.
482
483 # "123"
484 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
485 ->at('b')->previous_node->previous_node;
486
487 # " Test "
488 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
489 ->at('b')->previous_node->content;
490
491=head2 remove
492
493 my $parent = $dom->remove;
494
495Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
496
497 # "<div></div>"
498 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
499
500 # "<p><b>456</b></p>"
501 $dom->parse('<p>123<b>456</b></p>')
502 ->at('p')->child_nodes->first->remove->root;
503
504=head2 replace
505
506 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
507
508Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
509nodes) or L</"parent">.
510
511 # "<div><h2>123</h2></div>"
512 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
513
514 # "<p><b>123</b></p>"
515 $dom->parse('<p>Test</p>')
516 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
517
518=head2 root
519
520 my $root = $dom->root;
521
522Return L<DOM::Tiny> object for C<root> node.
523
524=head2 strip
525
526 my $parent = $dom->strip;
527
528Remove this element while preserving its content and return L</"parent">.
529
530 # "<div>Test</div>"
531 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
532
533=head2 tag
534
535 my $tag = $dom->tag;
536 $dom = $dom->tag('div');
537
538This element's tag name.
539
540 # List tag names of child elements
541 say $dom->children->map('tag')->join("\n");
542
543=head2 tap
544
545 $dom = $dom->tap(sub {...});
546
e99ef07d 547Equivalent to L<Mojo::Base/"tap">.
d6512b50 548
549=head2 text
550
551 my $trimmed = $dom->text;
552 my $untrimmed = $dom->text(0);
553
554Extract text content from this element only (not including child elements),
555smart whitespace trimming is enabled by default.
556
557 # "foo baz"
558 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
559
560 # "foo\nbaz\n"
561 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
562
563=head2 to_string
564
565 my $str = $dom->to_string;
566
567Render this node and its content to HTML/XML.
568
569 # "<b>Test</b>"
570 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
571
572=head2 tree
573
574 my $tree = $dom->tree;
575 $dom = $dom->tree(['root']);
576
577Document Object Model. Note that this structure should only be used very
578carefully since it is very dynamic.
579
580=head2 type
581
582 my $type = $dom->type;
583
584This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
585C<root>, C<tag> or C<text>.
586
587 # "cdata"
588 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
589
590 # "comment"
591 $dom->parse('<!-- Test -->')->child_nodes->first->type;
592
593 # "doctype"
594 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
595
596 # "pi"
597 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
598
599 # "raw"
600 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
601
602 # "root"
603 $dom->parse('<p>Test</p>')->type;
604
605 # "tag"
606 $dom->parse('<p>Test</p>')->at('p')->type;
607
608 # "text"
609 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
610
611=head2 val
612
613 my $value = $dom->val;
614
615Extract value from form element (such as C<button>, C<input>, C<option>,
616C<select> and C<textarea>) or return C<undef> if this element has no value. In
617the case of C<select> with C<multiple> attribute, find C<option> elements with
618C<selected> attribute and return an array reference with all values or C<undef>
619if none could be found.
620
621 # "a"
622 $dom->parse('<input name="test" value="a">')->at('input')->val;
623
624 # "b"
625 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
626
627 # "c"
628 $dom->parse('<option value="c">Test</option>')->at('option')->val;
629
630 # "d"
631 $dom->parse('<select><option selected>d</option></select>')
632 ->at('select')->val;
633
634 # "e"
635 $dom->parse('<select multiple><option selected>e</option></select>')
636 ->at('select')->val->[0];
637
638=head2 wrap
639
640 $dom = $dom->wrap('<div></div>');
641
642Wrap HTML/XML fragment around this node, placing it as the last child of the
643first innermost element.
644
645 # "<p>123<b>Test</b></p>"
646 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
647
648 # "<div><p><b>Test</b></p>123</div>"
649 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
650
651 # "<p><b>Test</b></p><p>123</p>"
652 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
653
654 # "<p><b>Test</b></p>"
655 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
656
657=head2 wrap_content
658
659 $dom = $dom->wrap_content('<div></div>');
660
661Wrap HTML/XML fragment around this node's content, placing that content as
662the last children of the first innermost element.
663
664 # "<p><b>123Test</b></p>"
665 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
666
667 # "<p><b>Test</b></p><p>123</p>"
668 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
669
670=head2 xml
671
672 my $bool = $dom->xml;
673 $dom = $dom->xml($bool);
674
675Disable HTML semantics in parser and activate case-sensitivity, defaults to
676auto detection based on processing instructions.
677
78ba4051 678=head1 COLLECTION METHODS
679
9a5f1e3f 680Some L<DOM::Tiny> methods return an array-based collection object based on
681L<Mojo::Collection>, which can either be accessed directly as an array
682reference, or with the following methods.
78ba4051 683
684 # Chain methods
685 $collection->map(sub { ucfirst })->shuffle->each(sub {
686   my ($word, $num) = @_;
687   say "$num: $word";
688 });
689
690 # Access array directly to manipulate collection
691 $collection->[23] += 100;
692 say for @$collection;
693
694=head2 compact
695
696 my $new = $collection->compact;
697
698Create a new collection with all elements that are defined and not an empty
699string.
700
701 # $collection contains (0, 1, undef, 2, '', 3)
702 $collection->compact->join(', '); # "0, 1, 2, 3"
703
704=head2 each
705
706 my @elements = $collection->each;
707 $collection = $collection->each(sub {...});
708
709Evaluate callback for each element in collection or return all elements as a
710list if none has been provided. The element will be the first argument passed
711to the callback and is also available as C<$_>.
712
713 # Make a numbered list
714 $collection->each(sub {
715   my ($e, $num) = @_;
716   say "$num: $e";
717 });
718
719=head2 first
720
721 my $first = $collection->first;
722 my $first = $collection->first(qr/foo/);
723 my $first = $collection->first(sub {...});
724 my $first = $collection->first($method);
725 my $first = $collection->first($method, @args);
726
727Evaluate regular expression/callback for, or call method on, each element in
728collection and return the first one that matched the regular expression, or for
729which the callback/method returned true. The element will be the first argument
730passed to the callback and is also available as C<$_>.
731
732 # Longer version
733 my $first = $collection->first(sub { $_->$method(@args) });
734
735 # Find first value that contains the word "dom"
736 my $interesting = $collection->first(qr/dom/i);
737
738 # Find first value that is greater than 5
739 my $greater = $collection->first(sub { $_ > 5 });
740
741=head2 flatten
742
743 my $new = $collection->flatten;
744
745Flatten nested collections/arrays recursively and create a new collection with
746all elements.
747
748 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
749 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
750
751=head2 grep
752
753 my $new = $collection->grep(qr/foo/);
754 my $new = $collection->grep(sub {...});
755 my $new = $collection->grep($method);
756 my $new = $collection->grep($method, @args);
757
758Evaluate regular expression/callback for, or call method on, each element in
759collection and create a new collection with all elements that matched the
760regular expression, or for which the callback/method returned true. The element
761will be the first argument passed to the callback and is also available as
762C<$_>.
763
764 # Longer version
765 my $new = $collection->grep(sub { $_->$method(@args) });
766
767 # Find all values that contain the word "dom"
768 my $interesting = $collection->grep(qr/dom/i);
769
770 # Find all values that are greater than 5
771 my $greater = $collection->grep(sub { $_ > 5 });
772
773=head2 join
774
775 my $str = $collection->join;
776 my $str = $collection->join("\n");
777
778Turn collection into string.
779
780 # Join all values with commas
781 $collection->join(', ');
782
783=head2 last
784
785 my $last = $collection->last;
786
787Return the last element in collection.
788
789=head2 map
790
791 my $new = $collection->map(sub {...});
792 my $new = $collection->map($method);
793 my $new = $collection->map($method, @args);
794
795Evaluate callback for, or call method on, each element in collection and create
796a new collection from the results. The element will be the first argument
797passed to the callback and is also available as C<$_>.
798
799 # Longer version
800 my $new = $collection->map(sub { $_->$method(@args) });
801
802 # Append the word "dom" to all values
803 my $domified = $collection->map(sub { $_ . 'dom' });
804
805=head2 reduce
806
807 my $result = $collection->reduce(sub {...});
808 my $result = $collection->reduce(sub {...}, $initial);
809
810Reduce elements in collection with callback, the first element will be used as
811initial value if none has been provided.
812
813 # Calculate the sum of all values
814 my $sum = $collection->reduce(sub { $a + $b });
815
816 # Count how often each value occurs in collection
817 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
818
819=head2 reverse
820
821 my $new = $collection->reverse;
822
823Create a new collection with all elements in reverse order.
824
825=head2 slice
826
827 my $new = $collection->slice(4 .. 7);
828
829Create a new collection with all selected elements.
830
831 # $collection contains ('A', 'B', 'C', 'D', 'E')
832 $collection->slice(1, 2, 4)->join(' '); # "B C E"
833
834=head2 shuffle
835
836 my $new = $collection->shuffle;
837
838Create a new collection with all elements in random order.
839
840=head2 size
841
842 my $size = $collection->size;
843
844Number of elements in collection.
845
846=head2 sort
847
848 my $new = $collection->sort;
849 my $new = $collection->sort(sub {...});
850
851Sort elements based on return value of callback and create a new collection
852from the results.
853
854 # Sort values case-insensitive
855 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
856
857=head2 tap
858
859 $collection = $collection->tap(sub {...});
860
861Equivalent to L<Mojo::Base/"tap">.
862
863=head2 to_array
864
865 my $array = $collection->to_array;
866
867Turn collection into array reference.
868
869=head2 uniq
870
871 my $new = $collection->uniq;
872 my $new = $collection->uniq(sub {...});
873 my $new = $collection->uniq($method);
874 my $new = $collection->uniq($method, @args);
875
876Create a new collection without duplicate elements, using the string
877representation of either the elements or the return value of the
878callback/method.
879
880 # Longer version
881 my $new = $collection->uniq(sub { $_->$method(@args) });
882
883 # $collection contains ('foo', 'bar', 'bar', 'baz')
884 $collection->uniq->join(' '); # "foo bar baz"
885
886 # $collection contains ([1, 2], [2, 1], [3, 2])
887 $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
888
9a5f1e3f 889=head1 SELECTORS
890
891L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS
892selectors that make sense for a standalone parser are supported.
893
894=head2 *
895
896Any element.
897
898 my $all = $dom->find('*');
899
900=head2 E
901
902An element of type C<E>.
903
904 my $title = $dom->at('title');
905
906=head2 E[foo]
907
908An C<E> element with a C<foo> attribute.
909
910 my $links = $dom->find('a[href]');
911
912=head2 E[foo="bar"]
913
914An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
915
916 my $case_sensitive = $dom->find('input[type="hidden"]');
917 my $case_sensitive = $dom->find('input[type=hidden]');
918
919=head2 E[foo="bar" i]
920
921An C<E> element whose C<foo> attribute value is exactly equal to any
922(ASCII-range) case-permutation of C<bar>. Note that this selector is
923EXPERIMENTAL and might change without warning!
924
925 my $case_insensitive = $dom->find('input[type="hidden" i]');
926 my $case_insensitive = $dom->find('input[type=hidden i]');
927 my $case_insensitive = $dom->find('input[class~="foo" i]');
928
929This selector is part of
930L<Selectors Level 4|http://dev.w3.org/csswg/selectors-4>, which is still a work
931in progress.
932
933=head2 E[foo~="bar"]
934
935An C<E> element whose C<foo> attribute value is a list of whitespace-separated
936values, one of which is exactly equal to C<bar>.
937
938 my $foo = $dom->find('input[class~="foo"]');
939 my $foo = $dom->find('input[class~=foo]');
940
941=head2 E[foo^="bar"]
942
943An C<E> element whose C<foo> attribute value begins exactly with the string
944C<bar>.
945
946 my $begins_with = $dom->find('input[name^="f"]');
947 my $begins_with = $dom->find('input[name^=f]');
948
949=head2 E[foo$="bar"]
950
951An C<E> element whose C<foo> attribute value ends exactly with the string
952C<bar>.
953
954 my $ends_with = $dom->find('input[name$="o"]');
955 my $ends_with = $dom->find('input[name$=o]');
956
957=head2 E[foo*="bar"]
958
959An C<E> element whose C<foo> attribute value contains the substring C<bar>.
960
961 my $contains = $dom->find('input[name*="fo"]');
962 my $contains = $dom->find('input[name*=fo]');
963
964=head2 E:root
965
966An C<E> element, root of the document.
967
968 my $root = $dom->at(':root');
969
970=head2 E:nth-child(n)
971
972An C<E> element, the C<n-th> child of its parent.
973
974 my $third = $dom->find('div:nth-child(3)');
975 my $odd = $dom->find('div:nth-child(odd)');
976 my $even = $dom->find('div:nth-child(even)');
977 my $top3 = $dom->find('div:nth-child(-n+3)');
978
979=head2 E:nth-last-child(n)
980
981An C<E> element, the C<n-th> child of its parent, counting from the last one.
982
983 my $third = $dom->find('div:nth-last-child(3)');
984 my $odd = $dom->find('div:nth-last-child(odd)');
985 my $even = $dom->find('div:nth-last-child(even)');
986 my $bottom3 = $dom->find('div:nth-last-child(-n+3)');
987
988=head2 E:nth-of-type(n)
989
990An C<E> element, the C<n-th> sibling of its type.
991
992 my $third = $dom->find('div:nth-of-type(3)');
993 my $odd = $dom->find('div:nth-of-type(odd)');
994 my $even = $dom->find('div:nth-of-type(even)');
995 my $top3 = $dom->find('div:nth-of-type(-n+3)');
996
997=head2 E:nth-last-of-type(n)
998
999An C<E> element, the C<n-th> sibling of its type, counting from the last one.
1000
1001 my $third = $dom->find('div:nth-last-of-type(3)');
1002 my $odd = $dom->find('div:nth-last-of-type(odd)');
1003 my $even = $dom->find('div:nth-last-of-type(even)');
1004 my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)');
1005
1006=head2 E:first-child
1007
1008An C<E> element, first child of its parent.
1009
1010 my $first = $dom->find('div p:first-child');
1011
1012=head2 E:last-child
1013
1014An C<E> element, last child of its parent.
1015
1016 my $last = $dom->find('div p:last-child');
1017
1018=head2 E:first-of-type
1019
1020An C<E> element, first sibling of its type.
1021
1022 my $first = $dom->find('div p:first-of-type');
1023
1024=head2 E:last-of-type
1025
1026An C<E> element, last sibling of its type.
1027
1028 my $last = $dom->find('div p:last-of-type');
1029
1030=head2 E:only-child
1031
1032An C<E> element, only child of its parent.
1033
1034 my $lonely = $dom->find('div p:only-child');
1035
1036=head2 E:only-of-type
1037
1038An C<E> element, only sibling of its type.
1039
1040 my $lonely = $dom->find('div p:only-of-type');
1041
1042=head2 E:empty
1043
1044An C<E> element that has no children (including text nodes).
1045
1046 my $empty = $dom->find(':empty');
1047
1048=head2 E:checked
1049
1050A user interface element C<E> which is checked (for instance a radio-button or
1051checkbox).
1052
1053 my $input = $dom->find(':checked');
1054
1055=head2 E.warning
1056
1057An C<E> element whose class is "warning".
1058
1059 my $warning = $dom->find('div.warning');
1060
1061=head2 E#myid
1062
1063An C<E> element with C<ID> equal to "myid".
1064
1065 my $foo = $dom->at('div#foo');
1066
1067=head2 E:not(s)
1068
1069An C<E> element that does not match simple selector C<s>.
1070
1071 my $others = $dom->find('div p:not(:first-child)');
1072
1073=head2 E F
1074
1075An C<F> element descendant of an C<E> element.
1076
1077 my $headlines = $dom->find('div h1');
1078
1079=head2 E E<gt> F
1080
1081An C<F> element child of an C<E> element.
1082
1083 my $headlines = $dom->find('html > body > div > h1');
1084
1085=head2 E + F
1086
1087An C<F> element immediately preceded by an C<E> element.
1088
1089 my $second = $dom->find('h1 + h2');
1090
1091=head2 E ~ F
1092
1093An C<F> element preceded by an C<E> element.
1094
1095 my $second = $dom->find('h1 ~ h2');
1096
1097=head2 E, F, G
1098
1099Elements of type C<E>, C<F> and C<G>.
1100
1101 my $headlines = $dom->find('h1, h2, h3');
1102
1103=head2 E[foo=bar][bar=baz]
1104
1105An C<E> element whose attributes match all following attribute selectors.
1106
1107 my $links = $dom->find('a[foo^=b][foo$=ar]');
1108
d6512b50 1109=head1 BUGS
1110
1111Report any issues on the public bugtracker.
1112
1113=head1 AUTHOR
1114
1115Dan Book <dbook@cpan.org>
1116
1117=head1 COPYRIGHT AND LICENSE
1118
1119This software is Copyright (c) 2015 by Dan Book.
1120
1121This is free software, licensed under:
1122
1123 The Artistic License 2.0 (GPL Compatible)
1124
1125=head1 SEE ALSO
1126
1127L<Mojo::DOM>, L<XML::LibXML>, L<XML::Twig>, L<HTML::TreeBuilder>, L<XML::Smart>
1128
9a5f1e3f 1129=for Pod::Coverage TO_JSON
1130
d6512b50 1131=cut