reference collection methods section
[catagits/DOM-Tiny.git] / lib / DOM / Tiny.pm
CommitLineData
a292be34 1package DOM::Tiny;
2
3use strict;
4use warnings;
5
d6512b50 6use overload
7 '@{}' => sub { shift->child_nodes },
8 '%{}' => sub { shift->attr },
9 bool => sub {1},
10 '""' => sub { shift->to_string },
11 fallback => 1;
12
13use Carp 'croak';
78ba4051 14use DOM::Tiny::_Collection;
d6512b50 15use DOM::Tiny::CSS;
16use DOM::Tiny::HTML;
17use Scalar::Util qw(blessed weaken);
18
a292be34 19our $VERSION = '0.001';
20
# Constructor. The object is a blessed reference to a scalar that holds a
# DOM::Tiny::HTML instance; any arguments are passed on to "parse".
sub new {
  my ($class, @args) = @_;

  my $html = DOM::Tiny::HTML->new;
  my $self = bless \$html, ref $class || $class;

  return $self unless @args;
  return $self->parse(@args);
}
26
# Serialization hook: returns whatever "render" of the wrapped object yields
sub TO_JSON {
  my $self = shift;
  return $self->_delegate('render');
}
28
# Text extraction with recursion into nested tags enabled; remaining
# arguments (the optional trim flag) are passed through unchanged
sub all_text {
  my ($self, @args) = @_;
  return $self->_all_text(1, @args);
}
30
# Collection of ancestor elements, optionally narrowed by a CSS selector
sub ancestors {
  my ($self, $selector) = @_;
  my $collection = $self->_collect($self->_ancestors);
  return _select($collection, $selector);
}
32
# Insert a parsed fragment right after this node (index offset 1)
sub append {
  my ($self, @args) = @_;
  return $self->_add(1, @args);
}

# Add a fragment (or raw content) at the end of this node's content
sub append_content {
  my ($self, @args) = @_;
  return $self->_content(1, 0, @args);
}
35
# First match for a CSS selector as a new object, or undef without a match
sub at {
  my ($self, @args) = @_;
  my $result = $self->_css->select_one(@args);
  return $result ? $self->_build($result, $self->xml) : undef;
}
41
# Attribute accessor.
#   attr()              returns the attribute hash reference
#   attr('name')        returns the value stored for "name"
#   attr({...}) / attr(name => value, ...)  merges values, returns $self
sub attr {
  my ($self, @args) = @_;

  # Only "tag" nodes carry attributes, other nodes get a throwaway hash
  my $tree  = $self->tree;
  my $attrs = $tree->[0] eq 'tag' ? $tree->[2] : {};

  # Hash
  return $attrs if !@args;

  # Get (exactly one argument that is not a reference)
  my $first = $args[0];
  return $attrs->{$first} if @args == 1 && !ref $first;

  # Set
  my $values = ref $first ? $first : {@args};
  $attrs->{$_} = $values->{$_} for keys %$values;

  return $self;
}
59
# Collection with one object per direct child node
sub child_nodes {
  my $self = shift;
  return $self->_collect(_nodes($self->tree));
}
61
# Collection of direct child elements ("tag" nodes only), optionally
# narrowed by a CSS selector
sub children {
  my ($self, $selector) = @_;
  my $elements = $self->_collect(_nodes($self->tree, 1));
  return _select($elements, $selector);
}
63
# Get or replace this node's content. For "root" and "tag" nodes the content
# is the rendered children (getter) or a parsed fragment (setter); for every
# other node type it is the string kept in slot 1 of the tree.
sub content {
  my ($self, @args) = @_;

  my $type = $self->type;
  unless ($type eq 'root' || $type eq 'tag') {

    # Leaf node: slot 1 holds the raw content
    return $self->tree->[1] unless @args;
    $self->tree->[1] = shift @args;
    return $self;
  }

  # Container node, setter
  return $self->_content(0, 1, @args) if @args;

  # Container node, getter: render every child with a scratch renderer
  my $html = DOM::Tiny::HTML->new(xml => $self->xml);
  my @rendered;
  push @rendered, $html->tree($_)->render for _nodes($self->tree);
  return join '', @rendered;
}
78
# Collection of all descendant nodes, in the order produced by "_all"
sub descendant_nodes {
  my $self = shift;
  return $self->_collect(_all(_nodes($self->tree)));
}
80
# Collection of everything the CSS engine selects for the given selector
sub find {
  my ($self, $selector) = @_;
  return $self->_collect(@{$self->_css->select($selector)});
}
82
# Sibling elements after this node, optionally narrowed by a CSS selector
sub following {
  my ($self, $selector) = @_;
  return _select($self->_collect(@{$self->_siblings(1)->[1]}), $selector);
}

# All sibling nodes after this node
sub following_nodes {
  my $self = shift;
  return $self->_collect(@{$self->_siblings->[1]});
}
85
# Ask the CSS engine whether this node matches the given selector
sub matches {
  my ($self, @args) = @_;
  return $self->_css->matches(@args);
}
87
# Resolve the namespace of this element by looking at its own attributes and
# then at those of its ancestors. Returns undef for non-elements and when no
# declaration is found.
sub namespace {
  my $self = shift;

  my $tree = $self->tree;
  return undef unless $tree->[0] eq 'tag';

  # Extract namespace prefix: "svg:circle" is resolved through "xmlns:svg"
  my $ns = $tree->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef;

  # Search this element first, then its parents
  for my $node ($tree, $self->_ancestors) {
    my $attrs = $node->[2];

    # Namespace for prefix
    if ($ns) {
      return $attrs->{$ns} if exists $attrs->{$ns};
    }

    # Namespace attribute
    elsif (defined $attrs->{xmlns}) {
      return $attrs->{xmlns};
    }
  }

  return undef;
}
107
# Next sibling element, or undef if there is none
sub next {
  my $self = shift;
  return $self->_maybe($self->_siblings(1, 0)->[1]);
}

# Next sibling node of any type, or undef if there is none
sub next_node {
  my $self = shift;
  return $self->_maybe($self->_siblings(0, 0)->[1]);
}
110
# Object for the parent node, undef when called on a "root" node
sub parent {
  my $self = shift;
  return $self->tree->[0] eq 'root'
    ? undef
    : $self->_build($self->_parent, $self->xml);
}
116
# Forward to "parse" of the wrapped object; returns $self when arguments
# were given (see "_delegate")
sub parse {
  my ($self, @args) = @_;
  return $self->_delegate(parse => @args);
}
118
# Sibling elements before this node, optionally narrowed by a CSS selector
sub preceding {
  my ($self, $selector) = @_;
  return _select($self->_collect(@{$self->_siblings(1)->[0]}), $selector);
}

# All sibling nodes before this node
sub preceding_nodes {
  my $self = shift;
  return $self->_collect(@{$self->_siblings->[0]});
}
121
# Insert a parsed fragment right before this node (index offset 0)
sub prepend {
  my ($self, @args) = @_;
  return $self->_add(0, @args);
}

# Add a fragment (or raw content) at the start of this node's content
sub prepend_content {
  my ($self, @args) = @_;
  return $self->_content(0, 0, @args);
}
124
# Previous sibling element, or undef if there is none
sub previous {
  my $self = shift;
  return $self->_maybe($self->_siblings(1, -1)->[0]);
}

# Previous sibling node of any type, or undef if there is none
sub previous_node {
  my $self = shift;
  return $self->_maybe($self->_siblings(0, -1)->[0]);
}
127
# Removing a node is replacing it with an empty fragment
sub remove {
  my $self = shift;
  return $self->replace('');
}
129
# Replace this node with a parsed fragment. A "root" node is simply
# re-parsed; otherwise the node is swapped out inside its parent and the
# result of "_replace" is returned.
sub replace {
  my ($self, $new) = @_;

  my $tree = $self->tree;
  return $self->parse($new) if $tree->[0] eq 'root';

  my @nodes = _nodes($self->_parse($new));
  return $self->_replace($self->_parent, $tree, @nodes);
}
135
# Object for the outermost ancestor, or $self if this node has no parent
sub root {
  my $self = shift;
  my $tree = $self->_ancestors(1);
  return $tree ? $self->_build($tree, $self->xml) : $self;
}
141
# Replace this element with its own child nodes; non-elements are returned
# untouched
sub strip {
  my $self = shift;

  my $tree = $self->tree;
  return $self unless $tree->[0] eq 'tag';

  # Slot 3 of a tag is its parent
  return $self->_replace($tree->[3], $tree, _nodes($tree));
}
147
# Tag name accessor. Returns undef for non-elements, the current name when
# called without a true argument, and $self after renaming.
sub tag {
  my ($self, $tag) = @_;

  my $tree = $self->tree;
  return undef unless $tree->[0] eq 'tag';

  if ($tag) {
    $tree->[1] = $tag;
    return $self;
  }

  return $tree->[1];
}
155
# Reuse the "tap" implementation of DOM::Tiny::_Collection with this object
# as the invocant
sub tap {
  my ($self, @args) = @_;
  return $self->DOM::Tiny::_Collection::tap(@args);
}
d6512b50 157
# Text extraction without recursion into nested tags; remaining arguments
# (the optional trim flag) are passed through unchanged
sub text {
  my ($self, @args) = @_;
  return $self->_all_text(0, @args);
}
159
# Render this node through the wrapped object
sub to_string {
  my $self = shift;
  return $self->_delegate('render');
}
161
# Get or set the tree of the wrapped object (setter returns $self)
sub tree {
  my ($self, @args) = @_;
  return $self->_delegate(tree => @args);
}
163
# The node type is the first slot of the tree
sub type {
  my $self = shift;
  return $self->tree->[0];
}
165
# Extract the value of a form element.
#   "option"    value attribute, falling back to the element's text
#   "textarea"  the element's text
#   "select"    value(s) of the options matched by "option:checked"; with a
#               "multiple" attribute an array reference (or undef if empty),
#               otherwise the value of the last matched option
#   otherwise   the value attribute
# Returns undef for nodes that are not elements.
sub val {
  my $self = shift;

  # "tag" returns undef for non-element nodes. Stop here so the string
  # comparisons below never see an undefined value (the result for such
  # nodes has always been undef, this only avoids the warnings).
  my $tag = $self->tag;
  return undef unless defined $tag;

  # "option"
  return $self->{value} // $self->text if $tag eq 'option';

  # "textarea", "input" or "button"
  return $tag eq 'textarea' ? $self->text : $self->{value} if $tag ne 'select';

  # "select"
  my $v = $self->find('option:checked')->map('val');
  return exists $self->{multiple} ? $v->size ? $v->to_array : undef : $v->last;
}
179
# Wrap a parsed fragment around this node
sub wrap {
  my ($self, @args) = @_;
  return $self->_wrap(0, @args);
}

# Wrap a parsed fragment around this node's content
sub wrap_content {
  my ($self, @args) = @_;
  return $self->_wrap(1, @args);
}
182
# Get or set the "xml" flag of the wrapped object (setter returns $self)
sub xml {
  my ($self, @args) = @_;
  return $self->_delegate(xml => @args);
}
184
# Insert the nodes of a parsed fragment next to this node inside its parent.
# $offset is added to this node's index: 0 inserts before it, 1 after it.
# "root" nodes have no parent and are returned unchanged.
sub _add {
  my ($self, $offset, $new) = @_;

  my $tree = $self->tree;
  return $self if $tree->[0] eq 'root';

  my $parent = $self->_parent;
  my @nodes  = _link($parent, _nodes($self->_parse($new)));
  my $index  = _offset($parent, $tree) + $offset;
  splice @$parent, $index, 0, @nodes;

  return $self;
}
196
# Flatten the given nodes depth-first: every node is followed directly by
# the flattened list of its children if it is a "tag"
sub _all {
  my @all;
  for my $node (@_) {
    push @all, $node;
    push @all, _all(_nodes($node)) if $node->[0] eq 'tag';
  }
  return @all;
}
200
# Shared implementation of "text" and "all_text". Trimming defaults to on
# and is switched off when this node or one of its ancestors is a "pre" tag.
sub _all_text {
  my ($self, $recurse, $trim) = @_;

  my $tree = $self->tree;
  $trim //= 1;

  # Detect "pre" tag
  if ($trim && $tree->[0] ne 'root') {
    for my $node ($self->_ancestors, $tree) {
      $trim = 0 if $node->[1] eq 'pre';
    }
  }

  return _text([_nodes($tree)], $recurse, $trim);
}
212
# Walk up the parent chain. With a true $root only the outermost collected
# node is returned, otherwise every collected node except the outermost one.
# Returns nothing if this node has no parent.
sub _ancestors {
  my ($self, $root) = @_;

  my $tree = $self->_parent;
  return unless $tree;

  # Collect until a node that is not a tag, or has no parent, was added
  my @ancestors;
  while (1) {
    push @ancestors, $tree;
    last unless $tree->[0] eq 'tag';
    last unless $tree = $tree->[3];
  }

  return $ancestors[-1] if $root;
  return @ancestors[0 .. $#ancestors - 1];
}
222
# Create a new object of the same class around the given tree and xml flag
sub _build {
  my ($self, $tree, $xml) = @_;
  return $self->new->tree($tree)->xml($xml);
}
224
# Turn a list of trees into a collection of objects sharing this xml flag
sub _collect {
  my ($self, @trees) = @_;

  my $xml     = $self->xml;
  my @objects = map { $self->_build($_, $xml) } @trees;

  return DOM::Tiny::_Collection->new(@objects);
}
230
# Shared implementation of "content", "append_content" and
# "prepend_content".
#   $start   true: insert at the end, false: insert at the first child slot
#   $offset  true: drop the existing children while inserting
# Nodes other than "root" and "tag" hold a plain string instead, which is
# concatenated with $new (after it if $start is true, before it otherwise).
sub _content {
  my ($self, $start, $offset, $new) = @_;

  my $tree = $self->tree;
  my $type = $tree->[0];

  if ($type ne 'root' && $type ne 'tag') {
    my $old      = $self->content;
    my $combined = $start ? "$old$new" : "$new$old";
    return $self->content($combined);
  }

  my $last   = $#$tree;
  my $index  = $start  ? $last + 1 : _start($tree);
  my $length = $offset ? $last     : 0;
  my @nodes  = _link($tree, _nodes($self->_parse($new)));
  splice @$tree, $index, $length, @nodes;

  return $self;
}
246
# Fresh CSS engine bound to this node's tree
sub _css {
  my $self = shift;
  return DOM::Tiny::CSS->new(tree => $self->tree);
}
248
# Call $method on the wrapped object. Without further arguments the result
# of the call is returned; with arguments the call acts as a setter and
# $self is returned to allow chaining.
sub _delegate {
  my ($self, $method, @args) = @_;

  if (@args) {
    $$self->$method(@args);
    return $self;
  }

  return $$self->$method;
}
255
# Store a weakened reference to $parent in every child and return the
# children. Tags keep their parent in slot 3, all other nodes in slot 2.
sub _link {
  my ($parent, @children) = @_;

  for my $child (@children) {
    my $slot = $child->[0] eq 'tag' ? 3 : 2;
    $child->[$slot] = $parent;

    # Weak, so parent and child do not keep each other alive
    weaken $child->[$slot];
  }

  return @children;
}
268
# Wrap a tree in a new object, or return undef if there is no tree
sub _maybe {
  my ($self, $tree) = @_;
  return undef unless $tree;
  return $self->_build($tree, $self->xml);
}
270
# Child nodes of a tree (everything from the "_start" slot on). A true
# second argument restricts the result to "tag" nodes. Returns nothing for
# a false tree.
sub _nodes {
  my ($tree, $tags_only) = @_;
  return unless $tree;

  my @nodes = @$tree[_start($tree) .. $#$tree];
  return @nodes unless $tags_only;
  return grep { $_->[0] eq 'tag' } @nodes;
}
276
# Index of $child inside the $parent array. If the child is not found the
# result is one past the last element.
sub _offset {
  my ($parent, $child) = @_;

  my $i = _start($parent);
  for my $node (@$parent[$i .. $#$parent]) {
    last if $node eq $child;
    $i++;
  }

  return $i;
}
283
# Raw parent tree: slot 3 for tags, slot 2 for every other node type
sub _parent {
  my $self = shift;
  my $slot = $self->type eq 'tag' ? 3 : 2;
  return $self->tree->[$slot];
}
285
# Parse a fragment with a scratch parser that shares this object's xml flag
# and return the resulting tree
sub _parse {
  my ($self, $markup) = @_;
  my $html = DOM::Tiny::HTML->new(xml => $self->xml);
  return $html->parse($markup)->tree;
}
287
# Swap $child inside $parent for @nodes (linked to the parent first) and
# return the object for this node's parent
sub _replace {
  my ($self, $parent, $child, @nodes) = @_;

  my $index = _offset($parent, $child);
  splice @$parent, $index, 1, _link($parent, @nodes);

  return $self->parent;
}
293
# Narrow a collection to the elements matching a selector. Without a true
# selector the collection itself is returned, otherwise a new collection
# built through the original's "new".
sub _select {
  my ($collection, $selector) = @_;
  return $selector
    ? $collection->new(grep { $_->matches($selector) } @$collection)
    : $collection;
}
299
# Split the parent's children into those before and those after this node.
#   $tags  true: only "tag" nodes are collected
#   $i     defined: return [$before[$i], $after[$i]] instead of both lists
# Returns an empty array reference if this node has no parent.
sub _siblings {
  my ($self, $tags, $i) = @_;

  my $parent = $self->parent;
  return [] unless $parent;

  my $tree = $self->tree;
  my (@before, @after);
  my $seen = 0;
  for my $node (_nodes($parent->tree)) {

    # The node itself separates both groups and belongs to neither
    if (!$seen && $node eq $tree) {
      $seen = 1;
      next;
    }

    next if $tags && $node->[0] ne 'tag';
    if   ($seen) { push @after,  $node }
    else         { push @before, $node }
  }

  return [\@before, \@after] unless defined $i;
  return [$before[$i], $after[$i]];
}
315
# Remove leading and trailing whitespace and collapse every remaining run
# of whitespace into a single space
sub _squish {
  my $str = shift;

  for ($str) {
    s/^\s+//;
    s/\s+$//;
    s/\s+/ /g;
  }

  return $str;
}
323
# Index of the first child slot: 1 for "root" nodes, 4 for everything else
sub _start {
  my $tree = shift;
  return 1 if $tree->[0] eq 'root';
  return 4;
}
325
# Build the text for a list of nodes.
#   $nodes    array reference of nodes (modified in place while merging)
#   $recurse  true: descend into nested tags
#   $trim     true: squish text nodes and skip whitespace-only chunks
sub _text {
  my ($nodes, $recurse, $trim) = @_;

  # Merge successive text nodes
  my $i = 0;
  while (my $next = $nodes->[$i + 1]) {
    my $current = $nodes->[$i];
    if ($current->[0] eq 'text' && $next->[0] eq 'text') {
      splice @$nodes, $i, 2, ['text', $current->[1] . $next->[1]];
    }
    else { $i++ }
  }

  my $text = '';
  for my $node (@$nodes) {
    my $type  = $node->[0];
    my $chunk = '';

    # Text
    if ($type eq 'text') {
      $chunk = $trim ? _squish($node->[1]) : $node->[1];
    }

    # CDATA or raw text
    elsif ($type eq 'cdata' || $type eq 'raw') {
      $chunk = $node->[1];
    }

    # Nested tag, trimming is switched off inside "pre"
    elsif ($type eq 'tag' && $recurse) {
      no warnings 'recursion';
      my $inner_trim = $node->[1] eq 'pre' ? 0 : $trim;
      $chunk = _text([_nodes($node)], 1, $inner_trim);
    }

    # Add leading whitespace if punctuation allows it
    $chunk = " $chunk" if $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/;

    # Trim whitespace blocks
    $text .= $chunk if $chunk =~ /\S+/ || !$trim;
  }

  return $text;
}
362
# Shared implementation of "wrap" and "wrap_content". $content selects
# whether the node's content (true) or the node itself (false) is wrapped;
# it is forced to true for "root" nodes and to false for nodes that are
# neither "root" nor "tag". The wrapped part becomes the last child(ren) of
# the innermost first element of the parsed fragment.
sub _wrap {
  my ($self, $content, $new) = @_;

  my $tree = $self->tree;
  my $type = $tree->[0];
  if    ($type eq 'root') { $content = 1 }
  elsif ($type ne 'tag')  { $content = 0 }

  # Find innermost tag by following the first element child at every level
  $new = $self->_parse($new);
  my ($current, $first) = (undef, $new);
  while ($first = (_nodes($first, 1))[0]) { $current = $first }

  # Fragment without any element, nothing to wrap with
  return $self unless $current;

  if ($content) {

    # Wrap content: move the children into the innermost tag, then put the
    # fragment in their place
    push @$current, _link($current, _nodes($tree));
    splice @$tree, _start($tree), $#$tree, _link($tree, _nodes($new));
  }
  else {

    # Wrap element: put the fragment where the node was, then move the node
    # into the innermost tag
    $self->_replace($self->_parent, $tree, _nodes($new));
    push @$current, _link($current, $tree);
  }

  return $self;
}
387
a292be34 3881;
389
d6512b50 390=encoding utf8
391
78ba4051 392=for Pod::Coverage TO_JSON
393
a292be34 394=head1 NAME
395
d6512b50 396DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors
a292be34 397
398=head1 SYNOPSIS
399
d6512b50 400 use DOM::Tiny;
401
402 # Parse
403 my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>');
404
405 # Find
406 say $dom->at('#b')->text;
407 say $dom->find('p')->map('text')->join("\n");
408 say $dom->find('[id]')->map(attr => 'id')->join("\n");
409
410 # Iterate
411 $dom->find('p[id]')->reverse->each(sub { say $_->{id} });
412
413 # Loop
414 for my $e ($dom->find('p[id]')->each) {
415 say $e->{id}, ':', $e->text;
416 }
417
418 # Modify
419 $dom->find('div p')->last->append('<p id="c">456</p>');
420 $dom->find(':not(p)')->map('strip');
421
422 # Render
423 say "$dom";
424
a292be34 425=head1 DESCRIPTION
426
5a70ee9d 427L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser with
428support for the L<HTML Living Standard|https://html.spec.whatwg.org/> and
429L<CSS3 selectors|http://www.w3.org/TR/selectors/> based on L<Mojo::DOM>. It
430will even try to interpret broken HTML and XML, so you should not use it for
431validation.
d6512b50 432
433=head1 NODES AND ELEMENTS
434
435When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
436
437 <!DOCTYPE html>
438 <html>
439 <head><title>Hello</title></head>
440 <body>World!</body>
441 </html>
442
443There are currently eight different kinds of nodes, C<cdata>, C<comment>,
444C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
445the type C<tag>.
446
447 root
448 |- doctype (html)
449 +- tag (html)
450 |- tag (head)
451 | +- tag (title)
452 | +- raw (Hello)
453 +- tag (body)
454 +- text (World!)
455
456While all node types are represented as L<DOM::Tiny> objects, some methods like
457L</"attr"> and L</"namespace"> only apply to elements.
458
459=head1 CASE-SENSITIVITY
460
461L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute
462names are lowercased and selectors need to be lowercase as well.
463
464 # HTML semantics
465 my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>');
466 say $dom->at('p[id]')->text;
467
468If XML processing instructions are found, the parser will automatically switch
469into XML mode and everything becomes case-sensitive.
470
471 # XML semantics
472 my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
473 say $dom->at('P[ID]')->text;
474
475XML detection can also be disabled with the L</"xml"> method.
476
477 # Force XML semantics
478 my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>');
479 say $dom->at('P[ID]')->text;
480
481 # Force HTML semantics
482 my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>');
483 say $dom->at('p[id]')->text;
484
485=head1 METHODS
486
487L<DOM::Tiny> implements the following methods.
488
3793c28f 489=head2 new
490
491 my $dom = DOM::Tiny->new;
492 my $dom = DOM::Tiny->new('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
493
494Construct a new scalar-based L<DOM::Tiny> object and L</"parse"> HTML/XML
495fragment if necessary.
496
d6512b50 497=head2 all_text
498
499 my $trimmed = $dom->all_text;
500 my $untrimmed = $dom->all_text(0);
501
502Extract text content from all descendant nodes of this element, smart
503whitespace trimming is enabled by default.
504
505 # "foo bar baz"
506 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text;
507
508 # "foo\nbar baz\n"
509 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
510
511=head2 ancestors
512
513 my $collection = $dom->ancestors;
514 my $collection = $dom->ancestors('div ~ p');
515
516Find all ancestor elements of this node matching the CSS selector and return a
8563f527 517L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
d6512b50 518objects. All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are supported.
519
520 # List tag names of ancestor elements
521 say $dom->ancestors->map('tag')->join("\n");
522
523=head2 append
524
525 $dom = $dom->append('<p>I ♥ DOM::Tiny!</p>');
526
527Append HTML/XML fragment to this node.
528
529 # "<div><h1>Test</h1><h2>123</h2></div>"
530 $dom->parse('<div><h1>Test</h1></div>')
531 ->at('h1')->append('<h2>123</h2>')->root;
532
533 # "<p>Test 123</p>"
534 $dom->parse('<p>Test</p>')->at('p')
535 ->child_nodes->first->append(' 123')->root;
536
537=head2 append_content
538
539 $dom = $dom->append_content('<p>I ♥ DOM::Tiny!</p>');
540
541Append HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
542node's content.
543
544 # "<div><h1>Test123</h1></div>"
545 $dom->parse('<div><h1>Test</h1></div>')
546 ->at('h1')->append_content('123')->root;
547
548 # "<!-- Test 123 --><br>"
549 $dom->parse('<!-- Test --><br>')
550 ->child_nodes->first->append_content('123 ')->root;
551
552 # "<p>Test<i>123</i></p>"
553 $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
554
555=head2 at
556
557 my $result = $dom->at('div ~ p');
558
559Find first descendant element of this element matching the CSS selector and
560return it as a L<DOM::Tiny> object or return C<undef> if none could be found.
561All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are supported.
562
563 # Find first element with "svg" namespace definition
564 my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
565
566=head2 attr
567
568 my $hash = $dom->attr;
569 my $foo = $dom->attr('foo');
570 $dom = $dom->attr({foo => 'bar'});
571 $dom = $dom->attr(foo => 'bar');
572
573This element's attributes.
574
575 # Remove an attribute
576 delete $dom->attr->{id};
577
578 # Attribute without value
579 $dom->attr(selected => undef);
580
581 # List id attributes
582 say $dom->find('*')->map(attr => 'id')->compact->join("\n");
583
584=head2 child_nodes
585
586 my $collection = $dom->child_nodes;
587
8563f527 588Return a L<collection|/"COLLECTION METHODS"> containing all child nodes of this
d6512b50 589element as L<DOM::Tiny> objects.
590
591 # "<p><b>123</b></p>"
592 $dom->parse('<p>Test<b>123</b></p>')->at('p')->child_nodes->first->remove;
593
594 # "<!DOCTYPE html>"
595 $dom->parse('<!DOCTYPE html><b>123</b>')->child_nodes->first;
596
597 # " Test "
598 $dom->parse('<b>123</b><!-- Test -->')->child_nodes->last->content;
599
600=head2 children
601
602 my $collection = $dom->children;
603 my $collection = $dom->children('div ~ p');
604
605Find all child elements of this element matching the CSS selector and return a
8563f527 606L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
d6512b50 607objects. All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are supported.
608
609 # Show tag name of random child element
610 say $dom->children->shuffle->first->tag;
611
612=head2 content
613
614 my $str = $dom->content;
615 $dom = $dom->content('<p>I ♥ DOM::Tiny!</p>');
616
617Return this node's content or replace it with HTML/XML fragment (for C<root>
618and C<tag> nodes) or raw content.
619
620 # "<b>Test</b>"
621 $dom->parse('<div><b>Test</b></div>')->at('div')->content;
622
623 # "<div><h1>123</h1></div>"
624 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root;
625
626 # "<p><i>123</i></p>"
627 $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root;
628
629 # "<div><h1></h1></div>"
630 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root;
631
632 # " Test "
633 $dom->parse('<!-- Test --><br>')->child_nodes->first->content;
634
635 # "<div><!-- 123 -->456</div>"
636 $dom->parse('<div><!-- Test -->456</div>')
637 ->at('div')->child_nodes->first->content(' 123 ')->root;
638
639=head2 descendant_nodes
640
641 my $collection = $dom->descendant_nodes;
642
8563f527 643Return a L<collection|/"COLLECTION METHODS"> containing all descendant nodes of
d6512b50 644this element as L<DOM::Tiny> objects.
645
646 # "<p><b>123</b></p>"
647 $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')
648 ->descendant_nodes->grep(sub { $_->type eq 'comment' })
649 ->map('remove')->first;
650
651 # "<p><b>test</b>test</p>"
652 $dom->parse('<p><b>123</b>456</p>')
653 ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' })
654 ->map(content => 'test')->first->root;
655
656=head2 find
657
658 my $collection = $dom->find('div ~ p');
659
660Find all descendant elements of this element matching the CSS selector and
8563f527 661return a L<collection|/"COLLECTION METHODS"> containing these elements as
d6512b50 662L<DOM::Tiny> objects. All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are
663supported.
664
665 # Find a specific element and extract information
666 my $id = $dom->find('div')->[23]{id};
667
668 # Extract information from multiple elements
669 my @headers = $dom->find('h1, h2, h3')->map('text')->each;
670
671 # Count all the different tags
672 my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {});
673
674 # Find elements with a class that contains dots
675 my @divs = $dom->find('div.foo\.bar')->each;
676
677=head2 following
678
679 my $collection = $dom->following;
680 my $collection = $dom->following('div ~ p');
681
682Find all sibling elements after this node matching the CSS selector and return
8563f527 683a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
d6512b50 684objects. All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are supported.
685
686 # List tags of sibling elements after this node
687 say $dom->following->map('tag')->join("\n");
688
689=head2 following_nodes
690
691 my $collection = $dom->following_nodes;
692
8563f527 693Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes after
d6512b50 694this node as L<DOM::Tiny> objects.
695
696 # "C"
697 $dom->parse('<p>A</p><!-- B -->C')->at('p')->following_nodes->last->content;
698
699=head2 matches
700
701 my $bool = $dom->matches('div ~ p');
702
703Check if this element matches the CSS selector. All selectors from
704L<DOM::Tiny::CSS/"SELECTORS"> are supported.
705
706 # True
707 $dom->parse('<p class="a">A</p>')->at('p')->matches('.a');
708 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[class]');
709
710 # False
711 $dom->parse('<p class="a">A</p>')->at('p')->matches('.b');
712 $dom->parse('<p class="a">A</p>')->at('p')->matches('p[id]');
713
714=head2 namespace
715
716 my $namespace = $dom->namespace;
717
718Find this element's namespace or return C<undef> if none could be found.
719
720 # Find namespace for an element with namespace prefix
721 my $namespace = $dom->at('svg > svg\:circle')->namespace;
722
723 # Find namespace for an element that may or may not have a namespace prefix
724 my $namespace = $dom->at('svg > circle')->namespace;
725
d6512b50 726=head2 next
727
728 my $sibling = $dom->next;
729
730Return L<DOM::Tiny> object for next sibling element or C<undef> if there are no
731more siblings.
732
733 # "<h2>123</h2>"
734 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
735
736=head2 next_node
737
738 my $sibling = $dom->next_node;
739
740Return L<DOM::Tiny> object for next sibling node or C<undef> if there are no
741more siblings.
742
743 # "456"
744 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
745 ->at('b')->next_node->next_node;
746
747 # " Test "
748 $dom->parse('<p><b>123</b><!-- Test -->456</p>')
749 ->at('b')->next_node->content;
750
751=head2 parent
752
753 my $parent = $dom->parent;
754
755Return L<DOM::Tiny> object for parent of this node or C<undef> if this node has
756no parent.
757
758=head2 parse
759
760 $dom = $dom->parse('<foo bar="baz">I ♥ DOM::Tiny!</foo>');
761
762Parse HTML/XML fragment with L<DOM::Tiny::HTML>.
763
764 # Parse XML
765 my $dom = DOM::Tiny->new->xml(1)->parse($xml);
766
767=head2 preceding
768
769 my $collection = $dom->preceding;
770 my $collection = $dom->preceding('div ~ p');
771
772Find all sibling elements before this node matching the CSS selector and return
8563f527 773a L<collection|/"COLLECTION METHODS"> containing these elements as L<DOM::Tiny>
d6512b50 774objects. All selectors from L<DOM::Tiny::CSS/"SELECTORS"> are supported.
775
776 # List tags of sibling elements before this node
777 say $dom->preceding->map('tag')->join("\n");
778
779=head2 preceding_nodes
780
781 my $collection = $dom->preceding_nodes;
782
8563f527 783Return a L<collection|/"COLLECTION METHODS"> containing all sibling nodes
784before this node as L<DOM::Tiny> objects.
d6512b50 785
786 # "A"
787 $dom->parse('A<!-- B --><p>C</p>')->at('p')->preceding_nodes->first->content;
788
789=head2 prepend
790
791 $dom = $dom->prepend('<p>I ♥ DOM::Tiny!</p>');
792
793Prepend HTML/XML fragment to this node.
794
795 # "<div><h1>Test</h1><h2>123</h2></div>"
796 $dom->parse('<div><h2>123</h2></div>')
797 ->at('h2')->prepend('<h1>Test</h1>')->root;
798
799 # "<p>Test 123</p>"
800 $dom->parse('<p>123</p>')
801 ->at('p')->child_nodes->first->prepend('Test ')->root;
802
803=head2 prepend_content
804
805 $dom = $dom->prepend_content('<p>I ♥ DOM::Tiny!</p>');
806
807Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this
808node's content.
809
810 # "<div><h2>Test123</h2></div>"
811 $dom->parse('<div><h2>123</h2></div>')
812 ->at('h2')->prepend_content('Test')->root;
813
814 # "<!-- Test 123 --><br>"
815 $dom->parse('<!-- 123 --><br>')
816 ->child_nodes->first->prepend_content(' Test')->root;
817
818 # "<p><i>123</i>Test</p>"
819 $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
820
821=head2 previous
822
823 my $sibling = $dom->previous;
824
825Return L<DOM::Tiny> object for previous sibling element or C<undef> if there
826are no more siblings.
827
828 # "<h1>Test</h1>"
829 $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
830
831=head2 previous_node
832
833 my $sibling = $dom->previous_node;
834
835Return L<DOM::Tiny> object for previous sibling node or C<undef> if there are
836no more siblings.
837
838 # "123"
839 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
840 ->at('b')->previous_node->previous_node;
841
842 # " Test "
843 $dom->parse('<p>123<!-- Test --><b>456</b></p>')
844 ->at('b')->previous_node->content;
845
846=head2 remove
847
848 my $parent = $dom->remove;
849
850Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">.
851
852 # "<div></div>"
853 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove;
854
855 # "<p><b>456</b></p>"
856 $dom->parse('<p>123<b>456</b></p>')
857 ->at('p')->child_nodes->first->remove->root;
858
859=head2 replace
860
861 my $parent = $dom->replace('<div>I ♥ DOM::Tiny!</div>');
862
863Replace this node with HTML/XML fragment and return L</"root"> (for C<root>
864nodes) or L</"parent">.
865
866 # "<div><h2>123</h2></div>"
867 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>');
868
869 # "<p><b>123</b></p>"
870 $dom->parse('<p>Test</p>')
871 ->at('p')->child_nodes->[0]->replace('<b>123</b>')->root;
872
873=head2 root
874
875 my $root = $dom->root;
876
877Return L<DOM::Tiny> object for C<root> node.
878
879=head2 strip
880
881 my $parent = $dom->strip;
882
883Remove this element while preserving its content and return L</"parent">.
884
885 # "<div>Test</div>"
886 $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
887
888=head2 tag
889
890 my $tag = $dom->tag;
891 $dom = $dom->tag('div');
892
893This element's tag name.
894
895 # List tag names of child elements
896 say $dom->children->map('tag')->join("\n");
897
898=head2 tap
899
900 $dom = $dom->tap(sub {...});
901
e99ef07d 902Equivalent to L<Mojo::Base/"tap">.
d6512b50 903
904=head2 text
905
906 my $trimmed = $dom->text;
907 my $untrimmed = $dom->text(0);
908
909Extract text content from this element only (not including child elements),
910smart whitespace trimming is enabled by default.
911
912 # "foo baz"
913 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
914
915 # "foo\nbaz\n"
916 $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
917
918=head2 to_string
919
920 my $str = $dom->to_string;
921
922Render this node and its content to HTML/XML.
923
924 # "<b>Test</b>"
925 $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
926
927=head2 tree
928
929 my $tree = $dom->tree;
930 $dom = $dom->tree(['root']);
931
932Document Object Model. Note that this structure should only be used very
933carefully since it is very dynamic.
934
935=head2 type
936
937 my $type = $dom->type;
938
939This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
940C<root>, C<tag> or C<text>.
941
942 # "cdata"
943 $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
944
945 # "comment"
946 $dom->parse('<!-- Test -->')->child_nodes->first->type;
947
948 # "doctype"
949 $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
950
951 # "pi"
952 $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
953
954 # "raw"
955 $dom->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
956
957 # "root"
958 $dom->parse('<p>Test</p>')->type;
959
960 # "tag"
961 $dom->parse('<p>Test</p>')->at('p')->type;
962
963 # "text"
964 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
965
966=head2 val
967
968 my $value = $dom->val;
969
970Extract value from form element (such as C<button>, C<input>, C<option>,
971C<select> and C<textarea>) or return C<undef> if this element has no value. In
972the case of C<select> with C<multiple> attribute, find C<option> elements with
973C<selected> attribute and return an array reference with all values or C<undef>
974if none could be found.
975
976 # "a"
977 $dom->parse('<input name="test" value="a">')->at('input')->val;
978
979 # "b"
980 $dom->parse('<textarea>b</textarea>')->at('textarea')->val;
981
982 # "c"
983 $dom->parse('<option value="c">Test</option>')->at('option')->val;
984
985 # "d"
986 $dom->parse('<select><option selected>d</option></select>')
987 ->at('select')->val;
988
989 # "e"
990 $dom->parse('<select multiple><option selected>e</option></select>')
991 ->at('select')->val->[0];
992
993=head2 wrap
994
995 $dom = $dom->wrap('<div></div>');
996
997Wrap HTML/XML fragment around this node, placing it as the last child of the
998first innermost element.
999
1000 # "<p>123<b>Test</b></p>"
1001 $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root;
1002
1003 # "<div><p><b>Test</b></p>123</div>"
1004 $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root;
1005
1006 # "<p><b>Test</b></p><p>123</p>"
1007 $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root;
1008
1009 # "<p><b>Test</b></p>"
1010 $dom->parse('<p>Test</p>')->at('p')->child_nodes->first->wrap('<b>')->root;
1011
1012=head2 wrap_content
1013
1014 $dom = $dom->wrap_content('<div></div>');
1015
1016Wrap HTML/XML fragment around this node's content, placing the content as
1017the last children of the first innermost element.
1018
1019 # "<p><b>123Test</b></p>"
1020 $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root;
1021
1022 # "<p><b>Test</b></p><p>123</p>"
1023 $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
1024
1025=head2 xml
1026
1027 my $bool = $dom->xml;
1028 $dom = $dom->xml($bool);
1029
1030Disable HTML semantics in parser and activate case-sensitivity, defaults to
1031auto detection based on processing instructions.
1032
1033=head1 OPERATORS
1034
1035L<DOM::Tiny> overloads the following operators.
1036
1037=head2 array
1038
1039 my @nodes = @$dom;
1040
1041Alias for L</"child_nodes">.
1042
1043 # "<!-- Test -->"
1044 $dom->parse('<!-- Test --><b>123</b>')->[0];
1045
1046=head2 bool
1047
1048 my $bool = !!$dom;
1049
1050Always true.
1051
1052=head2 hash
1053
1054 my %attrs = %$dom;
1055
1056Alias for L</"attr">.
1057
1058 # "test"
1059 $dom->parse('<div id="test">Test</div>')->at('div')->{id};
1060
1061=head2 stringify
1062
1063 my $str = "$dom";
1064
1065Alias for L</"to_string">.
1066
78ba4051 1067=head1 COLLECTION METHODS
1068
1069Some L<DOM::Tiny> methods return an array-based collection object, which can
1070either be accessed directly as an array reference, or with the following
1071methods.
1072
1073 # Chain methods
1074 $collection->map(sub { ucfirst })->shuffle->each(sub {
1075 my ($word, $num) = @_;
1076 say "$num: $word";
1077 });
1078
1079 # Access array directly to manipulate collection
1080 $collection->[23] += 100;
1081 say for @$collection;
1082
1083=head2 compact
1084
1085 my $new = $collection->compact;
1086
1087Create a new collection with all elements that are defined and not an empty
1088string.
1089
1090 # $collection contains (0, 1, undef, 2, '', 3)
1091 $collection->compact->join(', '); # "0, 1, 2, 3"
1092
1093=head2 each
1094
1095 my @elements = $collection->each;
1096 $collection = $collection->each(sub {...});
1097
1098Evaluate callback for each element in collection or return all elements as a
1099list if none has been provided. The element will be the first argument passed
1100to the callback and is also available as C<$_>.
1101
1102 # Make a numbered list
1103 $collection->each(sub {
1104 my ($e, $num) = @_;
1105 say "$num: $e";
1106 });
1107
1108=head2 first
1109
1110 my $first = $collection->first;
1111 my $first = $collection->first(qr/foo/);
1112 my $first = $collection->first(sub {...});
1113 my $first = $collection->first($method);
1114 my $first = $collection->first($method, @args);
1115
1116Evaluate regular expression/callback for, or call method on, each element in
1117collection and return the first one that matched the regular expression, or for
1118which the callback/method returned true. The element will be the first argument
1119passed to the callback and is also available as C<$_>.
1120
1121 # Longer version
1122 my $first = $collection->first(sub { $_->$method(@args) });
1123
1124 # Find first value that contains the word "dom"
1125 my $interesting = $collection->first(qr/dom/i);
1126
1127 # Find first value that is greater than 5
1128 my $greater = $collection->first(sub { $_ > 5 });
1129
1130=head2 flatten
1131
1132 my $new = $collection->flatten;
1133
1134Flatten nested collections/arrays recursively and create a new collection with
1135all elements.
1136
1137 # $collection contains (1, [2, [3, 4], 5, [6]], 7)
1138 $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7"
1139
1140=head2 grep
1141
1142 my $new = $collection->grep(qr/foo/);
1143 my $new = $collection->grep(sub {...});
1144 my $new = $collection->grep($method);
1145 my $new = $collection->grep($method, @args);
1146
1147Evaluate regular expression/callback for, or call method on, each element in
1148collection and create a new collection with all elements that matched the
1149regular expression, or for which the callback/method returned true. The element
1150will be the first argument passed to the callback and is also available as
1151C<$_>.
1152
1153 # Longer version
1154 my $new = $collection->grep(sub { $_->$method(@args) });
1155
1156 # Find all values that contain the word "dom"
1157 my $interesting = $collection->grep(qr/dom/i);
1158
1159 # Find all values that are greater than 5
1160 my $greater = $collection->grep(sub { $_ > 5 });
1161
1162=head2 join
1163
1164 my $string = $collection->join;
1165 my $string = $collection->join("\n");
1166
1167Turn collection into string.
1168
1169 # Join all values with commas
1170 $collection->join(', ');
1171
1172=head2 last
1173
1174 my $last = $collection->last;
1175
1176Return the last element in collection.
1177
1178=head2 map
1179
1180 my $new = $collection->map(sub {...});
1181 my $new = $collection->map($method);
1182 my $new = $collection->map($method, @args);
1183
1184Evaluate callback for, or call method on, each element in collection and create
1185a new collection from the results. The element will be the first argument
1186passed to the callback and is also available as C<$_>.
1187
1188 # Longer version
1189 my $new = $collection->map(sub { $_->$method(@args) });
1190
1191 # Append the word "dom" to all values
1192 my $domified = $collection->map(sub { $_ . 'dom' });
1193
1194=head2 reduce
1195
1196 my $result = $collection->reduce(sub {...});
1197 my $result = $collection->reduce(sub {...}, $initial);
1198
1199Reduce elements in collection with callback; the first element will be used as
1200the initial value if none has been provided.
1201
1202 # Calculate the sum of all values
1203 my $sum = $collection->reduce(sub { $a + $b });
1204
1205 # Count how often each value occurs in collection
1206 my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {});
1207
1208=head2 reverse
1209
1210 my $new = $collection->reverse;
1211
1212Create a new collection with all elements in reverse order.
1213
1214=head2 slice
1215
1216 my $new = $collection->slice(4 .. 7);
1217
1218Create a new collection with all selected elements.
1219
1220 # $collection contains ('A', 'B', 'C', 'D', 'E')
1221 $collection->slice(1, 2, 4)->join(' '); # "B C E"
1222
1223=head2 shuffle
1224
1225 my $new = $collection->shuffle;
1226
1227Create a new collection with all elements in random order.
1228
1229=head2 size
1230
1231 my $size = $collection->size;
1232
1233Number of elements in collection.
1234
1235=head2 sort
1236
1237 my $new = $collection->sort;
1238 my $new = $collection->sort(sub {...});
1239
1240Sort elements based on return value of callback and create a new collection
1241from the results.
1242
1243 # Sort values case-insensitive
1244 my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) });
1245
1246=head2 tap
1247
1248 $collection = $collection->tap(sub {...});
1249
1250Evaluate callback with the collection and return the collection (see L<Mojo::Base/"tap">).
1251
1252=head2 to_array
1253
1254 my $array = $collection->to_array;
1255
1256Turn collection into array reference.
1257
1258=head2 uniq
1259
1260 my $new = $collection->uniq;
1261 my $new = $collection->uniq(sub {...});
1262 my $new = $collection->uniq($method);
1263 my $new = $collection->uniq($method, @args);
1264
1265Create a new collection without duplicate elements, using the string
1266representation of either the elements or the return value of the
1267callback/method.
1268
1269 # Longer version
1270 my $new = $collection->uniq(sub { $_->$method(@args) });
1271
1272 # $collection contains ('foo', 'bar', 'bar', 'baz')
1273 $collection->uniq->join(' '); # "foo bar baz"
1274
1275 # $collection contains ([1, 2], [2, 1], [3, 2])
1276 $collection->uniq(sub { $_->[1] })->to_array; # "[[1, 2], [2, 1]]"
1277
a292be34 1278=head1 BUGS
1279
1280Report any issues on the public bugtracker.
1281
1282=head1 AUTHOR
1283
1284Dan Book <dbook@cpan.org>
1285
1286=head1 COPYRIGHT AND LICENSE
1287
1288This software is Copyright (c) 2015 by Dan Book.
1289
1290This is free software, licensed under:
1291
1292 The Artistic License 2.0 (GPL Compatible)
1293
1294=head1 SEE ALSO
1295
d6512b50 1296L<Mojo::DOM>, L<XML::LibXML>, L<XML::Twig>, L<HTML::TreeBuilder>, L<XML::Smart>