Commit | Line | Data |
3fea05b9 |
1 | # $Id: Tree.pm,v 1.2 2003/07/31 07:54:51 matt Exp $ |
2 | |
package XML::Parser::Style::Tree;

use strict;
use warnings;

# Flag 'Tree' in XML::Parser's table of built-in styles.
$XML::Parser::Built_In_Styles{Tree} = 1;
5 | |
# Prepare the parser object for building a tree.
# Sets Lists to an empty stack and points both Tree and Curlist at one
# shared, initially empty array, which is also the return value.
sub Init {
    my ($parser) = @_;

    my $root = [];
    $parser->{Lists} = [];
    $parser->{Tree}  = $root;

    return $parser->{Curlist} = $root;
}
11 | |
# Element-start handler.
# Arguments: the parser object, the element name, then the remaining
# arguments, which are collected into the attribute hash.
# Saves the current list on the Lists stack, appends
# "name => [ {attributes} ]" to the current list, and makes the new
# content array the current list.
sub Start {
    my ($parser, $name, @attributes) = @_;

    my $content = [ +{@attributes} ];

    push @{ $parser->{Lists} },   $parser->{Curlist};
    push @{ $parser->{Curlist} }, $name, $content;

    return $parser->{Curlist} = $content;
}
20 | |
# Element-end handler.
# Pops the most recently saved list off the Lists stack and makes it the
# current list again; the restored list is also the return value.
sub End {
    my ($expat) = @_;

    # A second argument (the tag) is passed in as well, but restoring the
    # saved list does not depend on it, so it is not unpacked.
    return $expat->{Curlist} = pop @{ $expat->{Lists} };
}
26 | |
# Character-data handler.
# If the last pair in the current list has the tag '0', the new text is
# appended to that pair's string; otherwise a new "0 => text" pair is
# pushed onto the current list.
sub Char {
    my ($parser, $chunk) = @_;

    my $siblings = $parser->{Curlist};
    my $last     = $#{$siblings};

    # The tag of the final pair sits just before the final slot.
    my $extends_text = ($last > 0 && $siblings->[ $last - 1 ] eq '0');

    return $siblings->[$last] .= $chunk if $extends_text;
    return push @{$siblings}, 0, $chunk;
}
39 | |
# Finish the parse: remove the Curlist and Lists bookkeeping entries from
# the parser object and return whatever is stored under Tree.
sub Final {
    my ($parser) = @_;

    delete @{$parser}{qw(Curlist Lists)};

    return $parser->{Tree};
}
46 | |
47 | 1; |
48 | __END__ |
49 | |
50 | =head1 NAME |
51 | |
52 | XML::Parser::Style::Tree - Tree style parser |
53 | |
54 | =head1 SYNOPSIS |
55 | |
56 | use XML::Parser; |
57 | my $p = XML::Parser->new(Style => 'Tree'); |
58 | my $tree = $p->parsefile('foo.xml'); |
59 | |
60 | =head1 DESCRIPTION |
61 | |
62 | This module implements XML::Parser's Tree style parser. |
63 | |
64 | When parsing a document, C<parse()> will return a parse tree for the |
65 | document. Each node in the tree |
66 | takes the form of a tag-content pair. Text nodes are represented with |
67 | a pseudo-tag of "0" and the string that is their content. For elements, |
68 | the content is an array reference. The first item in the array is a |
69 | (possibly empty) hash reference containing attributes. The remainder of |
70 | the array is a sequence of tag-content pairs representing the content |
71 | of the element. |
72 | |
73 | So for example the result of parsing: |
74 | |
75 | <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> |
76 | |
77 | would be: |
78 | Tag Content |
79 | ================================================================== |
80 | [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], |
81 | bar, [ {}, 0, "Howdy", ref, [{}]], |
82 | 0, "do" |
83 | ] |
84 | ] |
85 | |
86 | The root element "foo" has 3 children: a "head" element, a "bar" |
87 | element and the text "do". After the empty attribute hash, these are |
88 | represented in its contents by 3 tag-content pairs. |
89 | |
90 | =cut |