3 package XML::Handler::Trees;
7 package XML::Handler::Tree;
10 my $class = ref($_[0]) || $_[0];
17 $self->{Curlist}=$self->{Tree}=[];
21 my ($self,$element)=@_;
23 if (exists $element->{LocalName}) {
24 # namespaces are available!
26 foreach my $attr (values %{$element->{Attributes}}) {
27 if ($attr->{NamespaceURI}) {
28 $newlist->[0]{"{$attr->{NamespaceURI}}$attr->{LocalName}"} = $attr->{Value};
31 $newlist->[0]{$attr->{Name}} = $attr->{Value};
35 elsif (ref $element->{Attributes} eq 'HASH') {
36 $newlist=[{map {$_=>$element->{Attributes}{$_}} keys %{$element->{Attributes}}}];
39 $newlist=[{map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}}}];
41 push @{ $self->{Lists} }, $self->{Curlist};
42 if (exists($element->{LocalName}) && $element->{NamespaceURI}) {
43 push @{ $self->{Curlist} }, "{$element->{NamespaceURI}}$element->{LocalName}" => $newlist;
46 push @{ $self->{Curlist} }, $element->{Name} => $newlist;
48 $self->{Curlist} = $newlist;
52 my ($self,$element)=@_;
53 $self->{Curlist}=pop @{$self->{Lists}};
58 my $clist = $self->{Curlist};
60 if ($pos>0 and $clist->[$pos-1] eq '0') {
61 $clist->[$pos].=$text->{Data};
64 push @$clist,0=>$text->{Data};
70 sub processing_instruction {}
74 delete $self->{Curlist};
75 delete $self->{Lists};
79 package XML::Handler::EasyTree;
83 $class=ref($class) || $class;
84 my $self={Noempty=>0,Latin=>0,Searchable=>0,@_};
85 $self->{Noempty}||=$self->{Searchable};
92 $self->{Curlist} = $self->{Tree} = [];
96 my ($self,$element)=@_;
100 if ($self->{Searchable}) {
101 $newnode= XML::Handler::EasyTree::Searchable->new( Name => $self->nsname($element), Content => $newlist );
104 $newnode={type=>'e',attrib=>{},name=>$self->nsname($element),content=>$newlist};
106 if (exists $element->{LocalName}) {
107 while (my ($name,$obj) = each %{$element->{Attributes}}) {
108 $newnode->{attrib}{$name} = $self->encode($obj->{Value});
111 elsif (ref $element->{Attributes} eq 'HASH') {
112 while (my ($name,$val)=each %{$element->{Attributes}}) {
113 $newnode->{attrib}{$self->nsname($name)}=$self->encode($val);
117 foreach my $att (keys %{$element->{Attributes}}) {
118 $newnode->{attrib}{$self->nsname($element->{Attributes}{$att})}=$self->encode($element->{Attributes}{$att}{Value});
121 push @{ $self->{Lists} }, $self->{Curlist};
122 push @{ $self->{Curlist} }, $newnode;
123 $self->{Curlist} = $newlist;
129 $self->{Curlist}=pop @{$self->{Lists}};
134 my $clist=$self->{Curlist};
135 if (!@$clist || $clist->[-1]{type} ne 't') {
136 push @$clist,{type=>'t',content=>''};
138 $clist->[-1]{content}.=$self->encode($text->{Data});
141 sub processing_instruction {
144 my $clist=$self->{Curlist};
145 push @$clist,{type=>'p',target=>$self->encode($pi->{Target}),content=>$self->encode($pi->{Data})};
153 delete $self->{Curlist};
154 delete $self->{Lists};
155 if ($self->{Searchable}) {
156 return XML::Handler::EasyTree::Searchable->new( Name => '__TOPLEVEL__', Content => $self->{Tree} );
164 if (defined $name->{NamespaceURI}) {
165 $name="{$name->{NamespaceURI}}$name->{LocalName}";
171 return $self->encode($name);
176 if ($self->{Latin}) {
177 $text=~s{([\xc0-\xc3])(.)}{
180 chr((($hi & 0x03) <<6) | ($lo & 0x3F))
188 if ($self->{Noempty}) {
189 my $clist=$self->{Curlist};
190 if (@$clist && $clist->[-1]{type} eq 't' && $clist->[-1]{content}=~/^\s+$/) {
196 package XML::Handler::EasyTree::Searchable;
199 # new() returns a new node with the same structure as the `newnode'
202 # Usage: XML::Handler::EasyTree::Searchable->new( Name => $name, Content => $content );
206 my $class = ref($type) || $type || die "must supply a object type" ;
210 my $name = $opts{Name} || '';
211 my $content = $opts{Content} || undef;
222 # name() returns the name of the node. Ideally, it should return a
223 # "fully qualified" name, but it doesn't
227 return $self->{name};
231 # value() returns the value associated with an object
237 unless( ( exists $self->{content} ) && ( defined $self->{content} ) );
239 my $possible = $self->{content};
241 die "not an array" unless( "$possible" =~ /ARRAY/ );
243 $possible = $possible->[0];
246 unless( ( exists $possible->{type} ) && ( $possible->{type} eq 't' ) );
249 unless( ( exists $possible->{content} ) && ( defined $possible->{content} ) );
251 return $possible->{content};
255 # usage: $newobj = $obj->child( $name );
257 # child() returns a child (elements only) of the object with the $name
259 # for the case where there is more than one child that matches $name,
260 # the array context semantics haven't been completely worked out:
261 # - in an array context, all children are returned.
262 # - in scalar context, the first child matching $name is returned.
264 # In a scalar context, the XML::Parser::SimpleObj class returns an
265 # object containing all the children matching $name, unless there is
266 # only one child in which case it returns that child (see commented
267 # code). I find that behavior confusing.
271 my $spec = shift || '';
273 my $array = $self->{content};
277 @rv = grep { $_->{name} eq $spec } grep { $_->{type} eq 'e' } @$array;
279 @rv = grep { $_->{type} eq 'e' } @$array;
282 my $num = scalar( @rv );
287 return '' unless( $num );
288 return $rv[0] if( $num == 1 );
289 # my $class = ref( $self );
290 # return $class->new( Name => "__magic_child_list_object__", Content => [ @rv ] );
295 # usage: @children = $obj->children( $name );
297 # children() returns a list of all children (elements only) of the
298 # $obj that match $name -- in the order in which they appeared in the
303 my $array = $self->{content};
304 my $spec = shift || '';
309 @rv = grep { $_->{name} eq $spec } grep { $_->{type} eq 'e' } @$array;
311 @rv = grep { $_->{type} eq 'e' } @$array;
318 # usage: @children_names = $obj->children_names();
320 # children_names() returns a list of all the names of the objects
321 # children (elements only) in the order in which they appeared in the
326 my $array = $self->{content};
328 return map { $_->{name} } grep { $_->{type} eq 'e' } @$array;
332 # usage: $attrib = $obj->attribute( $att_name );
334 # attribute() returns the string associated with the attribute of the
335 # object. If not found returns a null string.
339 my $spec = shift || return '';
341 return '' unless( ( exists $self->{attrib} ) && ( defined $self->{attrib} ) );
343 my $attrib = $self->{attrib};
344 return '' unless( ( exists $attrib->{$spec} ) && ( defined $attrib->{$spec} ) );
346 return $attrib->{$spec};
350 # usage: @attribute_list = $obj->attribute_list();
352 # attribute_list() returns a list (in no particular order) of the
353 # attribute names associated with the object
358 return '' unless( ( exists $self->{attrib} ) && ( defined $self->{attrib} ) );
360 my $attrib = $self->{attrib};
361 return '' unless( "$attrib" =~ /HASH/ );
363 return keys %$attrib;
367 # usage: $text = $obj->dump_tree();
369 # dump_tree() returns a textual representation (in xml form) of the
370 # object's hierarchy. Only elements are processed.
377 my $pretty = delete $opts{-pretty};
379 my $name = $self->name();
380 my $value = $self->value();
381 my @children = $self->children();
384 unless( $name eq '__TOPLEVEL__' ) {
386 for my $att ( $self->attribute_list() ) {
387 $text .= sprintf( " %s=\"%s\"", $att, encode($self->attribute( $att )) );
392 $text .= encode($value);
397 for my $child ( @children ) {
398 $text .= $child->dump_tree();
401 unless( $name eq '__TOPLEVEL__' ) {
409 # usage: $text = $obj->pretty_dump_tree();
411 # pretty_dump_tree() is identical to dump_tree(), except that newline
412 # and indentation embellishments are added
414 sub pretty_dump_tree {
416 my $tab = shift || 0;
418 my $indent = " " x ( 2 * $tab );
420 my $name = $self->name();
421 my $value = $self->value();
422 my @children = $self->children();
425 unless( $name eq '__TOPLEVEL__' ) {
426 $text .= "$indent<$name";
427 for my $att ( $self->attribute_list() ) {
428 $text .= sprintf( " %s=\"%s\"", $att, encode($self->attribute( $att )) );
432 if( defined $value ) {
433 $text .= encode($value);
434 $text .= "</$name>\n";
441 for my $child ( @children ) {
442 $text .= $child->pretty_dump_tree( $tab + 1 );
445 unless( $name eq '__TOPLEVEL__' ) {
446 $text .= "$indent</$name>\n";
454 my %encodings=('&'=>'amp','<'=>'lt','>'=>'gt','"'=>'quot',"'"=>'apos');
455 $encstr=~s/([&<>"'])/&$encodings{$1};/g;
459 package XML::Handler::TreeBuilder;
462 @ISA=qw(XML::Element);
465 require XML::Element;
466 my $class = ref($_[0]) || $_[0];
467 my $self = XML::Element->new('NIL');
468 $self->{'_element_class'} = 'XML::Element';
469 $self->{'_store_comments'} = 0;
470 $self->{'_store_pis'} = 0;
471 $self->{'_store_declarations'} = 0;
476 sub start_document {}
479 my ($self,$element)=@_;
481 if (exists $element->{LocalName}) {
482 @attlist=map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}};
484 elsif (ref $element->{Attributes} eq 'HASH') {
485 @attlist=map {$_=>$element->{Attributes}{$_}} keys %{$element->{Attributes}};
488 @attlist=map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}};
490 if(@{$self->{_stack}}) {
491 push @{$self->{_stack}}, $self->{'_element_class'}->new($element->{Name},@attlist);
492 $self->{_stack}[-2]->push_content( $self->{_stack}[-1] );
495 $self->tag($element->{Name});
497 $self->attr(splice(@attlist,0,2));
499 push @{$self->{_stack}}, $self;
505 pop @{$self->{_stack}};
511 $self->{_stack}[-1]->push_content($text->{Data});
515 my ($self,$comment)=@_;
516 return unless $self->{'_store_comments'};
517 (@{$self->{_stack}} ? $self->{_stack}[-1] : $self)->push_content(
518 $self->{'_element_class'}->new('~comment', 'text' => $comment->{Data})
523 sub processing_instruction {
525 return unless $self->{'_store_pis'};
526 (@{$self->{_stack}} ? $self->{_stack}[-1] : $self)->push_content(
527 $self->{'_element_class'}->new('~pi', 'text' => "$pi->{Target} $pi->{Data}")
537 sub _elem # universal accessor...
539 my($self, $elem, $val) = @_;
540 my $old = $self->{$elem};
541 $self->{$elem} = $val if defined $val;
545 sub store_comments { shift->_elem('_store_comments', @_); }
546 sub store_declarations { shift->_elem('_store_declarations', @_); }
547 sub store_pis { shift->_elem('_store_pis', @_); }
554 XML::Handler::Trees - PerlSAX handlers for building tree structures
558 use XML::Handler::Trees;
559 use XML::Parser::PerlSAX;
561 my $p=XML::Parser::PerlSAX->new();
562 my $h=XML::Handler::Tree->new();
563 my $tree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});
565 my $p=XML::Parser::PerlSAX->new();
566 my $h=XML::Handler::EasyTree->new(Noempty=>1);
567 my $easytree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});
569 my $p=XML::Parser::PerlSAX->new();
570 my $h=XML::Handler::TreeBuilder->new();
572 my $tree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});
576 XML::Handler::Trees provides three PerlSAX handler classes for building
577 tree structures. XML::Handler::Tree builds the same type of tree as the
578 "Tree" style in XML::Parser. XML::Handler::EasyTree builds the same
579 type of tree as the "EasyTree" style added to XML::Parser by
580 XML::Parser::EasyTree. XML::Handler::TreeBuilder builds the same type
581 of tree as Sean M. Burke's XML::TreeBuilder. These classes make it
582 possible to construct these tree structures from sources other than
585 All three handlers can be driven by either PerlSAX 1 or PerlSAX 2
586 drivers. In all cases, the end_document() method returns a reference to
587 the constructed tree, which normally becomes the return value of the
590 =head1 CLASS XML::Handler::Tree
592 This handler builds the same type of tree structure as the "Tree" style
593 in XML::Parser. Some modules such as Dan Brian's XML::SimpleObject work
594 with this type of tree. See the documentation for XML::Parser for details.
600 =item $handler = XML::Handler::Tree->new()
602 Creates a handler object.
606 =head1 CLASS XML::Handler::EasyTree
608 This handler builds a lightweight tree structure representing the XML
609 document. This structure is, at least in this author's opinion, easier to
610 work with than the "standard" style of tree. It is the same type of
611 structure as built by XML::Parser when using XML::Parser::EasyTree, or
612 by the get_simple_tree method in XML::Records.
614 The tree is returned as a reference to an array of tree nodes, each of
615 which is a hash reference. All nodes have a 'type' key whose value is
616 the type of the node: 'e' for element nodes, 't' for text nodes, and 'p'
617 for processing instruction nodes. All nodes also have a 'content' key
618 whose value is a reference to an array holding the element's child nodes
619 for element nodes, the string value for text nodes, and the data value
620 for processing instruction nodes. Element nodes also have an 'attrib'
621 key whose value is a reference to a hash of attribute names and values and a 'name'
622 key whose value is the element's name. Processing instructions also have
623 a 'target' key whose value is the PI's target.
625 EasyTree nodes are ordinary Perl hashes and are not objects. Contiguous
626 runs of text are always returned in a single node.
628 The reason the parser returns an array reference rather than the root
629 element's node is that an XML document can legally contain processing
630 instructions outside the root element (the xml-stylesheet PI is commonly
633 If namespace information is available (only possible with PerlSAX 2),
634 element and attribute names will be prefixed with their (possibly empty)
635 namespace URI enclosed in curly brackets, and namespace prefixes will be
642 =item $handler = XML::Handler::EasyTree->new([options])
644 Creates a handler object. Options can be provided hash-style:
650 If this is set to a true value, text nodes consisting entirely of
651 whitespace will not be stored in the tree. The default is false.
655 If this is set to a true value, characters with Unicode values in the
656 Latin-1 range (160-255) will be stored in the tree as Latin-1 rather
657 than UTF-8. The default is false.
661 If this is set to a true value, the parser will return a tree of XML::Handler::EasyTree::Searchable
662 objects rather than bare array references, providing access to the navigation methods
663 listed below. The top-level node returned will be a dummy element node with a name of "__TOPLEVEL__".
664 It is false by default. Setting this option automatically enables the Noempty option.
670 =head2 XML::Handler::EasyTree::Searchable METHODS
672 If the Searchable option is set, all nodes in the tree will be XML::Handler::EasyTree::Searchable objects,
673 which have the same structure as EasyTree nodes but also implement the following methods similar to
674 those in XML::SimpleObject.
678 =item $name = $node->name()
680 Returns the name of the node. Ideally, it should return a
681 "fully qualified" name, but it doesn't.
683 =item $val = $node->value()
685 Returns the text value associated with a node object. Returns undef if the node has
686 no text children or its first child is not a text node.
688 =item $newobj = $obj->child( $name );
690 Returns a child (elements only) of the object with the $name.
692 For the case where there is more than one child that matches $name,
693 the array context semantics haven't been completely worked out:
694 - in an array context, all children are returned.
695 - in scalar context, the first child matching $name is returned.
697 In a scalar context, the XML::Parser::SimpleObj class returns an
698 object containing all the children matching $name, unless there is
699 only one child in which case it returns that child (see commented
700 code). I find that behavior confusing.
702 =item @children = $obj->children( $name );
704 Returns a list of all children (elements only) of the
705 $obj that match $name -- in the order in which they appeared in the
708 =item @children_names = $obj->children_names();
710 Returns a list of all the names of the objects
711 children (elements only) in the order in which they appeared in the
714 =item $attrib = $obj->attribute( $att_name );
716 Returns the string associated with the attribute of the
717 object. If not found returns a null string.
719 =item @attribute_list = $obj->attribute_list();
721 Returns a list (in no particular order) of the
722 attribute names associated with the object.
724 =item $text = $obj->dump_tree();
726 Returns a textual representation (in xml form) of the
727 object's hierarchy. Only elements are processed. The result will
728 be in whatever character encoding the SAX driver delivered (which may
729 not be the same encoding as the original source).
731 =item $text = $obj->pretty_dump_tree();
733 Identical to dump_tree(), except that newline
734 and indentation embellishments are added.
742 use XML::Handler::Trees;
743 use XML::Parser::PerlSAX;
746 my $p=XML::Parser::PerlSAX->new();
747 my $h=XML::Handler::EasyTree->new( Searchable=>1 );
748 my $easytree=$p->parse( Handler => $h, Source => { SystemId => 'systemB.xml' } );
750 my $vme = $easytree->child( "vmesystem" );
753 print "vmesystem config: ", $vme->attribute( "configuration_name" ), "\n";
756 print "vmesystem children: ", join( ', ', $vme->children_names() ), "\n";
759 print "gps model is ", $vme->child( "gps" )->child( "model" )->value(), "\n";
760 my $gps = $vme->child( "gps" );
761 print "gps slot is ", $gps->child( "slot" )->value(), "\n";
764 print "reconstructed XML: \n";
765 print $easytree->dump_tree(), "\n";
768 # print "reconstructed XML (pretty): \n";
769 # print $easytree->pretty_dump_tree(), "\n";
774 =head1 CLASS XML::Handler::TreeBuilder
776 This handler builds XML document trees constructed of
777 XML::Element objects (XML::Element is a subclass of HTML::Element
778 adapted for XML). To use it, XML::TreeBuilder and its prerequisite
779 HTML::Tree need to be installed. See the documentation for those
780 modules for information on how to work with these tree structures.
786 =item $handler = XML::Handler::TreeBuilder->new()
788 Creates a handler which builds a tree rooted in an XML::Element.
790 =item $root->store_comments(value)
792 This determines whether comments will be stored in the tree (not all SAX
793 drivers generate comment events). Currently, this is off by default.
795 =item $root->store_declarations(value)
797 This determines whether markup declarations will be stored in the tree.
798 Currently, this is off by default. The present implementation does not
799 store markup declarations in any case; this method is provided for future use.
801 =item $root->store_pis(value)
803 This determines whether processing instructions will be stored in the tree.
804 Currently, this is off (false) by default.
810 Eric Bohlman (ebohlman@omsdev.com)
812 PerlSAX 2 compatibility added by Matt Sergeant (matt@sergeant.org)
814 XML::Handler::EasyTree::Searchable written by Stuart McDow (smcdow@moontower.org)
816 Copyright (c) 2001 Eric Bohlman.
818 Portions of this code Copyright (c) 2001 Matt Sergeant.
820 Portions of this code Copyright (c) 2001 Stuart McDow.
822 All rights reserved. This program is free software; you can redistribute it
823 and/or modify it under the same terms as Perl itself.
830 L<XML::Parser::EasyTree>