1 # $Id: Parser.pm 785 2009-07-16 14:17:46Z pajas $
3 # This is free software, you may use it and distribute it under the same terms as
6 # Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
10 package XML::LibXML::SAX::Parser;
13 use vars qw($VERSION @ISA);
16 use XML::LibXML::Common qw(:libxml);
18 use XML::SAX::DocumentLocator;
20 $VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
21 @ISA = ('XML::SAX::Base');
24 return $XML::LibXML::__threads_shared ? 0 : 1;
27 sub _parse_characterstream {
28 my ($self, $fh, $options) = @_;
29 die "parsing a characterstream is not supported at this time";
32 sub _parse_bytestream {
33 my ($self, $fh, $options) = @_;
34 my $parser = XML::LibXML->new();
35 my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_fh($fh, $options->{Source}{SystemId}) : $parser->parse_fh($fh);
36 $self->generate($doc);
40 my ($self, $str, $options) = @_;
41 my $parser = XML::LibXML->new();
42 my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_string($str, $options->{Source}{SystemId}) : $parser->parse_string($str);
43 $self->generate($doc);
47 my ($self, $sysid, $options) = @_;
48 my $parser = XML::LibXML->new();
49 my $doc = $parser->parse_file($sysid);
50 $self->generate($doc);
57 my $doc = $node->ownerDocument();
59 # precompute some DocumentLocator values
66 my $dtd = defined $doc ? $doc->externalSubset() : undef;
68 $locator{PublicId} = $dtd->publicId();
69 $locator{SystemId} = $dtd->systemId();
72 $locator{Encoding} = $doc->encoding();
73 $locator{XMLVersion} = $doc->version();
75 $self->set_document_locator(
76 XML::SAX::DocumentLocator->new(
77 sub { $locator{PublicId} },
78 sub { $locator{SystemId} },
79 sub { defined($self->{current_node}) ? $self->{current_node}->line_number() : undef },
81 sub { $locator{Encoding} },
82 sub { $locator{XMLVersion} },
87 if ( $node->nodeType() == XML_DOCUMENT_NODE
88 || $node->nodeType == XML_HTML_DOCUMENT_NODE ) {
89 $self->start_document({});
90 $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding});
91 $self->process_node($node);
92 $self->end_document({});
97 my ($self, $node) = @_;
99 local $self->{current_node} = $node;
101 my $node_type = $node->nodeType();
102 if ($node_type == XML_COMMENT_NODE) {
103 $self->comment( { Data => $node->getData } );
105 elsif ($node_type == XML_TEXT_NODE
106 || $node_type == XML_CDATA_SECTION_NODE) {
107 # warn($node->getData . "\n");
108 $self->characters( { Data => $node->nodeValue } );
110 elsif ($node_type == XML_ELEMENT_NODE) {
111 # warn("<" . $node->getName . ">\n");
112 $self->process_element($node);
113 # warn("</" . $node->getName . ">\n");
115 elsif ($node_type == XML_ENTITY_REF_NODE) {
116 foreach my $kid ($node->childNodes) {
117 # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n");
118 $self->process_node($kid);
121 elsif ($node_type == XML_DOCUMENT_NODE
122 || $node_type == XML_HTML_DOCUMENT_NODE
123 || $node_type == XML_DOCUMENT_FRAG_NODE) {
124 # some times it is just usefull to generate SAX events from
125 # a document fragment (very good with filters).
126 foreach my $kid ($node->childNodes) {
127 $self->process_node($kid);
130 elsif ($node_type == XML_PI_NODE) {
131 $self->processing_instruction( { Target => $node->getName, Data => $node->getData } );
133 elsif ($node_type == XML_COMMENT_NODE) {
134 $self->comment( { Data => $node->getData } );
136 elsif ( $node_type == XML_XINCLUDE_START
137 || $node_type == XML_XINCLUDE_END ) {
139 # i may want to handle this one day, dunno yet
141 elsif ($node_type == XML_DTD_NODE ) {
143 # i will support DTDs, but had no time yet.
146 # warn("unsupported node type: $node_type");
151 sub process_element {
152 my ($self, $element) = @_;
155 my @ns_maps = $element->getNamespaces;
157 foreach my $ns (@ns_maps) {
158 $self->start_prefix_mapping(
160 NamespaceURI => $ns->href,
161 Prefix => ( defined $ns->localname ? $ns->localname : ''),
166 foreach my $attr ($element->attributes) {
168 # warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n");
169 # this isa dump thing...
170 if ($attr->isa('XML::LibXML::Namespace')) {
171 # TODO This needs fixing modulo agreeing on what
172 # is the right thing to do here.
173 unless ( defined $attr->name ) {
174 ## It's an atter like "xmlns='foo'"
175 $attribs->{"{}xmlns"} =
178 LocalName => "xmlns",
180 Value => $attr->href,
185 my $prefix = "xmlns";
186 my $localname = $attr->localname;
187 my $key = "{http://www.w3.org/2000/xmlns/}";
190 if ( defined $localname ) {
192 $name.= ":".$localname;
198 Value => $attr->href,
199 NamespaceURI => "http://www.w3.org/2000/xmlns/",
201 LocalName => $localname,
206 my $ns = $attr->namespaceURI;
208 $ns = '' unless defined $ns;
209 $key = "{$ns}".$attr->localname;
210 ## Not sure why, but $attr->name is coming through stripped
211 ## of its prefix, so we need to hand-assemble a real name.
212 my $name = $attr->name;
213 $name = "" unless defined $name;
215 my $prefix = $attr->prefix;
216 $prefix = "" unless defined $prefix;
217 $name = "$prefix:$name"
218 if index( $name, ":" ) < 0 && length $prefix;
223 Value => $attr->value,
226 LocalName => $attr->localname,
230 # warn("Attr made: ", Dumper($attribs->{$key}), "\n");
234 Name => $element->nodeName,
235 Attributes => $attribs,
236 NamespaceURI => $element->namespaceURI,
237 Prefix => $element->prefix || "",
238 LocalName => $element->localname,
241 $self->start_element($node);
243 foreach my $child ($element->childNodes) {
244 $self->process_node($child);
247 my $end_node = { %$node };
249 delete $end_node->{Attributes};
251 $self->end_element($end_node);
253 foreach my $ns (@ns_maps) {
254 $self->end_prefix_mapping(
256 NamespaceURI => $ns->href,
257 Prefix => ( defined $ns->localname ? $ns->localname : ''),