1 # $Id: DTDDecls.pm,v 1.9 2008-08-05 12:37:13 grant Exp $
3 package XML::SAX::PurePerl;
6 use XML::SAX::PurePerl::Productions qw($SingleChar);
9 my ($self, $reader) = @_;
11 my $data = $reader->data(9);
12 return 0 unless $data =~ /^<!ELEMENT/;
13 $reader->move_along(9);
15 $self->skip_whitespace($reader) ||
16 $self->parser_error("No whitespace after ELEMENT declaration", $reader);
18 my $name = $self->Name($reader);
20 $self->skip_whitespace($reader) ||
21 $self->parser_error("No whitespace after ELEMENT's name", $reader);
23 $self->contentspec($reader, $name);
25 $self->skip_whitespace($reader);
27 $reader->match('>') or $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader);
33 my ($self, $reader, $name) = @_;
35 my $data = $reader->data(5);
38 if ($data =~ /^EMPTY/) {
39 $reader->move_along(5);
42 elsif ($data =~ /^ANY/) {
43 $reader->move_along(3);
47 $model = $self->Mixed_or_children($reader);
51 # call SAX callback now.
52 $self->element_decl({Name => $name, Model => $model});
56 $self->parser_error("contentspec not found in ELEMENT declaration", $reader);
59 sub Mixed_or_children {
60 my ($self, $reader) = @_;
62 my $data = $reader->data(8);
63 $data =~ /^\(/ or return; # $self->parser_error("No opening bracket in Mixed or children", $reader);
65 if ($data =~ /^\(\s*\#PCDATA/) {
67 $self->skip_whitespace($reader);
68 $reader->move_along(7);
69 my $model = $self->Mixed($reader);
73 # not matched - must be Children
74 return $self->children($reader);
77 # Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' )
78 # | ( '(' S* PCDATA S* ')' )
80 my ($self, $reader) = @_;
82 # Mixed_or_children already matched '(' S* '#PCDATA'
84 my $model = '(#PCDATA';
86 $self->skip_whitespace($reader);
91 last unless $reader->match('|');
92 $self->skip_whitespace($reader);
94 my $name = $self->Name($reader) ||
95 $self->parser_error("No 'Name' after Mixed content '|'", $reader);
98 $self->parser_error("Element '$name' has already appeared in this group", $reader);
104 $self->skip_whitespace($reader);
107 $reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader);
111 if ($reader->match('*')) {
118 # [[47]] Children ::= ChoiceOrSeq Cardinality?
119 # [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality?
120 # ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')'
121 # [[49]] Choice ::= ( S* '|' S* Cp )+
122 # [[50]] Seq ::= ( S* ',' S* Cp )+
123 # // Children ::= (Choice | Seq) Cardinality?
124 # // Cp ::= ( QName | Choice | Seq) Cardinality?
125 # // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')'
126 # // Seq ::= '(' S* Cp ( S* ',' S* Cp )* S* ')'
127 # [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality )
128 # | ( '(' S* PCDATA S* ')' )
129 # Cardinality ::= '?' | '+' | '*'
130 # MixedCardinality ::= '*'
132 my ($self, $reader) = @_;
134 return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
138 my ($self, $reader) = @_;
140 $reader->match('(') or $self->parser_error("choice/seq contains no opening bracket", $reader);
144 $self->skip_whitespace($reader);
146 $model .= $self->Cp($reader);
148 if (my $choice = $self->Choice($reader)) {
152 $model .= $self->Seq($reader);
155 $self->skip_whitespace($reader);
157 $reader->match(')') or $self->parser_error("choice/seq contains no closing bracket", $reader);
165 my ($self, $reader) = @_;
166 # cardinality is always optional
167 my $data = $reader->data;
168 if ($data =~ /^([\?\+\*])/) {
169 $reader->move_along(1);
176 my ($self, $reader) = @_;
181 if (my $name = $self->Name($reader)) {
182 return $name . $self->Cardinality($reader);
185 return $name if defined $name;
186 return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
190 my ($self, $reader) = @_;
193 $self->skip_whitespace($reader);
195 while ($reader->match('|')) {
196 $self->skip_whitespace($reader);
198 $model .= $self->Cp($reader);
199 $self->skip_whitespace($reader);
206 my ($self, $reader) = @_;
209 $self->skip_whitespace($reader);
211 while ($reader->match(',')) {
212 $self->skip_whitespace($reader);
213 my $cp = $self->Cp($reader);
218 $self->skip_whitespace($reader);
225 my ($self, $reader) = @_;
227 my $data = $reader->data(9);
228 if ($data =~ /^<!ATTLIST/) {
231 $reader->move_along(9);
233 $self->skip_whitespace($reader) ||
234 $self->parser_error("No whitespace after ATTLIST declaration", $reader);
235 my $name = $self->Name($reader);
237 $self->AttDefList($reader, $name);
239 $self->skip_whitespace($reader);
241 $reader->match('>') or $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader);
250 my ($self, $reader, $name) = @_;
252 1 while $self->AttDef($reader, $name);
256 my ($self, $reader, $el_name) = @_;
258 $self->skip_whitespace($reader) || return 0;
259 my $att_name = $self->Name($reader) || return 0;
260 $self->skip_whitespace($reader) ||
261 $self->parser_error("No whitespace after Name in attribute definition", $reader);
262 my $att_type = $self->AttType($reader);
264 $self->skip_whitespace($reader) ||
265 $self->parser_error("No whitespace after AttType in attribute definition", $reader);
266 my ($mode, $value) = $self->DefaultDecl($reader);
268 # fire SAX event here!
269 $self->attribute_decl({
280 my ($self, $reader) = @_;
282 return $self->StringType($reader) ||
283 $self->TokenizedType($reader) ||
284 $self->EnumeratedType($reader) ||
285 $self->parser_error("Can't match AttType", $reader);
289 my ($self, $reader) = @_;
291 my $data = $reader->data(5);
292 return unless $data =~ /^CDATA/;
293 $reader->move_along(5);
298 my ($self, $reader) = @_;
300 my $data = $reader->data(8);
301 if ($data =~ /^(IDREFS?|ID|ENTITIES|ENTITY|NMTOKENS?)/) {
302 $reader->move_along(length($1));
309 my ($self, $reader) = @_;
310 return $self->NotationType($reader) || $self->Enumeration($reader);
314 my ($self, $reader) = @_;
316 my $data = $reader->data(8);
317 return unless $data =~ /^NOTATION/;
318 $reader->move_along(8);
320 $self->skip_whitespace($reader) ||
321 $self->parser_error("No whitespace after NOTATION", $reader);
322 $reader->match('(') or $self->parser_error("No opening bracket in notation section", $reader);
324 $self->skip_whitespace($reader);
325 my $model = 'NOTATION (';
326 my $name = $self->Name($reader) ||
327 $self->parser_error("No name in notation section", $reader);
329 $self->skip_whitespace($reader);
330 $data = $reader->data;
331 while ($data =~ /^\|/) {
332 $reader->move_along(1);
334 $self->skip_whitespace($reader);
335 my $name = $self->Name($reader) ||
336 $self->parser_error("No name in notation section", $reader);
338 $self->skip_whitespace($reader);
339 $data = $reader->data;
341 $data =~ /^\)/ or $self->parser_error("No closing bracket in notation section", $reader);
342 $reader->move_along(1);
350 my ($self, $reader) = @_;
352 return unless $reader->match('(');
354 $self->skip_whitespace($reader);
356 my $nmtoken = $self->Nmtoken($reader) ||
357 $self->parser_error("No Nmtoken in enumerated declaration", $reader);
359 $self->skip_whitespace($reader);
360 my $data = $reader->data;
361 while ($data =~ /^\|/) {
363 $reader->move_along(1);
364 $self->skip_whitespace($reader);
365 my $nmtoken = $self->Nmtoken($reader) ||
366 $self->parser_error("No Nmtoken in enumerated declaration", $reader);
368 $self->skip_whitespace($reader);
369 $data = $reader->data;
371 $data =~ /^\)/ or $self->parser_error("No closing bracket in enumerated declaration", $reader);
372 $reader->move_along(1);
380 my ($self, $reader) = @_;
381 return $self->Name($reader);
385 my ($self, $reader) = @_;
387 my $data = $reader->data(9);
388 if ($data =~ /^(\#REQUIRED|\#IMPLIED)/) {
389 $reader->move_along(length($1));
393 if ($data =~ /^\#FIXED/) {
394 $reader->move_along(6);
395 $self->skip_whitespace($reader) || $self->parser_error(
396 "no whitespace after FIXED specifier", $reader);
397 my $value = $self->AttValue($reader);
398 return "#FIXED", $value;
400 my $value = $self->AttValue($reader);
401 return undef, $value;
405 my ($self, $reader) = @_;
407 my $data = $reader->data(8);
408 return 0 unless $data =~ /^<!ENTITY/;
409 $reader->move_along(8);
411 $self->skip_whitespace($reader) || $self->parser_error(
412 "No whitespace after ENTITY declaration", $reader);
414 $self->PEDecl($reader) || $self->GEDecl($reader);
416 $self->skip_whitespace($reader);
418 $reader->match('>') or $self->parser_error("No closing '>' in entity definition", $reader);
424 my ($self, $reader) = @_;
426 my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader);
427 $self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader);
429 # TODO: ExternalID calls lexhandler method. Wrong place for it.
431 if ($value = $self->ExternalID($reader)) {
432 $value .= $self->NDataDecl($reader);
435 $value = $self->EntityValue($reader);
438 if ($self->{ParseOptions}{entities}{$name}) {
439 warn("entity $name already exists\n");
441 $self->{ParseOptions}{entities}{$name} = 1;
442 $self->{ParseOptions}{expanded_entity}{$name} = $value; # ???
449 my ($self, $reader) = @_;
451 return 0 unless $reader->match('%');
453 $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader);
454 my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader);
455 $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader);
456 my $value = $self->ExternalID($reader) ||
457 $self->EntityValue($reader) ||
458 $self->parser_error("PE is not a value or an external resource", $reader);
463 my $quotre = qr/[^%&\"]/;
464 my $aposre = qr/[^%&\']/;
467 my ($self, $reader) = @_;
469 my $data = $reader->data;
473 $data =~ /^'/ or $self->parser_error("Not a quote character", $reader);
477 $reader->move_along(1);
482 my $data = $reader->data;
484 $self->parser_error("EOF found while reading entity value", $reader)
485 unless length($data);
487 if ($data =~ /^($re+)/) {
490 $reader->move_along(length($match));
492 elsif ($reader->match('&')) {
493 # if it's a char ref, expand now:
494 if ($reader->match('#')) {
497 if ($reader->match('x')) {
498 my $data = $reader->data;
500 $self->parser_error("EOF looking for reference end", $reader)
501 unless length($data);
502 if ($data !~ /^([0-9a-fA-F]*)/) {
506 $reader->move_along(length($1));
507 if (length($1) == length($data)) {
508 $data = $reader->data;
514 $char = chr_ref(hex($ref));
518 my $data = $reader->data;
520 $self->parser_error("EOF looking for reference end", $reader)
521 unless length($data);
522 if ($data !~ /^([0-9]*)/) {
526 $reader->move_along(length($1));
527 if (length($1) == length($data)) {
528 $data = $reader->data;
536 $reader->match(';') ||
537 $self->parser_error("No semi-colon found after character reference", $reader);
538 if ($char !~ $SingleChar) { # match a single character
539 $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader);
544 # entity refs in entities get expanded later, so don't parse now.
548 elsif ($reader->match('%')) {
549 $value .= $self->PEReference($reader);
551 elsif ($reader->match($quote)) {
556 $self->parser_error("Invalid character in attribute value: " . substr($reader->data, 0, 1), $reader);
564 my ($self, $reader) = @_;
565 $self->skip_whitespace($reader) || return '';
566 my $data = $reader->data(5);
567 return '' unless $data =~ /^NDATA/;
568 $reader->move_along(5);
569 $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader);
570 my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader);
571 return " NDATA $name";
575 my ($self, $reader) = @_;
577 my $data = $reader->data(10);
578 return 0 unless $data =~ /^<!NOTATION/;
579 $reader->move_along(10);
580 $self->skip_whitespace($reader) ||
581 $self->parser_error("No whitespace after NOTATION declaration", $reader);
582 $data = $reader->data;
585 $self->parser_error("EOF found while looking for end of NotationDecl", $reader)
586 unless length($data);
588 if ($data =~ /^([^>]*)>/) {
590 $reader->move_along(length($1) + 1);
591 $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" });
596 $reader->move_along(length($data));
597 $data = $reader->data;