18 chr(0x00) => "STUPIDXML(#x00)",
19 chr(0x01) => "STUPIDXML(#x01)",
20 chr(0x02) => "STUPIDXML(#x02)",
21 chr(0x03) => "STUPIDXML(#x03)",
22 chr(0x04) => "STUPIDXML(#x04)",
23 chr(0x05) => "STUPIDXML(#x05)",
24 chr(0x06) => "STUPIDXML(#x06)",
25 chr(0x07) => "STUPIDXML(#x07)",
26 chr(0x08) => "STUPIDXML(#x08)",
29 chr(0x0b) => "STUPIDXML(#x0b)",
30 chr(0x0c) => "STUPIDXML(#x0c)",
32 chr(0x0e) => "STUPIDXML(#x0e)",
33 chr(0x0f) => "STUPIDXML(#x0f)",
34 chr(0x10) => "STUPIDXML(#x10)",
35 chr(0x11) => "STUPIDXML(#x11)",
36 chr(0x12) => "STUPIDXML(#x12)",
37 chr(0x13) => "STUPIDXML(#x13)",
38 chr(0x14) => "STUPIDXML(#x14)",
39 chr(0x15) => "STUPIDXML(#x15)",
40 chr(0x16) => "STUPIDXML(#x16)",
41 chr(0x17) => "STUPIDXML(#x17)",
42 chr(0x18) => "STUPIDXML(#x18)",
43 chr(0x19) => "STUPIDXML(#x19)",
44 chr(0x1a) => "STUPIDXML(#x1a)",
45 chr(0x1b) => "STUPIDXML(#x1b)",
46 chr(0x1c) => "STUPIDXML(#x1c)",
47 chr(0x1d) => "STUPIDXML(#x1d)",
48 chr(0x1e) => "STUPIDXML(#x1e)",
49 chr(0x1f) => "STUPIDXML(#x1f)",
50 chr(0x7f) => "STUPIDXML(#x7f)",
51 chr(0x80) => "STUPIDXML(#x80)",
52 chr(0x81) => "STUPIDXML(#x81)",
53 chr(0x82) => "STUPIDXML(#x82)",
54 chr(0x83) => "STUPIDXML(#x83)",
55 chr(0x84) => "STUPIDXML(#x84)",
56 chr(0x86) => "STUPIDXML(#x86)",
57 chr(0x87) => "STUPIDXML(#x87)",
58 chr(0x88) => "STUPIDXML(#x88)",
59 chr(0x89) => "STUPIDXML(#x89)",
60 chr(0x90) => "STUPIDXML(#x90)",
61 chr(0x91) => "STUPIDXML(#x91)",
62 chr(0x92) => "STUPIDXML(#x92)",
63 chr(0x93) => "STUPIDXML(#x93)",
64 chr(0x94) => "STUPIDXML(#x94)",
65 chr(0x95) => "STUPIDXML(#x95)",
66 chr(0x96) => "STUPIDXML(#x96)",
67 chr(0x97) => "STUPIDXML(#x97)",
68 chr(0x98) => "STUPIDXML(#x98)",
69 chr(0x99) => "STUPIDXML(#x99)",
70 chr(0x9a) => "STUPIDXML(#x9a)",
71 chr(0x9b) => "STUPIDXML(#x9b)",
72 chr(0x9c) => "STUPIDXML(#x9c)",
73 chr(0x9d) => "STUPIDXML(#x9d)",
74 chr(0x9e) => "STUPIDXML(#x9e)",
75 chr(0x9f) => "STUPIDXML(#x9f)",
79 '"' => """, # XML idiocy
84 $text =~ s/(.)/$xmlish{$1} || $1/seg;
90 return xmlquote($self->{text});
95 return xmlquote($self->{rep});
100 my $array = $self->{Kids};
103 $in = ' ' x ($depth * 2);
104 foreach my $chunk (@$array) {
105 if (ref $chunk eq "ARRAY") {
109 $ret .= $chunk->xml();
116 $in = ' ' x ($depth * 2);
120 package P5RE::RE; BEGIN { our @ISA = 'P5RE'; }
124 my $kind = $self->{kind};
125 my $modifiers = $self->{modifiers} || "";
127 $modifiers = " modifiers=\"$modifiers\"";
129 my $text = "$in<$kind$modifiers>\n";
130 $text .= $self->xmlkids();
131 $text .= "$in</$kind>\n";
135 package P5RE::Alt; our @ISA = 'P5RE';
139 my $text = "$in<alt>\n";
140 $text .= $self->xmlkids();
141 $text .= "$in</alt>\n";
145 #package P5RE::Atom; our @ISA = 'P5RE';
149 # my $text = "$in<atom>\n";
150 # $text .= $self->xmlkids();
151 # $text .= "$in</atom>\n";
155 package P5RE::Quant; our @ISA = 'P5RE';
159 my $q = $self->{type};
160 my $text = "$in<quant type=\"$q\">\n";
161 $text .= $self->xmlkids();
162 $text .= "$in</quant>\n";
166 package P5RE::White; our @ISA = 'P5RE';
170 return "$in<white text=\"" . $self->text() . "\" />\n";
173 package P5RE::Char; our @ISA = 'P5RE';
177 return "$in<char text=\"" . $self->text() . "\" />\n";
180 package P5RE::Comment; our @ISA = 'P5RE';
184 return "$in<comment rep=\"" . $self->rep() . "\" />\n";
187 package P5RE::Mod; our @ISA = 'P5RE';
191 return "$in<mod modifiers=\"" . $self->{modifiers} . "\" />\n";
194 package P5RE::Meta; our @ISA = 'P5RE';
200 $sem = 'sem="' . $self->{sem} . '" '
202 return "$in<meta rep=\"" . $self->rep() . "\" $sem/>\n";
205 package P5RE::Var; our @ISA = 'P5RE';
209 return "$in<var name=\"" . $self->{name} . "\" />\n";
212 package P5RE::Closure; our @ISA = 'P5RE';
216 return "$in<closure rep=\"" . $self->{rep} . "\" />\n";
219 package P5RE::CClass; our @ISA = 'P5RE';
223 my $neg = $self->{neg} ? "negated" : "normal";
224 my $text = "$in<cclass match=\"$neg\">\n";
225 $text .= $self->xmlkids();
226 $text .= "$in</cclass>\n";
230 package P5RE::Range; our @ISA = 'P5RE';
234 my $text = "$in<range>\n";
235 $text .= $self->xmlkids();
236 $text .= "$in</range>\n";
251 return bless { Kids => [@alts], kind => $kind }, "P5RE::RE";
258 local $extended = $extended;
259 local $insensitive = $insensitive;
260 local $multiline = $multiline;
261 local $singleline = $singleline;
262 while ($quant = quant()) {
264 ref $quant eq ref $quants[-1] and
265 exists $quants[-1]{text} and
266 exists $quant->{text} )
268 $quants[-1]{text} .= $quant->{text};
271 push(@quants, $quant);
274 return bless { Kids => [@quants] }, "P5RE::Alt";
279 return 0 unless $atom;
280 # $atom = bless { Kids => [$atom] }, "P5RE::Atom";
281 if (s/^([*+?]\??|\{\d+(?:,\d*)?\}\??)//) {
282 return bless { Kids => [$atom], type => $1 }, "P5RE::Quant";
289 if ($_ eq "") { return 0 }
290 if (/^[)|]/) { return 0 }
292 # whitespace is special because we don't know if /x is in effect
294 if (s/^(?=\s|#)(\s*(?:#.*)?)//) { return bless { text => $1 }, "P5RE::White"; }
297 # all the parenthesized forms
302 elsif (s/^(\?#.*?)\)/)/) {
303 $re = bless { rep => "($1)" }, "P5RE::Comment";
306 $re = re('lookahead');
309 $re = re('neglookahead');
312 $re = re('lookbehind');
315 $re = re('neglookbehind');
318 $re = re('nobacktrack');
320 elsif (s/^(\?\??\{.*?\})\)/)/) {
321 $re = bless { rep => "($1)" }, "P5RE::Closure";
323 elsif (s/^(\?\(\d+\))//) {
325 $re = re('conditional');
326 $re->{modifiers} = "$mods";
328 elsif (s/^\?(?=\(\?)//) {
331 $re = re('conditional');
332 unshift(@{$re->{Kids}}, $cond);
334 elsif (s/^(\?[-imsx]+)://) {
336 local $extended = $extended;
337 local $insensitive = $insensitive;
338 local $multiline = $multiline;
339 local $singleline = $singleline;
342 $re->{modifiers} = "$mods";
344 elsif (s/^(\?[-imsx]+)//) {
346 $re = bless { modifiers => "($mods)" }, "P5RE::Mod";
350 $re = re('UNRECOGNIZED');
356 if (not s/^\)//) { die "Expected right paren at: '$_'" }
362 my $s = $singleline ? '.' : '\N';
363 return bless { rep => '.', sem => $s }, "P5RE::Meta";
366 my $s = $multiline ? '^^' : '^';
367 return bless { rep => '^', sem => $s }, "P5RE::Meta";
369 if (s/^\$(?:$|(?=[|)]))//) {
370 my $s = $multiline ? '$$' : '$';
371 return bless { rep => '$', sem => $s }, "P5RE::Meta";
373 if (s/^([\$\@](\w+|.))//) { # XXX need to handle subscripts here
374 return bless { name => $1 }, "P5RE::Var";
380 if (not s/^\]//) { die "Expected right paren at: '$_'" }
386 return bless { rep => onechar() }, "P5RE::Meta";
389 # optimization, would happen anyway
390 if (s/^(\w+)//) { return bless { text => $1 }, "P5RE::Char"; }
393 if (s/^(.)//) { return bless { text => $1 }, "P5RE::Char"; }
400 if (s/^\^//) { $neg = 1 }
401 if (s/^([\]\-])//) { $cclass .= $1 }
403 while ($_ ne "" and not /^\]/) {
406 my $o1 = onecharobj();
408 push @cclass, bless { text => $cclass }, "P5RE::Char";
412 if (s/^-(?=[^]])//) {
413 my $o2 = onecharobj();
414 push @cclass, bless { Kids => [$o1, $o2] }, "P5RE::Range";
420 elsif (s/^(\[([:=.])\^?\w*\2\])//) {
422 push @cclass, bless { text => $cclass }, "P5RE::Char";
425 push @cclass, bless { rep => $1 }, "P5RE::Meta";
428 $cclass .= onechar();
433 push @cclass, bless { text => $cclass }, "P5RE::Char";
435 return bless { Kids => [@cclass], neg => $neg }, "P5RE::CClass";
441 $ch = bless { rep => $ch }, "P5RE::Meta";
444 $ch = bless { text => $ch }, "P5RE::Char";
449 die "Oops, short cclass" unless s/^(.)//;
452 if (s/^([rntf]|[0-7]{1,4})//) { $ch .= $1 }
453 elsif (s/^(x[0-9a-fA-f]{1,2})//) { $ch .= $1 }
454 elsif (s/^(x\{[0-9a-fA-f]+\})//) { $ch .= $1 }
455 elsif (s/^([NpP]\{.*?\})//) { $ch .= $1 }
456 elsif (s/^([cpP].)//) { $ch .= $1 }
457 elsif (s/^(.)//) { $ch .= $1 }
459 die "Oops, short backwhack";
467 if ($mods =~ /\-.*x/) {
470 elsif ($mods =~ /x/) {
473 if ($mods =~ /\-.*i/) {
476 elsif ($mods =~ /i/) {
479 if ($mods =~ /\-.*m/) {
482 elsif ($mods =~ /m/) {
485 if ($mods =~ /\-.*s/) {
488 elsif ($mods =~ /s/) {
495 s/^(\W)(.*)\1(\w*)$/$2/;
497 substr($_,0,0) = "(?$mod)" if $mod ne "";
503 while (my $line = <>) {
505 my $x = P5RE::reparse($line);
507 print "#######################################\n";