Commit | Line | Data |
af6ca1d0 |
1 | package ExtUtils::Constant::Base; |
2 | |
3 | use strict; |
4 | use vars qw($VERSION $is_perl56); |
5 | use Carp; |
6 | use Text::Wrap; |
7 | use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); |
71adef12 |
8 | $VERSION = '0.03'; |
af6ca1d0 |
9 | |
10 | $is_perl56 = ($] < 5.007 && $] > 5.005_50); |
11 | |
12 | |
13 | =head1 NAME |
14 | |
15 | ExtUtils::Constant::Base - base class for ExtUtils::Constant objects |
16 | |
17 | =head1 SYNOPSIS |
18 | |
19 | require ExtUtils::Constant::Base; |
20 | @ISA = 'ExtUtils::Constant::Base'; |
21 | |
22 | =head1 DESCRIPTION |
23 | |
24 | ExtUtils::Constant::Base provides a base implementation of methods to |
25 | generate C code to give fast constant value lookup by named string. Currently |
26 | it's mostly used ExtUtils::Constant::XS, which generates the lookup code |
27 | for the constant() subroutine found in many XS modules. |
28 | |
29 | =head1 USAGE |
30 | |
31 | ExtUtils::Constant::Base exports no subroutines. The following methods are |
32 | available |
33 | |
34 | =over 4 |
35 | |
36 | =cut |
37 | |
38 | sub valid_type { |
39 | # Default to assuming that you don't need different types of return data. |
40 | 1; |
41 | } |
42 | sub default_type { |
43 | ''; |
44 | } |
45 | |
46 | =item header |
47 | |
48 | A method returning a scalar containing definitions needed, typically for a |
49 | C header file. |
50 | |
51 | =cut |
52 | |
53 | sub header { |
54 | '' |
55 | } |
56 | |
57 | # This might actually be a return statement. Note that you are responsible |
58 | # for any space you might need before your value, as it lets to perform |
59 | # "tricks" such as "return KEY_" and have strings appended. |
60 | sub assignment_clause_for_type; |
61 | # In which case this might be an empty string |
62 | sub return_statement_for_type {undef}; |
63 | sub return_statement_for_notdef; |
64 | sub return_statement_for_notfound; |
65 | |
66 | # "#if 1" is true to a C pre-processor |
67 | sub macro_from_name { |
68 | 1; |
69 | } |
70 | |
c48e04e6 |
71 | sub macro_from_item { |
72 | 1; |
73 | } |
74 | |
329b93cd |
75 | sub macro_to_ifdef { |
76 | my ($self, $macro) = @_; |
77 | if (ref $macro) { |
78 | return $macro->[0]; |
79 | } |
80 | if (defined $macro && $macro ne "" && $macro ne "1") { |
71adef12 |
81 | return $macro ? "#ifdef $macro\n" : "#if 0\n"; |
329b93cd |
82 | } |
83 | return ""; |
84 | } |
85 | |
86 | sub macro_to_endif { |
87 | my ($self, $macro) = @_; |
88 | |
89 | if (ref $macro) { |
90 | return $macro->[1]; |
91 | } |
92 | if (defined $macro && $macro ne "" && $macro ne "1") { |
93 | return "#endif\n"; |
94 | } |
95 | return ""; |
96 | } |
97 | |
af6ca1d0 |
98 | sub name_param { |
99 | 'name'; |
100 | } |
101 | |
102 | # This is possibly buggy, in that it's not mandatory (below, in the main |
103 | # C_constant parameters, but is expected to exist here, if it's needed) |
104 | # Buggy because if you're definitely pure 8 bit only, and will never be |
105 | # presented with your constants in utf8, the default form of C_constant can't |
106 | # be told not to do the utf8 version. |
107 | |
108 | sub is_utf8_param { |
109 | 'utf8'; |
110 | } |
111 | |
112 | sub memEQ { |
113 | "!memcmp"; |
114 | } |
115 | |
116 | =item memEQ_clause args_hashref |
117 | |
118 | A method to return a suitable C C<if> statement to check whether I<name> |
119 | is equal to the C variable C<name>. If I<checked_at> is defined, then it |
120 | is used to avoid C<memEQ> for short names, or to generate a comment to |
121 | highlight the position of the character in the C<switch> statement. |
122 | |
123 | If i<checked_at> is a reference to a scalar, then instead it gives |
124 | the characters pre-checked at the beginning, (and the number of chars by |
125 | which the C variable name has been advanced. These need to be chopped from |
126 | the front of I<name>). |
127 | |
128 | =cut |
129 | |
130 | sub memEQ_clause { |
131 | # if (memEQ(name, "thingy", 6)) { |
132 | # Which could actually be a character comparison or even "" |
133 | my ($self, $args) = @_; |
134 | my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; |
135 | $indent = ' ' x ($indent || 4); |
136 | my $front_chop; |
137 | if (ref $checked_at) { |
138 | # regexp won't work on 5.6.1 without use utf8; in turn that won't work |
139 | # on 5.005_03. |
140 | substr ($name, 0, length $$checked_at,) = ''; |
141 | $front_chop = C_stringify ($$checked_at); |
142 | undef $checked_at; |
143 | } |
144 | my $len = length $name; |
145 | |
146 | if ($len < 2) { |
147 | return $indent . "{\n" |
148 | if (defined $checked_at and $checked_at == 0) or $len == 0; |
149 | # We didn't switch, drop through to the code for the 2 character string |
150 | $checked_at = 1; |
151 | } |
152 | |
153 | my $name_param = $self->name_param; |
154 | |
155 | if ($len < 3 and defined $checked_at) { |
156 | my $check; |
157 | if ($checked_at == 1) { |
158 | $check = 0; |
159 | } elsif ($checked_at == 0) { |
160 | $check = 1; |
161 | } |
162 | if (defined $check) { |
163 | my $char = C_stringify (substr $name, $check, 1); |
164 | # Placate 5.005 with a break in the string. I can't see a good way of |
165 | # getting it to not take [ as introducing an array lookup, even with |
166 | # ${name_param}[$check] |
167 | return $indent . "if ($name_param" . "[$check] == '$char') {\n"; |
168 | } |
169 | } |
170 | if (($len == 2 and !defined $checked_at) |
171 | or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { |
172 | my $char1 = C_stringify (substr $name, 0, 1); |
173 | my $char2 = C_stringify (substr $name, 1, 1); |
174 | return $indent . |
175 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; |
176 | } |
177 | if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { |
178 | my $char1 = C_stringify (substr $name, 0, 1); |
179 | my $char2 = C_stringify (substr $name, 2, 1); |
180 | return $indent . |
181 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; |
182 | } |
183 | |
184 | my $pointer = '^'; |
185 | my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; |
186 | if ($have_checked_last) { |
187 | # Checked at the last character, so no need to memEQ it. |
188 | $pointer = C_stringify (chop $name); |
189 | $len--; |
190 | } |
191 | |
192 | $name = C_stringify ($name); |
193 | my $memEQ = $self->memEQ(); |
194 | my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; |
195 | # Put a little ^ under the letter we checked at |
196 | # Screws up for non printable and non-7 bit stuff, but that's too hard to |
197 | # get right. |
198 | if (defined $checked_at) { |
199 | $body .= $indent . "/* " . (' ' x length $memEQ) |
200 | . (' ' x length $name_param) |
201 | . (' ' x $checked_at) . $pointer |
202 | . (' ' x ($len - $checked_at + length $len)) . " */\n"; |
203 | } elsif (defined $front_chop) { |
204 | $body .= $indent . "/* $front_chop" |
205 | . (' ' x ($len + 1 + length $len)) . " */\n"; |
206 | } |
207 | return $body; |
208 | } |
209 | |
210 | =item dump_names arg_hashref, ITEM... |
211 | |
212 | An internal function to generate the embedded perl code that will regenerate |
213 | the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the |
214 | same as for C_constant. I<indent> is treated as number of spaces to indent |
215 | by. If C<declare_types> is true a C<$types> is always declared in the perl |
216 | code generated, if defined and false never declared, and if undefined C<$types> |
217 | is only declared if the values in I<types> as passed in cannot be inferred from |
218 | I<default_types> and the I<ITEM>s. |
219 | |
220 | =cut |
221 | |
222 | sub dump_names { |
223 | my ($self, $args, @items) = @_; |
224 | my ($default_type, $what, $indent, $declare_types) |
225 | = @{$args}{qw(default_type what indent declare_types)}; |
226 | $indent = ' ' x ($indent || 0); |
227 | |
228 | my $result; |
229 | my (@simple, @complex, %used_types); |
230 | foreach (@items) { |
231 | my $type; |
232 | if (ref $_) { |
233 | $type = $_->{type} || $default_type; |
234 | if ($_->{utf8}) { |
235 | # For simplicity always skip the bytes case, and reconstitute this entry |
236 | # from its utf8 twin. |
237 | next if $_->{utf8} eq 'no'; |
238 | # Copy the hashref, as we don't want to mess with the caller's hashref. |
239 | $_ = {%$_}; |
240 | unless ($is_perl56) { |
241 | utf8::decode ($_->{name}); |
242 | } else { |
243 | $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; |
244 | } |
245 | delete $_->{utf8}; |
246 | } |
247 | } else { |
248 | $_ = {name=>$_}; |
249 | $type = $default_type; |
250 | } |
251 | $used_types{$type}++; |
252 | if ($type eq $default_type |
253 | # grr 5.6.1 |
254 | and length $_->{name} |
255 | and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) |
256 | and !defined ($_->{macro}) and !defined ($_->{value}) |
257 | and !defined ($_->{default}) and !defined ($_->{pre}) |
258 | and !defined ($_->{post}) and !defined ($_->{def_pre}) |
b760f360 |
259 | and !defined ($_->{def_post}) and !defined ($_->{weight})) { |
af6ca1d0 |
260 | # It's the default type, and the name consists only of A-Za-z0-9_ |
261 | push @simple, $_->{name}; |
262 | } else { |
263 | push @complex, $_; |
264 | } |
265 | } |
266 | |
267 | if (!defined $declare_types) { |
268 | # Do they pass in any types we weren't already using? |
269 | foreach (keys %$what) { |
270 | next if $used_types{$_}; |
271 | $declare_types++; # Found one in $what that wasn't used. |
272 | last; # And one is enough to terminate this loop |
273 | } |
274 | } |
275 | if ($declare_types) { |
276 | $result = $indent . 'my $types = {map {($_, 1)} qw(' |
277 | . join (" ", sort keys %$what) . ")};\n"; |
278 | } |
279 | local $Text::Wrap::huge = 'overflow'; |
280 | local $Text::Wrap::columns = 80; |
281 | $result .= wrap ($indent . "my \@names = (qw(", |
282 | $indent . " ", join (" ", sort @simple) . ")"); |
283 | if (@complex) { |
284 | foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { |
285 | my $name = perl_stringify $item->{name}; |
286 | my $line = ",\n$indent {name=>\"$name\""; |
287 | $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; |
288 | foreach my $thing (qw (macro value default pre post def_pre def_post)) { |
289 | my $value = $item->{$thing}; |
290 | if (defined $value) { |
291 | if (ref $value) { |
292 | $line .= ", $thing=>[\"" |
293 | . join ('", "', map {perl_stringify $_} @$value) . '"]'; |
294 | } else { |
295 | $line .= ", $thing=>\"" . perl_stringify($value) . "\""; |
296 | } |
297 | } |
298 | } |
299 | $line .= "}"; |
300 | # Ensure that the enclosing C comment doesn't end |
301 | # by turning */ into *" . "/ |
302 | $line =~ s!\*\/!\*" . "/!gs; |
303 | # gcc -Wall doesn't like finding /* inside a comment |
304 | $line =~ s!\/\*!/" . "\*!gs; |
305 | $result .= $line; |
306 | } |
307 | } |
308 | $result .= ");\n"; |
309 | |
310 | $result; |
311 | } |
312 | |
313 | =item assign arg_hashref, VALUE... |
314 | |
315 | A method to return a suitable assignment clause. If I<type> is aggregate |
316 | (eg I<PVN> expects both pointer and length) then there should be multiple |
317 | I<VALUE>s for the components. I<pre> and I<post> if defined give snippets |
318 | of C code to proceed and follow the assignment. I<pre> will be at the start |
319 | of a block, so variables may be defined in it. |
320 | |
321 | =cut |
322 | # Hmm. value undef to to NOTDEF? value () to do NOTFOUND? |
323 | |
324 | sub assign { |
325 | my $self = shift; |
326 | my $args = shift; |
b760f360 |
327 | my ($indent, $type, $pre, $post, $item) |
328 | = @{$args}{qw(indent type pre post item)}; |
af6ca1d0 |
329 | $post ||= ''; |
330 | my $clause; |
331 | my $close; |
332 | if ($pre) { |
333 | chomp $pre; |
334 | $close = "$indent}\n"; |
335 | $clause = $indent . "{\n"; |
336 | $indent .= " "; |
337 | $clause .= "$indent$pre"; |
338 | $clause .= ";" unless $pre =~ /;$/; |
339 | $clause .= "\n"; |
340 | } |
341 | confess "undef \$type" unless defined $type; |
342 | confess "Can't generate code for type $type" |
343 | unless $self->valid_type($type); |
344 | |
345 | $clause .= join '', map {"$indent$_\n"} |
b760f360 |
346 | $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); |
af6ca1d0 |
347 | chomp $post; |
348 | if (length $post) { |
349 | $clause .= "$post"; |
350 | $clause .= ";" unless $post =~ /;$/; |
351 | $clause .= "\n"; |
352 | } |
353 | my $return = $self->return_statement_for_type($type); |
354 | $clause .= "$indent$return\n" if defined $return; |
355 | $clause .= $close if $close; |
356 | return $clause; |
357 | } |
358 | |
359 | =item return_clause arg_hashref, ITEM |
360 | |
361 | A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref |
362 | (as passed to C<C_constant> and C<match_clause>. I<indent> is the number |
363 | of spaces to indent, defaulting to 6. |
364 | |
365 | =cut |
366 | |
367 | sub return_clause { |
368 | |
369 | ##ifdef thingy |
370 | # *iv_return = thingy; |
371 | # return PERL_constant_ISIV; |
372 | ##else |
373 | # return PERL_constant_NOTDEF; |
374 | ##endif |
375 | my ($self, $args, $item) = @_; |
376 | my $indent = $args->{indent}; |
377 | |
c48e04e6 |
378 | my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type) |
379 | = @$item{qw (name value default pre post def_pre def_post type)}; |
af6ca1d0 |
380 | $value = $name unless defined $value; |
c48e04e6 |
381 | my $macro = $self->macro_from_item($item); |
af6ca1d0 |
382 | $indent = ' ' x ($indent || 6); |
383 | unless (defined $type) { |
384 | # use Data::Dumper; print STDERR Dumper ($item); |
385 | confess "undef \$type"; |
386 | } |
387 | |
af6ca1d0 |
388 | ##ifdef thingy |
329b93cd |
389 | my $clause = $self->macro_to_ifdef($macro); |
af6ca1d0 |
390 | |
391 | # *iv_return = thingy; |
392 | # return PERL_constant_ISIV; |
393 | $clause |
b760f360 |
394 | .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, |
395 | item=>$item}, ref $value ? @$value : $value); |
af6ca1d0 |
396 | |
329b93cd |
397 | if (defined $macro && $macro ne "" && $macro ne "1") { |
af6ca1d0 |
398 | ##else |
399 | $clause .= "#else\n"; |
400 | |
401 | # return PERL_constant_NOTDEF; |
402 | if (!defined $default) { |
403 | my $notdef = $self->return_statement_for_notdef(); |
404 | $clause .= "$indent$notdef\n" if defined $notdef; |
405 | } else { |
406 | my @default = ref $default ? @$default : $default; |
407 | $type = shift @default; |
408 | $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, |
b760f360 |
409 | post=>$post, item=>$item}, @default); |
af6ca1d0 |
410 | } |
af6ca1d0 |
411 | } |
329b93cd |
412 | ##endif |
413 | $clause .= $self->macro_to_endif($macro); |
414 | |
af6ca1d0 |
415 | return $clause; |
416 | } |
417 | |
418 | sub match_clause { |
419 | # $offset defined if we have checked an offset. |
420 | my ($self, $args, $item) = @_; |
421 | my ($offset, $indent) = @{$args}{qw(checked_at indent)}; |
422 | $indent = ' ' x ($indent || 4); |
423 | my $body = ''; |
424 | my ($no, $yes, $either, $name, $inner_indent); |
425 | if (ref $item eq 'ARRAY') { |
426 | ($yes, $no) = @$item; |
427 | $either = $yes || $no; |
428 | confess "$item is $either expecting hashref in [0] || [1]" |
429 | unless ref $either eq 'HASH'; |
430 | $name = $either->{name}; |
431 | } else { |
432 | confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" |
433 | if $item->{utf8}; |
434 | $name = $item->{name}; |
435 | $inner_indent = $indent; |
436 | } |
437 | |
438 | $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, |
439 | indent => length $indent}); |
440 | # If we've been presented with an arrayref for $item, then the user string |
441 | # contains in the range 128-255, and we need to check whether it was utf8 |
442 | # (or not). |
443 | # In the worst case we have two named constants, where one's name happens |
444 | # encoded in UTF8 happens to be the same byte sequence as the second's |
445 | # encoded in (say) ISO-8859-1. |
446 | # In this case, $yes and $no both have item hashrefs. |
447 | if ($yes) { |
448 | $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; |
449 | } elsif ($no) { |
450 | $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; |
451 | } |
452 | if ($either) { |
453 | $body .= $self->return_clause ({indent=>4 + length $indent}, $either); |
454 | if ($yes and $no) { |
455 | $body .= $indent . " } else {\n"; |
456 | $body .= $self->return_clause ({indent=>4 + length $indent}, $no); |
457 | } |
458 | $body .= $indent . " }\n"; |
459 | } else { |
460 | $body .= $self->return_clause ({indent=>2 + length $indent}, $item); |
461 | } |
462 | $body .= $indent . "}\n"; |
463 | } |
464 | |
465 | |
466 | =item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... |
467 | |
468 | An internal method to generate a suitable C<switch> clause, called by |
469 | C<C_constant> I<ITEM>s are in the hash ref format as given in the description |
470 | of C<C_constant>, and must all have the names of the same length, given by |
471 | I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being |
472 | the hashrefs in the I<ITEM> list. (No parameters are modified, and there can |
473 | be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without |
474 | causing problems - the hash is passed in to save generating it afresh for |
475 | each call). |
476 | |
477 | =cut |
478 | |
479 | sub switch_clause { |
480 | my ($self, $args, $namelen, $items, @items) = @_; |
481 | my ($indent, $comment) = @{$args}{qw(indent comment)}; |
482 | $indent = ' ' x ($indent || 2); |
483 | |
484 | local $Text::Wrap::huge = 'overflow'; |
485 | local $Text::Wrap::columns = 80; |
486 | |
487 | my @names = sort map {$_->{name}} @items; |
488 | my $leader = $indent . '/* '; |
489 | my $follower = ' ' x length $leader; |
490 | my $body = $indent . "/* Names all of length $namelen. */\n"; |
491 | if (defined $comment) { |
492 | $body = wrap ($leader, $follower, $comment) . "\n"; |
493 | $leader = $follower; |
494 | } |
495 | my @safe_names = @names; |
496 | foreach (@safe_names) { |
497 | confess sprintf "Name '$_' is length %d, not $namelen", length |
498 | unless length == $namelen; |
499 | # Argh. 5.6.1 |
500 | # next unless tr/A-Za-z0-9_//c; |
501 | next if tr/A-Za-z0-9_// == length; |
502 | $_ = '"' . perl_stringify ($_) . '"'; |
503 | # Ensure that the enclosing C comment doesn't end |
504 | # by turning */ into *" . "/ |
505 | s!\*\/!\*"."/!gs; |
506 | # gcc -Wall doesn't like finding /* inside a comment |
507 | s!\/\*!/"."\*!gs; |
508 | } |
509 | $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; |
510 | # Figure out what to switch on. |
511 | # (RMS, Spread of jump table, Position, Hashref) |
512 | my @best = (1e38, ~0); |
513 | # Prefer the last character over the others. (As it lets us shorten the |
514 | # memEQ clause at no cost). |
515 | foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { |
516 | my ($min, $max) = (~0, 0); |
517 | my %spread; |
518 | if ($is_perl56) { |
519 | # Need proper Unicode preserving hash keys for bytes in range 128-255 |
520 | # here too, for some reason. grr 5.6.1 yet again. |
521 | tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; |
522 | } |
523 | foreach (@names) { |
524 | my $char = substr $_, $i, 1; |
525 | my $ord = ord $char; |
526 | confess "char $ord is out of range" if $ord > 255; |
527 | $max = $ord if $ord > $max; |
528 | $min = $ord if $ord < $min; |
529 | push @{$spread{$char}}, $_; |
530 | # warn "$_ $char"; |
531 | } |
532 | # I'm going to pick the character to split on that minimises the root |
533 | # mean square of the number of names in each case. Normally this should |
534 | # be the one with the most keys, but it may pick a 7 where the 8 has |
535 | # one long linear search. I'm not sure if RMS or just sum of squares is |
536 | # actually better. |
537 | # $max and $min are for the tie-breaker if the root mean squares match. |
538 | # Assuming that the compiler may be building a jump table for the |
539 | # switch() then try to minimise the size of that jump table. |
540 | # Finally use < not <= so that if it still ties the earliest part of |
541 | # the string wins. Because if that passes but the memEQ fails, it may |
542 | # only need the start of the string to bin the choice. |
543 | # I think. But I'm micro-optimising. :-) |
544 | # OK. Trump that. Now favour the last character of the string, before the |
545 | # rest. |
546 | my $ss; |
547 | $ss += @$_ * @$_ foreach values %spread; |
548 | my $rms = sqrt ($ss / keys %spread); |
549 | if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { |
550 | @best = ($rms, $max - $min, $i, \%spread); |
551 | } |
552 | } |
553 | confess "Internal error. Failed to pick a switch point for @names" |
554 | unless defined $best[2]; |
555 | # use Data::Dumper; print Dumper (@best); |
556 | my ($offset, $best) = @best[2,3]; |
557 | $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; |
558 | |
559 | my $do_front_chop = $offset == 0 && $namelen > 2; |
560 | if ($do_front_chop) { |
561 | $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; |
562 | } else { |
563 | $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; |
564 | } |
565 | foreach my $char (sort keys %$best) { |
566 | confess sprintf "'$char' is %d bytes long, not 1", length $char |
567 | if length ($char) != 1; |
568 | confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; |
569 | $body .= $indent . "case '" . C_stringify ($char) . "':\n"; |
b760f360 |
570 | foreach my $thisone (sort { |
571 | # Deal with the case of an item actually being an array ref to 1 or 2 |
4e1a4e48 |
572 | # hashrefs. Don't assign to $a or $b, as they're aliases to the orignal |
b760f360 |
573 | my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; |
574 | my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; |
4e1a4e48 |
575 | # Sort by weight first |
b760f360 |
576 | ($r->{weight} || 0) <=> ($l->{weight} || 0) |
577 | # Sort equal weights by name |
4e1a4e48 |
578 | or $l->{name} cmp $r->{name}} |
579 | # If this looks evil, maybe it is. $items is a |
580 | # hashref, and we're doing a hash slice on it |
581 | @{$items}{@{$best->{$char}}}) { |
af6ca1d0 |
582 | # warn "You are here"; |
583 | if ($do_front_chop) { |
584 | $body .= $self->match_clause ({indent => 2 + length $indent, |
585 | checked_at => \$char}, $thisone); |
586 | } else { |
587 | $body .= $self->match_clause ({indent => 2 + length $indent, |
588 | checked_at => $offset}, $thisone); |
589 | } |
590 | } |
591 | $body .= $indent . " break;\n"; |
592 | } |
593 | $body .= $indent . "}\n"; |
594 | return $body; |
595 | } |
596 | |
597 | sub C_constant_return_type { |
598 | "static int"; |
599 | } |
600 | |
601 | sub C_constant_prefix_param { |
602 | ''; |
603 | } |
604 | |
605 | sub C_constant_prefix_param_defintion { |
606 | ''; |
607 | } |
608 | |
f8901870 |
609 | sub name_param_definition { |
af6ca1d0 |
610 | "const char *" . $_[0]->name_param; |
611 | } |
612 | |
f8901870 |
613 | sub namelen_param { |
af6ca1d0 |
614 | 'len'; |
615 | } |
616 | |
f8901870 |
617 | sub namelen_param_definition { |
618 | 'size_t ' . $_[0]->namelen_param; |
af6ca1d0 |
619 | } |
620 | |
f8901870 |
621 | sub C_constant_other_params { |
af6ca1d0 |
622 | ''; |
623 | } |
624 | |
f8901870 |
625 | sub C_constant_other_params_defintion { |
af6ca1d0 |
626 | ''; |
627 | } |
628 | |
629 | =item params WHAT |
630 | |
631 | An "internal" method, subject to change, currently called to allow an |
632 | overriding class to cache information that will then be passed into all |
633 | the C<*param*> calls. (Yes, having to read the source to make sense of this is |
634 | considered a known bug). I<WHAT> is be a hashref of types the constant |
635 | function will return. In ExtUtils::Constant::XS this method is used to |
636 | returns a hashref keyed IV NV PV SV to show which combination of pointers will |
f8901870 |
637 | be needed in the C argument list generated by |
638 | C_constant_other_params_definition and C_constant_other_params |
af6ca1d0 |
639 | |
640 | =cut |
641 | |
642 | sub params { |
643 | ''; |
644 | } |
645 | |
646 | |
647 | =item dogfood arg_hashref, ITEM... |
648 | |
649 | An internal function to generate the embedded perl code that will regenerate |
650 | the constant subroutines. Parameters are the same as for C_constant. |
651 | |
652 | Currently the base class does nothing and returns an empty string. |
653 | |
654 | =cut |
655 | |
656 | sub dogfood { |
657 | '' |
658 | } |
659 | |
329b93cd |
660 | =item normalise_items args, default_type, seen_types, seen_items, ITEM... |
efa32bb4 |
661 | |
662 | Convert the items to a normalised form. For 8 bit and Unicode values converts |
663 | the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded. |
664 | |
665 | =cut |
666 | |
667 | sub normalise_items |
668 | { |
669 | my $self = shift; |
329b93cd |
670 | my $args = shift; |
efa32bb4 |
671 | my $default_type = shift; |
672 | my $what = shift; |
673 | my $items = shift; |
674 | my @new_items; |
675 | foreach my $orig (@_) { |
676 | my ($name, $item); |
677 | if (ref $orig) { |
678 | # Make a copy which is a normalised version of the ref passed in. |
679 | $name = $orig->{name}; |
680 | my ($type, $macro, $value) = @$orig{qw (type macro value)}; |
681 | $type ||= $default_type; |
682 | $what->{$type} = 1; |
683 | $item = {name=>$name, type=>$type}; |
684 | |
685 | undef $macro if defined $macro and $macro eq $name; |
686 | $item->{macro} = $macro if defined $macro; |
687 | undef $value if defined $value and $value eq $name; |
688 | $item->{value} = $value if defined $value; |
64bb7586 |
689 | foreach my $key (qw(default pre post def_pre def_post weight |
690 | not_constant)) { |
efa32bb4 |
691 | my $value = $orig->{$key}; |
692 | $item->{$key} = $value if defined $value; |
693 | # warn "$key $value"; |
694 | } |
695 | } else { |
696 | $name = $orig; |
697 | $item = {name=>$name, type=>$default_type}; |
698 | $what->{$default_type} = 1; |
699 | } |
700 | warn +(ref ($self) || $self) |
701 | . "doesn't know how to handle values of type $_ used in macro $name" |
702 | unless $self->valid_type ($item->{type}); |
703 | # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c |
704 | # doesn't work. Upgrade to 5.8 |
705 | # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { |
329b93cd |
706 | if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50 |
707 | || $args->{disable_utf8_duplication}) { |
efa32bb4 |
708 | # No characters outside 7 bit ASCII. |
709 | if (exists $items->{$name}) { |
710 | die "Multiple definitions for macro $name"; |
711 | } |
712 | $items->{$name} = $item; |
713 | } else { |
714 | # No characters outside 8 bit. This is hardest. |
715 | if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { |
716 | confess "Unexpected ASCII definition for macro $name"; |
717 | } |
718 | # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; |
719 | # if ($name !~ tr/\0-\377//c) { |
720 | if ($name =~ tr/\0-\377// == length $name) { |
721 | # if ($] < 5.007) { |
722 | # $name = pack "C*", unpack "U*", $name; |
723 | # } |
724 | $item->{utf8} = 'no'; |
725 | $items->{$name}[1] = $item; |
726 | push @new_items, $item; |
727 | # Copy item, to create the utf8 variant. |
728 | $item = {%$item}; |
729 | } |
730 | # Encode the name as utf8 bytes. |
731 | unless ($is_perl56) { |
732 | utf8::encode($name); |
733 | } else { |
734 | # warn "Was >$name< " . length ${name}; |
735 | $name = pack 'C*', unpack 'C*', $name . pack 'U*'; |
736 | # warn "Now '${name}' " . length ${name}; |
737 | } |
738 | if ($items->{$name}[0]) { |
739 | die "Multiple definitions for macro $name"; |
740 | } |
741 | $item->{utf8} = 'yes'; |
742 | $item->{name} = $name; |
743 | $items->{$name}[0] = $item; |
744 | # We have need for the utf8 flag. |
745 | $what->{''} = 1; |
746 | } |
747 | push @new_items, $item; |
748 | } |
749 | @new_items; |
750 | } |
751 | |
af6ca1d0 |
752 | =item C_constant arg_hashref, ITEM... |
753 | |
754 | A function that returns a B<list> of C subroutine definitions that return |
755 | the value and type of constants when passed the name by the XS wrapper. |
756 | I<ITEM...> gives a list of constant names. Each can either be a string, |
757 | which is taken as a C macro name, or a reference to a hash with the following |
758 | keys |
759 | |
760 | =over 8 |
761 | |
762 | =item name |
763 | |
764 | The name of the constant, as seen by the perl code. |
765 | |
766 | =item type |
767 | |
768 | The type of the constant (I<IV>, I<NV> etc) |
769 | |
770 | =item value |
771 | |
772 | A C expression for the value of the constant, or a list of C expressions if |
773 | the type is aggregate. This defaults to the I<name> if not given. |
774 | |
775 | =item macro |
776 | |
777 | The C pre-processor macro to use in the C<#ifdef>. This defaults to the |
778 | I<name>, and is mainly used if I<value> is an C<enum>. If a reference an |
779 | array is passed then the first element is used in place of the C<#ifdef> |
780 | line, and the second element in place of the C<#endif>. This allows |
781 | pre-processor constructions such as |
782 | |
783 | #if defined (foo) |
784 | #if !defined (bar) |
785 | ... |
786 | #endif |
787 | #endif |
788 | |
789 | to be used to determine if a constant is to be defined. |
790 | |
791 | A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> |
792 | test is omitted. |
793 | |
794 | =item default |
795 | |
796 | Default value to use (instead of C<croak>ing with "your vendor has not |
797 | defined...") to return if the macro isn't defined. Specify a reference to |
798 | an array with type followed by value(s). |
799 | |
800 | =item pre |
801 | |
802 | C code to use before the assignment of the value of the constant. This allows |
803 | you to use temporary variables to extract a value from part of a C<struct> |
804 | and return this as I<value>. This C code is places at the start of a block, |
805 | so you can declare variables in it. |
806 | |
807 | =item post |
808 | |
809 | C code to place between the assignment of value (to a temporary) and the |
810 | return from the function. This allows you to clear up anything in I<pre>. |
811 | Rarely needed. |
812 | |
813 | =item def_pre |
814 | |
815 | =item def_post |
816 | |
817 | Equivalents of I<pre> and I<post> for the default value. |
818 | |
819 | =item utf8 |
820 | |
821 | Generated internally. Is zero or undefined if name is 7 bit ASCII, |
822 | "no" if the name is 8 bit (and so should only match if SvUTF8() is false), |
823 | "yes" if the name is utf8 encoded. |
824 | |
825 | The internals automatically clone any name with characters 128-255 but none |
826 | 256+ (ie one that could be either in bytes or utf8) into a second entry |
827 | which is utf8 encoded. |
828 | |
b760f360 |
829 | =item weight |
830 | |
831 | Optional sorting weight for names, to determine the order of |
832 | linear testing when multiple names fall in the same case of a switch clause. |
833 | Higher comes earlier, undefined defaults to zero. |
834 | |
af6ca1d0 |
835 | =back |
836 | |
837 | In the argument hashref, I<package> is the name of the package, and is only |
838 | used in comments inside the generated C code. I<subname> defaults to |
839 | C<constant> if undefined. |
840 | |
841 | I<default_type> is the type returned by C<ITEM>s that don't specify their |
842 | type. It defaults to the value of C<default_type()>. I<types> should be given |
843 | either as a comma separated list of types that the C subroutine I<subname> |
844 | will generate or as a reference to a hash. I<default_type> will be added to |
845 | the list if not present, as will any types given in the list of I<ITEM>s. The |
846 | resultant list should be the same list of types that C<XS_constant> is |
847 | given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of |
848 | parameters to the constant function. I<indent> is currently unused and |
849 | ignored. In future it may be used to pass in information used to change the C |
850 | indentation style used.] The best way to maintain consistency is to pass in a |
851 | hash reference and let this function update it. |
852 | |
853 | I<breakout> governs when child functions of I<subname> are generated. If there |
854 | are I<breakout> or more I<ITEM>s with the same length of name, then the code |
855 | to switch between them is placed into a function named I<subname>_I<len>, for |
856 | example C<constant_5> for names 5 characters long. The default I<breakout> is |
857 | 3. A single C<ITEM> is always inlined. |
858 | |
859 | =cut |
860 | |
861 | # The parameter now BREAKOUT was previously documented as: |
862 | # |
863 | # I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of |
864 | # this length, and that the constant name passed in by perl is checked and |
865 | # also of this length. It is used during recursion, and should be C<undef> |
866 | # unless the caller has checked all the lengths during code generation, and |
867 | # the generated subroutine is only to be called with a name of this length. |
868 | # |
869 | # As you can see it now performs this function during recursion by being a |
870 | # scalar reference. |
871 | |
872 | sub C_constant { |
873 | my ($self, $args, @items) = @_; |
874 | my ($package, $subname, $default_type, $what, $indent, $breakout) = |
875 | @{$args}{qw(package subname default_type types indent breakout)}; |
876 | $package ||= 'Foo'; |
877 | $subname ||= 'constant'; |
878 | # I'm not using this. But a hashref could be used for full formatting without |
879 | # breaking this API |
880 | # $indent ||= 0; |
881 | |
882 | my ($namelen, $items); |
883 | if (ref $breakout) { |
884 | # We are called recursively. We trust @items to be normalised, $what to |
885 | # be a hashref, and pinch %$items from our parent to save recalculation. |
886 | ($namelen, $items) = @$breakout; |
887 | } else { |
efa32bb4 |
888 | $items = {}; |
af6ca1d0 |
889 | if ($is_perl56) { |
890 | # Need proper Unicode preserving hash keys. |
891 | require ExtUtils::Constant::Aaargh56Hash; |
af6ca1d0 |
892 | tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; |
893 | } |
894 | $breakout ||= 3; |
895 | $default_type ||= $self->default_type(); |
896 | if (!ref $what) { |
897 | # Convert line of the form IV,UV,NV to hash |
898 | $what = {map {$_ => 1} split /,\s*/, ($what || '')}; |
899 | # Figure out what types we're dealing with, and assign all unknowns to the |
900 | # default type |
901 | } |
329b93cd |
902 | @items = $self->normalise_items ({}, $default_type, $what, $items, @items); |
af6ca1d0 |
903 | # use Data::Dumper; print Dumper @items; |
904 | } |
905 | my $params = $self->params ($what); |
906 | |
907 | # Probably "static int" |
908 | my ($body, @subs); |
909 | $body = $self->C_constant_return_type($params) . "\n$subname (" |
910 | # Eg "pTHX_ " |
911 | . $self->C_constant_prefix_param_defintion($params) |
912 | # Probably "const char *name" |
f8901870 |
913 | . $self->name_param_definition($params); |
af6ca1d0 |
914 | # Something like ", STRLEN len" |
f8901870 |
915 | $body .= ", " . $self->namelen_param_definition($params) |
af6ca1d0 |
916 | unless defined $namelen; |
f8901870 |
917 | $body .= $self->C_constant_other_params_defintion($params); |
af6ca1d0 |
918 | $body .= ") {\n"; |
919 | |
920 | if (defined $namelen) { |
921 | # We are a child subroutine. Print the simple description |
922 | my $comment = 'When generated this function returned values for the list' |
923 | . ' of names given here. However, subsequent manual editing may have' |
924 | . ' added or removed some.'; |
925 | $body .= $self->switch_clause ({indent=>2, comment=>$comment}, |
926 | $namelen, $items, @items); |
927 | } else { |
928 | # We are the top level. |
929 | $body .= " /* Initially switch on the length of the name. */\n"; |
930 | $body .= $self->dogfood ({package => $package, subname => $subname, |
931 | default_type => $default_type, what => $what, |
932 | indent => $indent, breakout => $breakout}, |
933 | @items); |
f8901870 |
934 | $body .= ' switch ('.$self->namelen_param().") {\n"; |
af6ca1d0 |
935 | # Need to group names of the same length |
936 | my @by_length; |
937 | foreach (@items) { |
938 | push @{$by_length[length $_->{name}]}, $_; |
939 | } |
940 | foreach my $i (0 .. $#by_length) { |
941 | next unless $by_length[$i]; # None of this length |
942 | $body .= " case $i:\n"; |
943 | if (@{$by_length[$i]} == 1) { |
944 | my $only_thing = $by_length[$i]->[0]; |
945 | if ($only_thing->{utf8}) { |
946 | if ($only_thing->{utf8} eq 'yes') { |
947 | # With utf8 on flag item is passed in element 0 |
948 | $body .= $self->match_clause (undef, [$only_thing]); |
949 | } else { |
950 | # With utf8 off flag item is passed in element 1 |
951 | $body .= $self->match_clause (undef, [undef, $only_thing]); |
952 | } |
953 | } else { |
954 | $body .= $self->match_clause (undef, $only_thing); |
955 | } |
956 | } elsif (@{$by_length[$i]} < $breakout) { |
957 | $body .= $self->switch_clause ({indent=>4}, |
958 | $i, $items, @{$by_length[$i]}); |
959 | } else { |
960 | # Only use the minimal set of parameters actually needed by the types |
961 | # of the names of this length. |
962 | my $what = {}; |
963 | foreach (@{$by_length[$i]}) { |
964 | $what->{$_->{type}} = 1; |
965 | $what->{''} = 1 if $_->{utf8}; |
966 | } |
967 | $params = $self->params ($what); |
968 | push @subs, $self->C_constant ({package=>$package, |
969 | subname=>"${subname}_$i", |
970 | default_type => $default_type, |
971 | types => $what, indent => $indent, |
972 | breakout => [$i, $items]}, |
973 | @{$by_length[$i]}); |
974 | $body .= " return ${subname}_$i (" |
975 | # Eg "aTHX_ " |
976 | . $self->C_constant_prefix_param($params) |
977 | # Probably "name" |
978 | . $self->name_param($params); |
f8901870 |
979 | $body .= $self->C_constant_other_params($params); |
af6ca1d0 |
980 | $body .= ");\n"; |
981 | } |
982 | $body .= " break;\n"; |
983 | } |
984 | $body .= " }\n"; |
985 | } |
986 | my $notfound = $self->return_statement_for_notfound(); |
987 | $body .= " $notfound\n" if $notfound; |
988 | $body .= "}\n"; |
989 | return (@subs, $body); |
990 | } |
991 | |
992 | 1; |
993 | __END__ |
994 | |
995 | =back |
996 | |
997 | =head1 BUGS |
998 | |
999 | Not everything is documented yet. |
1000 | |
1001 | Probably others. |
1002 | |
1003 | =head1 AUTHOR |
1004 | |
1005 | Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and |
1006 | others |