Commit | Line | Data |
af6ca1d0 |
1 | package ExtUtils::Constant::Base; |
2 | |
3 | use strict; |
4 | use vars qw($VERSION $is_perl56); |
5 | use Carp; |
6 | use Text::Wrap; |
7 | use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); |
8 | |
9 | $VERSION = '0.01'; |
10 | |
11 | $is_perl56 = ($] < 5.007 && $] > 5.005_50); |
12 | |
13 | |
14 | =head1 NAME |
15 | |
16 | ExtUtils::Constant::Base - base class for ExtUtils::Constant objects |
17 | |
18 | =head1 SYNOPSIS |
19 | |
20 | require ExtUtils::Constant::Base; |
21 | @ISA = 'ExtUtils::Constant::Base'; |
22 | |
23 | =head1 DESCRIPTION |
24 | |
25 | ExtUtils::Constant::Base provides a base implementation of methods to |
26 | generate C code to give fast constant value lookup by named string. Currently |
27 | it's mostly used ExtUtils::Constant::XS, which generates the lookup code |
28 | for the constant() subroutine found in many XS modules. |
29 | |
30 | =head1 USAGE |
31 | |
32 | ExtUtils::Constant::Base exports no subroutines. The following methods are |
33 | available |
34 | |
35 | =over 4 |
36 | |
37 | =cut |
38 | |
39 | sub valid_type { |
40 | # Default to assuming that you don't need different types of return data. |
41 | 1; |
42 | } |
43 | sub default_type { |
44 | ''; |
45 | } |
46 | |
47 | =item header |
48 | |
49 | A method returning a scalar containing definitions needed, typically for a |
50 | C header file. |
51 | |
52 | =cut |
53 | |
54 | sub header { |
55 | '' |
56 | } |
57 | |
58 | # This might actually be a return statement. Note that you are responsible |
59 | # for any space you might need before your value, as it lets to perform |
60 | # "tricks" such as "return KEY_" and have strings appended. |
61 | sub assignment_clause_for_type; |
62 | # In which case this might be an empty string |
63 | sub return_statement_for_type {undef}; |
64 | sub return_statement_for_notdef; |
65 | sub return_statement_for_notfound; |
66 | |
67 | # "#if 1" is true to a C pre-processor |
68 | sub macro_from_name { |
69 | 1; |
70 | } |
71 | |
72 | sub name_param { |
73 | 'name'; |
74 | } |
75 | |
76 | # This is possibly buggy, in that it's not mandatory (below, in the main |
77 | # C_constant parameters, but is expected to exist here, if it's needed) |
78 | # Buggy because if you're definitely pure 8 bit only, and will never be |
79 | # presented with your constants in utf8, the default form of C_constant can't |
80 | # be told not to do the utf8 version. |
81 | |
82 | sub is_utf8_param { |
83 | 'utf8'; |
84 | } |
85 | |
86 | sub memEQ { |
87 | "!memcmp"; |
88 | } |
89 | |
90 | =item memEQ_clause args_hashref |
91 | |
92 | A method to return a suitable C C<if> statement to check whether I<name> |
93 | is equal to the C variable C<name>. If I<checked_at> is defined, then it |
94 | is used to avoid C<memEQ> for short names, or to generate a comment to |
95 | highlight the position of the character in the C<switch> statement. |
96 | |
97 | If i<checked_at> is a reference to a scalar, then instead it gives |
98 | the characters pre-checked at the beginning, (and the number of chars by |
99 | which the C variable name has been advanced. These need to be chopped from |
100 | the front of I<name>). |
101 | |
102 | =cut |
103 | |
104 | sub memEQ_clause { |
105 | # if (memEQ(name, "thingy", 6)) { |
106 | # Which could actually be a character comparison or even "" |
107 | my ($self, $args) = @_; |
108 | my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; |
109 | $indent = ' ' x ($indent || 4); |
110 | my $front_chop; |
111 | if (ref $checked_at) { |
112 | # regexp won't work on 5.6.1 without use utf8; in turn that won't work |
113 | # on 5.005_03. |
114 | substr ($name, 0, length $$checked_at,) = ''; |
115 | $front_chop = C_stringify ($$checked_at); |
116 | undef $checked_at; |
117 | } |
118 | my $len = length $name; |
119 | |
120 | if ($len < 2) { |
121 | return $indent . "{\n" |
122 | if (defined $checked_at and $checked_at == 0) or $len == 0; |
123 | # We didn't switch, drop through to the code for the 2 character string |
124 | $checked_at = 1; |
125 | } |
126 | |
127 | my $name_param = $self->name_param; |
128 | |
129 | if ($len < 3 and defined $checked_at) { |
130 | my $check; |
131 | if ($checked_at == 1) { |
132 | $check = 0; |
133 | } elsif ($checked_at == 0) { |
134 | $check = 1; |
135 | } |
136 | if (defined $check) { |
137 | my $char = C_stringify (substr $name, $check, 1); |
138 | # Placate 5.005 with a break in the string. I can't see a good way of |
139 | # getting it to not take [ as introducing an array lookup, even with |
140 | # ${name_param}[$check] |
141 | return $indent . "if ($name_param" . "[$check] == '$char') {\n"; |
142 | } |
143 | } |
144 | if (($len == 2 and !defined $checked_at) |
145 | or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { |
146 | my $char1 = C_stringify (substr $name, 0, 1); |
147 | my $char2 = C_stringify (substr $name, 1, 1); |
148 | return $indent . |
149 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; |
150 | } |
151 | if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { |
152 | my $char1 = C_stringify (substr $name, 0, 1); |
153 | my $char2 = C_stringify (substr $name, 2, 1); |
154 | return $indent . |
155 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; |
156 | } |
157 | |
158 | my $pointer = '^'; |
159 | my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; |
160 | if ($have_checked_last) { |
161 | # Checked at the last character, so no need to memEQ it. |
162 | $pointer = C_stringify (chop $name); |
163 | $len--; |
164 | } |
165 | |
166 | $name = C_stringify ($name); |
167 | my $memEQ = $self->memEQ(); |
168 | my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; |
169 | # Put a little ^ under the letter we checked at |
170 | # Screws up for non printable and non-7 bit stuff, but that's too hard to |
171 | # get right. |
172 | if (defined $checked_at) { |
173 | $body .= $indent . "/* " . (' ' x length $memEQ) |
174 | . (' ' x length $name_param) |
175 | . (' ' x $checked_at) . $pointer |
176 | . (' ' x ($len - $checked_at + length $len)) . " */\n"; |
177 | } elsif (defined $front_chop) { |
178 | $body .= $indent . "/* $front_chop" |
179 | . (' ' x ($len + 1 + length $len)) . " */\n"; |
180 | } |
181 | return $body; |
182 | } |
183 | |
184 | =item dump_names arg_hashref, ITEM... |
185 | |
186 | An internal function to generate the embedded perl code that will regenerate |
187 | the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the |
188 | same as for C_constant. I<indent> is treated as number of spaces to indent |
189 | by. If C<declare_types> is true a C<$types> is always declared in the perl |
190 | code generated, if defined and false never declared, and if undefined C<$types> |
191 | is only declared if the values in I<types> as passed in cannot be inferred from |
192 | I<default_types> and the I<ITEM>s. |
193 | |
194 | =cut |
195 | |
196 | sub dump_names { |
197 | my ($self, $args, @items) = @_; |
198 | my ($default_type, $what, $indent, $declare_types) |
199 | = @{$args}{qw(default_type what indent declare_types)}; |
200 | $indent = ' ' x ($indent || 0); |
201 | |
202 | my $result; |
203 | my (@simple, @complex, %used_types); |
204 | foreach (@items) { |
205 | my $type; |
206 | if (ref $_) { |
207 | $type = $_->{type} || $default_type; |
208 | if ($_->{utf8}) { |
209 | # For simplicity always skip the bytes case, and reconstitute this entry |
210 | # from its utf8 twin. |
211 | next if $_->{utf8} eq 'no'; |
212 | # Copy the hashref, as we don't want to mess with the caller's hashref. |
213 | $_ = {%$_}; |
214 | unless ($is_perl56) { |
215 | utf8::decode ($_->{name}); |
216 | } else { |
217 | $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; |
218 | } |
219 | delete $_->{utf8}; |
220 | } |
221 | } else { |
222 | $_ = {name=>$_}; |
223 | $type = $default_type; |
224 | } |
225 | $used_types{$type}++; |
226 | if ($type eq $default_type |
227 | # grr 5.6.1 |
228 | and length $_->{name} |
229 | and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) |
230 | and !defined ($_->{macro}) and !defined ($_->{value}) |
231 | and !defined ($_->{default}) and !defined ($_->{pre}) |
232 | and !defined ($_->{post}) and !defined ($_->{def_pre}) |
b760f360 |
233 | and !defined ($_->{def_post}) and !defined ($_->{weight})) { |
af6ca1d0 |
234 | # It's the default type, and the name consists only of A-Za-z0-9_ |
235 | push @simple, $_->{name}; |
236 | } else { |
237 | push @complex, $_; |
238 | } |
239 | } |
240 | |
241 | if (!defined $declare_types) { |
242 | # Do they pass in any types we weren't already using? |
243 | foreach (keys %$what) { |
244 | next if $used_types{$_}; |
245 | $declare_types++; # Found one in $what that wasn't used. |
246 | last; # And one is enough to terminate this loop |
247 | } |
248 | } |
249 | if ($declare_types) { |
250 | $result = $indent . 'my $types = {map {($_, 1)} qw(' |
251 | . join (" ", sort keys %$what) . ")};\n"; |
252 | } |
253 | local $Text::Wrap::huge = 'overflow'; |
254 | local $Text::Wrap::columns = 80; |
255 | $result .= wrap ($indent . "my \@names = (qw(", |
256 | $indent . " ", join (" ", sort @simple) . ")"); |
257 | if (@complex) { |
258 | foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { |
259 | my $name = perl_stringify $item->{name}; |
260 | my $line = ",\n$indent {name=>\"$name\""; |
261 | $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; |
262 | foreach my $thing (qw (macro value default pre post def_pre def_post)) { |
263 | my $value = $item->{$thing}; |
264 | if (defined $value) { |
265 | if (ref $value) { |
266 | $line .= ", $thing=>[\"" |
267 | . join ('", "', map {perl_stringify $_} @$value) . '"]'; |
268 | } else { |
269 | $line .= ", $thing=>\"" . perl_stringify($value) . "\""; |
270 | } |
271 | } |
272 | } |
273 | $line .= "}"; |
274 | # Ensure that the enclosing C comment doesn't end |
275 | # by turning */ into *" . "/ |
276 | $line =~ s!\*\/!\*" . "/!gs; |
277 | # gcc -Wall doesn't like finding /* inside a comment |
278 | $line =~ s!\/\*!/" . "\*!gs; |
279 | $result .= $line; |
280 | } |
281 | } |
282 | $result .= ");\n"; |
283 | |
284 | $result; |
285 | } |
286 | |
287 | =item assign arg_hashref, VALUE... |
288 | |
289 | A method to return a suitable assignment clause. If I<type> is aggregate |
290 | (eg I<PVN> expects both pointer and length) then there should be multiple |
291 | I<VALUE>s for the components. I<pre> and I<post> if defined give snippets |
292 | of C code to proceed and follow the assignment. I<pre> will be at the start |
293 | of a block, so variables may be defined in it. |
294 | |
295 | =cut |
296 | # Hmm. value undef to to NOTDEF? value () to do NOTFOUND? |
297 | |
298 | sub assign { |
299 | my $self = shift; |
300 | my $args = shift; |
b760f360 |
301 | my ($indent, $type, $pre, $post, $item) |
302 | = @{$args}{qw(indent type pre post item)}; |
af6ca1d0 |
303 | $post ||= ''; |
304 | my $clause; |
305 | my $close; |
306 | if ($pre) { |
307 | chomp $pre; |
308 | $close = "$indent}\n"; |
309 | $clause = $indent . "{\n"; |
310 | $indent .= " "; |
311 | $clause .= "$indent$pre"; |
312 | $clause .= ";" unless $pre =~ /;$/; |
313 | $clause .= "\n"; |
314 | } |
315 | confess "undef \$type" unless defined $type; |
316 | confess "Can't generate code for type $type" |
317 | unless $self->valid_type($type); |
318 | |
319 | $clause .= join '', map {"$indent$_\n"} |
b760f360 |
320 | $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); |
af6ca1d0 |
321 | chomp $post; |
322 | if (length $post) { |
323 | $clause .= "$post"; |
324 | $clause .= ";" unless $post =~ /;$/; |
325 | $clause .= "\n"; |
326 | } |
327 | my $return = $self->return_statement_for_type($type); |
328 | $clause .= "$indent$return\n" if defined $return; |
329 | $clause .= $close if $close; |
330 | return $clause; |
331 | } |
332 | |
333 | =item return_clause arg_hashref, ITEM |
334 | |
335 | A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref |
336 | (as passed to C<C_constant> and C<match_clause>. I<indent> is the number |
337 | of spaces to indent, defaulting to 6. |
338 | |
339 | =cut |
340 | |
341 | sub return_clause { |
342 | |
343 | ##ifdef thingy |
344 | # *iv_return = thingy; |
345 | # return PERL_constant_ISIV; |
346 | ##else |
347 | # return PERL_constant_NOTDEF; |
348 | ##endif |
349 | my ($self, $args, $item) = @_; |
350 | my $indent = $args->{indent}; |
351 | |
352 | my ($name, $value, $macro, $default, $pre, $post, $def_pre, $def_post, $type) |
353 | = @$item{qw (name value macro default pre post def_pre def_post type)}; |
354 | $value = $name unless defined $value; |
355 | $macro = $self->macro_from_name($item) unless defined $macro; |
356 | # "#if 1" is true to a C pre-processor |
357 | $macro = 1 if !defined $macro or $macro eq ''; |
358 | $indent = ' ' x ($indent || 6); |
359 | unless (defined $type) { |
360 | # use Data::Dumper; print STDERR Dumper ($item); |
361 | confess "undef \$type"; |
362 | } |
363 | |
364 | my $clause; |
365 | |
366 | ##ifdef thingy |
367 | if (ref $macro) { |
368 | $clause = $macro->[0]; |
369 | } elsif ($macro ne "1") { |
370 | $clause = "#ifdef $macro\n"; |
371 | } |
372 | |
373 | # *iv_return = thingy; |
374 | # return PERL_constant_ISIV; |
375 | $clause |
b760f360 |
376 | .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, |
377 | item=>$item}, ref $value ? @$value : $value); |
af6ca1d0 |
378 | |
379 | if (ref $macro or $macro ne "1") { |
380 | ##else |
381 | $clause .= "#else\n"; |
382 | |
383 | # return PERL_constant_NOTDEF; |
384 | if (!defined $default) { |
385 | my $notdef = $self->return_statement_for_notdef(); |
386 | $clause .= "$indent$notdef\n" if defined $notdef; |
387 | } else { |
388 | my @default = ref $default ? @$default : $default; |
389 | $type = shift @default; |
390 | $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, |
b760f360 |
391 | post=>$post, item=>$item}, @default); |
af6ca1d0 |
392 | } |
393 | |
394 | ##endif |
395 | if (ref $macro) { |
396 | $clause .= $macro->[1]; |
397 | } else { |
398 | $clause .= "#endif\n"; |
399 | } |
400 | } |
401 | return $clause; |
402 | } |
403 | |
404 | sub match_clause { |
405 | # $offset defined if we have checked an offset. |
406 | my ($self, $args, $item) = @_; |
407 | my ($offset, $indent) = @{$args}{qw(checked_at indent)}; |
408 | $indent = ' ' x ($indent || 4); |
409 | my $body = ''; |
410 | my ($no, $yes, $either, $name, $inner_indent); |
411 | if (ref $item eq 'ARRAY') { |
412 | ($yes, $no) = @$item; |
413 | $either = $yes || $no; |
414 | confess "$item is $either expecting hashref in [0] || [1]" |
415 | unless ref $either eq 'HASH'; |
416 | $name = $either->{name}; |
417 | } else { |
418 | confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" |
419 | if $item->{utf8}; |
420 | $name = $item->{name}; |
421 | $inner_indent = $indent; |
422 | } |
423 | |
424 | $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, |
425 | indent => length $indent}); |
426 | # If we've been presented with an arrayref for $item, then the user string |
427 | # contains in the range 128-255, and we need to check whether it was utf8 |
428 | # (or not). |
429 | # In the worst case we have two named constants, where one's name happens |
430 | # encoded in UTF8 happens to be the same byte sequence as the second's |
431 | # encoded in (say) ISO-8859-1. |
432 | # In this case, $yes and $no both have item hashrefs. |
433 | if ($yes) { |
434 | $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; |
435 | } elsif ($no) { |
436 | $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; |
437 | } |
438 | if ($either) { |
439 | $body .= $self->return_clause ({indent=>4 + length $indent}, $either); |
440 | if ($yes and $no) { |
441 | $body .= $indent . " } else {\n"; |
442 | $body .= $self->return_clause ({indent=>4 + length $indent}, $no); |
443 | } |
444 | $body .= $indent . " }\n"; |
445 | } else { |
446 | $body .= $self->return_clause ({indent=>2 + length $indent}, $item); |
447 | } |
448 | $body .= $indent . "}\n"; |
449 | } |
450 | |
451 | |
452 | =item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... |
453 | |
454 | An internal method to generate a suitable C<switch> clause, called by |
455 | C<C_constant> I<ITEM>s are in the hash ref format as given in the description |
456 | of C<C_constant>, and must all have the names of the same length, given by |
457 | I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being |
458 | the hashrefs in the I<ITEM> list. (No parameters are modified, and there can |
459 | be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without |
460 | causing problems - the hash is passed in to save generating it afresh for |
461 | each call). |
462 | |
463 | =cut |
464 | |
465 | sub switch_clause { |
466 | my ($self, $args, $namelen, $items, @items) = @_; |
467 | my ($indent, $comment) = @{$args}{qw(indent comment)}; |
468 | $indent = ' ' x ($indent || 2); |
469 | |
470 | local $Text::Wrap::huge = 'overflow'; |
471 | local $Text::Wrap::columns = 80; |
472 | |
473 | my @names = sort map {$_->{name}} @items; |
474 | my $leader = $indent . '/* '; |
475 | my $follower = ' ' x length $leader; |
476 | my $body = $indent . "/* Names all of length $namelen. */\n"; |
477 | if (defined $comment) { |
478 | $body = wrap ($leader, $follower, $comment) . "\n"; |
479 | $leader = $follower; |
480 | } |
481 | my @safe_names = @names; |
482 | foreach (@safe_names) { |
483 | confess sprintf "Name '$_' is length %d, not $namelen", length |
484 | unless length == $namelen; |
485 | # Argh. 5.6.1 |
486 | # next unless tr/A-Za-z0-9_//c; |
487 | next if tr/A-Za-z0-9_// == length; |
488 | $_ = '"' . perl_stringify ($_) . '"'; |
489 | # Ensure that the enclosing C comment doesn't end |
490 | # by turning */ into *" . "/ |
491 | s!\*\/!\*"."/!gs; |
492 | # gcc -Wall doesn't like finding /* inside a comment |
493 | s!\/\*!/"."\*!gs; |
494 | } |
495 | $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; |
496 | # Figure out what to switch on. |
497 | # (RMS, Spread of jump table, Position, Hashref) |
498 | my @best = (1e38, ~0); |
499 | # Prefer the last character over the others. (As it lets us shorten the |
500 | # memEQ clause at no cost). |
501 | foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { |
502 | my ($min, $max) = (~0, 0); |
503 | my %spread; |
504 | if ($is_perl56) { |
505 | # Need proper Unicode preserving hash keys for bytes in range 128-255 |
506 | # here too, for some reason. grr 5.6.1 yet again. |
507 | tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; |
508 | } |
509 | foreach (@names) { |
510 | my $char = substr $_, $i, 1; |
511 | my $ord = ord $char; |
512 | confess "char $ord is out of range" if $ord > 255; |
513 | $max = $ord if $ord > $max; |
514 | $min = $ord if $ord < $min; |
515 | push @{$spread{$char}}, $_; |
516 | # warn "$_ $char"; |
517 | } |
518 | # I'm going to pick the character to split on that minimises the root |
519 | # mean square of the number of names in each case. Normally this should |
520 | # be the one with the most keys, but it may pick a 7 where the 8 has |
521 | # one long linear search. I'm not sure if RMS or just sum of squares is |
522 | # actually better. |
523 | # $max and $min are for the tie-breaker if the root mean squares match. |
524 | # Assuming that the compiler may be building a jump table for the |
525 | # switch() then try to minimise the size of that jump table. |
526 | # Finally use < not <= so that if it still ties the earliest part of |
527 | # the string wins. Because if that passes but the memEQ fails, it may |
528 | # only need the start of the string to bin the choice. |
529 | # I think. But I'm micro-optimising. :-) |
530 | # OK. Trump that. Now favour the last character of the string, before the |
531 | # rest. |
532 | my $ss; |
533 | $ss += @$_ * @$_ foreach values %spread; |
534 | my $rms = sqrt ($ss / keys %spread); |
535 | if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { |
536 | @best = ($rms, $max - $min, $i, \%spread); |
537 | } |
538 | } |
539 | confess "Internal error. Failed to pick a switch point for @names" |
540 | unless defined $best[2]; |
541 | # use Data::Dumper; print Dumper (@best); |
542 | my ($offset, $best) = @best[2,3]; |
543 | $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; |
544 | |
545 | my $do_front_chop = $offset == 0 && $namelen > 2; |
546 | if ($do_front_chop) { |
547 | $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; |
548 | } else { |
549 | $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; |
550 | } |
551 | foreach my $char (sort keys %$best) { |
552 | confess sprintf "'$char' is %d bytes long, not 1", length $char |
553 | if length ($char) != 1; |
554 | confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; |
555 | $body .= $indent . "case '" . C_stringify ($char) . "':\n"; |
b760f360 |
556 | foreach my $thisone (sort { |
557 | # Deal with the case of an item actually being an array ref to 1 or 2 |
4e1a4e48 |
558 | # hashrefs. Don't assign to $a or $b, as they're aliases to the orignal |
b760f360 |
559 | my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; |
560 | my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; |
4e1a4e48 |
561 | # Sort by weight first |
b760f360 |
562 | ($r->{weight} || 0) <=> ($l->{weight} || 0) |
563 | # Sort equal weights by name |
4e1a4e48 |
564 | or $l->{name} cmp $r->{name}} |
565 | # If this looks evil, maybe it is. $items is a |
566 | # hashref, and we're doing a hash slice on it |
567 | @{$items}{@{$best->{$char}}}) { |
af6ca1d0 |
568 | # warn "You are here"; |
569 | if ($do_front_chop) { |
570 | $body .= $self->match_clause ({indent => 2 + length $indent, |
571 | checked_at => \$char}, $thisone); |
572 | } else { |
573 | $body .= $self->match_clause ({indent => 2 + length $indent, |
574 | checked_at => $offset}, $thisone); |
575 | } |
576 | } |
577 | $body .= $indent . " break;\n"; |
578 | } |
579 | $body .= $indent . "}\n"; |
580 | return $body; |
581 | } |
582 | |
583 | sub C_constant_return_type { |
584 | "static int"; |
585 | } |
586 | |
587 | sub C_constant_prefix_param { |
588 | ''; |
589 | } |
590 | |
591 | sub C_constant_prefix_param_defintion { |
592 | ''; |
593 | } |
594 | |
f8901870 |
595 | sub name_param_definition { |
af6ca1d0 |
596 | "const char *" . $_[0]->name_param; |
597 | } |
598 | |
f8901870 |
599 | sub namelen_param { |
af6ca1d0 |
600 | 'len'; |
601 | } |
602 | |
f8901870 |
603 | sub namelen_param_definition { |
604 | 'size_t ' . $_[0]->namelen_param; |
af6ca1d0 |
605 | } |
606 | |
f8901870 |
607 | sub C_constant_other_params { |
af6ca1d0 |
608 | ''; |
609 | } |
610 | |
f8901870 |
611 | sub C_constant_other_params_defintion { |
af6ca1d0 |
612 | ''; |
613 | } |
614 | |
615 | =item params WHAT |
616 | |
617 | An "internal" method, subject to change, currently called to allow an |
618 | overriding class to cache information that will then be passed into all |
619 | the C<*param*> calls. (Yes, having to read the source to make sense of this is |
620 | considered a known bug). I<WHAT> is be a hashref of types the constant |
621 | function will return. In ExtUtils::Constant::XS this method is used to |
622 | returns a hashref keyed IV NV PV SV to show which combination of pointers will |
f8901870 |
623 | be needed in the C argument list generated by |
624 | C_constant_other_params_definition and C_constant_other_params |
af6ca1d0 |
625 | |
626 | =cut |
627 | |
628 | sub params { |
629 | ''; |
630 | } |
631 | |
632 | |
633 | =item dogfood arg_hashref, ITEM... |
634 | |
635 | An internal function to generate the embedded perl code that will regenerate |
636 | the constant subroutines. Parameters are the same as for C_constant. |
637 | |
638 | Currently the base class does nothing and returns an empty string. |
639 | |
640 | =cut |
641 | |
642 | sub dogfood { |
643 | '' |
644 | } |
645 | |
646 | =item C_constant arg_hashref, ITEM... |
647 | |
648 | A function that returns a B<list> of C subroutine definitions that return |
649 | the value and type of constants when passed the name by the XS wrapper. |
650 | I<ITEM...> gives a list of constant names. Each can either be a string, |
651 | which is taken as a C macro name, or a reference to a hash with the following |
652 | keys |
653 | |
654 | =over 8 |
655 | |
656 | =item name |
657 | |
658 | The name of the constant, as seen by the perl code. |
659 | |
660 | =item type |
661 | |
662 | The type of the constant (I<IV>, I<NV> etc) |
663 | |
664 | =item value |
665 | |
666 | A C expression for the value of the constant, or a list of C expressions if |
667 | the type is aggregate. This defaults to the I<name> if not given. |
668 | |
669 | =item macro |
670 | |
671 | The C pre-processor macro to use in the C<#ifdef>. This defaults to the |
672 | I<name>, and is mainly used if I<value> is an C<enum>. If a reference an |
673 | array is passed then the first element is used in place of the C<#ifdef> |
674 | line, and the second element in place of the C<#endif>. This allows |
675 | pre-processor constructions such as |
676 | |
677 | #if defined (foo) |
678 | #if !defined (bar) |
679 | ... |
680 | #endif |
681 | #endif |
682 | |
683 | to be used to determine if a constant is to be defined. |
684 | |
685 | A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> |
686 | test is omitted. |
687 | |
688 | =item default |
689 | |
690 | Default value to use (instead of C<croak>ing with "your vendor has not |
691 | defined...") to return if the macro isn't defined. Specify a reference to |
692 | an array with type followed by value(s). |
693 | |
694 | =item pre |
695 | |
696 | C code to use before the assignment of the value of the constant. This allows |
697 | you to use temporary variables to extract a value from part of a C<struct> |
698 | and return this as I<value>. This C code is places at the start of a block, |
699 | so you can declare variables in it. |
700 | |
701 | =item post |
702 | |
703 | C code to place between the assignment of value (to a temporary) and the |
704 | return from the function. This allows you to clear up anything in I<pre>. |
705 | Rarely needed. |
706 | |
707 | =item def_pre |
708 | |
709 | =item def_post |
710 | |
711 | Equivalents of I<pre> and I<post> for the default value. |
712 | |
713 | =item utf8 |
714 | |
715 | Generated internally. Is zero or undefined if name is 7 bit ASCII, |
716 | "no" if the name is 8 bit (and so should only match if SvUTF8() is false), |
717 | "yes" if the name is utf8 encoded. |
718 | |
719 | The internals automatically clone any name with characters 128-255 but none |
720 | 256+ (ie one that could be either in bytes or utf8) into a second entry |
721 | which is utf8 encoded. |
722 | |
b760f360 |
723 | =item weight |
724 | |
725 | Optional sorting weight for names, to determine the order of |
726 | linear testing when multiple names fall in the same case of a switch clause. |
727 | Higher comes earlier, undefined defaults to zero. |
728 | |
af6ca1d0 |
729 | =back |
730 | |
731 | In the argument hashref, I<package> is the name of the package, and is only |
732 | used in comments inside the generated C code. I<subname> defaults to |
733 | C<constant> if undefined. |
734 | |
735 | I<default_type> is the type returned by C<ITEM>s that don't specify their |
736 | type. It defaults to the value of C<default_type()>. I<types> should be given |
737 | either as a comma separated list of types that the C subroutine I<subname> |
738 | will generate or as a reference to a hash. I<default_type> will be added to |
739 | the list if not present, as will any types given in the list of I<ITEM>s. The |
740 | resultant list should be the same list of types that C<XS_constant> is |
741 | given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of |
742 | parameters to the constant function. I<indent> is currently unused and |
743 | ignored. In future it may be used to pass in information used to change the C |
744 | indentation style used.] The best way to maintain consistency is to pass in a |
745 | hash reference and let this function update it. |
746 | |
747 | I<breakout> governs when child functions of I<subname> are generated. If there |
748 | are I<breakout> or more I<ITEM>s with the same length of name, then the code |
749 | to switch between them is placed into a function named I<subname>_I<len>, for |
750 | example C<constant_5> for names 5 characters long. The default I<breakout> is |
751 | 3. A single C<ITEM> is always inlined. |
752 | |
753 | =cut |
754 | |
755 | # The parameter now BREAKOUT was previously documented as: |
756 | # |
757 | # I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of |
758 | # this length, and that the constant name passed in by perl is checked and |
759 | # also of this length. It is used during recursion, and should be C<undef> |
760 | # unless the caller has checked all the lengths during code generation, and |
761 | # the generated subroutine is only to be called with a name of this length. |
762 | # |
763 | # As you can see it now performs this function during recursion by being a |
764 | # scalar reference. |
765 | |
766 | sub C_constant { |
767 | my ($self, $args, @items) = @_; |
768 | my ($package, $subname, $default_type, $what, $indent, $breakout) = |
769 | @{$args}{qw(package subname default_type types indent breakout)}; |
770 | $package ||= 'Foo'; |
771 | $subname ||= 'constant'; |
772 | # I'm not using this. But a hashref could be used for full formatting without |
773 | # breaking this API |
774 | # $indent ||= 0; |
775 | |
776 | my ($namelen, $items); |
777 | if (ref $breakout) { |
778 | # We are called recursively. We trust @items to be normalised, $what to |
779 | # be a hashref, and pinch %$items from our parent to save recalculation. |
780 | ($namelen, $items) = @$breakout; |
781 | } else { |
782 | if ($is_perl56) { |
783 | # Need proper Unicode preserving hash keys. |
784 | require ExtUtils::Constant::Aaargh56Hash; |
785 | $items = {}; |
786 | tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; |
787 | } |
788 | $breakout ||= 3; |
789 | $default_type ||= $self->default_type(); |
790 | if (!ref $what) { |
791 | # Convert line of the form IV,UV,NV to hash |
792 | $what = {map {$_ => 1} split /,\s*/, ($what || '')}; |
793 | # Figure out what types we're dealing with, and assign all unknowns to the |
794 | # default type |
795 | } |
796 | my @new_items; |
797 | foreach my $orig (@items) { |
798 | my ($name, $item); |
799 | if (ref $orig) { |
800 | # Make a copy which is a normalised version of the ref passed in. |
801 | $name = $orig->{name}; |
802 | my ($type, $macro, $value) = @$orig{qw (type macro value)}; |
803 | $type ||= $default_type; |
804 | $what->{$type} = 1; |
805 | $item = {name=>$name, type=>$type}; |
806 | |
807 | undef $macro if defined $macro and $macro eq $name; |
808 | $item->{macro} = $macro if defined $macro; |
809 | undef $value if defined $value and $value eq $name; |
810 | $item->{value} = $value if defined $value; |
b760f360 |
811 | foreach my $key (qw(default pre post def_pre def_post weight)) { |
af6ca1d0 |
812 | my $value = $orig->{$key}; |
813 | $item->{$key} = $value if defined $value; |
814 | # warn "$key $value"; |
815 | } |
816 | } else { |
817 | $name = $orig; |
818 | $item = {name=>$name, type=>$default_type}; |
819 | $what->{$default_type} = 1; |
820 | } |
821 | warn +(ref ($self) || $self) |
822 | . "doesn't know how to handle values of type $_ used in macro $name" |
823 | unless $self->valid_type ($item->{type}); |
824 | # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c |
825 | # doesn't work. Upgrade to 5.8 |
826 | # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { |
827 | if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50) { |
828 | # No characters outside 7 bit ASCII. |
829 | if (exists $items->{$name}) { |
830 | die "Multiple definitions for macro $name"; |
831 | } |
832 | $items->{$name} = $item; |
833 | } else { |
834 | # No characters outside 8 bit. This is hardest. |
835 | if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { |
836 | confess "Unexpected ASCII definition for macro $name"; |
837 | } |
838 | # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; |
839 | # if ($name !~ tr/\0-\377//c) { |
840 | if ($name =~ tr/\0-\377// == length $name) { |
841 | # if ($] < 5.007) { |
842 | # $name = pack "C*", unpack "U*", $name; |
843 | # } |
844 | $item->{utf8} = 'no'; |
845 | $items->{$name}[1] = $item; |
846 | push @new_items, $item; |
847 | # Copy item, to create the utf8 variant. |
848 | $item = {%$item}; |
849 | } |
850 | # Encode the name as utf8 bytes. |
851 | unless ($is_perl56) { |
852 | utf8::encode($name); |
853 | } else { |
854 | # warn "Was >$name< " . length ${name}; |
855 | $name = pack 'C*', unpack 'C*', $name . pack 'U*'; |
856 | # warn "Now '${name}' " . length ${name}; |
857 | } |
858 | if ($items->{$name}[0]) { |
859 | die "Multiple definitions for macro $name"; |
860 | } |
861 | $item->{utf8} = 'yes'; |
862 | $item->{name} = $name; |
863 | $items->{$name}[0] = $item; |
864 | # We have need for the utf8 flag. |
865 | $what->{''} = 1; |
866 | } |
867 | push @new_items, $item; |
868 | } |
869 | @items = @new_items; |
870 | # use Data::Dumper; print Dumper @items; |
871 | } |
872 | my $params = $self->params ($what); |
873 | |
874 | # Probably "static int" |
875 | my ($body, @subs); |
876 | $body = $self->C_constant_return_type($params) . "\n$subname (" |
877 | # Eg "pTHX_ " |
878 | . $self->C_constant_prefix_param_defintion($params) |
879 | # Probably "const char *name" |
f8901870 |
880 | . $self->name_param_definition($params); |
af6ca1d0 |
881 | # Something like ", STRLEN len" |
f8901870 |
882 | $body .= ", " . $self->namelen_param_definition($params) |
af6ca1d0 |
883 | unless defined $namelen; |
f8901870 |
884 | $body .= $self->C_constant_other_params_defintion($params); |
af6ca1d0 |
885 | $body .= ") {\n"; |
886 | |
887 | if (defined $namelen) { |
888 | # We are a child subroutine. Print the simple description |
889 | my $comment = 'When generated this function returned values for the list' |
890 | . ' of names given here. However, subsequent manual editing may have' |
891 | . ' added or removed some.'; |
892 | $body .= $self->switch_clause ({indent=>2, comment=>$comment}, |
893 | $namelen, $items, @items); |
894 | } else { |
895 | # We are the top level. |
896 | $body .= " /* Initially switch on the length of the name. */\n"; |
897 | $body .= $self->dogfood ({package => $package, subname => $subname, |
898 | default_type => $default_type, what => $what, |
899 | indent => $indent, breakout => $breakout}, |
900 | @items); |
f8901870 |
901 | $body .= ' switch ('.$self->namelen_param().") {\n"; |
af6ca1d0 |
902 | # Need to group names of the same length |
903 | my @by_length; |
904 | foreach (@items) { |
905 | push @{$by_length[length $_->{name}]}, $_; |
906 | } |
907 | foreach my $i (0 .. $#by_length) { |
908 | next unless $by_length[$i]; # None of this length |
909 | $body .= " case $i:\n"; |
910 | if (@{$by_length[$i]} == 1) { |
911 | my $only_thing = $by_length[$i]->[0]; |
912 | if ($only_thing->{utf8}) { |
913 | if ($only_thing->{utf8} eq 'yes') { |
914 | # With utf8 on flag item is passed in element 0 |
915 | $body .= $self->match_clause (undef, [$only_thing]); |
916 | } else { |
917 | # With utf8 off flag item is passed in element 1 |
918 | $body .= $self->match_clause (undef, [undef, $only_thing]); |
919 | } |
920 | } else { |
921 | $body .= $self->match_clause (undef, $only_thing); |
922 | } |
923 | } elsif (@{$by_length[$i]} < $breakout) { |
924 | $body .= $self->switch_clause ({indent=>4}, |
925 | $i, $items, @{$by_length[$i]}); |
926 | } else { |
927 | # Only use the minimal set of parameters actually needed by the types |
928 | # of the names of this length. |
929 | my $what = {}; |
930 | foreach (@{$by_length[$i]}) { |
931 | $what->{$_->{type}} = 1; |
932 | $what->{''} = 1 if $_->{utf8}; |
933 | } |
934 | $params = $self->params ($what); |
935 | push @subs, $self->C_constant ({package=>$package, |
936 | subname=>"${subname}_$i", |
937 | default_type => $default_type, |
938 | types => $what, indent => $indent, |
939 | breakout => [$i, $items]}, |
940 | @{$by_length[$i]}); |
941 | $body .= " return ${subname}_$i (" |
942 | # Eg "aTHX_ " |
943 | . $self->C_constant_prefix_param($params) |
944 | # Probably "name" |
945 | . $self->name_param($params); |
f8901870 |
946 | $body .= $self->C_constant_other_params($params); |
af6ca1d0 |
947 | $body .= ");\n"; |
948 | } |
949 | $body .= " break;\n"; |
950 | } |
951 | $body .= " }\n"; |
952 | } |
953 | my $notfound = $self->return_statement_for_notfound(); |
954 | $body .= " $notfound\n" if $notfound; |
955 | $body .= "}\n"; |
956 | return (@subs, $body); |
957 | } |
958 | |
959 | 1; |
960 | __END__ |
961 | |
962 | =back |
963 | |
964 | =head1 BUGS |
965 | |
966 | Not everything is documented yet. |
967 | |
968 | Probably others. |
969 | |
970 | =head1 AUTHOR |
971 | |
972 | Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and |
973 | others |