Commit | Line | Data |
f600d105 |
1 | |
2 | package Locale::Maketext::Guts; |
3 | BEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; } |
4 | # Just so we're nice and define SOMETHING in "our" package. |
5 | |
6 | package Locale::Maketext; |
7 | use strict; |
8 | use vars qw($USE_LITERALS $GUTSPATH); |
9 | |
10 | BEGIN { |
11 | $GUTSPATH = __FILE__; |
12 | *DEBUG = sub () {0} unless defined &DEBUG; |
13 | } |
14 | |
15 | use utf8; |
16 | |
17 | sub _compile { |
18 | # This big scary routine compiles an entry. |
19 | # It returns either a coderef if there's brackety bits in this, or |
20 | # otherwise a ref to a scalar. |
21 | |
22 | my $target = ref($_[0]) || $_[0]; |
23 | |
24 | my(@code); |
25 | my(@c) = (''); # "chunks" -- scratch. |
26 | my $call_count = 0; |
27 | my $big_pile = ''; |
28 | { |
29 | my $in_group = 0; # start out outside a group |
30 | my($m, @params); # scratch |
31 | |
32 | while($_[1] =~ # Iterate over chunks. |
33 | m<\G( |
34 | [^\~\[\]]+ # non-~[] stuff |
35 | | |
36 | ~. # ~[, ~], ~~, ~other |
37 | | |
38 | \[ # [ presumably opening a group |
39 | | |
40 | \] # ] presumably closing a group |
41 | | |
42 | ~ # terminal ~ ? |
43 | | |
44 | $ |
45 | )>xgs |
46 | ) { |
47 | print " \"$1\"\n" if DEBUG > 2; |
48 | |
49 | if($1 eq '[' or $1 eq '') { # "[" or end |
50 | # Whether this is "[" or end, force processing of any |
51 | # preceding literal. |
52 | if($in_group) { |
53 | if($1 eq '') { |
54 | $target->_die_pointing($_[1], "Unterminated bracket group"); |
55 | } else { |
56 | $target->_die_pointing($_[1], "You can't nest bracket groups"); |
57 | } |
58 | } else { |
59 | if($1 eq '') { |
60 | print " [end-string]\n" if DEBUG > 2; |
61 | } else { |
62 | $in_group = 1; |
63 | } |
64 | die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity |
65 | if(length $c[-1]) { |
66 | # Now actually processing the preceding literal |
67 | $big_pile .= $c[-1]; |
68 | if($USE_LITERALS and ( |
69 | (ord('A') == 65) |
70 | ? $c[-1] !~ m<[^\x20-\x7E]>s |
71 | # ASCII very safe chars |
72 | : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s |
73 | # EBCDIC very safe chars |
74 | )) { |
75 | # normal case -- all very safe chars |
76 | $c[-1] =~ s/'/\\'/g; |
77 | push @code, q{ '} . $c[-1] . "',\n"; |
78 | $c[-1] = ''; # reuse this slot |
79 | } else { |
80 | push @code, ' $c[' . $#c . "],\n"; |
81 | push @c, ''; # new chunk |
82 | } |
83 | } |
84 | # else just ignore the empty string. |
85 | } |
86 | |
87 | } elsif($1 eq ']') { # "]" |
88 | # close group -- go back in-band |
89 | if($in_group) { |
90 | $in_group = 0; |
91 | |
92 | print " --Closing group [$c[-1]]\n" if DEBUG > 2; |
93 | |
94 | # And now process the group... |
95 | |
96 | if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { |
97 | DEBUG > 2 and print " -- (Ignoring)\n"; |
98 | $c[-1] = ''; # reset out chink |
99 | next; |
100 | } |
101 | |
102 | #$c[-1] =~ s/^\s+//s; |
103 | #$c[-1] =~ s/\s+$//s; |
104 | ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/ |
105 | |
106 | # A bit of a hack -- we've turned "~,"'s into DELs, so turn |
107 | # 'em into real commas here. |
108 | if (ord('A') == 65) { # ASCII, etc |
109 | foreach($m, @params) { tr/\x7F/,/ } |
110 | } else { # EBCDIC (1047, 0037, POSIX-BC) |
111 | # Thanks to Peter Prymmer for the EBCDIC handling |
112 | foreach($m, @params) { tr/\x07/,/ } |
113 | } |
114 | |
115 | # Special-case handling of some method names: |
116 | if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) { |
117 | # Treat [_1,...] as [,_1,...], etc. |
118 | unshift @params, $m; |
119 | $m = ''; |
120 | } elsif($m eq '*') { |
121 | $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" |
122 | } elsif($m eq '#') { |
123 | $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" |
124 | } |
125 | |
126 | # Most common case: a simple, legal-looking method name |
127 | if($m eq '') { |
128 | # 0-length method name means to just interpolate: |
129 | push @code, ' ('; |
130 | } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s |
131 | and $m !~ m<(?:^|\:)\d>s |
132 | # exclude starting a (sub)package or symbol with a digit |
133 | ) { |
134 | # Yes, it even supports the demented (and undocumented?) |
135 | # $obj->Foo::bar(...) syntax. |
136 | $target->_die_pointing( |
137 | $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method", |
138 | 2 + length($c[-1]) |
139 | ) |
140 | if $m =~ m/^SUPER::/s; |
141 | # Because for SUPER:: to work, we'd have to compile this into |
142 | # the right package, and that seems just not worth the bother, |
143 | # unless someone convinces me otherwise. |
144 | |
145 | push @code, ' $_[0]->' . $m . '('; |
146 | } else { |
147 | # TODO: implement something? or just too icky to consider? |
148 | $target->_die_pointing( |
149 | $_[1], |
150 | "Can't use \"$m\" as a method name in bracket group", |
151 | 2 + length($c[-1]) |
152 | ); |
153 | } |
154 | |
155 | pop @c; # we don't need that chunk anymore |
156 | ++$call_count; |
157 | |
158 | foreach my $p (@params) { |
159 | if($p eq '_*') { |
160 | # Meaning: all parameters except $_[0] |
161 | $code[-1] .= ' @_[1 .. $#_], '; |
162 | # and yes, that does the right thing for all @_ < 3 |
163 | } elsif($p =~ m<^_(-?\d+)$>s) { |
164 | # _3 meaning $_[3] |
165 | $code[-1] .= '$_[' . (0 + $1) . '], '; |
166 | } elsif($USE_LITERALS and ( |
167 | (ord('A') == 65) |
168 | ? $p !~ m<[^\x20-\x7E]>s |
169 | # ASCII very safe chars |
170 | : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s |
171 | # EBCDIC very safe chars |
172 | )) { |
173 | # Normal case: a literal containing only safe characters |
174 | $p =~ s/'/\\'/g; |
175 | $code[-1] .= q{'} . $p . q{', }; |
176 | } else { |
177 | # Stow it on the chunk-stack, and just refer to that. |
178 | push @c, $p; |
179 | push @code, ' $c[' . $#c . "], "; |
180 | } |
181 | } |
182 | $code[-1] .= "),\n"; |
183 | |
184 | push @c, ''; |
185 | } else { |
186 | $target->_die_pointing($_[1], "Unbalanced ']'"); |
187 | } |
188 | |
189 | } elsif(substr($1,0,1) ne '~') { |
190 | # it's stuff not containing "~" or "[" or "]" |
191 | # i.e., a literal blob |
192 | $c[-1] .= $1; |
193 | |
194 | } elsif($1 eq '~~') { # "~~" |
195 | $c[-1] .= '~'; |
196 | |
197 | } elsif($1 eq '~[') { # "~[" |
198 | $c[-1] .= '['; |
199 | |
200 | } elsif($1 eq '~]') { # "~]" |
201 | $c[-1] .= ']'; |
202 | |
203 | } elsif($1 eq '~,') { # "~," |
204 | if($in_group) { |
205 | # This is a hack, based on the assumption that no-one will actually |
206 | # want a DEL inside a bracket group. Let's hope that's it's true. |
207 | if (ord('A') == 65) { # ASCII etc |
208 | $c[-1] .= "\x7F"; |
209 | } else { # EBCDIC (cp 1047, 0037, POSIX-BC) |
210 | $c[-1] .= "\x07"; |
211 | } |
212 | } else { |
213 | $c[-1] .= '~,'; |
214 | } |
215 | |
216 | } elsif($1 eq '~') { # possible only at string-end, it seems. |
217 | $c[-1] .= '~'; |
218 | |
219 | } else { |
220 | # It's a "~X" where X is not a special character. |
221 | # Consider it a literal ~ and X. |
222 | $c[-1] .= $1; |
223 | } |
224 | } |
225 | } |
226 | |
227 | if($call_count) { |
228 | undef $big_pile; # Well, nevermind that. |
229 | } else { |
230 | # It's all literals! Ahwell, that can happen. |
231 | # So don't bother with the eval. Return a SCALAR reference. |
232 | return \$big_pile; |
233 | } |
234 | |
235 | die "Last chunk isn't null??" if @c and length $c[-1]; # sanity |
236 | print scalar(@c), " chunks under closure\n" if DEBUG; |
237 | if(@code == 0) { # not possible? |
238 | print "Empty code\n" if DEBUG; |
239 | return \''; |
240 | } elsif(@code > 1) { # most cases, presumably! |
241 | unshift @code, "join '',\n"; |
242 | } |
243 | unshift @code, "use strict; sub {\n"; |
244 | push @code, "}\n"; |
245 | |
246 | print @code if DEBUG; |
247 | my $sub = eval(join '', @code); |
248 | die "$@ while evalling" . join('', @code) if $@; # Should be impossible. |
249 | return $sub; |
250 | } |
251 | |
252 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
253 | |
254 | sub _die_pointing { |
255 | # This is used by _compile to throw a fatal error |
256 | my $target = shift; # class name |
257 | # ...leaving $_[0] the error-causing text, and $_[1] the error message |
258 | |
259 | my $i = index($_[0], "\n"); |
260 | |
261 | my $pointy; |
262 | my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; |
263 | if($pos < 1) { |
264 | $pointy = "^=== near there\n"; |
265 | } else { # we need to space over |
266 | my $first_tab = index($_[0], "\t"); |
267 | if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { |
268 | # No tabs, or the first tab is harmlessly after where we will point to, |
269 | # AND we're far enough from the margin that we can draw a proper arrow. |
270 | $pointy = ('=' x $pos) . "^ near there\n"; |
271 | } else { |
272 | # tabs screw everything up! |
273 | $pointy = substr($_[0],0,$pos); |
274 | $pointy =~ tr/\t //cd; |
275 | # make everything into whitespace, but preseving tabs |
276 | $pointy .= "^=== near there\n"; |
277 | } |
278 | } |
279 | |
280 | my $errmsg = "$_[1], in\:\n$_[0]"; |
281 | |
282 | if($i == -1) { |
283 | # No newline. |
284 | $errmsg .= "\n" . $pointy; |
285 | } elsif($i == (length($_[0]) - 1) ) { |
286 | # Already has a newline at end. |
287 | $errmsg .= $pointy; |
288 | } else { |
289 | # don't bother with the pointy bit, I guess. |
290 | } |
291 | Carp::croak( "$errmsg via $target, as used" ); |
292 | } |
293 | |
294 | 1; |
295 | |