From: Gurusamy Sarathy Date: Sun, 28 Feb 1999 21:47:18 +0000 (+0000) Subject: todo item: permit extended control variables a la ${^Foo} (patch X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2b92dfceaa9d709661beb0761c3c790732df0cbc;p=p5sagit%2Fp5-mst-13.2.git todo item: permit extended control variables a la ${^Foo} (patch courtesy Mark-Jason Dominus ) p4raw-id: //depot/perl@3039 --- diff --git a/op.c b/op.c index d0f139b..fae524e 100644 --- a/op.c +++ b/op.c @@ -129,7 +129,21 @@ pad_allocmy(char *name) name[1] == '_' && (int)strlen(name) > 2)) { if (!isPRINT(name[1])) { - name[3] = '\0'; + /* 1999-02-27 mjd@plover.com */ + char *p; + p = strchr(name, '\0'); + /* The next block assumes the buffer is at least 205 chars + long. At present, it's always at least 256 chars. */ + if (p-name > 200) { + strcpy(name+200, "..."); + p = name+199; + } + else { + p[1] = '\0'; + } + /* Move everything else down one character */ + for (; p-name > 2; p--) + *p = *(p-1); name[2] = toCTRL(name[1]); name[1] = '^'; } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 0c0520c..d73efcf 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -161,6 +161,28 @@ strings. See L. The new format type modifer '_' is useful for packing and unpacking native shorts, ints, and longs. See L. +=head2 $^X variables may now have names longer than one character + +Formerly, $^X was synonymous with ${"\cX"}, but $^XY was a syntax +error. Now variable names that begin with a control character may be +arbitrarily long. However, for compatibility reasons, these variables +I<must> be written with explicit braces, as C<${^XY}> for example. +C<${^XYZ}> is synonymous with ${"\cXYZ"}. Variable names with more +than one control character, such as C<${^XY^Z}>, are illegal. + +The old syntax has not changed. As before, the `^X' may either be a +literal control-X character or the two character sequence `caret' plus +`X'. 
When the braces are omitted, the variable name stops after the +control character. Thus C<"$^XYZ"> continues to be synonymous with +C<$^X . "YZ"> as before. + +As before, lexical variables may not have names beginning with control +characters. As before, variables whose names begin with a control +character are always forced to be in package `main'. These variables +are all reserved for future extensions, except the ones that begin +with C<^_>, which may be used by user programs and will not acquire a +special meaning in any future version of Perl. + =head1 Significant bug fixes =head2 E<lt>HANDLEE<gt> on empty files diff --git a/pod/perltodo.pod b/pod/perltodo.pod index 11956a9..4b5a506 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -117,11 +117,6 @@ Verify complete 64 bit support so that the value of sysseek, or C<-s>, or stat(), or tell can fit into a perl number without losing precision. Work with the perl-64bit mailing list on perl.org. -=head2 Figure a way out of $^(capital letter) - -Figure out a clean way to extend $^(capital letter) beyond -the 26 alphabets. (${^WORD} maybe?) - =head2 Prototypes =over 4 @@ -842,6 +837,13 @@ Can we install modules as bytecode? =head1 Recently Finished Tasks +=head2 Figure a way out of $^(capital letter) + +Figure out a clean way to extend $^(capital letter) beyond +the 26 alphabets. (${^WORD} maybe?) + +Mark-Jason Dominus sent a patch which went into 5.005_56. + =head2 Filenames Make filenames in the distribution and in the standard module set diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 2dafc1c..5c851d9 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -1005,3 +1005,51 @@ pipe C<close>, overwriting the old value. For more details, see the individual descriptions at L<$@>, L<$!>, L<$^E>, and L<$?>. + + +=head2 Technical Note on the Syntax of Variable Names + +Variable names in Perl can have several formats. 
Usually, they must +begin with a letter or underscore, in which case they can be +arbitrarily long (up to an internal limit of 256 characters) and may +contain letters, digits, underscores, or the special sequence C<::>. +In this case the part before the last C<::> is taken to be a I<package qualifier>; see L<perlmod>. + +Perl variable names may also be a sequence of digits or a single +punctuation or control character. These names are all reserved for +special uses by Perl; for example, the all-digits names are used to +hold backreferences after a regular expression match. Perl has a +special syntax for the single-control-character names: It understands +C<^X> (caret C<X>) to mean the control-C<X> character. For example, +the notation C<$^W> (dollar-sign caret C<W>) is the scalar variable +whose name is the single character control-C<W>. This is better than +typing a literal control-C<W> into your program. + +Finally, new in Perl 5.006, Perl variable names may be alphanumeric +strings that begin with control characters. These variables must be +written in the form C<${^Foo}>; the braces are not optional. +C<${^Foo}> denotes the scalar variable whose name is a control-C<F> +followed by two C<o>'s. These variables are reserved for future +special uses by Perl, except for the ones that begin with C<^_> +(control-underscore). No control-character name that begins with +C<^_> will acquire a special meaning in any future version of Perl; +such names may therefore be used safely in programs. C<^_> itself, +however, I<is> reserved. + +All Perl variables that begin with digits, control characters, or +punctuation characters are exempt from the effects of the C<package> +declaration and are always forced to be in package C<main>
. A few +other names are also exempt: + + ENV STDIN + INC STDOUT + ARGV STDERR + ARGVOUT + SIG + +In particular, the new special C<${^_XYZ}> variables are always taken +to be in package C<main>
regardless of any C declarations +presently in scope. + + diff --git a/t/base/lex.t b/t/base/lex.t index 045cb22..325fd42 100755 --- a/t/base/lex.t +++ b/t/base/lex.t @@ -2,7 +2,7 @@ # $RCSfile: lex.t,v $$Revision: 4.1 $$Date: 92/08/07 18:27:04 $ -print "1..30\n"; +print "1..41\n"; $x = 'x'; @@ -117,3 +117,68 @@ $foo =~ s/^not /substr(< -1; + print "ok 37\n"; +# print "($@)\n" if $@; + + eval 'my $ {^XYZ};'; + print "not " unless index ($@, 'Can\'t use global $^XYZ in "my"') > -1; + print "ok 38\n"; +# print "($@)\n" if $@; + +# Now let's make sure that caret variables are all forced into the main package. + package Someother; + $^N = 'Someother'; + $ {^Nostril} = 'Someother 2'; + $ {^M} = 'Someother 3'; + package main; + print "not " unless $^N eq 'Someother'; + print "ok 39\n"; + print "not " unless $ {^Nostril} eq 'Someother 2'; + print "ok 40\n"; + print "not " unless $ {^M} eq 'Someother 3'; + print "ok 41\n"; + + +} + + diff --git a/toke.c b/toke.c index 211f9b2..f1aca91 100644 --- a/toke.c +++ b/toke.c @@ -74,6 +74,10 @@ static char ident_too_long[] = "Identifier too long"; ? isALNUM(*(p)) \ : isALNUM_utf8((U8*)p)) +/* In variables name $^X, these are the legal values for X. + * 1999-02-27 mjd-perl-patch@plover.com */ +#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) + /* The following are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). 
*/ @@ -5160,7 +5164,7 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 if (s < send) *d = *s++; d[1] = '\0'; - if (*d == '^' && *s && (isUPPER(*s) || strchr("[\\]^_?", *s))) { + if (*d == '^' && *s && isCONTROLVAR(*s)) { *d = toCTRL(*s); s++; } @@ -5188,8 +5192,10 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 s = e; } else { - while (isALNUM(*s) || *s == ':') + while ((isALNUM(*s) || *s == ':') && d < e) *d++ = *s++; + if (d >= e) + croak(ident_too_long); } *d = '\0'; while (s < send && (*s == ' ' || *s == '\t')) s++; @@ -5206,6 +5212,19 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; return s; } + } + /* Handle extended ${^Foo} variables + * 1999-02-27 mjd-perl-patch@plover.com */ + else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */ + && isALNUM(*s)) + { + d++; + while (isALNUM(*s) && d < e) { + *d++ = *s++; + } + if (d >= e) + croak(ident_too_long); + *d = '\0'; } if (*s == '}') { s++;