From: Jarkko Hietaniemi Date: Wed, 28 Feb 2001 16:22:26 +0000 (+0000) Subject: Undo qu. Retract #8814, rewrite op/each part of #8615, X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=945c54fd8d2501611a8e97dae49e901ff9478cad;p=p5sagit%2Fp5-mst-13.2.git Undo qu. Retract #8814, rewrite op/each part of #8615, retract toke.c/qu parts of #8583, retract #8485, retract or rewrite qu parts of #8439 of toke.c, keywords.h, keywords.pl, op/length.t, and MANIFEST, and delete t/op/qu.t. p4raw-id: //depot/perl@8967 --- diff --git a/MANIFEST b/MANIFEST index 4eda166..b2099dc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1596,7 +1596,6 @@ t/op/pat.t See if esoteric patterns work t/op/pos.t See if pos works t/op/push.t See if push and pop work t/op/pwent.t See if getpw*() functions work -t/op/qu.t See if qu works t/op/quotemeta.t See if quotemeta works t/op/rand.t See if rand works t/op/range.t See if .. works diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index c75818e..725b50e 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -96,9 +96,8 @@ than one place. =item Functions for SCALARs or strings C, C, C, C, C, C, C, C, -C, C, C, C, C, C, C, -C, C, C, C, C, C, C, -C +C, C, C, C, C, C, C, +C, C, C, C, C, C, C =item Regular expressions and pattern matching @@ -3463,12 +3462,10 @@ but is more efficient. Returns the new number of elements in the array. =item qr/STRING/ -=item qu/STRING/ +=item qx/STRING/ =item qw/STRING/ -=item qx/STRING/ - Generalized quotes. See L. =item quotemeta EXPR diff --git a/pod/perlop.pod b/pod/perlop.pod index 2bc889d..8f2ecde 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -645,7 +645,6 @@ any pair of delimiters you choose. 
Customary Generic Meaning Interpolates '' q{} Literal no "" qq{} Literal yes - qu{} Literal yes (UTF-8, see below) `` qx{} Command yes (unless '' is delimiter) qw{} Word list no // m{} Pattern match yes (unless '' is delimiter) @@ -1012,48 +1011,6 @@ Options are: See L for additional information on valid syntax for STRING, and for a detailed look at the semantics of regular expressions. -=item qw/STRING/ - -Evaluates to a list of the words extracted out of STRING, using embedded -whitespace as the word delimiters. It can be understood as being roughly -equivalent to: - - split(' ', q/STRING/); - -the difference being that it generates a real list at compile time. So -this expression: - - qw(foo bar baz) - -is semantically equivalent to the list: - - 'foo', 'bar', 'baz' - -Some frequently seen examples: - - use POSIX qw( setlocale localeconv ) - @EXPORT = qw( foo bar baz ); - -A common mistake is to try to separate the words with comma or to -put comments into a multi-line C-string. For this reason, the -C pragma and the B<-w> switch (that is, the C<$^W> variable) -produces warnings if the STRING contains the "," or the "#" character. - -=item qu/STRING/ - -Like L but explicitly generates UTF-8 from the \0ooo, \xHH, and -\x{HH} constructs if the code point is in the 0x80..0xff range (and -of course for the 0x100.. range). - -Normally you do not need to use this because whether characters are -internally encoded in UTF-8 should be transparent, and you can just -just use qq, also known as "". - -(In qq/STRING/ the \0ooo, \xHH, and the \x{HHH...} constructs -generate bytes for the 0x80..0xff range. For the whole 0x00..0xff -range the generated bytes are host-dependent: in ISO 8859-1 they will -be ISO 8859-1, in EBCDIC they will EBCDIC, and so on.) - =item qx/STRING/ =item `STRING` @@ -1135,6 +1092,33 @@ Just understand what you're getting yourself into. See L<"I/O Operators"> for more discussion. 
+=item qw/STRING/ + +Evaluates to a list of the words extracted out of STRING, using embedded +whitespace as the word delimiters. It can be understood as being roughly +equivalent to: + + split(' ', q/STRING/); + +the difference being that it generates a real list at compile time. So +this expression: + + qw(foo bar baz) + +is semantically equivalent to the list: + + 'foo', 'bar', 'baz' + +Some frequently seen examples: + + use POSIX qw( setlocale localeconv ) + @EXPORT = qw( foo bar baz ); + +A common mistake is to try to separate the words with comma or to +put comments into a multi-line C<qw>-string. For this reason, the +C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable) +produces warnings if the STRING contains the "," or the "#" character. + =item s/PATTERN/REPLACEMENT/egimosx Searches a string for a pattern, and if found, replaces that pattern diff --git a/pod/perlre.pod b/pod/perlre.pod index 02dd2cd..ce2b9bd 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -179,7 +179,6 @@ In addition, Perl defines the following: \X Match eXtended Unicode "combining character sequence", equivalent to C<(?:\PM\pM*)> \C Match a single C char (octet) even under utf8. - (Currently this does not work correctly.) A C<\w> matches a single alphanumeric character or C<_>, not a whole word. Use C<\w+> to match a string of Perl-identifier characters (which isn't diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index b8bbc57..30a4482 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -16,8 +16,7 @@ The following areas need further work. There is currently no easy way to mark data read from a file or other external source as being utf8. This will be one of the major areas of -focus in the near future. Unfortunately it is unlikely that the Perl -5.6 and earlier will ever gain this capability. +focus in the near future. =item Regular Expressions @@ -67,8 +66,7 @@ or from literals and constants in the source text. 
If the C<-C> command line switch is used, (or the ${^WIDE_SYSTEM_CALLS} global flag is set to C<1>), all system calls will use the corresponding wide character APIs. This is currently only implemented -on Windows as other platforms do not have a unified way of handling -wide character APIs. +on Windows. Regardless of the above, the C pragma can always be used to force byte semantics in a particular lexical scope. See L. @@ -129,7 +127,8 @@ attempt to canonicalize variable names for you.) Regular expressions match characters instead of bytes. For instance, "." matches a character instead of a byte. (However, the C<\C> pattern -is available to force a match a single byte ("C" in C, hence C<\C>).) +is provided to force a match a single byte ("C" in C, hence +C<\C>).) =item * @@ -217,10 +216,7 @@ And finally, C reverses by character rather than by byte. =head2 Character encodings for input and output -This feature is in the process of getting implemented. - -(For Perl 5.6 and earlier the support is unlikely to get integrated -to the core language and some external module will be required.) +[XXX: This feature is not yet implemented.] =head1 CAVEATS diff --git a/t/op/each.t b/t/op/each.t index 397176a..2e80dcd 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -163,7 +163,7 @@ print "ok 23\n"; print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056. 
print "ok 24\n"; -$d = qu"\xe3\x81\x82"; +$d = pack("U*", 0xe3, 0x81, 0x82); %u = ($d => "downgrade"); for (keys %u) { use bytes; @@ -172,6 +172,6 @@ for (keys %u) { } { use bytes; - print "not " if length($d) ne 6 or $d ne qu"\xe3\x81\x82"; + print "not " if length($d) ne 6; print "ok 26\n"; } diff --git a/t/op/length.t b/t/op/length.t index 46f0c59..df80fcd 100644 --- a/t/op/length.t +++ b/t/op/length.t @@ -33,7 +33,7 @@ print "ok 3\n"; } { - my $a = qu"\x{80}"; # make "\x{80}" to produce UTF-8 + my $a = pack("U", 0x80); print "not " unless length($a) == 1; print "ok 6\n"; diff --git a/t/op/qu.t b/t/op/qu.t deleted file mode 100644 index 2800204..0000000 --- a/t/op/qu.t +++ /dev/null @@ -1,24 +0,0 @@ -print "1..6\n"; - -my $foo = "foo"; - -print "not " unless qu(abc$foo) eq "abcfoo"; -print "ok 1\n"; - -# qu is always Unicode, even in EBCDIC, so \x41 is 'A' and \x{61} is 'a'. - -print "not " unless qu(abc\x41) eq "abcA"; -print "ok 2\n"; - -print "not " unless qu(abc\x{61}$foo) eq "abcafoo"; -print "ok 3\n"; - -print "not " unless qu(\x{41}\x{100}\x61\x{200}) eq "A\x{100}a\x{200}"; -print "ok 4\n"; - -print "not " unless join(" ", unpack("C*", qu(\x80))) eq "194 128"; -print "ok 5\n"; - -print "not " unless join(" ", unpack("C*", qu(\x{100}))) eq "196 128"; -print "ok 6\n"; - diff --git a/toke.c b/toke.c index f8d7145..26507a7 100644 --- a/toke.c +++ b/toke.c @@ -4744,11 +4744,7 @@ Perl_yylex(pTHX) TOKEN('('); case KEY_qq: - case KEY_qu: s = scan_str(s,FALSE,FALSE); - if (tmp == KEY_qu && - is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff))) - SvUTF8_on(PL_lex_stuff); if (!s) missingterm((char*)0); yylval.ival = OP_STRINGIFY; @@ -5581,7 +5577,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"q")) return KEY_q; if (strEQ(d,"qr")) return KEY_qr; if (strEQ(d,"qq")) return KEY_qq; - if (strEQ(d,"qu")) return KEY_qu; if (strEQ(d,"qw")) return KEY_qw; if (strEQ(d,"qx")) return KEY_qx; }