#define isALNUM(c) (isALPHA(c) || isDIGIT(c) || (c) == '_')
#define isIDFIRST(c) (isALPHA(c) || (c) == '_')
#define isALPHA(c) (isUPPER(c) || isLOWER(c))
+/* ALPHAU includes Unicode semantics for latin1 characters. It has an extra
+ * >= AA test to speed up ASCII-only tests at the expense of the others */
+#define isALPHAU(c) (isALPHA(c) || (NATIVE_TO_UNI((U8) c) >= 0xAA \
+ && ((NATIVE_TO_UNI((U8) c) >= 0xC0 \
+ && NATIVE_TO_UNI((U8) c) != 0xD7 && NATIVE_TO_UNI((U8) c) != 0xF7) \
+ || NATIVE_TO_UNI((U8) c) == 0xAA \
+ || NATIVE_TO_UNI((U8) c) == 0xB5 \
+ || NATIVE_TO_UNI((U8) c) == 0xBA)))
+#define isALNUMU(c) (isDIGIT(c) || isALPHAU(c) || (c) == '_')
+
+/* continuation character for legal NAME in \N{NAME} */
+#define isCHARNAME_CONT(c) (isALNUMU(c) || (c) == ' ' || (c) == '-' || (c) == '(' || (c) == ')' || (c) == ':' || NATIVE_TO_UNI((U8) c) == 0xA0)
#define isSPACE(c) \
((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) =='\r' || (c) == '\f')
#define isPSXSPC(c) (isSPACE(c) || (c) == '\v')
long for Perl to handle. You have to be seriously twisted to write code
that triggers this error.
+=item Deprecated character(s) in \\N{...} starting at '%s'
+
+(D deprecated) Just about anything is legal for the C<...> in C<\N{...}>.
+But starting in 5.12, non-reasonable ones that don't look like names are
+deprecated. A reasonable name begins with an alphabetic character and
+continues with any combination of alphanumerics, dashes, spaces, parentheses or
+colons.
+
=item Deprecated use of my() in false conditional
(D deprecated) You used a declaration similar to C<my $x if 0>.
}
-plan tests => 1155; # Update this when adding/deleting tests.
+plan tests => 1159; # Update this when adding/deleting tests.
run_tests() unless caller;
undef $w;
eval 'q(syntax error) =~ /\N{MALFORMED}/';
ok $@ && $@ =~ /Malformed/, 'Verify that malformed utf8 gives an error';
+ undef $w;
+ eval 'q() =~ /\N{4F}/';
+ ok $w && $w =~ /Deprecated/, 'Verify that leading digit in name gives warning';
+ undef $w;
+ eval 'q() =~ /\N{COM,MA}/';
+ ok $w && $w =~ /Deprecated/, 'Verify that comma in name gives warning';
+ undef $w;
+ my $name = "A\x{D7}O";
+ eval "q(W) =~ /\\N{$name}/";
+ ok $w && $w =~ /Deprecated/, 'Verify that latin1 symbol in name gives warning';
+ undef $w;
+ $name = "A\x{D1}O";
+ eval "q(W) =~ /\\N{$name}/";
+ ok ! $w, 'Verify that latin1 letter in name doesnt give warning';
}
d += len;
}
SvREFCNT_dec(res);
- }
+
+ /* Deprecate non-approved name syntax */
+ if (ckWARN_d(WARN_DEPRECATED)) {
+ bool problematic = FALSE;
+ char* i = s;
+
+ /* For non-ut8 input, look to see that the first
+ * character is an alpha, then loop through the rest
+ * checking that each is a continuation */
+ if (! this_utf8) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ else for (i = s + 1; i < e; i++) {
+ if (isCHARNAME_CONT(*i)) continue;
+ problematic = TRUE;
+ break;
+ }
+ }
+ else {
+ /* Similarly for utf8. For invariants can check
+ * directly. We accept anything above the latin1
+ * range because it is immaterial to Perl if it is
+ * correct or not, and is expensive to check. But
+ * it is fairly easy in the latin1 range to convert
+ * the variants into a single character and check
+ * those */
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+ *(i+1)))))
+ {
+ problematic = TRUE;
+ }
+ }
+ if (! problematic) for (i = s + UTF8SKIP(s);
+ i < e;
+ i+= UTF8SKIP(i))
+ {
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (isCHARNAME_CONT(*i)) continue;
+ } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ continue;
+ } else if (isCHARNAME_CONT(
+ UNI_TO_NATIVE(
+ UTF8_ACCUMULATE(*i, *(i+1)))))
+ {
+ continue;
+ }
+ problematic = TRUE;
+ break;
+ }
+ }
+ if (problematic) {
+ char *string;
+ Newx(string, e - i + 1, char);
+ Copy(i, string, e - i, char);
+ string[e - i] = '\0';
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Deprecated character(s) in \\N{...} starting at '%s'",
+ string);
+ Safefree(string);
+ }
+ }
+ } /* End \N{NAME} */
#ifdef EBCDIC
if (!dorange)
native_range = FALSE; /* \N{} is defined to be Unicode */