*
**** Alterations to Henry's code are...
****
- **** Copyright (c) 1991-2001, Larry Wall
+ **** Copyright (c) 1991-2002, Larry Wall
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
}
}
- if (UTF && OP(scan) == EXACTF) {
+ if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
/*
Two problematic code points in Unicode casefolding of EXACT nodes:
/* FALL THROUGH*/
case '?': /* (??...) */
logical = 1;
+ if (*RExC_parse != '{')
+ goto unknown;
paren = *RExC_parse++;
/* FALL THROUGH */
case '{': /* (?{...}) */
if (len)
p = oldp;
else if (UTF) {
+ STRLEN unilen;
+
if (FOLD) {
/* Emit all the Unicode characters. */
for (foldbuf = tmpbuf;
foldlen;
foldlen -= numlen) {
ender = utf8_to_uvchr(foldbuf, &numlen);
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
- foldbuf += numlen;
+ if (numlen > 0) {
+ reguni(pRExC_state, ender, s, &unilen);
+ s += unilen;
+ len += unilen;
+ /* In EBCDIC numlen (input bytes consumed from foldbuf)
+ * and unilen (output bytes emitted at s) can differ. */
+ foldbuf += numlen;
+ if (numlen >= foldlen)
+ break;
+ }
+ else
+ break; /* "Can't happen." */
}
}
else {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
+ reguni(pRExC_state, ender, s, &unilen);
+ if (unilen > 0) {
+ s += unilen;
+ len += unilen;
+ }
}
}
else {
break;
}
if (UTF) {
+ STRLEN unilen;
+
if (FOLD) {
/* Emit all the Unicode characters. */
for (foldbuf = tmpbuf;
foldlen;
foldlen -= numlen) {
ender = utf8_to_uvchr(foldbuf, &numlen);
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
- foldbuf += numlen;
+ if (numlen > 0) {
+ reguni(pRExC_state, ender, s, &unilen);
+ len += unilen;
+ s += unilen;
+ /* In EBCDIC numlen (input bytes consumed from foldbuf)
+ * and unilen (output bytes emitted at s) can differ. */
+ foldbuf += numlen;
+ if (numlen >= foldlen)
+ break;
+ }
+ else
+ break;
}
}
else {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
+ reguni(pRExC_state, ender, s, &unilen);
+ if (unilen > 0) {
+ s += unilen;
+ len += unilen;
+ }
}
len--;
}
STATIC void
S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
{
- if (!SIZE_ONLY && ckWARN(WARN_REGEXP) &&
- POSIXCC(UCHARAT(RExC_parse))) {
+ if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
char *s = RExC_parse;
char c = *s++;
nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
- if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && POSIXCC(nextvalue))
+ if (!SIZE_ONLY && POSIXCC(nextvalue))
checkposixcc(pRExC_state);
/* allow 1st char to be ] (allowing it to be - is dealt with later) */
ANYOF_BITMAP_SET(ret, i);
}
if (value > 255 || UTF) {
+ UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
+ UV natvalue = NATIVE_TO_UNI(value);
+
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
- if (prevvalue < value)
+ if (prevnatvalue < natvalue) { /* what about > ? */
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
- (UV)prevvalue, (UV)value);
- else if (prevvalue == value) {
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
- (UV)value);
+ prevnatvalue, natvalue);
+ }
+ else if (prevnatvalue == natvalue) {
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
if (FOLD) {
- U8 tmpbuf [UTF8_MAXLEN+1];
U8 foldbuf[UTF8_MAXLEN_FOLD+1];
STRLEN foldlen;
- UV f;
-
- uvchr_to_utf8(tmpbuf, value);
- to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- f = utf8_to_uvchr(foldbuf, 0);
+ UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
/* If folding and foldable and a single
* character, insert also the folded version