*/
/*
- * "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
+ * 'A fair jaw-cracker dwarf-language must be.' --Samwise Gamgee
+ *
+ * [p.285 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
*/
/* This file contains functions for compiling a regular expression. See
if ( folder )
TRIE_BITMAP_SET(trie, folder[ *ch ]);
DEBUG_OPTIMISE_r(
- PerlIO_printf(Perl_debug_log, (char*)ch)
+ PerlIO_printf(Perl_debug_log, "%s", (char*)ch)
);
}
}
#ifndef PERL_IN_XSUB_RE
REGEXP *
-Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
+Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
{
dVAR;
HV * const table = GvHV(PL_hintgv);
#endif
REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, U32 pm_flags)
+Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags)
{
dVAR;
REGEXP *rx;
struct regexp *r;
register regexp_internal *ri;
STRLEN plen;
- char* exp = SvPV((SV*)pattern, plen);
+ char *exp = SvPV(pattern, plen);
char* xend = exp + plen;
regnode *scan;
I32 flags;
+ (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
- p = sv_grow((SV *)rx, wraplen + 1);
+ p = sv_grow(MUTABLE_SV(rx), wraplen + 1);
SvCUR_set(rx, wraplen);
SvPOK_on(rx);
SvFLAGS(rx) |= SvUTF8(pattern);
if (flags & RXapif_FETCH) {
return reg_named_buff_fetch(rx, key, flags);
} else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
- Perl_croak(aTHX_ PL_no_modify);
+ Perl_croak(aTHX_ "%s", PL_no_modify);
return NULL;
} else if (flags & RXapif_EXISTS) {
return reg_named_buff_exists(rx, key, flags)
av_push(retarray, ret);
}
if (retarray)
- return newRV_noinc((SV*)retarray);
+ return newRV_noinc(MUTABLE_SV(retarray));
}
}
return NULL;
}
}
- return newRV_noinc((SV*)av);
+ return newRV_noinc(MUTABLE_SV(av));
}
void
PERL_UNUSED_ARG(value);
if (!PL_localizing)
- Perl_croak(aTHX_ PL_no_modify);
+ Perl_croak(aTHX_ "%s", PL_no_modify);
}
I32
goto getlen;
} else {
if (ckWARN(WARN_UNINITIALIZED))
- report_uninit((SV*)sv);
+ report_uninit((const SV *)sv);
return 0;
}
}
"panic: reg_scan_name returned NULL");
if (!RExC_paren_names) {
RExC_paren_names= newHV();
- sv_2mortal((SV*)RExC_paren_names);
+ sv_2mortal(MUTABLE_SV(RExC_paren_names));
#ifdef DEBUGGING
RExC_paren_name_list= newAV();
- sv_2mortal((SV*)RExC_paren_name_list);
+ sv_2mortal(MUTABLE_SV(RExC_paren_name_list));
#endif
}
he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
*flagp = WORST;
if (max > 0)
*flagp |= HASWIDTH;
- if (max && max < min)
+ if (max < min)
vFAIL("Can't do {n,m} with n > m");
if (!SIZE_ONLY) {
ARG1_SET(ret, (U16)min);
| PERL_SCAN_DISALLOW_PREFIX
| (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
UV cp;
- char string;
len = (STRLEN)(endbrace - name - 2);
cp = grok_hex(name + 2, &len, &fl, NULL);
if ( len != (STRLEN)(endbrace - name - 2) ) {
cp = 0xFFFD;
}
- if (cp > 0xff)
- RExC_utf8 = 1;
if ( valuep ) {
+ if (cp > 0xff) RExC_utf8 = 1;
*valuep = cp;
return NULL;
}
- string = (char)cp;
- sv_str= newSVpvn(&string, 1);
+
+ /* Need to convert to utf8 if either: won't fit into a byte, or the re
+ * is going to be in utf8 and the representation changes under utf8. */
+ if (cp > 0xff || (RExC_utf8 && ! UNI_IS_INVARIANT(cp))) {
+ U8 string[UTF8_MAXBYTES+1];
+ U8 *tmps;
+ RExC_utf8 = 1;
+ tmps = uvuni_to_utf8(string, cp);
+ sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE);
+ } else { /* Otherwise, no need for utf8, can skip that step */
+ char string;
+ string = (char)cp;
+ sv_str= newSVpvn(&string, 1);
+ }
} else {
/* fetch the charnames handler for this scope */
HV * const table = GvHV(PL_hintgv);
if (!RExC_charnames) {
/* make sure our cache is allocated */
RExC_charnames = newHV();
- sv_2mortal((SV*)RExC_charnames);
+ sv_2mortal(MUTABLE_SV(RExC_charnames));
}
/* see if we have looked this one up before */
he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
Set_Node_Cur_Length(ret); /* MJD */
RExC_parse--;
nextchar(pRExC_state);
- } else {
+ } else { /* zero length */
ret = reg_node(pRExC_state,NOTHING);
}
if (!cached) {
* used later (regexec.c:S_reginclass()). */
av_store(av, 0, listsv);
av_store(av, 1, NULL);
- av_store(av, 2, (SV*)unicode_alternate);
- rv = newRV_noinc((SV*)av);
+ av_store(av, 2, MUTABLE_SV(unicode_alternate));
+ rv = newRV_noinc(MUTABLE_SV(av));
n = add_data(pRExC_state, 1, "s");
RExC_rxi->data->data[n] = (void*)rv;
ARG_SET(ret, n);
PERL_ARGS_ASSERT_REGPROP;
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */
/* It would be nice to FAIL() here, but this may be called from
}
else {
AV *list= MUTABLE_AV(progi->data->data[ progi->name_list_idx ]);
- SV *sv_dat=(SV*)progi->data->data[ ARG( o ) ];
+ SV *sv_dat= MUTABLE_SV(progi->data->data[ ARG( o ) ]);
I32 *nums=(I32*)SvPVX(sv_dat);
SV **name= av_fetch(list, nums[0], 0 );
I32 n;
else if (k == VERB) {
if (!o->flags)
Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
- SVfARG((SV*)progi->data->data[ ARG( o ) ]));
+ SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
} else if (k == LOGICAL)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == FOLDCHAR)
else if (k == ANYOF) {
int i, rangestart = -1;
const U8 flags = ANYOF_FLAGS(o);
+ int do_sep = 0;
/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
static const char * const anyofs[] = {
"[:^alpha:]",
"[:ascii:]",
"[:^ascii:]",
- "[:ctrl:]",
- "[:^ctrl:]",
+ "[:cntrl:]",
+ "[:^cntrl:]",
"[:graph:]",
"[:^graph:]",
"[:lower:]",
sv_catpvs(sv, "-");
put_byte(sv, i - 1);
}
+ do_sep = 1;
rangestart = -1;
}
}
-
+ if (do_sep) {
+ sv_catpvs(sv,"][");
+ do_sep = 0;
+ }
+
if (o->flags & ANYOF_CLASS)
for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
- if (ANYOF_CLASS_TEST(o,i))
+ if (ANYOF_CLASS_TEST(o,i)) {
sv_catpv(sv, anyofs[i]);
+ do_sep = 1;
+ }
+
+ if (do_sep) {
+ sv_catpvs(sv,"][");
+ do_sep = 0;
+ }
if (flags & ANYOF_UNICODE)
sv_catpvs(sv, "{unicode}");
if (lv) {
if (sw) {
U8 s[UTF8_MAXBYTES_CASE+1];
-
+
for (i = 0; i <= 256; i++) { /* just the first 256 */
uvchr_to_utf8(s, i);
case 's':
case 'S':
case 'u':
- SvREFCNT_dec((SV*)ri->data->data[n]);
+ SvREFCNT_dec(MUTABLE_SV(ri->data->data[n]));
break;
case 'f':
Safefree(ri->data->data[n]);
op_free((OP_4tree*)ri->data->data[n]);
PAD_RESTORE_LOCAL(old_comppad);
- SvREFCNT_dec((SV*)new_comppad);
+ SvREFCNT_dec(MUTABLE_SV(new_comppad));
new_comppad = NULL;
break;
case 'n':
case 'S':
case 'p': /* actually an AV, but the dup function is identical. */
case 'u': /* actually an HV, but the dup function is identical. */
- d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
+ d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
break;
case 'f':
/* This is cheating. */
#endif
const regnode *nextbranch= NULL;
I32 word_idx;
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);