|NULLOK const regnode *plast \
|NN SV* sv|I32 indent|U32 depth
Es |void |put_byte |NN SV* sv|int c
-Es |void |dump_trie |NN const struct _reg_trie_data *trie|U32 depth
-Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth
-Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth
+Es |void |dump_trie |NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 depth
+Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
+Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
# endif
#endif
#if defined(PERL_CORE) || defined(PERL_EXT)
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
-#define dump_trie(a,b) S_dump_trie(aTHX_ a,b)
-#define dump_trie_interim_list(a,b,c) S_dump_trie_interim_list(aTHX_ a,b,c)
-#define dump_trie_interim_table(a,b,c) S_dump_trie_interim_table(aTHX_ a,b,c)
+#define dump_trie(a,b,c) S_dump_trie(aTHX_ a,b,c)
+#define dump_trie_interim_list(a,b,c,d) S_dump_trie_interim_list(aTHX_ a,b,c,d)
+#define dump_trie_interim_table(a,b,c,d) S_dump_trie_interim_table(aTHX_ a,b,c,d)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
#endif
# endif
STATIC void S_put_byte(pTHX_ SV* sv, int c)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, U32 depth)
+STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 depth)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
__attribute__nonnull__(pTHX_1);
STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *state, regnode *p, const regnode *val, U32 depth)
#ifdef DEBUGGING
/*
- dump_trie(trie)
- dump_trie_interim_list(trie,next_alloc)
- dump_trie_interim_table(trie,next_alloc)
+ dump_trie(trie,widecharmap)
+ dump_trie_interim_list(trie,widecharmap,next_alloc)
+ dump_trie_interim_table(trie,widecharmap,next_alloc)
These routines dump out a trie in a somewhat readable format.
The _interim_ variants are used for debugging the interim
*/
/*
- dump_trie(trie)
Dumps the final compressed table form of the trie to Perl_debug_log.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
}
}
/*
- dump_trie_interim_list(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in list form.
List tries normally only are used for construction when the number of
possible chars (trie->uniquecharcount) is very high.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc,U32 depth)
+S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, U32 next_alloc, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/* print out the table precompression. */
PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
}
/*
- dump_trie_interim_table(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in table form.
This is the normal DFA style state transition table, with a few
twists to facilitate compression later.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, U32 next_alloc, U32 depth)
{
U32 state;
U16 charid;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/*
dVAR;
/* first pass, loop through and scan words */
reg_trie_data *trie;
+ HV *widecharmap = NULL;
regnode *cur;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
STRLEN len = 0;
)
);
- const U32 data_slot = add_data( pRExC_state, 1, "t" );
+ const U32 data_slot = add_data( pRExC_state, 2, "tu" );
SV *re_trie_maxbuff;
#ifndef DEBUGGING
/* these are only used during construction but are useful during
}
} else {
SV** svpp;
- if ( !trie->widecharmap )
- trie->widecharmap = newHV();
+ if ( !widecharmap )
+ widecharmap = newHV();
- svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 );
+ svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
if ( !svpp )
Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
DEBUG_TRIE_COMPILE_r(
PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
(int)depth * 2 + 2,"",
- ( trie->widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
+ ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
(int)trie->minlen, (int)trie->maxlen )
);
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
if ( !svpp ) {
charid = 0;
} else {
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_list(trie,next_alloc,depth+1)
+ dump_trie_interim_list(trie,widecharmap,next_alloc,depth+1)
);
trie->trans
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
charid = svpp ? (U16)SvIV(*svpp) : 0;
}
if ( charid ) {
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_table(trie,next_alloc,depth+1)
+ dump_trie_interim_table(trie,widecharmap,next_alloc,depth+1)
);
{
/* and now dump out the compressed format */
DEBUG_TRIE_COMPILE_r(
- dump_trie(trie,depth+1)
+ dump_trie(trie,widecharmap,depth+1)
);
{ /* Modify the program and insert the new TRIE node*/
/* But first we check to see if there is a common prefix we can
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
- if ( trie->bitmap && !trie->widecharmap && !trie->jump ) {
+ if ( trie->bitmap && !widecharmap && !trie->jump ) {
U32 state;
for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
U32 ofs = 0;
Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
});
} /* end node insert */
+ RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
#ifndef DEBUGGING
SvREFCNT_dec(TRIE_REVCHARMAP(trie));
#endif
switch (ri->data->what[n]) {
case 's':
case 'S':
+ case 'u':
SvREFCNT_dec((SV*)ri->data->data[n]);
break;
case 'f':
OP_REFCNT_UNLOCK;
if ( !refcount ) {
PerlMemShared_free(trie->charmap);
- if (trie->widecharmap)
- SvREFCNT_dec((SV*)trie->widecharmap);
PerlMemShared_free(trie->states);
PerlMemShared_free(trie->trans);
if (trie->bitmap)
for (i = 0; i < count; i++) {
d->what[i] = ri->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sSfpontT
+ /* legal options are one of: sSfpontTu
see also regcomp.h and pregfree() */
case 's':
case 'S':
case 'p': /* actually an AV, but the dup function is identical. */
+ case 'u': /* actually an HV, but the dup function is identical. */
d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
break;
case 'f':
* strings resulting from casefolding the single-character entries
* in the character class
* t - trie struct
+ * u - trie struct's widecharmap (an HV, so can't share, must dup)
* T - aho-trie struct
* S - sv for named capture lookup
* 20010712 mjd@plover.com
U16 uniquecharcount; /* unique chars in trie (width of trans table) */
U32 lasttrans; /* last valid transition element */
U16 *charmap; /* byte to charid lookup array */
- HV *widecharmap; /* code points > 255 to charid */
reg_trie_state *states; /* state data */
reg_trie_trans *trans; /* array of transition elements */
char *bitmap; /* stclass bitmap */
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
-foldlen, foldbuf, uniflags) STMT_START { \
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
+uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
} \
else { \
charid = 0; \
- if (trie->widecharmap) { \
- SV** const svpp = hv_fetch(trie->widecharmap, \
+ if (widecharmap) { \
+ SV** const svpp = hv_fetch(widecharmap, \
(char*)&uvc, sizeof(UV), 0); \
if (svpp) \
charid = (U16)SvIV(*svpp); \
= (reg_ac_data*)progi->data->data[ ARG( c ) ];
reg_trie_data *trie
= (reg_trie_data*)progi->data->data[ aho->trie ];
+ HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
const char *last_start = strend - trie->minlen;
#ifdef DEBUGGING
}
points[pointpos++ % maxlen]= uc;
- REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
- uvc, charid, foldlen, foldbuf, uniflags);
+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+ uscan, len, uvc, charid, foldlen,
+ foldbuf, uniflags);
DEBUG_TRIE_EXECUTE_r({
dump_exec_pos( (char *)uc, c, strend, real_start,
s, do_utf8 );
/* what trie are we using right now */
reg_trie_data * const trie
= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
+ HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
U32 state = trie->startstate;
if (trie->bitmap && trie_type != trie_utf8_fold &&
});
if ( base ) {
- REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
- uvc, charid, foldlen, foldbuf, uniflags);
+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+ uscan, len, uvc, charid, foldlen,
+ foldbuf, uniflags);
if (charid &&
(base + charid > trie->uniquecharcount )