X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.h;h=0bd00e2a6dcb38b9612f797d1c508faca6b2f032;hb=43051805d53a3e4c5b2185a17655cab5bedc17ed;hp=9d07ff9d1c3c9af696f53564062df992587333d1;hpb=bbce6d69784bf43b0e69e8d312042d65f258af23;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.h b/regcomp.h index 9d07ff9..0bd00e2 100644 --- a/regcomp.h +++ b/regcomp.h @@ -1,6 +1,8 @@ /* regcomp.h */ +typedef OP OP_4tree; /* Will be redefined later. */ + /* * The "internal use only" fields in regexp.h are present to pass info from * compile to execute that permits the execute phase to run lots faster on @@ -31,6 +33,18 @@ * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.] */ +/* #ifndef gould */ +/* #ifndef cray */ +/* #ifndef eta10 */ +#define REGALIGN +/* #endif */ +/* #endif */ +/* #endif */ + +#ifdef REGALIGN +# define REGALIGN_STRUCT +#endif + /* * Structure for regexp "program". This is essentially a linear encoding * of a nondeterministic finite-state machine (aka syntax charts or @@ -76,9 +90,9 @@ #define OPEN 25 /* num Mark this point in input as start of #n. */ #define CLOSE 26 /* num Analogous to OPEN. */ #define MINMOD 27 /* no Next operator is not greedy. */ -#define GBOL 28 /* no Matches where last m//g left off. */ -#define IFMATCH 29 /* no Succeeds if the following matches. */ -#define UNLESSM 30 /* no Fails if the following matches. */ +#define GPOS 28 /* no Matches where last m//g left off. */ +#define IFMATCH 29 /* off Succeeds if the following matches. */ +#define UNLESSM 30 /* off Fails if the following matches. */ #define SUCCEED 31 /* no Return from a subroutine, basically. */ #define WHILEM 32 /* no Do curly processing and see if rest matches. 
*/ #define ALNUM 33 /* no Match any alphanumeric character */ @@ -91,6 +105,21 @@ #define NSPACEL 40 /* no Match any non-whitespace char in locale */ #define DIGIT 41 /* no Match any numeric character */ #define NDIGIT 42 /* no Match any non-numeric character */ +#define CURLYM 43 /* no Match this medium-complex thing {n,m} times. */ +#define CURLYN 44 /* no Match next-after-this simple thing + {n,m} times, set parenths. */ +#define TAIL 45 /* no Match empty string. Can jump here from outside. */ +#define REFF 46 /* num Match already matched string, folded */ +#define REFFL 47 /* num Match already matched string, folded in loc. */ +#define EVAL 48 /* evl Execute some Perl code. */ +#define LONGJMP 49 /* off Jump far away, requires REGALIGN_STRUCT. */ +#define BRANCHJ 50 /* off BRANCH with long offset, requires REGALIGN_STRUCT. */ +#define IFTHEN 51 /* off Switch, should be preceded by switcher. */ +#define GROUPP 52 /* num Whether the group matched. */ +#define LOGICAL 53 /* no Next opcode should set the flag only. */ +#define SUSPEND 54 /* off "Independent" sub-RE. */ +#define RENUM 55 /* off Group with independently numbered parens. */ +#define OPTIMIZED 56 /* off Placeholder for dump. */ /* * Opcode notes: @@ -111,25 +140,13 @@ * per match) are implemented with STAR and PLUS for speed * and to minimize recursive plunges. * - * OPEN,CLOSE ...are numbered at compile time. + * OPEN,CLOSE,GROUPP ...are numbered at compile time.
*/ #ifndef DOINIT -EXT char regarglen[]; -#else -EXT char regarglen[] = { - 0,0,0,0,0,0,0,0,0,0, - /*CURLY*/ 4, /*CURLYX*/ 4, - 0,0,0,0,0,0,0,0,0,0,0,0, - /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; -#endif - -#ifndef DOINIT -EXT char regkind[]; +EXTCONST U8 regkind[]; #else -EXT char regkind[] = { +EXTCONST U8 regkind[] = { END, BOL, BOL, @@ -158,9 +175,9 @@ EXT char regkind[] = { OPEN, CLOSE, MINMOD, - BOL, - BRANCH, - BRANCH, + GPOS, + BRANCHJ, + BRANCHJ, END, WHILEM, ALNUM, @@ -173,23 +190,38 @@ EXT char regkind[] = { NSPACE, DIGIT, NDIGIT, + CURLY, + CURLY, + NOTHING, + REF, + REF, + EVAL, + LONGJMP, + BRANCHJ, + BRANCHJ, + GROUPP, + LOGICAL, + BRANCHJ, + BRANCHJ, + NOTHING, }; #endif -/* The following have no fixed length. */ +/* The following have no fixed length. char* since we do strchr on it. */ #ifndef DOINIT -EXT char varies[]; +EXTCONST char varies[]; #else -EXT char varies[] = { - BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, WHILEM, 0 +EXTCONST char varies[] = { + BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, + WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, 0 }; #endif -/* The following always have a length of 1. */ +/* The following always have a length of 1. char* since we do strchr on it. */ #ifndef DOINIT -EXT char simple[]; +EXTCONST char simple[]; #else -EXT char simple[] = { +EXTCONST char simple[] = { ANY, SANY, ANYOF, ALNUM, ALNUML, NALNUM, NALNUML, SPACE, SPACEL, NSPACE, NSPACEL, @@ -197,8 +229,6 @@ EXT char simple[] = { }; #endif -EXT char regdummy; - /* * A node is one char of opcode followed by two chars of "next" pointer. * "Next" pointers are stored as two 8-bit pieces, high order first. The @@ -215,42 +245,121 @@ EXT char regdummy; * stored negative.] 
*/ -#ifndef gould -#ifndef cray -#ifndef eta10 -#define REGALIGN -#endif -#endif -#endif +#ifdef REGALIGN_STRUCT -#define OP(p) (*(p)) +struct regnode_string { + U8 flags; + U8 type; + U16 next_off; + U8 string[1]; +}; + +struct regnode_1 { + U8 flags; + U8 type; + U16 next_off; + U32 arg1; +}; + +struct regnode_2 { + U8 flags; + U8 type; + U16 next_off; + U16 arg1; + U16 arg2; +}; + +#endif + +/* I16_MAX is no good for REG_INFTY because sizeof(short) > 2 + * is perfectly fine. In Cray C90 sizeof(short) == 4, + * in Cray T90 sizeof(short) == 8. */ +#define REG_INFTY ((1<<15)-1) -#ifndef lint #ifdef REGALIGN -#define NEXT(p) (*(short*)(p+1)) -#define ARG1(p) (*(unsigned short*)(p+3)) -#define ARG2(p) (*(unsigned short*)(p+5)) +# define ARG_VALUE(arg) (arg) +# define ARG__SET(arg,val) ((arg) = (val)) #else -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#define ARG1(p) (((*((p)+3)&0377)<<8) + (*((p)+4)&0377)) -#define ARG2(p) (((*((p)+5)&0377)<<8) + (*((p)+6)&0377)) +# define ARG_VALUE(arg) (((*((char*)&arg)&0377)<<8) + (*(((char*)&arg)+1)&0377)) +# define ARG__SET(arg,val) (((char*)&arg)[0] = (val) >> 8; ((char*)&arg)[1] = (val) & 0377;) #endif + +#define ARG(p) ARG_VALUE(ARG_LOC(p)) +#define ARG1(p) ARG_VALUE(ARG1_LOC(p)) +#define ARG2(p) ARG_VALUE(ARG2_LOC(p)) +#define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val)) +#define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val)) +#define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) + +#ifndef lint +# ifdef REGALIGN +# ifdef REGALIGN_STRUCT +# define NEXT_OFF(p) ((p)->next_off) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */ +# else +# define NEXT_OFF(p) (*(short*)(p+1)) +# define NODE_ALIGN(node) ((!((long)node & 1)) ? node++ : 0) +# define NODE_ALIGN_FILL(node) ((!((long)node & 1)) ? 
*node++ = 127 : 0) +# endif +# else +# define NEXT_OFF(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) +# endif #else /* lint */ -#define NEXT(p) 0 +# define NEXT_OFF(p) 0 +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) #endif /* lint */ -#define OPERAND(p) ((p) + 3) +#define SIZE_ALIGN NODE_ALIGN + +#ifdef REGALIGN_STRUCT +# define OP(p) ((p)->type) +# define OPERAND(p) (((struct regnode_string *)p)->string) +# define NODE_ALIGN(node) +# define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) +# define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) +# define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) +# define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ +# define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) +#else +# define OP(p) (*(p)) +# define OPERAND(p) ((p) + 3) +# define ARG_LOC(p) (*(unsigned short*)(p+3)) +# define ARG1_LOC(p) (*(unsigned short*)(p+3)) +# define ARG2_LOC(p) (*(unsigned short*)(p+5)) +typedef char* regnode; +# define NODE_STEP_REGNODE NODE_STEP_B +# define EXTRA_STEP_2ARGS 4 +#endif #ifdef REGALIGN -#define NEXTOPER(p) ((p) + 4) -#define PREVOPER(p) ((p) - 4) +# define NODE_STEP_B 4 +#else +# define NODE_STEP_B 3 +#endif + +#define NEXTOPER(p) ((p) + NODE_STEP_REGNODE) +#define PREVOPER(p) ((p) - NODE_STEP_REGNODE) + +#ifdef REGALIGN_STRUCT +# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ + (ptr)->type = op; (ptr)->next_off = 0; (ptr)++; } STMT_END +# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ + ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END #else -#define NEXTOPER(p) ((p) + 3) -#define PREVOPER(p) ((p) - 3) +# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ + *(ptr)++ = op; *(ptr)++ = '\0'; *(ptr)++ = '\0'; } STMT_END +# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ + ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 2; } STMT_END #endif #define MAGIC 0234 +#define SIZE_ONLY (regcode == &regdummy) + /* Flags for
first parameter byte of ANYOF */ #define ANYOF_INVERT 0x40 #define ANYOF_FOLD 0x20 @@ -261,6 +370,20 @@ EXT char regdummy; #define ANYOF_SPACEL 0x02 #define ANYOF_NSPACEL 0x01 +/* Utility macros for bitmap of ANYOF */ +#define ANYOF_BYTE(p,c) (p)[1 + (((c) >> 3) & 31)] +#define ANYOF_BIT(c) (1 << ((c) & 7)) +#define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c)) +#define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c)) +#define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c)) + +#ifdef REGALIGN_STRUCT +#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1) +#else +#define ANY_SKIP 32 /* overwrite the first byte of + * the next guy. */ +#endif + /* * Utility definitions. */ @@ -274,4 +397,63 @@ EXT char regdummy; #define UCHARAT(p) regdummy #endif /* lint */ -#define FAIL(m) croak("/%.127s/: %s",regprecomp,m) +#define FAIL(m) croak ("/%.127s/: %s", regprecomp,m) +#define FAIL2(pat,m) re_croak2("/%.127s/: ",pat,regprecomp,m) + +#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode)) + +#ifdef REG_COMP_C +const static U8 regarglen[] = { +# ifdef REGALIGN_STRUCT + 0,0,0,0,0,0,0,0,0,0, + /*CURLY*/ EXTRA_SIZE(struct regnode_2), + /*CURLYX*/ EXTRA_SIZE(struct regnode_2), + 0,0,0,0,0,0,0,0,0,0,0,0, + /*REF*/ EXTRA_SIZE(struct regnode_1), + /*OPEN*/ EXTRA_SIZE(struct regnode_1), + /*CLOSE*/ EXTRA_SIZE(struct regnode_1), + 0,0, + /*IFMATCH*/ EXTRA_SIZE(struct regnode_1), + /*UNLESSM*/ EXTRA_SIZE(struct regnode_1), + 0,0,0,0,0,0,0,0,0,0,0,0, + /*CURLYM*/ EXTRA_SIZE(struct regnode_2), + /*CURLYN*/ EXTRA_SIZE(struct regnode_2), + 0, + /*REFF*/ EXTRA_SIZE(struct regnode_1), + /*REFFL*/ EXTRA_SIZE(struct regnode_1), + /*EVAL*/ EXTRA_SIZE(struct regnode_1), + /*LONGJMP*/ EXTRA_SIZE(struct regnode_1), + /*BRANCHJ*/ EXTRA_SIZE(struct regnode_1), + /*IFTHEN*/ EXTRA_SIZE(struct regnode_1), + /*GROUPP*/ EXTRA_SIZE(struct regnode_1), + /*LOGICAL*/ 0, + /*SUSPEND*/ EXTRA_SIZE(struct regnode_1), + /*RENUM*/ EXTRA_SIZE(struct regnode_1), 0, +# else + 0,0,0,0,0,0,0,0,0,0, + /*CURLY*/ 
4, /*CURLYX*/ 4, + 0,0,0,0,0,0,0,0,0,0,0,0, + /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2, + 0,0, /*IFMATCH*/ 2, /*UNLESSM*/ 2, + 0,0,0,0,0,0,0,0,0,0,0,0,/*CURLYM*/ 4,/*CURLYN*/ 4, + 0, /*REFF*/ 2, /*REFFL*/ 2, /*EVAL*/ 2, /*LONGJMP*/ 2, /*BRANCHJ*/ 2, + /*IFTHEN*/ 2, /*GROUPP*/ 2, /*LOGICAL*/ 0, /*SUSPEND*/ 2, /*RENUM*/ 2, 0, +# endif +}; + +const static char reg_off_by_arg[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0 .. 15 */ + 0,0,0,0,0,0,0,0,0,0,0,0,0, /*IFMATCH*/ 2, /*UNLESSM*/ 2, 0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 32 .. 47 */ + 0, /*LONGJMP*/ 1, /*BRANCHJ*/ 1, /*IFTHEN*/ 1, 0, 0, + /*SUSPEND*/ 1, /*RENUM*/ 1,0, +}; +#endif + +#define REG_SEEN_ZERO_LEN 1 +#define REG_SEEN_LOOKBEHIND 2 +#define REG_SEEN_GPOS 4 + +#ifdef DEBUGGING +EXT char *colors[4]; /* not dEXT since we do EXTERN/INTERN.h shuffle */ +#endif