Anonymous unions aren't legal ANSI C
[p5sagit/p5-mst-13.2.git] / regexp.h
index 81552ba..a9c2130 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -1,6 +1,7 @@
 /*    regexp.h
  *
- *    Copyright (c) 1997-2002, Larry Wall
+ *    Copyright (C) 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2003,
+ *    by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -36,6 +37,9 @@ typedef struct regexp {
         struct reg_data *data; /* Additional data. */
        char *subbeg;           /* saved or original string 
                                   so \digit works forever. */
+#ifdef PERL_OLD_COPY_ON_WRITE
+        SV *saved_copy;         /* If non-NULL, SV which is COW from original */
+#endif
         U32 *offsets;           /* offset annotations 20001228 MJD */
        I32 sublen;             /* Length of string pointed by subbeg */
        I32 refcnt;
@@ -71,6 +75,7 @@ typedef struct regexp {
 #define ROPT_NAUGHTY           0x20000 /* how exponential is this pattern? */
 #define ROPT_COPY_DONE         0x40000 /* subbeg is a copy of the string */
 #define ROPT_TAINTED_SEEN      0x80000
+#define ROPT_MATCH_UTF8                0x10000000 /* subbeg is utf-8 */
 
 #define RE_USE_INTUIT_NOML     0x0100000 /* Best to intuit before matching */
 #define RE_USE_INTUIT_ML       0x0200000
@@ -99,12 +104,35 @@ typedef struct regexp {
                                         ? RX_MATCH_COPIED_on(prog) \
                                         : RX_MATCH_COPIED_off(prog))
 
+#ifdef PERL_OLD_COPY_ON_WRITE /* variant that also handles rx->saved_copy */
+#define RX_MATCH_COPY_FREE(rx) \
+       STMT_START {if ((rx)->saved_copy) { \
+           SV_CHECK_THINKFIRST_COW_DROP((rx)->saved_copy); \
+       } \
+       if (RX_MATCH_COPIED(rx)) { \
+           Safefree((rx)->subbeg); \
+           RX_MATCH_COPIED_off(rx); \
+       }} STMT_END
+#else /* no saved_copy member: only free a copied subbeg */
+#define RX_MATCH_COPY_FREE(rx) \
+       STMT_START {if (RX_MATCH_COPIED(rx)) { \
+           Safefree((rx)->subbeg); \
+           RX_MATCH_COPIED_off(rx); \
+       }} STMT_END
+#endif /* PERL_OLD_COPY_ON_WRITE */
+
+#define RX_MATCH_UTF8(prog)            ((prog)->reganch & ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_on(prog)         ((prog)->reganch |= ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_off(prog)                ((prog)->reganch &= ~ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_set(prog, t)     ((t) \
+                       ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
+                       : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+    
 #define REXEC_COPY_STR 0x01            /* Need to copy the string. */
 #define REXEC_CHECKED  0x02            /* check_substr already checked. */
 #define REXEC_SCREAM   0x04            /* use scream table. */
 #define REXEC_IGNOREPOS        0x08            /* \G matches at start. */
 #define REXEC_NOT_FIRST        0x10            /* This is another iteration of //g. */
-#define REXEC_ML       0x20            /* $* was set. */
 
 #define ReREFCNT_inc(re) ((void)(re && re->refcnt++), re)
 #define ReREFCNT_dec(re) CALLREGFREE(aTHX_ re)
@@ -118,3 +146,144 @@ typedef struct regexp {
 #define FBMrf_MULTILINE        1
 
 struct re_scream_pos_data_s;
+
+/* an accepting state/position */
+struct _reg_trie_accepted {
+    U8   *endpos;
+    U16  wordnum;
+};
+typedef struct _reg_trie_accepted reg_trie_accepted;
+
+
+/* structures for holding and saving the state maintained by regmatch() */
+
+typedef I32 CHECKPOINT;
+
+typedef struct re_cc_state
+{
+    I32 ss;
+    regnode *node;
+    struct re_cc_state *prev;
+    struct regmatch_state *cc; /* state corresponding to the current curly */
+    regexp *re;
+} re_cc_state;
+
+
+typedef enum {
+    resume_TRIE1,
+    resume_TRIE2,
+    resume_CURLYX,
+    resume_WHILEM1,
+    resume_WHILEM2,
+    resume_WHILEM3,
+    resume_WHILEM4,
+    resume_WHILEM5,
+    resume_WHILEM6,
+    resume_CURLYM1,
+    resume_CURLYM2,
+    resume_CURLYM3,
+    resume_CURLYM4,
+    resume_IFMATCH,
+    resume_PLUS1,
+    resume_PLUS2,
+    resume_PLUS3,
+    resume_PLUS4,
+    resume_END
+} regmatch_resume_states;
+
+
+typedef struct regmatch_state {
+
+    /* these vars contain state that needs to be maintained
+     * across the main while loop ... */
+
+    regmatch_resume_states resume_state; /* where to jump to on return */
+    regnode *scan;             /* Current node. */
+    regnode *next;             /* Next node. */
+    bool minmod;               /* the next "{n,m}" is a "{n,m}?" */
+    bool sw;                   /* the condition value in (?(cond)a|b) */
+    int logical;
+    I32 unwind;                        /* savestack index of current unwind block */
+    struct regmatch_state  *cc;        /* current innermost curly state */
+    char *locinput;
+
+    /* ... while the rest of these are local to an individual branch */
+
+    I32 n;                     /* no or next */
+    I32 ln;                    /* len or last */
+
+    union {
+       struct {
+           reg_trie_accepted *accept_buff;
+           U32 accepted;       /* how many accepting states we have seen */
+       } trie;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+       } eval;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           struct regmatch_state *outercc; /* outer CURLYX state if any */
+
+           /* these contain the current curly state, and are accessed
+            * by subsequent WHILEMs */
+           int         parenfloor;/* how far back to strip paren data */
+           int         cur;    /* how many instances of scan we've matched */
+           int         min;    /* the minimal number of scans to match */
+           int         max;    /* the maximal number of scans to match */
+           regnode *   scan;   /* the thing to match */
+           char *      lastloc;/* where we started matching this scan */
+       } curlyx;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+           struct regmatch_state *savecc;
+           char *lastloc;      /* Detection of 0-len. */
+           I32 cache_offset;
+           I32 cache_bit;
+       } whilem;
+
+       struct {
+           I32 paren;
+           I32 c1, c2;         /* case fold search */
+           CHECKPOINT lastcp;
+           I32 l;
+           I32 matches;
+           I32 maxwanted;
+       } curlym;
+
+       struct {
+           I32 paren;
+           CHECKPOINT lastcp;
+           I32 c1, c2;         /* case fold search */
+           char *e;
+           char *old;
+           int count;
+       } plus; /* and CURLYN/CURLY/STAR */
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+           struct regmatch_state *savecc;
+           re_cc_state *cur_call_cc;
+           regexp *end_re;
+       } end;
+    } u;
+
+    re_cc_state *reg_call_cc;  /* saved value of PL_reg_call_cc */
+} regmatch_state;
+
+/* how many regmatch_state structs to allocate as a single slab.
+ * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev
+ * pointers, plus 1 for any mythical malloc overhead. */
+#define PERL_REGMATCH_SLAB_SLOTS \
+    ((4096 - 3 * sizeof (void*)) / sizeof(regmatch_state))
+
+typedef struct regmatch_slab {
+    regmatch_state states[PERL_REGMATCH_SLAB_SLOTS];
+    struct regmatch_slab *prev, *next;
+} regmatch_slab;