1 /* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
3 This file is built by regcomp.pl from regcomp.sym.
4 Any changes made here will be lost!
7 /* Regops and State definitions */
10 #define REGMATCH_STATE_MAX 130
/*
 * Regop opcode numbers, assigned consecutively from END (0) to PSEUDO (90).
 * In each trailing comment the first field repeats the value in hex and the
 * remainder is a one-line description of the op.
 */
12 #define END 0 /* 0x00 End of program. */
13 #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
14 #define BOL 2 /* 0x02 Match "" at beginning of line. */
15 #define MBOL 3 /* 0x03 Same, assuming multiline. */
16 #define SBOL 4 /* 0x04 Same, assuming singleline. */
17 #define EOS 5 /* 0x05 Match "" at end of string. */
18 #define EOL 6 /* 0x06 Match "" at end of line. */
19 #define MEOL 7 /* 0x07 Same, assuming multiline. */
20 #define SEOL 8 /* 0x08 Same, assuming singleline. */
21 #define BOUND 9 /* 0x09 Match "" at any word boundary */
22 #define BOUNDL 10 /* 0x0a Match "" at any word boundary */
23 #define NBOUND 11 /* 0x0b Match "" at any word non-boundary */
24 #define NBOUNDL 12 /* 0x0c Match "" at any word non-boundary */
25 #define GPOS 13 /* 0x0d Matches where last m//g left off. */
26 #define REG_ANY 14 /* 0x0e Match any one character (except newline). */
27 #define SANY 15 /* 0x0f Match any one character. */
28 #define CANY 16 /* 0x10 Match any one byte. */
29 #define ANYOF 17 /* 0x11 Match character in (or not in) this class. */
30 #define ALNUM 18 /* 0x12 Match any alphanumeric character */
31 #define ALNUML 19 /* 0x13 Match any alphanumeric char in locale */
32 #define NALNUM 20 /* 0x14 Match any non-alphanumeric character */
33 #define NALNUML 21 /* 0x15 Match any non-alphanumeric char in locale */
34 #define SPACE 22 /* 0x16 Match any whitespace character */
35 #define SPACEL 23 /* 0x17 Match any whitespace char in locale */
36 #define NSPACE 24 /* 0x18 Match any non-whitespace character */
37 #define NSPACEL 25 /* 0x19 Match any non-whitespace char in locale */
38 #define DIGIT 26 /* 0x1a Match any numeric character */
39 #define DIGITL 27 /* 0x1b Match any numeric character in locale */
40 #define NDIGIT 28 /* 0x1c Match any non-numeric character */
41 #define NDIGITL 29 /* 0x1d Match any non-numeric character in locale */
42 #define CLUMP 30 /* 0x1e Match any combining character sequence */
43 #define BRANCH 31 /* 0x1f Match this alternative, or the next... */
44 #define BACK 32 /* 0x20 Match "", "next" ptr points backward. */
45 #define EXACT 33 /* 0x21 Match this string (preceded by length). */
46 #define EXACTF 34 /* 0x22 Match this string, folded (prec. by length). */
47 #define EXACTFL 35 /* 0x23 Match this string, folded in locale (w/len). */
48 #define NOTHING 36 /* 0x24 Match empty string. */
49 #define TAIL 37 /* 0x25 Match empty string. Can jump here from outside. */
50 #define STAR 38 /* 0x26 Match this (simple) thing 0 or more times. */
51 #define PLUS 39 /* 0x27 Match this (simple) thing 1 or more times. */
52 #define CURLY 40 /* 0x28 Match this simple thing {n,m} times. */
53 #define CURLYN 41 /* 0x29 Capture next-after-this simple thing */
54 #define CURLYM 42 /* 0x2a Capture this medium-complex thing {n,m} times. */
55 #define CURLYX 43 /* 0x2b Match this complex thing {n,m} times. */
56 #define WHILEM 44 /* 0x2c Do curly processing and see if rest matches. */
57 #define OPEN 45 /* 0x2d Mark this point in input as start of ... (description truncated in the generated comment; TODO restore from regcomp.sym) */
58 #define CLOSE 46 /* 0x2e Analogous to OPEN. */
59 #define REF 47 /* 0x2f Match some already matched string */
60 #define REFF 48 /* 0x30 Match already matched string, folded */
61 #define REFFL 49 /* 0x31 Match already matched string, folded in loc. */
62 #define IFMATCH 50 /* 0x32 Succeeds if the following matches. */
63 #define UNLESSM 51 /* 0x33 Fails if the following matches. */
64 #define SUSPEND 52 /* 0x34 "Independent" sub-RE. */
65 #define IFTHEN 53 /* 0x35 Switch, should be preceded by switcher. */
66 #define GROUPP 54 /* 0x36 Whether the group matched. */
67 #define LONGJMP 55 /* 0x37 Jump far away. */
68 #define BRANCHJ 56 /* 0x38 BRANCH with long offset. */
69 #define EVAL 57 /* 0x39 Execute some Perl code. */
70 #define MINMOD 58 /* 0x3a Next operator is not greedy. */
71 #define LOGICAL 59 /* 0x3b Next opcode should set the flag only. */
72 #define RENUM 60 /* 0x3c Group with independently numbered parens. */
73 #define TRIE 61 /* 0x3d Match many EXACT(FL?)? at once. flags==type */
74 #define TRIEC 62 /* 0x3e Same as TRIE, but with embedded charclass data */
75 #define AHOCORASICK 63 /* 0x3f Aho Corasick stclass. flags==type */
76 #define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
77 #define GOSUB 65 /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
78 #define GOSTART 66 /* 0x42 recurse to start of pattern */
79 #define NREF 67 /* 0x43 Match some already matched string */
80 #define NREFF 68 /* 0x44 Match already matched string, folded */
81 #define NREFFL 69 /* 0x45 Match already matched string, folded in loc. */
82 #define NGROUPP 70 /* 0x46 Whether the group matched. */
83 #define INSUBP 71 /* 0x47 Whether we are in a specific recurse. */
84 #define DEFINEP 72 /* 0x48 Never execute directly. */
85 #define ENDLIKE 73 /* 0x49 Used only for the type field of verbs */
86 #define OPFAIL 74 /* 0x4a Same as (?!) */
87 #define ACCEPT 75 /* 0x4b Accepts the current matched string. */
88 #define VERB 76 /* 0x4c Used only for the type field of verbs */
89 #define PRUNE 77 /* 0x4d Pattern fails at this startpoint if no-backtracking through this */
90 #define MARKPOINT 78 /* 0x4e Push the current location for rollback by cut. */
91 #define SKIP 79 /* 0x4f On failure skip forward (to the mark) before retrying */
92 #define COMMIT 80 /* 0x50 Pattern fails outright if backtracking through this */
93 #define CUTGROUP 81 /* 0x51 On failure go to the next alternation in the group */
94 #define KEEPS 82 /* 0x52 $& begins here. */
95 #define LNBREAK 83 /* 0x53 generic newline pattern */
96 #define VERTWS 84 /* 0x54 vertical whitespace (Perl 6) */
97 #define NVERTWS 85 /* 0x55 not vertical whitespace (Perl 6) */
98 #define HORIZWS 86 /* 0x56 horizontal whitespace (Perl 6) */
99 #define NHORIZWS 87 /* 0x57 not horizontal whitespace (Perl 6) */
100 #define FOLDCHAR 88 /* 0x58 codepoint with tricky case folding properties. */
101 #define OPTIMIZED 89 /* 0x59 Placeholder for dump. */
102 #define PSEUDO 90 /* 0x5a Pseudo opcode for internal use. */
103 /* ------------ States ------------- */
/*
 * State numbers, expressed as offsets REGNODE_MAX + 1 .. REGNODE_MAX + 40.
 * They come in pairs: NAME and NAME_fail, with NAME_fail always NAME + 1.
 * The trailing comment names the regop each state belongs to.
 * NOTE(review): REGNODE_MAX is defined outside the visible part of this
 * file; presumably it is 90 (PSEUDO, the highest opcode), which would make
 * KEEPS_next_fail equal to REGMATCH_STATE_MAX (130) -- confirm.
 */
104 #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
105 #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
106 #define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */
107 #define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */
108 #define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */
109 #define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */
110 #define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */
111 #define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */
112 #define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */
113 #define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */
114 #define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */
115 #define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */
116 #define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */
117 #define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */
118 #define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */
119 #define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */
120 #define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */
121 #define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */
122 #define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */
123 #define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */
124 #define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */
125 #define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */
126 #define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */
127 #define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */
128 #define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */
129 #define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */
130 #define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
131 #define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
132 #define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
133 #define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
134 #define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
135 #define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
136 #define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
137 #define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
138 #define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
139 #define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
140 #define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
141 #define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
142 #define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
143 #define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
145 /* PL_regkind[] - What type of regop or state is this. */
148 EXTCONST U8 PL_regkind[];
150 EXTCONST U8 PL_regkind[] = {
163 NBOUND, /* NBOUNDL */
165 REG_ANY, /* REG_ANY */
172 NALNUM, /* NALNUML */
176 NSPACE, /* NSPACEL */
180 NDIGIT, /* NDIGITL */
187 NOTHING, /* NOTHING */
201 BRANCHJ, /* IFMATCH */
202 BRANCHJ, /* UNLESSM */
203 BRANCHJ, /* SUSPEND */
204 BRANCHJ, /* IFTHEN */
206 LONGJMP, /* LONGJMP */
207 BRANCHJ, /* BRANCHJ */
210 LOGICAL, /* LOGICAL */
214 TRIE, /* AHOCORASICK */
215 TRIE, /* AHOCORASICKC */
217 GOSTART, /* GOSTART */
221 NGROUPP, /* NGROUPP */
223 DEFINEP, /* DEFINEP */
224 ENDLIKE, /* ENDLIKE */
225 ENDLIKE, /* OPFAIL */
226 ENDLIKE, /* ACCEPT */
229 VERB, /* MARKPOINT */
234 LNBREAK, /* LNBREAK */
236 NVERTWS, /* NVERTWS */
237 HORIZWS, /* HORIZWS */
238 NHORIZWS, /* NHORIZWS */
239 FOLDCHAR, /* FOLDCHAR */
240 NOTHING, /* OPTIMIZED */
242 /* ------------ States ------------- */
243 TRIE, /* TRIE_next */
244 TRIE, /* TRIE_next_fail */
246 EVAL, /* EVAL_AB_fail */
247 CURLYX, /* CURLYX_end */
248 CURLYX, /* CURLYX_end_fail */
249 WHILEM, /* WHILEM_A_pre */
250 WHILEM, /* WHILEM_A_pre_fail */
251 WHILEM, /* WHILEM_A_min */
252 WHILEM, /* WHILEM_A_min_fail */
253 WHILEM, /* WHILEM_A_max */
254 WHILEM, /* WHILEM_A_max_fail */
255 WHILEM, /* WHILEM_B_min */
256 WHILEM, /* WHILEM_B_min_fail */
257 WHILEM, /* WHILEM_B_max */
258 WHILEM, /* WHILEM_B_max_fail */
259 BRANCH, /* BRANCH_next */
260 BRANCH, /* BRANCH_next_fail */
261 CURLYM, /* CURLYM_A */
262 CURLYM, /* CURLYM_A_fail */
263 CURLYM, /* CURLYM_B */
264 CURLYM, /* CURLYM_B_fail */
265 IFMATCH, /* IFMATCH_A */
266 IFMATCH, /* IFMATCH_A_fail */
267 CURLY, /* CURLY_B_min_known */
268 CURLY, /* CURLY_B_min_known_fail */
269 CURLY, /* CURLY_B_min */
270 CURLY, /* CURLY_B_min_fail */
271 CURLY, /* CURLY_B_max */
272 CURLY, /* CURLY_B_max_fail */
273 COMMIT, /* COMMIT_next */
274 COMMIT, /* COMMIT_next_fail */
275 MARKPOINT, /* MARKPOINT_next */
276 MARKPOINT, /* MARKPOINT_next_fail */
277 SKIP, /* SKIP_next */
278 SKIP, /* SKIP_next_fail */
279 CUTGROUP, /* CUTGROUP_next */
280 CUTGROUP, /* CUTGROUP_next_fail */
281 KEEPS, /* KEEPS_next */
282 KEEPS, /* KEEPS_next_fail */
286 /* regarglen[] - How large is the argument part of the node (in regnodes) */
289 static const U8 regarglen[] = {
330 EXTRA_SIZE(struct regnode_2), /* CURLY */
331 EXTRA_SIZE(struct regnode_2), /* CURLYN */
332 EXTRA_SIZE(struct regnode_2), /* CURLYM */
333 EXTRA_SIZE(struct regnode_2), /* CURLYX */
335 EXTRA_SIZE(struct regnode_1), /* OPEN */
336 EXTRA_SIZE(struct regnode_1), /* CLOSE */
337 EXTRA_SIZE(struct regnode_1), /* REF */
338 EXTRA_SIZE(struct regnode_1), /* REFF */
339 EXTRA_SIZE(struct regnode_1), /* REFFL */
340 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
341 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
342 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
343 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
344 EXTRA_SIZE(struct regnode_1), /* GROUPP */
345 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
346 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
347 EXTRA_SIZE(struct regnode_1), /* EVAL */
350 EXTRA_SIZE(struct regnode_1), /* RENUM */
351 EXTRA_SIZE(struct regnode_1), /* TRIE */
352 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
353 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
354 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
355 EXTRA_SIZE(struct regnode_2L), /* GOSUB */
357 EXTRA_SIZE(struct regnode_1), /* NREF */
358 EXTRA_SIZE(struct regnode_1), /* NREFF */
359 EXTRA_SIZE(struct regnode_1), /* NREFFL */
360 EXTRA_SIZE(struct regnode_1), /* NGROUPP */
361 EXTRA_SIZE(struct regnode_1), /* INSUBP */
362 EXTRA_SIZE(struct regnode_1), /* DEFINEP */
365 EXTRA_SIZE(struct regnode_1), /* ACCEPT */
367 EXTRA_SIZE(struct regnode_1), /* PRUNE */
368 EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
369 EXTRA_SIZE(struct regnode_1), /* SKIP */
370 EXTRA_SIZE(struct regnode_1), /* COMMIT */
371 EXTRA_SIZE(struct regnode_1), /* CUTGROUP */
378 EXTRA_SIZE(struct regnode_1), /* FOLDCHAR */
383 /* reg_off_by_arg[] - Which argument holds the offset to the next node */
385 static const char reg_off_by_arg[] = {
450 0, /* AHOCORASICKC */
479 #endif /* REG_COMP_C */
481 /* PL_reg_name[] - Opcode/state names in string form, for debugging */
484 EXTCONST char * PL_reg_name[];
486 EXTCONST char * const PL_reg_name[] = {
488 "SUCCEED", /* 0x01 */
499 "NBOUNDL", /* 0x0c */
501 "REG_ANY", /* 0x0e */
508 "NALNUML", /* 0x15 */
512 "NSPACEL", /* 0x19 */
516 "NDIGITL", /* 0x1d */
522 "EXACTFL", /* 0x23 */
523 "NOTHING", /* 0x24 */
537 "IFMATCH", /* 0x32 */
538 "UNLESSM", /* 0x33 */
539 "SUSPEND", /* 0x34 */
542 "LONGJMP", /* 0x37 */
543 "BRANCHJ", /* 0x38 */
546 "LOGICAL", /* 0x3b */
550 "AHOCORASICK", /* 0x3f */
551 "AHOCORASICKC", /* 0x40 */
553 "GOSTART", /* 0x42 */
557 "NGROUPP", /* 0x46 */
559 "DEFINEP", /* 0x48 */
560 "ENDLIKE", /* 0x49 */
565 "MARKPOINT", /* 0x4e */
568 "CUTGROUP", /* 0x51 */
570 "LNBREAK", /* 0x53 */
572 "NVERTWS", /* 0x55 */
573 "HORIZWS", /* 0x56 */
574 "NHORIZWS", /* 0x57 */
575 "FOLDCHAR", /* 0x58 */
576 "OPTIMIZED", /* 0x59 */
578 /* ------------ States ------------- */
579 "TRIE_next", /* REGNODE_MAX +0x01 */
580 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
581 "EVAL_AB", /* REGNODE_MAX +0x03 */
582 "EVAL_AB_fail", /* REGNODE_MAX +0x04 */
583 "CURLYX_end", /* REGNODE_MAX +0x05 */
584 "CURLYX_end_fail", /* REGNODE_MAX +0x06 */
585 "WHILEM_A_pre", /* REGNODE_MAX +0x07 */
586 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */
587 "WHILEM_A_min", /* REGNODE_MAX +0x09 */
588 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */
589 "WHILEM_A_max", /* REGNODE_MAX +0x0b */
590 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */
591 "WHILEM_B_min", /* REGNODE_MAX +0x0d */
592 "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */
593 "WHILEM_B_max", /* REGNODE_MAX +0x0f */
594 "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */
595 "BRANCH_next", /* REGNODE_MAX +0x11 */
596 "BRANCH_next_fail", /* REGNODE_MAX +0x12 */
597 "CURLYM_A", /* REGNODE_MAX +0x13 */
598 "CURLYM_A_fail", /* REGNODE_MAX +0x14 */
599 "CURLYM_B", /* REGNODE_MAX +0x15 */
600 "CURLYM_B_fail", /* REGNODE_MAX +0x16 */
601 "IFMATCH_A", /* REGNODE_MAX +0x17 */
602 "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */
603 "CURLY_B_min_known", /* REGNODE_MAX +0x19 */
604 "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */
605 "CURLY_B_min", /* REGNODE_MAX +0x1b */
606 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
607 "CURLY_B_max", /* REGNODE_MAX +0x1d */
608 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
609 "COMMIT_next", /* REGNODE_MAX +0x1f */
610 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
611 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
612 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
613 "SKIP_next", /* REGNODE_MAX +0x23 */
614 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
615 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
616 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
617 "KEEPS_next", /* REGNODE_MAX +0x27 */
618 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
622 /* PL_reg_extflags_name[] - Names of the extflags bits in string form, for debugging */
625 EXTCONST char * PL_reg_extflags_name[];
627 EXTCONST char * const PL_reg_extflags_name[] = {
628 /* Bits in extflags defined: 11111111111101111111111100111111 */
629 "MULTILINE", /* 0x00000001 */
630 "SINGLELINE", /* 0x00000002 */
631 "FOLD", /* 0x00000004 */
632 "EXTENDED", /* 0x00000008 */
633 "KEEPCOPY", /* 0x00000010 */
634 "LOCALE", /* 0x00000020 */
635 "UNUSED_BIT_6", /* 0x00000040 */
636 "UNUSED_BIT_7", /* 0x00000080 */
637 "ANCH_BOL", /* 0x00000100 */
638 "ANCH_MBOL", /* 0x00000200 */
639 "ANCH_SBOL", /* 0x00000400 */
640 "ANCH_GPOS", /* 0x00000800 */
641 "GPOS_SEEN", /* 0x00001000 */
642 "GPOS_FLOAT", /* 0x00002000 */
643 "LOOKBEHIND_SEEN", /* 0x00004000 */
644 "EVAL_SEEN", /* 0x00008000 */
645 "CANY_SEEN", /* 0x00010000 */
646 "NOSCAN", /* 0x00020000 */
647 "CHECK_ALL", /* 0x00040000 */
648 "UNUSED_BIT_19", /* 0x00080000 */
649 "MATCH_UTF8", /* 0x00100000 */
650 "USE_INTUIT_NOML", /* 0x00200000 */
651 "USE_INTUIT_ML", /* 0x00400000 */
652 "INTUIT_TAIL", /* 0x00800000 */
653 "SPLIT", /* 0x01000000 */
654 "COPY_DONE", /* 0x02000000 */
655 "TAINTED_SEEN", /* 0x04000000 */
656 "TAINTED", /* 0x08000000 */
657 "START_ONLY", /* 0x10000000 */
658 "SKIPWHITE", /* 0x20000000 */
659 "WHITE", /* 0x40000000 */
660 "NULL", /* 0x80000000 */