From: Nicholas Clark <nick@ccl4.org>
Date: Tue, 16 Mar 2010 10:22:04 +0000 (+0000)
Subject: Re-work the regcomp.sym to remove use of hard tabs. No data change.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f8abb37e5b952f76a9e019137369e3f8ef5a58ae;p=p5sagit%2Fp5-mst-13.2.git

Re-work the regcomp.sym to remove use of hard tabs. No data change.

The tab separating the name from the type is replaced with spaces (the parser
now accepts any run of whitespace there), and the tab marking the start of the
description is replaced by a semicolon.
---

diff --git a/regcomp.pl b/regcomp.pl
index 7fdbd13..2fbe6c6 100644
--- a/regcomp.pl
+++ b/regcomp.pl
@@ -36,11 +36,11 @@ while (<DESC>) {
     }
     unless ($lastregop) {
         $ind++;
-        ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3;  
+        ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/; # lazy +?: $desc stops at the first ';', minus padding
         ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) 
           = split /[,\s]\s*/, $desc, 4;
     } else {
-        my ($type,@lists)=split /\s*\t+\s*/, $_;
+        my ($type,@lists)=split /\s+/, $_;
         die "No list? $type" if !@lists;
         foreach my $list (@lists) {
             my ($names,$special)=split /:/, $list , 2;
diff --git a/regcomp.sym b/regcomp.sym
index a1f59a9..32935bf 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -9,7 +9,7 @@
 # Note that the order in this file is important.
 #
 # Format for first section: 
-# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION
+# NAME \s+ TYPE, arg-description [num-args] [longjump-len] ; DESCRIPTION
 #
 #
 # run perl regen.pl after editing this file
@@ -18,127 +18,127 @@
 
 #* Exit points (0,1)
 
-END		END,    no	End of program.
-SUCCEED		END,    no	Return from a subroutine, basically.
+END         END,        no        ; End of program.
+SUCCEED     END,        no        ; Return from a subroutine, basically.
 
 #* Anchors: (2..13)
 
-BOL		BOL,    no	Match "" at beginning of line.
-MBOL		BOL,    no	Same, assuming multiline.
-SBOL		BOL,    no	Same, assuming singleline.
-EOS		EOL,    no	Match "" at end of string.
-EOL		EOL,    no	Match "" at end of line.
-MEOL		EOL,    no	Same, assuming multiline.
-SEOL		EOL,    no	Same, assuming singleline.
-BOUND		BOUND,  no	Match "" at any word boundary
-BOUNDL		BOUND,  no	Match "" at any word boundary
-NBOUND		NBOUND, no	Match "" at any word non-boundary
-NBOUNDL		NBOUND, no	Match "" at any word non-boundary
-GPOS		GPOS,   no	Matches where last m//g left off.
+BOL         BOL,        no        ; Match "" at beginning of line.
+MBOL        BOL,        no        ; Same, assuming multiline.
+SBOL        BOL,        no        ; Same, assuming singleline.
+EOS         EOL,        no        ; Match "" at end of string.
+EOL         EOL,        no        ; Match "" at end of line.
+MEOL        EOL,        no        ; Same, assuming multiline.
+SEOL        EOL,        no        ; Same, assuming singleline.
+BOUND       BOUND,      no        ; Match "" at any word boundary
+BOUNDL      BOUND,      no        ; Match "" at any word boundary
+NBOUND      NBOUND,     no        ; Match "" at any word non-boundary
+NBOUNDL     NBOUND,     no        ; Match "" at any word non-boundary
+GPOS        GPOS,       no        ; Matches where last m//g left off.
 
 #* [Special] alternatives: (14..30)
 
-REG_ANY		REG_ANY,    no	Match any one character (except newline).
-SANY		REG_ANY,    no	Match any one character.
-CANY		REG_ANY,    no	Match any one byte.
-ANYOF		ANYOF,  sv	Match character in (or not in) this class.
-ALNUM		ALNUM,  no	Match any alphanumeric character
-ALNUML		ALNUM,  no	Match any alphanumeric char in locale
-NALNUM		NALNUM, no	Match any non-alphanumeric character
-NALNUML		NALNUM, no	Match any non-alphanumeric char in locale
-SPACE		SPACE,  no	Match any whitespace character
-SPACEL		SPACE,  no	Match any whitespace char in locale
-NSPACE		NSPACE, no	Match any non-whitespace character
-NSPACEL		NSPACE, no	Match any non-whitespace char in locale
-DIGIT		DIGIT,  no	Match any numeric character
-DIGITL		DIGIT,  no	Match any numeric character in locale
-NDIGIT		NDIGIT, no	Match any non-numeric character
-NDIGITL		NDIGIT, no	Match any non-numeric character in locale
-CLUMP		CLUMP,  no	Match any combining character sequence
+REG_ANY     REG_ANY,    no        ; Match any one character (except newline).
+SANY        REG_ANY,    no        ; Match any one character.
+CANY        REG_ANY,    no        ; Match any one byte.
+ANYOF       ANYOF,      sv        ; Match character in (or not in) this class.
+ALNUM       ALNUM,      no        ; Match any alphanumeric character
+ALNUML      ALNUM,      no        ; Match any alphanumeric char in locale
+NALNUM      NALNUM,     no        ; Match any non-alphanumeric character
+NALNUML     NALNUM,     no        ; Match any non-alphanumeric char in locale
+SPACE       SPACE,      no        ; Match any whitespace character
+SPACEL      SPACE,      no        ; Match any whitespace char in locale
+NSPACE      NSPACE,     no        ; Match any non-whitespace character
+NSPACEL     NSPACE,     no        ; Match any non-whitespace char in locale
+DIGIT       DIGIT,      no        ; Match any numeric character
+DIGITL      DIGIT,      no        ; Match any numeric character in locale
+NDIGIT      NDIGIT,     no        ; Match any non-numeric character
+NDIGITL     NDIGIT,     no        ; Match any non-numeric character in locale
+CLUMP       CLUMP,      no        ; Match any combining character sequence
 
 #* Alternation (31)
 
-# BRANCH  	The set of branches constituting a single choice are hooked
-#		together with their "next" pointers, since precedence prevents
-#		anything being concatenated to any individual branch.  The
-#		"next" pointer of the last BRANCH in a choice points to the
-#		thing following the whole choice.  This is also where the
-#		final "next" pointer of each individual branch points; each
-#		branch starts with the operand node of a BRANCH node.
+# BRANCH        The set of branches constituting a single choice are hooked
+#               together with their "next" pointers, since precedence prevents
+#               anything being concatenated to any individual branch.  The
+#               "next" pointer of the last BRANCH in a choice points to the
+#               thing following the whole choice.  This is also where the
+#               final "next" pointer of each individual branch points; each
+#               branch starts with the operand node of a BRANCH node.
 #
-BRANCH		BRANCH, node	Match this alternative, or the next...
+BRANCH      BRANCH,     node      ; Match this alternative, or the next...
 
 #*Back pointer (32)
 
-# BACK		Normal "next" pointers all implicitly point forward; BACK
-#		exists to make loop structures possible.
+# BACK          Normal "next" pointers all implicitly point forward; BACK
+#               exists to make loop structures possible.
 # not used
-BACK		BACK,   no	Match "", "next" ptr points backward.
+BACK        BACK,       no        ; Match "", "next" ptr points backward.
 
 #*Literals (33..35)
 
-EXACT		EXACT,  str	Match this string (preceded by length).
-EXACTF		EXACT,  str	Match this string, folded (prec. by length).
-EXACTFL		EXACT,  str	Match this string, folded in locale (w/len).
+EXACT       EXACT,      str       ; Match this string (preceded by length).
+EXACTF      EXACT,      str       ; Match this string, folded (prec. by length).
+EXACTFL     EXACT,      str       ; Match this string, folded in locale (w/len).
 
 #*Do nothing types (36..37)
 
-NOTHING		NOTHING,no	Match empty string.
+NOTHING     NOTHING,    no        ; Match empty string.
 # A variant of above which delimits a group, thus stops optimizations
-TAIL		NOTHING,no	Match empty string. Can jump here from outside.
+TAIL        NOTHING,    no        ; Match empty string. Can jump here from outside.
 
 #*Loops (38..44)
 
-# STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
-#		BRANCH structures using BACK.  Simple cases (one character
-#		per match) are implemented with STAR and PLUS for speed
-#		and to minimize recursive plunges.
+# STAR,PLUS    '?', and complex '*' and '+', are implemented as circular
+#               BRANCH structures using BACK.  Simple cases (one character
+#               per match) are implemented with STAR and PLUS for speed
+#               and to minimize recursive plunges.
 #
-STAR		STAR,   node	Match this (simple) thing 0 or more times.
-PLUS		PLUS,   node	Match this (simple) thing 1 or more times.
+STAR        STAR,       node      ; Match this (simple) thing 0 or more times.
+PLUS        PLUS,       node      ; Match this (simple) thing 1 or more times.
 
-CURLY		CURLY,  sv 2	Match this simple thing {n,m} times.
-CURLYN		CURLY,  no 2	Capture next-after-this simple thing 
-CURLYM		CURLY,  no 2	Capture this medium-complex thing {n,m} times. 
-CURLYX		CURLY,  sv 2	Match this complex thing {n,m} times.
+CURLY       CURLY,      sv 2      ; Match this simple thing {n,m} times.
+CURLYN      CURLY,      no 2      ; Capture next-after-this simple thing 
+CURLYM      CURLY,      no 2      ; Capture this medium-complex thing {n,m} times. 
+CURLYX      CURLY,      sv 2      ; Match this complex thing {n,m} times.
 
 # This terminator creates a loop structure for CURLYX
-WHILEM		WHILEM, no	Do curly processing and see if rest matches.
+WHILEM      WHILEM,     no        ; Do curly processing and see if rest matches.
 
 #*Buffer related (45..49)
 
-# OPEN,CLOSE,GROUPP	...are numbered at compile time.
-OPEN		OPEN,   num 1	Mark this point in input as start of #n.
-CLOSE		CLOSE,  num 1	Analogous to OPEN.
+# OPEN,CLOSE,GROUPP     ...are numbered at compile time.
+OPEN        OPEN,       num 1     ; Mark this point in input as start of #n.
+CLOSE       CLOSE,      num 1     ; Analogous to OPEN.
 
-REF		REF,    num 1	Match some already matched string
-REFF		REF,    num 1	Match already matched string, folded
-REFFL		REF,    num 1	Match already matched string, folded in loc.
+REF         REF,        num 1     ; Match some already matched string
+REFF        REF,        num 1     ; Match already matched string, folded
+REFFL       REF,        num 1     ; Match already matched string, folded in loc.
 
 #*Grouping assertions (50..54)
 
-IFMATCH		BRANCHJ,off 1 2	Succeeds if the following matches.
-UNLESSM		BRANCHJ,off 1 2	Fails if the following matches.
-SUSPEND		BRANCHJ,off 1 1	"Independent" sub-RE.
-IFTHEN		BRANCHJ,off 1 1	Switch, should be preceeded by switcher .
-GROUPP		GROUPP, num 1	Whether the group matched.
+IFMATCH     BRANCHJ,    off 1 2   ; Succeeds if the following matches.
+UNLESSM     BRANCHJ,    off 1 2   ; Fails if the following matches.
+SUSPEND     BRANCHJ,    off 1 1   ; "Independent" sub-RE.
+IFTHEN      BRANCHJ,    off 1 1   ; Switch, should be preceeded by switcher .
+GROUPP      GROUPP,     num 1     ; Whether the group matched.
 
 #*Support for long RE (55..56)
 
-LONGJMP		LONGJMP,off 1 1	Jump far away.
-BRANCHJ		BRANCHJ,off 1 1	BRANCH with long offset.
+LONGJMP     LONGJMP,    off 1 1   ; Jump far away.
+BRANCHJ     BRANCHJ,    off 1 1   ; BRANCH with long offset.
 
 #*The heavy worker (57)
 
-EVAL		EVAL,   evl 1	Execute some Perl code.
+EVAL        EVAL,       evl 1     ; Execute some Perl code.
 
 #*Modifiers (58..59)
 
-MINMOD		MINMOD, no	Next operator is not greedy.
-LOGICAL		LOGICAL,no	Next opcode should set the flag only.
+MINMOD      MINMOD,     no        ; Next operator is not greedy.
+LOGICAL     LOGICAL,    no        ; Next opcode should set the flag only.
 
 # This is not used yet (60)
-RENUM		BRANCHJ,off 1 1	Group with independently numbered parens.
+RENUM       BRANCHJ,    off 1 1   ; Group with independently numbered parens.
 
 #*Trie Related (61..62)
 
@@ -146,53 +146,53 @@ RENUM		BRANCHJ,off 1 1	Group with independently numbered parens.
 # inline charclass data (ascii only), the 'C' store it in the structure.
 # NOTE: the relative order of the TRIE-like regops  is signifigant
 
-TRIE		TRIE,     trie 1	Match many EXACT(FL?)? at once. flags==type
-TRIEC		TRIE,trie charclass	Same as TRIE, but with embedded charclass data
+TRIE        TRIE,       trie 1    ; Match many EXACT(FL?)? at once. flags==type
+TRIEC       TRIE,trie charclass   ; Same as TRIE, but with embedded charclass data
 
 # For start classes, contains an added fail table.
-AHOCORASICK	TRIE,        trie 1	Aho Corasick stclass. flags==type
-AHOCORASICKC	TRIE,trie charclass	Same as AHOCORASICK, but with embedded charclass data
+AHOCORASICK     TRIE,   trie 1    ; Aho Corasick stclass. flags==type
+AHOCORASICKC    TRIE,trie charclass   ; Same as AHOCORASICK, but with embedded charclass data
 
 #*Regex Subroutines (65..66) 
-GOSUB		GOSUB,     num/ofs 2L	recurse to paren arg1 at (signed) ofs arg2
-GOSTART		GOSTART,   no   	recurse to start of pattern
+GOSUB       GOSUB,      num/ofs 2L    ; recurse to paren arg1 at (signed) ofs arg2
+GOSTART     GOSTART,    no        ; recurse to start of pattern
 
 #*Named references (67..69)
-NREF		REF,       no-sv 1	Match some already matched string
-NREFF		REF,       no-sv 1	Match already matched string, folded
-NREFFL		REF,       no-sv 1	Match already matched string, folded in loc.
+NREF        REF,        no-sv 1   ; Match some already matched string
+NREFF       REF,        no-sv 1   ; Match already matched string, folded
+NREFFL      REF,        no-sv 1   ; Match already matched string, folded in loc.
 
 
 #*Special conditionals  (70..72)
-NGROUPP		NGROUPP,   no-sv 1	Whether the group matched.            
-INSUBP		INSUBP,    num 1 	Whether we are in a specific recurse.  
-DEFINEP		DEFINEP,   none 1 	Never execute directly.               
+NGROUPP     NGROUPP,    no-sv 1   ; Whether the group matched.            
+INSUBP      INSUBP,     num 1     ; Whether we are in a specific recurse.  
+DEFINEP     DEFINEP,    none 1    ; Never execute directly.               
 
 #*Bactracking Verbs
-ENDLIKE		ENDLIKE,   none		Used only for the type field of verbs
-OPFAIL		ENDLIKE,   none 	Same as (?!)
-ACCEPT		ENDLIKE,   parno 1 	Accepts the current matched string.
+ENDLIKE     ENDLIKE,    none      ; Used only for the type field of verbs
+OPFAIL      ENDLIKE,    none      ; Same as (?!)
+ACCEPT      ENDLIKE,    parno 1   ; Accepts the current matched string.
 
 
 #*Verbs With Arguments
-VERB		VERB,      no-sv 1	Used only for the type field of verbs
-PRUNE		VERB,      no-sv 1 	Pattern fails at this startpoint if no-backtracking through this 
-MARKPOINT	VERB,      no-sv 1	Push the current location for rollback by cut.
-SKIP		VERB,      no-sv 1	On failure skip forward (to the mark) before retrying
-COMMIT		VERB,      no-sv 1	Pattern fails outright if backtracking through this
-CUTGROUP	VERB,      no-sv 1	On failure go to the next alternation in the group
+VERB        VERB,       no-sv 1   ; Used only for the type field of verbs
+PRUNE       VERB,       no-sv 1   ; Pattern fails at this startpoint if no-backtracking through this 
+MARKPOINT   VERB,       no-sv 1   ; Push the current location for rollback by cut.
+SKIP        VERB,       no-sv 1   ; On failure skip forward (to the mark) before retrying
+COMMIT      VERB,       no-sv 1   ; Pattern fails outright if backtracking through this
+CUTGROUP    VERB,       no-sv 1   ; On failure go to the next alternation in the group
 
 #*Control what to keep in $&.
-KEEPS		KEEPS,  no	$& begins here.
+KEEPS       KEEPS,      no        ; $& begins here.
 
 #*New charclass like patterns
-LNBREAK		LNBREAK,   none		generic newline pattern
-VERTWS		VERTWS,    none		vertical whitespace         (Perl 6)
-NVERTWS		NVERTWS,   none		not vertical whitespace     (Perl 6)
-HORIZWS		HORIZWS,   none		horizontal whitespace       (Perl 6)
-NHORIZWS	NHORIZWS,  none		not horizontal whitespace   (Perl 6)
+LNBREAK     LNBREAK,    none      ; generic newline pattern
+VERTWS      VERTWS,     none      ; vertical whitespace         (Perl 6)
+NVERTWS     NVERTWS,    none      ; not vertical whitespace     (Perl 6)
+HORIZWS     HORIZWS,    none      ; horizontal whitespace       (Perl 6)
+NHORIZWS    NHORIZWS,   none      ; not horizontal whitespace   (Perl 6)
 
-FOLDCHAR	FOLDCHAR,  codepoint 1	codepoint with tricky case folding properties.
+FOLDCHAR    FOLDCHAR,   codepoint 1 ; codepoint with tricky case folding properties.
 
 # NEW STUFF ABOVE THIS LINE  
 
@@ -202,14 +202,14 @@ FOLDCHAR	FOLDCHAR,  codepoint 1	codepoint with tricky case folding properties.
 
 # This is not really a node, but an optimized away piece of a "long" node.
 # To simplify debugging output, we mark it as if it were a node
-OPTIMIZED	NOTHING,off	Placeholder for dump.
+OPTIMIZED   NOTHING,    off       ; Placeholder for dump.
 
 # Special opcode with the property that no opcode in a compiled program
 # will ever be of this type. Thus it can be used as a flag value that
 # no other opcode has been seen. END is used similarly, in that an END
 # node cant be optimized. So END implies "unoptimizable" and PSEUDO mean
 # "not seen anything to optimize yet".
-PSEUDO		PSEUDO,off	Pseudo opcode for internal use.
+PSEUDO      PSEUDO,     off       ; Pseudo opcode for internal use.
 
 -------------------------------------------------------------------------------
 # Format for second section:
@@ -221,16 +221,16 @@ PSEUDO		PSEUDO,off	Pseudo opcode for internal use.
 # Anything below is a state
 #
 #
-TRIE    	next:FAIL	
-EVAL    	AB:FAIL	
-CURLYX  	end:FAIL	
-WHILEM  	A_pre,A_min,A_max,B_min,B_max:FAIL
-BRANCH  	next:FAIL	
-CURLYM  	A,B:FAIL	
-IFMATCH 	A:FAIL	
-CURLY   	B_min_known,B_min,B_max:FAIL	
-COMMIT		next:FAIL
-MARKPOINT	next:FAIL
-SKIP		next:FAIL
-CUTGROUP	next:FAIL
-KEEPS		next:FAIL
+TRIE            next:FAIL
+EVAL            AB:FAIL
+CURLYX          end:FAIL
+WHILEM          A_pre,A_min,A_max,B_min,B_max:FAIL
+BRANCH          next:FAIL
+CURLYM          A,B:FAIL
+IFMATCH         A:FAIL
+CURLY           B_min_known,B_min,B_max:FAIL
+COMMIT          next:FAIL
+MARKPOINT       next:FAIL
+SKIP            next:FAIL
+CUTGROUP        next:FAIL
+KEEPS           next:FAIL