From: Nicholas Clark Date: Wed, 17 Mar 2010 17:16:24 +0000 (+0000) Subject: Convert REGNODE_{SIMPLE,VARIES} to a bitmask lookup, from a strchr() lookup. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ded4dd2add376b302c561318612805c584ef9e6a;p=p5sagit%2Fp5-mst-13.2.git Convert REGNODE_{SIMPLE,VARIES} to a bitmask lookup, from a strchr() lookup. This is O(1) with no branching, instead of O(n) with branching. Deprecate the old implementation's externally visible variables PL_simple and PL_varies. Google codesearch suggests that nothing outside the core regexp code was using these. --- diff --git a/regcomp.pl b/regcomp.pl index 6a61f98..9370487 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -85,22 +85,46 @@ sub process_flags { $ind = 0; my @selected; + my $bitmap = ''; do { - push @selected, $name[$ind] if $flags[$ind] && $flags[$ind] eq $flag; + my $set = $flags[$ind] && $flags[$ind] eq $flag ? 1 : 0; + # Whilst I could do this with vec, I'd prefer to spell out longhand the same + # arithmetic ops that the C code does. + my $current = do { + no warnings 'uninitialized'; + ord do { + no warnings 'substr'; + substr $bitmap, ($ind >> 3); + } + }; + substr $bitmap, ($ind >> 3), 1, chr($current | ($set << ($ind & 7))); + + push @selected, $name[$ind] if $set; } while (++$ind < $lastregop); my $out_string = join ', ', @selected, 0; $out_string =~ s/(.{1,70},) /$1\n /g; + + my $out_mask = join ', ', map {sprintf "0x%02X", ord $_} split '', $bitmap; + return $comment . 
<<"EOP"; -#define REGNODE_\U$varname\E(node) strchr((const char *)PL_${varname}, (node)) +#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) #ifndef DOINIT -EXTCONST U8 PL_${varname}[]; +EXTCONST U8 PL_${varname}[] __attribute__deprecated__; #else -EXTCONST U8 PL_${varname}[] = { +EXTCONST U8 PL_${varname}[] __attribute__deprecated__ = { $out_string }; #endif /* DOINIT */ +#ifndef DOINIT +EXTCONST U8 PL_${varname}_bitmask[]; +#else +EXTCONST U8 PL_${varname}_bitmask[] = { + $out_mask +}; +#endif /* DOINIT */ + EOP } diff --git a/regnodes.h b/regnodes.h index c9ba109..348410c 100644 --- a/regnodes.h +++ b/regnodes.h @@ -662,26 +662,34 @@ EXTCONST char * const PL_reg_extflags_name[] = { #endif /* DOINIT */ /* The following have no fixed length. U8 so we can do strchr() on it. */ -#define REGNODE_VARIES(node) strchr((const char *)PL_varies, (node)) +#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7))) #ifndef DOINIT -EXTCONST U8 PL_varies[]; +EXTCONST U8 PL_varies[] __attribute__deprecated__; #else -EXTCONST U8 PL_varies[] = { +EXTCONST U8 PL_varies[] __attribute__deprecated__ = { CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL, 0 }; #endif /* DOINIT */ +#ifndef DOINIT +EXTCONST U8 PL_varies_bitmask[]; +#else +EXTCONST U8 PL_varies_bitmask[] = { + 0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x00 +}; +#endif /* DOINIT */ + /* The following always have a length of 1. U8 we can do strchr() on it. */ /* (Note that length 1 means "one character" under UTF8, not "one octet".) 
*/ -#define REGNODE_SIMPLE(node) strchr((const char *)PL_simple, (node)) +#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7))) #ifndef DOINIT -EXTCONST U8 PL_simple[]; +EXTCONST U8 PL_simple[] __attribute__deprecated__; #else -EXTCONST U8 PL_simple[] = { +EXTCONST U8 PL_simple[] __attribute__deprecated__ = { REG_ANY, SANY, CANY, ANYOF, ALNUM, ALNUML, NALNUM, NALNUML, SPACE, SPACEL, NSPACE, NSPACEL, DIGIT, NDIGIT, VERTWS, NVERTWS, HORIZWS, NHORIZWS, @@ -689,4 +697,12 @@ EXTCONST U8 PL_simple[] = { }; #endif /* DOINIT */ +#ifndef DOINIT +EXTCONST U8 PL_simple_bitmask[]; +#else +EXTCONST U8 PL_simple_bitmask[] = { + 0x00, 0xC0, 0xFF, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00 +}; +#endif /* DOINIT */ + /* ex: set ro: */