From: Yves Orton <demerphq@gmail.com>
Date: Sun, 22 Apr 2007 23:34:55 +0000 (+0200)
Subject: Change meaning of \v, \V, and add \h, \H to match Perl6, add \R to match PCRE and... 
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e1d1eefb8c88e0dcaf2bb9e6c04d7f6192be966f;p=p5sagit%2Fp5-mst-13.2.git

Change meaning of \v, \V, and add \h, \H to match Perl6, add \R to match PCRE and unicode tr18
Message-ID: <9b18b3110704221434g43457742p28cab00289f83639@mail.gmail.com>

p4raw-id: //depot/perl@31026
---

diff --git a/MANIFEST b/MANIFEST
index baf1e13..d997f37 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3297,6 +3297,7 @@ regcomp.c			Regular expression compiler
 regcomp.h			Private declarations for above
 regcomp.pl			Builder of regnodes.h
 regcomp.sym			Data for regnodes.h
+regcharclass.h			Match various character classes efficiently
 regen_lib.pl			Common file routines for generator scripts
 regen_perly.pl			generate perly.{act,h,tab} from perly.y
 regen.pl			Run all scripts that (re)generate files
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 72f33cb..64de8b1 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -829,7 +829,10 @@ sub UnicodeData_Txt()
 	Table->New(Is => 'Word',   Desc => "[[:Word:]]",   Fuzzy => 0);
     $Cat{SpacePerl} =
 	Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
-
+    $Cat{VertSpace} =
+	Table->New(Is => 'VertSpace', Desc => '\v', Fuzzy => 0);
+    $Cat{HorizSpace} =
+	Table->New(Is => 'HorizSpace', Desc => '\h', Fuzzy => 0);
     my %To;
     $To{Upper} = Table->New();
     $To{Lower} = Table->New();
@@ -886,6 +889,15 @@ sub UnicodeData_Txt()
         $Cat{SpacePerl}->$op($code) if $isspace
 	                               && $code != 0x000B; # Backward compat.
 
+        $Cat{VertSpace}->$op($code) if grep {$code == $_} 
+            ( 0x0A..0x0D,0x85,0x2028,0x2029 );
+
+        $Cat{HorizSpace}->$op($code) if grep {$code == $_} (
+            0x09,   0x20,   0xa0,   0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
+            0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
+            0x202f, 0x205f, 0x3000
+        ); 
+
         $Cat{Blank}->$op($code) if $isspace
                                 && !($code == 0x000A ||
 				     $code == 0x000B ||
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 66935d2..1865232 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -233,7 +233,7 @@ An unescaped C<$> or C<@> interpolates the corresponding variable,
 while escaping will cause the literal string C<\$> to be matched.
 You'll need to write something like C<m/\Quser\E\@\Qhost/>.
 
-=head3 Character classes
+=head3 Character Classes and other Special Escapes
 
 In addition, Perl defines the following:
 X<\w> X<\W> X<\s> X<\S> X<\d> X<\D> X<\X> X<\p> X<\P> X<\C>
@@ -265,8 +265,11 @@ X<word> X<whitespace> X<character class> X<backreference>
     \x12     Hexadecimal escape sequence
     \x{1234} Long hexadecimal escape sequence
     \K       Keep the stuff left of the \K, don't include it in $&
-    \v       Shortcut for (*PRUNE)
-    \V       Shortcut for (*SKIP)
+    \v       Vertical whitespace
+    \V       Not vertical whitespace
+    \h       Horizontal whitespace
+    \H       Not horizontal whitespace
+    \R       Linebreak (matches like \v inside of a charclass)
 
 A C<\w> matches a single alphanumeric character (an alphabetic
 character, or a decimal digit) or C<_>, not a whole word.  Use C<\w+>
@@ -283,6 +286,15 @@ your own C<\p> and C<\P> properties, and L<perluniintro> about Unicode
 in general.
 X<\w> X<\W> X<word>
 
+C<\R> will atomically match a linebreak, including the network line-ending
+"\x0D\x0A".  Specifically, X<\R> is exactly equivelent to
+
+  (?>\x0D\x0A?|[\x0A-\x0C\x85\x{2028}\x{2029}])
+
+B<Note:> C<\R> has no special meaning inside of a character class;
+use C<\v> instead (vertical whitespace).
+X<\R>
+
 The POSIX character class syntax
 X<character class>
 
diff --git a/regcharclass.h b/regcharclass.h
new file mode 100644
index 0000000..3fc92d7
--- /dev/null
+++ b/regcharclass.h
@@ -0,0 +1,250 @@
+/****** WARNING WARNING WARNING ********/
+/*                                     */
+/* Autogenerated code, do not modify!  */
+/*                                     */
+/****** WARNING WARNING WARNING ********/
+
+/* LNBREAK Line Break: \j \J 
+  Codepoints:  0x0A, 0x0B, 0x0C, 0x0D, 0x0D.0x0A, 0x85, 0x2028, 0x2029
+  */
+#define is_LNBREAK(s,is_utf8) /*** Line Break: \j \J ***/                   \
+( ( ((U8*)s)[0]==13 ) ?                                                     \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ? 1 :                              \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  (((( ((U8*)s)[0]==226 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) ) :\
+  ( ((U8*)s)[0]==133 ) ) ) )                                                
+
+#define is_LNBREAK_safe(s,e,is_utf8) /*** Line Break: \j \J ***/            \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ? 1 :                              \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  (((( ((U8*)s)[0]==226 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) ) :\
+  ( ((U8*)s)[0]==133 ) ) ) ) :                                              \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ? 1 :                              \
+( (is_utf8) ?                                                               \
+  ((( ((U8*)s)[0]==194 ) && ( ((U8*)s)[1]==133 )) ? 2 : 0) :                \
+  ( ((U8*)s)[0]==133 ) ) ) ) :                                              \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ? 1 :                              \
+( (!is_utf8) ?                                                              \
+  ( ((U8*)s)[0]==133 ) : 0 ) ) : 0 ) ) )                                    
+
+#define is_LNBREAK_utf8(s) /*** Line Break: \j \J ***/                      \
+( ( ((U8*)s)[0]==194 ) ?                                                    \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) :\
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ) ) )                                
+
+#define is_LNBREAK_utf8_safe(s,e) /*** Line Break: \j \J ***/               \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) :\
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ) ) ) :                              \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) ) ) :                                \
+( ( (e) - (s) > 0 ) ?                                                       \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) : 0 ) ) )                            
+
+#define is_LNBREAK_latin1(s) /*** Line Break: \j \J ***/                    \
+( ( ((U8*)s)[0]==13 ) ?                                                     \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) || ((U8*)s)[0]==133 ) )            
+
+#define is_LNBREAK_latin1_safe(s,e) /*** Line Break: \j \J ***/             \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==13 ) ?                                                   \
+    ( ( ((U8*)s)[1]==10 ) ? 2 : 1 ) :                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=12) || ((U8*)s)[0]==133 ) ) :          \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) || ((U8*)s)[0]==133 ) : 0 ) )      
+
+#define is_LNBREAK_cp(cp) /*** Line Break: \j \J ***/                       \
+( (10<=cp && cp<=13) || ( cp > 13 && ( cp==133 || ( cp > 133 && ( cp==8232 || ( cp > 8232 && cp==8233 ) ) ) ) ) )
+
+/****** WARNING WARNING WARNING ********/
+/*                                     */
+/* Autogenerated code, do not modify!  */
+/*                                     */
+/****** WARNING WARNING WARNING ********/
+
+/* HORIZWS Horizontal Whitespace: \h \H 
+  Codepoints:  0x09, 0x20, 0xA0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002,
+  0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F,
+  0x205F, 0x3000
+  */
+#define is_HORIZWS(s,is_utf8) /*** Horizontal Whitespace: \h \H ***/        \
+( ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ? 1 :                               \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==225 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ?                                                \
+      ( ( ((U8*)s)[2]==142 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==154 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) :            \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ( ( ((U8*)s)[1]==129 ) ?                                                \
+      ( ( ((U8*)s)[2]==159 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==128 ) && ( (128<=((U8*)s)[2] && ((U8*)s)[2]<=138) || ((U8*)s)[2]==175 )) ? 3 : 0) ) :\
+  (((( ((U8*)s)[0]==227 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) ) ) :\
+  ( ((U8*)s)[0]==160 ) ) )                                                  
+
+#define is_HORIZWS_safe(s,e,is_utf8) /*** Horizontal Whitespace: \h \H ***/ \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ? 1 :                             \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==225 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ?                                                \
+      ( ( ((U8*)s)[2]==142 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==154 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) :            \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ( ( ((U8*)s)[1]==129 ) ?                                                \
+      ( ( ((U8*)s)[2]==159 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==128 ) && ( (128<=((U8*)s)[2] && ((U8*)s)[2]<=138) || ((U8*)s)[2]==175 )) ? 3 : 0) ) :\
+  (((( ((U8*)s)[0]==227 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) ) ) :\
+  ( ((U8*)s)[0]==160 ) ) ) :                                                \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ? 1 :                             \
+( (is_utf8) ?                                                               \
+  ((( ((U8*)s)[0]==194 ) && ( ((U8*)s)[1]==160 )) ? 2 : 0) :                \
+  ( ((U8*)s)[0]==160 ) ) ) :                                                \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ? 1 :                             \
+( (!is_utf8) ?                                                              \
+  ( ((U8*)s)[0]==160 ) : 0 ) ) : 0 ) ) )                                    
+
+#define is_HORIZWS_utf8(s) /*** Horizontal Whitespace: \h \H ***/           \
+( ( ((U8*)s)[0]==194 ) ?                                                    \
+    ( ( ((U8*)s)[1]==160 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==225 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ?                                                \
+      ( ( ((U8*)s)[2]==142 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==154 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) :            \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ( ( ((U8*)s)[1]==129 ) ?                                                \
+      ( ( ((U8*)s)[2]==159 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==128 ) && ( (128<=((U8*)s)[2] && ((U8*)s)[2]<=138) || ((U8*)s)[2]==175 )) ? 3 : 0) ) :\
+  ( ( ((U8*)s)[0]==227 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) :              \
+  ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ) ) ) )                             
+
+#define is_HORIZWS_utf8_safe(s,e) /*** Horizontal Whitespace: \h \H ***/    \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==225 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ?                                                \
+      ( ( ((U8*)s)[2]==142 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==154 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) ) :            \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ( ( ((U8*)s)[1]==129 ) ?                                                \
+      ( ( ((U8*)s)[2]==159 ) ? 3 : 0 ) :                                    \
+    ((( ((U8*)s)[1]==128 ) && ( (128<=((U8*)s)[2] && ((U8*)s)[2]<=138) || ((U8*)s)[2]==175 )) ? 3 : 0) ) :\
+  ( ( ((U8*)s)[0]==227 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==128 )) ? 3 : 0) :              \
+  ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ) ) ) ) :                           \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==160 ) ? 2 : 0 ) :                                      \
+  ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) ) :                                 \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 ) : 0 ) ) )                           
+
+#define is_HORIZWS_latin1(s) /*** Horizontal Whitespace: \h \H ***/         \
+( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 || ((U8*)s)[0]==160 )                   
+
+#define is_HORIZWS_latin1_safe(s,e) /*** Horizontal Whitespace: \h \H ***/  \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( ((U8*)s)[0]==9 || ((U8*)s)[0]==32 || ((U8*)s)[0]==160 ) : 0 )           
+
+#define is_HORIZWS_cp(cp) /*** Horizontal Whitespace: \h \H ***/            \
+( cp==9 || ( cp > 9 && ( cp==32 || ( cp > 32 && ( cp==160 || ( cp > 160 && ( cp==5760 || ( cp > 5760 && ( cp==6158 || ( cp > 6158 && ( (8192<=cp && cp<=8202) || ( cp > 8202 && ( cp==8239 || ( cp > 8239 && ( cp==8287 || ( cp > 8287 && cp==12288 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+
+/****** WARNING WARNING WARNING ********/
+/*                                     */
+/* Autogenerated code, do not modify!  */
+/*                                     */
+/****** WARNING WARNING WARNING ********/
+
+/* VERTWS Vertical Whitespace: \v \V 
+  Codepoints:  0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029
+  */
+#define is_VERTWS(s,is_utf8) /*** Vertical Whitespace: \v \V ***/           \
+( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ? 1 :                                \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  (((( ((U8*)s)[0]==226 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) ) :\
+  ( ((U8*)s)[0]==133 ) ) )                                                  
+
+#define is_VERTWS_safe(s,e,is_utf8) /*** Vertical Whitespace: \v \V ***/    \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ? 1 :                              \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  (((( ((U8*)s)[0]==226 ) && ( ((U8*)s)[1]==128 )) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) ) :\
+  ( ((U8*)s)[0]==133 ) ) ) :                                                \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ? 1 :                              \
+( (is_utf8) ?                                                               \
+  ((( ((U8*)s)[0]==194 ) && ( ((U8*)s)[1]==133 )) ? 2 : 0) :                \
+  ( ((U8*)s)[0]==133 ) ) ) :                                                \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ? 1 :                              \
+( (!is_utf8) ?                                                              \
+  ( ((U8*)s)[0]==133 ) : 0 ) ) : 0 ) ) )                                    
+
+#define is_VERTWS_utf8(s) /*** Vertical Whitespace: \v \V ***/              \
+( ( ((U8*)s)[0]==194 ) ?                                                    \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) :\
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ) )                                  
+
+#define is_VERTWS_utf8_safe(s,e) /*** Vertical Whitespace: \v \V ***/       \
+( ( (e) - (s) > 2 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  ( ( ((U8*)s)[0]==226 ) ?                                                  \
+    ((( ((U8*)s)[1]==128 ) && ( ((U8*)s)[2]==168 || ((U8*)s)[2]==169 )) ? 3 : 0) :\
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ) ) :                                \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)s)[0]==194 ) ?                                                  \
+    ( ( ((U8*)s)[1]==133 ) ? 2 : 0 ) :                                      \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) ) :                                  \
+( ( (e) - (s) > 0 ) ?                                                       \
+  (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) : 0 ) ) )                            
+
+#define is_VERTWS_latin1(s) /*** Vertical Whitespace: \v \V ***/            \
+( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) || ((U8*)s)[0]==133 )                
+
+#define is_VERTWS_latin1_safe(s,e) /*** Vertical Whitespace: \v \V ***/     \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( (10<=((U8*)s)[0] && ((U8*)s)[0]<=13) || ((U8*)s)[0]==133 ) : 0 )        
+
+#define is_VERTWS_cp(cp) /*** Vertical Whitespace: \v \V ***/               \
+( (10<=cp && cp<=13) || ( cp > 13 && ( cp==133 || ( cp > 133 && ( cp==8232 || ( cp > 8232 && cp==8233 ) ) ) ) ) )
+
diff --git a/regcomp.c b/regcomp.c
index e24d146..48a8a30 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2400,6 +2400,34 @@ typedef struct scan_frame {
 
 #define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
 
+#define CASE_SYNST_FNC(nAmE)                                       \
+case nAmE:                                                         \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+	    for (value = 0; value < 256; value++)                  \
+		if (!is_ ## nAmE ## _cp(value))                       \
+		    ANYOF_BITMAP_CLEAR(data->start_class, value);  \
+    }                                                              \
+    else {                                                         \
+	    for (value = 0; value < 256; value++)                  \
+		if (is_ ## nAmE ## _cp(value))                        \
+		    ANYOF_BITMAP_SET(data->start_class, value);	   \
+    }                                                              \
+    break;                                                         \
+case N ## nAmE:                                                    \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+	    for (value = 0; value < 256; value++)                   \
+		if (is_ ## nAmE ## _cp(value))                         \
+		    ANYOF_BITMAP_CLEAR(data->start_class, value);   \
+    }                                                               \
+    else {                                                          \
+	    for (value = 0; value < 256; value++)                   \
+		if (!is_ ## nAmE ## _cp(value))                        \
+		    ANYOF_BITMAP_SET(data->start_class, value);	    \
+    }                                                               \
+    break
+
+
+
 STATIC I32
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 *minlenp, I32 *deltap,
@@ -3330,6 +3358,34 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		break;
 	    }
 	}
+	else if (OP(scan) == LNBREAK) {
+	    if (flags & SCF_DO_STCLASS) {
+		int value = 0;
+		data->start_class->flags &= ~ANYOF_EOS;	/* No match on empty */
+    	        if (flags & SCF_DO_STCLASS_AND) {
+                    for (value = 0; value < 256; value++)
+                        if (!is_LNBREAK_cp(value))                   
+                            ANYOF_BITMAP_CLEAR(data->start_class, value);  
+                }                                                              
+                else {                                                         
+                    for (value = 0; value < 256; value++)
+                        if (is_LNBREAK_cp(value))                    
+                            ANYOF_BITMAP_SET(data->start_class, value);	   
+                }                                                              
+                if (flags & SCF_DO_STCLASS_OR)
+		    cl_and(data->start_class, and_withp);
+		flags &= ~SCF_DO_STCLASS;
+            }
+	    min += 1;
+	    delta += 2;
+            if (flags & SCF_DO_SUBSTR) {
+    	        SCAN_COMMIT(pRExC_state,data,minlenp);	/* Cannot expect anything... */
+    	        data->pos_min += 1;
+    	        data->pos_delta += 2;
+		data->longest = &(data->longest_float);
+    	    }
+    	    
+	}
 	else if (strchr((const char*)PL_simple,OP(scan))) {
 	    int value = 0;
 
@@ -3524,6 +3580,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 			}
 		    }
 		    break;
+		CASE_SYNST_FNC(VERTWS);
+		CASE_SYNST_FNC(HORIZWS);
+		
 		}
 		if (flags & SCF_DO_STCLASS_OR)
 		    cl_and(data->start_class, and_withp);
@@ -3894,6 +3953,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 	}
 #endif /* old or new */
 #endif /* TRIE_STUDY_OPT */	
+
 	/* Else: zero-length, ignore. */
 	scan = regnext(scan);
     }
@@ -6585,15 +6645,25 @@ tryagain:
 	    ret = reg_node(pRExC_state, NDIGIT);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    goto finish_meta_pat;
+	case 'R':
+	    ret = reg_node(pRExC_state, LNBREAK);
+	    *flagp |= HASWIDTH|SIMPLE;
+	    goto finish_meta_pat;
+	case 'h':
+	    ret = reg_node(pRExC_state, HORIZWS);
+	    *flagp |= HASWIDTH|SIMPLE;
+	    goto finish_meta_pat;
+	case 'H':
+	    ret = reg_node(pRExC_state, NHORIZWS);
+	    *flagp |= HASWIDTH|SIMPLE;
+	    goto finish_meta_pat;
 	case 'v':
-	    ret = reganode(pRExC_state, PRUNE, 0);
-	    ret->flags = 1;
-	    *flagp |= SIMPLE;
+	    ret = reg_node(pRExC_state, VERTWS);
+	    *flagp |= HASWIDTH|SIMPLE;
 	    goto finish_meta_pat;
 	case 'V':
-	    ret = reganode(pRExC_state, SKIP, 0);
-	    ret->flags = 1;
-	    *flagp |= SIMPLE;
+	    ret = reg_node(pRExC_state, NVERTWS);
+	    *flagp |= HASWIDTH|SIMPLE;
          finish_meta_pat:	    
 	    nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -6815,11 +6885,13 @@ tryagain:
 		    case 'C':             /* Single char !DANGEROUS! */
 		    case 'd': case 'D':   /* digit class */
 		    case 'g': case 'G':   /* generic-backref, pos assertion */
+		    case 'h': case 'H':   /* HORIZWS */
 		    case 'k': case 'K':   /* named backref, keep marker */
 		    case 'N':             /* named char sequence */
 		    case 'p': case 'P':   /* unicode property */
+		              case 'R':   /* LNBREAK */
 		    case 's': case 'S':   /* space class */
-		    case 'v': case 'V':   /* (*PRUNE) and (*SKIP) */
+		    case 'v': case 'V':   /* VERTWS */
 		    case 'w': case 'W':   /* word class */
 		    case 'X':             /* eXtended Unicode "combining character sequence" */
 		    case 'z': case 'Z':   /* End of line/string assertion */
@@ -7242,6 +7314,21 @@ case ANYOF_N##NAME:                                     \
     what = WORD;                                        \
     break
 
+#define _C_C_T_NOLOC_(NAME,TEST,WORD)                   \
+ANYOF_##NAME:                                           \
+	for (value = 0; value < 256; value++)           \
+	    if (TEST)                                   \
+		ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '+';                                        \
+    what = WORD;                                        \
+    break;                                              \
+case ANYOF_N##NAME:                                     \
+	for (value = 0; value < 256; value++)           \
+	    if (!TEST)                                  \
+		ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '!';                                        \
+    what = WORD;                                        \
+    break
 
 /*
    parse a class specification and produce either an ANYOF node that
@@ -7254,10 +7341,10 @@ STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 {
     dVAR;
-    register UV value = 0;
     register UV nextvalue;
     register IV prevvalue = OOB_UNICODE;
     register IV range = 0;
+    UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
     register regnode *ret;
     STRLEN numlen;
     IV namedclass;
@@ -7360,6 +7447,10 @@ parseit:
 	    case 'S':	namedclass = ANYOF_NSPACE;	break;
 	    case 'd':	namedclass = ANYOF_DIGIT;	break;
 	    case 'D':	namedclass = ANYOF_NDIGIT;	break;
+	    case 'v':	namedclass = ANYOF_VERTWS;	break;
+	    case 'V':	namedclass = ANYOF_NVERTWS;	break;
+	    case 'h':	namedclass = ANYOF_HORIZWS;	break;
+	    case 'H':	namedclass = ANYOF_NHORIZWS;	break;
             case 'N':  /* Handle \N{NAME} in class */
                 {
                     /* We only pay attention to the first char of 
@@ -7538,6 +7629,8 @@ parseit:
 		case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
 		case _C_C_T_(UPPER, isUPPER(value), "Upper");
 		case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
+		case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
+		case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
 		case ANYOF_ASCII:
 		    if (LOC)
 			ANYOF_CLASS_SET(ret, ANYOF_ASCII);
diff --git a/regcomp.h b/regcomp.h
index 1a0916a..70fdeb5 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -7,6 +7,7 @@
  *    License or the Artistic License, as specified in the README file.
  *
  */
+#include "regcharclass.h"
 
 typedef OP OP_4tree;			/* Will be redefined later. */
 
@@ -177,7 +178,7 @@ struct regnode_2 {
 
 
 #define ANYOF_BITMAP_SIZE	32	/* 256 b/(8 b/B) */
-#define ANYOF_CLASSBITMAP_SIZE	 4	/* up to 32 (8*4) named classes */
+#define ANYOF_CLASSBITMAP_SIZE	 4	/* up to 40 (8*5) named classes */
 
 /* also used by trie */
 struct regnode_charclass {
@@ -345,6 +346,14 @@ struct regnode_charclass_class {	/* has [[:blah:]] classes */
 
 #define ANYOF_MAX	32
 
+/* pseudo classes, not stored in the class bitmap, but used as flags
+   during compilation of char classes */
+
+#define ANYOF_VERTWS	(ANYOF_MAX+1)
+#define ANYOF_NVERTWS	(ANYOF_MAX+2)
+#define ANYOF_HORIZWS	(ANYOF_MAX+3)
+#define ANYOF_NHORIZWS	(ANYOF_MAX+4)
+
 /* Backward source code compatibility. */
 
 #define ANYOF_ALNUML	 ANYOF_ALNUM
@@ -444,6 +453,8 @@ EXTCONST U8 PL_simple[] = {
     SPACE,	SPACEL,
     NSPACE,	NSPACEL,
     DIGIT,	NDIGIT,
+    VERTWS,     NVERTWS,
+    HORIZWS,    NHORIZWS,
     0
 };
 #endif
@@ -799,3 +810,4 @@ re.pm, especially to the documentation.
 #endif /* DEBUG RELATED DEFINES */
 
 
+
diff --git a/regcomp.sym b/regcomp.sym
index c57a386..070fe98 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -185,7 +185,14 @@ CUTGROUP	VERB,      no-sv 1	On failure go to the next alternation in the group
 #*Control what to keep in $&.
 KEEPS		KEEPS,  no	$& begins here.
 
-# NEW STUFF ABOVE THIS LINE -- Please update counts below. 
+#*New charclass like patterns
+LNBREAK		LNBREAK,   none		generic newline pattern
+VERTWS		VERTWS,    none		vertical whitespace         (Perl 6)
+NVERTWS		NVERTWS,   none		not vertical whitespace     (Perl 6)
+HORIZWS		HORIZWS,   none		horizontal whitespace       (Perl 6)
+NHORIZWS	NHORIZWS,  none		not horizontal whitespace   (Perl 6)
+
+# NEW STUFF ABOVE THIS LINE  
 
 ################################################################################
 
diff --git a/regexec.c b/regexec.c
index 1eb7ff2..fa853a4 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1110,6 +1110,15 @@ REXEC_FBC_SCAN(                                       \
 if ((!reginfo || regtry(reginfo, &s))) \
     goto got_it
 
+#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
+    if (do_utf8) {                                             \
+	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
+    }                                                          \
+    else {                                                     \
+	REXEC_FBC_CLASS_SCAN(CoNd);                            \
+    }                                                          \
+    break
+    
 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
     if (do_utf8) {                                             \
 	UtFpReLoAd;                                            \
@@ -1425,6 +1434,31 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 		!isDIGIT_LC_utf8((U8*)s),
 		!isDIGIT_LC(*s)
 	    );
+	case LNBREAK:
+	    REXEC_FBC_CSCAN(
+		is_LNBREAK_utf8(s),
+		is_LNBREAK_latin1(s)
+	    );
+	case VERTWS:
+	    REXEC_FBC_CSCAN(
+		is_VERTWS_utf8(s),
+		is_VERTWS_latin1(s)
+	    );
+	case NVERTWS:
+	    REXEC_FBC_CSCAN(
+		!is_VERTWS_utf8(s),
+		!is_VERTWS_latin1(s)
+	    );
+	case HORIZWS:
+	    REXEC_FBC_CSCAN(
+		is_HORIZWS_utf8(s),
+		is_HORIZWS_latin1(s)
+	    );
+	case NHORIZWS:
+	    REXEC_FBC_CSCAN(
+		!is_HORIZWS_utf8(s),
+		!is_HORIZWS_latin1(s)
+	    );	    
 	case AHOCORASICKC:
 	case AHOCORASICK: 
 	    {
@@ -3207,8 +3241,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		      * pack("U0U*", 0xDF) =~ /ss/i,
 		      * the 0xC3 0x9F are the UTF-8
 		      * byte sequence for the U+00DF. */
+
 		     if (!(do_utf8 &&
-			   toLOWER(s[0]) == 's' &&
+		           toLOWER(s[0]) == 's' &&
 			   ln >= 2 &&
 			   toLOWER(s[1]) == 's' &&
 			   (U8)l[0] == 0xC3 &&
@@ -4972,6 +5007,35 @@ NULL
             /* NOTREACHED */
 #undef ST
 
+        case LNBREAK:
+            if ((n=is_LNBREAK(locinput,do_utf8))) {
+                locinput += n;
+                nextchr = UCHARAT(locinput);
+            } else
+                sayNO;
+            break;
+
+#define CASE_CLASS(nAmE)                              \
+        case nAmE:                                    \
+            if ((n=is_##nAmE(locinput,do_utf8))) {    \
+                locinput += n;                        \
+                nextchr = UCHARAT(locinput);          \
+            } else                                    \
+                sayNO;                                \
+            break;                                    \
+        case N##nAmE:                                 \
+            if ((n=is_##nAmE(locinput,do_utf8))) {    \
+                sayNO;                                \
+            } else {                                  \
+                locinput += UTF8SKIP(locinput);       \
+                nextchr = UCHARAT(locinput);          \
+            }                                         \
+            break
+
+        CASE_CLASS(VERTWS);
+        CASE_CLASS(HORIZWS);
+#undef CASE_CLASS
+
 	default:
 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
 			  PTR2UV(scan), OP(scan));
@@ -5382,7 +5446,77 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
 	    while (scan < loceol && !isDIGIT(*scan))
 		scan++;
 	}
+    case LNBREAK:
+        if (do_utf8) {
+	    loceol = PL_regeol;
+	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+		scan += c;
+		hardcount++;
+	    }
+	} else {
+	    /*
+	      LNBREAK can match two latin chars, which is ok,
+	      because we have a null terminated string, but we
+	      have to use hardcount in this situation
+	    */
+	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
+		scan+=c;
+		hardcount++;
+	    }
+	}	
+	break;
+    case HORIZWS:
+        if (do_utf8) {
+	    loceol = PL_regeol;
+	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+		scan += c;
+		hardcount++;
+	    }
+	} else {
+	    while (scan < loceol && is_HORIZWS_latin1(scan)) 
+		scan++;		
+	}	
 	break;
+    case NHORIZWS:
+        if (do_utf8) {
+	    loceol = PL_regeol;
+	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+		scan += UTF8SKIP(scan);
+		hardcount++;
+	    }
+	} else {
+	    while (scan < loceol && !is_HORIZWS_latin1(scan))
+		scan++;
+
+	}	
+	break;
+    case VERTWS:
+        if (do_utf8) {
+	    loceol = PL_regeol;
+	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+		scan += c;
+		hardcount++;
+	    }
+	} else {
+	    while (scan < loceol && is_VERTWS_latin1(scan)) 
+		scan++;
+
+	}	
+	break;
+    case NVERTWS:
+        if (do_utf8) {
+	    loceol = PL_regeol;
+	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+		scan += UTF8SKIP(scan);
+		hardcount++;
+	    }
+	} else {
+	    while (scan < loceol && !is_VERTWS_latin1(scan)) 
+		scan++;
+          
+	}	
+	break;
+
     default:		/* Called on something of 0 width. */
 	break;		/* So match right here or not at all. */
     }
diff --git a/regnodes.h b/regnodes.h
index e704427..3c3a5d6 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX           	84
-#define REGMATCH_STATE_MAX    	124
+#define REGNODE_MAX           	89
+#define REGMATCH_STATE_MAX    	129
 
 #define	END                   	0	/* 0000 End of program. */
 #define	SUCCEED               	1	/* 0x01 Return from a subroutine, basically. */
@@ -92,8 +92,13 @@
 #define	COMMIT                	80	/* 0x50 Pattern fails outright if backtracking through this */
 #define	CUTGROUP              	81	/* 0x51 On failure go to the next alternation in the group */
 #define	KEEPS                 	82	/* 0x52 $& begins here. */
-#define	OPTIMIZED             	83	/* 0x53 Placeholder for dump. */
-#define	PSEUDO                	84	/* 0x54 Pseudo opcode for internal use. */
+#define	LNBREAK               	83	/* 0x53 generic newline pattern */
+#define	VERTWS                	84	/* 0x54 vertical whitespace         (Perl 6) */
+#define	NVERTWS               	85	/* 0x55 not vertical whitespace     (Perl 6) */
+#define	HORIZWS               	86	/* 0x56 horizontal whitespace       (Perl 6) */
+#define	NHORIZWS              	87	/* 0x57 not horizontal whitespace   (Perl 6) */
+#define	OPTIMIZED             	88	/* 0x58 Placeholder for dump. */
+#define	PSEUDO                	89	/* 0x59 Pseudo opcode for internal use. */
 	/* ------------ States ------------- */
 #define	TRIE_next             	(REGNODE_MAX + 1)	/* state for TRIE */
 #define	TRIE_next_fail        	(REGNODE_MAX + 2)	/* state for TRIE */
@@ -225,6 +230,11 @@ EXTCONST U8 PL_regkind[] = {
 	VERB,     	/* COMMIT                 */
 	VERB,     	/* CUTGROUP               */
 	KEEPS,    	/* KEEPS                  */
+	LNBREAK,  	/* LNBREAK                */
+	VERTWS,   	/* VERTWS                 */
+	NVERTWS,  	/* NVERTWS                */
+	HORIZWS,  	/* HORIZWS                */
+	NHORIZWS, 	/* NHORIZWS               */
 	NOTHING,  	/* OPTIMIZED              */
 	PSEUDO,   	/* PSEUDO                 */
 	/* ------------ States ------------- */
@@ -358,6 +368,11 @@ static const U8 regarglen[] = {
 	EXTRA_SIZE(struct regnode_1),        	/* COMMIT       */
 	EXTRA_SIZE(struct regnode_1),        	/* CUTGROUP     */
 	0,                                   	/* KEEPS        */
+	0,                                   	/* LNBREAK      */
+	0,                                   	/* VERTWS       */
+	0,                                   	/* NVERTWS      */
+	0,                                   	/* HORIZWS      */
+	0,                                   	/* NHORIZWS     */
 	0,                                   	/* OPTIMIZED    */
 	0,                                   	/* PSEUDO       */
 };
@@ -448,6 +463,11 @@ static const char reg_off_by_arg[] = {
 	0,	/* COMMIT       */
 	0,	/* CUTGROUP     */
 	0,	/* KEEPS        */
+	0,	/* LNBREAK      */
+	0,	/* VERTWS       */
+	0,	/* NVERTWS      */
+	0,	/* HORIZWS      */
+	0,	/* NHORIZWS     */
 	0,	/* OPTIMIZED    */
 	0,	/* PSEUDO       */
 };
@@ -543,8 +563,13 @@ EXTCONST char * const PL_reg_name[] = {
 	"COMMIT",                	/* 0x50 */
 	"CUTGROUP",              	/* 0x51 */
 	"KEEPS",                 	/* 0x52 */
-	"OPTIMIZED",             	/* 0x53 */
-	"PSEUDO",                	/* 0x54 */
+	"LNBREAK",               	/* 0x53 */
+	"VERTWS",                	/* 0x54 */
+	"NVERTWS",               	/* 0x55 */
+	"HORIZWS",               	/* 0x56 */
+	"NHORIZWS",              	/* 0x57 */
+	"OPTIMIZED",             	/* 0x58 */
+	"PSEUDO",                	/* 0x59 */
 	/* ------------ States ------------- */
 	"TRIE_next",             	/* REGNODE_MAX +0x01 */
 	"TRIE_next_fail",        	/* REGNODE_MAX +0x02 */
diff --git a/t/op/pat.t b/t/op/pat.t
index 1af8fb3..a5b98f6 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -3386,7 +3386,7 @@ ok(("foba  ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i)
 }
 
 
-
+print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n";
 if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
     my @normal=qw(these are some normal words);
     my $psycho=join "|",@normal,map chr $_,255..20000;
@@ -3773,6 +3773,7 @@ sub iseq($$;$) {
 if ($ENV{PERL_SKIP_PSYCHO_TEST}){
   printf "ok %d Skip: No psycho tests\n", $test++;
 } else {    
+  print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n";
   my $r = qr/^
   	    (?:
   		( (?:a|z+)+ )
@@ -3913,25 +3914,6 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g;
     iseq($count,4,"/.(*PRUNE)/");
 }
-{   # Test the \v form of the (*PRUNE) pattern
-    our $count = 0;
-    'aaab'=~/a+b?(?{$count++})(*FAIL)/;
-    iseq($count,9,"expect 9 for no \\v");
-    $count = 0;
-    'aaab'=~/a+b?\v(?{$count++})(*FAIL)/;
-    iseq($count,3,"expect 3 with \\v");
-    local $_='aaab';
-    $count=0;
-    1 while /.\v(?{$count++})(*FAIL)/g;
-    iseq($count,4,"/.\\v/");
-    $count = 0;
-    'aaab'=~/a+b?(??{'\v'})(?{$count++})(*FAIL)/;
-    iseq($count,3,"expect 3 with \\v");
-    local $_='aaab';
-    $count=0;
-    1 while /.(??{'\v'})(?{$count++})(*FAIL)/g;
-    iseq($count,4,"/.\\v/");
-}
 {   # Test the (*SKIP) pattern
     our $count = 0;
     'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/;
@@ -3947,21 +3929,6 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     iseq($count,2,"Expect 2 with (*SKIP)" );
     iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
 }
-{   # Test the \V form of the (*SKIP) pattern
-    our $count = 0;
-    'aaab'=~/a+b?\V(?{$count++})(*FAIL)/;
-    iseq($count,1,"expect 1 with \\V");
-    local $_='aaab';
-    $count=0;
-    1 while /.\V(?{$count++})(*FAIL)/g;
-    iseq($count,4,"/.\\V/");
-    $_='aaabaaab';
-    $count=0;
-    our @res=();
-    1 while /(a+b?)\V(?{$count++; push @res,$1})(*FAIL)/g;
-    iseq($count,2,"Expect 2 with \\V" );
-    iseq("@res","aaab aaab","adjacent \\V works as expected" );
-}
 {   # Test the (*SKIP) pattern
     our $count = 0;
     'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/;
@@ -4345,7 +4312,41 @@ sub kt
         iseq("$1$2",'foooooobaaaaar');    
     }
     iseq("$1$2","foobar");
+}
+{
+    local $_="\t \r\n \n \t".chr(11)."\n";
+    s/\H/H/g;
+    s/\h/h/g;
+    iseq($_,"hhHHhHhhHH");
+    $_="\t \r\n \n \t".chr(11)."\n";
+    utf8::upgrade($_);
+    s/\H/H/g;
+    s/\h/h/g;
+    iseq($_,"hhHHhHhhHH");
 }    
+{
+    my @h=map { chr( $_ ) } (
+        0x09,   0x20,   0xa0,   0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
+        0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
+        0x202f, 0x205f, 0x3000
+    );
+    my @v=map { chr( $_ ) } ( 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 );
+    my @lb=( "\x0D\x0A",
+             map { chr( $_ ) } ( 0x0A..0x0D,0x85,0x2028,0x2029 ));
+    foreach my $t ([\@h,qr/\h/,qr/\h+/],[\@v,qr/\v/,qr/\v+/],[\@lb,qr/\R/,qr/\R+/],){
+        my $ary=shift @$t;
+        foreach my $pat (@$t) {
+            foreach my $str (@$ary) {
+                ok($str=~/($pat)/);
+                iseq($1,$str);
+                utf8::upgrade($str);
+                ok($str=~/($pat)/);
+                iseq($1,$str);
+            }
+        }
+    }
+}
+
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
 # Keep the following tests last -- they may crash perl
@@ -4427,7 +4428,8 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1663;
+    $::TestCount = 1928;
     print "1..$::TestCount\n";
 }
 
+
diff --git a/t/op/re_tests b/t/op/re_tests
index aa07b56..6eb03a2 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1283,3 +1283,44 @@ X(\w+)(?=\s)|X(\w+)	Xab	y	[$1-$2]	[-ab]
 (?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)	a	y	$&	a
 (?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)	aa	y	$&	aa
 \x{100}?(??{""})xxx	xxx	y	$&	xxx
+
+foo(\R)bar	foo\r\nbar	y	$1	\r\n
+foo(\R)bar	foo\nbar	y	$1	\n
+foo(\R)bar	foo\rbar	y	$1	\r
+
+foo(\R+)bar	foo\r\n\x{85}\r\n\nbar	y	$1	\r\n\x{85}\r\n\n
+(\V+)(\R)	foo\r\n\x{85}\r\n\nbar	y	$1-$2	foo-\r\n
+(\R+)(\V)	foo\r\n\x{85}\r\n\nbar	y	$1-$2	\r\n\x{85}\r\n\n-b
+foo(\R)bar	foo\x{85}bar	y	$1	\x{85}
+(\V)(\R)	foo\x{85}bar	y	$1-$2	o-\x{85}
+(\R)(\V)	foo\x{85}bar	y	$1-$2	\x{85}-b
+foo(\R)bar	foo\r\nbar	y	$1	\r\n
+(\V)(\R)	foo\r\nbar	y	$1-$2	o-\r\n
+(\R)(\V)	foo\r\nbar	y	$1-$2	\r\n-b
+foo(\R)bar	foo\r\nbar	y	$1	\r\n
+(\V)(\R)	foo\r\nbar	y	$1-$2	o-\r\n
+(\R)(\V)	foo\r\nbar	y	$1-$2	\r\n-b
+foo(\R)bar	foo\rbar	y	$1	\r
+(\V)(\R)	foo\rbar	y	$1-$2	o-\r
+(\R)(\V)	foo\rbar	y	$1-$2	\r-b
+
+foo(\v+)bar	foo\r\n\x{85}\r\n\nbar	y	$1	\r\n\x{85}\r\n\n
+(\V+)(\v)	foo\r\n\x{85}\r\n\nbar	y	$1-$2	foo-\r
+(\v+)(\V)	foo\r\n\x{85}\r\n\nbar	y	$1-$2	\r\n\x{85}\r\n\n-b
+foo(\v)bar	foo\x{85}bar	y	$1	\x{85}
+(\V)(\v)	foo\x{85}bar	y	$1-$2	o-\x{85}
+(\v)(\V)	foo\x{85}bar	y	$1-$2	\x{85}-b
+foo(\v)bar	foo\rbar	y	$1	\r
+(\V)(\v)	foo\rbar	y	$1-$2	o-\r
+(\v)(\V)	foo\rbar	y	$1-$2	\r-b
+
+
+foo(\h+)bar	foo\t\x{A0}bar	y	$1	\t\x{A0}
+(\H+)(\h)	foo\t\x{A0}bar	y	$1-$2	foo-\t
+(\h+)(\H)	foo\t\x{A0}bar	y	$1-$2	\t\x{A0}-b
+foo(\h)bar	foo\x{A0}bar	y	$1	\x{A0}
+(\H)(\h)	foo\x{A0}bar	y	$1-$2	o-\x{A0}
+(\h)(\H)	foo\x{A0}bar	y	$1-$2	\x{A0}-b
+foo(\h)bar	foo\tbar	y	$1	\t
+(\H)(\h)	foo\tbar	y	$1-$2	o-\t
+(\h)(\H)	foo\tbar	y	$1-$2	\t-b
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 919a239..7ad7d89 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -127,6 +127,9 @@ EOFCODE
                 \$got = "$repl";
 EOFCODE
         }
+        #$code.=qq[\n\$expect="$expect";\n];
+        #use Devel::Peek;
+        #die Dump($code) if $pat=~/\\h/ and $subject=~/\x{A0}/;
 	{
 	    # Probably we should annotate specific tests with which warnings
 	    # categories they're known to trigger, and hence should be