X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fperlre.pod;h=1336c5c24d4fd5c3291f5fcc9eeb59ebb76d3f7b;hb=6d1e6673d7386f4f9139111a6e44d555b8252741;hp=07ff02213c704deb342db403e05b2a4fb97eaf1c;hpb=64c5a5665d9d2e73526d93f8e1b8e0488ead3228;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/pod/perlre.pod b/pod/perlre.pod
index 07ff022..1336c5c 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -102,7 +102,7 @@ X</x>
 
 =head3 Metacharacters
 
-The patterns used in Perl pattern matching evolved from the ones supplied in
+The patterns used in Perl pattern matching evolved from those supplied in
 the Version 8 regex routines.  (The routines are derived
 (distantly) from Henry Spencer's freely redistributable reimplementation
 of the V8 routines.)  See L<Version 8 Regular Expressions> for
@@ -223,9 +223,9 @@ X<\0> X<\c> X<\N> X<\x>
     \e		escape (think troff)  (ESC)
     \033	octal char            (example: ESC)
     \x1B	hex char              (example: ESC)
-    \x{263a}	wide hex char         (example: Unicode SMILEY)
+    \x{263a}	long hex char         (example: Unicode SMILEY)
     \cK		control char          (example: VT)
-    \N{name}	named char
+    \N{name}	named Unicode character
     \l		lowercase next char (think vi)
     \u		uppercase next char (think vi)
     \L		lowercase till \E (think vi)
@@ -258,7 +258,7 @@ X<word> X<whitespace> X<character class> X<backreference>
     \pP	     Match P, named property.  Use \p{Prop} for longer names.
     \PP	     Match non-P
     \X	     Match eXtended Unicode "combining character sequence",
-             equivalent to (?:\PM\pM*)
+             equivalent to (?>\PM\pM*)
     \C	     Match a single C char (octet) even under Unicode.
 	     NOTE: breaks up characters into their UTF-8 bytes,
 	     so you may end up with malformed pieces of UTF-8.
@@ -270,10 +270,8 @@ X<word> X<whitespace> X<character class> X<backreference>
              optionally be wrapped in curly brackets for safer parsing.
     \g{name} Named backreference
     \k<name> Named backreference
-    \N{name} Named Unicode character, or Unicode escape
-    \x12     Hexadecimal escape sequence
-    \x{1234} Long hexadecimal escape sequence
     \K       Keep the stuff left of the \K, don't include it in $&
+    \N       Any character but \n
     \v       Vertical whitespace
     \V       Not vertical whitespace
     \h       Horizontal whitespace
@@ -318,26 +316,34 @@ they must always be used within a character class expression.
     # this is not, and will generate a warning:
     $string =~ /[:alpha:]/;
 
-The available classes and their backslash equivalents (if available) are
-as follows:
-X<character class>
+The following table shows the mapping of POSIX character class
+names, common escapes, literal escape sequences and their equivalent
+Unicode style property names.
+X<character class> X<\p> X<\p{}>
 X<alpha> X<alnum> X<ascii> X<blank> X<cntrl> X<digit> X<graph>
 X<lower> X<print> X<punct> X<space> X<upper> X<word> X<xdigit>
 
-    alpha
-    alnum
-    ascii
-    blank		[1]
-    cntrl
-    digit       \d
-    graph
-    lower
-    print
-    punct
-    space       \s	[2]
-    upper
-    word        \w	[3]
-    xdigit
+B<Note:> up to Perl 5.10 the property names used were shared with
+standard Unicode properties, this was changed in Perl 5.11, see
+L<perl5110delta> for details.
+
+    POSIX  Esc  Class               Property		Note
+    --------------------------------------------------------
+    alnum       [0-9A-Za-z]         IsPosixAlnum
+    alpha       [A-Za-z]            IsPosixAlpha
+    ascii       [\000-\177]         IsASCII
+    blank       [\011 ]             IsPosixBlank	[1]
+    cntrl       [\0-\37\177]        IsPosixCntrl
+    digit   \d  [0-9]               IsPosixDigit
+    graph       [!-~]               IsPosixGraph
+    lower       [a-z]               IsPosixLower
+    print       [ -~]               IsPosixPrint
+    punct       [!-/:-@[-`{-~]      IsPosixPunct
+    space       [\11-\15 ]          IsPosixSpace        [2]
+            \s  [\11\12\14\15 ]     IsPerlSpace         [2]
+    upper       [A-Z]               IsPosixUpper
+    word    \w  [0-9A-Z_a-z]        IsPerlWord		[3]
+    xdigit      [0-9A-Fa-f]         IsXDigit
 
 =over
 
@@ -347,8 +353,9 @@ A GNU extension equivalent to C<[ \t]>, "all horizontal whitespace".
 
 =item [2]
 
-Not exactly equivalent to C<\s> since the C<[[:space:]]> includes
-also the (very rare) "vertical tabulator", "\cK" or chr(11) in ASCII.
+Note that C<\s> and C<[[:space:]]> are B<not> equivalent as C<[[:space:]]>
+includes also the (very rare) "vertical tabulator", "\cK" or chr(11) in
+ASCII.
 
 =item [3]
 
@@ -364,35 +371,23 @@ whole character class.  For example:
 
 matches zero, one, any alphabetic character, and the percent sign.
 
-The following equivalences to Unicode \p{} constructs and equivalent
-backslash character classes (if available), will hold:
-X<character class> X<\p> X<\p{}>
+=item C<$>
 
-    [[:...:]]	\p{...}		backslash
+Currency symbol
 
-    alpha       IsAlpha
-    alnum       IsAlnum
-    ascii       IsASCII
-    blank
-    cntrl       IsCntrl
-    digit       IsDigit        \d
-    graph       IsGraph
-    lower       IsLower
-    print       IsPrint
-    punct       IsPunct
-    space       IsSpace
-                IsSpacePerl    \s
-    upper       IsUpper
-    word        IsWord
-    xdigit      IsXDigit
+=item C<+> C<< < >> C<=> C<< > >> C<|> C<~>
 
-For example C<[[:lower:]]> and C<\p{IsLower}> are equivalent.
+Mathematical symbols
 
-If the C<utf8> pragma is not used but the C<locale> pragma is, the
-classes correlate with the usual isalpha(3) interface (except for
-"word" and "blank").
+=item C<^> C<`>
 
-The assumedly non-obviously named classes are:
+Modifier symbols (accents)
+
+=back
+
+=back
+
+The other named classes are:
 
 =over 4
 
@@ -435,9 +430,9 @@ X<character class, negation>
 
     POSIX         traditional  Unicode
 
-    [[:^digit:]]    \D         \P{IsDigit}
-    [[:^space:]]    \S         \P{IsSpace}
-    [[:^word:]]	    \W         \P{IsWord}
+    [[:^digit:]]    \D         \P{IsPosixDigit}
+    [[:^space:]]    \S         \P{IsPosixSpace}
+    [[:^word:]]     \W         \P{IsPerlWord}
 
 Perl respects the POSIX standard in that POSIX character classes are
 only supported within a character class.  The POSIX character classes
@@ -524,14 +519,14 @@ backreferences.
 
 X<\g{1}> X<\g{-1}> X<\g{name}> X<relative backreference> X<named backreference>
 In order to provide a safer and easier way to construct patterns using
-backreferences, Perl 5.10 provides the C<\g{N}> notation. The curly
-brackets are optional, however omitting them is less safe as the meaning
-of the pattern can be changed by text (such as digits) following it.
-When N is a positive integer the C<\g{N}> notation is exactly equivalent
-to using normal backreferences. When N is a negative integer then it is
-a relative backreference referring to the previous N'th capturing group.
-When the bracket form is used and N is not an integer, it is treated as a
-reference to a named buffer.
+backreferences, Perl provides the C<\g{N}> notation (starting with perl
+5.10.0). The curly brackets are optional, however omitting them is less
+safe as the meaning of the pattern can be changed by text (such as digits)
+following it. When N is a positive integer the C<\g{N}> notation is
+exactly equivalent to using normal backreferences. When N is a negative
+integer then it is a relative backreference referring to the previous N'th
+capturing group. When the bracket form is used and N is not an integer, it
+is treated as a reference to a named buffer.
 
 Thus C<\g{-1}> refers to the last buffer, C<\g{-2}> refers to the
 buffer before that. For example:
@@ -547,7 +542,7 @@ buffer before that. For example:
 
 and would match the same as C</(Y) ( (X) \3 \1 )/x>.
 
-Additionally, as of Perl 5.10 you may use named capture buffers and named
+Additionally, as of Perl 5.10.0 you may use named capture buffers and named
 backreferences. The notation is C<< (?<name>...) >> to declare and C<< \k<name> >>
 to reference. You may also use apostrophes instead of angle brackets to delimit the
 name; and you may use the bracketed C<< \g{name} >> backreference syntax.
@@ -617,7 +612,7 @@ already paid the price.  As of 5.005, C<$&> is not so costly as the
 other two.
 X<$&> X<$`> X<$'>
 
-As a workaround for this problem, Perl 5.10 introduces C<${^PREMATCH}>,
+As a workaround for this problem, Perl 5.10.0 introduces C<${^PREMATCH}>,
 C<${^MATCH}> and C<${^POSTMATCH}>, which are equivalent to C<$`>, C<$&>
 and C<$'>, B<except> that they are only guaranteed to be defined after a
 successful match that was executed with the C</p> (preserve) modifier.
@@ -743,7 +738,7 @@ X<(?|)> X<Branch reset>
 
 This is the "branch reset" pattern, which has the special property
 that the capture buffers are numbered from the same starting point
-in each alternation branch. It is available starting from perl 5.10.
+in each alternation branch. It is available starting from perl 5.10.0.
 
 Capture buffers are numbered from left to right, but inside this
 construct the numbering is restarted for each branch.
@@ -764,6 +759,9 @@ which buffer the captured content will be stored.
     / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
     # 1            2         2  3        2     3     4  
 
+Note: as of Perl 5.10.0, branch resets interfere with the contents of
+the C<%+> hash, that holds named captures. Consider using C<%-> instead.
+
 =item Look-Around Assertions
 X<look-around assertion> X<lookaround assertion> X<look-around> X<lookaround>
 
@@ -840,9 +838,9 @@ only for fixed-width look-behind.
 X<< (?<NAME>) >> X<(?'NAME')> X<named capture> X<capture>
 
 A named capture buffer. Identical in every respect to normal capturing
-parentheses C<()> but for the additional fact that C<%+> may be used after
-a successful match to refer to a named buffer. See C<perlvar> for more
-details on the C<%+> hash.
+parentheses C<()> but for the additional fact that C<%+> or C<%-> may be
+used after a successful match to refer to a named buffer. See C<perlvar>
+for more details on the C<%+> and C<%-> hashes.
 
 If multiple distinct capture buffers have the same name then the
 $+{NAME} will refer to the leftmost defined buffer in the match.
@@ -865,7 +863,7 @@ its Unicode extension (see L<utf8>),
 though it isn't extended by the locale (see L<perllocale>).
 
 B<NOTE:> In order to make things easier for programmers with experience
-with the Python or PCRE regex engines, the pattern C<< (?P<NAME>pattern) >>
+with the Python or PCRE regex engines, the pattern C<< (?PE<lt>NAMEE<gt>pattern) >>
 may be used instead of C<< (?<NAME>pattern) >>; however this form does not
 support the use of single quotes as a delimiter for the name.
 
@@ -1389,7 +1387,7 @@ If we add a C<(*PRUNE)> before the count like the following
     print "Count=$count\n";
 
 we prevent backtracking and find the count of the longest matching
-at each matching startpoint like so:
+at each matching starting point like so:
 
     aaab
     aab
@@ -1435,7 +1433,7 @@ outputs
     Count=2
 
 Once the 'aaab' at the start of the string has matched, and the C<(*SKIP)>
-executed, the next startpoint will be where the cursor was when the
+executed, the next starting point will be where the cursor was when the
 C<(*SKIP)> was executed.
 
 =item C<(*MARK:NAME)> C<(*:NAME)>
@@ -2108,13 +2106,13 @@ part of this regular expression needs to be converted explicitly
 
 =head1 PCRE/Python Support
 
-As of Perl 5.10 Perl supports several Python/PCRE specific extensions
+As of Perl 5.10.0, Perl supports several Python/PCRE specific extensions
 to the regex syntax. While Perl programmers are encouraged to use the
-Perl specific syntax, the following are legal in Perl 5.10:
+Perl specific syntax, the following are also accepted:
 
 =over 4
 
-=item C<< (?P<NAME>pattern) >>
+=item C<< (?PE<lt>NAMEE<gt>pattern) >>
 
 Define a named capture buffer. Equivalent to C<< (?<NAME>pattern) >>.