Update Module::Load::Conditional to 0.20

[p5sagit/p5-mst-13.2.git] / pod / perlreapi.pod
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod

index 3b5dc85..6e5be84 100644 (file)
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -4,9 +4,11 @@ perlreapi - perl regular expression plugin interface
 
 =head1 DESCRIPTION
 
-As of Perl 5.9.5 there is a new interface for using other regexp
-engines than the default one.  Each engine is supposed to provide
-access to a constant structure of the following format:
+As of Perl 5.9.5 there is a new interface for plugging and using other
+regular expression engines than the default one.
+
+Each engine is supposed to provide access to a constant structure of the
+following format:
 
     typedef struct regexp_engine {
         REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags);
@@ -34,7 +36,7 @@ access to a constant structure of the following format:
     #endif
 
 When a regexp is compiled, its C<engine> field is then set to point at
-the appropriate structure so that when it needs to be used Perl can find
+the appropriate structure, so that when it needs to be used Perl can find
 the right routines to do so.
 
 In order to install a new regexp handler, C<$^H{regcomp}> is set
@@ -61,7 +63,7 @@ the individual fields in the REGEXP struct.
 
 The C<pattern> parameter is the scalar that was used as the
 pattern. Previous versions of perl would pass two C<char*> indicating
-the start and end of the stringifed pattern, the following snippet can
+the start and end of the stringified pattern, the following snippet can
 be used to get the old parameters:
 
     STRLEN plen;
@@ -75,7 +77,7 @@ expression (C<< "ook" =~ qr/eek/ >>). perl's own engine will always
 stringify everything using the snippet above but that doesn't mean
 other engines have to.
 
-The C<flags> paramater is a bitfield which indicates which of the
+The C<flags> parameter is a bitfield which indicates which of the
 C<msixp> flags the regex was compiled with. It also contains
 additional info such as whether C<use locale> is in effect.
 
@@ -124,24 +126,10 @@ Additional flags:
 
 =over 4
 
-=item RXf_SKIPWHITE
-
-If C<split> is invoked as C<split ' '> or with no arguments (which
-really means C<split(' ', $_>, see L<split|perlfunc/split>), perl will set
-this flag and change the pattern from C<" "> to C<"\s+"> before it's
-passed to the comp routine.
-
-If the flag is present in C<< rx->extflags >> C<split> to delete
-whitespace from the start of the subject string before it's operated
-on. What is considered whitespace depends on whether the subject is a
-UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set.
-
-This probably always be preserved verbatim in C<< rx->extflags >>.
-
 =item RXf_PMf_LOCALE
 
 Set if C<use locale> is in effect. If present in C<< rx->extflags >>
-C<split> will use the locale dependant definition of whitespace under
+C<split> will use the locale dependent definition of whitespace
 when RXf_SKIPWHITE or RXf_WHITE are in effect. Under ASCII whitespace
 is defined as per L<isSPACE|perlapi/ISSPACE>, and by the internal
 macros C<is_utf8_space> under UTF-8 and C<isSPACE_LC> under C<use
@@ -156,6 +144,16 @@ compilation. The perl engine for instance may upgrade non-UTF-8
 strings to UTF-8 if the pattern includes constructs such as C<\x{...}>
 that can only match Unicode values.
 
+=item RXf_SPLIT
+
+If C<split> is invoked as C<split ' '> or with no arguments (which
+really means C<split(' ', $_)>, see L<split|perlfunc/split>), perl will
+set this flag. The regex engine can then check for it and set the
+SKIPWHITE and WHITE extflags. To do this the perl engine does:
+
+    if (flags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
+        r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
+
 =back
 
 These flags can be set during compilation to enable optimizations in
@@ -163,6 +161,16 @@ the C<split> operator.
 
 =over 4
 
+=item RXf_SKIPWHITE
+
+If the flag is present in C<< rx->extflags >> C<split> will delete
+whitespace from the start of the subject string before it's operated
+on. What is considered whitespace depends on whether the subject is a
+UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set.
+
+If RXf_WHITE is set in addition to this flag C<split> will behave like
+C<split " "> under the perl engine.
+
 =item RXf_START_ONLY
 
 Tells the split operator to split the target string on newlines
@@ -180,9 +188,17 @@ without invoking the regex engine. The definition of whitespace varies
 depending on whether the target string is a UTF-8 string and on
 whether RXf_PMf_LOCALE is set.
 
-Perl's engine sets this flag if the pattern is C<\s+>, which it will be if
-the pattern actually was C<\s+> or if it was originally C<" "> (see
-C<RXf_SKIPWHITE> above).
+Perl's engine sets this flag if the pattern is C<\s+>.
+
+=item RXf_NULL
+
+Tells the split operator to split the target string on
+characters. The definition of character varies depending on whether
+the target string is a UTF-8 string.
+
+Perl's engine sets this flag on empty patterns; this optimization
+makes C<split //> much faster than it would otherwise be. It's even
+faster than C<unpack>.
 
 =back
 
@@ -229,7 +245,7 @@ Called to get/set the value of C<$`>, C<$'>, C<$&> and their named
 equivalents, ${^PREMATCH}, ${^POSTMATCH} and ${^MATCH}, as well as the
 numbered capture buffers (C<$1>, C<$2>, ...).
 
-The C<paren> paramater will be C<-2> for C<$`>, C<-1> for C<$'>, C<0>
+The C<paren> parameter will be C<-2> for C<$`>, C<-1> for C<$'>, C<0>
 for C<$&>, C<1> for C<$1> and so forth.
 
 The names have been chosen by analogy with L<Tie::Scalar> methods
@@ -271,7 +287,7 @@ Example:
     }
 
 Perl's own engine will croak on any attempt to modify the capture
-variables, to do this in another engine use the following callack
+variables, to do this in another engine use the following callback
 (copied from C<Perl_reg_numbered_buff_store>):
 
     void
@@ -345,27 +361,27 @@ currently defined:
 Which L<Tie::Hash> operation is being performed from the Perl level on
 C<%+> or C<%->, if any:
 
-    RXf_HASH_FETCH
-    RXf_HASH_STORE
-    RXf_HASH_DELETE
-    RXf_HASH_CLEAR
-    RXf_HASH_EXISTS
-    RXf_HASH_SCALAR
-    RXf_HASH_FIRSTKEY
-    RXf_HASH_NEXTKEY
+    RXapif_FETCH
+    RXapif_STORE
+    RXapif_DELETE
+    RXapif_CLEAR
+    RXapif_EXISTS
+    RXapif_SCALAR
+    RXapif_FIRSTKEY
+    RXapif_NEXTKEY
 
 Whether C<%+> or C<%-> is being operated on, if any.
 
-    RXf_HASH_ONE /* %+ */
-    RXf_HASH_ALL /* %- */
+    RXapif_ONE /* %+ */
+    RXapif_ALL /* %- */
 
 Whether this is being called as C<re::regname>, C<re::regnames> or
 C<re::regnames_count>, if any. The first two will be combined with
-C<RXf_HASH_ONE> or C<RXf_HASH_ALL>.
+C<RXapif_ONE> or C<RXapif_ALL>.
 
-    RXf_HASH_REGNAME
-    RXf_HASH_REGNAMES
-    RXf_HASH_REGNAMES_COUNT
+    RXapif_REGNAME
+    RXapif_REGNAMES
+    RXapif_REGNAMES_COUNT
 
 Internally C<%+> and C<%-> are implemented with a real tied interface
 via L<Tie::Hash::NamedCapture>. The methods in that package will call
@@ -426,11 +442,11 @@ Functions>.
     void* dupe(pTHX_ REGEXP * const rx, CLONE_PARAMS *param);
 
 On threaded builds a regexp may need to be duplicated so that the pattern
-can be used by mutiple threads. This routine is expected to handle the
+can be used by multiple threads. This routine is expected to handle the
 duplication of any private data pointed to by the C<pprivate> member of
 the regexp structure.  It will be called with the preconstructed new
 regexp structure as an argument, the C<pprivate> member will point at
-the B<old> private structue, and it is this routine's responsibility to
+the B<old> private structure, and it is this routine's responsibility to
 construct a copy and return a pointer to it (which perl will then use to
 overwrite the field as passed to this routine.)
 
@@ -550,7 +566,7 @@ following pattern:
 where the C<minlen> would be 3 but C<minlenret> would only be 2 as the \d is
 required to match but is not actually included in the matched content. This
 distinction is particularly important as the substitution logic uses the
-C<minlenret> to tell whether it can do in-place substition which can result in
+C<minlenret> to tell whether it can do in-place substitution which can result in
 considerable speedup.
 
 =head2 C<gofs>