lexer API

diff --git a/MANIFEST b/MANIFEST

index 22db6a3..b8073eb 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -3203,6 +3203,7 @@ ext/XS-APItest-KeywordRPN/KeywordRPN.xs   XS::APItest::KeywordRPN extension
 ext/XS-APItest-KeywordRPN/Makefile.PL  XS::APItest::KeywordRPN extension
 ext/XS-APItest-KeywordRPN/README       XS::APItest::KeywordRPN extension
 ext/XS-APItest-KeywordRPN/t/keyword_plugin.t   test keyword plugin mechanism
+ext/XS-APItest-KeywordRPN/t/multiline.t        test plugin parsing across lines
 ext/XS-APItest/Makefile.PL     XS::APItest extension
 ext/XS-APItest/MANIFEST                XS::APItest extension
 ext/XS-APItest/notcore.c       Test API functions when PERL_CORE is not defined
diff --git a/embed.fnc b/embed.fnc

index 7522055..f17f7ad 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -515,6 +515,19 @@ Ap |void   |leave_scope    |I32 base
 EXp    |void   |lex_end
 : Used in various files
 p      |void   |lex_start      |NULLOK SV* line|NULLOK PerlIO *rsfp|bool new_filter
+: Public lexer API
+Apd    |bool   |lex_bufutf8
+Apd    |char*  |lex_grow_linestr|STRLEN len
+Apd    |void   |lex_stuff_pvn  |NN char* pv|STRLEN len|U32 flags
+Apd    |void   |lex_stuff_sv   |NN SV* sv|U32 flags
+Apd    |void   |lex_unstuff    |NN char* ptr
+Apd    |void   |lex_read_to    |NN char* ptr
+Apd    |void   |lex_discard_to |NN char* ptr
+Apd    |bool   |lex_next_chunk |U32 flags
+Apd    |I32    |lex_peek_unichar|U32 flags
+Apd    |I32    |lex_read_unichar|U32 flags
+Apd    |void   |lex_read_space |U32 flags
+: Used in various files
 Ap     |void   |op_null        |NN OP* o
 : FIXME. Used by Data::Alias
 EXp    |void   |op_clear       |NN OP* o
diff --git a/embed.h b/embed.h

index 52e40c6..17bf11a 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -408,6 +408,17 @@
 #ifdef PERL_CORE
 #define lex_start              Perl_lex_start
 #endif
+#define lex_bufutf8            Perl_lex_bufutf8
+#define lex_grow_linestr       Perl_lex_grow_linestr
+#define lex_stuff_pvn          Perl_lex_stuff_pvn
+#define lex_stuff_sv           Perl_lex_stuff_sv
+#define lex_unstuff            Perl_lex_unstuff
+#define lex_read_to            Perl_lex_read_to
+#define lex_discard_to         Perl_lex_discard_to
+#define lex_next_chunk         Perl_lex_next_chunk
+#define lex_peek_unichar       Perl_lex_peek_unichar
+#define lex_read_unichar       Perl_lex_read_unichar
+#define lex_read_space         Perl_lex_read_space
 #define op_null                        Perl_op_null
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define op_clear               Perl_op_clear
@@ -2786,6 +2797,17 @@
 #ifdef PERL_CORE
 #define lex_start(a,b,c)       Perl_lex_start(aTHX_ a,b,c)
 #endif
+#define lex_bufutf8()          Perl_lex_bufutf8(aTHX)
+#define lex_grow_linestr(a)    Perl_lex_grow_linestr(aTHX_ a)
+#define lex_stuff_pvn(a,b,c)   Perl_lex_stuff_pvn(aTHX_ a,b,c)
+#define lex_stuff_sv(a,b)      Perl_lex_stuff_sv(aTHX_ a,b)
+#define lex_unstuff(a)         Perl_lex_unstuff(aTHX_ a)
+#define lex_read_to(a)         Perl_lex_read_to(aTHX_ a)
+#define lex_discard_to(a)      Perl_lex_discard_to(aTHX_ a)
+#define lex_next_chunk(a)      Perl_lex_next_chunk(aTHX_ a)
+#define lex_peek_unichar(a)    Perl_lex_peek_unichar(aTHX_ a)
+#define lex_read_unichar(a)    Perl_lex_read_unichar(aTHX_ a)
+#define lex_read_space(a)      Perl_lex_read_space(aTHX_ a)
 #define op_null(a)             Perl_op_null(aTHX_ a)
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define op_clear(a)            Perl_op_clear(aTHX_ a)
diff --git a/ext/XS-APItest-KeywordRPN/KeywordRPN.pm b/ext/XS-APItest-KeywordRPN/KeywordRPN.pm

index 8894447..2114c61 100644 (file)
--- a/ext/XS-APItest-KeywordRPN/KeywordRPN.pm
+++ b/ext/XS-APItest-KeywordRPN/KeywordRPN.pm
@@ -84,7 +84,7 @@ package XS::APItest::KeywordRPN;
 use warnings;
 use strict;
 
-our $VERSION = "0.002";
+our $VERSION = "0.003";
 
 require XSLoader;
 XSLoader::load(__PACKAGE__, $VERSION);
@@ -120,9 +120,6 @@ due to it being intended only for demonstration and test purposes.
 The RPN parser is liable to leak memory when a parse error occurs.
 It doesn't leak on success, however.
 
-The linkage with Perl's lexer is liable to fail when an RPN expression
-is spread across multiple lines.
-
 =head1 SEE ALSO
 
 L<Devel::Declare>,
diff --git a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs

index d095774..e205eea 100644 (file)
--- a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs
+++ b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs
@@ -16,55 +16,26 @@ static int (*next_keyword_plugin)(pTHX_ char *, STRLEN, OP **);
 #define PL_bufptr (PL_parser->bufptr)
 #define PL_bufend (PL_parser->bufend)
 
-static char THX_peek_char(pTHX)
-{
-       if(PL_bufptr == PL_bufend)
-               croak("unexpected EOF "
-                       "(or you were unlucky about buffer position, FIXME)");
-       return *PL_bufptr;
-}
-#define peek_char() THX_peek_char(aTHX)
-
-static char THX_read_char(pTHX)
-{
-       char c = peek_char();
-       PL_bufptr++;
-       if(c == '\n') CopLINE_inc(PL_curcop);
-       return c;
-}
-#define read_char() THX_read_char(aTHX)
-
-static void THX_skip_opt_ws(pTHX)
-{
-       while(1) {
-               switch(peek_char()) {
-                       case '\t': case '\n': case '\v': case '\f': case ' ':
-                               read_char();
-                               break;
-                       default:
-                               return;
-               }
-       }
-}
-#define skip_opt_ws() THX_skip_opt_ws(aTHX)
-
 /* RPN parser */
 
 static OP *THX_parse_var(pTHX)
 {
-       SV *varname = sv_2mortal(newSVpvs("$"));
+       char *s = PL_bufptr;
+       char *start = s;
        PADOFFSET varpos;
        OP *padop;
-       if(peek_char() != '$') croak("RPN syntax error");
-       read_char();
+       if(*s != '$') croak("RPN syntax error");
        while(1) {
-               char c = peek_char();
+               char c = *++s;
                if(!isALNUM(c)) break;
-               read_char();
-               sv_catpvn_nomg(varname, &c, 1);
        }
-       if(SvCUR(varname) < 2) croak("RPN syntax error");
-       varpos = pad_findmy(SvPVX(varname), SvCUR(varname), 0);
+       if(s-start < 2) croak("RPN syntax error");
+       lex_read_to(s);
+       {
+               /* because pad_findmy() doesn't really use length yet */
+               SV *namesv = sv_2mortal(newSVpvn(start, s-start));
+               varpos = pad_findmy(SvPVX(namesv), s-start, 0);
+       }
        if(varpos == NOT_IN_PAD || PAD_COMPNAME_FLAGS_isOUR(varpos))
                croak("RPN only supports \"my\" variables");
        padop = newOP(OP_PADSV, 0);
@@ -84,9 +55,9 @@ static OP *THX_parse_rpn_expr(pTHX)
 {
        OP *stack = NULL, *tmpop;
        while(1) {
-               char c;
-               skip_opt_ws();
-               c = peek_char();
+               I32 c;
+               lex_read_space(0);
+               c = lex_peek_unichar(0);
                switch(c) {
                        case /*(*/')': case /*{*/'}': {
                                OP *result = pop_rpn_item();
@@ -99,9 +70,9 @@ static OP *THX_parse_rpn_expr(pTHX)
                        case '5': case '6': case '7': case '8': case '9': {
                                UV val = 0;
                                do {
-                                       read_char();
+                                       lex_read_unichar(0);
                                        val = 10*val + (c - '0');
-                                       c = peek_char();
+                                       c = lex_peek_unichar(0);
                                } while(c >= '0' && c <= '9');
                                push_rpn_item(newSVOP(OP_CONST, 0,
                                        newSVuv(val)));
@@ -112,31 +83,31 @@ static OP *THX_parse_rpn_expr(pTHX)
                        case '+': {
                                OP *b = pop_rpn_item();
                                OP *a = pop_rpn_item();
-                               read_char();
+                               lex_read_unichar(0);
                                push_rpn_item(newBINOP(OP_I_ADD, 0, a, b));
                        } break;
                        case '-': {
                                OP *b = pop_rpn_item();
                                OP *a = pop_rpn_item();
-                               read_char();
+                               lex_read_unichar(0);
                                push_rpn_item(newBINOP(OP_I_SUBTRACT, 0, a, b));
                        } break;
                        case '*': {
                                OP *b = pop_rpn_item();
                                OP *a = pop_rpn_item();
-                               read_char();
+                               lex_read_unichar(0);
                                push_rpn_item(newBINOP(OP_I_MULTIPLY, 0, a, b));
                        } break;
                        case '/': {
                                OP *b = pop_rpn_item();
                                OP *a = pop_rpn_item();
-                               read_char();
+                               lex_read_unichar(0);
                                push_rpn_item(newBINOP(OP_I_DIVIDE, 0, a, b));
                        } break;
                        case '%': {
                                OP *b = pop_rpn_item();
                                OP *a = pop_rpn_item();
-                               read_char();
+                               lex_read_unichar(0);
                                push_rpn_item(newBINOP(OP_I_MODULO, 0, a, b));
                        } break;
                        default: {
@@ -150,14 +121,14 @@ static OP *THX_parse_rpn_expr(pTHX)
 static OP *THX_parse_keyword_rpn(pTHX)
 {
        OP *op;
-       skip_opt_ws();
-       if(peek_char() != '('/*)*/)
+       lex_read_space(0);
+       if(lex_peek_unichar(0) != '('/*)*/)
                croak("RPN expression must be parenthesised");
-       read_char();
+       lex_read_unichar(0);
        op = parse_rpn_expr();
-       if(peek_char() != /*(*/')')
+       if(lex_peek_unichar(0) != /*(*/')')
                croak("RPN expression must be parenthesised");
-       read_char();
+       lex_read_unichar(0);
        return op;
 }
 #define parse_keyword_rpn() THX_parse_keyword_rpn(aTHX)
@@ -165,16 +136,16 @@ static OP *THX_parse_keyword_rpn(pTHX)
 static OP *THX_parse_keyword_calcrpn(pTHX)
 {
        OP *varop, *exprop;
-       skip_opt_ws();
+       lex_read_space(0);
        varop = parse_var();
-       skip_opt_ws();
-       if(peek_char() != '{'/*}*/)
+       lex_read_space(0);
+       if(lex_peek_unichar(0) != '{'/*}*/)
                croak("RPN expression must be braced");
-       read_char();
+       lex_read_unichar(0);
        exprop = parse_rpn_expr();
-       if(peek_char() != /*{*/'}')
+       if(lex_peek_unichar(0) != /*{*/'}')
                croak("RPN expression must be braced");
-       read_char();
+       lex_read_unichar(0);
        return newASSIGNOP(OPf_STACKED, varop, 0, exprop);
 }
 #define parse_keyword_calcrpn() THX_parse_keyword_calcrpn(aTHX)
diff --git a/ext/XS-APItest-KeywordRPN/t/keyword_plugin.t b/ext/XS-APItest-KeywordRPN/t/keyword_plugin.t

index 2b705d7..85f4b60 100644 (file)
--- a/ext/XS-APItest-KeywordRPN/t/keyword_plugin.t
+++ b/ext/XS-APItest-KeywordRPN/t/keyword_plugin.t
@@ -6,70 +6,70 @@ use Test::More tests => 13;
 BEGIN { $^H |= 0x20000; }
 no warnings;
 
-my($t, $n);
-$n = 5;
+my($triangle, $num);
+$num = 5;
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN ();
-       $t = rpn($n $n 1 + * 2 /);
+       $triangle = rpn($num $num 1 + * 2 /);
 };
 isnt $@, "";
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(rpn);
-       $t = rpn($n $n 1 + * 2 /);
+       $triangle = rpn($num $num 1 + * 2 /);
 };
 is $@, "";
-is $t, 15;
+is $triangle, 15;
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(rpn);
-       $t = join(":", "x", rpn($n $n 1 + * 2 /), "y");
+       $triangle = join(":", "x", rpn($num $num 1 + * 2 /), "y");
 };
 is $@, "";
-is $t, "x:15:y";
+is $triangle, "x:15:y";
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(rpn);
-       $t = 1 + rpn($n $n 1 + * 2 /) * 10;
+       $triangle = 1 + rpn($num $num 1 + * 2 /) * 10;
 };
 is $@, "";
-is $t, 151;
+is $triangle, 151;
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(rpn);
-       $t = rpn($n $n 1 + * 2 /);
-       $t++;
+       $triangle = rpn($num $num 1 + * 2 /);
+       $triangle++;
 };
 is $@, "";
-is $t, 16;
+is $triangle, 16;
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(rpn);
-       $t = rpn($n $n 1 + * 2 /)
-       $t++;
+       $triangle = rpn($num $num 1 + * 2 /)
+       $triangle++;
 };
 isnt $@, "";
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(calcrpn);
-       calcrpn $t { $n $n 1 + * 2 / }
-       $t++;
+       calcrpn $triangle { $num $num 1 + * 2 / }
+       $triangle++;
 };
 is $@, "";
-is $t, 16;
+is $triangle, 16;
 
-$t = undef;
+$triangle = undef;
 eval q{
        use XS::APItest::KeywordRPN qw(calcrpn);
-       123 + calcrpn $t { $n $n 1 + * 2 / } ;
+       123 + calcrpn $triangle { $num $num 1 + * 2 / } ;
 };
 isnt $@, "";
 
diff --git a/ext/XS-APItest-KeywordRPN/t/multiline.t b/ext/XS-APItest-KeywordRPN/t/multiline.t

new file mode 100644 (file)

index 0000000..b5c9c83
--- /dev/null
+++ b/ext/XS-APItest-KeywordRPN/t/multiline.t
@@ -0,0 +1,27 @@
+use warnings;
+use strict;
+
+use Test::More tests => 4;
+
+my($t, $n);
+$n = 5;
+
+use XS::APItest::KeywordRPN qw(rpn);
+$t = rpn($n
+        $n 1 +
+               * #wibble
+#wobble
+2
+               /
+);
+is $t, 15;
+is __LINE__, 18;
+
+$t = 0;
+$t = rpn($n $n 1 + *
+#line 100
+       2 /);
+is $t, 15;
+is __LINE__, 102;
+
+1;
diff --git a/global.sym b/global.sym

index 6000af7..6a44049 100644 (file)
--- a/global.sym
+++ b/global.sym
@@ -244,6 +244,17 @@ Perl_is_utf8_xdigit
 Perl_is_utf8_mark
 Perl_leave_scope
 Perl_lex_end
+Perl_lex_bufutf8
+Perl_lex_grow_linestr
+Perl_lex_stuff_pvn
+Perl_lex_stuff_sv
+Perl_lex_unstuff
+Perl_lex_read_to
+Perl_lex_discard_to
+Perl_lex_next_chunk
+Perl_lex_peek_unichar
+Perl_lex_read_unichar
+Perl_lex_read_space
 Perl_op_null
 Perl_op_clear
 Perl_op_refcnt_lock
diff --git a/parser.h b/parser.h

index 74d8ef2..462dcfd 100644 (file)
--- a/parser.h
+++ b/parser.h
@@ -107,6 +107,10 @@ typedef struct yy_parser {
 
 } yy_parser;
 
+/* flags for lexer API */
+#define LEX_STUFF_UTF8         0x00000001
+#define LEX_KEEP_PREVIOUS      0x00000002
+
 /*
  * Local variables:
  * c-indentation-style: bsd
diff --git a/perlvars.h b/perlvars.h

index 3639bd6..3d37891 100644 (file)
--- a/perlvars.h
+++ b/perlvars.h
@@ -207,14 +207,7 @@ C<KEYWORD_PLUGIN_DECLINE>, and the normal parser process will continue.
 
 If the function wants to handle the keyword, it first must
 parse anything following the keyword that is part of the syntax
-introduced by the keyword.  The lexer interface is poorly documented.
-Broadly speaking, parsing needs to look at the buffer that extends
-from C<PL_parser-E<gt>bufptr> to C<PL_parser-E<gt>bufend>, and
-C<PL_parser-E<gt>bufptr> must be advanced across whatever text is
-consumed by the parsing process.  The buffer end is not necessarily the
-real end of the input text, but refilling the buffer is too complicated
-to discuss here.  See L<Devel::Declare> for some parsing experience,
-and hope for more core support in a future version of Perl.
+introduced by the keyword.  See L</Lexer interface> for details.
 
 When a keyword is being handled, the plugin function must build
 a tree of C<OP> structures, representing the code that was parsed.
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index d38244e..c46927c 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2260,6 +2260,20 @@ effective uids or gids failed.
 length/code combination tried to obtain more data. This results in
 an undefined value for the length. See L<perlfunc/pack>.
 
+=item Lexing code attempted to stuff non-Latin-1 character into Latin-1 input
+
+(F) An extension is attempting to insert text into the current parse
+(using L<lex_stuff_pvn_flags|perlapi/lex_stuff_pvn_flags> or similar), but
+tried to insert a character that couldn't be part of the current input.
+This is an inherent pitfall of the stuffing mechanism, and one of the
+reasons to avoid it.  Where it is necessary to stuff, stuffing only
+plain ASCII is recommended.
+
+=item Lexing code internal error (%s)
+
+(F) Lexing code supplied by an extension violated the lexer's API in a
+detectable way.
+
 =item listen() on closed socket %s
 
 (W closed) You tried to do a listen on a closed socket.  Did you forget
diff --git a/proto.h b/proto.h

index f1ab3d0..fd2eb36 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1440,6 +1440,37 @@ STATIC OP*       S_is_inplace_av(pTHX_ OP* o, OP* oright)
 PERL_CALLCONV void     Perl_leave_scope(pTHX_ I32 base);
 PERL_CALLCONV void     Perl_lex_end(pTHX);
 PERL_CALLCONV void     Perl_lex_start(pTHX_ SV* line, PerlIO *rsfp, bool new_filter);
+PERL_CALLCONV bool     Perl_lex_bufutf8(pTHX);
+PERL_CALLCONV char*    Perl_lex_grow_linestr(pTHX_ STRLEN len);
+PERL_CALLCONV void     Perl_lex_stuff_pvn(pTHX_ char* pv, STRLEN len, U32 flags)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_LEX_STUFF_PVN \
+       assert(pv)
+
+PERL_CALLCONV void     Perl_lex_stuff_sv(pTHX_ SV* sv, U32 flags)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_LEX_STUFF_SV  \
+       assert(sv)
+
+PERL_CALLCONV void     Perl_lex_unstuff(pTHX_ char* ptr)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_LEX_UNSTUFF   \
+       assert(ptr)
+
+PERL_CALLCONV void     Perl_lex_read_to(pTHX_ char* ptr)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_LEX_READ_TO   \
+       assert(ptr)
+
+PERL_CALLCONV void     Perl_lex_discard_to(pTHX_ char* ptr)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_LEX_DISCARD_TO        \
+       assert(ptr)
+
+PERL_CALLCONV bool     Perl_lex_next_chunk(pTHX_ U32 flags);
+PERL_CALLCONV I32      Perl_lex_peek_unichar(pTHX_ U32 flags);
+PERL_CALLCONV I32      Perl_lex_read_unichar(pTHX_ U32 flags);
+PERL_CALLCONV void     Perl_lex_read_space(pTHX_ U32 flags);
 PERL_CALLCONV void     Perl_op_null(pTHX_ OP* o)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_OP_NULL       \
diff --git a/toke.c b/toke.c

index b8abbd8..6793f7b 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -21,6 +21,21 @@
  * The main routine is yylex(), which returns the next token.
  */
 
+/*
+=head1 Lexer interface
+
+This is the lower layer of the Perl parser, managing characters and tokens.
+
+=for apidoc AmU|yy_parser *|PL_parser
+
+Pointer to a structure encapsulating the state of the parsing operation
+currently in progress.  The pointer can be locally changed to perform
+a nested parse without interfering with the state of an outer parse.
+Individual members of C<PL_parser> have their own documentation.
+
+=cut
+*/
+
 #include "EXTERN.h"
 #define PERL_IN_TOKE_C
 #include "perl.h"
@@ -756,6 +771,697 @@ Perl_lex_end(pTHX)
 }
 
 /*
+=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+
+Buffer scalar containing the chunk currently under consideration of the
+text currently being lexed.  This is always a plain string scalar (for
+which C<SvPOK> is true).  It is not intended to be used as a scalar by
+normal scalar means; instead refer to the buffer directly by the pointer
+variables described below.
+
+The lexer maintains various C<char*> pointers to things in the
+C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
+reallocated, all of these pointers must be updated.  Don't attempt to
+do this manually, but rather use L</lex_grow_linestr> if you need to
+reallocate the buffer.
+
+The content of the text chunk in the buffer is commonly exactly one
+complete line of input, up to and including a newline terminator,
+but there are situations where it is otherwise.  The octets of the
+buffer may be intended to be interpreted as either UTF-8 or Latin-1.
+The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
+flag on this scalar, which may disagree with it.
+
+For direct examination of the buffer, the variable
+L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
+lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
+of these pointers is usually preferable to examination of the scalar
+through normal scalar means.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+
+Direct pointer to the end of the chunk of text currently being lexed, the
+end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
++ SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
+always located at the end of the buffer, and does not count as part of
+the buffer's contents.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+
+Points to the current position of lexing inside the lexer buffer.
+Characters around this point may be freely examined, within
+the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
+L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
+interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
+
+Lexing code (whether in the Perl core or not) moves this pointer past
+the characters that it consumes.  It is also expected to perform some
+bookkeeping whenever a newline character is consumed.  This movement
+can be more conveniently performed by the function L</lex_read_to>,
+which handles newlines appropriately.
+
+Interpretation of the buffer's octets can be abstracted out by
+using the slightly higher-level functions L</lex_peek_unichar> and
+L</lex_read_unichar>.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+
+Points to the start of the current line inside the lexer buffer.
+This is useful for indicating at which column an error occurred, and
+not much else.  This must be updated by any lexing code that consumes
+a newline; the function L</lex_read_to> handles this detail.
+
+=cut
+*/
+
+/*
+=for apidoc Amx|bool|lex_bufutf8
+
+Indicates whether the octets in the lexer buffer
+(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
+of Unicode characters.  If not, they should be interpreted as Latin-1
+characters.  This is analogous to the C<SvUTF8> flag for scalars.
+
+In UTF-8 mode, it is not guaranteed that the lexer buffer actually
+contains valid UTF-8.  Lexing code must be robust in the face of invalid
+encoding.
+
+The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
+is significant, but not the whole story regarding the input character
+encoding.  Normally, when a file is being read, the scalar contains octets
+and its C<SvUTF8> flag is off, but the octets should be interpreted as
+UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
+however, the scalar may have the C<SvUTF8> flag on, and in this case its
+octets should be interpreted as UTF-8 unless the C<use bytes> pragma
+is in effect.  This logic may change in the future; use this function
+instead of implementing the logic yourself.
+
+=cut
+*/
+
+bool
+Perl_lex_bufutf8(pTHX)
+{
+    return UTF;
+}
+
+/*
+=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+
+Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
+at least I<len> octets (including terminating NUL).  Returns a
+pointer to the reallocated buffer.  This is necessary before making
+any direct modification of the buffer that would increase its length.
+L</lex_stuff_pvn> provides a more convenient way to insert text into
+the buffer.
+
+Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
+this function updates all of the lexer's variables that point directly
+into the buffer.
+
+=cut
+*/
+
+char *
+Perl_lex_grow_linestr(pTHX_ STRLEN len)
+{
+    SV *linestr;
+    char *buf;
+    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
+    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+    linestr = PL_parser->linestr;
+    buf = SvPVX(linestr);
+    if (len <= SvLEN(linestr))
+       return buf;
+    bufend_pos = PL_parser->bufend - buf;
+    bufptr_pos = PL_parser->bufptr - buf;
+    oldbufptr_pos = PL_parser->oldbufptr - buf;
+    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+    linestart_pos = PL_parser->linestart - buf;
+    last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+    last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+    buf = sv_grow(linestr, len);
+    PL_parser->bufend = buf + bufend_pos;
+    PL_parser->bufptr = buf + bufptr_pos;
+    PL_parser->oldbufptr = buf + oldbufptr_pos;
+    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+    PL_parser->linestart = buf + linestart_pos;
+    if (PL_parser->last_uni)
+       PL_parser->last_uni = buf + last_uni_pos;
+    if (PL_parser->last_lop)
+       PL_parser->last_lop = buf + last_lop_pos;
+    return buf;
+}
+
+/*
+=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
+
+Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
+immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
+reallocating the buffer if necessary.  This means that lexing code that
+runs later will see the characters as if they had appeared in the input.
+It is not recommended to do this as part of normal parsing, and most
+uses of this facility run the risk of the inserted characters being
+interpreted in an unintended manner.
+
+The string to be inserted is represented by I<len> octets starting
+at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
+according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
+The characters are recoded for the lexer buffer, according to how the
+buffer is currently being interpreted (L</lex_bufutf8>).  If a string
+to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
+function is more convenient.
+
+=cut
+*/
+
+void
+Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
+{
+    char *bufptr;
+    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
+    if (flags & ~(LEX_STUFF_UTF8))
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
+    if (UTF) {
+       if (flags & LEX_STUFF_UTF8) {
+           goto plain_copy;
+       } else {
+           STRLEN highhalf = 0;
+           char *p, *e = pv+len;
+           for (p = pv; p != e; p++)
+               highhalf += !!(((U8)*p) & 0x80);
+           if (!highhalf)
+               goto plain_copy;
+           lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
+           bufptr = PL_parser->bufptr;
+           Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+           PL_parser->bufend += len+highhalf;
+           for (p = pv; p != e; p++) {
+               U8 c = (U8)*p;
+               if (c & 0x80) {
+                   *bufptr++ = (char)(0xc0 | (c >> 6));
+                   *bufptr++ = (char)(0x80 | (c & 0x3f));
+               } else {
+                   *bufptr++ = (char)c;
+               }
+           }
+       }
+    } else {
+       if (flags & LEX_STUFF_UTF8) {
+           STRLEN highhalf = 0;
+           char *p, *e = pv+len;
+           for (p = pv; p != e; p++) {
+               U8 c = (U8)*p;
+               if (c >= 0xc4) {
+                   Perl_croak(aTHX_ "Lexing code attempted to stuff "
+                               "non-Latin-1 character into Latin-1 input");
+               } else if (c >= 0xc2 && p+1 != e &&
+                           (((U8)p[1]) & 0xc0) == 0x80) {
+                   p++;
+                   highhalf++;
+               } else if (c >= 0x80) {
+                   /* malformed UTF-8 */
+                   ENTER;
+                   SAVESPTR(PL_warnhook);
+                   PL_warnhook = PERL_WARNHOOK_FATAL;
+                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   LEAVE;
+               }
+           }
+           if (!highhalf)
+               goto plain_copy;
+           lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
+           bufptr = PL_parser->bufptr;
+           Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+           PL_parser->bufend += len-highhalf;
+           for (p = pv; p != e; p++) {
+               U8 c = (U8)*p;
+               if (c & 0x80) {
+                   *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
+                   p++;
+               } else {
+                   *bufptr++ = (char)c;
+               }
+           }
+       } else {
+           plain_copy:
+           lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
+           bufptr = PL_parser->bufptr;
+           Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+           PL_parser->bufend += len;
+           Copy(pv, bufptr, len, char);
+       }
+    }
+}
+
+/*
+=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+
+Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
+immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
+reallocating the buffer if necessary.  This means that lexing code that
+runs later will see the characters as if they had appeared in the input.
+It is not recommended to do this as part of normal parsing, and most
+uses of this facility run the risk of the inserted characters being
+interpreted in an unintended manner.
+
+The string to be inserted is the string value of I<sv>.  The characters
+are recoded for the lexer buffer, according to how the buffer is currently
+being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
+not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
+need to construct a scalar.
+
+=cut
+*/
+
+void
+Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
+{
+    char *pv;
+    STRLEN len;
+    PERL_ARGS_ASSERT_LEX_STUFF_SV;
+    if (flags)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
+    pv = SvPV(sv, len);
+    lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
+}
+
+/*
+=for apidoc Amx|void|lex_unstuff|char *ptr
+
+Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
+I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
+This hides the discarded text from any lexing code that runs later,
+as if the text had never appeared.
+
+This is not the normal way to consume lexed text.  For that, use
+L</lex_read_to>.
+
+=cut
+*/
+
+void
+Perl_lex_unstuff(pTHX_ char *ptr)
+{
+    char *buf, *bufend;
+    STRLEN unstuff_len;
+    PERL_ARGS_ASSERT_LEX_UNSTUFF;
+    buf = PL_parser->bufptr;
+    if (ptr < buf)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+    if (ptr == buf)
+       return;
+    bufend = PL_parser->bufend;
+    if (ptr > bufend)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+    unstuff_len = ptr - buf;
+    Move(ptr, buf, bufend+1-ptr, char);
+    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
+    PL_parser->bufend = bufend - unstuff_len;
+}
+
+/*
+=for apidoc Amx|void|lex_read_to|char *ptr
+
+Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
+to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
+performing the correct bookkeeping whenever a newline character is passed.
+This is the normal way to consume lexed text.
+
+Interpretation of the buffer's octets can be abstracted out by
+using the slightly higher-level functions L</lex_peek_unichar> and
+L</lex_read_unichar>.
+
+=cut
+*/
+
+void
+Perl_lex_read_to(pTHX_ char *ptr)
+{
+    char *s;
+    PERL_ARGS_ASSERT_LEX_READ_TO;
+    s = PL_parser->bufptr;
+    if (ptr < s || ptr > PL_parser->bufend)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
+    for (; s != ptr; s++)
+       if (*s == '\n') {
+           CopLINE_inc(PL_curcop);
+           PL_parser->linestart = s+1;
+       }
+    PL_parser->bufptr = ptr;
+}
+
+/*
+=for apidoc Amx|void|lex_discard_to|char *ptr
+
+Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
+up to I<ptr>.  The remaining content of the buffer will be moved, and
+all pointers into the buffer updated appropriately.  I<ptr> must not
+be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
+it is not permitted to discard text that has yet to be lexed.
+
+Normally it is not necessarily to do this directly, because it suffices to
+use the implicit discarding behaviour of L</lex_next_chunk> and things
+based on it.  However, if a token stretches across multiple lines,
+and the lexing code has kept multiple lines of text in the buffer fof
+that purpose, then after completion of the token it would be wise to
+explicitly discard the now-unneeded earlier lines, to avoid future
+multi-line tokens growing the buffer without bound.
+
+=cut
+*/
+
+void
+Perl_lex_discard_to(pTHX_ char *ptr)
+{
+    char *buf;
+    STRLEN discard_len;
+    PERL_ARGS_ASSERT_LEX_DISCARD_TO;
+    buf = SvPVX(PL_parser->linestr);
+    if (ptr < buf)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+    if (ptr == buf)
+       return;
+    if (ptr > PL_parser->bufptr)
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+    discard_len = ptr - buf;
+    if (PL_parser->oldbufptr < ptr)
+       PL_parser->oldbufptr = ptr;
+    if (PL_parser->oldoldbufptr < ptr)
+       PL_parser->oldoldbufptr = ptr;
+    if (PL_parser->last_uni && PL_parser->last_uni < ptr)
+       PL_parser->last_uni = NULL;
+    if (PL_parser->last_lop && PL_parser->last_lop < ptr)
+       PL_parser->last_lop = NULL;
+    Move(ptr, buf, PL_parser->bufend+1-ptr, char);
+    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
+    PL_parser->bufend -= discard_len;
+    PL_parser->bufptr -= discard_len;
+    PL_parser->oldbufptr -= discard_len;
+    PL_parser->oldoldbufptr -= discard_len;
+    if (PL_parser->last_uni)
+       PL_parser->last_uni -= discard_len;
+    if (PL_parser->last_lop)
+       PL_parser->last_lop -= discard_len;
+}
+
+/*
+=for apidoc Amx|bool|lex_next_chunk|U32 flags
+
+Reads in the next chunk of text to be lexed, appending it to
+L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
+looked to the end of the current chunk and wants to know more.  It is
+usual, but not necessary, for lexing to have consumed the entirety of
+the current chunk at this time.
+
+If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
+chunk (i.e., the current chunk has been entirely consumed), normally the
+current chunk will be discarded at the same time that the new chunk is
+read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
+will not be discarded.  If the current chunk has not been entirely
+consumed, then it will not be discarded regardless of the flag.
+
+Returns true if some new text was added to the buffer, or false if the
+buffer has reached the end of the input text.
+
+=cut
+*/
+
+#define LEX_FAKE_EOF 0x80000000
+
+bool
+Perl_lex_next_chunk(pTHX_ U32 flags)
+{
+    SV *linestr;
+    char *buf;
+    STRLEN old_bufend_pos, new_bufend_pos;
+    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
+    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+    bool got_some;
+    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+#ifdef PERL_MAD
+    flags |= LEX_KEEP_PREVIOUS;
+#endif /* PERL_MAD */
+    linestr = PL_parser->linestr;
+    buf = SvPVX(linestr);
+    if (!(flags & LEX_KEEP_PREVIOUS) &&
+           PL_parser->bufptr == PL_parser->bufend) {
+       old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
+       linestart_pos = 0;
+       if (PL_parser->last_uni != PL_parser->bufend)
+           PL_parser->last_uni = NULL;
+       if (PL_parser->last_lop != PL_parser->bufend)
+           PL_parser->last_lop = NULL;
+       last_uni_pos = last_lop_pos = 0;
+       *buf = 0;
+       SvCUR(linestr) = 0;
+    } else {
+       old_bufend_pos = PL_parser->bufend - buf;
+       bufptr_pos = PL_parser->bufptr - buf;
+       oldbufptr_pos = PL_parser->oldbufptr - buf;
+       oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+       linestart_pos = PL_parser->linestart - buf;
+       last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+       last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+    }
+    if (flags & LEX_FAKE_EOF) {
+       goto eof;
+    } else if (!PL_parser->rsfp) {
+       got_some = 0;
+    } else if (filter_gets(linestr, old_bufend_pos)) {
+       got_some = 1;
+    } else {
+       eof:
+       /* End of real input.  Close filehandle (unless it was STDIN),
+        * then add implicit termination.
+        */
+       if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
+           PerlIO_clearerr(PL_parser->rsfp);
+       else if (PL_parser->rsfp)
+           (void)PerlIO_close(PL_parser->rsfp);
+       PL_parser->rsfp = NULL;
+       PL_doextract = FALSE;
+#ifdef PERL_MAD
+       if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
+           PL_faketokens = 1;
+#endif
+       if (!PL_in_eval && PL_minus_p) {
+           sv_catpvs(linestr,
+               /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
+           PL_minus_n = PL_minus_p = 0;
+       } else if (!PL_in_eval && PL_minus_n) {
+           sv_catpvs(linestr, /*{*/";}");
+           PL_minus_n = 0;
+       } else
+           sv_catpvs(linestr, ";");
+       got_some = 1;
+    }
+    buf = SvPVX(linestr);
+    new_bufend_pos = SvCUR(linestr);
+    PL_parser->bufend = buf + new_bufend_pos;
+    PL_parser->bufptr = buf + bufptr_pos;
+    PL_parser->oldbufptr = buf + oldbufptr_pos;
+    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+    PL_parser->linestart = buf + linestart_pos;
+    if (PL_parser->last_uni)
+       PL_parser->last_uni = buf + last_uni_pos;
+    if (PL_parser->last_lop)
+       PL_parser->last_lop = buf + last_lop_pos;
+    if (got_some && (PERLDB_LINE || PERLDB_SAVESRC) &&
+           PL_curstash != PL_debstash) {
+       /* debugger active and we're not compiling the debugger code,
+        * so store the line into the debugger's array of lines
+        */
+       update_debugger_info(NULL, buf+old_bufend_pos,
+           new_bufend_pos-old_bufend_pos);
+    }
+    return got_some;
+}
+
+/*
+=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+
+Looks ahead one (Unicode) character in the text currently being lexed.
+Returns the codepoint (unsigned integer value) of the next character,
+or -1 if lexing has reached the end of the input text.  To consume the
+peeked character, use L</lex_read_unichar>.
+
+If the next character is in (or extends into) the next chunk of input
+text, the next chunk will be read in.  Normally the current chunk will be
+discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+then the current chunk will not be discarded.
+
+If the input is being interpreted as UTF-8 and a UTF-8 encoding error
+is encountered, an exception is generated.
+
+=cut
+*/
+
+I32
+Perl_lex_peek_unichar(pTHX_ U32 flags)
+{
+    char *s, *bufend;
+    if (flags & ~(LEX_KEEP_PREVIOUS))
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
+    s = PL_parser->bufptr;
+    bufend = PL_parser->bufend;
+    if (UTF) {
+       U8 head;
+       I32 unichar;
+       STRLEN len, retlen;
+       if (s == bufend) {
+           if (!lex_next_chunk(flags))
+               return -1;
+           s = PL_parser->bufptr;
+           bufend = PL_parser->bufend;
+       }
+       head = (U8)*s;
+       if (!(head & 0x80))
+           return head;
+       if (head & 0x40) {
+           len = PL_utf8skip[head];
+           while ((STRLEN)(bufend-s) < len) {
+               if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
+                   break;
+               s = PL_parser->bufptr;
+               bufend = PL_parser->bufend;
+           }
+       }
+       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       if (retlen == (STRLEN)-1) {
+           /* malformed UTF-8 */
+           ENTER;
+           SAVESPTR(PL_warnhook);
+           PL_warnhook = PERL_WARNHOOK_FATAL;
+           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           LEAVE;
+       }
+       return unichar;
+    } else {
+       if (s == bufend) {
+           if (!lex_next_chunk(flags))
+               return -1;
+           s = PL_parser->bufptr;
+       }
+       return (U8)*s;
+    }
+}
+
+/*
+=for apidoc Amx|I32|lex_read_unichar|U32 flags
+
+Reads the next (Unicode) character in the text currently being lexed.
+Returns the codepoint (unsigned integer value) of the character read,
+and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
+if lexing has reached the end of the input text.  To non-destructively
+examine the next character, use L</lex_peek_unichar> instead.
+
+If the next character is in (or extends into) the next chunk of input
+text, the next chunk will be read in.  Normally the current chunk will be
+discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+then the current chunk will not be discarded.
+
+If the input is being interpreted as UTF-8 and a UTF-8 encoding error
+is encountered, an exception is generated.
+
+=cut
+*/
+
+I32
+Perl_lex_read_unichar(pTHX_ U32 flags)
+{
+    I32 c;
+    if (flags & ~(LEX_KEEP_PREVIOUS))
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
+    c = lex_peek_unichar(flags);
+    if (c != -1) {
+       if (c == '\n')
+           CopLINE_inc(PL_curcop);
+       PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+    }
+    return c;
+}
+
+/*
+=for apidoc Amx|void|lex_read_space|U32 flags
+
+Reads optional spaces, in Perl style, in the text currently being
+lexed.  The spaces may include ordinary whitespace characters and
+Perl-style comments.  C<#line> directives are processed if encountered.
+L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
+at a non-space character (or the end of the input text).
+
+If spaces extend into the next chunk of input text, the next chunk will
+be read in.  Normally the current chunk will be discarded at the same
+time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
+chunk will not be discarded.
+
+=cut
+*/
+
+void
+Perl_lex_read_space(pTHX_ U32 flags)
+{
+    char *s, *bufend;
+    bool need_incline = 0;
+    if (flags & ~(LEX_KEEP_PREVIOUS))
+       Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
+#ifdef PERL_MAD
+    if (PL_skipwhite) {
+       sv_free(PL_skipwhite);
+       PL_skipwhite = NULL;
+    }
+    if (PL_madskills)
+       PL_skipwhite = newSVpvs("");
+#endif /* PERL_MAD */
+    s = PL_parser->bufptr;
+    bufend = PL_parser->bufend;
+    while (1) {
+       char c = *s;
+       if (c == '#') {
+           do {
+               c = *++s;
+           } while (!(c == '\n' || (c == 0 && s == bufend)));
+       } else if (c == '\n') {
+           s++;
+           PL_parser->linestart = s;
+           if (s == bufend)
+               need_incline = 1;
+           else
+               incline(s);
+       } else if (isSPACE(c)) {
+           s++;
+       } else if (c == 0 && s == bufend) {
+           bool got_more;
+#ifdef PERL_MAD
+           if (PL_madskills)
+               sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
+#endif /* PERL_MAD */
+           PL_parser->bufptr = s;
+           CopLINE_inc(PL_curcop);
+           got_more = lex_next_chunk(flags);
+           CopLINE_dec(PL_curcop);
+           s = PL_parser->bufptr;
+           bufend = PL_parser->bufend;
+           if (!got_more)
+               break;
+           if (need_incline && PL_parser->rsfp) {
+               incline(s);
+               need_incline = 0;
+           }
+       } else {
+           break;
+       }
+    }
+#ifdef PERL_MAD
+    if (PL_madskills)
+       sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
+#endif /* PERL_MAD */
+    PL_parser->bufptr = s;
+}
+
+/*
  * S_incline
  * This subroutine has nothing to do with tilting, whether at windmills
  * or pinball tables.  Its name is short for "increment line".  It
@@ -994,177 +1700,44 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
 STATIC char *
 S_skipspace(pTHX_ register char *s)
 {
-    dVAR;
 #ifdef PERL_MAD
-    int curoff;
-    int startoff = s - SvPVX(PL_linestr);
-
+    char *start = s;
+#endif /* PERL_MAD */
     PERL_ARGS_ASSERT_SKIPSPACE;
-
+#ifdef PERL_MAD
     if (PL_skipwhite) {
        sv_free(PL_skipwhite);
-       PL_skipwhite = 0;
+       PL_skipwhite = NULL;
     }
-#endif
-    PERL_ARGS_ASSERT_SKIPSPACE;
-
+#endif /* PERL_MAD */
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
        while (s < PL_bufend && SPACE_OR_TAB(*s))
            s++;
-#ifdef PERL_MAD
-       goto done;
-#else
-       return s;
-#endif
-    }
-    for (;;) {
-       STRLEN prevlen;
-       SSize_t oldprevlen, oldoldprevlen;
-       SSize_t oldloplen = 0, oldunilen = 0;
-       while (s < PL_bufend && isSPACE(*s)) {
-           if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
-               incline(s);
-       }
-
-       /* comment */
-       if (s < PL_bufend && *s == '#') {
-           while (s < PL_bufend && *s != '\n')
-               s++;
-           if (s < PL_bufend) {
+    } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
+       while (isSPACE(*s) && *s != '\n')
+           s++;
+       if (*s == '#') {
+           do {
                s++;
-               if (PL_in_eval && !PL_rsfp) {
-                   incline(s);
-                   continue;
-               }
-           }
+           } while (s != PL_bufend && *s != '\n');
        }
-
-       /* only continue to recharge the buffer if we're at the end
-        * of the buffer, we're not reading from a source filter, and
-        * we're in normal lexing mode
-        */
-       if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
-               PL_lex_state == LEX_FORMLINE)
-#ifdef PERL_MAD
-           goto done;
-#else
-           return s;
-#endif
-
-       /* try to recharge the buffer */
-#ifdef PERL_MAD
-       curoff = s - SvPVX(PL_linestr);
-#endif
-
-       if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
-           == NULL)
-       {
-#ifdef PERL_MAD
-           if (PL_madskills && curoff != startoff) {
-               if (!PL_skipwhite)
-                   PL_skipwhite = newSVpvs("");
-               sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
-                                       curoff - startoff);
-           }
-
-           /* mustn't throw out old stuff yet if madpropping */
-           SvCUR(PL_linestr) = curoff;
-           s = SvPVX(PL_linestr) + curoff;
-           *s = 0;
-           if (curoff && s[-1] == '\n')
-               s[-1] = ' ';
-#endif
-
-           /* end of file.  Add on the -p or -n magic */
-           /* XXX these shouldn't really be added here, can't set PL_faketokens */
-           if (PL_minus_p) {
-#ifdef PERL_MAD
-               sv_catpvs(PL_linestr,
-                        ";}continue{print or die qq(-p destination: $!\\n);}");
-#else
-               sv_setpvs(PL_linestr,
-                        ";}continue{print or die qq(-p destination: $!\\n);}");
-#endif
-               PL_minus_n = PL_minus_p = 0;
-           }
-           else if (PL_minus_n) {
-#ifdef PERL_MAD
-               sv_catpvs(PL_linestr, ";}");
-#else
-               sv_setpvs(PL_linestr, ";}");
-#endif
-               PL_minus_n = 0;
-           }
-           else
-#ifdef PERL_MAD
-               sv_catpvs(PL_linestr,";");
-#else
-               sv_setpvs(PL_linestr,";");
-#endif
-
-           /* reset variables for next time we lex */
-           PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
-               = SvPVX(PL_linestr)
-#ifdef PERL_MAD
-               + curoff
-#endif
-               ;
-           PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-           PL_last_lop = PL_last_uni = NULL;
-
-           /* Close the filehandle.  Could be from
-            * STDIN, or a regular file.  If we were reading code from
-            * STDIN (because the commandline held no -e or filename)
-            * then we don't close it, we reset it so the code can
-            * read from STDIN too.
-            */
-
-           if ((PerlIO*)PL_rsfp == PerlIO_stdin())
-               PerlIO_clearerr(PL_rsfp);
-           else
-               (void)PerlIO_close(PL_rsfp);
-           PL_rsfp = NULL;
-           return s;
-       }
-
-       /* not at end of file, so we only read another line */
-       /* make corresponding updates to old pointers, for yyerror() */
-       oldprevlen = PL_oldbufptr - PL_bufend;
-       oldoldprevlen = PL_oldoldbufptr - PL_bufend;
-       if (PL_last_uni)
-           oldunilen = PL_last_uni - PL_bufend;
-       if (PL_last_lop)
-           oldloplen = PL_last_lop - PL_bufend;
-       PL_linestart = PL_bufptr = s + prevlen;
-       PL_bufend = s + SvCUR(PL_linestr);
+       if (*s == '\n')
+           s++;
+    } else {
+       STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
+       PL_bufptr = s;
+       lex_read_space(LEX_KEEP_PREVIOUS);
        s = PL_bufptr;
-       PL_oldbufptr = s + oldprevlen;
-       PL_oldoldbufptr = s + oldoldprevlen;
-       if (PL_last_uni)
-           PL_last_uni = s + oldunilen;
-       if (PL_last_lop)
-           PL_last_lop = s + oldloplen;
-       incline(s);
-
-       /* debugger active and we're not compiling the debugger code,
-        * so store the line into the debugger's array of lines
-        */
-       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
-           update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
+       PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
+       if (PL_linestart > PL_bufptr)
+           PL_bufptr = PL_linestart;
+       return s;
     }
-
 #ifdef PERL_MAD
-  done:
-    if (PL_madskills) {
-       if (!PL_skipwhite)
-           PL_skipwhite = newSVpvs("");
-       curoff = s - SvPVX(PL_linestr);
-       if (curoff - startoff)
-           sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
-                               curoff - startoff);
-    }
+    if (PL_madskills)
+       PL_skipwhite = newSVpvn(start, s-start);
+#endif /* PERL_MAD */
     return s;
-#endif
 }
 
 /*
@@ -3702,7 +4275,7 @@ Perl_yylex(pTHX)
                sv_catpvs(PL_linestr,
                          "use feature ':5." STRINGIFY(PERL_VERSION) "';");
            if (PL_minus_n || PL_minus_p) {
-               sv_catpvs(PL_linestr, "LINE: while (<>) {");
+               sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
                if (PL_minus_l)
                    sv_catpvs(PL_linestr,"chomp;");
                if (PL_minus_a) {
@@ -3741,43 +4314,25 @@ Perl_yylex(pTHX)
            goto retry;
        }
        do {
+           U32 fake_eof = 0;
            bof = PL_rsfp ? TRUE : FALSE;
-           if ((s = filter_gets(PL_linestr, 0)) == NULL) {
+           if (0) {
              fake_eof:
+               fake_eof = LEX_FAKE_EOF;
+           }
+           PL_bufptr = PL_bufend;
+           if (!lex_next_chunk(fake_eof)) {
+               s = PL_bufptr;
+               TOKEN(';');     /* not infinite loop because rsfp is NULL now */
+           }
 #ifdef PERL_MAD
+           if (!PL_rsfp)
                PL_realtokenstart = -1;
 #endif
-               if (PL_rsfp) {
-                   if ((PerlIO *)PL_rsfp == PerlIO_stdin())
-                       PerlIO_clearerr(PL_rsfp);
-                   else
-                       (void)PerlIO_close(PL_rsfp);
-                   PL_rsfp = NULL;
-                   PL_doextract = FALSE;
-               }
-               if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-#ifdef PERL_MAD
-                   if (PL_madskills)
-                       PL_faketokens = 1;
-#endif
-                   if (PL_minus_p)
-                       sv_setpvs(PL_linestr, ";}continue{print;}");
-                   else
-                       sv_setpvs(PL_linestr, ";}");
-                   PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
-                   PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-                   PL_last_lop = PL_last_uni = NULL;
-                   PL_minus_n = PL_minus_p = 0;
-                   goto retry;
-               }
-               PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
-               PL_last_lop = PL_last_uni = NULL;
-               sv_setpvs(PL_linestr,"");
-               TOKEN(';');     /* not infinite loop because rsfp is NULL now */
-           }
+           s = PL_bufptr;
            /* If it looks like the start of a BOM or raw UTF-16,
             * check if it in fact is. */
-           else if (bof &&
+           if (bof &&
                     (*s == 0 ||
                      *(U8*)s == 0xEF ||
                      *(U8*)s >= 0xFE ||
@@ -11472,12 +12027,12 @@ S_scan_heredoc(pTHX_ register char *s)
                PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
        }
 #endif
-       if (!outer ||
-        !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
-          = filter_gets(PL_linestr, 0))) {
+       PL_bufptr = s;
+       if (!outer || !lex_next_chunk(0)) {
            CopLINE_set(PL_curcop, (line_t)PL_multi_start);
            missingterm(PL_tokenbuf);
        }
+       s = PL_bufptr;
 #ifdef PERL_MAD
        stuffstart = s - SvPVX(PL_linestr);
 #endif
@@ -11985,26 +12540,17 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
                PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
        }
 #endif
-       if (!PL_rsfp ||
-        !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
-          = filter_gets(PL_linestr, 0))) {
+       CopLINE_inc(PL_curcop);
+       PL_bufptr = PL_bufend;
+       if (!lex_next_chunk(0)) {
            sv_free(sv);
            CopLINE_set(PL_curcop, (line_t)PL_multi_start);
            return NULL;
        }
+       s = PL_bufptr;
 #ifdef PERL_MAD
        stuffstart = 0;
 #endif
-       /* we read a line, so increment our line counter */
-       CopLINE_inc(PL_curcop);
-
-       /* update debugger info */
-       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
-           update_debugger_info(PL_linestr, NULL, 0);
-
-       /* having changed the buffer, we must update PL_bufend */
-       PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-       PL_last_lop = PL_last_uni = NULL;
     }
 
     /* at this point, we have successfully read the delimited string */
@@ -12530,6 +13076,7 @@ S_scan_formline(pTHX_ register char *s)
        }
        s = (char*)eol;
        if (PL_rsfp) {
+           bool got_some;
 #ifdef PERL_MAD
            if (PL_madskills) {
                if (PL_thistoken)
@@ -12538,18 +13085,16 @@ S_scan_formline(pTHX_ register char *s)
                    PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
            }
 #endif
-           s = filter_gets(PL_linestr, 0);
+           PL_bufptr = PL_bufend;
+           CopLINE_inc(PL_curcop);
+           got_some = lex_next_chunk(0);
+           CopLINE_dec(PL_curcop);
+           s = PL_bufptr;
 #ifdef PERL_MAD
-           tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
-#else
-           PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
+           tokenstart = PL_bufptr;
 #endif
-           PL_bufend = PL_bufptr + SvCUR(PL_linestr);
-           PL_last_lop = PL_last_uni = NULL;
-           if (!s) {
-               s = PL_bufptr;
+           if (!got_some)
                break;
-           }
        }
        incline(s);
     }
MANIFEST		patch \| blob \| blame \| history
embed.fnc		patch \| blob \| blame \| history
embed.h		patch \| blob \| blame \| history
ext/XS-APItest-KeywordRPN/KeywordRPN.pm		patch \| blob \| blame \| history
ext/XS-APItest-KeywordRPN/KeywordRPN.xs		patch \| blob \| blame \| history
ext/XS-APItest-KeywordRPN/t/keyword_plugin.t		patch \| blob \| blame \| history
ext/XS-APItest-KeywordRPN/t/multiline.t	[new file with mode: 0644]	patch \| blob
global.sym		patch \| blob \| blame \| history
parser.h		patch \| blob \| blame \| history
perlvars.h		patch \| blob \| blame \| history
pod/perldiag.pod		patch \| blob \| blame \| history
proto.h		patch \| blob \| blame \| history
toke.c		patch \| blob \| blame \| history