#endif
static char * filter_gets _((SV *sv, PerlIO *fp, STRLEN append));
static void restore_rsfp _((void *f));
+static void restore_expect _((void *e));
+static void restore_lex_expect _((void *e));
static char ident_too_long[] = "Identifier too long";
#define OLDLOP(f) return(yylval.ival=f,expect = XTERM,bufptr = s,(int)LSTOP)
static int
-ao(toketype)
-int toketype;
+ao(int toketype)
{
if (*bufptr == '=') {
bufptr++;
}
static void
-no_op(what, s)
-char *what;
-char *s;
+no_op(char *what, char *s)
{
char *oldbp = bufptr;
bool is_first = (oldbufptr == linestart);
}
static void
-missingterm(s)
-char *s;
+missingterm(char *s)
{
char tmpbuf[3];
char q;
if (s) {
char *nl = strrchr(s,'\n');
- if (nl)
+ if (nl)
*nl = '\0';
}
else if (multi_close < 32 || multi_close == 127) {
}
void
-deprecate(s)
-char *s;
+deprecate(char *s)
{
if (dowarn)
warn("Use of %s is deprecated", s);
}
static void
-depcom()
+depcom(void)
{
deprecate("comma-less variable list");
}
+#ifdef WIN32
+
+static I32
+win32_textfilter(int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count > 0 && !maxlen)
+ win32_strip_return(sv);
+ return count;
+}
+#endif
+
+
void
-lex_start(line)
-SV *line;
+lex_start(SV *line)
{
dTHR;
char *s;
SAVEPPTR(lex_brackstack);
SAVEPPTR(lex_casestack);
SAVEDESTRUCTOR(restore_rsfp, rsfp);
+ SAVESPTR(lex_stuff);
+ SAVEI32(lex_defer);
+ SAVESPTR(lex_repl);
+ SAVEDESTRUCTOR(restore_expect, tokenbuf + expect); /* encode as pointer */
+ SAVEDESTRUCTOR(restore_lex_expect, tokenbuf + expect);
lex_state = LEX_NORMAL;
lex_defer = 0;
*lex_casestack = '\0';
lex_dojoin = 0;
lex_starts = 0;
- if (lex_stuff)
- SvREFCNT_dec(lex_stuff);
lex_stuff = Nullsv;
- if (lex_repl)
- SvREFCNT_dec(lex_repl);
lex_repl = Nullsv;
lex_inpat = 0;
lex_inwhat = 0;
}
void
-lex_end()
+lex_end(void)
{
doextract = FALSE;
}
static void
-restore_rsfp(f)
-void *f;
+restore_rsfp(void *f)
{
PerlIO *fp = (PerlIO*)f;
}
static void
-incline(s)
-char *s;
+restore_expect(e)
+void *e;
+{
+ /* a safe way to store a small integer in a pointer */
+ expect = (expectation)((char *)e - tokenbuf);
+}
+
+static void
+restore_lex_expect(e)
+void *e;
+{
+ /* a safe way to store a small integer in a pointer */
+ lex_expect = (expectation)((char *)e - tokenbuf);
+}
+
+static void
+incline(char *s)
{
dTHR;
char *t;
}
static char *
-skipspace(s)
-register char *s;
+skipspace(register char *s)
{
dTHR;
if (lex_formbrack && lex_brackets <= lex_formbrack) {
oldoldbufptr = oldbufptr = bufptr = s = linestart = SvPVX(linestr);
bufend = SvPVX(linestr) + SvCUR(linestr);
if (preprocess && !in_eval)
- (void)my_pclose(rsfp);
+ (void)PerlProc_pclose(rsfp);
else if ((PerlIO*)rsfp == PerlIO_stdin())
PerlIO_clearerr(rsfp);
else
}
static void
-check_uni() {
+check_uni(void) {
char *s;
char ch;
char *t;
#define UNI(f) return uni(f,s)
static int
-uni(f,s)
-I32 f;
-char *s;
+uni(I32 f, char *s)
{
yylval.ival = f;
expect = XTERM;
#define LOP(f,x) return lop(f,x,s)
static I32
-lop
-#ifdef CAN_PROTOTYPE
- (I32 f, expectation x, char *s)
-#else
- (f,x,s)
-I32 f;
-expectation x;
-char *s;
-#endif /* CAN_PROTOTYPE */
+lop(I32 f, expectation x, char *s)
{
dTHR;
yylval.ival = f;
}
static void
-force_next(type)
-I32 type;
+force_next(I32 type)
{
nexttype[nexttoke] = type;
nexttoke++;
}
static char *
-force_word(start,token,check_keyword,allow_pack,allow_tick)
-register char *start;
-int token;
-int check_keyword;
-int allow_pack;
-int allow_tick;
+force_word(register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
{
register char *s;
STRLEN len;
s = start;
if (isIDFIRST(*s) ||
(allow_pack && *s == ':') ||
- (allow_tick && *s == '\'') )
+ (allow_initial_tick && *s == '\'') )
{
s = scan_word(s, tokenbuf, sizeof tokenbuf, allow_pack, &len);
if (check_keyword && keyword(tokenbuf, len))
}
static void
-force_ident(s, kind)
-register char *s;
-int kind;
+force_ident(register char *s, int kind)
{
if (s && *s) {
OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
/* XXX see note in pp_entereval() for why we forgo typo
warnings if the symbol must be introduced in an eval.
GSAR 96-10-12 */
- gv_fetchpv(s, in_eval ? GV_ADDMULTI : TRUE,
+ gv_fetchpv(s, in_eval ? (GV_ADDMULTI | 8) : TRUE,
kind == '$' ? SVt_PV :
kind == '@' ? SVt_PVAV :
kind == '%' ? SVt_PVHV :
}
static char *
-force_version(s)
-char *s;
+force_version(char *s)
{
OP *version = Nullop;
}
static SV *
-q(sv)
-SV *sv;
+q(SV *sv)
{
register char *s;
register char *send;
}
static I32
-sublex_start()
+sublex_start(void)
{
register I32 op_type = yylval.ival;
}
static I32
-sublex_push()
+sublex_push(void)
{
dTHR;
- push_scope();
+ ENTER;
lex_state = sublex_info.super_state;
SAVEI32(lex_dojoin);
}
static I32
-sublex_done()
+sublex_done(void)
{
if (!lex_starts++) {
expect = XOPERATOR;
return ',';
}
else {
- pop_scope();
+ LEAVE;
bufend = SvPVX(linestr);
bufend += SvCUR(linestr);
expect = XOPERATOR;
}
}
+/*
+ scan_const
+
+ Extracts a pattern, double-quoted string, or transliteration. This
+ is terrifying code.
+
+ It looks at lex_inwhat and lex_inpat to find out whether it's
+ processing a pattern (lex_inpat is true), a transliteration
+ (lex_inwhat & OP_TRANS is true), or a double-quoted string.
+
+ In patterns:
+ backslashes:
+ double-quoted style: \r and \n
+ regexp special ones: \D \s
+ constants: \x3
+ backrefs: \1 (deprecated in substitution replacements)
+ case and quoting: \U \Q \E
+ stops on @ and $, but not for $ as tail anchor
+
+ In transliterations:
+ characters are VERY literal, except for - not at the start or end
+ of the string, which indicates a range. scan_const expands the
+ range to the full set of intermediate characters.
+
+ In double-quoted strings:
+ backslashes:
+ double-quoted style: \r and \n
+ constants: \x3
+ backrefs: \1 (deprecated)
+ case and quoting: \U \Q \E
+ stops on @ and $
+
+ scan_const does *not* construct ops to handle interpolated strings.
+ It stops processing as soon as it finds an embedded $ or @ variable
+ and leaves it to the caller to work out what's going on.
+
+ @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
+
+ $ in pattern could be $foo or could be tail anchor. Assumption:
+ it's a tail anchor if $ is the last thing in the string, or if it's
+ followed by one of ")| \n\t"
+
+ \1 (backreferences) are turned into $1
+
+ The structure of the code is
+ while (there's a character to process) {
+ handle transliteration ranges
+ skip regexp comments
+ skip # initiated comments in //x patterns
+ check for embedded @foo
+ check for embedded scalars
+ if (backslash) {
+ leave intact backslashes from leave (below)
+ deprecate \1 in strings and sub replacements
+ handle string-changing backslashes \l \U \Q \E, etc.
+ switch (what was escaped) {
+ handle - in a transliteration (becomes a literal -)
+ handle \132 octal characters
+ handle 0x15 hex characters
+ handle \cV (control V)
+ handle printf backslashes (\f, \r, \n, etc)
+ } (end switch)
+ } (end if backslash)
+ } (end while character to read)
+
+*/
+
static char *
-scan_const(start)
-char *start;
+scan_const(char *start)
{
- register char *send = bufend;
- SV *sv = NEWSV(93, send - start);
- register char *s = start;
- register char *d = SvPVX(sv);
- bool dorange = FALSE;
- I32 len;
+ register char *send = bufend; /* end of the constant */
+ SV *sv = NEWSV(93, send - start); /* sv for the constant */
+ register char *s = start; /* start of the constant */
+ register char *d = SvPVX(sv); /* destination for copies */
+ bool dorange = FALSE; /* are we in a translit range? */
+ I32 len; /* ? */
+
+ /*
+ leave is the set of acceptably-backslashed characters.
+
+ I do *not* understand why there's the double hook here.
+ */
char *leaveit =
lex_inpat
? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxc0123456789[{]} \t\n\r\f\v#"
: "";
while (s < send || dorange) {
+ /* get transliterations out of the way (they're most literal) */
if (lex_inwhat == OP_TRANS) {
+ /* expand a range A-Z to the full set of characters. AIE! */
if (dorange) {
- I32 i;
- I32 max;
- i = d - SvPVX(sv);
- SvGROW(sv, SvLEN(sv) + 256);
- d = SvPVX(sv) + i;
- d -= 2;
- max = (U8)d[1];
+ I32 i; /* current expanded character */
+ I32 max; /* last character in range */
+
+ i = d - SvPVX(sv); /* remember current offset */
+ SvGROW(sv, SvLEN(sv) + 256); /* expand the sv -- there'll never be more'n 256 chars in a range for it to grow by */
+ d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */
+ d -= 2; /* eat the first char and the - */
+
+ max = (U8)d[1]; /* last char in range */
+
for (i = (U8)*d; i <= max; i++)
*d++ = i;
+
+ /* mark the range as done, and continue */
dorange = FALSE;
continue;
}
+
+ /* range begins (ignore - as first or last char) */
else if (*s == '-' && s+1 < send && s != start) {
dorange = TRUE;
s++;
}
}
- else if (*s == '(' && lex_inpat && s[1] == '?' && s[2] == '#') {
- while (s < send && *s != ')')
- *d++ = *s++;
+
+ /* if we get here, we're not doing a transliteration */
+
+ /* skip for regexp comments /(?#comment)/ */
+ else if (*s == '(' && lex_inpat && s[1] == '?') {
+ if (s[2] == '#') {
+ while (s < send && *s != ')')
+ *d++ = *s++;
+ } else if (s[2] == '{') { /* This should march regcomp.c */
+ I32 count = 1;
+ char *regparse = s + 3;
+ char c;
+
+ while (count && (c = *regparse)) {
+ if (c == '\\' && regparse[1])
+ regparse++;
+ else if (c == '{')
+ count++;
+ else if (c == '}')
+ count--;
+ regparse++;
+ }
+ if (*regparse == ')')
+ regparse++;
+ else
+ yyerror("Sequence (?{...}) not terminated or not {}-balanced");
+ while (s < regparse && *s != ')')
+ *d++ = *s++;
+ }
}
+
+ /* likewise skip #-initiated comments in //x patterns */
else if (*s == '#' && lex_inpat &&
((PMOP*)lex_inpat)->op_pmflags & PMf_EXTENDED) {
while (s+1 < send && *s != '\n')
*d++ = *s++;
}
+
+ /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
else if (*s == '@' && s[1] && (isALNUM(s[1]) || strchr(":'{$", s[1])))
break;
+
+ /* check for embedded scalars. only stop if we're sure it's a
+ variable.
+ */
else if (*s == '$') {
if (!lex_inpat) /* not a regexp, so $ must be var */
break;
- if (s + 1 < send && !strchr(")| \n\t", s[1]))
+ if (s + 1 < send && !strchr("()| \n\t", s[1]))
break; /* in regexp, $ might be tail anchor */
}
+
+ /* backslashes */
if (*s == '\\' && s+1 < send) {
s++;
+
+ /* some backslashes we leave behind */
if (*s && strchr(leaveit, *s)) {
*d++ = '\\';
*d++ = *s++;
continue;
}
+
+ /* deprecate \1 in strings and substitution replacements */
if (lex_inwhat == OP_SUBST && !lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
*--s = '$';
break;
}
+
+ /* string-change backslash escapes */
if (lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
--s;
break;
}
+
+ /* if we get here, it's either a quoted -, or a digit */
switch (*s) {
+
+ /* quoted - in transliterations */
case '-':
if (lex_inwhat == OP_TRANS) {
*d++ = *s++;
continue;
}
/* FALL THROUGH */
+ /* default action is to copy the quoted character */
default:
*d++ = *s++;
continue;
+
+ /* \132 indicates an octal constant */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
*d++ = scan_oct(s, 3, &len);
s += len;
continue;
+
+ /* \x24 indicates a hex constant */
case 'x':
*d++ = scan_hex(++s, 2, &len);
s += len;
continue;
+
+ /* \c is a control character */
case 'c':
s++;
len = *s++;
*d++ = toCTRL(len);
continue;
+
+ /* printf-style backslashes, formfeeds, newlines, etc */
case 'b':
*d++ = '\b';
break;
case 'a':
*d++ = '\007';
break;
- }
+ } /* end switch */
+
s++;
continue;
- }
+ } /* end if (backslash) */
+
*d++ = *s++;
- }
+ } /* while loop to process each character */
+
+ /* terminate the string and set up the sv */
*d = '\0';
SvCUR_set(sv, d - SvPVX(sv));
SvPOK_on(sv);
+ /* shrink the sv if we allocated more than we used */
if (SvCUR(sv) + 5 < SvLEN(sv)) {
SvLEN_set(sv, SvCUR(sv) + 1);
Renew(SvPVX(sv), SvLEN(sv), char);
}
+
+ /* ??? */
if (s > bufptr)
yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
else
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
static int
-intuit_more(s)
-register char *s;
+intuit_more(register char *s)
{
if (lex_brackets)
return TRUE;
}
static int
-intuit_method(start,gv)
-char *start;
-GV *gv;
+intuit_method(char *start, GV *gv)
{
char *s = start + (*start == '$');
char tmpbuf[sizeof tokenbuf];
GV* indirgv;
if (gv) {
+ CV *cv;
if (GvIO(gv))
return 0;
- if (!GvCVu(gv))
+ if ((cv = GvCVu(gv))) {
+ char *proto = SvPVX(cv);
+ if (proto) {
+ if (*proto == ';')
+ proto++;
+ if (*proto == '*')
+ return 0;
+ }
+ } else
gv = 0;
}
s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
return *s == '(' ? FUNCMETH : METHOD;
}
if (!keyword(tmpbuf, len)) {
- indirgv = gv_fetchpv(tmpbuf,FALSE, SVt_PVCV);
+ if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
+ len -= 2;
+ tmpbuf[len] = '\0';
+ goto bare_package;
+ }
+ indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
if (indirgv && GvCVu(indirgv))
return 0;
/* filehandle or package name makes it a method */
s = skipspace(s);
if ((bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
return 0; /* no assumptions -- "=>" quotes bearword */
- nextval[nexttoke].opval =
- (OP*)newSVOP(OP_CONST, 0,
- newSVpv(tmpbuf,0));
- nextval[nexttoke].opval->op_private =
- OPpCONST_BARE;
+ bare_package:
+ nextval[nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
+ newSVpv(tmpbuf,0));
+ nextval[nexttoke].opval->op_private = OPpCONST_BARE;
expect = XTERM;
force_next(WORD);
bufptr = s;
}
static char*
-incl_perldb()
+incl_perldb(void)
{
if (perldb) {
- char *pdb = getenv("PERL5DB");
+ char *pdb = PerlEnv_getenv("PERL5DB");
if (pdb)
return pdb;
+ SETERRNO(0,SS$_NORMAL);
return "BEGIN { require 'perl5db.pl' }";
}
return "";
static int filter_debug = 0;
SV *
-filter_add(funcp, datasv)
- filter_t funcp;
- SV *datasv;
+filter_add(filter_t funcp, SV *datasv)
{
if (!funcp){ /* temporary handy debugging hack to be deleted */
filter_debug = atoi((char*)datasv);
if (!rsfp_filters)
rsfp_filters = newAV();
if (!datasv)
- datasv = newSV(0);
+ datasv = NEWSV(255,0);
if (!SvUPGRADE(datasv, SVt_PVIO))
die("Can't upgrade filter_add data to SVt_PVIO");
IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
/* Delete most recently added instance of this filter function. */
void
-filter_del(funcp)
- filter_t funcp;
+filter_del(filter_t funcp)
{
if (filter_debug)
warn("filter_del func %p", funcp);
- if (!rsfp_filters || AvFILL(rsfp_filters)<0)
+ if (!rsfp_filters || AvFILLp(rsfp_filters)<0)
return;
/* if filter is on top of stack (usual case) just pop it off */
- if (IoDIRP(FILTER_DATA(0)) == (void*)funcp){
- /* sv_free(av_pop(rsfp_filters)); */
- sv_free(av_shift(rsfp_filters));
+ if (IoDIRP(FILTER_DATA(AvFILLp(rsfp_filters))) == (void*)funcp){
+ sv_free(av_pop(rsfp_filters));
return;
}
/* Invoke the n'th filter function for the current rsfp. */
I32
-filter_read(idx, buf_sv, maxlen)
- int idx;
- SV *buf_sv;
- int maxlen; /* 0 = read one text line */
+filter_read(int idx, SV *buf_sv, int maxlen)
+
+
+ /* 0 = read one text line */
{
filter_t funcp;
SV *datasv = NULL;
if (!rsfp_filters)
return -1;
- if (idx > AvFILL(rsfp_filters)){ /* Any more filters? */
+ if (idx > AvFILLp(rsfp_filters)){ /* Any more filters? */
/* Provide a default input filter to make life easy. */
/* Note that we append to the line. This is handy. */
if (filter_debug)
}
static char *
-filter_gets(sv,fp, append)
-register SV *sv;
-register PerlIO *fp;
-STRLEN append;
+filter_gets(register SV *sv, register PerlIO *fp, STRLEN append)
{
+#ifdef WIN32FILTER
+ if (!rsfp_filters) {
+ filter_add(win32_textfilter,NULL);
+ }
+#endif
if (rsfp_filters) {
if (!append)
}
else
return (sv_gets(sv, fp, append));
-
}
EXT int yychar; /* last token */
+/*
+ yylex
+
+ Works out what to call the token just pulled out of the input
+ stream. The yacc parser takes care of taking the ops we return and
+ stitching them into a tree.
+
+ Returns:
+ PRIVATEREF
+
+ Structure:
+ if read an identifier
+ if we're in a my declaration
+ croak if they tried to say my($foo::bar)
+ build the ops for a my() declaration
+ if it's an access to a my() variable
+ are we in a sort block?
+ croak if my($a); $a <=> $b
+ build ops for access to a my() variable
+ if in a dq string, and they've said @foo and we can't find @foo
+ croak
+ build ops for a bareword
+ if we already built the token before, use it.
+*/
+
int
-yylex()
+yylex(void)
{
dTHR;
register char *s;
register char *d;
register I32 tmp;
STRLEN len;
+ GV *gv = Nullgv;
+ GV **gvp = 0;
+ /* check if there's an identifier for us to look at */
if (pending_ident) {
+ /* pit holds the identifier we read and pending_ident is reset */
char pit = pending_ident;
pending_ident = 0;
+ /* if we're in a my(), we can't allow dynamics here.
+ $foo'bar has already been turned into $foo::bar, so
+ just check for colons.
+
+ if it's a legal name, the OP is a PADANY.
+ */
if (in_my) {
if (strchr(tokenbuf,':'))
croak(no_myglob,tokenbuf);
+
yylval.opval = newOP(OP_PADANY, 0);
yylval.opval->op_targ = pad_allocmy(tokenbuf);
return PRIVATEREF;
}
+ /*
+ build the ops for accesses to a my() variable.
+
+ Deny my($a) or my($b) in a sort block, *if* $a or $b is
+ then used in a comparison. This catches most, but not
+ all cases. For instance, it catches
+ sort { my($a); $a <=> $b }
+ but not
+ sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
+ (although why you'd do that is anyone's guess).
+ */
+
if (!strchr(tokenbuf,':')) {
#ifdef USE_THREADS
- /* Check for single character per-thread magicals */
+ /* Check for single character per-thread SVs */
if (tokenbuf[0] == '$' && tokenbuf[2] == '\0'
- && !isALPHA(tokenbuf[1]) /* Rule out obvious non-magicals */
- && (tmp = find_thread_magical(&tokenbuf[1])) != NOT_IN_PAD)
+ && !isALPHA(tokenbuf[1]) /* Rule out obvious non-threadsvs */
+ && (tmp = find_threadsv(&tokenbuf[1])) != NOT_IN_PAD)
{
- yylval.opval = newOP(OP_SPECIFIC, 0);
+ yylval.opval = newOP(OP_THREADSV, 0);
yylval.opval->op_targ = tmp;
return PRIVATEREF;
}
#endif /* USE_THREADS */
if ((tmp = pad_findmy(tokenbuf)) != NOT_IN_PAD) {
+ /* if it's a sort block and they're naming $a or $b */
if (last_lop_op == OP_SORT &&
tokenbuf[0] == '$' &&
(tokenbuf[1] == 'a' || tokenbuf[1] == 'b')
}
}
- /* Force them to make up their mind on "@foo". */
+ /*
+ Whine if they've said @foo in a doublequoted string,
+ and @foo isn't a variable we can find in the symbol
+ table.
+ */
if (pit == '@' && lex_state != LEX_NORMAL && !lex_brackets) {
GV *gv = gv_fetchpv(tokenbuf+1, FALSE, SVt_PVAV);
if (!gv || ((tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
tokenbuf, tokenbuf));
}
+ /* build ops for a bareword */
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf+1, 0));
yylval.opval->op_private = OPpCONST_ENTERED;
- gv_fetchpv(tokenbuf+1, in_eval ? GV_ADDMULTI : TRUE,
+ gv_fetchpv(tokenbuf+1, in_eval ? (GV_ADDMULTI | 8) : TRUE,
((tokenbuf[0] == '$') ? SVt_PV
: (tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));
return WORD;
}
+ /* no identifier pending identification */
+
switch (lex_state) {
#ifdef COMMENTARY
case LEX_NORMAL: /* Some compilers will produce faster */
break;
#endif
+ /* when we're already built the next token, just pull it out the queue */
case LEX_KNOWNEXT:
nexttoke--;
yylval = nextval[nexttoke];
}
return(nexttype[nexttoke]);
+ /* interpolated case modifiers like \L \U, including \Q and \E.
+ when we get here, bufptr is at the \
+ */
case LEX_INTERPCASEMOD:
#ifdef DEBUGGING
if (bufptr != bufend && *bufptr != '\\')
croak("panic: INTERPCASEMOD");
#endif
- if (bufptr == bufend || bufptr[1] == 'E') {
+ /* handle \E or end of string */
+ if (bufptr == bufend || bufptr[1] == 'E') {
char oldmod;
+
+ /* if at a \E */
if (lex_casemods) {
oldmod = lex_casestack[--lex_casemods];
lex_casestack[lex_casemods] = '\0';
+
if (bufptr != bufend && strchr("LUQ", oldmod)) {
bufptr += 2;
lex_state = LEX_INTERPCONCAT;
nextval[nexttoke].ival = 0;
force_next(',');
#ifdef USE_THREADS
- nextval[nexttoke].opval = newOP(OP_SPECIFIC, 0);
- nextval[nexttoke].opval->op_targ = find_thread_magical("\"");
+ nextval[nexttoke].opval = newOP(OP_THREADSV, 0);
+ nextval[nexttoke].opval->op_targ = find_threadsv("\"");
force_next(PRIVATEREF);
#else
force_ident("\"", '$');
if (SvCUR(linestr))
sv_catpv(linestr,";");
if (preambleav){
- while(AvFILL(preambleav) >= 0) {
+ while(AvFILLp(preambleav) >= 0) {
SV *tmpsv = av_shift(preambleav);
sv_catsv(linestr, tmpsv);
sv_catpv(linestr, ";");
fake_eof:
if (rsfp) {
if (preprocess && !in_eval)
- (void)my_pclose(rsfp);
+ (void)PerlProc_pclose(rsfp);
else if ((PerlIO *)rsfp == PerlIO_stdin())
PerlIO_clearerr(rsfp);
else
}
goto retry;
case '\r':
+#ifndef WIN32CHEAT
warn("Illegal character \\%03o (carriage return)", '\r');
croak(
"(Maybe you didn't strip carriage returns after a network transfer?)\n");
+#endif
case ' ': case '\t': case '\f': case 013:
s++;
goto retry;
s++;
if (strnEQ(s,"=>",2)) {
- if (dowarn)
- warn("Ambiguous use of -%c => resolved to \"-%c\" =>",
- (int)tmp, (int)tmp);
s = force_word(bufptr,WORD,FALSE,FALSE,FALSE);
OPERATOR('-'); /* unary minus */
}
d++;
if (*d == '}') {
char minus = (tokenbuf[0] == '-');
- if (dowarn &&
- (keyword(tokenbuf + 1, len) ||
- (minus && len == 1 && isALPHA(tokenbuf[1])) ||
- perl_get_cv(tokenbuf + 1, FALSE) ))
- warn("Ambiguous use of {%s} resolved to {\"%s\"}",
- tokenbuf + !minus, tokenbuf + !minus);
s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
if (minus)
force_next('-');
else
lex_brackstack[lex_brackets++] = XOPERATOR;
s = skipspace(s);
- if (*s == '}')
+ if (*s == '}') {
+ if (expect == XSTATE) {
+ lex_brackstack[lex_brackets-1] = XSTATE;
+ break;
+ }
OPERATOR(HASHBRACK);
+ }
/* This hack serves to disambiguate a pair of curlies
* as being a block or an anon hash. Normally, expectation
* determines that, but in cases where we're not in a
case '/': /* may either be division or pattern */
case '?': /* may either be conditional or pattern */
if (expect != XOPERATOR) {
- check_uni();
+ /* Disable warning on "study /blah/" */
+ if (oldoldbufptr == last_uni
+ && (*last_uni != 's' || s - last_uni < 5
+ || memNE(last_uni, "study", 5) || isALNUM(last_uni[5])))
+ check_uni();
s = scan_pat(s);
TERM(sublex_start());
}
case 'z': case 'Z':
keylookup: {
- GV *gv = Nullgv;
- GV **gvp = 0;
+ gv = Nullgv;
+ gvp = 0;
bufptr = s;
s = scan_word(s, tokenbuf, sizeof tokenbuf, FALSE, &len);
/* Is this a word before a => operator? */
if (strnEQ(d,"=>",2)) {
CLINE;
- if (dowarn && (tmp || perl_get_cv(tokenbuf, FALSE)))
- warn("Ambiguous use of %s => resolved to \"%s\" =>",
- tokenbuf, tokenbuf);
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf,0));
yylval.opval->op_private = OPpCONST_BARE;
TERM(WORD);
}
else if (gv && !gvp
&& -tmp==KEY_lock /* XXX generalizable kludge */
- && !gv_fetchpv("Thread::join",FALSE,SVt_PVCV))
+ && !hv_fetch(GvHVn(incgv), "Thread.pm", 9, FALSE))
{
tmp = 0; /* any sub overrides "weak" keyword */
}
/* Get the rest if it looks like a package qualifier */
if (*s == '\'' || *s == ':' && s[1] == ':') {
+ STRLEN morelen;
s = scan_word(s, tokenbuf + len, sizeof tokenbuf - len,
- TRUE, &len);
- if (!len)
+ TRUE, &morelen);
+ if (!morelen)
croak("Bad name after %s::", tokenbuf);
+ len += morelen;
}
if (expect == XOPERATOR) {
no_op("Bareword",s);
}
- /* Look for a subroutine with this name in current package. */
+ /* Look for a subroutine with this name in current package,
+ unless name is "Foo::", in which case Foo is a bearword
+ (and a package name). */
+
+ if (len > 2 &&
+ tokenbuf[len - 2] == ':' && tokenbuf[len - 1] == ':')
+ {
+ if (dowarn && ! gv_fetchpv(tokenbuf, FALSE, SVt_PVHV))
+ warn("Bareword \"%s\" refers to nonexistent package",
+ tokenbuf);
+ len -= 2;
+ tokenbuf[len] = '\0';
+ gv = Nullgv;
+ gvp = 0;
+ }
+ else {
+ len = 0;
+ if (!gv)
+ gv = gv_fetchpv(tokenbuf, FALSE, SVt_PVCV);
+ }
+
+ /* if we saw a global override before, get the right name */
if (gvp) {
sv = newSVpv("CORE::GLOBAL::",14);
}
else
sv = newSVpv(tokenbuf,0);
- if (!gv)
- gv = gv_fetchpv(tokenbuf,FALSE, SVt_PVCV);
/* Presume this is going to be a bareword of some sort. */
yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
yylval.opval->op_private = OPpCONST_BARE;
+ /* And if "Foo::", then that's what it certainly is. */
+
+ if (len)
+ goto safe_bareword;
+
/* See if it's the indirect object for a list operator. */
if (oldoldbufptr &&
(oldoldbufptr == last_lop || oldoldbufptr == last_uni) &&
/* NO SKIPSPACE BEFORE HERE! */
(expect == XREF ||
- (opargs[last_lop_op] >> OASHIFT & 7) == OA_FILEREF) )
+ ((opargs[last_lop_op] >> OASHIFT)& 7) == OA_FILEREF) )
{
bool immediate_paren = *s == '(';
warn(warn_reserved, tokenbuf);
}
}
+
+ safe_bareword:
if (lastchar && strchr("*%&", lastchar)) {
warn("Operator or semicolon missing before %c%s",
lastchar, tokenbuf);
case KEY_foreach:
yylval.ival = curcop->cop_line;
s = skipspace(s);
- if (isIDFIRST(*s)) {
+ if (expect == XSTATE && isIDFIRST(*s)) {
char *p = s;
if ((bufend - p) >= 3 &&
strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
if (*s == ';' || *s == ')') /* probably a close */
croak("sort is now a reserved word");
expect = XTERM;
- s = force_word(s,WORD,TRUE,TRUE,TRUE);
+ s = force_word(s,WORD,TRUE,TRUE,FALSE);
LOP(OP_SORT,XREF);
case KEY_split:
}
I32
-keyword(d, len)
-register char *d;
-I32 len;
+keyword(register char *d, I32 len)
{
switch (*d) {
case '_':
}
static void
-checkcomma(s,name,what)
-register char *s;
-char *name;
-char *what;
+checkcomma(register char *s, char *name, char *what)
{
char *w;
}
static char *
-scan_word(s, dest, destlen, allow_package, slp)
-register char *s;
-char *dest;
-STRLEN destlen;
-int allow_package;
-STRLEN *slp;
+scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
register char *d = dest;
register char *e = d + destlen - 3; /* two-character token, ending NUL */
*d++ = ':';
s++;
}
- else if (*s == ':' && s[1] == ':' && allow_package && isIDFIRST(s[2])) {
+ else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
*d++ = *s++;
*d++ = *s++;
}
}
static char *
-scan_ident(s, send, dest, destlen, ck_uni)
-register char *s;
-register char *send;
-char *dest;
-STRLEN destlen;
-I32 ck_uni;
+scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
{
register char *d;
register char *e;
return s;
}
-void pmflag(pmfl,ch)
-U16* pmfl;
-int ch;
+void pmflag(U16 *pmfl, int ch)
{
if (ch == 'i')
*pmfl |= PMf_FOLD;
}
static char *
-scan_pat(start)
-char *start;
+scan_pat(char *start)
{
PMOP *pm;
char *s;
}
static char *
-scan_subst(start)
-char *start;
+scan_subst(char *start)
{
register char *s;
register PMOP *pm;
return s;
}
-void
-hoistmust(pm)
-register PMOP *pm;
-{
- dTHR;
- if (!pm->op_pmshort && pm->op_pmregexp->regstart &&
- (!pm->op_pmregexp->regmust || pm->op_pmregexp->reganch & ROPT_ANCH)
- ) {
- if (!(pm->op_pmregexp->reganch & ROPT_ANCH))
- pm->op_pmflags |= PMf_SCANFIRST;
- pm->op_pmshort = SvREFCNT_inc(pm->op_pmregexp->regstart);
- pm->op_pmslen = SvCUR(pm->op_pmshort);
- }
- else if (pm->op_pmregexp->regmust) {/* is there a better short-circuit? */
- if (pm->op_pmshort &&
- sv_eq(pm->op_pmshort,pm->op_pmregexp->regmust))
- {
- if (pm->op_pmflags & PMf_SCANFIRST) {
- SvREFCNT_dec(pm->op_pmshort);
- pm->op_pmshort = Nullsv;
- }
- else {
- SvREFCNT_dec(pm->op_pmregexp->regmust);
- pm->op_pmregexp->regmust = Nullsv;
- return;
- }
- }
- /* promote the better string */
- if ((!pm->op_pmshort &&
- !(pm->op_pmregexp->reganch & ROPT_ANCH_GPOS)) ||
- ((pm->op_pmflags & PMf_SCANFIRST) &&
- (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)))) {
- SvREFCNT_dec(pm->op_pmshort); /* ok if null */
- pm->op_pmshort = pm->op_pmregexp->regmust;
- pm->op_pmslen = SvCUR(pm->op_pmshort);
- pm->op_pmregexp->regmust = Nullsv;
- pm->op_pmflags |= PMf_SCANFIRST;
- }
- }
-}
-
static char *
-scan_trans(start)
-char *start;
+scan_trans(char *start)
{
register char* s;
OP *o;
short *tbl;
I32 squash;
- I32 delete;
+ I32 Delete;
I32 complement;
yylval.ival = OP_NULL;
if (lex_stuff)
SvREFCNT_dec(lex_stuff);
lex_stuff = Nullsv;
- croak("Translation pattern not terminated");
+ croak("Transliteration pattern not terminated");
}
if (s[-1] == multi_open)
s--;
if (lex_repl)
SvREFCNT_dec(lex_repl);
lex_repl = Nullsv;
- croak("Translation replacement not terminated");
+ croak("Transliteration replacement not terminated");
}
New(803,tbl,256,short);
o = newPVOP(OP_TRANS, 0, (char*)tbl);
- complement = delete = squash = 0;
+ complement = Delete = squash = 0;
while (*s == 'c' || *s == 'd' || *s == 's') {
if (*s == 'c')
complement = OPpTRANS_COMPLEMENT;
else if (*s == 'd')
- delete = OPpTRANS_DELETE;
+ Delete = OPpTRANS_DELETE;
else
squash = OPpTRANS_SQUASH;
s++;
}
- o->op_private = delete|squash|complement;
+ o->op_private = Delete|squash|complement;
lex_op = o;
yylval.ival = OP_TRANS;
}
static char *
-scan_heredoc(s)
-register char *s;
+scan_heredoc(register char *s)
{
dTHR;
SV *herewas;
register char *d;
register char *e;
char *peek;
- int outer = (rsfp && !lex_inwhat);
+ int outer = (rsfp && !(lex_inwhat == OP_SCALAR));
s += 2;
d = tokenbuf;
}
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
+ curcop->cop_line++; /* the preceding stmt passes a newline */
+
sv_catpvn(herewas,s,bufend-s);
sv_setsv(linestr,herewas);
oldoldbufptr = oldbufptr = bufptr = s = linestart = SvPVX(linestr);
return s;
}
+/* scan_inputsymbol
+ takes: current position in input buffer
+ returns: new position in input buffer
+ side-effects: yylval and lex_op are set.
+
+ This code handles:
+
+ <> read from ARGV
+ <FH> read from filehandle
+ <pkg::FH> read from package qualified filehandle
+ <pkg'FH> read from package qualified filehandle
+ <$fh> read from filehandle in $fh
+ <*.h> filename glob
+
+*/
+
static char *
-scan_inputsymbol(start)
-char *start;
+scan_inputsymbol(char *start)
{
- register char *s = start;
+ register char *s = start; /* current position in buffer */
register char *d;
register char *e;
I32 len;
- d = tokenbuf;
- e = tokenbuf + sizeof tokenbuf;
- s = delimcpy(d, e, s + 1, bufend, '>', &len);
+ d = tokenbuf; /* start of temp holding space */
+ e = tokenbuf + sizeof tokenbuf; /* end of temp holding space */
+ s = delimcpy(d, e, s + 1, bufend, '>', &len); /* extract until > */
+
+ /* die if we didn't have space for the contents of the <>,
+ or if it didn't end
+ */
+
if (len >= sizeof tokenbuf)
croak("Excessively long <> operator");
if (s >= bufend)
croak("Unterminated <> operator");
+
s++;
+
+ /* check for <$fh>
+ Remember, only scalar variables are interpreted as filehandles by
+ this code. Anything more complex (e.g., <$fh{$num}>) will be
+ treated as a glob() call.
+ This code makes use of the fact that except for the $ at the front,
+ a scalar variable and a filehandle look the same.
+ */
if (*d == '$' && d[1]) d++;
+
+ /* allow <Pkg'VALUE> or <Pkg::VALUE> */
while (*d && (isALNUM(*d) || *d == '\'' || *d == ':'))
d++;
+
+ /* If we've tried to read what we allow filehandles to look like, and
+ there's still text left, then it must be a glob() and not a getline.
+ Use scan_str to pull out the stuff between the <> and treat it
+ as nothing more than a string.
+ */
+
if (d - tokenbuf != len) {
yylval.ival = OP_GLOB;
set_csh();
s = scan_str(start);
if (!s)
- croak("Glob not terminated");
+ croak("Glob not terminated");
return s;
}
else {
+ /* we're in a filehandle read situation */
d = tokenbuf;
+
+ /* turn <> into <ARGV> */
if (!len)
(void)strcpy(d,"ARGV");
+
+ /* if <$fh>, create the ops to turn the variable into a
+ filehandle
+ */
if (*d == '$') {
I32 tmp;
+
+ /* try to find it in the pad for this block, otherwise find
+ add symbol table ops
+ */
if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
OP *o = newOP(OP_PADSV, 0);
o->op_targ = tmp;
newUNOP(OP_RV2SV, 0,
newGVOP(OP_GV, 0, gv))));
}
+ /* we created the ops in lex_op, so make yylval.ival a null op */
yylval.ival = OP_NULL;
}
+
+ /* If it's none of the above, it must be a literal filehandle
+ (<Foo::BAR> or <FOO>) so build a simple readline OP */
else {
GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
yylval.ival = OP_NULL;
}
}
+
return s;
}
+
+/* scan_str
+ takes: start position in buffer
+ returns: position to continue reading from buffer
+ side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
+ updates the read buffer.
+
+ This subroutine pulls a string out of the input. It is called for:
+ q single quotes q(literal text)
+ ' single quotes 'literal text'
+ qq double quotes qq(interpolate $here please)
+ " double quotes "interpolate $here please"
+ qx backticks qx(/bin/ls -l)
+ ` backticks `/bin/ls -l`
+ qw quote words @EXPORT_OK = qw( func() $spam )
+ m// regexp match m/this/
+ s/// regexp substitute s/this/that/
+ tr/// string transliterate tr/this/that/
+ y/// string transliterate y/this/that/
+ ($*@) sub prototypes sub foo ($)
+ <> readline or globs <FOO>, <>, <$fh>, or <*.c>
+
+ In most of these cases (all but <>, patterns and transliterate)
+ yylex() calls scan_str(). m// makes yylex() call scan_pat() which
+ calls scan_str(). s/// makes yylex() call scan_subst() which calls
+ scan_str(). tr/// and y/// make yylex() call scan_trans() which
+ calls scan_str().
+
+ It skips whitespace before the string starts, and treats the first
+ character as the delimiter. If the delimiter is one of ([{< then
+ the corresponding "close" character )]}> is used as the closing
+ delimiter. It allows quoting of delimiters, and if the string has
+ balanced delimiters ([{<>}]) it allows nesting.
+
+ The lexer always reads these strings into lex_stuff, except in the
+ case of the operators which take *two* arguments (s/// and tr///)
+ when it checks to see if lex_stuff is full (presumably with the 1st
+ arg to s or tr) and if so puts the string into lex_repl.
+
+*/
+
static char *
-scan_str(start)
-char *start;
+scan_str(char *start)
{
dTHR;
- SV *sv;
- char *tmps;
- register char *s = start;
- register char term;
- register char *to;
- I32 brackets = 1;
-
+ SV *sv; /* scalar value: string */
+ char *tmps; /* temp string, used for delimiter matching */
+ register char *s = start; /* current position in the buffer */
+ register char term; /* terminating character */
+ register char *to; /* current position in the sv's data */
+ I32 brackets = 1; /* bracket nesting level */
+
+ /* skip space before the delimiter */
if (isSPACE(*s))
s = skipspace(s);
+
+ /* mark where we are, in case we need to report errors */
CLINE;
+
+ /* after skipping whitespace, the next character is the terminator */
term = *s;
+ /* mark where we are */
multi_start = curcop->cop_line;
multi_open = term;
+
+ /* find corresponding closing delimiter */
if (term && (tmps = strchr("([{< )]}> )]}>",term)))
term = tmps[5];
multi_close = term;
+ /* create a new SV to hold the contents. 87 is leak category, I'm
+ assuming. 80 is the SV's initial length. What a random number. */
sv = NEWSV(87,80);
sv_upgrade(sv, SVt_PVIV);
SvIVX(sv) = term;
(void)SvPOK_only(sv); /* validate pointer */
+
+ /* move past delimiter and try to read a complete string */
s++;
for (;;) {
+ /* extend sv if need be */
SvGROW(sv, SvCUR(sv) + (bufend - s) + 1);
+ /* set 'to' to the next character in the sv's string */
to = SvPVX(sv)+SvCUR(sv);
+
+ /* if open delimiter is the close delimiter read unbridle */
if (multi_open == multi_close) {
for (; s < bufend; s++,to++) {
+ /* embedded newlines increment the current line number */
if (*s == '\n' && !rsfp)
curcop->cop_line++;
+ /* handle quoted delimiters */
if (*s == '\\' && s+1 < bufend && term != '\\') {
if (s[1] == term)
s++;
+ /* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
+ /* terminate when run out of buffer (the for() condition), or
+ have found the terminator */
else if (*s == term)
break;
*to = *s;
}
}
+
+ /* if the terminator isn't the same as the start character (e.g.,
+ matched brackets), we have to allow more in the quoting, and
+ be prepared for nested brackets.
+ */
else {
+ /* read until we run out of string, or we find the terminator */
for (; s < bufend; s++,to++) {
+ /* embedded newlines increment the line count */
if (*s == '\n' && !rsfp)
curcop->cop_line++;
+ /* backslashes can escape the open or closing characters */
if (*s == '\\' && s+1 < bufend) {
if ((s[1] == multi_open) || (s[1] == multi_close))
s++;
else
*to++ = *s++;
}
+ /* allow nested opens and closes */
else if (*s == multi_close && --brackets <= 0)
break;
else if (*s == multi_open)
*to = *s;
}
}
+ /* terminate the copied string and update the sv's end-of-string */
*to = '\0';
SvCUR_set(sv, to - SvPVX(sv));
- if (s < bufend) break; /* string ends on this line? */
+ /*
+ * this next chunk reads more into the buffer if we're not done yet
+ */
+
+ if (s < bufend) break; /* handle case where we are done yet :-) */
+ /* if we're out of file, or a read fails, bail and reset the current
+ line marker so we can report where the unterminated string began
+ */
if (!rsfp ||
!(oldoldbufptr = oldbufptr = s = linestart = filter_gets(linestr, rsfp, 0))) {
sv_free(sv);
curcop->cop_line = multi_start;
return Nullch;
}
+ /* we read a line, so increment our line counter */
curcop->cop_line++;
+
+ /* update debugger info */
if (PERLDB_LINE && curstash != debstash) {
SV *sv = NEWSV(88,0);
av_store(GvAV(curcop->cop_filegv),
(I32)curcop->cop_line, sv);
}
+
+ /* having changed the buffer, we must update bufend */
bufend = SvPVX(linestr) + SvCUR(linestr);
}
+
+ /* at this point, we have successfully read the delimited string */
+
multi_end = curcop->cop_line;
s++;
+
+ /* if we allocated too much space, give some back */
if (SvCUR(sv) + 5 < SvLEN(sv)) {
SvLEN_set(sv, SvCUR(sv) + 1);
Renew(SvPVX(sv), SvLEN(sv), char);
}
+
+ /* decide whether this is the first or second quoted string we've read
+ for this op
+ */
+
if (lex_stuff)
lex_repl = sv;
else
return s;
}
+/*
+ scan_num
+ takes: pointer to position in buffer
+ returns: pointer to new position in buffer
+ side-effects: builds ops for the constant in yylval.op
+
+ Read a number in any of the formats that Perl accepts:
+
+ 0(x[0-7A-F]+)|([0-7]+)
+ [\d_]+(\.[\d_]*)?[Ee](\d+)
+
+ Underbars (_) are allowed in decimal numbers. If -w is on,
+ underbars before a decimal point must be at three digit intervals.
+
+ Like most scan_ routines, it uses the tokenbuf buffer to hold the
+ thing it reads.
+
+ If it reads a number without a decimal point or an exponent, it will
+ try converting the number to an integer and see if it can do so
+ without loss of precision.
+*/
+
char *
-scan_num(start)
-char *start;
+scan_num(char *start)
{
- register char *s = start;
- register char *d;
- register char *e;
- I32 tryiv;
- double value;
- SV *sv;
- I32 floatit;
- char *lastub = 0;
+ register char *s = start; /* current position in buffer */
+ register char *d; /* destination in temp buffer */
+ register char *e; /* end of temp buffer */
+ I32 tryiv; /* used to see if it can be an int */
+ double value; /* number read, as a double */
+ SV *sv; /* place to put the converted number */
+ I32 floatit; /* boolean: int or float? */
+ char *lastub = 0; /* position of last underbar */
static char number_too_long[] = "Number too long";
+ /* We use the first character to decide what type of number this is */
+
switch (*s) {
default:
- croak("panic: scan_num");
+ croak("panic: scan_num");
+
+ /* if it starts with a 0, it could be an octal number, a decimal in
+ 0.13 disguise, or a hexadecimal number.
+ */
case '0':
{
+ /* variables:
+ u holds the "number so far"
+ shift the power of 2 of the base (hex == 4, octal == 3)
+ overflowed was the number more than we can hold?
+
+ Shift is used when we add a digit. It also serves as an "are
+ we in octal or hex?" indicator to disallow hex characters when
+ in octal mode.
+ */
UV u;
I32 shift;
bool overflowed = FALSE;
+ /* check for hex */
if (s[1] == 'x') {
shift = 4;
s += 2;
}
+ /* check for a decimal in disguise */
else if (s[1] == '.')
goto decimal;
+ /* so it must be octal */
else
shift = 3;
u = 0;
+
+ /* read the rest of the octal number */
for (;;) {
- UV n, b;
+ UV n, b; /* n is used in the overflow test, b is the digit we're adding on */
switch (*s) {
+
+ /* if we don't mention it, we're done */
default:
goto out;
+
+ /* _ are ignored */
case '_':
s++;
break;
+
+ /* 8 and 9 are not octal */
case '8': case '9':
if (shift != 4)
yyerror("Illegal octal digit");
/* FALL THROUGH */
+
+ /* octal digits */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
- b = *s++ & 15;
+ b = *s++ & 15; /* ASCII digit -> value of digit */
goto digit;
+
+ /* hex digits */
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ /* make sure they said 0x */
if (shift != 4)
goto out;
b = (*s++ & 7) + 9;
+
+ /* Prepare to put the digit we have onto the end
+ of the number so far. We check for overflows.
+ */
+
digit:
- n = u << shift;
+ n = u << shift; /* make room for the digit */
if (!overflowed && (n >> shift) != u) {
warn("Integer overflow in %s number",
(shift == 4) ? "hex" : "octal");
overflowed = TRUE;
}
- u = n | b;
+ u = n | b; /* add the digit to the end */
break;
}
}
+
+ /* if we get here, we had success: make a scalar value from
+ the number.
+ */
out:
sv = NEWSV(92,0);
sv_setuv(sv, u);
}
break;
+
+ /*
+ handle decimal numbers.
+ we're also sent here when we read a 0 as the first digit
+ */
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '.':
decimal:
d = tokenbuf;
e = tokenbuf + sizeof tokenbuf - 6; /* room for various punctuation */
floatit = FALSE;
+
+ /* read next group of digits and _ and copy into d */
while (isDIGIT(*s) || *s == '_') {
+ /* skip underscores, checking for misplaced ones
+ if -w is on
+ */
if (*s == '_') {
if (dowarn && lastub && s - lastub != 3)
warn("Misplaced _ in number");
lastub = ++s;
}
else {
+ /* check for end of fixed-length buffer */
if (d >= e)
croak(number_too_long);
+ /* if we're ok, copy the character */
*d++ = *s++;
}
}
+
+ /* final misplaced underbar check */
if (dowarn && lastub && s - lastub != 3)
warn("Misplaced _ in number");
+
+ /* read a decimal portion if there is one. avoid
+ 3..5 being interpreted as the number 3. followed
+ by .5
+ */
if (*s == '.' && s[1] != '.') {
floatit = TRUE;
*d++ = *s++;
+
+ /* copy, ignoring underbars, until we run out of
+ digits. Note: no misplaced underbar checks!
+ */
for (; isDIGIT(*s) || *s == '_'; s++) {
+ /* fixed length buffer check */
if (d >= e)
croak(number_too_long);
if (*s != '_')
*d++ = *s;
}
}
+
+ /* read exponent part, if present */
if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) {
floatit = TRUE;
s++;
+
+ /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
*d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
+
+ /* allow positive or negative exponent */
if (*s == '+' || *s == '-')
*d++ = *s++;
+
+ /* read digits of exponent (no underbars :-) */
while (isDIGIT(*s)) {
if (d >= e)
croak(number_too_long);
*d++ = *s++;
}
}
+
+ /* terminate the string */
*d = '\0';
+
+ /* make an sv from the string */
sv = NEWSV(92,0);
+ /* reset numeric locale in case we were earlier left in Swaziland */
SET_NUMERIC_STANDARD();
value = atof(tokenbuf);
+
+ /*
+ See if we can make do with an integer value without loss of
+ precision. We use I_V to cast to an int, because some
+ compilers have issues. Then we try casting it back and see
+ if it was the same. We only do this if we know we
+ specifically read an integer.
+
+ Note: if floatit is true, then we don't need to do the
+ conversion at all.
+ */
tryiv = I_V(value);
if (!floatit && (double)tryiv == value)
sv_setiv(sv, tryiv);
break;
}
+ /* make the op for the constant and return */
+
yylval.opval = newSVOP(OP_CONST, 0, sv);
return s;
}
static char *
-scan_formline(s)
-register char *s;
+scan_formline(register char *s)
{
dTHR;
register char *eol;
}
static void
-set_csh()
+set_csh(void)
{
#ifdef CSH
if (!cshlen)
}
I32
-start_subparse(is_format, flags)
-I32 is_format;
-U32 flags;
+start_subparse(I32 is_format, U32 flags)
{
dTHR;
I32 oldsavestack_ix = savestack_ix;
}
int
-yywarn(s)
-char *s;
+yywarn(char *s)
{
dTHR;
--error_count;
}
int
-yyerror(s)
-char *s;
+yyerror(char *s)
{
dTHR;
char *where = NULL;
if (in_eval & 2)
warn("%_", msg);
else if (in_eval)
- sv_catsv(errsv, msg);
+ sv_catsv(ERRSV, msg);
else
PerlIO_write(PerlIO_stderr(), SvPVX(msg), SvCUR(msg));
if (++error_count >= 10)
in_my_stash = Nullhv;
return 0;
}
+
+