Add casting to allow g++ (3.3.5) to compile the core code.
[p5sagit/p5-mst-13.2.git] / ext / Devel / DProf / DProf.xs
index 220a1e2..10d4172 100644 (file)
@@ -1,63 +1,72 @@
-/* XXX DProf could use some cleanups for PERL_IMPLICIT_CONTEXT */
-
-#define PERL_POLLUTE
-
+#define PERL_NO_GET_CONTEXT
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
 
-/* For older Perls */
-#ifndef dTHR
-#  define dTHR int dummy_thr
-#endif /* dTHR */ 
-
+/* define DBG_SUB to cause a warning on each subroutine entry. */
 /*#define DBG_SUB 1      */
-/*#define DBG_TIMER 1    */
+
+/* define DBG_TIMER to cause a warning when the timer is turned on and off. */
+/*#define DBG_TIMER 1  */
+
+#ifdef DEBUGGING
+#define ASSERT(x) assert(x)
+#else
+#define ASSERT(x)
+#endif
+
+static CV *
+db_get_cv(pTHX_ SV *sv)
+{
+       CV *cv;
+
+       if (SvIOK(sv)) {                        /* if (PERLDB_SUB_NN) { */
+           cv = INT2PTR(CV*,SvIVX(sv));
+       } else {
+           if (SvPOK(sv)) {
+               cv = get_cv(SvPVX(sv), TRUE);
+           } else if (SvROK(sv)) {
+               cv = (CV*)SvRV(sv);
+           } else {
+               croak("DProf: don't know what subroutine to profile");
+           }
+       }
+       return cv;
+}
 
 #ifdef DBG_SUB
-#  define DBG_SUB_NOTIFY(A,B) warn( A, B )
+#  define DBG_SUB_NOTIFY(A) dprof_dbg_sub_notify(aTHX_ A)
+void
+dprof_dbg_sub_notify(pTHX_ SV *Sub) {
+    CV   *cv = db_get_cv(aTHX_ Sub);
+    GV   *gv = cv ? CvGV(cv) : NULL;
+    if (cv && gv) {
+       warn("XS DBsub(%s::%s)\n",
+            ((GvSTASH(gv) && HvNAME(GvSTASH(gv))) ?
+             HvNAME(GvSTASH(gv)) : "(null)"),
+            GvNAME(gv));
+    } else {
+       warn("XS DBsub(unknown) at %x", Sub);
+    }
+}
 #else
-#  define DBG_SUB_NOTIFY(A,B)  /* nothing */
+#  define DBG_SUB_NOTIFY(A)  /* nothing */
 #endif
 
+
 #ifdef DBG_TIMER
-#  define DBG_TIMER_NOTIFY(A) warn( A )
+#  define DBG_TIMER_NOTIFY(A) warn(A)
 #else
 #  define DBG_TIMER_NOTIFY(A)  /* nothing */
 #endif
 
-static U32 dprof_ticks;
-
 /* HZ == clock ticks per second */
 #ifdef VMS
 #  define HZ ((I32)CLK_TCK)
 #  define DPROF_HZ HZ
 #  include <starlet.h>  /* prototype for sys$gettim() */
-   clock_t dprof_times(struct tms *bufptr) {
-        clock_t retval;
-       dTHX;
-        /* Get wall time and convert to 10 ms intervals to
-         * produce the return value dprof expects */
-#  if defined(__DECC) && defined (__ALPHA)
-#    include <ints.h>
-        uint64 vmstime;
-        _ckvmssts(sys$gettim(&vmstime));
-        vmstime /= 100000;
-        retval = vmstime & 0x7fffffff;
-#  else
-        /* (Older hw or ccs don't have an atomic 64-bit type, so we
-         * juggle 32-bit ints (and a float) to produce a time_t result
-         * with minimal loss of information.) */
-        long int vmstime[2],remainder,divisor = 100000;
-        _ckvmssts(sys$gettim((unsigned long int *)vmstime));
-        vmstime[1] &= 0x7fff;  /* prevent overflow in EDIV */
-        _ckvmssts(lib$ediv(&divisor,vmstime,(long int *)&retval,&remainder));
-#  endif
-        /* Fill in the struct tms using the CRTL routine . . .*/
-        times((tbuffer_t *)bufptr);
-        return (clock_t) retval;
-   }
-#  define Times(ptr) (dprof_times(ptr))
+#  include <lib$routines.h>
+#  define Times(ptr) (dprof_times(aTHX_ ptr))
 #else
 #  ifndef HZ
 #    ifdef CLK_TCK
@@ -67,37 +76,12 @@ static U32 dprof_ticks;
 #    endif
 #  endif
 #  ifdef OS2                           /* times() has significant overhead */
-#    define Times(ptr) (dprof_times(ptr))
+#    define Times(ptr) (dprof_times(aTHX_ ptr))
 #    define INCL_DOSPROFILE
 #    define INCL_DOSERRORS
 #    include <os2.h>
 #    define toLongLong(arg) (*(long long*)&(arg))
-#    define DPROF_HZ dprof_ticks
-
-static ULONG frequ;
-static long long start_cnt;
-clock_t
-dprof_times(struct tms *t)
-{
-    ULONG rc;
-    QWORD cnt;
-    
-    if (!frequ) {
-       if (CheckOSError(DosTmrQueryFreq(&frequ)))
-           croak("DosTmrQueryFreq: %s", SvPV(perl_get_sv("!",TRUE),na));
-       else
-           frequ = frequ/DPROF_HZ;     /* count per tick */
-       if (CheckOSError(DosTmrQueryTime(&cnt)))
-           croak("DosTmrQueryTime: %s",
-                 SvPV(perl_get_sv("!",TRUE),na));
-       start_cnt = toLongLong(cnt);
-    }
-
-    if (CheckOSError(DosTmrQueryTime(&cnt)))
-           croak("DosTmrQueryTime: %s", SvPV(perl_get_sv("!",TRUE),na));
-    t->tms_stime = 0;
-    return (t->tms_utime = (toLongLong(cnt) - start_cnt)/frequ);
-}
+#    define DPROF_HZ g_dprof_ticks
 #  else
 #    define Times(ptr) (times(ptr))
 #    define DPROF_HZ HZ
@@ -106,29 +90,10 @@ dprof_times(struct tms *t)
 
 XS(XS_Devel__DProf_END);        /* used by prof_mark() */
 
-static SV * Sub;        /* pointer to $DB::sub */
-static PerlIO *fp;      /* pointer to tmon.out file */
-
-/* Added -JH */
-static long TIMES_LOCATION=42;/* Where in the file to store the time totals */
-static int SAVE_STACK = 1<<14;         /* How much data to buffer until */
-                                       /* end of run */
-
-static int prof_pid;    /* pid of profiled process */
-
 /* Everything is built on times(2).  See its manpage for a description
  * of the timings.
  */
 
-static
-struct tms      prof_start,
-                prof_end;
-
-static
-clock_t         rprof_start, /* elapsed real time, in ticks */
-                rprof_end,
-               wprof_u, wprof_s, wprof_r;
-
 union prof_any {
         clock_t tms_utime;  /* cpu time spent in user space */
         clock_t tms_stime;  /* cpu time spent in system */
@@ -140,57 +105,164 @@ union prof_any {
 
 typedef union prof_any PROFANY;
 
-static PROFANY  *profstack;
-static int      profstack_max = 128;
-static int      profstack_ix = 0;
+typedef struct {
+    U32                dprof_ticks;
+    char*      out_file_name;  /* output file (defaults to tmon.out) */
+    PerlIO*    fp;             /* pointer to tmon.out file */
+    Off_t      TIMES_LOCATION; /* Where in the file to store the time totals */
+    int                SAVE_STACK;     /* How much data to buffer until end of run */
+    int                prof_pid;       /* pid of profiled process */
+    struct tms prof_start;
+    struct tms prof_end;
+    clock_t    rprof_start;    /* elapsed real time ticks */
+    clock_t    rprof_end;
+    clock_t    wprof_u;
+    clock_t    wprof_s;
+    clock_t    wprof_r;
+    clock_t    otms_utime;
+    clock_t    otms_stime;
+    clock_t    orealtime;
+    PROFANY*   profstack;
+    int                profstack_max;
+    int                profstack_ix;
+    HV*                cv_hash;        /* cache of CV to identifier mappings */
+    SV*                key_hash;       /* key for cv_hash */
+    U32                total;
+    U32                lastid;
+    U32                default_perldb;
+    UV         depth;
+#ifdef OS2
+    ULONG      frequ;
+    long long  start_cnt;
+#endif
+#ifdef PERL_IMPLICIT_CONTEXT
+#  define register
+    pTHX;
+#  undef register
+#endif
+} prof_state_t;
+
+prof_state_t g_prof_state;
+
+#define g_dprof_ticks          g_prof_state.dprof_ticks
+#define g_out_file_name                g_prof_state.out_file_name
+#define g_fp                   g_prof_state.fp
+#define g_TIMES_LOCATION       g_prof_state.TIMES_LOCATION
+#define g_SAVE_STACK           g_prof_state.SAVE_STACK
+#define g_prof_pid             g_prof_state.prof_pid
+#define g_prof_start           g_prof_state.prof_start
+#define g_prof_end             g_prof_state.prof_end
+#define g_rprof_start          g_prof_state.rprof_start
+#define g_rprof_end            g_prof_state.rprof_end
+#define g_wprof_u              g_prof_state.wprof_u
+#define g_wprof_s              g_prof_state.wprof_s
+#define g_wprof_r              g_prof_state.wprof_r
+#define g_otms_utime           g_prof_state.otms_utime
+#define g_otms_stime           g_prof_state.otms_stime
+#define g_orealtime            g_prof_state.orealtime
+#define g_profstack            g_prof_state.profstack
+#define g_profstack_max                g_prof_state.profstack_max
+#define g_profstack_ix         g_prof_state.profstack_ix
+#define g_cv_hash              g_prof_state.cv_hash
+#define g_key_hash             g_prof_state.key_hash
+#define g_total                        g_prof_state.total
+#define g_lastid               g_prof_state.lastid
+#define g_default_perldb       g_prof_state.default_perldb
+#define g_depth                        g_prof_state.depth
+#ifdef PERL_IMPLICIT_CONTEXT
+#  define g_THX                        g_prof_state.aTHX
+#endif
+#ifdef OS2
+#  define g_frequ              g_prof_state.frequ
+#  define g_start_cnt          g_prof_state.start_cnt
+#endif
 
-static void
-prof_dump(opcode ptype, char *name)
+clock_t
+dprof_times(pTHX_ struct tms *t)
 {
-    if(ptype == OP_LEAVESUB){
-       PerlIO_printf(fp,"- & %s\n", name );
-    } else if(ptype == OP_ENTERSUB) {
-       PerlIO_printf(fp,"+ & %s\n", name );
-    } else if(ptype == OP_DIE) {
-       PerlIO_printf(fp,"/ & %s\n", name );
-    } else {
-       PerlIO_printf(fp,"Profiler unknown prof code %d\n", ptype);
+#ifdef OS2
+    ULONG rc;
+    QWORD cnt;
+    STRLEN n_a;
+    
+    if (!g_frequ) {
+       if (CheckOSError(DosTmrQueryFreq(&g_frequ)))
+           croak("DosTmrQueryFreq: %s", SvPV(perl_get_sv("!",TRUE),n_a));
+       else
+           g_frequ = g_frequ/DPROF_HZ; /* count per tick */
+       if (CheckOSError(DosTmrQueryTime(&cnt)))
+           croak("DosTmrQueryTime: %s",
+                 SvPV(perl_get_sv("!",TRUE), n_a));
+       g_start_cnt = toLongLong(cnt);
     }
-    safefree(name);
-}   
+
+    if (CheckOSError(DosTmrQueryTime(&cnt)))
+           croak("DosTmrQueryTime: %s", SvPV(perl_get_sv("!",TRUE), n_a));
+    t->tms_stime = 0;
+    return (t->tms_utime = (toLongLong(cnt) - g_start_cnt)/g_frequ);
+#else          /* !OS2 */
+#  ifdef VMS
+    clock_t retval;
+    /* Get wall time and convert to 10 ms intervals to
+     * produce the return value dprof expects */
+#    if defined(__DECC) && defined (__ALPHA)
+#      include <ints.h>
+    uint64 vmstime;
+    _ckvmssts(sys$gettim(&vmstime));
+    vmstime /= 100000;
+    retval = vmstime & 0x7fffffff;
+#    else
+    /* (Older hw or ccs don't have an atomic 64-bit type, so we
+     * juggle 32-bit ints (and a float) to produce a time_t result
+     * with minimal loss of information.) */
+    long int vmstime[2],remainder,divisor = 100000;
+    _ckvmssts(sys$gettim((unsigned long int *)vmstime));
+    vmstime[1] &= 0x7fff;  /* prevent overflow in EDIV */
+    _ckvmssts(lib$ediv(&divisor,vmstime,(long int *)&retval,&remainder));
+#    endif
+    /* Fill in the struct tms using the CRTL routine . . .*/
+    times((tbuffer_t *)t);
+    return (clock_t) retval;
+#  else                /* !VMS && !OS2 */
+    return times(t);
+#  endif
+#endif
+}
 
 static void
-prof_dumpa(opcode ptype, U32 id)
+prof_dumpa(pTHX_ opcode ptype, U32 id)
 {
-    if(ptype == OP_LEAVESUB){
-       PerlIO_printf(fp,"- %lx\n", id );
-    } else if(ptype == OP_ENTERSUB) {
-       PerlIO_printf(fp,"+ %lx\n", id );
-    } else if(ptype == OP_GOTO) {
-       PerlIO_printf(fp,"* %lx\n", id );
-    } else if(ptype == OP_DIE) {
-       PerlIO_printf(fp,"/ %lx\n", id );
-    } else {
-       PerlIO_printf(fp,"Profiler unknown prof code %d\n", ptype);
+    if (ptype == OP_LEAVESUB) {
+       PerlIO_printf(g_fp,"- %"UVxf"\n", (UV)id);
+    }
+    else if(ptype == OP_ENTERSUB) {
+       PerlIO_printf(g_fp,"+ %"UVxf"\n", (UV)id);
+    }
+    else if(ptype == OP_GOTO) {
+       PerlIO_printf(g_fp,"* %"UVxf"\n", (UV)id);
+    }
+    else if(ptype == OP_DIE) {
+       PerlIO_printf(g_fp,"/ %"UVxf"\n", (UV)id);
+    }
+    else {
+       PerlIO_printf(g_fp,"Profiler unknown prof code %d\n", ptype);
     }
 }   
 
 static void
-prof_dumps(U32 id, char *pname, char *gname)
+prof_dumps(pTHX_ U32 id, char *pname, char *gname)
 {
-    PerlIO_printf(fp,"& %lx %s %s\n", id, pname, gname);
+    PerlIO_printf(g_fp,"& %"UVxf" %s %s\n", (UV)id, pname, gname);
 }   
 
-static clock_t otms_utime, otms_stime, orealtime;
-
 static void
-prof_dumpt(long tms_utime, long tms_stime, long realtime)
+prof_dumpt(pTHX_ long tms_utime, long tms_stime, long realtime)
 {
-    PerlIO_printf(fp,"@ %ld %ld %ld\n", tms_utime, tms_stime, realtime);
+    PerlIO_printf(g_fp,"@ %ld %ld %ld\n", tms_utime, tms_stime, realtime);
 }   
 
 static void
-prof_dump_until(long ix)
+prof_dump_until(pTHX_ long ix)
 {
     long base = 0;
     struct tms t1, t2;
@@ -198,266 +270,212 @@ prof_dump_until(long ix)
 
     realtime1 = Times(&t1);
 
-    while( base < ix ){
-       opcode ptype = profstack[base++].ptype;
+    while (base < ix) {
+       opcode ptype = g_profstack[base++].ptype;
        if (ptype == OP_TIME) {
-           long tms_utime = profstack[base++].tms_utime;
-           long tms_stime = profstack[base++].tms_stime;
-           long realtime = profstack[base++].realtime;
+           long tms_utime = g_profstack[base++].tms_utime;
+           long tms_stime = g_profstack[base++].tms_stime;
+           long realtime = g_profstack[base++].realtime;
 
-           prof_dumpt(tms_utime, tms_stime, realtime);
-       } else if (ptype == OP_GV) {
-           U32 id = profstack[base++].id;
-           char *pname = profstack[base++].name;
-           char *gname = profstack[base++].name;
+           prof_dumpt(aTHX_ tms_utime, tms_stime, realtime);
+       }
+       else if (ptype == OP_GV) {
+           U32 id = g_profstack[base++].id;
+           char *pname = g_profstack[base++].name;
+           char *gname = g_profstack[base++].name;
 
-           prof_dumps(id, pname, gname);
-       } else {
-#ifdef PERLDBf_NONAME
-           U32 id = profstack[base++].id;
-           prof_dumpa(ptype, id);
-#else
-           char *name = profstack[base++].name;
-           prof_dump(ptype, name);
-#endif 
+           prof_dumps(aTHX_ id, pname, gname);
+       }
+       else {
+           U32 id = g_profstack[base++].id;
+           prof_dumpa(aTHX_ ptype, id);
        }
     }
-    PerlIO_flush(fp);
+    PerlIO_flush(g_fp);
     realtime2 = Times(&t2);
     if (realtime2 != realtime1 || t1.tms_utime != t2.tms_utime
        || t1.tms_stime != t2.tms_stime) {
-       wprof_r += realtime2 - realtime1;
-       wprof_u += t2.tms_utime - t1.tms_utime;
-       wprof_s += t2.tms_stime - t1.tms_stime;
-
-       PerlIO_printf(fp,"+ & Devel::DProf::write\n" );
-       PerlIO_printf(fp,"@ %ld %ld %ld\n", 
-               t2.tms_utime - t1.tms_utime, t2.tms_stime - t1.tms_stime, 
-               realtime2 - realtime1);
-       PerlIO_printf(fp,"- & Devel::DProf::write\n" );
-       otms_utime = t2.tms_utime;
-       otms_stime = t2.tms_stime;
-       orealtime = realtime2;
-       PerlIO_flush(fp);
+       g_wprof_r += realtime2 - realtime1;
+       g_wprof_u += t2.tms_utime - t1.tms_utime;
+       g_wprof_s += t2.tms_stime - t1.tms_stime;
+
+       PerlIO_printf(g_fp,"+ & Devel::DProf::write\n");
+       PerlIO_printf(g_fp,"@ %"IVdf" %"IVdf" %"IVdf"\n", 
+                     /* The (IV) casts are one possibility:
+                      * the Painfully Correct Way would be to
+                      * have Clock_t_f. */
+                     (IV)(t2.tms_utime - t1.tms_utime),
+                     (IV)(t2.tms_stime - t1.tms_stime), 
+                     (IV)(realtime2 - realtime1));
+       PerlIO_printf(g_fp,"- & Devel::DProf::write\n");
+       g_otms_utime = t2.tms_utime;
+       g_otms_stime = t2.tms_stime;
+       g_orealtime = realtime2;
+       PerlIO_flush(g_fp);
     }
 }
 
-static HV* cv_hash;
-static U32 total = 0;
+static void
+set_cv_key(pTHX_ CV *cv, char *pname, char *gname)
+{
+       SvGROW(g_key_hash, sizeof(CV**) + strlen(pname) + strlen(gname) + 3);
+       sv_setpvn(g_key_hash, (char*)&cv, sizeof(CV**));
+       sv_catpv(g_key_hash, pname);
+       sv_catpv(g_key_hash, "::");
+       sv_catpv(g_key_hash, gname);
+}
 
 static void
-prof_mark( opcode ptype )
+prof_mark(pTHX_ opcode ptype)
 {
-        struct tms t;
-        clock_t realtime, rdelta, udelta, sdelta;
-        char *name, *pv;
-        char *hvname;
-        STRLEN len;
-        SV *sv;
-       U32 id;
-
-        if( SAVE_STACK ){
-                if( profstack_ix + 5 > profstack_max ){
-                        profstack_max = profstack_max * 3 / 2;
-                        Renew( profstack, profstack_max, PROFANY );
-                }
-        }
+    struct tms t;
+    clock_t realtime, rdelta, udelta, sdelta;
+    U32 id;
+    SV *Sub = GvSV(PL_DBsub);  /* name of current sub */
+
+    if (g_SAVE_STACK) {
+       if (g_profstack_ix + 10 > g_profstack_max) {
+               g_profstack_max = g_profstack_max * 3 / 2;
+               Renew(g_profstack, g_profstack_max, PROFANY);
+       }
+    }
 
-        realtime = Times(&t);
-       rdelta = realtime - orealtime;
-       udelta = t.tms_utime - otms_utime;
-       sdelta = t.tms_stime - otms_stime;
-       if (rdelta || udelta || sdelta) {
-           if (SAVE_STACK) {
-               profstack[profstack_ix++].ptype = OP_TIME;
-               profstack[profstack_ix++].tms_utime = udelta;
-               profstack[profstack_ix++].tms_stime = sdelta;
-               profstack[profstack_ix++].realtime = rdelta;
-           } else { /* Write it to disk now so's not to eat up core */
-               if (prof_pid == (int)getpid()) {
-                   prof_dumpt(udelta, sdelta, rdelta);
-                   PerlIO_flush(fp);
-               }
+    realtime = Times(&t);
+    rdelta = realtime - g_orealtime;
+    udelta = t.tms_utime - g_otms_utime;
+    sdelta = t.tms_stime - g_otms_stime;
+    if (rdelta || udelta || sdelta) {
+       if (g_SAVE_STACK) {
+           ASSERT(g_profstack_ix + 4 <= g_profstack_max);
+           g_profstack[g_profstack_ix++].ptype = OP_TIME;
+           g_profstack[g_profstack_ix++].tms_utime = udelta;
+           g_profstack[g_profstack_ix++].tms_stime = sdelta;
+           g_profstack[g_profstack_ix++].realtime = rdelta;
+       }
+       else { /* Write it to disk now so's not to eat up core */
+           if (g_prof_pid == (int)getpid()) {
+               prof_dumpt(aTHX_ udelta, sdelta, rdelta);
+               PerlIO_flush(g_fp);
            }
-           orealtime = realtime;
-           otms_stime = t.tms_stime;
-           otms_utime = t.tms_utime;
        }
+       g_orealtime = realtime;
+       g_otms_stime = t.tms_stime;
+       g_otms_utime = t.tms_utime;
+    }
 
-#ifdef PERLDBf_NONAME
-       {
-           dTHX;
-           SV **svp;
-           char *gname, *pname;
-           static U32 lastid;
-           CV *cv;
-
-           cv = INT2PTR(CV*,SvIVX(Sub));
-           svp = hv_fetch(cv_hash, (char*)&cv, sizeof(CV*), TRUE);
-           if (!SvOK(*svp)) {
-               GV *gv = CvGV(cv);
-                   
-               sv_setiv(*svp, id = ++lastid);
-               pname = ((GvSTASH(gv) && HvNAME(GvSTASH(gv))) 
-                        ? HvNAME(GvSTASH(gv)) 
-                        : "(null)");
-               gname = GvNAME(gv);
-               if (CvXSUB(cv) == XS_Devel__DProf_END)
-                   return;
-               if (SAVE_STACK) { /* Store it for later recording  -JH */
-                   profstack[profstack_ix++].ptype = OP_GV;
-                   profstack[profstack_ix++].id = id;
-                   profstack[profstack_ix++].name = pname;
-                   profstack[profstack_ix++].name = gname;
-               } else { /* Write it to disk now so's not to eat up core */
-
-                   /* Only record the parent's info */
-                   if (prof_pid == (int)getpid()) {
-                       prof_dumps(id, pname, gname);
-                       PerlIO_flush(fp);
-                   } else
-                       perldb = 0;             /* Do not debug the kid. */
+    {
+       SV **svp;
+       char *gname, *pname;
+       CV *cv;
+       GV *gv;
+
+       cv = db_get_cv(aTHX_ Sub);
+       gv = CvGV(cv);
+       pname = ((GvSTASH(gv) && HvNAME(GvSTASH(gv))) 
+                ? HvNAME(GvSTASH(gv)) 
+                : (char *) "(null)");
+       gname = GvNAME(gv);
+
+       set_cv_key(aTHX_ cv, pname, gname);
+       svp = hv_fetch(g_cv_hash, SvPVX(g_key_hash), SvCUR(g_key_hash), TRUE);
+       if (!SvOK(*svp)) {
+           sv_setiv(*svp, id = ++g_lastid);
+           if (CvXSUB(cv) == XS_Devel__DProf_END)
+               return;
+           if (g_SAVE_STACK) { /* Store it for later recording  -JH */
+               ASSERT(g_profstack_ix + 4 <= g_profstack_max);
+               g_profstack[g_profstack_ix++].ptype = OP_GV;
+               g_profstack[g_profstack_ix++].id = id;
+               g_profstack[g_profstack_ix++].name = pname;
+               g_profstack[g_profstack_ix++].name = gname;
+           }
+           else { /* Write it to disk now so's not to eat up core */
+               /* Only record the parent's info */
+               if (g_prof_pid == (int)getpid()) {
+                   prof_dumps(aTHX_ id, pname, gname);
+                   PerlIO_flush(g_fp);
                }
-           } else {
-               id = SvIV(*svp);
+               else
+                   PL_perldb = 0;              /* Do not debug the kid. */
            }
        }
-#else
-       pv = SvPV( Sub, len );
-
-        if( SvROK(Sub) ){
-                /* Attempt to make CODE refs slightly identifiable by
-                 * including their package name.
-                 */
-                sv = (SV*)SvRV(Sub);
-                if( sv && SvTYPE(sv) == SVt_PVCV ){
-                        if( CvSTASH(sv) ){
-                                hvname = HvNAME(CvSTASH(sv));
-                        }
-                        else if( CvXSUB(sv) == &XS_Devel__DProf_END ){
-                                /*warn( "prof_mark() found dprof::end");*/
-                                return; /* don't profile Devel::DProf::END */
-                        }
-                        else{
-                    croak( "DProf prof_mark() lost on CODE ref %s\n", pv );
-                        }
-                        len += strlen( hvname ) + 2;  /* +2 for ::'s */
-
-                }
-                else{
-        croak( "DProf prof_mark() lost on supposed CODE ref %s.\n", pv );
-                }
-                name = (char *)safemalloc( len * sizeof(char) + 1 );
-                strcpy( name, hvname );
-                strcat( name, "::" );
-                strcat( name, pv );
-        }
-        else{
-                if( *(pv+len-1) == 'D' ){
-                        /* It could be an &AUTOLOAD. */
-
-                        /* I measured a bunch of *.pl and *.pm (from Perl
-                         * distribution and other misc things) and found
-                         * 780 fully-qualified names.  They averaged
-                         * about 19 chars each.  Only 1 of those names
-                         * ended with 'D' and wasn't an &AUTOLOAD--it
-                         * was &overload::OVERLOAD.
-                         *    --dmr 2/19/96
-                         */
-
-                        if( strcmp( pv+len-9, ":AUTOLOAD" ) == 0 ){
-                                /* The sub name is in $AUTOLOAD */
-                                sv = perl_get_sv( pv, 0 );
-                                if( sv == NULL ){
-                croak("DProf prof_mark() lost on AUTOLOAD (%s).\n", pv );
-                                }
-                                pv = SvPV( sv, na );
-                                DBG_SUB_NOTIFY( "  AUTOLOAD(%s)\n", pv );
-                        }
-                }
-                name = savepv( pv );
-        }
-#endif /* PERLDBf_NONAME */
+       else {
+           id = SvIV(*svp);
+       }
+    }
 
-       total++;
-        if (SAVE_STACK) { /* Store it for later recording  -JH */
-           profstack[profstack_ix++].ptype = ptype;
-#ifdef PERLDBf_NONAME
-           profstack[profstack_ix++].id = id;
-#else
-           profstack[profstack_ix++].name = name;
-#endif 
-            /* Only record the parent's info */
-           if (SAVE_STACK < profstack_ix) {
-               if (prof_pid == (int)getpid())
-                   prof_dump_until(profstack_ix);
-               else
-                   perldb = 0;         /* Do not debug the kid. */
-               profstack_ix = 0;
-           }
-        } else { /* Write it to disk now so's not to eat up core */
+    g_total++;
+    if (g_SAVE_STACK) { /* Store it for later recording  -JH */
+       ASSERT(g_profstack_ix + 2 <= g_profstack_max);
+       g_profstack[g_profstack_ix++].ptype = ptype;
+       g_profstack[g_profstack_ix++].id = id;
+
+       /* Only record the parent's info */
+       if (g_SAVE_STACK < g_profstack_ix) {
+           if (g_prof_pid == (int)getpid())
+               prof_dump_until(aTHX_ g_profstack_ix);
+           else
+               PL_perldb = 0;          /* Do not debug the kid. */
+           g_profstack_ix = 0;
+       }
+    }
+    else { /* Write it to disk now so's not to eat up core */
 
-            /* Only record the parent's info */
-            if (prof_pid == (int)getpid()) {
-#ifdef PERLDBf_NONAME
-               prof_dumpa(ptype, id);
-#else
-               prof_dump(ptype, name);
-#endif 
-                PerlIO_flush(fp);
-            } else
-               perldb = 0;             /* Do not debug the kid. */
-        }
+       /* Only record the parent's info */
+       if (g_prof_pid == (int)getpid()) {
+           prof_dumpa(aTHX_ ptype, id);
+           PerlIO_flush(g_fp);
+       }
+       else
+           PL_perldb = 0;              /* Do not debug the kid. */
+    }
 }
 
-static U32 default_perldb;
-
 #ifdef PL_NEEDED
 #  define defstash PL_defstash
 #endif
 
 /* Counts overhead of prof_mark and extra XS call. */
 static void
-test_time(clock_t *r, clock_t *u, clock_t *s)
+test_time(pTHX_ clock_t *r, clock_t *u, clock_t *s)
 {
-    dTHR;
-    dTHX;
     CV *cv = perl_get_cv("Devel::DProf::NONESUCH_noxs", FALSE);
     int i, j, k = 0;
-    HV *oldstash = curstash;
+    HV *oldstash = PL_curstash;
     struct tms t1, t2;
-    clock_t realtime1, realtime2;
-    U32 ototal = total;
-    U32 ostack = SAVE_STACK;
-    U32 operldb = perldb;
+    clock_t realtime1 = 0, realtime2 = 0;
+    U32 ototal = g_total;
+    U32 ostack = g_SAVE_STACK;
+    U32 operldb = PL_perldb;
 
-    SAVE_STACK = 1000000;
+    g_SAVE_STACK = 1000000;
     realtime1 = Times(&t1);
     
     while (k < 2) {
        i = 0;
            /* Disable debugging of perl_call_sv on second pass: */
-       curstash = (k == 0 ? defstash : debstash);
-       perldb = default_perldb;
+       PL_curstash = (k == 0 ? PL_defstash : PL_debstash);
+       PL_perldb = g_default_perldb;
        while (++i <= 100) {
            j = 0;
-           profstack_ix = 0;           /* Do not let the stack grow */
+           g_profstack_ix = 0;         /* Do not let the stack grow */
            while (++j <= 100) {
-/*             prof_mark( OP_ENTERSUB ); */
+/*             prof_mark(aTHX_ OP_ENTERSUB); */
 
-               PUSHMARK( stack_sp );
-               perl_call_sv( (SV*)cv, G_SCALAR );
-               stack_sp--;
-/*             prof_mark( OP_LEAVESUB ); */
+               PUSHMARK(PL_stack_sp);
+               perl_call_sv((SV*)cv, G_SCALAR);
+               PL_stack_sp--;
+/*             prof_mark(aTHX_ OP_LEAVESUB); */
            }
        }
-       curstash = oldstash;
+       PL_curstash = oldstash;
        if (k == 0) {                   /* Put time with debugging */
            realtime2 = Times(&t2);
            *r = realtime2 - realtime1;
            *u = t2.tms_utime - t1.tms_utime;
            *s = t2.tms_stime - t1.tms_stime;
-       } else {                        /* Subtract time without debug */
+       }
+       else {                          /* Subtract time without debug */
            realtime1 = Times(&t1);
            *r -= realtime1 - realtime2;
            *u -= t1.tms_utime - t2.tms_utime;
@@ -465,81 +483,86 @@ test_time(clock_t *r, clock_t *u, clock_t *s)
        }
        k++;
     }
-    total = ototal;
-    SAVE_STACK = ostack;
-    perldb = operldb;
+    g_total = ototal;
+    g_SAVE_STACK = ostack;
+    PL_perldb = operldb;
 }
 
 static void
-prof_recordheader()
+prof_recordheader(pTHX)
 {
-       clock_t r, u, s;
-
-        /* fp is opened in the BOOT section */
-        PerlIO_printf(fp, "#fOrTyTwO\n" );
-        PerlIO_printf(fp, "$hz=%d;\n", DPROF_HZ );
-        PerlIO_printf(fp, "$XS_VERSION='DProf %s';\n", XS_VERSION );
-        PerlIO_printf(fp, "# All values are given in HZ\n" );
-       test_time(&r, &u, &s);
-        PerlIO_printf(fp, "$over_utime=%ld; $over_stime=%ld; $over_rtime=%ld;\n",
-                u, s, r);
-        PerlIO_printf(fp, "$over_tests=10000;\n");
-
-        TIMES_LOCATION = PerlIO_tell(fp);
-
-        /* Pad with whitespace. */
-        /* This should be enough even for very large numbers. */
-        PerlIO_printf(fp, "%*s\n", 240 , "");
-
-        PerlIO_printf(fp, "\n");
-        PerlIO_printf(fp, "PART2\n" );
-
-        PerlIO_flush(fp);
+    clock_t r, u, s;
+
+    /* g_fp is opened in the BOOT section */
+    PerlIO_printf(g_fp, "#fOrTyTwO\n");
+    PerlIO_printf(g_fp, "$hz=%"IVdf";\n", (IV)DPROF_HZ);
+    PerlIO_printf(g_fp, "$XS_VERSION='DProf %s';\n", XS_VERSION);
+    PerlIO_printf(g_fp, "# All values are given in HZ\n");
+    test_time(aTHX_ &r, &u, &s);
+    PerlIO_printf(g_fp,
+                 "$over_utime=%"IVdf"; $over_stime=%"IVdf"; $over_rtime=%"IVdf";\n",
+                 /* The (IV) casts are one possibility:
+                  * the Painfully Correct Way would be to
+                  * have Clock_t_f. */
+                 (IV)u, (IV)s, (IV)r);
+    PerlIO_printf(g_fp, "$over_tests=10000;\n");
+
+    g_TIMES_LOCATION = PerlIO_tell(g_fp);
+
+    /* Pad with whitespace. */
+    /* This should be enough even for very large numbers. */
+    PerlIO_printf(g_fp, "%*s\n", 240 , "");
+
+    PerlIO_printf(g_fp, "\n");
+    PerlIO_printf(g_fp, "PART2\n");
+
+    PerlIO_flush(g_fp);
 }
 
 static void
-prof_record()
+prof_record(pTHX)
 {
-        /* fp is opened in the BOOT section */
+    /* g_fp is opened in the BOOT section */
 
-        /* Now that we know the runtimes, fill them in at the recorded
-           location -JH */
+    /* Now that we know the runtimes, fill them in at the recorded
+       location -JH */
 
-       clock_t r, u, s;
+    if (g_SAVE_STACK) {
+       prof_dump_until(aTHX_ g_profstack_ix);
+    }
+    PerlIO_seek(g_fp, g_TIMES_LOCATION, SEEK_SET);
+    /* Write into reserved 240 bytes: */
+    PerlIO_printf(g_fp,
+                 "$rrun_utime=%"IVdf"; $rrun_stime=%"IVdf"; $rrun_rtime=%"IVdf";",
+                 /* The (IV) casts are one possibility:
+                  * the Painfully Correct Way would be to
+                  * have Clock_t_f. */
+                 (IV)(g_prof_end.tms_utime-g_prof_start.tms_utime-g_wprof_u),
+                 (IV)(g_prof_end.tms_stime-g_prof_start.tms_stime-g_wprof_s),
+                 (IV)(g_rprof_end-g_rprof_start-g_wprof_r));
+    PerlIO_printf(g_fp, "\n$total_marks=%"IVdf, (IV)g_total);
     
-        if(SAVE_STACK){
-           prof_dump_until(profstack_ix);
-        }
-        PerlIO_seek(fp, TIMES_LOCATION, SEEK_SET);
-       /* Write into reserved 240 bytes: */
-        PerlIO_printf(fp, "$rrun_utime=%ld; $rrun_stime=%ld; $rrun_rtime=%ld;",
-                prof_end.tms_utime - prof_start.tms_utime - wprof_u,
-                prof_end.tms_stime - prof_start.tms_stime - wprof_s,
-                rprof_end - rprof_start - wprof_r );
-        PerlIO_printf(fp, "\n$total_marks=%ld;", total);
-       
-        PerlIO_close( fp );
+    PerlIO_close(g_fp);
 }
 
 #define NONESUCH()
 
-static U32 depth = 0;
-
 static void
 check_depth(pTHX_ void *foo)
 {
-    U32 need_depth = (U32)foo;
-    if (need_depth != depth) {
-       if (need_depth > depth) {
+    U32 need_depth = PTR2UV(foo);
+    if (need_depth != g_depth) {
+       if (need_depth > g_depth) {
            warn("garbled call depth when profiling");
-       } else {
-           I32 marks = depth - need_depth;
+       }
+       else {
+           IV marks = g_depth - need_depth;
 
-/*         warn("Check_depth: got %d, expected %d\n", depth, need_depth); */
+/*         warn("Check_depth: got %d, expected %d\n", g_depth, need_depth); */
            while (marks--) {
-               prof_mark( OP_DIE );
+               prof_mark(aTHX_ OP_DIE);
            }
-           depth = need_depth;
+           g_depth = need_depth;
        }
     }
 }
@@ -549,48 +572,55 @@ check_depth(pTHX_ void *foo)
 
 XS(XS_DB_sub)
 {
-        dXSARGS;
-        dORIGMARK;
-        HV *oldstash = curstash;
-
-        SP -= items;
-
-        DBG_SUB_NOTIFY( "XS DBsub(%s)\n", SvPV(Sub, na) );
-
-#ifndef PERLDBf_NONAME                 /* Was needed on older Perls */
-        sv_setiv( DBsingle, 0 ); /* disable DB single-stepping */
-#endif 
-
-       SAVEDESTRUCTOR_X(check_depth, (void*)depth);
-       depth++;
-
-        prof_mark( OP_ENTERSUB );
-        PUSHMARK( ORIGMARK );
-
-#ifdef G_NODEBUG
-        perl_call_sv( INT2PTR(SV*,SvIV(Sub)), GIMME | G_NODEBUG);
-#else
-        curstash = debstash;    /* To disable debugging of perl_call_sv */
-#ifdef PERLDBf_NONAME
-        perl_call_sv( (SV*)SvIV(Sub), GIMME );
-#else
-        perl_call_sv( Sub, GIMME );
-#endif 
-        curstash = oldstash;
-#endif 
-
-        prof_mark( OP_LEAVESUB );
-       depth--;
-
-        SPAGAIN;
-        PUTBACK;
-        return;
+    dMARK;
+    dORIGMARK;
+    SV *Sub = GvSV(PL_DBsub);          /* name of current sub */
+
+#ifdef PERL_IMPLICIT_CONTEXT
+    /* profile only the interpreter that loaded us */
+    if (g_THX != aTHX) {
+        PUSHMARK(ORIGMARK);
+        perl_call_sv((SV*)db_get_cv(aTHX_ Sub), GIMME_V | G_NODEBUG);
+    }
+    else
+#endif
+    {
+       HV *oldstash = PL_curstash;
+       I32 old_scopestack_ix = PL_scopestack_ix;
+       I32 old_cxstack_ix = cxstack_ix;
+
+        DBG_SUB_NOTIFY(Sub);
+
+       SAVEDESTRUCTOR_X(check_depth, INT2PTR(void*,g_depth));
+       g_depth++;
+
+        prof_mark(aTHX_ OP_ENTERSUB);
+        PUSHMARK(ORIGMARK);
+        perl_call_sv((SV*)db_get_cv(aTHX_ Sub), GIMME_V | G_NODEBUG);
+        PL_curstash = oldstash;
+
+       /* Make sure we are on the same context and scope as before the call
+        * to the sub. If the called sub was exited via a goto, next or
+        * last then this will try to croak(), however perl may still crash
+        * with a segfault. */
+       if (PL_scopestack_ix != old_scopestack_ix || cxstack_ix != old_cxstack_ix)
+           croak("panic: Devel::DProf inconsistent subroutine return");
+
+        prof_mark(aTHX_ OP_LEAVESUB);
+       g_depth--;
+    }
+    return;
 }
 
 XS(XS_DB_goto)
 {
-        prof_mark( OP_GOTO );
+#ifdef PERL_IMPLICIT_CONTEXT
+    if (g_THX == aTHX)
+#endif
+    {
+        prof_mark(aTHX_ OP_GOTO);
         return;
+    }
 }
 
 #endif /* for_real */
@@ -601,26 +631,27 @@ XS(XS_DB_goto)
 
         void
         sub(...)
-                PPCODE:
-
+       PPCODE:
+           {
                 dORIGMARK;
-                HV *oldstash = curstash;
+                HV *oldstash = PL_curstash;
+               SV *Sub = GvSV(PL_DBsub);       /* name of current sub */
                 /* SP -= items;  added by xsubpp */
-                DBG_SUB_NOTIFY( "XS DBsub(%s)\n", SvPV(Sub, na) );
+                DBG_SUB_NOTIFY(Sub);
 
-                sv_setiv( DBsingle, 0 ); /* disable DB single-stepping */
+                sv_setiv(PL_DBsingle, 0);      /* disable DB single-stepping */
 
-                prof_mark( OP_ENTERSUB );
-                PUSHMARK( ORIGMARK );
+                prof_mark(aTHX_ OP_ENTERSUB);
+                PUSHMARK(ORIGMARK);
 
-                curstash = debstash;    /* To disable debugging of perl_call_sv
-*/
-                perl_call_sv( Sub, GIMME );
-                curstash = oldstash;
+                PL_curstash = PL_debstash;     /* To disable debugging of perl_call_sv */
+                perl_call_sv(Sub, GIMME_V);
+                PL_curstash = oldstash;
 
-                prof_mark( OP_LEAVESUB );
+                prof_mark(aTHX_ OP_LEAVESUB);
                 SPAGAIN;
                 /* PUTBACK;  added by xsubpp */
+           }
 
 #endif /* testing */
 
@@ -628,80 +659,91 @@ MODULE = Devel::DProf           PACKAGE = Devel::DProf
 
 void
 END()
-        PPCODE:
-        if( DBsub ){
-                /* maybe the process forked--we want only
-                 * the parent's profile.
-                 */
-                if( prof_pid == (int)getpid() ){
-                        rprof_end = Times(&prof_end);
-                        DBG_TIMER_NOTIFY("Profiler timer is off.\n");
-                        prof_record();
-                }
-        }
+PPCODE:
+    {
+        if (PL_DBsub) {
+           /* maybe the process forked--we want only
+            * the parent's profile.
+            */
+           if (
+#ifdef PERL_IMPLICIT_CONTEXT
+               g_THX == aTHX &&
+#endif
+               g_prof_pid == (int)getpid())
+           {
+               g_rprof_end = Times(&g_prof_end);
+               DBG_TIMER_NOTIFY("Profiler timer is off.\n");
+               prof_record(aTHX);
+           }
+       }
+    }
 
 void
 NONESUCH()
 
 BOOT:
+    {
+       g_TIMES_LOCATION = 42;
+       g_SAVE_STACK = 1<<14;
+       g_profstack_max = 128;
+#ifdef PERL_IMPLICIT_CONTEXT
+       g_THX = aTHX;
+#endif
+
         /* Before we go anywhere make sure we were invoked
          * properly, else we'll dump core.
          */
-        if( ! DBsub )
-                croak("DProf: run perl with -d to use DProf.\n");
+        if (!PL_DBsub)
+           croak("DProf: run perl with -d to use DProf.\n");
 
         /* When we hook up the XS DB::sub we'll be redefining
          * the DB::sub from the PM file.  Turn off warnings
          * while we do this.
          */
         {
-                I32 warn_tmp = dowarn;
-                dowarn = 0;
-                newXS("DB::sub", XS_DB_sub, file);
-                newXS("DB::goto", XS_DB_goto, file);
-                dowarn = warn_tmp;
+           bool warn_tmp = PL_dowarn;
+           PL_dowarn = 0;
+           newXS("DB::sub", XS_DB_sub, file);
+           newXS("DB::goto", XS_DB_goto, file);
+           PL_dowarn = warn_tmp;
         }
 
-        Sub = GvSV(DBsub);       /* name of current sub */
-        sv_setiv( DBsingle, 0 ); /* disable DB single-stepping */
+        sv_setiv(PL_DBsingle, 0);      /* disable DB single-stepping */
 
        {
            char *buffer = getenv("PERL_DPROF_BUFFER");
 
            if (buffer) {
-               SAVE_STACK = atoi(buffer);
+               g_SAVE_STACK = atoi(buffer);
            }
 
            buffer = getenv("PERL_DPROF_TICKS");
 
            if (buffer) {
-               dprof_ticks = atoi(buffer); /* Used under OS/2 only */
-           } else {
-               dprof_ticks = HZ;
+               g_dprof_ticks = atoi(buffer); /* Used under OS/2 only */
+           }
+           else {
+               g_dprof_ticks = HZ;
            }
-       }
 
-        if( (fp = PerlIO_open( "tmon.out", "w" )) == NULL )
-                croak("DProf: unable to write tmon.out, errno = %d\n", errno );
-#ifdef PERLDBf_NONAME
-       default_perldb = PERLDBf_NONAME | PERLDBf_SUB; /* no name needed. */
-#ifdef PERLDBf_GOTO
-       default_perldb = default_perldb | PERLDBf_GOTO;
-#endif 
-       cv_hash = newHV();
-#else
-#  ifdef PERLDBf_SUB
-       default_perldb = PERLDBf_SUB;           /* debug subroutines only. */
-#  endif
-#endif
-        prof_pid = (int)getpid();
+           buffer = getenv("PERL_DPROF_OUT_FILE_NAME");
+           g_out_file_name = savepv(buffer ? buffer : "tmon.out");
+       }
 
-       New( 0, profstack, profstack_max, PROFANY );
+        if ((g_fp = PerlIO_open(g_out_file_name, "w")) == NULL)
+           croak("DProf: unable to write '%s', errno = %d\n",
+                 g_out_file_name, errno);
 
-        prof_recordheader();
+       g_default_perldb = PERLDBf_NONAME | PERLDBf_SUB | PERLDBf_GOTO;
+       g_cv_hash = newHV();
+       g_key_hash = newSV(256);
+        g_prof_pid = (int)getpid();
 
+       New(0, g_profstack, g_profstack_max, PROFANY);
+        prof_recordheader(aTHX);
         DBG_TIMER_NOTIFY("Profiler timer is on.\n");
-        orealtime = rprof_start = Times(&prof_start);
-       otms_utime = prof_start.tms_utime;
-       otms_stime = prof_start.tms_stime;
-       perldb = default_perldb;
+       g_orealtime = g_rprof_start = Times(&g_prof_start);
+       g_otms_utime = g_prof_start.tms_utime;
+       g_otms_stime = g_prof_start.tms_stime;
+       PL_perldb = g_default_perldb;
+    }