+++ /dev/null
-#ifdef JEMALLOC_PROF
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct prof_bt_s prof_bt_t;
-typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
-typedef struct prof_tdata_s prof_tdata_t;
-
-/* Option defaults. */
-#define PROF_PREFIX_DEFAULT "jeprof"
-#define LG_PROF_BT_MAX_DEFAULT 7
-#define LG_PROF_SAMPLE_DEFAULT 0
-#define LG_PROF_INTERVAL_DEFAULT -1
-#define LG_PROF_TCMAX_DEFAULT -1
-
-/*
- * Hard limit on stack backtrace depth. Note that the version of
- * prof_backtrace() that is based on __builtin_return_address() necessarily has
- * a hard-coded number of backtrace frame handlers.
- */
-#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
-# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
-#else
-# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
-#endif
-#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
-
-/* Initial hash table size. */
-#define PROF_CKH_MINITEMS 64
-
-/* Size of memory buffer to use when writing dump files. */
-#define PROF_DUMP_BUF_SIZE 65536
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-struct prof_bt_s {
- /* Backtrace, stored as len program counters. */
- void **vec;
- unsigned len;
-};
-
-#ifdef JEMALLOC_PROF_LIBGCC
-/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
-typedef struct {
- prof_bt_t *bt;
- unsigned nignore;
- unsigned max;
-} prof_unwind_data_t;
-#endif
-
-struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
- uint64_t accumobjs;
- uint64_t accumbytes;
-};
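-
-/*
- * Example of why the cur* counters are signed (an illustrative sketch; the
- * names "a" and "b" are hypothetical): if thread A allocates an object and
- * thread B later frees it, each thread updates its own prof_thr_cnt_t, both
- * linked into the same prof_ctx_t cnts_ql:
- *
- *	prof_thr_cnt_t a, b;				// zeroed, same ctx
- *	a.cnts.curobjs++; a.cnts.curbytes += 4096;	// thread A: allocation
- *	b.cnts.curobjs--; b.cnts.curbytes -= 4096;	// thread B: deallocation
- *
- * b.cnts.curobjs is now -1, yet the totals merged across the ctx still sum
- * to zero; int64_t accommodates the transiently negative per-thread values.
- */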
-
-struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's cnts_ql. */
- ql_elm(prof_thr_cnt_t) cnts_link;
-
- /* Linkage into thread's LRU. */
- ql_elm(prof_thr_cnt_t) lru_link;
-
- /*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not cached in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's cnts_ql.
- */
- prof_ctx_t *ctx;
-
- /*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
-	 * The reader must ensure 1) that the epoch is even while it reads the
-	 * counters, and 2) that the epoch doesn't change between the time it
-	 * starts and finishes reading the counters (see the reader-side sketch
-	 * following this struct definition).
- */
- unsigned epoch;
-
- /* Profiling counters. */
- prof_cnt_t cnts;
-};
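-
-/*
- * Reader-side sketch of the epoch protocol described in prof_thr_cnt_s above
- * (illustrative only; mb_read() stands in for whatever read barrier the
- * reading code pairs with the writer's mb_write() and is not declared here):
- *
- *	prof_cnt_t snap;
- *	unsigned e0, e1;
- *
- *	do {
- *		e0 = cnt->epoch;
- *		mb_read();
- *		snap = cnt->cnts;
- *		mb_read();
- *		e1 = cnt->epoch;
- *	} while ((e0 & 1) != 0 || e0 != e1);
- *
- * The loop retries until the epoch was even (no update in progress) and
- * unchanged across the copy, so snap holds a consistent set of counters.
- */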
-
-struct prof_ctx_s {
- /* Associated backtrace. */
- prof_bt_t *bt;
-
- /* Protects cnt_merged and cnts_ql. */
- malloc_mutex_t lock;
-
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
- /*
- * List of profile counters, one for each thread that has allocated in
- * this context.
- */
- ql_head(prof_thr_cnt_t) cnts_ql;
-};
-
-struct prof_tdata_s {
- /*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
- * cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no
- * others will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
- */
- ckh_t bt2cnt;
-
- /* LRU for contents of bt2cnt. */
- ql_head(prof_thr_cnt_t) lru_ql;
-
- /* Backtrace vector, used for calls to prof_backtrace(). */
- void **vec;
-
- /* Sampling state. */
- uint64_t prn_state;
- uint64_t threshold;
- uint64_t accum;
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern bool opt_prof;
-/*
- * Even if opt_prof is true, sampling can be temporarily disabled by setting
- * opt_prof_active to false. No locking is used when updating opt_prof_active,
- * so there are no guarantees regarding how long it will take for all threads
- * to notice state changes.
- */
-extern bool opt_prof_active;
-extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
-extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
-extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool opt_prof_gdump; /* High-water memory dumping. */
-extern bool opt_prof_leak; /* Dump leak summary at exit. */
-extern bool opt_prof_accum; /* Report cumulative bytes. */
-extern ssize_t opt_lg_prof_tcmax;	/* lg(max per thread backtrace cache). */
-extern char opt_prof_prefix[PATH_MAX + 1];
-
-/*
- * Profile dump interval, measured in bytes allocated. Each arena triggers a
- * profile dump when it reaches this threshold. The effect is that the
- * interval between profile dumps averages prof_interval, though the actual
- * interval between dumps will tend to be sporadic, and the interval will be a
- * maximum of approximately (prof_interval * narenas).
- */
-extern uint64_t prof_interval;
-
-/*
- * If true, promote small sampled objects to large objects, since small run
- * headers do not have embedded profile context pointers.
- */
-extern bool prof_promote;
-
-/* (1U << opt_lg_prof_bt_max). */
-extern unsigned prof_bt_max;
-
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-#ifndef NO_TLS
-extern __thread prof_tdata_t *prof_tdata_tls
- JEMALLOC_ATTR(tls_model("initial-exec"));
-# define PROF_TCACHE_GET() prof_tdata_tls
-# define PROF_TCACHE_SET(v) do { \
- prof_tdata_tls = (v); \
- pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
-} while (0)
-#else
-# define PROF_TCACHE_GET() \
- ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
-# define PROF_TCACHE_SET(v) do { \
- pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
-} while (0)
-#endif
-/*
- * Same contents as prof_tdata_tls, but initialized such that the TSD
- * destructor is called when a thread exits, so that prof_tdata_tls contents
- * can be merged, unlinked, and deallocated.
- */
-extern pthread_key_t prof_tdata_tsd;
-
-void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
-void prof_idump(void);
-bool prof_mdump(const char *filename);
-void prof_gdump(void);
-prof_tdata_t *prof_tdata_init(void);
-void prof_boot0(void);
-void prof_boot1(void);
-bool prof_boot2(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#define PROF_ALLOC_PREP(nignore, size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- prof_tdata = PROF_TCACHE_GET(); \
- if (prof_tdata == NULL) { \
- prof_tdata = prof_tdata_init(); \
- if (prof_tdata == NULL) { \
- ret = NULL; \
- break; \
- } \
- } \
- \
- if (opt_prof_active == false) { \
- /* Sampling is currently inactive, so avoid sampling. */\
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else if (opt_lg_prof_sample == 0) { \
- /* Don't bother with sampling logic, since sampling */\
- /* interval is 1. */\
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore, prof_bt_max); \
- ret = prof_lookup(&bt); \
- } else { \
- if (prof_tdata->threshold == 0) { \
- /* Initialize. Seed the prng differently for */\
- /* each thread. */\
- prof_tdata->prn_state = \
- (uint64_t)(uintptr_t)&size; \
- prof_sample_threshold_update(prof_tdata); \
- } \
- \
- /* Determine whether to capture a backtrace based on */\
- /* whether size is enough for prof_accum to reach */\
- /* prof_tdata->threshold. However, delay updating */\
- /* these variables until prof_{m,re}alloc(), because */\
- /* we don't know for sure that the allocation will */\
- /* succeed. */\
- /* */\
- /* Use subtraction rather than addition to avoid */\
- /* potential integer overflow. */\
- if (size >= prof_tdata->threshold - \
- prof_tdata->accum) { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore, prof_bt_max); \
- ret = prof_lookup(&bt); \
- } else \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } \
-} while (0)
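-
-/*
- * Sketch of how an allocation path typically uses PROF_ALLOC_PREP (the real
- * call sites differ in detail, e.g. small-object promotion when prof_promote
- * is true; imalloc() is shown only as a stand-in for the actual allocation):
- *
- *	prof_thr_cnt_t *cnt;
- *	size_t usize = s2u(size);
- *	void *p;
- *
- *	PROF_ALLOC_PREP(1, usize, cnt);
- *	if (cnt == NULL)
- *		return (NULL);		// prof_tdata_init() failed
- *	p = imalloc(usize);
- *	if (p == NULL)
- *		return (NULL);		// nothing to undo; no counters touched yet
- *	prof_malloc(p, usize, cnt);	// cnt == (uintptr_t)1U means "not sampled"
- */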
-
-#ifndef JEMALLOC_ENABLE_INLINE
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-bool prof_sample_accum_update(size_t size);
-void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
- size_t old_size, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-JEMALLOC_INLINE void
-prof_sample_threshold_update(prof_tdata_t *prof_tdata)
-{
- uint64_t r;
- double u;
-
- /*
- * Compute sample threshold as a geometrically distributed random
- * variable with mean (2^opt_lg_prof_sample).
- *
- * __ __
- * | log(u) | 1
- * prof_tdata->threshold = | -------- |, where p = -------------------
- * | log(1-p) | opt_lg_prof_sample
- * 2
- *
- * For more information on the math, see:
- *
- * Non-Uniform Random Variate Generation
- * Luc Devroye
- * Springer-Verlag, New York, 1986
- * pp 500
- * (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
- */
- prn64(r, 53, prof_tdata->prn_state,
- (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
- u = (double)r * (1.0/9007199254740992.0L);
- prof_tdata->threshold = (uint64_t)(log(u) /
- log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
- + (uint64_t)1U;
-}
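-
-/*
- * Standalone sanity check for the threshold formula (illustrative only; it
- * uses drand48() in place of prn64() and is not part of jemalloc):
- *
- *	#include <math.h>
- *	#include <stdio.h>
- *	#include <stdlib.h>
- *
- *	int
- *	main(void)
- *	{
- *		unsigned lg_sample = 13;	// mean should be ~2^13 = 8192
- *		double p = 1.0 / (double)(1UL << lg_sample);
- *		double sum = 0.0;
- *		int i, n = 1000000;
- *
- *		srand48(42);
- *		for (i = 0; i < n; i++) {
- *			double u = 1.0 - drand48();	// uniform in (0, 1]
- *			sum += floor(log(u) / log(1.0 - p)) + 1.0;
- *		}
- *		printf("mean threshold: %.1f\n", sum / n);
- *		return (0);
- *	}
- */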
-
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- dassert(chunk->arena->magic == ARENA_MAGIC);
-
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
-}
-
-JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
-{
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- dassert(chunk->arena->magic == ARENA_MAGIC);
-
- arena_prof_ctx_set(ptr, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
-}
-
-JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size)
-{
- prof_tdata_t *prof_tdata;
-
- /* Sampling logic is unnecessary if the interval is 1. */
- assert(opt_lg_prof_sample != 0);
-
- prof_tdata = PROF_TCACHE_GET();
- assert(prof_tdata != NULL);
-
- /* Take care to avoid integer overflow. */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- prof_tdata->accum -= (prof_tdata->threshold - size);
- /* Compute new sample threshold. */
- prof_sample_threshold_update(prof_tdata);
- while (prof_tdata->accum >= prof_tdata->threshold) {
- prof_tdata->accum -= prof_tdata->threshold;
- prof_sample_threshold_update(prof_tdata);
- }
- return (false);
- } else {
- prof_tdata->accum += size;
- return (true);
- }
-}
-
-JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
-{
-
- assert(ptr != NULL);
- assert(size == isalloc(ptr));
-
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(size)) {
- /*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the size passed to
- * PROF_ALLOC_PREP() and prof_malloc().
- */
- assert((uintptr_t)cnt == (uintptr_t)1U);
- }
- }
-
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
- }
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
-}
-
-JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
- size_t old_size, prof_ctx_t *old_ctx)
-{
- prof_thr_cnt_t *told_cnt;
-
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
-
- if (ptr != NULL) {
- assert(size == isalloc(ptr));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(size)) {
- /*
- * Don't sample. The size passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual size was insufficient to cross
- * the sample threshold.
- */
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- }
- }
-
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
- /*
- * It's too late to propagate OOM for this realloc(),
-			 * so operate directly on old_ctx->cnt_merged.
- */
- malloc_mutex_lock(&old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_size;
- malloc_mutex_unlock(&old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
- cnt->epoch++;
- } else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_size;
- }
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
-}
-
-JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
-{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
-
- if ((uintptr_t)ctx > (uintptr_t)1) {
- assert(size == isalloc(ptr));
- prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
-			 * directly on ctx->cnt_merged.
- */
- malloc_mutex_lock(&ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(&ctx->lock);
- }
- }
-}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-#endif /* JEMALLOC_PROF */