#ifdef JEMALLOC_PROF
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define PROF_PREFIX_DEFAULT "jeprof"
#define LG_PROF_BT_MAX_DEFAULT 7
#define LG_PROF_SAMPLE_DEFAULT 0
#define LG_PROF_INTERVAL_DEFAULT -1
#define LG_PROF_TCMAX_DEFAULT -1

/*
 * Hard limit on stack backtrace depth. Note that the version of
 * prof_backtrace() that is based on __builtin_return_address() necessarily has
 * a hard-coded number of backtrace frame handlers.
 */
#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
#  define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
#else
#  define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
#endif
#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)

/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64

/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUF_SIZE 65536

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void **vec;
	unsigned len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t *bt;
	unsigned nignore;
	unsigned max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters. An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative. In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t curobjs;
	int64_t curbytes;
	uint64_t accumobjs;
	uint64_t accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t) cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t) lru_link;

	/*
	 * Associated context. If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t *ctx;

	/*
	 * Threads use memory barriers to update the counters. Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters. (An illustrative
	 * reader-side sketch follows this struct.)
	 */
	unsigned epoch;

	/* Profiling counters. */
	prof_cnt_t cnts;
};
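
/*
 * Illustrative reader-side sketch (an addition for exposition, not part of the
 * original header): one way a consumer could take a consistent snapshot of
 * cnt->cnts under the even/odd epoch protocol described above. In practice the
 * epoch loads would need to be volatile (or otherwise ordered) so the compiler
 * cannot hoist them out of the loop; the function name is hypothetical.
 */
#if 0
static prof_cnt_t
prof_thr_cnt_snapshot(const prof_thr_cnt_t *cnt)
{
	prof_cnt_t snap;
	unsigned epoch0;

	for (;;) {
		epoch0 = cnt->epoch;
		if (epoch0 & 1U)
			continue; /* Writer is mid-update; retry. */
		snap = cnt->cnts; /* Copy the counters. */
		if (cnt->epoch == epoch0)
			break; /* Epoch unchanged; the snapshot is consistent. */
	}
	return (snap);
}
#endif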

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t *bt;

	/* Protects cnt_merged and cnts_ql. */
	malloc_mutex_t lock;

	/* Temporary storage for summation during dump. */
	prof_cnt_t cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t) cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects. Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t) lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void **vec;

	/* Sampling state. */
	uint64_t prn_state;
	uint64_t threshold;
	uint64_t accum;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false. No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool opt_prof_active;
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern ssize_t opt_lg_prof_tcmax; /* lg(max per-thread backtrace cache). */
extern char opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated. Each arena triggers a
 * profile dump when it reaches this threshold. The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t prof_interval;
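
/*
 * Worked example (an addition for exposition): assuming narenas == 4 and
 * prof_interval == 2^30, each arena triggers a dump for roughly every 1 GiB it
 * allocates; in the worst case, where allocation is spread evenly across all
 * four arenas, approximately 4 GiB (prof_interval * narenas) of total
 * allocation can elapse between consecutive dumps.
 */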

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool prof_promote;

/* (1U << opt_lg_prof_bt_max). */
extern unsigned prof_bt_max;

/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
#ifndef NO_TLS
extern __thread prof_tdata_t *prof_tdata_tls
    JEMALLOC_ATTR(tls_model("initial-exec"));
#  define PROF_TCACHE_GET() prof_tdata_tls
#  define PROF_TCACHE_SET(v) do { \
	prof_tdata_tls = (v); \
	pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
} while (0)
#else
#  define PROF_TCACHE_GET() \
	((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
#  define PROF_TCACHE_SET(v) do { \
	pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
} while (0)
#endif
/*
 * Same contents as prof_tdata_tls, but initialized such that the TSD
 * destructor is called when a thread exits, so that prof_tdata_tls contents
 * can be merged, unlinked, and deallocated.
 */
extern pthread_key_t prof_tdata_tsd;

void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *   prof_tdata->threshold = ceil(log(u) / log(1 - p)),
	 *
	 *   where p = 1 / 2^opt_lg_prof_sample.
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prn64(r, 53, prof_tdata->prn_state,
	    (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
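
/*
 * Illustrative sketch (an addition for exposition, not part of the original
 * header): the same inverse-CDF transform written against the standard
 * drand48() generator instead of prn64(). With lg_sample == 19 (a hypothetical
 * value; this header's LG_PROF_SAMPLE_DEFAULT is 0), p == 2^-19, so the
 * returned threshold averages 2^19 bytes (512 KiB) between samples.
 */
#if 0
#include <math.h>
#include <stdlib.h>

static uint64_t
example_sample_threshold(unsigned lg_sample)
{
	double u = 1.0 - drand48(); /* Uniform in (0.0, 1.0]. */
	double p = 1.0 / (double)((uint64_t)1U << lg_sample);

	/* Geometric variate with mean 1/p, as in prof_sample_threshold_update(). */
	return ((uint64_t)(log(u) / log(1.0 - p)) + (uint64_t)1U);
}
#endif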

JEMALLOC_INLINE prof_thr_cnt_t *
prof_alloc_prep(size_t size)
{
#ifdef JEMALLOC_ENABLE_INLINE
	/* This function does not have its own stack frame, because it is inlined. */
#  define NIGNORE 1
#else
#  define NIGNORE 2
#endif
	prof_thr_cnt_t *ret;
	prof_tdata_t *prof_tdata;
	prof_bt_t bt;

	assert(size == s2u(size));

	prof_tdata = PROF_TCACHE_GET();
	if (prof_tdata == NULL) {
		prof_tdata = prof_tdata_init();
		if (prof_tdata == NULL)
			return (NULL);
	}

	if (opt_prof_active == false) {
		/* Sampling is currently inactive, so avoid sampling. */
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
	} else if (opt_lg_prof_sample == 0) {
		/*
		 * Don't bother with sampling logic, since sampling interval is
		 * 1.
		 */
		bt_init(&bt, prof_tdata->vec);
		prof_backtrace(&bt, NIGNORE, prof_bt_max);
		ret = prof_lookup(&bt);
	} else {
		if (prof_tdata->threshold == 0) {
			/*
			 * Initialize. Seed the prng differently for each
			 * thread.
			 */
			prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
			prof_sample_threshold_update(prof_tdata);
		}

		/*
		 * Determine whether to capture a backtrace based on whether
		 * size is enough for prof_tdata->accum to reach
		 * prof_tdata->threshold. However, delay updating these
		 * variables until prof_{m,re}alloc(), because we don't know
		 * for sure that the allocation will succeed.
		 *
		 * Use subtraction rather than addition to avoid potential
		 * integer overflow.
		 */
		if (size >= prof_tdata->threshold - prof_tdata->accum) {
			bt_init(&bt, prof_tdata->vec);
			prof_backtrace(&bt, NIGNORE, prof_bt_max);
			ret = prof_lookup(&bt);
		} else
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
	}

	return (ret);
#undef NIGNORE
}

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		dassert(chunk->arena->magic == ARENA_MAGIC);

		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		dassert(chunk->arena->magic == ARENA_MAGIC);

		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = PROF_TCACHE_GET();
	assert(prof_tdata != NULL);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	assert(ptr != NULL);
	assert(size == isalloc(ptr));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample. For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * prof_alloc_prep() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample. The size passed to
				 * prof_alloc_prep() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(&old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(&old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		assert(size == isalloc(ptr));
		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(&ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(&ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_PROF */