/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth. The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's. No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
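
/*
 * A real prof_tdata_t pointer is distinguished from these sentinels with a
 * single range check, as prof_tdata_get() and PROF_ALLOC_PREP() below do:
 *
 *	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		... NULL, REINCARNATED, or PURGATORY; no usable tdata ...
 */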

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters. An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative. In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context. If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters. Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters (see the reader sketch
	 * following this struct).
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
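
/*
 * A minimal reader-side sketch of the epoch protocol above, guarded out
 * because it is not part of this header's interface (the function name is
 * illustrative only; the dump code in prof.c takes its counter snapshots in
 * a similar way):
 */
#if 0
JEMALLOC_INLINE prof_cnt_t
prof_thr_cnt_snapshot(prof_thr_cnt_t *cnt)
{
	volatile unsigned *epoch = &cnt->epoch;
	prof_cnt_t snap;

	for (;;) {
		unsigned epoch0 = *epoch;

		/* Wait until the epoch is even (no write in progress). */
		if (epoch0 & 1U)
			continue;

		snap = cnt->cnts;

		/* Keep the copy only if the epoch did not change meanwhile. */
		if (*epoch == epoch0)
			break;
	}

	return (snap);
}
#endif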

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects. Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false. No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated. Each arena triggers a
 * profile dump when it reaches this threshold. The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
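
/*
 * For example, with lg_prof_interval:30 (prof_interval = 2^30 bytes = 1 GiB)
 * and four arenas, dumps occur on average once per GiB allocated, but
 * consecutive dumps may be separated by up to roughly 4 GiB of allocation.
 */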

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {	\
	prof_tdata_t *prof_tdata;	\
	prof_bt_t bt;	\
	\
	assert(size == s2u(size));	\
	\
	prof_tdata = prof_tdata_get();	\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)	\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;	\
		else	\
			ret = NULL;	\
		break;	\
	}	\
	\
	if (opt_prof_active == false) {	\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;	\
	} else if (opt_lg_prof_sample == 0) {	\
		/* Don't bother with sampling logic, since sampling */\
		/* interval is 1. */\
		bt_init(&bt, prof_tdata->vec);	\
		prof_backtrace(&bt, nignore);	\
		ret = prof_lookup(&bt);	\
	} else {	\
		if (prof_tdata->threshold == 0) {	\
			/* Initialize. Seed the prng differently for */\
			/* each thread. */\
			prof_tdata->prng_state =	\
			    (uint64_t)(uintptr_t)&size;	\
			prof_sample_threshold_update(prof_tdata);	\
		}	\
		\
		/* Determine whether to capture a backtrace based on */\
		/* whether size is enough for prof_accum to reach */\
		/* prof_tdata->threshold. However, delay updating */\
		/* these variables until prof_{m,re}alloc(), because */\
		/* we don't know for sure that the allocation will */\
		/* succeed. */\
		/* */\
		/* Use subtraction rather than addition to avoid */\
		/* potential integer overflow. */\
		if (size >= prof_tdata->threshold -	\
		    prof_tdata->accum) {	\
			bt_init(&bt, prof_tdata->vec);	\
			prof_backtrace(&bt, nignore);	\
			ret = prof_lookup(&bt);	\
		} else	\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;	\
	}	\
} while (0)
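
/*
 * A minimal usage sketch (illustrative only; the wrapper name is made up, and
 * the real callers in jemalloc.c also handle small-object promotion, see
 * prof_promote above). PROF_ALLOC_PREP() leaves ret NULL on OOM, equal to
 * (prof_thr_cnt_t *)1U when the allocation should not be sampled, and a real
 * prof_thr_cnt_t pointer when a backtrace was captured.
 */
#if 0
JEMALLOC_INLINE void *
prof_malloc_sketch(size_t usize)
{
	void *p;
	prof_thr_cnt_t *cnt;

	assert(usize == s2u(usize));
	PROF_ALLOC_PREP(1, usize, cnt);	/* Ignore this wrapper's frame. */
	if (cnt == NULL)
		return (NULL);	/* OOM in prof_tdata_init() or prof_lookup(). */
	p = imalloc(usize);
	if (p == NULL)
		return (NULL);
	prof_malloc(p, usize, cnt);	/* Attribute p to cnt's backtrace. */
	return (p);
}
#endif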

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(void);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
		if (prof_tdata == NULL)
			prof_tdata = prof_tdata_init();
	}

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                          __        __
	 *                         |  log(u)   |                    1
	 * prof_tdata->threshold = | --------- |, where p = -------------------
	 *                         | log(1-p)  |             opt_lg_prof_sample
	 *                                                  2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
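
/*
 * With the default opt_lg_prof_sample of 19 (LG_PROF_SAMPLE_DEFAULT),
 * p = 2^-19, so the threshold is drawn with mean 1/p = 524288; on average
 * one allocation is sampled per 512 KiB of allocated bytes.
 */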

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/*
	 * Equivalent to (prof_tdata->accum + size >= prof_tdata->threshold);
	 * written as a subtraction to avoid potential integer overflow.
	 */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(size == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample. For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample. The size passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/