]> git.saurik.com Git - redis.git/blame_incremental - deps/jemalloc/src/prof.c
Better installation info inside README file.
[redis.git] / deps / jemalloc / src / prof.c
... / ...
CommitLineData
1#define JEMALLOC_PROF_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3#ifdef JEMALLOC_PROF
4/******************************************************************************/
5
6#ifdef JEMALLOC_PROF_LIBUNWIND
7#define UNW_LOCAL_ONLY
8#include <libunwind.h>
9#endif
10
11#ifdef JEMALLOC_PROF_LIBGCC
12#include <unwind.h>
13#endif
14
15/******************************************************************************/
16/* Data. */
17
18bool opt_prof = false;
19bool opt_prof_active = true;
20size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
21size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
22ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
23bool opt_prof_gdump = false;
24bool opt_prof_leak = false;
25bool opt_prof_accum = true;
26ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
27char opt_prof_prefix[PATH_MAX + 1];
28
29uint64_t prof_interval;
30bool prof_promote;
31
32unsigned prof_bt_max;
33
34#ifndef NO_TLS
35__thread prof_tdata_t *prof_tdata_tls
36 JEMALLOC_ATTR(tls_model("initial-exec"));
37#endif
38pthread_key_t prof_tdata_tsd;
39
40/*
41 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
42 * structure that knows about all backtraces currently captured.
43 */
44static ckh_t bt2ctx;
45static malloc_mutex_t bt2ctx_mtx;
46
47static malloc_mutex_t prof_dump_seq_mtx;
48static uint64_t prof_dump_seq;
49static uint64_t prof_dump_iseq;
50static uint64_t prof_dump_mseq;
51static uint64_t prof_dump_useq;
52
53/*
54 * This buffer is rather large for stack allocation, so use a single buffer for
55 * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since
56 * it must be locked anyway during dumping.
57 */
58static char prof_dump_buf[PROF_DUMP_BUF_SIZE];
59static unsigned prof_dump_buf_end;
60static int prof_dump_fd;
61
62/* Do not dump any profiles until bootstrapping is complete. */
63static bool prof_booted = false;
64
65static malloc_mutex_t enq_mtx;
66static bool enq;
67static bool enq_idump;
68static bool enq_gdump;
69
70/******************************************************************************/
71/* Function prototypes for non-inline static functions. */
72
73static prof_bt_t *bt_dup(prof_bt_t *bt);
74static void bt_destroy(prof_bt_t *bt);
75#ifdef JEMALLOC_PROF_LIBGCC
76static _Unwind_Reason_Code prof_unwind_init_callback(
77 struct _Unwind_Context *context, void *arg);
78static _Unwind_Reason_Code prof_unwind_callback(
79 struct _Unwind_Context *context, void *arg);
80#endif
81static bool prof_flush(bool propagate_err);
82static bool prof_write(const char *s, bool propagate_err);
83static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
84 size_t *leak_nctx);
85static void prof_ctx_destroy(prof_ctx_t *ctx);
86static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
87static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
88 bool propagate_err);
89static bool prof_dump_maps(bool propagate_err);
90static bool prof_dump(const char *filename, bool leakcheck,
91 bool propagate_err);
92static void prof_dump_filename(char *filename, char v, int64_t vseq);
93static void prof_fdump(void);
94static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
95 size_t *hash2);
96static bool prof_bt_keycomp(const void *k1, const void *k2);
97static void prof_tdata_cleanup(void *arg);
98
99/******************************************************************************/
100
101void
102bt_init(prof_bt_t *bt, void **vec)
103{
104
105 bt->vec = vec;
106 bt->len = 0;
107}
108
109static void
110bt_destroy(prof_bt_t *bt)
111{
112
113 idalloc(bt);
114}
115
116static prof_bt_t *
117bt_dup(prof_bt_t *bt)
118{
119 prof_bt_t *ret;
120
121 /*
122 * Create a single allocation that has space for vec immediately
123 * following the prof_bt_t structure. The backtraces that get
124 * stored in the backtrace caches are copied from stack-allocated
125 * temporary variables, so size is known at creation time. Making this
126 * a contiguous object improves cache locality.
127 */
128 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
129 (bt->len * sizeof(void *)));
130 if (ret == NULL)
131 return (NULL);
132 ret->vec = (void **)((uintptr_t)ret +
133 QUANTUM_CEILING(sizeof(prof_bt_t)));
134 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
135 ret->len = bt->len;
136
137 return (ret);
138}
139
140static inline void
141prof_enter(void)
142{
143
144 malloc_mutex_lock(&enq_mtx);
145 enq = true;
146 malloc_mutex_unlock(&enq_mtx);
147
148 malloc_mutex_lock(&bt2ctx_mtx);
149}
150
151static inline void
152prof_leave(void)
153{
154 bool idump, gdump;
155
156 malloc_mutex_unlock(&bt2ctx_mtx);
157
158 malloc_mutex_lock(&enq_mtx);
159 enq = false;
160 idump = enq_idump;
161 enq_idump = false;
162 gdump = enq_gdump;
163 enq_gdump = false;
164 malloc_mutex_unlock(&enq_mtx);
165
166 if (idump)
167 prof_idump();
168 if (gdump)
169 prof_gdump();
170}
171
172#ifdef JEMALLOC_PROF_LIBUNWIND
173void
174prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
175{
176 unw_context_t uc;
177 unw_cursor_t cursor;
178 unsigned i;
179 int err;
180
181 assert(bt->len == 0);
182 assert(bt->vec != NULL);
183 assert(max <= (1U << opt_lg_prof_bt_max));
184
185 unw_getcontext(&uc);
186 unw_init_local(&cursor, &uc);
187
188 /* Throw away (nignore+1) stack frames, if that many exist. */
189 for (i = 0; i < nignore + 1; i++) {
190 err = unw_step(&cursor);
191 if (err <= 0)
192 return;
193 }
194
195 /*
196 * Iterate over stack frames until there are no more, or until no space
197 * remains in bt.
198 */
199 for (i = 0; i < max; i++) {
200 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
201 bt->len++;
202 err = unw_step(&cursor);
203 if (err <= 0)
204 break;
205 }
206}
207#endif
208#ifdef JEMALLOC_PROF_LIBGCC
209static _Unwind_Reason_Code
210prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
211{
212
213 return (_URC_NO_REASON);
214}
215
216static _Unwind_Reason_Code
217prof_unwind_callback(struct _Unwind_Context *context, void *arg)
218{
219 prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
220
221 if (data->nignore > 0)
222 data->nignore--;
223 else {
224 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
225 data->bt->len++;
226 if (data->bt->len == data->max)
227 return (_URC_END_OF_STACK);
228 }
229
230 return (_URC_NO_REASON);
231}
232
233void
234prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
235{
236 prof_unwind_data_t data = {bt, nignore, max};
237
238 _Unwind_Backtrace(prof_unwind_callback, &data);
239}
240#endif
241#ifdef JEMALLOC_PROF_GCC
242void
243prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
244{
245#define BT_FRAME(i) \
246 if ((i) < nignore + max) { \
247 void *p; \
248 if (__builtin_frame_address(i) == 0) \
249 return; \
250 p = __builtin_return_address(i); \
251 if (p == NULL) \
252 return; \
253 if (i >= nignore) { \
254 bt->vec[(i) - nignore] = p; \
255 bt->len = (i) - nignore + 1; \
256 } \
257 } else \
258 return;
259
260 assert(nignore <= 3);
261 assert(max <= (1U << opt_lg_prof_bt_max));
262
263 BT_FRAME(0)
264 BT_FRAME(1)
265 BT_FRAME(2)
266 BT_FRAME(3)
267 BT_FRAME(4)
268 BT_FRAME(5)
269 BT_FRAME(6)
270 BT_FRAME(7)
271 BT_FRAME(8)
272 BT_FRAME(9)
273
274 BT_FRAME(10)
275 BT_FRAME(11)
276 BT_FRAME(12)
277 BT_FRAME(13)
278 BT_FRAME(14)
279 BT_FRAME(15)
280 BT_FRAME(16)
281 BT_FRAME(17)
282 BT_FRAME(18)
283 BT_FRAME(19)
284
285 BT_FRAME(20)
286 BT_FRAME(21)
287 BT_FRAME(22)
288 BT_FRAME(23)
289 BT_FRAME(24)
290 BT_FRAME(25)
291 BT_FRAME(26)
292 BT_FRAME(27)
293 BT_FRAME(28)
294 BT_FRAME(29)
295
296 BT_FRAME(30)
297 BT_FRAME(31)
298 BT_FRAME(32)
299 BT_FRAME(33)
300 BT_FRAME(34)
301 BT_FRAME(35)
302 BT_FRAME(36)
303 BT_FRAME(37)
304 BT_FRAME(38)
305 BT_FRAME(39)
306
307 BT_FRAME(40)
308 BT_FRAME(41)
309 BT_FRAME(42)
310 BT_FRAME(43)
311 BT_FRAME(44)
312 BT_FRAME(45)
313 BT_FRAME(46)
314 BT_FRAME(47)
315 BT_FRAME(48)
316 BT_FRAME(49)
317
318 BT_FRAME(50)
319 BT_FRAME(51)
320 BT_FRAME(52)
321 BT_FRAME(53)
322 BT_FRAME(54)
323 BT_FRAME(55)
324 BT_FRAME(56)
325 BT_FRAME(57)
326 BT_FRAME(58)
327 BT_FRAME(59)
328
329 BT_FRAME(60)
330 BT_FRAME(61)
331 BT_FRAME(62)
332 BT_FRAME(63)
333 BT_FRAME(64)
334 BT_FRAME(65)
335 BT_FRAME(66)
336 BT_FRAME(67)
337 BT_FRAME(68)
338 BT_FRAME(69)
339
340 BT_FRAME(70)
341 BT_FRAME(71)
342 BT_FRAME(72)
343 BT_FRAME(73)
344 BT_FRAME(74)
345 BT_FRAME(75)
346 BT_FRAME(76)
347 BT_FRAME(77)
348 BT_FRAME(78)
349 BT_FRAME(79)
350
351 BT_FRAME(80)
352 BT_FRAME(81)
353 BT_FRAME(82)
354 BT_FRAME(83)
355 BT_FRAME(84)
356 BT_FRAME(85)
357 BT_FRAME(86)
358 BT_FRAME(87)
359 BT_FRAME(88)
360 BT_FRAME(89)
361
362 BT_FRAME(90)
363 BT_FRAME(91)
364 BT_FRAME(92)
365 BT_FRAME(93)
366 BT_FRAME(94)
367 BT_FRAME(95)
368 BT_FRAME(96)
369 BT_FRAME(97)
370 BT_FRAME(98)
371 BT_FRAME(99)
372
373 BT_FRAME(100)
374 BT_FRAME(101)
375 BT_FRAME(102)
376 BT_FRAME(103)
377 BT_FRAME(104)
378 BT_FRAME(105)
379 BT_FRAME(106)
380 BT_FRAME(107)
381 BT_FRAME(108)
382 BT_FRAME(109)
383
384 BT_FRAME(110)
385 BT_FRAME(111)
386 BT_FRAME(112)
387 BT_FRAME(113)
388 BT_FRAME(114)
389 BT_FRAME(115)
390 BT_FRAME(116)
391 BT_FRAME(117)
392 BT_FRAME(118)
393 BT_FRAME(119)
394
395 BT_FRAME(120)
396 BT_FRAME(121)
397 BT_FRAME(122)
398 BT_FRAME(123)
399 BT_FRAME(124)
400 BT_FRAME(125)
401 BT_FRAME(126)
402 BT_FRAME(127)
403
404 /* Extras to compensate for nignore. */
405 BT_FRAME(128)
406 BT_FRAME(129)
407 BT_FRAME(130)
408#undef BT_FRAME
409}
410#endif
411
412prof_thr_cnt_t *
413prof_lookup(prof_bt_t *bt)
414{
415 union {
416 prof_thr_cnt_t *p;
417 void *v;
418 } ret;
419 prof_tdata_t *prof_tdata;
420
421 prof_tdata = PROF_TCACHE_GET();
422 if (prof_tdata == NULL) {
423 prof_tdata = prof_tdata_init();
424 if (prof_tdata == NULL)
425 return (NULL);
426 }
427
428 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
429 union {
430 prof_bt_t *p;
431 void *v;
432 } btkey;
433 union {
434 prof_ctx_t *p;
435 void *v;
436 } ctx;
437 bool new_ctx;
438
439 /*
440 * This thread's cache lacks bt. Look for it in the global
441 * cache.
442 */
443 prof_enter();
444 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
445 /* bt has never been seen before. Insert it. */
446 ctx.v = imalloc(sizeof(prof_ctx_t));
447 if (ctx.v == NULL) {
448 prof_leave();
449 return (NULL);
450 }
451 btkey.p = bt_dup(bt);
452 if (btkey.v == NULL) {
453 prof_leave();
454 idalloc(ctx.v);
455 return (NULL);
456 }
457 ctx.p->bt = btkey.p;
458 if (malloc_mutex_init(&ctx.p->lock)) {
459 prof_leave();
460 idalloc(btkey.v);
461 idalloc(ctx.v);
462 return (NULL);
463 }
464 memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
465 ql_new(&ctx.p->cnts_ql);
466 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
467 /* OOM. */
468 prof_leave();
469 malloc_mutex_destroy(&ctx.p->lock);
470 idalloc(btkey.v);
471 idalloc(ctx.v);
472 return (NULL);
473 }
474 /*
475 * Artificially raise curobjs, in order to avoid a race
476 * condition with prof_ctx_merge()/prof_ctx_destroy().
477 */
478 ctx.p->cnt_merged.curobjs++;
479 new_ctx = true;
480 } else
481 new_ctx = false;
482 prof_leave();
483
484 /* Link a prof_thd_cnt_t into ctx for this thread. */
485 if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
486 == (ZU(1) << opt_lg_prof_tcmax)) {
487 assert(ckh_count(&prof_tdata->bt2cnt) > 0);
488 /*
489 * Flush the least recently used cnt in order to keep
490 * bt2cnt from becoming too large.
491 */
492 ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
493 assert(ret.v != NULL);
494 ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
495 NULL);
496 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
497 prof_ctx_merge(ret.p->ctx, ret.p);
498 /* ret can now be re-used. */
499 } else {
500 assert(opt_lg_prof_tcmax < 0 ||
501 ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
502 opt_lg_prof_tcmax));
503 /* Allocate and partially initialize a new cnt. */
504 ret.v = imalloc(sizeof(prof_thr_cnt_t));
505 if (ret.p == NULL) {
506 if (new_ctx) {
507 malloc_mutex_lock(&ctx.p->lock);
508 ctx.p->cnt_merged.curobjs--;
509 malloc_mutex_unlock(&ctx.p->lock);
510 }
511 return (NULL);
512 }
513 ql_elm_new(ret.p, cnts_link);
514 ql_elm_new(ret.p, lru_link);
515 }
516 /* Finish initializing ret. */
517 ret.p->ctx = ctx.p;
518 ret.p->epoch = 0;
519 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
520 if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
521 if (new_ctx) {
522 malloc_mutex_lock(&ctx.p->lock);
523 ctx.p->cnt_merged.curobjs--;
524 malloc_mutex_unlock(&ctx.p->lock);
525 }
526 idalloc(ret.v);
527 return (NULL);
528 }
529 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
530 malloc_mutex_lock(&ctx.p->lock);
531 ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
532 if (new_ctx)
533 ctx.p->cnt_merged.curobjs--;
534 malloc_mutex_unlock(&ctx.p->lock);
535 } else {
536 /* Move ret to the front of the LRU. */
537 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
538 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
539 }
540
541 return (ret.p);
542}
543
544static bool
545prof_flush(bool propagate_err)
546{
547 bool ret = false;
548 ssize_t err;
549
550 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
551 if (err == -1) {
552 if (propagate_err == false) {
553 malloc_write("<jemalloc>: write() failed during heap "
554 "profile flush\n");
555 if (opt_abort)
556 abort();
557 }
558 ret = true;
559 }
560 prof_dump_buf_end = 0;
561
562 return (ret);
563}
564
565static bool
566prof_write(const char *s, bool propagate_err)
567{
568 unsigned i, slen, n;
569
570 i = 0;
571 slen = strlen(s);
572 while (i < slen) {
573 /* Flush the buffer if it is full. */
574 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
575 if (prof_flush(propagate_err) && propagate_err)
576 return (true);
577
578 if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
579 /* Finish writing. */
580 n = slen - i;
581 } else {
582 /* Write as much of s as will fit. */
583 n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
584 }
585 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
586 prof_dump_buf_end += n;
587 i += n;
588 }
589
590 return (false);
591}
592
593static void
594prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
595{
596 prof_thr_cnt_t *thr_cnt;
597 prof_cnt_t tcnt;
598
599 malloc_mutex_lock(&ctx->lock);
600
601 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
602 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
603 volatile unsigned *epoch = &thr_cnt->epoch;
604
605 while (true) {
606 unsigned epoch0 = *epoch;
607
608 /* Make sure epoch is even. */
609 if (epoch0 & 1U)
610 continue;
611
612 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
613
614 /* Terminate if epoch didn't change while reading. */
615 if (*epoch == epoch0)
616 break;
617 }
618
619 ctx->cnt_summed.curobjs += tcnt.curobjs;
620 ctx->cnt_summed.curbytes += tcnt.curbytes;
621 if (opt_prof_accum) {
622 ctx->cnt_summed.accumobjs += tcnt.accumobjs;
623 ctx->cnt_summed.accumbytes += tcnt.accumbytes;
624 }
625 }
626
627 if (ctx->cnt_summed.curobjs != 0)
628 (*leak_nctx)++;
629
630 /* Add to cnt_all. */
631 cnt_all->curobjs += ctx->cnt_summed.curobjs;
632 cnt_all->curbytes += ctx->cnt_summed.curbytes;
633 if (opt_prof_accum) {
634 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
635 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
636 }
637
638 malloc_mutex_unlock(&ctx->lock);
639}
640
641static void
642prof_ctx_destroy(prof_ctx_t *ctx)
643{
644
645 /*
646 * Check that ctx is still unused by any thread cache before destroying
647 * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
648 * avoid a race condition with this function, and prof_ctx_merge()
649 * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
650 * between the main body of prof_ctx_merge() and entry into this
651 * function.
652 */
653 prof_enter();
654 malloc_mutex_lock(&ctx->lock);
655 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
656 assert(ctx->cnt_merged.curbytes == 0);
657 assert(ctx->cnt_merged.accumobjs == 0);
658 assert(ctx->cnt_merged.accumbytes == 0);
659 /* Remove ctx from bt2ctx. */
660 ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
661 prof_leave();
662 /* Destroy ctx. */
663 malloc_mutex_unlock(&ctx->lock);
664 bt_destroy(ctx->bt);
665 malloc_mutex_destroy(&ctx->lock);
666 idalloc(ctx);
667 } else {
668 /* Compensate for increment in prof_ctx_merge(). */
669 ctx->cnt_merged.curobjs--;
670 malloc_mutex_unlock(&ctx->lock);
671 prof_leave();
672 }
673}
674
675static void
676prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
677{
678 bool destroy;
679
680 /* Merge cnt stats and detach from ctx. */
681 malloc_mutex_lock(&ctx->lock);
682 ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
683 ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
684 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
685 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
686 ql_remove(&ctx->cnts_ql, cnt, cnts_link);
687 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
688 ctx->cnt_merged.curobjs == 0) {
689 /*
690 * Artificially raise ctx->cnt_merged.curobjs in order to keep
691 * another thread from winning the race to destroy ctx while
692 * this one has ctx->lock dropped. Without this, it would be
693 * possible for another thread to:
694 *
695 * 1) Sample an allocation associated with ctx.
696 * 2) Deallocate the sampled object.
697 * 3) Successfully prof_ctx_destroy(ctx).
698 *
699 * The result would be that ctx no longer exists by the time
700 * this thread accesses it in prof_ctx_destroy().
701 */
702 ctx->cnt_merged.curobjs++;
703 destroy = true;
704 } else
705 destroy = false;
706 malloc_mutex_unlock(&ctx->lock);
707 if (destroy)
708 prof_ctx_destroy(ctx);
709}
710
711static bool
712prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
713{
714 char buf[UMAX2S_BUFSIZE];
715 unsigned i;
716
717 if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
718 assert(ctx->cnt_summed.curbytes == 0);
719 assert(ctx->cnt_summed.accumobjs == 0);
720 assert(ctx->cnt_summed.accumbytes == 0);
721 return (false);
722 }
723
724 if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
725 || prof_write(": ", propagate_err)
726 || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
727 propagate_err)
728 || prof_write(" [", propagate_err)
729 || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
730 propagate_err)
731 || prof_write(": ", propagate_err)
732 || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
733 propagate_err)
734 || prof_write("] @", propagate_err))
735 return (true);
736
737 for (i = 0; i < bt->len; i++) {
738 if (prof_write(" 0x", propagate_err)
739 || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
740 propagate_err))
741 return (true);
742 }
743
744 if (prof_write("\n", propagate_err))
745 return (true);
746
747 return (false);
748}
749
750static bool
751prof_dump_maps(bool propagate_err)
752{
753 int mfd;
754 char buf[UMAX2S_BUFSIZE];
755 char *s;
756 unsigned i, slen;
757 /* /proc/<pid>/maps\0 */
758 char mpath[6 + UMAX2S_BUFSIZE
759 + 5 + 1];
760
761 i = 0;
762
763 s = "/proc/";
764 slen = strlen(s);
765 memcpy(&mpath[i], s, slen);
766 i += slen;
767
768 s = u2s(getpid(), 10, buf);
769 slen = strlen(s);
770 memcpy(&mpath[i], s, slen);
771 i += slen;
772
773 s = "/maps";
774 slen = strlen(s);
775 memcpy(&mpath[i], s, slen);
776 i += slen;
777
778 mpath[i] = '\0';
779
780 mfd = open(mpath, O_RDONLY);
781 if (mfd != -1) {
782 ssize_t nread;
783
784 if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
785 propagate_err)
786 return (true);
787 nread = 0;
788 do {
789 prof_dump_buf_end += nread;
790 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
791 /* Make space in prof_dump_buf before read(). */
792 if (prof_flush(propagate_err) && propagate_err)
793 return (true);
794 }
795 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
796 PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
797 } while (nread > 0);
798 close(mfd);
799 } else
800 return (true);
801
802 return (false);
803}
804
805static bool
806prof_dump(const char *filename, bool leakcheck, bool propagate_err)
807{
808 prof_cnt_t cnt_all;
809 size_t tabind;
810 union {
811 prof_bt_t *p;
812 void *v;
813 } bt;
814 union {
815 prof_ctx_t *p;
816 void *v;
817 } ctx;
818 char buf[UMAX2S_BUFSIZE];
819 size_t leak_nctx;
820
821 prof_enter();
822 prof_dump_fd = creat(filename, 0644);
823 if (prof_dump_fd == -1) {
824 if (propagate_err == false) {
825 malloc_write("<jemalloc>: creat(\"");
826 malloc_write(filename);
827 malloc_write("\", 0644) failed\n");
828 if (opt_abort)
829 abort();
830 }
831 goto ERROR;
832 }
833
834 /* Merge per thread profile stats, and sum them in cnt_all. */
835 memset(&cnt_all, 0, sizeof(prof_cnt_t));
836 leak_nctx = 0;
837 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
838 prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
839
840 /* Dump profile header. */
841 if (prof_write("heap profile: ", propagate_err)
842 || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
843 || prof_write(": ", propagate_err)
844 || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
845 || prof_write(" [", propagate_err)
846 || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
847 || prof_write(": ", propagate_err)
848 || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
849 goto ERROR;
850
851 if (opt_lg_prof_sample == 0) {
852 if (prof_write("] @ heapprofile\n", propagate_err))
853 goto ERROR;
854 } else {
855 if (prof_write("] @ heap_v2/", propagate_err)
856 || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
857 buf), propagate_err)
858 || prof_write("\n", propagate_err))
859 goto ERROR;
860 }
861
862 /* Dump per ctx profile stats. */
863 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
864 == false;) {
865 if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
866 goto ERROR;
867 }
868
869 /* Dump /proc/<pid>/maps if possible. */
870 if (prof_dump_maps(propagate_err))
871 goto ERROR;
872
873 if (prof_flush(propagate_err))
874 goto ERROR;
875 close(prof_dump_fd);
876 prof_leave();
877
878 if (leakcheck && cnt_all.curbytes != 0) {
879 malloc_write("<jemalloc>: Leak summary: ");
880 malloc_write(u2s(cnt_all.curbytes, 10, buf));
881 malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
882 malloc_write(u2s(cnt_all.curobjs, 10, buf));
883 malloc_write((cnt_all.curobjs != 1) ? " objects, " :
884 " object, ");
885 malloc_write(u2s(leak_nctx, 10, buf));
886 malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
887 malloc_write("<jemalloc>: Run pprof on \"");
888 malloc_write(filename);
889 malloc_write("\" for leak detail\n");
890 }
891
892 return (false);
893ERROR:
894 prof_leave();
895 return (true);
896}
897
898#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \
899 + 1 \
900 + UMAX2S_BUFSIZE \
901 + 2 \
902 + UMAX2S_BUFSIZE \
903 + 5 + 1)
904static void
905prof_dump_filename(char *filename, char v, int64_t vseq)
906{
907 char buf[UMAX2S_BUFSIZE];
908 char *s;
909 unsigned i, slen;
910
911 /*
912 * Construct a filename of the form:
913 *
914 * <prefix>.<pid>.<seq>.v<vseq>.heap\0
915 */
916
917 i = 0;
918
919 s = opt_prof_prefix;
920 slen = strlen(s);
921 memcpy(&filename[i], s, slen);
922 i += slen;
923
924 s = ".";
925 slen = strlen(s);
926 memcpy(&filename[i], s, slen);
927 i += slen;
928
929 s = u2s(getpid(), 10, buf);
930 slen = strlen(s);
931 memcpy(&filename[i], s, slen);
932 i += slen;
933
934 s = ".";
935 slen = strlen(s);
936 memcpy(&filename[i], s, slen);
937 i += slen;
938
939 s = u2s(prof_dump_seq, 10, buf);
940 prof_dump_seq++;
941 slen = strlen(s);
942 memcpy(&filename[i], s, slen);
943 i += slen;
944
945 s = ".";
946 slen = strlen(s);
947 memcpy(&filename[i], s, slen);
948 i += slen;
949
950 filename[i] = v;
951 i++;
952
953 if (vseq != 0xffffffffffffffffLLU) {
954 s = u2s(vseq, 10, buf);
955 slen = strlen(s);
956 memcpy(&filename[i], s, slen);
957 i += slen;
958 }
959
960 s = ".heap";
961 slen = strlen(s);
962 memcpy(&filename[i], s, slen);
963 i += slen;
964
965 filename[i] = '\0';
966}
967
968static void
969prof_fdump(void)
970{
971 char filename[DUMP_FILENAME_BUFSIZE];
972
973 if (prof_booted == false)
974 return;
975
976 if (opt_prof_prefix[0] != '\0') {
977 malloc_mutex_lock(&prof_dump_seq_mtx);
978 prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
979 malloc_mutex_unlock(&prof_dump_seq_mtx);
980 prof_dump(filename, opt_prof_leak, false);
981 }
982}
983
984void
985prof_idump(void)
986{
987 char filename[DUMP_FILENAME_BUFSIZE];
988
989 if (prof_booted == false)
990 return;
991 malloc_mutex_lock(&enq_mtx);
992 if (enq) {
993 enq_idump = true;
994 malloc_mutex_unlock(&enq_mtx);
995 return;
996 }
997 malloc_mutex_unlock(&enq_mtx);
998
999 if (opt_prof_prefix[0] != '\0') {
1000 malloc_mutex_lock(&prof_dump_seq_mtx);
1001 prof_dump_filename(filename, 'i', prof_dump_iseq);
1002 prof_dump_iseq++;
1003 malloc_mutex_unlock(&prof_dump_seq_mtx);
1004 prof_dump(filename, false, false);
1005 }
1006}
1007
1008bool
1009prof_mdump(const char *filename)
1010{
1011 char filename_buf[DUMP_FILENAME_BUFSIZE];
1012
1013 if (opt_prof == false || prof_booted == false)
1014 return (true);
1015
1016 if (filename == NULL) {
1017 /* No filename specified, so automatically generate one. */
1018 if (opt_prof_prefix[0] == '\0')
1019 return (true);
1020 malloc_mutex_lock(&prof_dump_seq_mtx);
1021 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1022 prof_dump_mseq++;
1023 malloc_mutex_unlock(&prof_dump_seq_mtx);
1024 filename = filename_buf;
1025 }
1026 return (prof_dump(filename, false, true));
1027}
1028
1029void
1030prof_gdump(void)
1031{
1032 char filename[DUMP_FILENAME_BUFSIZE];
1033
1034 if (prof_booted == false)
1035 return;
1036 malloc_mutex_lock(&enq_mtx);
1037 if (enq) {
1038 enq_gdump = true;
1039 malloc_mutex_unlock(&enq_mtx);
1040 return;
1041 }
1042 malloc_mutex_unlock(&enq_mtx);
1043
1044 if (opt_prof_prefix[0] != '\0') {
1045 malloc_mutex_lock(&prof_dump_seq_mtx);
1046 prof_dump_filename(filename, 'u', prof_dump_useq);
1047 prof_dump_useq++;
1048 malloc_mutex_unlock(&prof_dump_seq_mtx);
1049 prof_dump(filename, false, false);
1050 }
1051}
1052
1053static void
1054prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
1055{
1056 size_t ret1, ret2;
1057 uint64_t h;
1058 prof_bt_t *bt = (prof_bt_t *)key;
1059
1060 assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
1061 assert(hash1 != NULL);
1062 assert(hash2 != NULL);
1063
1064 h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
1065 if (minbits <= 32) {
1066 /*
1067 * Avoid doing multiple hashes, since a single hash provides
1068 * enough bits.
1069 */
1070 ret1 = h & ZU(0xffffffffU);
1071 ret2 = h >> 32;
1072 } else {
1073 ret1 = h;
1074 ret2 = hash(bt->vec, bt->len * sizeof(void *),
1075 0x8432a476666bbc13U);
1076 }
1077
1078 *hash1 = ret1;
1079 *hash2 = ret2;
1080}
1081
1082static bool
1083prof_bt_keycomp(const void *k1, const void *k2)
1084{
1085 const prof_bt_t *bt1 = (prof_bt_t *)k1;
1086 const prof_bt_t *bt2 = (prof_bt_t *)k2;
1087
1088 if (bt1->len != bt2->len)
1089 return (false);
1090 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1091}
1092
1093prof_tdata_t *
1094prof_tdata_init(void)
1095{
1096 prof_tdata_t *prof_tdata;
1097
1098 /* Initialize an empty cache for this thread. */
1099 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1100 if (prof_tdata == NULL)
1101 return (NULL);
1102
1103 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1104 prof_bt_hash, prof_bt_keycomp)) {
1105 idalloc(prof_tdata);
1106 return (NULL);
1107 }
1108 ql_new(&prof_tdata->lru_ql);
1109
1110 prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
1111 if (prof_tdata->vec == NULL) {
1112
1113 ckh_delete(&prof_tdata->bt2cnt);
1114 idalloc(prof_tdata);
1115 return (NULL);
1116 }
1117
1118 prof_tdata->prn_state = 0;
1119 prof_tdata->threshold = 0;
1120 prof_tdata->accum = 0;
1121
1122 PROF_TCACHE_SET(prof_tdata);
1123
1124 return (prof_tdata);
1125}
1126
1127static void
1128prof_tdata_cleanup(void *arg)
1129{
1130 prof_tdata_t *prof_tdata;
1131
1132 prof_tdata = PROF_TCACHE_GET();
1133 if (prof_tdata != NULL) {
1134 prof_thr_cnt_t *cnt;
1135
1136 /*
1137 * Delete the hash table. All of its contents can still be
1138 * iterated over via the LRU.
1139 */
1140 ckh_delete(&prof_tdata->bt2cnt);
1141
1142 /*
1143 * Iteratively merge cnt's into the global stats and delete
1144 * them.
1145 */
1146 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1147 prof_ctx_merge(cnt->ctx, cnt);
1148 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1149 idalloc(cnt);
1150 }
1151
1152 idalloc(prof_tdata->vec);
1153
1154 idalloc(prof_tdata);
1155 PROF_TCACHE_SET(NULL);
1156 }
1157}
1158
1159void
1160prof_boot0(void)
1161{
1162
1163 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1164 sizeof(PROF_PREFIX_DEFAULT));
1165}
1166
1167void
1168prof_boot1(void)
1169{
1170
1171 /*
1172 * opt_prof and prof_promote must be in their final state before any
1173 * arenas are initialized, so this function must be executed early.
1174 */
1175
1176 if (opt_prof_leak && opt_prof == false) {
1177 /*
1178 * Enable opt_prof, but in such a way that profiles are never
1179 * automatically dumped.
1180 */
1181 opt_prof = true;
1182 opt_prof_gdump = false;
1183 prof_interval = 0;
1184 } else if (opt_prof) {
1185 if (opt_lg_prof_interval >= 0) {
1186 prof_interval = (((uint64_t)1U) <<
1187 opt_lg_prof_interval);
1188 } else
1189 prof_interval = 0;
1190 }
1191
1192 prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
1193}
1194
1195bool
1196prof_boot2(void)
1197{
1198
1199 if (opt_prof) {
1200 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1201 prof_bt_keycomp))
1202 return (true);
1203 if (malloc_mutex_init(&bt2ctx_mtx))
1204 return (true);
1205 if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
1206 != 0) {
1207 malloc_write(
1208 "<jemalloc>: Error in pthread_key_create()\n");
1209 abort();
1210 }
1211
1212 prof_bt_max = (1U << opt_lg_prof_bt_max);
1213 if (malloc_mutex_init(&prof_dump_seq_mtx))
1214 return (true);
1215
1216 if (malloc_mutex_init(&enq_mtx))
1217 return (true);
1218 enq = false;
1219 enq_idump = false;
1220 enq_gdump = false;
1221
1222 if (atexit(prof_fdump) != 0) {
1223 malloc_write("<jemalloc>: Error in atexit()\n");
1224 if (opt_abort)
1225 abort();
1226 }
1227 }
1228
1229#ifdef JEMALLOC_PROF_LIBGCC
1230 /*
1231 * Cause the backtracing machinery to allocate its internal state
1232 * before enabling profiling.
1233 */
1234 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1235#endif
1236
1237 prof_booted = true;
1238
1239 return (false);
1240}
1241
1242/******************************************************************************/
1243#endif /* JEMALLOC_PROF */