]> git.saurik.com Git - redis.git/blob - deps/jemalloc/src/prof.c
If the computer running the Redis test is slow, we revert to --clients 1 to avoid...
[redis.git] / deps / jemalloc / src / prof.c
1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_internal.h"
3 #ifdef JEMALLOC_PROF
4 /******************************************************************************/
5
6 #ifdef JEMALLOC_PROF_LIBUNWIND
7 #define UNW_LOCAL_ONLY
8 #include <libunwind.h>
9 #endif
10
11 #ifdef JEMALLOC_PROF_LIBGCC
12 #include <unwind.h>
13 #endif
14
15 /******************************************************************************/
16 /* Data. */
17
/* Profiling options, initialized here to their built-in defaults. */
bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_leak = false;
bool		opt_prof_accum = true;
ssize_t		opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
/* Dump filename prefix; prof_boot0() copies PROF_PREFIX_DEFAULT into it. */
char		opt_prof_prefix[PATH_MAX + 1];

/* Both are computed from the options by prof_boot1(). */
uint64_t	prof_interval;
bool		prof_promote;

/* Set to (1U << opt_lg_prof_bt_max) by prof_boot2(). */
unsigned	prof_bt_max;

#ifndef NO_TLS
__thread prof_tdata_t	*prof_tdata_tls
    JEMALLOC_ATTR(tls_model("initial-exec"));
#endif
/* Key created in prof_boot2() with prof_tdata_cleanup() as its destructor. */
pthread_key_t	prof_tdata_tsd;

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

/*
 * Sequence numbers embedded in generated dump filenames.  prof_dump_seq is
 * bumped by prof_dump_filename() for every generated name; the i/m/u counters
 * are bumped by prof_idump(), prof_mdump() and prof_gdump() respectively.  All
 * updates happen with prof_dump_seq_mtx held.
 */
static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char		prof_dump_buf[PROF_DUMP_BUF_SIZE];
/* Number of valid bytes currently buffered in prof_dump_buf. */
static unsigned		prof_dump_buf_end;
/* Descriptor of the dump file being written by prof_dump(). */
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/*
 * enq is set by prof_enter() and cleared by prof_leave().  While it is set,
 * prof_idump()/prof_gdump() only record a request in enq_idump/enq_gdump;
 * prof_leave() then performs the deferred dump(s).  All three flags are
 * accessed with enq_mtx held.
 */
static malloc_mutex_t	enq_mtx;
static bool		enq;
static bool		enq_idump;
static bool		enq_gdump;
70 /******************************************************************************/
71 /* Function prototypes for non-inline static functions. */
72
static prof_bt_t	*bt_dup(prof_bt_t *bt);
static void	bt_destroy(prof_bt_t *bt);
/* The unwind callbacks exist only when the libgcc unwinder is selected. */
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code	prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code	prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
/* Dump-file output helpers; each returns true on failure. */
static bool	prof_flush(bool propagate_err);
static bool	prof_write(const char *s, bool propagate_err);
static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void	prof_ctx_destroy(prof_ctx_t *ctx);
static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool	prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
    bool propagate_err);
static bool	prof_dump_maps(bool propagate_err);
static bool	prof_dump(const char *filename, bool leakcheck,
    bool propagate_err);
static void	prof_dump_filename(char *filename, char v, int64_t vseq);
static void	prof_fdump(void);
/* Hash and key-comparison callbacks handed to ckh_new(). */
static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool	prof_bt_keycomp(const void *k1, const void *k2);
static void	prof_tdata_cleanup(void *arg);
98
99 /******************************************************************************/
100
101 void
102 bt_init(prof_bt_t *bt, void **vec)
103 {
104
105 bt->vec = vec;
106 bt->len = 0;
107 }
108
109 static void
110 bt_destroy(prof_bt_t *bt)
111 {
112
113 idalloc(bt);
114 }
115
116 static prof_bt_t *
117 bt_dup(prof_bt_t *bt)
118 {
119 prof_bt_t *ret;
120
121 /*
122 * Create a single allocation that has space for vec immediately
123 * following the prof_bt_t structure. The backtraces that get
124 * stored in the backtrace caches are copied from stack-allocated
125 * temporary variables, so size is known at creation time. Making this
126 * a contiguous object improves cache locality.
127 */
128 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
129 (bt->len * sizeof(void *)));
130 if (ret == NULL)
131 return (NULL);
132 ret->vec = (void **)((uintptr_t)ret +
133 QUANTUM_CEILING(sizeof(prof_bt_t)));
134 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
135 ret->len = bt->len;
136
137 return (ret);
138 }
139
140 static inline void
141 prof_enter(void)
142 {
143
144 malloc_mutex_lock(&enq_mtx);
145 enq = true;
146 malloc_mutex_unlock(&enq_mtx);
147
148 malloc_mutex_lock(&bt2ctx_mtx);
149 }
150
151 static inline void
152 prof_leave(void)
153 {
154 bool idump, gdump;
155
156 malloc_mutex_unlock(&bt2ctx_mtx);
157
158 malloc_mutex_lock(&enq_mtx);
159 enq = false;
160 idump = enq_idump;
161 enq_idump = false;
162 gdump = enq_gdump;
163 enq_gdump = false;
164 malloc_mutex_unlock(&enq_mtx);
165
166 if (idump)
167 prof_idump();
168 if (gdump)
169 prof_gdump();
170 }
171
172 #ifdef JEMALLOC_PROF_LIBUNWIND
173 void
174 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
175 {
176 unw_context_t uc;
177 unw_cursor_t cursor;
178 unsigned i;
179 int err;
180
181 assert(bt->len == 0);
182 assert(bt->vec != NULL);
183 assert(max <= (1U << opt_lg_prof_bt_max));
184
185 unw_getcontext(&uc);
186 unw_init_local(&cursor, &uc);
187
188 /* Throw away (nignore+1) stack frames, if that many exist. */
189 for (i = 0; i < nignore + 1; i++) {
190 err = unw_step(&cursor);
191 if (err <= 0)
192 return;
193 }
194
195 /*
196 * Iterate over stack frames until there are no more, or until no space
197 * remains in bt.
198 */
199 for (i = 0; i < max; i++) {
200 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
201 bt->len++;
202 err = unw_step(&cursor);
203 if (err <= 0)
204 break;
205 }
206 }
207 #endif
208 #ifdef JEMALLOC_PROF_LIBGCC
209 static _Unwind_Reason_Code
210 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
211 {
212
213 return (_URC_NO_REASON);
214 }
215
216 static _Unwind_Reason_Code
217 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
218 {
219 prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
220
221 if (data->nignore > 0)
222 data->nignore--;
223 else {
224 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
225 data->bt->len++;
226 if (data->bt->len == data->max)
227 return (_URC_END_OF_STACK);
228 }
229
230 return (_URC_NO_REASON);
231 }
232
233 void
234 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
235 {
236 prof_unwind_data_t data = {bt, nignore, max};
237
238 _Unwind_Backtrace(prof_unwind_callback, &data);
239 }
240 #endif
241 #ifdef JEMALLOC_PROF_GCC
242 void
243 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
244 {
245 #define BT_FRAME(i) \
246 if ((i) < nignore + max) { \
247 void *p; \
248 if (__builtin_frame_address(i) == 0) \
249 return; \
250 p = __builtin_return_address(i); \
251 if (p == NULL) \
252 return; \
253 if (i >= nignore) { \
254 bt->vec[(i) - nignore] = p; \
255 bt->len = (i) - nignore + 1; \
256 } \
257 } else \
258 return;
259
260 assert(nignore <= 3);
261 assert(max <= (1U << opt_lg_prof_bt_max));
262
263 BT_FRAME(0)
264 BT_FRAME(1)
265 BT_FRAME(2)
266 BT_FRAME(3)
267 BT_FRAME(4)
268 BT_FRAME(5)
269 BT_FRAME(6)
270 BT_FRAME(7)
271 BT_FRAME(8)
272 BT_FRAME(9)
273
274 BT_FRAME(10)
275 BT_FRAME(11)
276 BT_FRAME(12)
277 BT_FRAME(13)
278 BT_FRAME(14)
279 BT_FRAME(15)
280 BT_FRAME(16)
281 BT_FRAME(17)
282 BT_FRAME(18)
283 BT_FRAME(19)
284
285 BT_FRAME(20)
286 BT_FRAME(21)
287 BT_FRAME(22)
288 BT_FRAME(23)
289 BT_FRAME(24)
290 BT_FRAME(25)
291 BT_FRAME(26)
292 BT_FRAME(27)
293 BT_FRAME(28)
294 BT_FRAME(29)
295
296 BT_FRAME(30)
297 BT_FRAME(31)
298 BT_FRAME(32)
299 BT_FRAME(33)
300 BT_FRAME(34)
301 BT_FRAME(35)
302 BT_FRAME(36)
303 BT_FRAME(37)
304 BT_FRAME(38)
305 BT_FRAME(39)
306
307 BT_FRAME(40)
308 BT_FRAME(41)
309 BT_FRAME(42)
310 BT_FRAME(43)
311 BT_FRAME(44)
312 BT_FRAME(45)
313 BT_FRAME(46)
314 BT_FRAME(47)
315 BT_FRAME(48)
316 BT_FRAME(49)
317
318 BT_FRAME(50)
319 BT_FRAME(51)
320 BT_FRAME(52)
321 BT_FRAME(53)
322 BT_FRAME(54)
323 BT_FRAME(55)
324 BT_FRAME(56)
325 BT_FRAME(57)
326 BT_FRAME(58)
327 BT_FRAME(59)
328
329 BT_FRAME(60)
330 BT_FRAME(61)
331 BT_FRAME(62)
332 BT_FRAME(63)
333 BT_FRAME(64)
334 BT_FRAME(65)
335 BT_FRAME(66)
336 BT_FRAME(67)
337 BT_FRAME(68)
338 BT_FRAME(69)
339
340 BT_FRAME(70)
341 BT_FRAME(71)
342 BT_FRAME(72)
343 BT_FRAME(73)
344 BT_FRAME(74)
345 BT_FRAME(75)
346 BT_FRAME(76)
347 BT_FRAME(77)
348 BT_FRAME(78)
349 BT_FRAME(79)
350
351 BT_FRAME(80)
352 BT_FRAME(81)
353 BT_FRAME(82)
354 BT_FRAME(83)
355 BT_FRAME(84)
356 BT_FRAME(85)
357 BT_FRAME(86)
358 BT_FRAME(87)
359 BT_FRAME(88)
360 BT_FRAME(89)
361
362 BT_FRAME(90)
363 BT_FRAME(91)
364 BT_FRAME(92)
365 BT_FRAME(93)
366 BT_FRAME(94)
367 BT_FRAME(95)
368 BT_FRAME(96)
369 BT_FRAME(97)
370 BT_FRAME(98)
371 BT_FRAME(99)
372
373 BT_FRAME(100)
374 BT_FRAME(101)
375 BT_FRAME(102)
376 BT_FRAME(103)
377 BT_FRAME(104)
378 BT_FRAME(105)
379 BT_FRAME(106)
380 BT_FRAME(107)
381 BT_FRAME(108)
382 BT_FRAME(109)
383
384 BT_FRAME(110)
385 BT_FRAME(111)
386 BT_FRAME(112)
387 BT_FRAME(113)
388 BT_FRAME(114)
389 BT_FRAME(115)
390 BT_FRAME(116)
391 BT_FRAME(117)
392 BT_FRAME(118)
393 BT_FRAME(119)
394
395 BT_FRAME(120)
396 BT_FRAME(121)
397 BT_FRAME(122)
398 BT_FRAME(123)
399 BT_FRAME(124)
400 BT_FRAME(125)
401 BT_FRAME(126)
402 BT_FRAME(127)
403
404 /* Extras to compensate for nignore. */
405 BT_FRAME(128)
406 BT_FRAME(129)
407 BT_FRAME(130)
408 #undef BT_FRAME
409 }
410 #endif
411
412 prof_thr_cnt_t *
413 prof_lookup(prof_bt_t *bt)
414 {
415 union {
416 prof_thr_cnt_t *p;
417 void *v;
418 } ret;
419 prof_tdata_t *prof_tdata;
420
421 prof_tdata = PROF_TCACHE_GET();
422 if (prof_tdata == NULL) {
423 prof_tdata = prof_tdata_init();
424 if (prof_tdata == NULL)
425 return (NULL);
426 }
427
428 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
429 union {
430 prof_bt_t *p;
431 void *v;
432 } btkey;
433 union {
434 prof_ctx_t *p;
435 void *v;
436 } ctx;
437 bool new_ctx;
438
439 /*
440 * This thread's cache lacks bt. Look for it in the global
441 * cache.
442 */
443 prof_enter();
444 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
445 /* bt has never been seen before. Insert it. */
446 ctx.v = imalloc(sizeof(prof_ctx_t));
447 if (ctx.v == NULL) {
448 prof_leave();
449 return (NULL);
450 }
451 btkey.p = bt_dup(bt);
452 if (btkey.v == NULL) {
453 prof_leave();
454 idalloc(ctx.v);
455 return (NULL);
456 }
457 ctx.p->bt = btkey.p;
458 if (malloc_mutex_init(&ctx.p->lock)) {
459 prof_leave();
460 idalloc(btkey.v);
461 idalloc(ctx.v);
462 return (NULL);
463 }
464 memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
465 ql_new(&ctx.p->cnts_ql);
466 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
467 /* OOM. */
468 prof_leave();
469 malloc_mutex_destroy(&ctx.p->lock);
470 idalloc(btkey.v);
471 idalloc(ctx.v);
472 return (NULL);
473 }
474 /*
475 * Artificially raise curobjs, in order to avoid a race
476 * condition with prof_ctx_merge()/prof_ctx_destroy().
477 *
478 * No locking is necessary for ctx here because no other
479 * threads have had the opportunity to fetch it from
480 * bt2ctx yet.
481 */
482 ctx.p->cnt_merged.curobjs++;
483 new_ctx = true;
484 } else {
485 /*
486 * Artificially raise curobjs, in order to avoid a race
487 * condition with prof_ctx_merge()/prof_ctx_destroy().
488 */
489 malloc_mutex_lock(&ctx.p->lock);
490 ctx.p->cnt_merged.curobjs++;
491 malloc_mutex_unlock(&ctx.p->lock);
492 new_ctx = false;
493 }
494 prof_leave();
495
496 /* Link a prof_thd_cnt_t into ctx for this thread. */
497 if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
498 == (ZU(1) << opt_lg_prof_tcmax)) {
499 assert(ckh_count(&prof_tdata->bt2cnt) > 0);
500 /*
501 * Flush the least recently used cnt in order to keep
502 * bt2cnt from becoming too large.
503 */
504 ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
505 assert(ret.v != NULL);
506 if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
507 NULL, NULL))
508 assert(false);
509 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
510 prof_ctx_merge(ret.p->ctx, ret.p);
511 /* ret can now be re-used. */
512 } else {
513 assert(opt_lg_prof_tcmax < 0 ||
514 ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
515 opt_lg_prof_tcmax));
516 /* Allocate and partially initialize a new cnt. */
517 ret.v = imalloc(sizeof(prof_thr_cnt_t));
518 if (ret.p == NULL) {
519 if (new_ctx)
520 prof_ctx_destroy(ctx.p);
521 return (NULL);
522 }
523 ql_elm_new(ret.p, cnts_link);
524 ql_elm_new(ret.p, lru_link);
525 }
526 /* Finish initializing ret. */
527 ret.p->ctx = ctx.p;
528 ret.p->epoch = 0;
529 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
530 if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
531 if (new_ctx)
532 prof_ctx_destroy(ctx.p);
533 idalloc(ret.v);
534 return (NULL);
535 }
536 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
537 malloc_mutex_lock(&ctx.p->lock);
538 ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
539 ctx.p->cnt_merged.curobjs--;
540 malloc_mutex_unlock(&ctx.p->lock);
541 } else {
542 /* Move ret to the front of the LRU. */
543 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
544 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
545 }
546
547 return (ret.p);
548 }
549
550 static bool
551 prof_flush(bool propagate_err)
552 {
553 bool ret = false;
554 ssize_t err;
555
556 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
557 if (err == -1) {
558 if (propagate_err == false) {
559 malloc_write("<jemalloc>: write() failed during heap "
560 "profile flush\n");
561 if (opt_abort)
562 abort();
563 }
564 ret = true;
565 }
566 prof_dump_buf_end = 0;
567
568 return (ret);
569 }
570
571 static bool
572 prof_write(const char *s, bool propagate_err)
573 {
574 unsigned i, slen, n;
575
576 i = 0;
577 slen = strlen(s);
578 while (i < slen) {
579 /* Flush the buffer if it is full. */
580 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
581 if (prof_flush(propagate_err) && propagate_err)
582 return (true);
583
584 if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
585 /* Finish writing. */
586 n = slen - i;
587 } else {
588 /* Write as much of s as will fit. */
589 n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
590 }
591 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
592 prof_dump_buf_end += n;
593 i += n;
594 }
595
596 return (false);
597 }
598
599 static void
600 prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
601 {
602 prof_thr_cnt_t *thr_cnt;
603 prof_cnt_t tcnt;
604
605 malloc_mutex_lock(&ctx->lock);
606
607 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
608 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
609 volatile unsigned *epoch = &thr_cnt->epoch;
610
611 while (true) {
612 unsigned epoch0 = *epoch;
613
614 /* Make sure epoch is even. */
615 if (epoch0 & 1U)
616 continue;
617
618 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
619
620 /* Terminate if epoch didn't change while reading. */
621 if (*epoch == epoch0)
622 break;
623 }
624
625 ctx->cnt_summed.curobjs += tcnt.curobjs;
626 ctx->cnt_summed.curbytes += tcnt.curbytes;
627 if (opt_prof_accum) {
628 ctx->cnt_summed.accumobjs += tcnt.accumobjs;
629 ctx->cnt_summed.accumbytes += tcnt.accumbytes;
630 }
631 }
632
633 if (ctx->cnt_summed.curobjs != 0)
634 (*leak_nctx)++;
635
636 /* Add to cnt_all. */
637 cnt_all->curobjs += ctx->cnt_summed.curobjs;
638 cnt_all->curbytes += ctx->cnt_summed.curbytes;
639 if (opt_prof_accum) {
640 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
641 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
642 }
643
644 malloc_mutex_unlock(&ctx->lock);
645 }
646
647 static void
648 prof_ctx_destroy(prof_ctx_t *ctx)
649 {
650
651 /*
652 * Check that ctx is still unused by any thread cache before destroying
653 * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in
654 * order to avoid a race condition with this function, as does
655 * prof_ctx_merge() in order to avoid a race between the main body of
656 * prof_ctx_merge() and entry into this function.
657 */
658 prof_enter();
659 malloc_mutex_lock(&ctx->lock);
660 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
661 assert(ctx->cnt_merged.curbytes == 0);
662 assert(ctx->cnt_merged.accumobjs == 0);
663 assert(ctx->cnt_merged.accumbytes == 0);
664 /* Remove ctx from bt2ctx. */
665 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
666 assert(false);
667 prof_leave();
668 /* Destroy ctx. */
669 malloc_mutex_unlock(&ctx->lock);
670 bt_destroy(ctx->bt);
671 malloc_mutex_destroy(&ctx->lock);
672 idalloc(ctx);
673 } else {
674 /*
675 * Compensate for increment in prof_ctx_merge() or
676 * prof_lookup().
677 */
678 ctx->cnt_merged.curobjs--;
679 malloc_mutex_unlock(&ctx->lock);
680 prof_leave();
681 }
682 }
683
684 static void
685 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
686 {
687 bool destroy;
688
689 /* Merge cnt stats and detach from ctx. */
690 malloc_mutex_lock(&ctx->lock);
691 ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
692 ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
693 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
694 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
695 ql_remove(&ctx->cnts_ql, cnt, cnts_link);
696 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
697 ctx->cnt_merged.curobjs == 0) {
698 /*
699 * Artificially raise ctx->cnt_merged.curobjs in order to keep
700 * another thread from winning the race to destroy ctx while
701 * this one has ctx->lock dropped. Without this, it would be
702 * possible for another thread to:
703 *
704 * 1) Sample an allocation associated with ctx.
705 * 2) Deallocate the sampled object.
706 * 3) Successfully prof_ctx_destroy(ctx).
707 *
708 * The result would be that ctx no longer exists by the time
709 * this thread accesses it in prof_ctx_destroy().
710 */
711 ctx->cnt_merged.curobjs++;
712 destroy = true;
713 } else
714 destroy = false;
715 malloc_mutex_unlock(&ctx->lock);
716 if (destroy)
717 prof_ctx_destroy(ctx);
718 }
719
720 static bool
721 prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
722 {
723 char buf[UMAX2S_BUFSIZE];
724 unsigned i;
725
726 if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
727 assert(ctx->cnt_summed.curbytes == 0);
728 assert(ctx->cnt_summed.accumobjs == 0);
729 assert(ctx->cnt_summed.accumbytes == 0);
730 return (false);
731 }
732
733 if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
734 || prof_write(": ", propagate_err)
735 || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
736 propagate_err)
737 || prof_write(" [", propagate_err)
738 || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
739 propagate_err)
740 || prof_write(": ", propagate_err)
741 || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
742 propagate_err)
743 || prof_write("] @", propagate_err))
744 return (true);
745
746 for (i = 0; i < bt->len; i++) {
747 if (prof_write(" 0x", propagate_err)
748 || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
749 propagate_err))
750 return (true);
751 }
752
753 if (prof_write("\n", propagate_err))
754 return (true);
755
756 return (false);
757 }
758
759 static bool
760 prof_dump_maps(bool propagate_err)
761 {
762 int mfd;
763 char buf[UMAX2S_BUFSIZE];
764 char *s;
765 unsigned i, slen;
766 /* /proc/<pid>/maps\0 */
767 char mpath[6 + UMAX2S_BUFSIZE
768 + 5 + 1];
769
770 i = 0;
771
772 s = "/proc/";
773 slen = strlen(s);
774 memcpy(&mpath[i], s, slen);
775 i += slen;
776
777 s = u2s(getpid(), 10, buf);
778 slen = strlen(s);
779 memcpy(&mpath[i], s, slen);
780 i += slen;
781
782 s = "/maps";
783 slen = strlen(s);
784 memcpy(&mpath[i], s, slen);
785 i += slen;
786
787 mpath[i] = '\0';
788
789 mfd = open(mpath, O_RDONLY);
790 if (mfd != -1) {
791 ssize_t nread;
792
793 if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
794 propagate_err)
795 return (true);
796 nread = 0;
797 do {
798 prof_dump_buf_end += nread;
799 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
800 /* Make space in prof_dump_buf before read(). */
801 if (prof_flush(propagate_err) && propagate_err)
802 return (true);
803 }
804 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
805 PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
806 } while (nread > 0);
807 close(mfd);
808 } else
809 return (true);
810
811 return (false);
812 }
813
814 static bool
815 prof_dump(const char *filename, bool leakcheck, bool propagate_err)
816 {
817 prof_cnt_t cnt_all;
818 size_t tabind;
819 union {
820 prof_bt_t *p;
821 void *v;
822 } bt;
823 union {
824 prof_ctx_t *p;
825 void *v;
826 } ctx;
827 char buf[UMAX2S_BUFSIZE];
828 size_t leak_nctx;
829
830 prof_enter();
831 prof_dump_fd = creat(filename, 0644);
832 if (prof_dump_fd == -1) {
833 if (propagate_err == false) {
834 malloc_write("<jemalloc>: creat(\"");
835 malloc_write(filename);
836 malloc_write("\", 0644) failed\n");
837 if (opt_abort)
838 abort();
839 }
840 goto ERROR;
841 }
842
843 /* Merge per thread profile stats, and sum them in cnt_all. */
844 memset(&cnt_all, 0, sizeof(prof_cnt_t));
845 leak_nctx = 0;
846 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
847 prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
848
849 /* Dump profile header. */
850 if (prof_write("heap profile: ", propagate_err)
851 || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
852 || prof_write(": ", propagate_err)
853 || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
854 || prof_write(" [", propagate_err)
855 || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
856 || prof_write(": ", propagate_err)
857 || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
858 goto ERROR;
859
860 if (opt_lg_prof_sample == 0) {
861 if (prof_write("] @ heapprofile\n", propagate_err))
862 goto ERROR;
863 } else {
864 if (prof_write("] @ heap_v2/", propagate_err)
865 || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
866 buf), propagate_err)
867 || prof_write("\n", propagate_err))
868 goto ERROR;
869 }
870
871 /* Dump per ctx profile stats. */
872 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
873 == false;) {
874 if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
875 goto ERROR;
876 }
877
878 /* Dump /proc/<pid>/maps if possible. */
879 if (prof_dump_maps(propagate_err))
880 goto ERROR;
881
882 if (prof_flush(propagate_err))
883 goto ERROR;
884 close(prof_dump_fd);
885 prof_leave();
886
887 if (leakcheck && cnt_all.curbytes != 0) {
888 malloc_write("<jemalloc>: Leak summary: ");
889 malloc_write(u2s(cnt_all.curbytes, 10, buf));
890 malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
891 malloc_write(u2s(cnt_all.curobjs, 10, buf));
892 malloc_write((cnt_all.curobjs != 1) ? " objects, " :
893 " object, ");
894 malloc_write(u2s(leak_nctx, 10, buf));
895 malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
896 malloc_write("<jemalloc>: Run pprof on \"");
897 malloc_write(filename);
898 malloc_write("\" for leak detail\n");
899 }
900
901 return (false);
902 ERROR:
903 prof_leave();
904 return (true);
905 }
906
907 #define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \
908 + 1 \
909 + UMAX2S_BUFSIZE \
910 + 2 \
911 + UMAX2S_BUFSIZE \
912 + 5 + 1)
913 static void
914 prof_dump_filename(char *filename, char v, int64_t vseq)
915 {
916 char buf[UMAX2S_BUFSIZE];
917 char *s;
918 unsigned i, slen;
919
920 /*
921 * Construct a filename of the form:
922 *
923 * <prefix>.<pid>.<seq>.v<vseq>.heap\0
924 */
925
926 i = 0;
927
928 s = opt_prof_prefix;
929 slen = strlen(s);
930 memcpy(&filename[i], s, slen);
931 i += slen;
932
933 s = ".";
934 slen = strlen(s);
935 memcpy(&filename[i], s, slen);
936 i += slen;
937
938 s = u2s(getpid(), 10, buf);
939 slen = strlen(s);
940 memcpy(&filename[i], s, slen);
941 i += slen;
942
943 s = ".";
944 slen = strlen(s);
945 memcpy(&filename[i], s, slen);
946 i += slen;
947
948 s = u2s(prof_dump_seq, 10, buf);
949 prof_dump_seq++;
950 slen = strlen(s);
951 memcpy(&filename[i], s, slen);
952 i += slen;
953
954 s = ".";
955 slen = strlen(s);
956 memcpy(&filename[i], s, slen);
957 i += slen;
958
959 filename[i] = v;
960 i++;
961
962 if (vseq != 0xffffffffffffffffLLU) {
963 s = u2s(vseq, 10, buf);
964 slen = strlen(s);
965 memcpy(&filename[i], s, slen);
966 i += slen;
967 }
968
969 s = ".heap";
970 slen = strlen(s);
971 memcpy(&filename[i], s, slen);
972 i += slen;
973
974 filename[i] = '\0';
975 }
976
977 static void
978 prof_fdump(void)
979 {
980 char filename[DUMP_FILENAME_BUFSIZE];
981
982 if (prof_booted == false)
983 return;
984
985 if (opt_prof_prefix[0] != '\0') {
986 malloc_mutex_lock(&prof_dump_seq_mtx);
987 prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
988 malloc_mutex_unlock(&prof_dump_seq_mtx);
989 prof_dump(filename, opt_prof_leak, false);
990 }
991 }
992
993 void
994 prof_idump(void)
995 {
996 char filename[DUMP_FILENAME_BUFSIZE];
997
998 if (prof_booted == false)
999 return;
1000 malloc_mutex_lock(&enq_mtx);
1001 if (enq) {
1002 enq_idump = true;
1003 malloc_mutex_unlock(&enq_mtx);
1004 return;
1005 }
1006 malloc_mutex_unlock(&enq_mtx);
1007
1008 if (opt_prof_prefix[0] != '\0') {
1009 malloc_mutex_lock(&prof_dump_seq_mtx);
1010 prof_dump_filename(filename, 'i', prof_dump_iseq);
1011 prof_dump_iseq++;
1012 malloc_mutex_unlock(&prof_dump_seq_mtx);
1013 prof_dump(filename, false, false);
1014 }
1015 }
1016
1017 bool
1018 prof_mdump(const char *filename)
1019 {
1020 char filename_buf[DUMP_FILENAME_BUFSIZE];
1021
1022 if (opt_prof == false || prof_booted == false)
1023 return (true);
1024
1025 if (filename == NULL) {
1026 /* No filename specified, so automatically generate one. */
1027 if (opt_prof_prefix[0] == '\0')
1028 return (true);
1029 malloc_mutex_lock(&prof_dump_seq_mtx);
1030 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1031 prof_dump_mseq++;
1032 malloc_mutex_unlock(&prof_dump_seq_mtx);
1033 filename = filename_buf;
1034 }
1035 return (prof_dump(filename, false, true));
1036 }
1037
1038 void
1039 prof_gdump(void)
1040 {
1041 char filename[DUMP_FILENAME_BUFSIZE];
1042
1043 if (prof_booted == false)
1044 return;
1045 malloc_mutex_lock(&enq_mtx);
1046 if (enq) {
1047 enq_gdump = true;
1048 malloc_mutex_unlock(&enq_mtx);
1049 return;
1050 }
1051 malloc_mutex_unlock(&enq_mtx);
1052
1053 if (opt_prof_prefix[0] != '\0') {
1054 malloc_mutex_lock(&prof_dump_seq_mtx);
1055 prof_dump_filename(filename, 'u', prof_dump_useq);
1056 prof_dump_useq++;
1057 malloc_mutex_unlock(&prof_dump_seq_mtx);
1058 prof_dump(filename, false, false);
1059 }
1060 }
1061
1062 static void
1063 prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
1064 {
1065 size_t ret1, ret2;
1066 uint64_t h;
1067 prof_bt_t *bt = (prof_bt_t *)key;
1068
1069 assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
1070 assert(hash1 != NULL);
1071 assert(hash2 != NULL);
1072
1073 h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
1074 if (minbits <= 32) {
1075 /*
1076 * Avoid doing multiple hashes, since a single hash provides
1077 * enough bits.
1078 */
1079 ret1 = h & ZU(0xffffffffU);
1080 ret2 = h >> 32;
1081 } else {
1082 ret1 = h;
1083 ret2 = hash(bt->vec, bt->len * sizeof(void *),
1084 0x8432a476666bbc13LLU);
1085 }
1086
1087 *hash1 = ret1;
1088 *hash2 = ret2;
1089 }
1090
1091 static bool
1092 prof_bt_keycomp(const void *k1, const void *k2)
1093 {
1094 const prof_bt_t *bt1 = (prof_bt_t *)k1;
1095 const prof_bt_t *bt2 = (prof_bt_t *)k2;
1096
1097 if (bt1->len != bt2->len)
1098 return (false);
1099 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1100 }
1101
1102 prof_tdata_t *
1103 prof_tdata_init(void)
1104 {
1105 prof_tdata_t *prof_tdata;
1106
1107 /* Initialize an empty cache for this thread. */
1108 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1109 if (prof_tdata == NULL)
1110 return (NULL);
1111
1112 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1113 prof_bt_hash, prof_bt_keycomp)) {
1114 idalloc(prof_tdata);
1115 return (NULL);
1116 }
1117 ql_new(&prof_tdata->lru_ql);
1118
1119 prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
1120 if (prof_tdata->vec == NULL) {
1121 ckh_delete(&prof_tdata->bt2cnt);
1122 idalloc(prof_tdata);
1123 return (NULL);
1124 }
1125
1126 prof_tdata->prn_state = 0;
1127 prof_tdata->threshold = 0;
1128 prof_tdata->accum = 0;
1129
1130 PROF_TCACHE_SET(prof_tdata);
1131
1132 return (prof_tdata);
1133 }
1134
1135 static void
1136 prof_tdata_cleanup(void *arg)
1137 {
1138 prof_thr_cnt_t *cnt;
1139 prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
1140
1141 /*
1142 * Delete the hash table. All of its contents can still be iterated
1143 * over via the LRU.
1144 */
1145 ckh_delete(&prof_tdata->bt2cnt);
1146
1147 /* Iteratively merge cnt's into the global stats and delete them. */
1148 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1149 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1150 prof_ctx_merge(cnt->ctx, cnt);
1151 idalloc(cnt);
1152 }
1153
1154 idalloc(prof_tdata->vec);
1155
1156 idalloc(prof_tdata);
1157 PROF_TCACHE_SET(NULL);
1158 }
1159
1160 void
1161 prof_boot0(void)
1162 {
1163
1164 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1165 sizeof(PROF_PREFIX_DEFAULT));
1166 }
1167
1168 void
1169 prof_boot1(void)
1170 {
1171
1172 /*
1173 * opt_prof and prof_promote must be in their final state before any
1174 * arenas are initialized, so this function must be executed early.
1175 */
1176
1177 if (opt_prof_leak && opt_prof == false) {
1178 /*
1179 * Enable opt_prof, but in such a way that profiles are never
1180 * automatically dumped.
1181 */
1182 opt_prof = true;
1183 opt_prof_gdump = false;
1184 prof_interval = 0;
1185 } else if (opt_prof) {
1186 if (opt_lg_prof_interval >= 0) {
1187 prof_interval = (((uint64_t)1U) <<
1188 opt_lg_prof_interval);
1189 } else
1190 prof_interval = 0;
1191 }
1192
1193 prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
1194 }
1195
1196 bool
1197 prof_boot2(void)
1198 {
1199
1200 if (opt_prof) {
1201 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1202 prof_bt_keycomp))
1203 return (true);
1204 if (malloc_mutex_init(&bt2ctx_mtx))
1205 return (true);
1206 if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
1207 != 0) {
1208 malloc_write(
1209 "<jemalloc>: Error in pthread_key_create()\n");
1210 abort();
1211 }
1212
1213 prof_bt_max = (1U << opt_lg_prof_bt_max);
1214 if (malloc_mutex_init(&prof_dump_seq_mtx))
1215 return (true);
1216
1217 if (malloc_mutex_init(&enq_mtx))
1218 return (true);
1219 enq = false;
1220 enq_idump = false;
1221 enq_gdump = false;
1222
1223 if (atexit(prof_fdump) != 0) {
1224 malloc_write("<jemalloc>: Error in atexit()\n");
1225 if (opt_abort)
1226 abort();
1227 }
1228 }
1229
1230 #ifdef JEMALLOC_PROF_LIBGCC
1231 /*
1232 * Cause the backtracing machinery to allocate its internal state
1233 * before enabling profiling.
1234 */
1235 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1236 #endif
1237
1238 prof_booted = true;
1239
1240 return (false);
1241 }
1242
1243 /******************************************************************************/
1244 #endif /* JEMALLOC_PROF */