#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */
malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool    opt_prof = false;
bool    opt_prof_active = true;
size_t  opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool    opt_prof_gdump = false;
bool    opt_prof_final = true;
bool    opt_prof_leak = false;
bool    opt_prof_accum = false;
char    opt_prof_prefix[PATH_MAX + 1];

uint64_t prof_interval;
bool     prof_promote;
/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t *ctx_locks;
static unsigned cum_ctxs; /* Atomic counter. */
/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t bt2ctx;
static malloc_mutex_t bt2ctx_mtx;

static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
static uint64_t prof_dump_iseq;
static uint64_t prof_dump_mseq;
static uint64_t prof_dump_useq;
/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char prof_dump_buf[PROF_DUMP_BUFSIZE];
static unsigned prof_dump_buf_end;
static int prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool prof_booted = false;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static prof_bt_t *bt_dup(prof_bt_t *bt);
static void bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
static bool prof_flush(bool propagate_err);
static bool prof_write(bool propagate_err, const char *s);
static bool prof_printf(bool propagate_err, const char *format, ...)
    JEMALLOC_ATTR(format(printf, 2, 3));
static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void prof_ctx_destroy(prof_ctx_t *ctx);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
    prof_bt_t *bt);
static bool prof_dump_maps(bool propagate_err);
static bool prof_dump(bool propagate_err, const char *filename,
    bool leakcheck);
static void prof_dump_filename(char *filename, char v, int64_t vseq);
static void prof_fdump(void);
static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool prof_bt_keycomp(const void *k1, const void *k2);
static malloc_mutex_t *prof_ctx_mutex_choose(void);

/******************************************************************************/
void
bt_init(prof_bt_t *bt, void **vec)
{

    cassert(config_prof);

    bt->vec = vec;
    bt->len = 0;
}
static void
bt_destroy(prof_bt_t *bt)
{

    cassert(config_prof);

    idalloc(bt);
}
static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
    prof_bt_t *ret;

    cassert(config_prof);

    /*
     * Create a single allocation that has space for vec immediately
     * following the prof_bt_t structure.  The backtraces that get
     * stored in the backtrace caches are copied from stack-allocated
     * temporary variables, so size is known at creation time.  Making this
     * a contiguous object improves cache locality.
     */
    ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
        (bt->len * sizeof(void *)));
    if (ret == NULL)
        return (NULL);
    ret->vec = (void **)((uintptr_t)ret +
        QUANTUM_CEILING(sizeof(prof_bt_t)));
    memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
    ret->len = bt->len;

    return (ret);
}
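
/*
 * prof_enter()/prof_leave() bracket all accesses to the global bt2ctx table.
 * While enq is set, interval- and growth-triggered dump requests are only
 * recorded (enq_idump/enq_gdump); prof_leave() issues them after bt2ctx_mtx
 * has been dropped, so the dump path never runs with the table lock held.
 */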
static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

    cassert(config_prof);

    assert(prof_tdata->enq == false);
    prof_tdata->enq = true;

    malloc_mutex_lock(&bt2ctx_mtx);
}
static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
    bool idump, gdump;

    cassert(config_prof);

    malloc_mutex_unlock(&bt2ctx_mtx);

    assert(prof_tdata->enq);
    prof_tdata->enq = false;
    idump = prof_tdata->enq_idump;
    prof_tdata->enq_idump = false;
    gdump = prof_tdata->enq_gdump;
    prof_tdata->enq_gdump = false;

    if (idump)
        prof_idump();
    if (gdump)
        prof_gdump();
}
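
/*
 * Alternative prof_backtrace() implementations follow: libunwind, the libgcc
 * _Unwind interface, and GCC's __builtin_{frame,return}_address().  Exactly
 * one is compiled in, depending on which JEMALLOC_PROF_* macro is defined.
 * In each case the first nignore frames, which are attributable to the
 * allocator itself, are discarded so that captured backtraces start at the
 * application's allocation site.
 */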
#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
    unw_context_t uc;
    unw_cursor_t cursor;
    unsigned i;
    int err;

    cassert(config_prof);
    assert(bt->len == 0);
    assert(bt->vec != NULL);

    unw_getcontext(&uc);
    unw_init_local(&cursor, &uc);

    /* Throw away (nignore+1) stack frames, if that many exist. */
    for (i = 0; i < nignore + 1; i++) {
        err = unw_step(&cursor);
        if (err <= 0)
            return;
    }

    /*
     * Iterate over stack frames until there are no more, or until no space
     * remains in bt.
     */
    for (i = 0; i < PROF_BT_MAX; i++) {
        unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
        bt->len++;
        err = unw_step(&cursor);
        if (err <= 0)
            break;
    }
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

    cassert(config_prof);

    return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
    prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

    cassert(config_prof);

    if (data->nignore > 0)
        data->nignore--;
    else {
        data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
        data->bt->len++;
        if (data->bt->len == data->max)
            return (_URC_END_OF_STACK);
    }

    return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
    prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

    cassert(config_prof);

    _Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define BT_FRAME(i)                                                     \
    if ((i) < nignore + PROF_BT_MAX) {                                  \
        void *p;                                                        \
        if (__builtin_frame_address(i) == 0)                            \
            return;                                                     \
        p = __builtin_return_address(i);                                \
        if (p == NULL)                                                  \
            return;                                                     \
        if (i >= nignore) {                                             \
            bt->vec[(i) - nignore] = p;                                 \
            bt->len = (i) - nignore + 1;                                \
        }                                                               \
    } else                                                              \
        return;

    cassert(config_prof);
    assert(nignore <= 3);

    /*
     * The body consists of one BT_FRAME(i) invocation per frame index the
     * profiler is prepared to record.
     */

    /* Extras to compensate for nignore. */
    BT_FRAME(128)
    BT_FRAME(129)
    BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

    cassert(config_prof);
    not_reached();
}
#endif
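
/*
 * prof_lookup() maps a backtrace to the per-thread counter object that tracks
 * it.  Lookups go through two levels: the thread-local bt2cnt cache (bounded
 * at PROF_TCMAX entries and kept in LRU order), and on a miss, the global
 * bt2ctx table, where a new prof_ctx_t is created if the backtrace has never
 * been seen.  ctx->nlimbo keeps a freshly created or newly referenced ctx
 * from being destroyed before this thread links its counter into it.
 */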
prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
    union {
        prof_thr_cnt_t *p;
        void           *v;
    } ret;
    prof_tdata_t *prof_tdata;

    cassert(config_prof);

    prof_tdata = prof_tdata_get();
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
        return (NULL);

    if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
        union {
            prof_bt_t *p;
            void      *v;
        } btkey;
        union {
            prof_ctx_t *p;
            void       *v;
        } ctx;
        bool new_ctx;

        /*
         * This thread's cache lacks bt.  Look for it in the global
         * cache.
         */
        prof_enter(prof_tdata);
        if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
            /* bt has never been seen before.  Insert it. */
            ctx.v = imalloc(sizeof(prof_ctx_t));
            if (ctx.v == NULL) {
                prof_leave(prof_tdata);
                return (NULL);
            }
            btkey.p = bt_dup(bt);
            if (btkey.v == NULL) {
                prof_leave(prof_tdata);
                idalloc(ctx.v);
                return (NULL);
            }
            ctx.p->bt = btkey.p;
            ctx.p->lock = prof_ctx_mutex_choose();
            /*
             * Set nlimbo to 1, in order to avoid a race condition
             * with prof_ctx_merge()/prof_ctx_destroy().
             */
            ctx.p->nlimbo = 1;
            memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
            ql_new(&ctx.p->cnts_ql);
            if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
                /* OOM. */
                prof_leave(prof_tdata);
                idalloc(btkey.v);
                idalloc(ctx.v);
                return (NULL);
            }
            new_ctx = true;
        } else {
            /*
             * Increment nlimbo, in order to avoid a race condition
             * with prof_ctx_merge()/prof_ctx_destroy().
             */
            malloc_mutex_lock(ctx.p->lock);
            ctx.p->nlimbo++;
            malloc_mutex_unlock(ctx.p->lock);
            new_ctx = false;
        }
        prof_leave(prof_tdata);

        /* Link a prof_thr_cnt_t into ctx for this thread. */
        if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
            assert(ckh_count(&prof_tdata->bt2cnt) > 0);
            /*
             * Flush the least recently used cnt in order to keep
             * bt2cnt from becoming too large.
             */
            ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
            assert(ret.v != NULL);
            if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
                NULL, NULL))
                assert(false);
            ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
            prof_ctx_merge(ret.p->ctx, ret.p);
            /* ret can now be re-used. */
        } else {
            assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
            /* Allocate and partially initialize a new cnt. */
            ret.v = imalloc(sizeof(prof_thr_cnt_t));
            if (ret.p == NULL) {
                if (new_ctx)
                    prof_ctx_destroy(ctx.p);
                return (NULL);
            }
            ql_elm_new(ret.p, cnts_link);
            ql_elm_new(ret.p, lru_link);
        }
        /* Finish initializing ret. */
        ret.p->ctx = ctx.p;
        ret.p->epoch = 0;
        memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
        if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
            if (new_ctx)
                prof_ctx_destroy(ctx.p);
            idalloc(ret.v);
            return (NULL);
        }
        ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
        malloc_mutex_lock(ctx.p->lock);
        ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
        ctx.p->nlimbo--;
        malloc_mutex_unlock(ctx.p->lock);
    } else {
        /* Move ret to the front of the LRU. */
        ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
        ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
    }

    return (ret.p);
}
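
/*
 * The dump routines below stream output through prof_dump_buf/prof_dump_fd.
 * prof_write() appends to the buffer and prof_flush() pushes it to the file;
 * propagate_err selects whether I/O errors are reported to the caller or
 * printed (and, with opt_abort, turned into an abort()).
 */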
static bool
prof_flush(bool propagate_err)
{
    bool ret = false;
    ssize_t err;

    cassert(config_prof);

    err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
    if (err == -1) {
        if (propagate_err == false) {
            malloc_write("<jemalloc>: write() failed during heap "
                "profile flush\n");
            if (opt_abort)
                abort();
        }
        ret = true;
    }
    prof_dump_buf_end = 0;

    return (ret);
}
static bool
prof_write(bool propagate_err, const char *s)
{
    unsigned i, slen, n;

    cassert(config_prof);

    i = 0;
    slen = strlen(s);
    while (i < slen) {
        /* Flush the buffer if it is full. */
        if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
            if (prof_flush(propagate_err) && propagate_err)
                return (true);

        if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
            /* Finish writing. */
            n = slen - i;
        } else {
            /* Write as much of s as will fit. */
            n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
        }
        memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
        prof_dump_buf_end += n;
        i += n;
    }

    return (false);
}
JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_printf(bool propagate_err, const char *format, ...)
{
    bool ret;
    va_list ap;
    char buf[PROF_PRINTF_BUFSIZE];

    va_start(ap, format);
    malloc_vsnprintf(buf, sizeof(buf), format, ap);
    va_end(ap);
    ret = prof_write(propagate_err, buf);

    return (ret);
}
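
/*
 * prof_ctx_sum() folds the live per-thread counters into ctx->cnt_summed
 * without locking the writers: each prof_thr_cnt_t carries an epoch that its
 * owner makes odd before updating cnts and even afterward, so the reader
 * simply retries the copy until it observes the same even epoch before and
 * after the memcpy (a seqlock-style snapshot).
 */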
static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
    prof_thr_cnt_t *thr_cnt;
    prof_cnt_t tcnt;

    cassert(config_prof);

    malloc_mutex_lock(ctx->lock);

    memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
    ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
        volatile unsigned *epoch = &thr_cnt->epoch;

        while (true) {
            unsigned epoch0 = *epoch;

            /* Make sure epoch is even. */
            if (epoch0 & 1U)
                continue;

            memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

            /* Terminate if epoch didn't change while reading. */
            if (*epoch == epoch0)
                break;
        }

        ctx->cnt_summed.curobjs += tcnt.curobjs;
        ctx->cnt_summed.curbytes += tcnt.curbytes;
        if (opt_prof_accum) {
            ctx->cnt_summed.accumobjs += tcnt.accumobjs;
            ctx->cnt_summed.accumbytes += tcnt.accumbytes;
        }
    }

    if (ctx->cnt_summed.curobjs != 0)
        (*leak_nctx)++;

    /* Add to cnt_all. */
    cnt_all->curobjs += ctx->cnt_summed.curobjs;
    cnt_all->curbytes += ctx->cnt_summed.curbytes;
    if (opt_prof_accum) {
        cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
        cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
    }

    malloc_mutex_unlock(ctx->lock);
}
static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
    prof_tdata_t *prof_tdata;

    cassert(config_prof);

    /*
     * Check that ctx is still unused by any thread cache before destroying
     * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
     * condition with this function, as does prof_ctx_merge() in order to
     * avoid a race between the main body of prof_ctx_merge() and entry
     * into this function.
     */
    prof_tdata = *prof_tdata_tsd_get();
    assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
    prof_enter(prof_tdata);
    malloc_mutex_lock(ctx->lock);
    if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
        ctx->nlimbo == 1) {
        assert(ctx->cnt_merged.curbytes == 0);
        assert(ctx->cnt_merged.accumobjs == 0);
        assert(ctx->cnt_merged.accumbytes == 0);
        /* Remove ctx from bt2ctx. */
        if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
            assert(false);
        prof_leave(prof_tdata);
        /* Destroy ctx. */
        malloc_mutex_unlock(ctx->lock);
        bt_destroy(ctx->bt);
        idalloc(ctx);
    } else {
        /*
         * Compensate for increment in prof_ctx_merge() or
         * prof_lookup().
         */
        ctx->nlimbo--;
        malloc_mutex_unlock(ctx->lock);
        prof_leave(prof_tdata);
    }
}
static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
    bool destroy;

    cassert(config_prof);

    /* Merge cnt stats and detach from ctx. */
    malloc_mutex_lock(ctx->lock);
    ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
    ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
    ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
    ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
    ql_remove(&ctx->cnts_ql, cnt, cnts_link);
    if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
        ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
        /*
         * Increment ctx->nlimbo in order to keep another thread from
         * winning the race to destroy ctx while this one has ctx->lock
         * dropped.  Without this, it would be possible for another
         * thread to:
         *
         * 1) Sample an allocation associated with ctx.
         * 2) Deallocate the sampled object.
         * 3) Successfully prof_ctx_destroy(ctx).
         *
         * The result would be that ctx no longer exists by the time
         * this thread accesses it in prof_ctx_destroy().
         */
        ctx->nlimbo++;
        destroy = true;
    } else
        destroy = false;
    malloc_mutex_unlock(ctx->lock);
    if (destroy)
        prof_ctx_destroy(ctx);
}
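
/*
 * prof_dump_ctx() emits one record per backtrace in the form
 *
 *   <curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @ <pc> <pc> ...
 *
 * e.g. "3: 768 [12: 3072] @ 0x4005f0 0x4007a2" (the values here are purely
 * illustrative).
 */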
static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
{
    unsigned i;

    cassert(config_prof);

    /*
     * Current statistics can sum to 0 as a result of unmerged per thread
     * statistics.  Additionally, interval- and growth-triggered dumps can
     * occur between the time a ctx is created and when its statistics are
     * filled in.  Avoid dumping any ctx that is an artifact of either
     * implementation detail.
     */
    if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
        (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
        assert(ctx->cnt_summed.curobjs == 0);
        assert(ctx->cnt_summed.curbytes == 0);
        assert(ctx->cnt_summed.accumobjs == 0);
        assert(ctx->cnt_summed.accumbytes == 0);
        return (false);
    }

    if (prof_printf(propagate_err, "%"PRId64": %"PRId64
        " [%"PRIu64": %"PRIu64"] @",
        ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
        ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
        return (true);

    for (i = 0; i < bt->len; i++) {
        if (prof_printf(propagate_err, " %#"PRIxPTR,
            (uintptr_t)bt->vec[i]))
            return (true);
    }

    if (prof_write(propagate_err, "\n"))
        return (true);

    return (false);
}
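
/*
 * prof_dump_maps() appends the contents of /proc/<pid>/maps under a
 * "MAPPED_LIBRARIES:" header so that post-processing tools such as pprof can
 * map the raw return addresses above to symbols.  This is inherently
 * Linux-specific; if the maps file cannot be opened, the dump is treated as
 * failed.
 */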
static bool
prof_dump_maps(bool propagate_err)
{
    int mfd;
    char filename[PATH_MAX + 1];

    cassert(config_prof);

    malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
        (int)getpid());
    mfd = open(filename, O_RDONLY);
    if (mfd != -1) {
        ssize_t nread;

        if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
            propagate_err)
            return (true);
        nread = 0;
        do {
            prof_dump_buf_end += nread;
            if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
                /* Make space in prof_dump_buf before read(). */
                if (prof_flush(propagate_err) && propagate_err)
                    return (true);
            }
            nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
                PROF_DUMP_BUFSIZE - prof_dump_buf_end);
        } while (nread > 0);
        close(mfd);
    } else
        return (true);

    return (false);
}
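
/*
 * prof_dump() drives a complete dump: serialize against other dumps via
 * prof_enter(), creat() the output file, sum all per-thread stats into
 * cnt_all, write the header and one record per ctx, append the maps section,
 * flush, and finally (for leak checking) print a summary of still-live
 * bytes, objects, and contexts.
 */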
static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
    prof_tdata_t *prof_tdata;
    prof_cnt_t cnt_all;
    size_t tabind;
    union {
        prof_bt_t *p;
        void      *v;
    } bt;
    union {
        prof_ctx_t *p;
        void       *v;
    } ctx;
    size_t leak_nctx;

    cassert(config_prof);

    prof_tdata = prof_tdata_get();
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
        return (true);
    prof_enter(prof_tdata);
    prof_dump_fd = creat(filename, 0644);
    if (prof_dump_fd == -1) {
        if (propagate_err == false) {
            malloc_printf(
                "<jemalloc>: creat(\"%s\", 0644) failed\n",
                filename);
            if (opt_abort)
                abort();
        }
        goto label_error;
    }

    /* Merge per thread profile stats, and sum them in cnt_all. */
    memset(&cnt_all, 0, sizeof(prof_cnt_t));
    leak_nctx = 0;
    for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
        prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);

    /* Dump profile header. */
    if (opt_lg_prof_sample == 0) {
        if (prof_printf(propagate_err,
            "heap profile: %"PRId64": %"PRId64
            " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
            cnt_all.curobjs, cnt_all.curbytes,
            cnt_all.accumobjs, cnt_all.accumbytes))
            goto label_error;
    } else {
        if (prof_printf(propagate_err,
            "heap profile: %"PRId64": %"PRId64
            " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
            cnt_all.curobjs, cnt_all.curbytes,
            cnt_all.accumobjs, cnt_all.accumbytes,
            ((uint64_t)1U << opt_lg_prof_sample)))
            goto label_error;
    }

    /* Dump per ctx profile stats. */
    for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
        == false;) {
        if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
            goto label_error;
    }

    /* Dump /proc/<pid>/maps if possible. */
    if (prof_dump_maps(propagate_err))
        goto label_error;

    if (prof_flush(propagate_err))
        goto label_error;
    close(prof_dump_fd);
    prof_leave(prof_tdata);

    if (leakcheck && cnt_all.curbytes != 0) {
        malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
            PRId64" object%s, %zu context%s\n",
            cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
            cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
            leak_nctx, (leak_nctx != 1) ? "s" : "");
        malloc_printf(
            "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
            filename);
    }

    return (false);
label_error:
    prof_leave(prof_tdata);
    return (true);
}
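
/*
 * Dump files are named "<prefix>.<pid>.<seq>.<v><vseq>.heap", where <v>
 * identifies the trigger: 'f' for the final atexit() dump, 'i' for
 * interval-triggered dumps, 'm' for mallctl-requested dumps, and 'u' for
 * growth-triggered (gdump) dumps.
 */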
#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

    cassert(config_prof);

    if (vseq != UINT64_C(0xffffffffffffffff)) {
        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
        malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
            "%s.%d.%"PRIu64".%c%"PRId64".heap",
            opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
    } else {
        /* "<prefix>.<pid>.<seq>.<v>.heap" */
        malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
            "%s.%d.%"PRIu64".%c.heap",
            opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
    }
    prof_dump_seq++;
}
static void
prof_fdump(void)
{
    char filename[DUMP_FILENAME_BUFSIZE];

    cassert(config_prof);

    if (prof_booted == false)
        return;

    if (opt_prof_final && opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(false, filename, opt_prof_leak);
    }
}
void
prof_idump(void)
{
    prof_tdata_t *prof_tdata;
    char filename[PATH_MAX + 1];

    cassert(config_prof);

    if (prof_booted == false)
        return;
    /*
     * Don't call prof_tdata_get() here, because it could cause recursive
     * allocation.
     */
    prof_tdata = *prof_tdata_tsd_get();
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
        return;
    if (prof_tdata->enq) {
        prof_tdata->enq_idump = true;
        return;
    }

    if (opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'i', prof_dump_iseq);
        prof_dump_iseq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(false, filename, false);
    }
}
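
/*
 * prof_mdump() backs explicitly requested dumps.  A typical trigger (sketch,
 * from the caller's side) is the "prof.dump" mallctl:
 *
 *   const char *fname = NULL;  // NULL -> auto-generated "...m<seq>.heap"
 *   mallctl("prof.dump", NULL, NULL, &fname, sizeof(const char *));
 *
 * Errors are propagated to the caller (propagate_err == true) rather than
 * printed.
 */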
bool
prof_mdump(const char *filename)
{
    char filename_buf[DUMP_FILENAME_BUFSIZE];

    cassert(config_prof);

    if (opt_prof == false || prof_booted == false)
        return (true);

    if (filename == NULL) {
        /* No filename specified, so automatically generate one. */
        if (opt_prof_prefix[0] == '\0')
            return (true);
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
        prof_dump_mseq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        filename = filename_buf;
    }
    return (prof_dump(true, filename, false));
}
void
prof_gdump(void)
{
    prof_tdata_t *prof_tdata;
    char filename[DUMP_FILENAME_BUFSIZE];

    cassert(config_prof);

    if (prof_booted == false)
        return;
    /*
     * Don't call prof_tdata_get() here, because it could cause recursive
     * allocation.
     */
    prof_tdata = *prof_tdata_tsd_get();
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
        return;
    if (prof_tdata->enq) {
        prof_tdata->enq_gdump = true;
        return;
    }

    if (opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'u', prof_dump_useq);
        prof_dump_useq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(false, filename, false);
    }
}
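
/*
 * bt2ctx and the per-thread bt2cnt tables are cuckoo hashes (ckh), which
 * require two independent hash values per key.  When minbits <= 32, both
 * values are carved out of a single 64-bit hash of the backtrace vector;
 * otherwise a second hash with a different seed is computed.
 */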
static void
prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
    size_t ret1, ret2;
    uint64_t h;
    prof_bt_t *bt = (prof_bt_t *)key;

    cassert(config_prof);
    assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
    assert(hash1 != NULL);
    assert(hash2 != NULL);

    h = hash(bt->vec, bt->len * sizeof(void *),
        UINT64_C(0x94122f335b332aea));
    if (minbits <= 32) {
        /*
         * Avoid doing multiple hashes, since a single hash provides
         * enough bits.
         */
        ret1 = h & ZU(0xffffffffU);
        ret2 = h >> 32;
    } else {
        ret1 = h;
        ret2 = hash(bt->vec, bt->len * sizeof(void *),
            UINT64_C(0x8432a476666bbc13));
    }

    *hash1 = ret1;
    *hash2 = ret2;
}
static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
    const prof_bt_t *bt1 = (prof_bt_t *)k1;
    const prof_bt_t *bt2 = (prof_bt_t *)k2;

    cassert(config_prof);

    if (bt1->len != bt2->len)
        return (false);
    return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
    unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

    return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}
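
/*
 * prof_tdata_init() allocates the calling thread's profiling state: the
 * bt2cnt cache and its LRU list, a scratch vector for backtrace capture, and
 * the prng_state/threshold/accum triple consumed by the sampling logic.
 */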
prof_tdata_t *
prof_tdata_init(void)
{
    prof_tdata_t *prof_tdata;

    cassert(config_prof);

    /* Initialize an empty cache for this thread. */
    prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
    if (prof_tdata == NULL)
        return (NULL);

    if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
        prof_bt_hash, prof_bt_keycomp)) {
        idalloc(prof_tdata);
        return (NULL);
    }
    ql_new(&prof_tdata->lru_ql);

    prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
    if (prof_tdata->vec == NULL) {
        ckh_delete(&prof_tdata->bt2cnt);
        idalloc(prof_tdata);
        return (NULL);
    }

    prof_tdata->prng_state = 0;
    prof_tdata->threshold = 0;
    prof_tdata->accum = 0;

    prof_tdata->enq = false;
    prof_tdata->enq_idump = false;
    prof_tdata->enq_gdump = false;

    prof_tdata_tsd_set(&prof_tdata);

    return (prof_tdata);
}
void
prof_tdata_cleanup(void *arg)
{
    prof_thr_cnt_t *cnt;
    prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

    cassert(config_prof);

    if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
        /*
         * Another destructor deallocated memory after this destructor
         * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
         * in order to receive another callback.
         */
        prof_tdata = PROF_TDATA_STATE_PURGATORY;
        prof_tdata_tsd_set(&prof_tdata);
    } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
        /*
         * The previous time this destructor was called, we set the key
         * to PROF_TDATA_STATE_PURGATORY so that other destructors
         * wouldn't cause re-creation of the prof_tdata.  This time, do
         * nothing, so that the destructor will not be called again.
         */
    } else if (prof_tdata != NULL) {
        /*
         * Delete the hash table.  All of its contents can still be
         * iterated over via the LRU.
         */
        ckh_delete(&prof_tdata->bt2cnt);
        /*
         * Iteratively merge cnt's into the global stats and delete
         * them.
         */
        while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
            ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
            prof_ctx_merge(cnt->ctx, cnt);
            idalloc(cnt);
        }
        idalloc(prof_tdata->vec);
        idalloc(prof_tdata);
        prof_tdata = PROF_TDATA_STATE_PURGATORY;
        prof_tdata_tsd_set(&prof_tdata);
    }
}
void
prof_boot0(void)
{

    cassert(config_prof);

    memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
        sizeof(PROF_PREFIX_DEFAULT));
}
void
prof_boot1(void)
{

    cassert(config_prof);

    /*
     * opt_prof and prof_promote must be in their final state before any
     * arenas are initialized, so this function must be executed early.
     */

    if (opt_prof_leak && opt_prof == false) {
        /*
         * Enable opt_prof, but in such a way that profiles are never
         * automatically dumped.
         */
        opt_prof = true;
        opt_prof_gdump = false;
        prof_interval = 0;
    } else if (opt_prof) {
        if (opt_lg_prof_interval >= 0) {
            prof_interval = (((uint64_t)1U) <<
                opt_lg_prof_interval);
        } else
            prof_interval = 0;
    }

    prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}
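
/*
 * prof_boot2() performs the allocation-dependent part of bootstrapping:
 * creating bt2ctx and its mutex, registering the prof_tdata TSD destructor,
 * installing the atexit() handler for the final dump, and carving out the
 * shared ctx lock table.  Only after all of that succeeds is prof_booted
 * set.
 */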
bool
prof_boot2(void)
{

    cassert(config_prof);

    if (opt_prof) {
        unsigned i;

        if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
            prof_bt_keycomp))
            return (true);
        if (malloc_mutex_init(&bt2ctx_mtx))
            return (true);
        if (prof_tdata_tsd_boot()) {
            malloc_write(
                "<jemalloc>: Error in pthread_key_create()\n");
            abort();
        }

        if (malloc_mutex_init(&prof_dump_seq_mtx))
            return (true);

        if (atexit(prof_fdump) != 0) {
            malloc_write("<jemalloc>: Error in atexit()\n");
            if (opt_abort)
                abort();
        }

        ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
            sizeof(malloc_mutex_t));
        if (ctx_locks == NULL)
            return (true);
        for (i = 0; i < PROF_NCTX_LOCKS; i++) {
            if (malloc_mutex_init(&ctx_locks[i]))
                return (true);
        }
    }

#ifdef JEMALLOC_PROF_LIBGCC
    /*
     * Cause the backtracing machinery to allocate its internal state
     * before enabling profiling.
     */
    _Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

    prof_booted = true;

    return (false);
}
void
prof_prefork(void)
{

    if (opt_prof) {
        unsigned i;

        malloc_mutex_lock(&bt2ctx_mtx);
        malloc_mutex_lock(&prof_dump_seq_mtx);
        for (i = 0; i < PROF_NCTX_LOCKS; i++)
            malloc_mutex_lock(&ctx_locks[i]);
    }
}
void
prof_postfork_parent(void)
{

    if (opt_prof) {
        unsigned i;

        for (i = 0; i < PROF_NCTX_LOCKS; i++)
            malloc_mutex_postfork_parent(&ctx_locks[i]);
        malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
        malloc_mutex_postfork_parent(&bt2ctx_mtx);
    }
}
void
prof_postfork_child(void)
{

    if (opt_prof) {
        unsigned i;

        for (i = 0; i < PROF_NCTX_LOCKS; i++)
            malloc_mutex_postfork_child(&ctx_locks[i]);
        malloc_mutex_postfork_child(&prof_dump_seq_mtx);
        malloc_mutex_postfork_child(&bt2ctx_mtx);
    }
}

/******************************************************************************/