#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_PROF
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_leak = false;
bool		opt_prof_accum = true;
ssize_t		opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
char		opt_prof_prefix[PATH_MAX + 1];

uint64_t	prof_interval;
bool		prof_promote;

unsigned	prof_bt_max;

__thread prof_tdata_t	*prof_tdata_tls
    JEMALLOC_ATTR(tls_model("initial-exec"));
pthread_key_t	prof_tdata_tsd;

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char		prof_dump_buf[PROF_DUMP_BUF_SIZE];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

static malloc_mutex_t	enq_mtx;
static bool		enq;
static bool		enq_idump;
static bool		enq_gdump;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static prof_bt_t *bt_dup(prof_bt_t *bt);
static void bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
static bool prof_flush(bool propagate_err);
static bool prof_write(const char *s, bool propagate_err);
static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void prof_ctx_destroy(prof_ctx_t *ctx);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
    bool propagate_err);
static bool prof_dump_maps(bool propagate_err);
static bool prof_dump(const char *filename, bool leakcheck,
    bool propagate_err);
static void prof_dump_filename(char *filename, char v, int64_t vseq);
static void prof_fdump(void);
static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool prof_bt_keycomp(const void *k1, const void *k2);
static void prof_tdata_cleanup(void *arg);

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

    bt->vec = vec;
    bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

    idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
    prof_bt_t *ret;

    /*
     * Create a single allocation that has space for vec immediately
     * following the prof_bt_t structure.  The backtraces that get stored
     * in the backtrace caches are copied from stack-allocated temporary
     * variables, so size is known at creation time.  Making this a
     * contiguous object improves cache locality.
     */
    ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
        (bt->len * sizeof(void *)));
    if (ret == NULL)
        return (NULL);
    ret->len = bt->len;
    ret->vec = (void **)((uintptr_t)ret +
        QUANTUM_CEILING(sizeof(prof_bt_t)));
    memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));

    return (ret);
}

static void
prof_enter(void)
{

    malloc_mutex_lock(&enq_mtx);
    enq = true;
    malloc_mutex_unlock(&enq_mtx);

    malloc_mutex_lock(&bt2ctx_mtx);
}

static void
prof_leave(void)
{
    bool idump, gdump;

    malloc_mutex_unlock(&bt2ctx_mtx);

    malloc_mutex_lock(&enq_mtx);
    enq = false;
    idump = enq_idump;
    enq_idump = false;
    gdump = enq_gdump;
    enq_gdump = false;
    malloc_mutex_unlock(&enq_mtx);

    if (idump)
        prof_idump();
    if (gdump)
        prof_gdump();
}

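/*
 * Note on enq, enq_idump, and enq_gdump: while a dump holds bt2ctx_mtx
 * (between prof_enter() and prof_leave()), interval- and gdump-triggered
 * dumps cannot proceed, so they merely set the corresponding enq_* flag.
 * prof_leave() then issues any dumps that were requested in the meantime via
 * the prof_idump()/prof_gdump() entry points defined later in this file.
 */
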
#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
    unw_context_t uc;
    unw_cursor_t cursor;
    unsigned i;
    int err;

    assert(bt->len == 0);
    assert(bt->vec != NULL);
    assert(max <= (1U << opt_lg_prof_bt_max));

    unw_getcontext(&uc);
    unw_init_local(&cursor, &uc);

    /* Throw away (nignore+1) stack frames, if that many exist. */
    for (i = 0; i < nignore + 1; i++) {
        err = unw_step(&cursor);
        if (err <= 0)
            return;
    }

    /*
     * Iterate over stack frames until there are no more, or until no space
     * remains in bt.
     */
    for (i = 0; i < max; i++) {
        unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
        bt->len++;
        err = unw_step(&cursor);
        if (err <= 0)
            break;
    }
}
#endif

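/*
 * Exactly one of the prof_backtrace() implementations in this file is
 * compiled in, depending on which of JEMALLOC_PROF_LIBUNWIND,
 * JEMALLOC_PROF_LIBGCC, or JEMALLOC_PROF_GCC the configure script selected.
 */
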
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

    return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
    prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

    if (data->nignore > 0)
        data->nignore--;
    else {
        data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
        data->bt->len++;
        if (data->bt->len == data->max)
            return (_URC_END_OF_STACK);
    }

    return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
    prof_unwind_data_t data = {bt, nignore, max};

    _Unwind_Backtrace(prof_unwind_callback, &data);
}
#endif

#ifdef JEMALLOC_PROF_GCC
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
#define BT_FRAME(i)                                                     \
    if ((i) < nignore + max) {                                          \
        void *p;                                                        \
        if (__builtin_frame_address(i) == 0)                            \
            return;                                                     \
        p = __builtin_return_address(i);                                \
        if (p == NULL)                                                  \
            return;                                                     \
        if (i >= nignore) {                                             \
            bt->vec[(i) - nignore] = p;                                 \
            bt->len = (i) - nignore + 1;                                \
        }                                                               \
    } else                                                              \
        return;

    assert(nignore <= 3);
    assert(max <= (1U << opt_lg_prof_bt_max));

    BT_FRAME(0)
    BT_FRAME(1)
    BT_FRAME(2)
    /*
     * BT_FRAME() is expanded once per additional frame index up to the
     * maximum supported backtrace depth, plus extras to compensate for
     * nignore.
     */
#undef BT_FRAME
}
#endif

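/*
 * __builtin_frame_address() and __builtin_return_address() require constant
 * arguments, so the GCC implementation cannot walk the stack in a loop;
 * instead BT_FRAME() is expanded once per candidate frame index, and each
 * expansion returns as soon as the frame chain ends.
 */
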
prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
    union {
        prof_thr_cnt_t  *p;
        void            *v;
    } ret;
    prof_tdata_t *prof_tdata;

    prof_tdata = PROF_TCACHE_GET();
    if (prof_tdata == NULL) {
        prof_tdata = prof_tdata_init();
        if (prof_tdata == NULL)
            return (NULL);
    }

    if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
        union {
            prof_bt_t   *p;
            void        *v;
        } btkey;
        union {
            prof_ctx_t  *p;
            void        *v;
        } ctx;
        bool new_ctx;

        /*
         * This thread's cache lacks bt.  Look for it in the global
         * cache.
         */
        prof_enter();
        if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
            /* bt has never been seen before.  Insert it. */
            ctx.v = imalloc(sizeof(prof_ctx_t));
            if (ctx.v == NULL) {
                prof_leave();
                return (NULL);
            }
            btkey.p = bt_dup(bt);
            if (btkey.v == NULL) {
                prof_leave();
                idalloc(ctx.v);
                return (NULL);
            }
            ctx.p->bt = btkey.p;
            if (malloc_mutex_init(&ctx.p->lock)) {
                prof_leave();
                idalloc(btkey.v);
                idalloc(ctx.v);
                return (NULL);
            }
            memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
            ql_new(&ctx.p->cnts_ql);
            if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
                /* OOM. */
                prof_leave();
                malloc_mutex_destroy(&ctx.p->lock);
                idalloc(btkey.v);
                idalloc(ctx.v);
                return (NULL);
            }
            /*
             * Artificially raise curobjs, in order to avoid a race
             * condition with prof_ctx_merge()/prof_ctx_destroy().
             *
             * No locking is necessary for ctx here because no other
             * threads have had the opportunity to fetch it from
             * bt2ctx yet.
             */
            ctx.p->cnt_merged.curobjs++;
            new_ctx = true;
        } else {
            /*
             * Artificially raise curobjs, in order to avoid a race
             * condition with prof_ctx_merge()/prof_ctx_destroy().
             */
            malloc_mutex_lock(&ctx.p->lock);
            ctx.p->cnt_merged.curobjs++;
            malloc_mutex_unlock(&ctx.p->lock);
            new_ctx = false;
        }
        prof_leave();

        /* Link a prof_thr_cnt_t into ctx for this thread. */
        if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
            == (ZU(1) << opt_lg_prof_tcmax)) {
            assert(ckh_count(&prof_tdata->bt2cnt) > 0);
            /*
             * Flush the least recently used cnt in order to keep
             * bt2cnt from becoming too large.
             */
            ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
            assert(ret.v != NULL);
            if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
                NULL, NULL))
                assert(false);
            ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
            prof_ctx_merge(ret.p->ctx, ret.p);
            /* ret can now be re-used. */
        } else {
            assert(opt_lg_prof_tcmax < 0 ||
                ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
                opt_lg_prof_tcmax));
            /* Allocate and partially initialize a new cnt. */
            ret.v = imalloc(sizeof(prof_thr_cnt_t));
            if (ret.p == NULL) {
                if (new_ctx)
                    prof_ctx_destroy(ctx.p);
                return (NULL);
            }
            ql_elm_new(ret.p, cnts_link);
            ql_elm_new(ret.p, lru_link);
        }
        /* Finish initializing ret. */
        ret.p->ctx = ctx.p;
        ret.p->epoch = 0;
        memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
        if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
            if (new_ctx)
                prof_ctx_destroy(ctx.p);
            idalloc(ret.v);
            return (NULL);
        }
        ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
        malloc_mutex_lock(&ctx.p->lock);
        ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
        ctx.p->cnt_merged.curobjs--;
        malloc_mutex_unlock(&ctx.p->lock);
    } else {
        /* Move ret to the front of the LRU. */
        ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
        ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
    }

    return (ret.p);
}

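/*
 * prof_lookup() thus maintains a two-level structure: a per thread bt2cnt
 * cache of prof_thr_cnt_t objects (bounded by opt_lg_prof_tcmax and evicted
 * in LRU order), backed by the global bt2ctx table of prof_ctx_t objects.
 * Allocation sites then update only the per thread counters; the global
 * context is touched only on a cache miss or eviction.
 */
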
static bool
prof_flush(bool propagate_err)
{
    bool ret = false;
    ssize_t err;

    err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
    if (err == -1) {
        if (propagate_err == false) {
            malloc_write("<jemalloc>: write() failed during heap "
                "profile flush\n");
            if (opt_abort)
                abort();
        }
        ret = true;
    }
    prof_dump_buf_end = 0;

    return (ret);
}

static bool
prof_write(const char *s, bool propagate_err)
{
    unsigned i, slen, n;

    i = 0;
    slen = strlen(s);
    while (i < slen) {
        /* Flush the buffer if it is full. */
        if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
            if (prof_flush(propagate_err) && propagate_err)
                return (true);

        if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
            /* Finish writing. */
            n = slen - i;
        } else {
            /* Write as much of s as will fit. */
            n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
        }
        memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
        prof_dump_buf_end += n;
        i += n;
    }

    return (false);
}

static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
    prof_thr_cnt_t *thr_cnt;
    prof_cnt_t tcnt;

    malloc_mutex_lock(&ctx->lock);

    memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
    ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
        volatile unsigned *epoch = &thr_cnt->epoch;

        while (true) {
            unsigned epoch0 = *epoch;

            /* Make sure epoch is even. */
            if (epoch0 & 1U)
                continue;

            memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

            /* Terminate if epoch didn't change while reading. */
            if (*epoch == epoch0)
                break;
        }

        ctx->cnt_summed.curobjs += tcnt.curobjs;
        ctx->cnt_summed.curbytes += tcnt.curbytes;
        if (opt_prof_accum) {
            ctx->cnt_summed.accumobjs += tcnt.accumobjs;
            ctx->cnt_summed.accumbytes += tcnt.accumbytes;
        }
    }

    if (ctx->cnt_summed.curobjs != 0)
        (*leak_nctx)++;

    /* Add to cnt_all. */
    cnt_all->curobjs += ctx->cnt_summed.curobjs;
    cnt_all->curbytes += ctx->cnt_summed.curbytes;
    if (opt_prof_accum) {
        cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
        cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
    }

    malloc_mutex_unlock(&ctx->lock);
}

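/*
 * The epoch loop above is the read side of a seqlock-style protocol: the
 * update side (in the prof inline fast paths, not in this file) is expected
 * to bump epoch to an odd value before modifying cnts and back to an even
 * value afterward, so a reader that observes the same even epoch before and
 * after its memcpy() gets a consistent snapshot without taking any lock.
 */
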
static void
prof_ctx_destroy(prof_ctx_t *ctx)
{

    /*
     * Check that ctx is still unused by any thread cache before destroying
     * it.  prof_lookup() artificially raises ctx->cnt_merge.curobjs in
     * order to avoid a race condition with this function, as does
     * prof_ctx_merge() in order to avoid a race between the main body of
     * prof_ctx_merge() and entry into this function.
     */
    prof_enter();
    malloc_mutex_lock(&ctx->lock);
    if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
        assert(ctx->cnt_merged.curbytes == 0);
        assert(ctx->cnt_merged.accumobjs == 0);
        assert(ctx->cnt_merged.accumbytes == 0);
        /* Remove ctx from bt2ctx. */
        if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
            assert(false);
        prof_leave();
        /* Destroy ctx. */
        malloc_mutex_unlock(&ctx->lock);
        bt_destroy(ctx->bt);
        malloc_mutex_destroy(&ctx->lock);
        idalloc(ctx);
    } else {
        /*
         * Compensate for increment in prof_ctx_merge() or
         * prof_lookup().
         */
        ctx->cnt_merged.curobjs--;
        malloc_mutex_unlock(&ctx->lock);
        prof_leave();
    }
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
    bool destroy;

    /* Merge cnt stats and detach from ctx. */
    malloc_mutex_lock(&ctx->lock);
    ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
    ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
    ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
    ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
    ql_remove(&ctx->cnts_ql, cnt, cnts_link);
    if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
        ctx->cnt_merged.curobjs == 0) {
        /*
         * Artificially raise ctx->cnt_merged.curobjs in order to keep
         * another thread from winning the race to destroy ctx while
         * this one has ctx->lock dropped.  Without this, it would be
         * possible for another thread to:
         *
         * 1) Sample an allocation associated with ctx.
         * 2) Deallocate the sampled object.
         * 3) Successfully prof_ctx_destroy(ctx).
         *
         * The result would be that ctx no longer exists by the time
         * this thread accesses it in prof_ctx_destroy().
         */
        ctx->cnt_merged.curobjs++;
        destroy = true;
    } else
        destroy = false;
    malloc_mutex_unlock(&ctx->lock);
    if (destroy)
        prof_ctx_destroy(ctx);
}

static bool
prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
{
    char buf[UMAX2S_BUFSIZE];
    unsigned i;

    if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
        assert(ctx->cnt_summed.curbytes == 0);
        assert(ctx->cnt_summed.accumobjs == 0);
        assert(ctx->cnt_summed.accumbytes == 0);
        return (false);
    }

    if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
        || prof_write(": ", propagate_err)
        || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
        propagate_err)
        || prof_write(" [", propagate_err)
        || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
        propagate_err)
        || prof_write(": ", propagate_err)
        || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
        propagate_err)
        || prof_write("] @", propagate_err))
        return (true);

    for (i = 0; i < bt->len; i++) {
        if (prof_write(" 0x", propagate_err)
            || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
            propagate_err))
            return (true);
    }

    if (prof_write("\n", propagate_err))
        return (true);

    return (false);
}

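/*
 * Each context therefore dumps as a single line of the form
 *
 *   <curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @ 0x... 0x... ...
 *
 * where the trailing values are the hexadecimal backtrace addresses; this is
 * the per-record format that pprof expects in heap profiles.
 */
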
static bool
prof_dump_maps(bool propagate_err)
{
    int mfd;
    char buf[UMAX2S_BUFSIZE];
    char *s;
    unsigned i, slen;
    /* /proc/<pid>/maps\0 */
    char mpath[6 + UMAX2S_BUFSIZE
        + 5 + 1];

    i = 0;

    s = "/proc/";
    slen = strlen(s);
    memcpy(&mpath[i], s, slen);
    i += slen;

    s = u2s(getpid(), 10, buf);
    slen = strlen(s);
    memcpy(&mpath[i], s, slen);
    i += slen;

    s = "/maps";
    slen = strlen(s);
    memcpy(&mpath[i], s, slen);
    i += slen;

    mpath[i] = '\0';

    mfd = open(mpath, O_RDONLY);
    if (mfd != -1) {
        ssize_t nread;

        if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
            propagate_err)
            return (true);
        nread = 0;
        do {
            prof_dump_buf_end += nread;
            if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
                /* Make space in prof_dump_buf before read(). */
                if (prof_flush(propagate_err) && propagate_err)
                    return (true);
            }
            nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
                PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
        } while (nread > 0);
        close(mfd);
    } else
        return (true);

    return (false);
}

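/*
 * The maps file is streamed through prof_dump_buf rather than written
 * directly, so its contents share the same buffering and prof_flush() path as
 * the prof_write() output that precedes it.
 */
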
static bool
prof_dump(const char *filename, bool leakcheck, bool propagate_err)
{
    prof_cnt_t cnt_all;
    size_t tabind;
    union {
        prof_bt_t   *p;
        void        *v;
    } bt;
    union {
        prof_ctx_t  *p;
        void        *v;
    } ctx;
    char buf[UMAX2S_BUFSIZE];
    size_t leak_nctx;

    prof_enter();
    prof_dump_fd = creat(filename, 0644);
    if (prof_dump_fd == -1) {
        if (propagate_err == false) {
            malloc_write("<jemalloc>: creat(\"");
            malloc_write(filename);
            malloc_write("\", 0644) failed\n");
            if (opt_abort)
                abort();
        }
        goto ERROR;
    }

    /* Merge per thread profile stats, and sum them in cnt_all. */
    memset(&cnt_all, 0, sizeof(prof_cnt_t));
    leak_nctx = 0;
    for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
        prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);

    /* Dump profile header. */
    if (prof_write("heap profile: ", propagate_err)
        || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
        || prof_write(": ", propagate_err)
        || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
        || prof_write(" [", propagate_err)
        || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
        || prof_write(": ", propagate_err)
        || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
        goto ERROR;

    if (opt_lg_prof_sample == 0) {
        if (prof_write("] @ heapprofile\n", propagate_err))
            goto ERROR;
    } else {
        if (prof_write("] @ heap_v2/", propagate_err)
            || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
            buf), propagate_err)
            || prof_write("\n", propagate_err))
            goto ERROR;
    }

    /* Dump per ctx profile stats. */
    for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
        == false;) {
        if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
            goto ERROR;
    }

    /* Dump /proc/<pid>/maps if possible. */
    if (prof_dump_maps(propagate_err))
        goto ERROR;

    if (prof_flush(propagate_err))
        goto ERROR;
    close(prof_dump_fd);
    prof_leave();

    if (leakcheck && cnt_all.curbytes != 0) {
        malloc_write("<jemalloc>: Leak summary: ");
        malloc_write(u2s(cnt_all.curbytes, 10, buf));
        malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
        malloc_write(u2s(cnt_all.curobjs, 10, buf));
        malloc_write((cnt_all.curobjs != 1) ? " objects, " :
            " object, ");
        malloc_write(u2s(leak_nctx, 10, buf));
        malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
        malloc_write("<jemalloc>: Run pprof on \"");
        malloc_write(filename);
        malloc_write("\" for leak detail\n");
    }

    return (false);
ERROR:
    prof_leave();
    return (true);
}

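/*
 * The "heap_v2/<2^lg_sample>" header variant tells pprof the average sampling
 * interval in bytes so that it can scale the sampled counts back up to an
 * estimate of actual heap usage; the plain "heapprofile" header indicates
 * unsampled data (opt_lg_prof_sample == 0).
 */
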
#define DUMP_FILENAME_BUFSIZE (PATH_MAX + UMAX2S_BUFSIZE \
    + 1 + UMAX2S_BUFSIZE + 2 + UMAX2S_BUFSIZE + 5 + 1)

static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{
    char buf[UMAX2S_BUFSIZE];
    char *s;
    unsigned i, slen;

    /*
     * Construct a filename of the form:
     *
     *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
     */

    i = 0;

    s = opt_prof_prefix;
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    s = ".";
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    s = u2s(getpid(), 10, buf);
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    s = ".";
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    s = u2s(prof_dump_seq, 10, buf);
    prof_dump_seq++;
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    s = ".";
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    filename[i] = v;
    i++;

    if (vseq != 0xffffffffffffffffLLU) {
        s = u2s(vseq, 10, buf);
        slen = strlen(s);
        memcpy(&filename[i], s, slen);
        i += slen;
    }

    s = ".heap";
    slen = strlen(s);
    memcpy(&filename[i], s, slen);
    i += slen;

    filename[i] = '\0';
}

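/*
 * For example, with the default prefix (PROF_PREFIX_DEFAULT, typically
 * "jeprof"), interval-triggered dumps are named
 *
 *   <prefix>.<pid>.0.i0.heap, <prefix>.<pid>.1.i1.heap, ...
 *
 * while the final dump passes the sentinel vseq value and so omits the
 * <vseq> digits, yielding <prefix>.<pid>.<seq>.f.heap.
 */
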
static void
prof_fdump(void)
{
    char filename[DUMP_FILENAME_BUFSIZE];

    if (prof_booted == false)
        return;

    if (opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(filename, opt_prof_leak, false);
    }
}

void
prof_idump(void)
{
    char filename[DUMP_FILENAME_BUFSIZE];

    if (prof_booted == false)
        return;
    malloc_mutex_lock(&enq_mtx);
    if (enq) {
        enq_idump = true;
        malloc_mutex_unlock(&enq_mtx);
        return;
    }
    malloc_mutex_unlock(&enq_mtx);

    if (opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'i', prof_dump_iseq);
        prof_dump_iseq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(filename, false, false);
    }
}

bool
prof_mdump(const char *filename)
{
    char filename_buf[DUMP_FILENAME_BUFSIZE];

    if (opt_prof == false || prof_booted == false)
        return (true);

    if (filename == NULL) {
        /* No filename specified, so automatically generate one. */
        if (opt_prof_prefix[0] == '\0')
            return (true);
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
        prof_dump_mseq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        filename = filename_buf;
    }
    return (prof_dump(filename, false, true));
}

void
prof_gdump(void)
{
    char filename[DUMP_FILENAME_BUFSIZE];

    if (prof_booted == false)
        return;
    malloc_mutex_lock(&enq_mtx);
    if (enq) {
        enq_gdump = true;
        malloc_mutex_unlock(&enq_mtx);
        return;
    }
    malloc_mutex_unlock(&enq_mtx);

    if (opt_prof_prefix[0] != '\0') {
        malloc_mutex_lock(&prof_dump_seq_mtx);
        prof_dump_filename(filename, 'u', prof_dump_useq);
        prof_dump_useq++;
        malloc_mutex_unlock(&prof_dump_seq_mtx);
        prof_dump(filename, false, false);
    }
}

static void
prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
    size_t ret1, ret2;
    uint64_t h;
    prof_bt_t *bt = (prof_bt_t *)key;

    assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
    assert(hash1 != NULL);
    assert(hash2 != NULL);

    h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
    if (minbits <= 32) {
        /*
         * Avoid doing multiple hashes, since a single hash provides
         * enough bits.
         */
        ret1 = h & ZU(0xffffffffU);
        ret2 = h >> 32;
    } else {
        ret1 = h;
        ret2 = hash(bt->vec, bt->len * sizeof(void *),
            0x8432a476666bbc13LLU);
    }

    *hash1 = ret1;
    *hash2 = ret2;
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
    const prof_bt_t *bt1 = (prof_bt_t *)k1;
    const prof_bt_t *bt2 = (prof_bt_t *)k2;

    if (bt1->len != bt2->len)
        return (false);
    return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

prof_tdata_t *
prof_tdata_init(void)
{
    prof_tdata_t *prof_tdata;

    /* Initialize an empty cache for this thread. */
    prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
    if (prof_tdata == NULL)
        return (NULL);

    if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
        prof_bt_hash, prof_bt_keycomp)) {
        idalloc(prof_tdata);
        return (NULL);
    }
    ql_new(&prof_tdata->lru_ql);

    prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
    if (prof_tdata->vec == NULL) {
        ckh_delete(&prof_tdata->bt2cnt);
        idalloc(prof_tdata);
        return (NULL);
    }

    prof_tdata->prn_state = 0;
    prof_tdata->threshold = 0;
    prof_tdata->accum = 0;

    PROF_TCACHE_SET(prof_tdata);

    return (prof_tdata);
}

static void
prof_tdata_cleanup(void *arg)
{
    prof_thr_cnt_t *cnt;
    prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;

    /*
     * Delete the hash table.  All of its contents can still be iterated
     * via the LRU.
     */
    ckh_delete(&prof_tdata->bt2cnt);

    /* Iteratively merge cnt's into the global stats and delete them. */
    while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
        ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
        prof_ctx_merge(cnt->ctx, cnt);
        idalloc(cnt);
    }

    idalloc(prof_tdata->vec);

    idalloc(prof_tdata);
    PROF_TCACHE_SET(NULL);
}

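/*
 * prof_tdata_cleanup() runs as the pthread TSD destructor registered for
 * prof_tdata_tsd in prof_boot2(), so each exiting thread folds its private
 * counters back into the global contexts before its cache is freed.
 */
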
void
prof_boot0(void)
{

    memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
        sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

    /*
     * opt_prof and prof_promote must be in their final state before any
     * arenas are initialized, so this function must be executed early.
     */

    if (opt_prof_leak && opt_prof == false) {
        /*
         * Enable opt_prof, but in such a way that profiles are never
         * automatically dumped.
         */
        opt_prof = true;
        opt_prof_gdump = false;
        prof_interval = 0;
    } else if (opt_prof) {
        if (opt_lg_prof_interval >= 0) {
            prof_interval = (((uint64_t)1U) <<
                opt_lg_prof_interval);
        } else
            prof_interval = 0;
    }

    prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
}

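/*
 * prof_promote is set when the sample interval exceeds the page size; in that
 * case sampled small allocations are promoted to large objects elsewhere in
 * the allocator so that each sampled object can carry its own profiling
 * context.
 */
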
bool
prof_boot2(void)
{

    if (opt_prof) {
        if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
            prof_bt_keycomp))
            return (true);
        if (malloc_mutex_init(&bt2ctx_mtx))
            return (true);
        if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
            != 0) {
            malloc_write(
                "<jemalloc>: Error in pthread_key_create()\n");
            abort();
        }

        prof_bt_max = (1U << opt_lg_prof_bt_max);
        if (malloc_mutex_init(&prof_dump_seq_mtx))
            return (true);

        if (malloc_mutex_init(&enq_mtx))
            return (true);
        enq = false;
        enq_idump = false;
        enq_gdump = false;

        if (atexit(prof_fdump) != 0) {
            malloc_write("<jemalloc>: Error in atexit()\n");
            if (opt_abort)
                abort();
        }
    }

#ifdef JEMALLOC_PROF_LIBGCC
    /*
     * Cause the backtracing machinery to allocate its internal state
     * before enabling profiling.
     */
    _Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

    prof_booted = true;

    return (false);
}

/******************************************************************************/
#endif /* JEMALLOC_PROF */