deps/jemalloc/src/prof.c

   1 #define JEMALLOC_PROF_C_
   2 #include "jemalloc/internal/jemalloc_internal.h"
   3 #ifdef JEMALLOC_PROF
   4 /******************************************************************************/
   5
   6 #ifdef JEMALLOC_PROF_LIBUNWIND
   7 #define UNW_LOCAL_ONLY
   8 #include <libunwind.h>
   9 #endif
  10
  11 #ifdef JEMALLOC_PROF_LIBGCC
  12 #include <unwind.h>
  13 #endif
  14
  15 /******************************************************************************/
  16 /* Data. */
  17
  18 bool            opt_prof = false;
  19 bool            opt_prof_active = true;
  20 size_t          opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
  21 size_t          opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
  22 ssize_t         opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
  23 bool            opt_prof_gdump = false;
  24 bool            opt_prof_leak = false;
  25 bool            opt_prof_accum = true;
  26 ssize_t         opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
  27 char            opt_prof_prefix[PATH_MAX + 1];
  28
  29 uint64_t        prof_interval;
  30 bool            prof_promote;
  31
  32 unsigned        prof_bt_max;
  33
  34 #ifndef NO_TLS
  35 __thread prof_tdata_t   *prof_tdata_tls
  36     JEMALLOC_ATTR(tls_model("initial-exec"));
  37 #endif
  38 pthread_key_t   prof_tdata_tsd;
  39
  40 /*
  41  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
  42  * structure that knows about all backtraces currently captured.
  43  */
  44 static ckh_t            bt2ctx;
  45 static malloc_mutex_t   bt2ctx_mtx;
  46
  47 static malloc_mutex_t   prof_dump_seq_mtx;
  48 static uint64_t         prof_dump_seq;
  49 static uint64_t         prof_dump_iseq;
  50 static uint64_t         prof_dump_mseq;
  51 static uint64_t         prof_dump_useq;
  52
  53 /*
  54  * This buffer is rather large for stack allocation, so use a single buffer for
  55  * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
  56  * it must be locked anyway during dumping.
  57  */
  58 static char             prof_dump_buf[PROF_DUMP_BUF_SIZE];
  59 static unsigned         prof_dump_buf_end;
  60 static int              prof_dump_fd;
  61
  62 /* Do not dump any profiles until bootstrapping is complete. */
  63 static bool             prof_booted = false;
  64
  65 static malloc_mutex_t   enq_mtx;
  66 static bool             enq;
  67 static bool             enq_idump;
  68 static bool             enq_gdump;
  69
  70 /******************************************************************************/
  71 /* Function prototypes for non-inline static functions. */
  72
  73 static prof_bt_t        *bt_dup(prof_bt_t *bt);
  74 static void     bt_destroy(prof_bt_t *bt);
  75 #ifdef JEMALLOC_PROF_LIBGCC
  76 static _Unwind_Reason_Code      prof_unwind_init_callback(
  77     struct _Unwind_Context *context, void *arg);
  78 static _Unwind_Reason_Code      prof_unwind_callback(
  79     struct _Unwind_Context *context, void *arg);
  80 #endif
  81 static bool     prof_flush(bool propagate_err);
  82 static bool     prof_write(const char *s, bool propagate_err);
  83 static void     prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
  84     size_t *leak_nctx);
  85 static void     prof_ctx_destroy(prof_ctx_t *ctx);
  86 static void     prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
  87 static bool     prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
  88     bool propagate_err);
  89 static bool     prof_dump_maps(bool propagate_err);
  90 static bool     prof_dump(const char *filename, bool leakcheck,
  91     bool propagate_err);
  92 static void     prof_dump_filename(char *filename, char v, int64_t vseq);
  93 static void     prof_fdump(void);
  94 static void     prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
  95     size_t *hash2);
  96 static bool     prof_bt_keycomp(const void *k1, const void *k2);
  97 static void     prof_tdata_cleanup(void *arg);
  98
  99 /******************************************************************************/
 100
 101 void
 102 bt_init(prof_bt_t *bt, void **vec)
 103 {
 104
 105         bt->vec = vec;
 106         bt->len = 0;
 107 }
 108
 109 static void
 110 bt_destroy(prof_bt_t *bt)
 111 {
 112
 113         idalloc(bt);
 114 }
 115
 116 static prof_bt_t *
 117 bt_dup(prof_bt_t *bt)
 118 {
 119         prof_bt_t *ret;
 120
 121         /*
 122          * Create a single allocation that has space for vec immediately
 123          * following the prof_bt_t structure.  The backtraces that get
 124          * stored in the backtrace caches are copied from stack-allocated
 125          * temporary variables, so size is known at creation time.  Making this
 126          * a contiguous object improves cache locality.
 127          */
 128         ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
 129             (bt->len * sizeof(void *)));
 130         if (ret == NULL)
 131                 return (NULL);
 132         ret->vec = (void **)((uintptr_t)ret +
 133             QUANTUM_CEILING(sizeof(prof_bt_t)));
 134         memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
 135         ret->len = bt->len;
 136
 137         return (ret);
 138 }
 139
 140 static inline void
 141 prof_enter(void)
 142 {
 143
 144         malloc_mutex_lock(&enq_mtx);
 145         enq = true;
 146         malloc_mutex_unlock(&enq_mtx);
 147
 148         malloc_mutex_lock(&bt2ctx_mtx);
 149 }
 150
 151 static inline void
 152 prof_leave(void)
 153 {
 154         bool idump, gdump;
 155
 156         malloc_mutex_unlock(&bt2ctx_mtx);
 157
 158         malloc_mutex_lock(&enq_mtx);
 159         enq = false;
 160         idump = enq_idump;
 161         enq_idump = false;
 162         gdump = enq_gdump;
 163         enq_gdump = false;
 164         malloc_mutex_unlock(&enq_mtx);
 165
 166         if (idump)
 167                 prof_idump();
 168         if (gdump)
 169                 prof_gdump();
 170 }
 171
 172 #ifdef JEMALLOC_PROF_LIBUNWIND
 173 void
 174 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 175 {
 176         unw_context_t uc;
 177         unw_cursor_t cursor;
 178         unsigned i;
 179         int err;
 180
 181         assert(bt->len == 0);
 182         assert(bt->vec != NULL);
 183         assert(max <= (1U << opt_lg_prof_bt_max));
 184
 185         unw_getcontext(&uc);
 186         unw_init_local(&cursor, &uc);
 187
 188         /* Throw away (nignore+1) stack frames, if that many exist. */
 189         for (i = 0; i < nignore + 1; i++) {
 190                 err = unw_step(&cursor);
 191                 if (err <= 0)
 192                         return;
 193         }
 194
 195         /*
 196          * Iterate over stack frames until there are no more, or until no space
 197          * remains in bt.
 198          */
 199         for (i = 0; i < max; i++) {
 200                 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
 201                 bt->len++;
 202                 err = unw_step(&cursor);
 203                 if (err <= 0)
 204                         break;
 205         }
 206 }
 207 #endif
 208 #ifdef JEMALLOC_PROF_LIBGCC
 209 static _Unwind_Reason_Code
 210 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
 211 {
 212
 213         return (_URC_NO_REASON);
 214 }
 215
 216 static _Unwind_Reason_Code
 217 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 218 {
 219         prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
 220
 221         if (data->nignore > 0)
 222                 data->nignore--;
 223         else {
 224                 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
 225                 data->bt->len++;
 226                 if (data->bt->len == data->max)
 227                         return (_URC_END_OF_STACK);
 228         }
 229
 230         return (_URC_NO_REASON);
 231 }
 232
 233 void
 234 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 235 {
 236         prof_unwind_data_t data = {bt, nignore, max};
 237
 238         _Unwind_Backtrace(prof_unwind_callback, &data);
 239 }
 240 #endif
 241 #ifdef JEMALLOC_PROF_GCC
 242 void
 243 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 244 {
 245 #define BT_FRAME(i)                                                     \
 246         if ((i) < nignore + max) {                                      \
 247                 void *p;                                                \
 248                 if (__builtin_frame_address(i) == 0)                    \
 249                         return;                                         \
 250                 p = __builtin_return_address(i);                        \
 251                 if (p == NULL)                                          \
 252                         return;                                         \
 253                 if (i >= nignore) {                                     \
 254                         bt->vec[(i) - nignore] = p;                     \
 255                         bt->len = (i) - nignore + 1;                    \
 256                 }                                                       \
 257         } else                                                          \
 258                 return;
 259
 260         assert(nignore <= 3);
 261         assert(max <= (1U << opt_lg_prof_bt_max));
 262
 263         BT_FRAME(0)
 264         BT_FRAME(1)
 265         BT_FRAME(2)
 266         BT_FRAME(3)
 267         BT_FRAME(4)
 268         BT_FRAME(5)
 269         BT_FRAME(6)
 270         BT_FRAME(7)
 271         BT_FRAME(8)
 272         BT_FRAME(9)
 273
 274         BT_FRAME(10)
 275         BT_FRAME(11)
 276         BT_FRAME(12)
 277         BT_FRAME(13)
 278         BT_FRAME(14)
 279         BT_FRAME(15)
 280         BT_FRAME(16)
 281         BT_FRAME(17)
 282         BT_FRAME(18)
 283         BT_FRAME(19)
 284
 285         BT_FRAME(20)
 286         BT_FRAME(21)
 287         BT_FRAME(22)
 288         BT_FRAME(23)
 289         BT_FRAME(24)
 290         BT_FRAME(25)
 291         BT_FRAME(26)
 292         BT_FRAME(27)
 293         BT_FRAME(28)
 294         BT_FRAME(29)
 295
 296         BT_FRAME(30)
 297         BT_FRAME(31)
 298         BT_FRAME(32)
 299         BT_FRAME(33)
 300         BT_FRAME(34)
 301         BT_FRAME(35)
 302         BT_FRAME(36)
 303         BT_FRAME(37)
 304         BT_FRAME(38)
 305         BT_FRAME(39)
 306
 307         BT_FRAME(40)
 308         BT_FRAME(41)
 309         BT_FRAME(42)
 310         BT_FRAME(43)
 311         BT_FRAME(44)
 312         BT_FRAME(45)
 313         BT_FRAME(46)
 314         BT_FRAME(47)
 315         BT_FRAME(48)
 316         BT_FRAME(49)
 317
 318         BT_FRAME(50)
 319         BT_FRAME(51)
 320         BT_FRAME(52)
 321         BT_FRAME(53)
 322         BT_FRAME(54)
 323         BT_FRAME(55)
 324         BT_FRAME(56)
 325         BT_FRAME(57)
 326         BT_FRAME(58)
 327         BT_FRAME(59)
 328
 329         BT_FRAME(60)
 330         BT_FRAME(61)
 331         BT_FRAME(62)
 332         BT_FRAME(63)
 333         BT_FRAME(64)
 334         BT_FRAME(65)
 335         BT_FRAME(66)
 336         BT_FRAME(67)
 337         BT_FRAME(68)
 338         BT_FRAME(69)
 339
 340         BT_FRAME(70)
 341         BT_FRAME(71)
 342         BT_FRAME(72)
 343         BT_FRAME(73)
 344         BT_FRAME(74)
 345         BT_FRAME(75)
 346         BT_FRAME(76)
 347         BT_FRAME(77)
 348         BT_FRAME(78)
 349         BT_FRAME(79)
 350
 351         BT_FRAME(80)
 352         BT_FRAME(81)
 353         BT_FRAME(82)
 354         BT_FRAME(83)
 355         BT_FRAME(84)
 356         BT_FRAME(85)
 357         BT_FRAME(86)
 358         BT_FRAME(87)
 359         BT_FRAME(88)
 360         BT_FRAME(89)
 361
 362         BT_FRAME(90)
 363         BT_FRAME(91)
 364         BT_FRAME(92)
 365         BT_FRAME(93)
 366         BT_FRAME(94)
 367         BT_FRAME(95)
 368         BT_FRAME(96)
 369         BT_FRAME(97)
 370         BT_FRAME(98)
 371         BT_FRAME(99)
 372
 373         BT_FRAME(100)
 374         BT_FRAME(101)
 375         BT_FRAME(102)
 376         BT_FRAME(103)
 377         BT_FRAME(104)
 378         BT_FRAME(105)
 379         BT_FRAME(106)
 380         BT_FRAME(107)
 381         BT_FRAME(108)
 382         BT_FRAME(109)
 383
 384         BT_FRAME(110)
 385         BT_FRAME(111)
 386         BT_FRAME(112)
 387         BT_FRAME(113)
 388         BT_FRAME(114)
 389         BT_FRAME(115)
 390         BT_FRAME(116)
 391         BT_FRAME(117)
 392         BT_FRAME(118)
 393         BT_FRAME(119)
 394
 395         BT_FRAME(120)
 396         BT_FRAME(121)
 397         BT_FRAME(122)
 398         BT_FRAME(123)
 399         BT_FRAME(124)
 400         BT_FRAME(125)
 401         BT_FRAME(126)
 402         BT_FRAME(127)
 403
 404         /* Extras to compensate for nignore. */
 405         BT_FRAME(128)
 406         BT_FRAME(129)
 407         BT_FRAME(130)
 408 #undef BT_FRAME
 409 }
 410 #endif
 411
 412 prof_thr_cnt_t *
 413 prof_lookup(prof_bt_t *bt)
 414 {
 415         union {
 416                 prof_thr_cnt_t  *p;
 417                 void            *v;
 418         } ret;
 419         prof_tdata_t *prof_tdata;
 420
 421         prof_tdata = PROF_TCACHE_GET();
 422         if (prof_tdata == NULL) {
 423                 prof_tdata = prof_tdata_init();
 424                 if (prof_tdata == NULL)
 425                         return (NULL);
 426         }
 427
 428         if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
 429                 union {
 430                         prof_bt_t       *p;
 431                         void            *v;
 432                 } btkey;
 433                 union {
 434                         prof_ctx_t      *p;
 435                         void            *v;
 436                 } ctx;
 437                 bool new_ctx;
 438
 439                 /*
 440                  * This thread's cache lacks bt.  Look for it in the global
 441                  * cache.
 442                  */
 443                 prof_enter();
 444                 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
 445                         /* bt has never been seen before.  Insert it. */
 446                         ctx.v = imalloc(sizeof(prof_ctx_t));
 447                         if (ctx.v == NULL) {
 448                                 prof_leave();
 449                                 return (NULL);
 450                         }
 451                         btkey.p = bt_dup(bt);
 452                         if (btkey.v == NULL) {
 453                                 prof_leave();
 454                                 idalloc(ctx.v);
 455                                 return (NULL);
 456                         }
 457                         ctx.p->bt = btkey.p;
 458                         if (malloc_mutex_init(&ctx.p->lock)) {
 459                                 prof_leave();
 460                                 idalloc(btkey.v);
 461                                 idalloc(ctx.v);
 462                                 return (NULL);
 463                         }
 464                         memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
 465                         ql_new(&ctx.p->cnts_ql);
 466                         if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
 467                                 /* OOM. */
 468                                 prof_leave();
 469                                 malloc_mutex_destroy(&ctx.p->lock);
 470                                 idalloc(btkey.v);
 471                                 idalloc(ctx.v);
 472                                 return (NULL);
 473                         }
 474                         /*
 475                          * Artificially raise curobjs, in order to avoid a race
 476                          * condition with prof_ctx_merge()/prof_ctx_destroy().
 477                          *
 478                          * No locking is necessary for ctx here because no other
 479                          * threads have had the opportunity to fetch it from
 480                          * bt2ctx yet.
 481                          */
 482                         ctx.p->cnt_merged.curobjs++;
 483                         new_ctx = true;
 484                 } else {
 485                         /*
 486                          * Artificially raise curobjs, in order to avoid a race
 487                          * condition with prof_ctx_merge()/prof_ctx_destroy().
 488                          */
 489                         malloc_mutex_lock(&ctx.p->lock);
 490                         ctx.p->cnt_merged.curobjs++;
 491                         malloc_mutex_unlock(&ctx.p->lock);
 492                         new_ctx = false;
 493                 }
 494                 prof_leave();
 495
 496                 /* Link a prof_thd_cnt_t into ctx for this thread. */
 497                 if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
 498                     == (ZU(1) << opt_lg_prof_tcmax)) {
 499                         assert(ckh_count(&prof_tdata->bt2cnt) > 0);
 500                         /*
 501                          * Flush the least recently used cnt in order to keep
 502                          * bt2cnt from becoming too large.
 503                          */
 504                         ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
 505                         assert(ret.v != NULL);
 506                         if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
 507                             NULL, NULL))
 508                                 assert(false);
 509                         ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 510                         prof_ctx_merge(ret.p->ctx, ret.p);
 511                         /* ret can now be re-used. */
 512                 } else {
 513                         assert(opt_lg_prof_tcmax < 0 ||
 514                             ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
 515                             opt_lg_prof_tcmax));
 516                         /* Allocate and partially initialize a new cnt. */
 517                         ret.v = imalloc(sizeof(prof_thr_cnt_t));
 518                         if (ret.p == NULL) {
 519                                 if (new_ctx)
 520                                         prof_ctx_destroy(ctx.p);
 521                                 return (NULL);
 522                         }
 523                         ql_elm_new(ret.p, cnts_link);
 524                         ql_elm_new(ret.p, lru_link);
 525                 }
 526                 /* Finish initializing ret. */
 527                 ret.p->ctx = ctx.p;
 528                 ret.p->epoch = 0;
 529                 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 530                 if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
 531                         if (new_ctx)
 532                                 prof_ctx_destroy(ctx.p);
 533                         idalloc(ret.v);
 534                         return (NULL);
 535                 }
 536                 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 537                 malloc_mutex_lock(&ctx.p->lock);
 538                 ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
 539                 ctx.p->cnt_merged.curobjs--;
 540                 malloc_mutex_unlock(&ctx.p->lock);
 541         } else {
 542                 /* Move ret to the front of the LRU. */
 543                 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 544                 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 545         }
 546
 547         return (ret.p);
 548 }
 549
 550 static bool
 551 prof_flush(bool propagate_err)
 552 {
 553         bool ret = false;
 554         ssize_t err;
 555
 556         err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
 557         if (err == -1) {
 558                 if (propagate_err == false) {
 559                         malloc_write("<jemalloc>: write() failed during heap "
 560                             "profile flush\n");
 561                         if (opt_abort)
 562                                 abort();
 563                 }
 564                 ret = true;
 565         }
 566         prof_dump_buf_end = 0;
 567
 568         return (ret);
 569 }
 570
 571 static bool
 572 prof_write(const char *s, bool propagate_err)
 573 {
 574         unsigned i, slen, n;
 575
 576         i = 0;
 577         slen = strlen(s);
 578         while (i < slen) {
 579                 /* Flush the buffer if it is full. */
 580                 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
 581                         if (prof_flush(propagate_err) && propagate_err)
 582                                 return (true);
 583
 584                 if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
 585                         /* Finish writing. */
 586                         n = slen - i;
 587                 } else {
 588                         /* Write as much of s as will fit. */
 589                         n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
 590                 }
 591                 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
 592                 prof_dump_buf_end += n;
 593                 i += n;
 594         }
 595
 596         return (false);
 597 }
 598
 599 static void
 600 prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 601 {
 602         prof_thr_cnt_t *thr_cnt;
 603         prof_cnt_t tcnt;
 604
 605         malloc_mutex_lock(&ctx->lock);
 606
 607         memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
 608         ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
 609                 volatile unsigned *epoch = &thr_cnt->epoch;
 610
 611                 while (true) {
 612                         unsigned epoch0 = *epoch;
 613
 614                         /* Make sure epoch is even. */
 615                         if (epoch0 & 1U)
 616                                 continue;
 617
 618                         memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
 619
 620                         /* Terminate if epoch didn't change while reading. */
 621                         if (*epoch == epoch0)
 622                                 break;
 623                 }
 624
 625                 ctx->cnt_summed.curobjs += tcnt.curobjs;
 626                 ctx->cnt_summed.curbytes += tcnt.curbytes;
 627                 if (opt_prof_accum) {
 628                         ctx->cnt_summed.accumobjs += tcnt.accumobjs;
 629                         ctx->cnt_summed.accumbytes += tcnt.accumbytes;
 630                 }
 631         }
 632
 633         if (ctx->cnt_summed.curobjs != 0)
 634                 (*leak_nctx)++;
 635
 636         /* Add to cnt_all. */
 637         cnt_all->curobjs += ctx->cnt_summed.curobjs;
 638         cnt_all->curbytes += ctx->cnt_summed.curbytes;
 639         if (opt_prof_accum) {
 640                 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
 641                 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
 642         }
 643
 644         malloc_mutex_unlock(&ctx->lock);
 645 }
 646
 647 static void
 648 prof_ctx_destroy(prof_ctx_t *ctx)
 649 {
 650
 651         /*
 652          * Check that ctx is still unused by any thread cache before destroying
 653          * it.  prof_lookup() artificially raises ctx->cnt_merge.curobjs in
 654          * order to avoid a race condition with this function, as does
 655          * prof_ctx_merge() in order to avoid a race between the main body of
 656          * prof_ctx_merge() and entry into this function.
 657          */
 658         prof_enter();
 659         malloc_mutex_lock(&ctx->lock);
 660         if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
 661                 assert(ctx->cnt_merged.curbytes == 0);
 662                 assert(ctx->cnt_merged.accumobjs == 0);
 663                 assert(ctx->cnt_merged.accumbytes == 0);
 664                 /* Remove ctx from bt2ctx. */
 665                 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
 666                         assert(false);
 667                 prof_leave();
 668                 /* Destroy ctx. */
 669                 malloc_mutex_unlock(&ctx->lock);
 670                 bt_destroy(ctx->bt);
 671                 malloc_mutex_destroy(&ctx->lock);
 672                 idalloc(ctx);
 673         } else {
 674                 /*
 675                  * Compensate for increment in prof_ctx_merge() or
 676                  * prof_lookup().
 677                  */
 678                 ctx->cnt_merged.curobjs--;
 679                 malloc_mutex_unlock(&ctx->lock);
 680                 prof_leave();
 681         }
 682 }
 683
 684 static void
 685 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 686 {
 687         bool destroy;
 688
 689         /* Merge cnt stats and detach from ctx. */
 690         malloc_mutex_lock(&ctx->lock);
 691         ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
 692         ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
 693         ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
 694         ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
 695         ql_remove(&ctx->cnts_ql, cnt, cnts_link);
 696         if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
 697             ctx->cnt_merged.curobjs == 0) {
 698                 /*
 699                  * Artificially raise ctx->cnt_merged.curobjs in order to keep
 700                  * another thread from winning the race to destroy ctx while
 701                  * this one has ctx->lock dropped.  Without this, it would be
 702                  * possible for another thread to:
 703                  *
 704                  * 1) Sample an allocation associated with ctx.
 705                  * 2) Deallocate the sampled object.
 706                  * 3) Successfully prof_ctx_destroy(ctx).
 707                  *
 708                  * The result would be that ctx no longer exists by the time
 709                  * this thread accesses it in prof_ctx_destroy().
 710                  */
 711                 ctx->cnt_merged.curobjs++;
 712                 destroy = true;
 713         } else
 714                 destroy = false;
 715         malloc_mutex_unlock(&ctx->lock);
 716         if (destroy)
 717                 prof_ctx_destroy(ctx);
 718 }
 719
 720 static bool
 721 prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
 722 {
 723         char buf[UMAX2S_BUFSIZE];
 724         unsigned i;
 725
 726         if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
 727                 assert(ctx->cnt_summed.curbytes == 0);
 728                 assert(ctx->cnt_summed.accumobjs == 0);
 729                 assert(ctx->cnt_summed.accumbytes == 0);
 730                 return (false);
 731         }
 732
 733         if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
 734             || prof_write(": ", propagate_err)
 735             || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
 736             propagate_err)
 737             || prof_write(" [", propagate_err)
 738             || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
 739             propagate_err)
 740             || prof_write(": ", propagate_err)
 741             || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
 742             propagate_err)
 743             || prof_write("] @", propagate_err))
 744                 return (true);
 745
 746         for (i = 0; i < bt->len; i++) {
 747                 if (prof_write(" 0x", propagate_err)
 748                     || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
 749                     propagate_err))
 750                         return (true);
 751         }
 752
 753         if (prof_write("\n", propagate_err))
 754                 return (true);
 755
 756         return (false);
 757 }
 758
 759 static bool
 760 prof_dump_maps(bool propagate_err)
 761 {
 762         int mfd;
 763         char buf[UMAX2S_BUFSIZE];
 764         char *s;
 765         unsigned i, slen;
 766         /*         /proc/<pid>/maps\0 */
 767         char mpath[6     + UMAX2S_BUFSIZE
 768                               + 5  + 1];
 769
 770         i = 0;
 771
 772         s = "/proc/";
 773         slen = strlen(s);
 774         memcpy(&mpath[i], s, slen);
 775         i += slen;
 776
 777         s = u2s(getpid(), 10, buf);
 778         slen = strlen(s);
 779         memcpy(&mpath[i], s, slen);
 780         i += slen;
 781
 782         s = "/maps";
 783         slen = strlen(s);
 784         memcpy(&mpath[i], s, slen);
 785         i += slen;
 786
 787         mpath[i] = '\0';
 788
 789         mfd = open(mpath, O_RDONLY);
 790         if (mfd != -1) {
 791                 ssize_t nread;
 792
 793                 if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
 794                     propagate_err)
 795                         return (true);
 796                 nread = 0;
 797                 do {
 798                         prof_dump_buf_end += nread;
 799                         if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
 800                                 /* Make space in prof_dump_buf before read(). */
 801                                 if (prof_flush(propagate_err) && propagate_err)
 802                                         return (true);
 803                         }
 804                         nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
 805                             PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
 806                 } while (nread > 0);
 807                 close(mfd);
 808         } else
 809                 return (true);
 810
 811         return (false);
 812 }
 813
 814 static bool
 815 prof_dump(const char *filename, bool leakcheck, bool propagate_err)
 816 {
 817         prof_cnt_t cnt_all;
 818         size_t tabind;
 819         union {
 820                 prof_bt_t       *p;
 821                 void            *v;
 822         } bt;
 823         union {
 824                 prof_ctx_t      *p;
 825                 void            *v;
 826         } ctx;
 827         char buf[UMAX2S_BUFSIZE];
 828         size_t leak_nctx;
 829
 830         prof_enter();
 831         prof_dump_fd = creat(filename, 0644);
 832         if (prof_dump_fd == -1) {
 833                 if (propagate_err == false) {
 834                         malloc_write("<jemalloc>: creat(\"");
 835                         malloc_write(filename);
 836                         malloc_write("\", 0644) failed\n");
 837                         if (opt_abort)
 838                                 abort();
 839                 }
 840                 goto ERROR;
 841         }
 842
 843         /* Merge per thread profile stats, and sum them in cnt_all. */
 844         memset(&cnt_all, 0, sizeof(prof_cnt_t));
 845         leak_nctx = 0;
 846         for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
 847                 prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
 848
 849         /* Dump profile header. */
 850         if (prof_write("heap profile: ", propagate_err)
 851             || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
 852             || prof_write(": ", propagate_err)
 853             || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
 854             || prof_write(" [", propagate_err)
 855             || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
 856             || prof_write(": ", propagate_err)
 857             || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
 858                 goto ERROR;
 859
 860         if (opt_lg_prof_sample == 0) {
 861                 if (prof_write("] @ heapprofile\n", propagate_err))
 862                         goto ERROR;
 863         } else {
 864                 if (prof_write("] @ heap_v2/", propagate_err)
 865                     || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
 866                     buf), propagate_err)
 867                     || prof_write("\n", propagate_err))
 868                         goto ERROR;
 869         }
 870
 871         /* Dump  per ctx profile stats. */
 872         for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
 873             == false;) {
 874                 if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
 875                         goto ERROR;
 876         }
 877
 878         /* Dump /proc/<pid>/maps if possible. */
 879         if (prof_dump_maps(propagate_err))
 880                 goto ERROR;
 881
 882         if (prof_flush(propagate_err))
 883                 goto ERROR;
 884         close(prof_dump_fd);
 885         prof_leave();
 886
 887         if (leakcheck && cnt_all.curbytes != 0) {
 888                 malloc_write("<jemalloc>: Leak summary: ");
 889                 malloc_write(u2s(cnt_all.curbytes, 10, buf));
 890                 malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
 891                 malloc_write(u2s(cnt_all.curobjs, 10, buf));
 892                 malloc_write((cnt_all.curobjs != 1) ? " objects, " :
 893                     " object, ");
 894                 malloc_write(u2s(leak_nctx, 10, buf));
 895                 malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
 896                 malloc_write("<jemalloc>: Run pprof on \"");
 897                 malloc_write(filename);
 898                 malloc_write("\" for leak detail\n");
 899         }
 900
 901         return (false);
 902 ERROR:
 903         prof_leave();
 904         return (true);
 905 }
 906
 907 #define DUMP_FILENAME_BUFSIZE   (PATH_MAX+ UMAX2S_BUFSIZE               \
 908                                                + 1                      \
 909                                                 + UMAX2S_BUFSIZE        \
 910                                                      + 2                \
 911                                                        + UMAX2S_BUFSIZE \
 912                                                              + 5  + 1)
 913 static void
 914 prof_dump_filename(char *filename, char v, int64_t vseq)
 915 {
 916         char buf[UMAX2S_BUFSIZE];
 917         char *s;
 918         unsigned i, slen;
 919
 920         /*
 921          * Construct a filename of the form:
 922          *
 923          *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
 924          */
 925
 926         i = 0;
 927
 928         s = opt_prof_prefix;
 929         slen = strlen(s);
 930         memcpy(&filename[i], s, slen);
 931         i += slen;
 932
 933         s = ".";
 934         slen = strlen(s);
 935         memcpy(&filename[i], s, slen);
 936         i += slen;
 937
 938         s = u2s(getpid(), 10, buf);
 939         slen = strlen(s);
 940         memcpy(&filename[i], s, slen);
 941         i += slen;
 942
 943         s = ".";
 944         slen = strlen(s);
 945         memcpy(&filename[i], s, slen);
 946         i += slen;
 947
 948         s = u2s(prof_dump_seq, 10, buf);
 949         prof_dump_seq++;
 950         slen = strlen(s);
 951         memcpy(&filename[i], s, slen);
 952         i += slen;
 953
 954         s = ".";
 955         slen = strlen(s);
 956         memcpy(&filename[i], s, slen);
 957         i += slen;
 958
 959         filename[i] = v;
 960         i++;
 961
 962         if (vseq != 0xffffffffffffffffLLU) {
 963                 s = u2s(vseq, 10, buf);
 964                 slen = strlen(s);
 965                 memcpy(&filename[i], s, slen);
 966                 i += slen;
 967         }
 968
 969         s = ".heap";
 970         slen = strlen(s);
 971         memcpy(&filename[i], s, slen);
 972         i += slen;
 973
 974         filename[i] = '\0';
 975 }
 976
 977 static void
 978 prof_fdump(void)
 979 {
 980         char filename[DUMP_FILENAME_BUFSIZE];
 981
 982         if (prof_booted == false)
 983                 return;
 984
 985         if (opt_prof_prefix[0] != '\0') {
 986                 malloc_mutex_lock(&prof_dump_seq_mtx);
 987                 prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
 988                 malloc_mutex_unlock(&prof_dump_seq_mtx);
 989                 prof_dump(filename, opt_prof_leak, false);
 990         }
 991 }
 992
 993 void
 994 prof_idump(void)
 995 {
 996         char filename[DUMP_FILENAME_BUFSIZE];
 997
 998         if (prof_booted == false)
 999                 return;
1000         malloc_mutex_lock(&enq_mtx);
1001         if (enq) {
1002                 enq_idump = true;
1003                 malloc_mutex_unlock(&enq_mtx);
1004                 return;
1005         }
1006         malloc_mutex_unlock(&enq_mtx);
1007
1008         if (opt_prof_prefix[0] != '\0') {
1009                 malloc_mutex_lock(&prof_dump_seq_mtx);
1010                 prof_dump_filename(filename, 'i', prof_dump_iseq);
1011                 prof_dump_iseq++;
1012                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1013                 prof_dump(filename, false, false);
1014         }
1015 }
1016
1017 bool
1018 prof_mdump(const char *filename)
1019 {
1020         char filename_buf[DUMP_FILENAME_BUFSIZE];
1021
1022         if (opt_prof == false || prof_booted == false)
1023                 return (true);
1024
1025         if (filename == NULL) {
1026                 /* No filename specified, so automatically generate one. */
1027                 if (opt_prof_prefix[0] == '\0')
1028                         return (true);
1029                 malloc_mutex_lock(&prof_dump_seq_mtx);
1030                 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1031                 prof_dump_mseq++;
1032                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1033                 filename = filename_buf;
1034         }
1035         return (prof_dump(filename, false, true));
1036 }
1037
1038 void
1039 prof_gdump(void)
1040 {
1041         char filename[DUMP_FILENAME_BUFSIZE];
1042
1043         if (prof_booted == false)
1044                 return;
1045         malloc_mutex_lock(&enq_mtx);
1046         if (enq) {
1047                 enq_gdump = true;
1048                 malloc_mutex_unlock(&enq_mtx);
1049                 return;
1050         }
1051         malloc_mutex_unlock(&enq_mtx);
1052
1053         if (opt_prof_prefix[0] != '\0') {
1054                 malloc_mutex_lock(&prof_dump_seq_mtx);
1055                 prof_dump_filename(filename, 'u', prof_dump_useq);
1056                 prof_dump_useq++;
1057                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1058                 prof_dump(filename, false, false);
1059         }
1060 }
1061
1062 static void
1063 prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
1064 {
1065         size_t ret1, ret2;
1066         uint64_t h;
1067         prof_bt_t *bt = (prof_bt_t *)key;
1068
1069         assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
1070         assert(hash1 != NULL);
1071         assert(hash2 != NULL);
1072
1073         h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
1074         if (minbits <= 32) {
1075                 /*
1076                  * Avoid doing multiple hashes, since a single hash provides
1077                  * enough bits.
1078                  */
1079                 ret1 = h & ZU(0xffffffffU);
1080                 ret2 = h >> 32;
1081         } else {
1082                 ret1 = h;
1083                 ret2 = hash(bt->vec, bt->len * sizeof(void *),
1084                     0x8432a476666bbc13LLU);
1085         }
1086
1087         *hash1 = ret1;
1088         *hash2 = ret2;
1089 }
1090
1091 static bool
1092 prof_bt_keycomp(const void *k1, const void *k2)
1093 {
1094         const prof_bt_t *bt1 = (prof_bt_t *)k1;
1095         const prof_bt_t *bt2 = (prof_bt_t *)k2;
1096
1097         if (bt1->len != bt2->len)
1098                 return (false);
1099         return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1100 }
1101
1102 prof_tdata_t *
1103 prof_tdata_init(void)
1104 {
1105         prof_tdata_t *prof_tdata;
1106
1107         /* Initialize an empty cache for this thread. */
1108         prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1109         if (prof_tdata == NULL)
1110                 return (NULL);
1111
1112         if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1113             prof_bt_hash, prof_bt_keycomp)) {
1114                 idalloc(prof_tdata);
1115                 return (NULL);
1116         }
1117         ql_new(&prof_tdata->lru_ql);
1118
1119         prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
1120         if (prof_tdata->vec == NULL) {
1121                 ckh_delete(&prof_tdata->bt2cnt);
1122                 idalloc(prof_tdata);
1123                 return (NULL);
1124         }
1125
1126         prof_tdata->prn_state = 0;
1127         prof_tdata->threshold = 0;
1128         prof_tdata->accum = 0;
1129
1130         PROF_TCACHE_SET(prof_tdata);
1131
1132         return (prof_tdata);
1133 }
1134
1135 static void
1136 prof_tdata_cleanup(void *arg)
1137 {
1138         prof_thr_cnt_t *cnt;
1139         prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
1140
1141         /*
1142          * Delete the hash table.  All of its contents can still be iterated
1143          * over via the LRU.
1144          */
1145         ckh_delete(&prof_tdata->bt2cnt);
1146
1147         /* Iteratively merge cnt's into the global stats and delete them. */
1148         while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1149                 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1150                 prof_ctx_merge(cnt->ctx, cnt);
1151                 idalloc(cnt);
1152         }
1153
1154         idalloc(prof_tdata->vec);
1155
1156         idalloc(prof_tdata);
1157         PROF_TCACHE_SET(NULL);
1158 }
1159
1160 void
1161 prof_boot0(void)
1162 {
1163
1164         memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1165             sizeof(PROF_PREFIX_DEFAULT));
1166 }
1167
1168 void
1169 prof_boot1(void)
1170 {
1171
1172         /*
1173          * opt_prof and prof_promote must be in their final state before any
1174          * arenas are initialized, so this function must be executed early.
1175          */
1176
1177         if (opt_prof_leak && opt_prof == false) {
1178                 /*
1179                  * Enable opt_prof, but in such a way that profiles are never
1180                  * automatically dumped.
1181                  */
1182                 opt_prof = true;
1183                 opt_prof_gdump = false;
1184                 prof_interval = 0;
1185         } else if (opt_prof) {
1186                 if (opt_lg_prof_interval >= 0) {
1187                         prof_interval = (((uint64_t)1U) <<
1188                             opt_lg_prof_interval);
1189                 } else
1190                         prof_interval = 0;
1191         }
1192
1193         prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
1194 }
1195
1196 bool
1197 prof_boot2(void)
1198 {
1199
1200         if (opt_prof) {
1201                 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1202                     prof_bt_keycomp))
1203                         return (true);
1204                 if (malloc_mutex_init(&bt2ctx_mtx))
1205                         return (true);
1206                 if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
1207                     != 0) {
1208                         malloc_write(
1209                             "<jemalloc>: Error in pthread_key_create()\n");
1210                         abort();
1211                 }
1212
1213                 prof_bt_max = (1U << opt_lg_prof_bt_max);
1214                 if (malloc_mutex_init(&prof_dump_seq_mtx))
1215                         return (true);
1216
1217                 if (malloc_mutex_init(&enq_mtx))
1218                         return (true);
1219                 enq = false;
1220                 enq_idump = false;
1221                 enq_gdump = false;
1222
1223                 if (atexit(prof_fdump) != 0) {
1224                         malloc_write("<jemalloc>: Error in atexit()\n");
1225                         if (opt_abort)
1226                                 abort();
1227                 }
1228         }
1229
1230 #ifdef JEMALLOC_PROF_LIBGCC
1231         /*
1232          * Cause the backtracing machinery to allocate its internal state
1233          * before enabling profiling.
1234          */
1235         _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1236 #endif
1237
1238         prof_booted = true;
1239
1240         return (false);
1241 }
1242
1243 /******************************************************************************/
1244 #endif /* JEMALLOC_PROF */