deps/jemalloc/src/prof.c

   1 #define JEMALLOC_PROF_C_
   2 #include "jemalloc/internal/jemalloc_internal.h"
   3 #ifdef JEMALLOC_PROF
   4 /******************************************************************************/
   5
   6 #ifdef JEMALLOC_PROF_LIBUNWIND
   7 #define UNW_LOCAL_ONLY
   8 #include <libunwind.h>
   9 #endif
  10
  11 #ifdef JEMALLOC_PROF_LIBGCC
  12 #include <unwind.h>
  13 #endif
  14
  15 /******************************************************************************/
  16 /* Data. */
  17
  18 bool            opt_prof = false;
  19 bool            opt_prof_active = true;
  20 size_t          opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
  21 size_t          opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
  22 ssize_t         opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
  23 bool            opt_prof_gdump = false;
  24 bool            opt_prof_leak = false;
  25 bool            opt_prof_accum = true;
  26 ssize_t         opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
  27 char            opt_prof_prefix[PATH_MAX + 1];
  28
  29 uint64_t        prof_interval;
  30 bool            prof_promote;
  31
  32 unsigned        prof_bt_max;
  33
  34 #ifndef NO_TLS
  35 __thread prof_tdata_t   *prof_tdata_tls
  36     JEMALLOC_ATTR(tls_model("initial-exec"));
  37 #endif
  38 pthread_key_t   prof_tdata_tsd;
  39
  40 /*
  41  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
  42  * structure that knows about all backtraces currently captured.
  43  */
  44 static ckh_t            bt2ctx;
  45 static malloc_mutex_t   bt2ctx_mtx;
  46
  47 static malloc_mutex_t   prof_dump_seq_mtx;
  48 static uint64_t         prof_dump_seq;
  49 static uint64_t         prof_dump_iseq;
  50 static uint64_t         prof_dump_mseq;
  51 static uint64_t         prof_dump_useq;
  52
  53 /*
  54  * This buffer is rather large for stack allocation, so use a single buffer for
  55  * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
  56  * it must be locked anyway during dumping.
  57  */
  58 static char             prof_dump_buf[PROF_DUMP_BUF_SIZE];
  59 static unsigned         prof_dump_buf_end;
  60 static int              prof_dump_fd;
  61
  62 /* Do not dump any profiles until bootstrapping is complete. */
  63 static bool             prof_booted = false;
  64
  65 static malloc_mutex_t   enq_mtx;
  66 static bool             enq;
  67 static bool             enq_idump;
  68 static bool             enq_gdump;
  69
  70 /******************************************************************************/
  71 /* Function prototypes for non-inline static functions. */
  72
  73 static prof_bt_t        *bt_dup(prof_bt_t *bt);
  74 static void     bt_destroy(prof_bt_t *bt);
  75 #ifdef JEMALLOC_PROF_LIBGCC
  76 static _Unwind_Reason_Code      prof_unwind_init_callback(
  77     struct _Unwind_Context *context, void *arg);
  78 static _Unwind_Reason_Code      prof_unwind_callback(
  79     struct _Unwind_Context *context, void *arg);
  80 #endif
  81 static bool     prof_flush(bool propagate_err);
  82 static bool     prof_write(const char *s, bool propagate_err);
  83 static void     prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
  84     size_t *leak_nctx);
  85 static void     prof_ctx_destroy(prof_ctx_t *ctx);
  86 static void     prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
  87 static bool     prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
  88     bool propagate_err);
  89 static bool     prof_dump_maps(bool propagate_err);
  90 static bool     prof_dump(const char *filename, bool leakcheck,
  91     bool propagate_err);
  92 static void     prof_dump_filename(char *filename, char v, int64_t vseq);
  93 static void     prof_fdump(void);
  94 static void     prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
  95     size_t *hash2);
  96 static bool     prof_bt_keycomp(const void *k1, const void *k2);
  97 static void     prof_tdata_cleanup(void *arg);
  98
  99 /******************************************************************************/
 100
 101 void
 102 bt_init(prof_bt_t *bt, void **vec)
 103 {
 104
 105         bt->vec = vec;
 106         bt->len = 0;
 107 }
 108
 109 static void
 110 bt_destroy(prof_bt_t *bt)
 111 {
 112
 113         idalloc(bt);
 114 }
 115
 116 static prof_bt_t *
 117 bt_dup(prof_bt_t *bt)
 118 {
 119         prof_bt_t *ret;
 120
 121         /*
 122          * Create a single allocation that has space for vec immediately
 123          * following the prof_bt_t structure.  The backtraces that get
 124          * stored in the backtrace caches are copied from stack-allocated
 125          * temporary variables, so size is known at creation time.  Making this
 126          * a contiguous object improves cache locality.
 127          */
 128         ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
 129             (bt->len * sizeof(void *)));
 130         if (ret == NULL)
 131                 return (NULL);
 132         ret->vec = (void **)((uintptr_t)ret +
 133             QUANTUM_CEILING(sizeof(prof_bt_t)));
 134         memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
 135         ret->len = bt->len;
 136
 137         return (ret);
 138 }
 139
 140 static inline void
 141 prof_enter(void)
 142 {
 143
 144         malloc_mutex_lock(&enq_mtx);
 145         enq = true;
 146         malloc_mutex_unlock(&enq_mtx);
 147
 148         malloc_mutex_lock(&bt2ctx_mtx);
 149 }
 150
 151 static inline void
 152 prof_leave(void)
 153 {
 154         bool idump, gdump;
 155
 156         malloc_mutex_unlock(&bt2ctx_mtx);
 157
 158         malloc_mutex_lock(&enq_mtx);
 159         enq = false;
 160         idump = enq_idump;
 161         enq_idump = false;
 162         gdump = enq_gdump;
 163         enq_gdump = false;
 164         malloc_mutex_unlock(&enq_mtx);
 165
 166         if (idump)
 167                 prof_idump();
 168         if (gdump)
 169                 prof_gdump();
 170 }
 171
 172 #ifdef JEMALLOC_PROF_LIBUNWIND
 173 void
 174 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 175 {
 176         unw_context_t uc;
 177         unw_cursor_t cursor;
 178         unsigned i;
 179         int err;
 180
 181         assert(bt->len == 0);
 182         assert(bt->vec != NULL);
 183         assert(max <= (1U << opt_lg_prof_bt_max));
 184
 185         unw_getcontext(&uc);
 186         unw_init_local(&cursor, &uc);
 187
 188         /* Throw away (nignore+1) stack frames, if that many exist. */
 189         for (i = 0; i < nignore + 1; i++) {
 190                 err = unw_step(&cursor);
 191                 if (err <= 0)
 192                         return;
 193         }
 194
 195         /*
 196          * Iterate over stack frames until there are no more, or until no space
 197          * remains in bt.
 198          */
 199         for (i = 0; i < max; i++) {
 200                 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
 201                 bt->len++;
 202                 err = unw_step(&cursor);
 203                 if (err <= 0)
 204                         break;
 205         }
 206 }
 207 #endif
 208 #ifdef JEMALLOC_PROF_LIBGCC
 209 static _Unwind_Reason_Code
 210 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
 211 {
 212
 213         return (_URC_NO_REASON);
 214 }
 215
 216 static _Unwind_Reason_Code
 217 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 218 {
 219         prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
 220
 221         if (data->nignore > 0)
 222                 data->nignore--;
 223         else {
 224                 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
 225                 data->bt->len++;
 226                 if (data->bt->len == data->max)
 227                         return (_URC_END_OF_STACK);
 228         }
 229
 230         return (_URC_NO_REASON);
 231 }
 232
 233 void
 234 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 235 {
 236         prof_unwind_data_t data = {bt, nignore, max};
 237
 238         _Unwind_Backtrace(prof_unwind_callback, &data);
 239 }
 240 #endif
 241 #ifdef JEMALLOC_PROF_GCC
 242 void
 243 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 244 {
 245 #define BT_FRAME(i)                                                     \
 246         if ((i) < nignore + max) {                                      \
 247                 void *p;                                                \
 248                 if (__builtin_frame_address(i) == 0)                    \
 249                         return;                                         \
 250                 p = __builtin_return_address(i);                        \
 251                 if (p == NULL)                                          \
 252                         return;                                         \
 253                 if (i >= nignore) {                                     \
 254                         bt->vec[(i) - nignore] = p;                     \
 255                         bt->len = (i) - nignore + 1;                    \
 256                 }                                                       \
 257         } else                                                          \
 258                 return;
 259
 260         assert(nignore <= 3);
 261         assert(max <= (1U << opt_lg_prof_bt_max));
 262
 263         BT_FRAME(0)
 264         BT_FRAME(1)
 265         BT_FRAME(2)
 266         BT_FRAME(3)
 267         BT_FRAME(4)
 268         BT_FRAME(5)
 269         BT_FRAME(6)
 270         BT_FRAME(7)
 271         BT_FRAME(8)
 272         BT_FRAME(9)
 273
 274         BT_FRAME(10)
 275         BT_FRAME(11)
 276         BT_FRAME(12)
 277         BT_FRAME(13)
 278         BT_FRAME(14)
 279         BT_FRAME(15)
 280         BT_FRAME(16)
 281         BT_FRAME(17)
 282         BT_FRAME(18)
 283         BT_FRAME(19)
 284
 285         BT_FRAME(20)
 286         BT_FRAME(21)
 287         BT_FRAME(22)
 288         BT_FRAME(23)
 289         BT_FRAME(24)
 290         BT_FRAME(25)
 291         BT_FRAME(26)
 292         BT_FRAME(27)
 293         BT_FRAME(28)
 294         BT_FRAME(29)
 295
 296         BT_FRAME(30)
 297         BT_FRAME(31)
 298         BT_FRAME(32)
 299         BT_FRAME(33)
 300         BT_FRAME(34)
 301         BT_FRAME(35)
 302         BT_FRAME(36)
 303         BT_FRAME(37)
 304         BT_FRAME(38)
 305         BT_FRAME(39)
 306
 307         BT_FRAME(40)
 308         BT_FRAME(41)
 309         BT_FRAME(42)
 310         BT_FRAME(43)
 311         BT_FRAME(44)
 312         BT_FRAME(45)
 313         BT_FRAME(46)
 314         BT_FRAME(47)
 315         BT_FRAME(48)
 316         BT_FRAME(49)
 317
 318         BT_FRAME(50)
 319         BT_FRAME(51)
 320         BT_FRAME(52)
 321         BT_FRAME(53)
 322         BT_FRAME(54)
 323         BT_FRAME(55)
 324         BT_FRAME(56)
 325         BT_FRAME(57)
 326         BT_FRAME(58)
 327         BT_FRAME(59)
 328
 329         BT_FRAME(60)
 330         BT_FRAME(61)
 331         BT_FRAME(62)
 332         BT_FRAME(63)
 333         BT_FRAME(64)
 334         BT_FRAME(65)
 335         BT_FRAME(66)
 336         BT_FRAME(67)
 337         BT_FRAME(68)
 338         BT_FRAME(69)
 339
 340         BT_FRAME(70)
 341         BT_FRAME(71)
 342         BT_FRAME(72)
 343         BT_FRAME(73)
 344         BT_FRAME(74)
 345         BT_FRAME(75)
 346         BT_FRAME(76)
 347         BT_FRAME(77)
 348         BT_FRAME(78)
 349         BT_FRAME(79)
 350
 351         BT_FRAME(80)
 352         BT_FRAME(81)
 353         BT_FRAME(82)
 354         BT_FRAME(83)
 355         BT_FRAME(84)
 356         BT_FRAME(85)
 357         BT_FRAME(86)
 358         BT_FRAME(87)
 359         BT_FRAME(88)
 360         BT_FRAME(89)
 361
 362         BT_FRAME(90)
 363         BT_FRAME(91)
 364         BT_FRAME(92)
 365         BT_FRAME(93)
 366         BT_FRAME(94)
 367         BT_FRAME(95)
 368         BT_FRAME(96)
 369         BT_FRAME(97)
 370         BT_FRAME(98)
 371         BT_FRAME(99)
 372
 373         BT_FRAME(100)
 374         BT_FRAME(101)
 375         BT_FRAME(102)
 376         BT_FRAME(103)
 377         BT_FRAME(104)
 378         BT_FRAME(105)
 379         BT_FRAME(106)
 380         BT_FRAME(107)
 381         BT_FRAME(108)
 382         BT_FRAME(109)
 383
 384         BT_FRAME(110)
 385         BT_FRAME(111)
 386         BT_FRAME(112)
 387         BT_FRAME(113)
 388         BT_FRAME(114)
 389         BT_FRAME(115)
 390         BT_FRAME(116)
 391         BT_FRAME(117)
 392         BT_FRAME(118)
 393         BT_FRAME(119)
 394
 395         BT_FRAME(120)
 396         BT_FRAME(121)
 397         BT_FRAME(122)
 398         BT_FRAME(123)
 399         BT_FRAME(124)
 400         BT_FRAME(125)
 401         BT_FRAME(126)
 402         BT_FRAME(127)
 403
 404         /* Extras to compensate for nignore. */
 405         BT_FRAME(128)
 406         BT_FRAME(129)
 407         BT_FRAME(130)
 408 #undef BT_FRAME
 409 }
 410 #endif
 411
 412 prof_thr_cnt_t *
 413 prof_lookup(prof_bt_t *bt)
 414 {
 415         union {
 416                 prof_thr_cnt_t  *p;
 417                 void            *v;
 418         } ret;
 419         prof_tdata_t *prof_tdata;
 420
 421         prof_tdata = PROF_TCACHE_GET();
 422         if (prof_tdata == NULL) {
 423                 prof_tdata = prof_tdata_init();
 424                 if (prof_tdata == NULL)
 425                         return (NULL);
 426         }
 427
 428         if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
 429                 union {
 430                         prof_bt_t       *p;
 431                         void            *v;
 432                 } btkey;
 433                 union {
 434                         prof_ctx_t      *p;
 435                         void            *v;
 436                 } ctx;
 437                 bool new_ctx;
 438
 439                 /*
 440                  * This thread's cache lacks bt.  Look for it in the global
 441                  * cache.
 442                  */
 443                 prof_enter();
 444                 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
 445                         /* bt has never been seen before.  Insert it. */
 446                         ctx.v = imalloc(sizeof(prof_ctx_t));
 447                         if (ctx.v == NULL) {
 448                                 prof_leave();
 449                                 return (NULL);
 450                         }
 451                         btkey.p = bt_dup(bt);
 452                         if (btkey.v == NULL) {
 453                                 prof_leave();
 454                                 idalloc(ctx.v);
 455                                 return (NULL);
 456                         }
 457                         ctx.p->bt = btkey.p;
 458                         if (malloc_mutex_init(&ctx.p->lock)) {
 459                                 prof_leave();
 460                                 idalloc(btkey.v);
 461                                 idalloc(ctx.v);
 462                                 return (NULL);
 463                         }
 464                         memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
 465                         ql_new(&ctx.p->cnts_ql);
 466                         if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
 467                                 /* OOM. */
 468                                 prof_leave();
 469                                 malloc_mutex_destroy(&ctx.p->lock);
 470                                 idalloc(btkey.v);
 471                                 idalloc(ctx.v);
 472                                 return (NULL);
 473                         }
 474                         /*
 475                          * Artificially raise curobjs, in order to avoid a race
 476                          * condition with prof_ctx_merge()/prof_ctx_destroy().
 477                          */
 478                         ctx.p->cnt_merged.curobjs++;
 479                         new_ctx = true;
 480                 } else
 481                         new_ctx = false;
 482                 prof_leave();
 483
 484                 /* Link a prof_thd_cnt_t into ctx for this thread. */
 485                 if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
 486                     == (ZU(1) << opt_lg_prof_tcmax)) {
 487                         assert(ckh_count(&prof_tdata->bt2cnt) > 0);
 488                         /*
 489                          * Flush the least recently used cnt in order to keep
 490                          * bt2cnt from becoming too large.
 491                          */
 492                         ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
 493                         assert(ret.v != NULL);
 494                         ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
 495                             NULL);
 496                         ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 497                         prof_ctx_merge(ret.p->ctx, ret.p);
 498                         /* ret can now be re-used. */
 499                 } else {
 500                         assert(opt_lg_prof_tcmax < 0 ||
 501                             ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
 502                             opt_lg_prof_tcmax));
 503                         /* Allocate and partially initialize a new cnt. */
 504                         ret.v = imalloc(sizeof(prof_thr_cnt_t));
 505                         if (ret.p == NULL) {
 506                                 if (new_ctx) {
 507                                         malloc_mutex_lock(&ctx.p->lock);
 508                                         ctx.p->cnt_merged.curobjs--;
 509                                         malloc_mutex_unlock(&ctx.p->lock);
 510                                 }
 511                                 return (NULL);
 512                         }
 513                         ql_elm_new(ret.p, cnts_link);
 514                         ql_elm_new(ret.p, lru_link);
 515                 }
 516                 /* Finish initializing ret. */
 517                 ret.p->ctx = ctx.p;
 518                 ret.p->epoch = 0;
 519                 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 520                 if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
 521                         if (new_ctx) {
 522                                 malloc_mutex_lock(&ctx.p->lock);
 523                                 ctx.p->cnt_merged.curobjs--;
 524                                 malloc_mutex_unlock(&ctx.p->lock);
 525                         }
 526                         idalloc(ret.v);
 527                         return (NULL);
 528                 }
 529                 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 530                 malloc_mutex_lock(&ctx.p->lock);
 531                 ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
 532                 if (new_ctx)
 533                         ctx.p->cnt_merged.curobjs--;
 534                 malloc_mutex_unlock(&ctx.p->lock);
 535         } else {
 536                 /* Move ret to the front of the LRU. */
 537                 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 538                 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 539         }
 540
 541         return (ret.p);
 542 }
 543
 544 static bool
 545 prof_flush(bool propagate_err)
 546 {
 547         bool ret = false;
 548         ssize_t err;
 549
 550         err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
 551         if (err == -1) {
 552                 if (propagate_err == false) {
 553                         malloc_write("<jemalloc>: write() failed during heap "
 554                             "profile flush\n");
 555                         if (opt_abort)
 556                                 abort();
 557                 }
 558                 ret = true;
 559         }
 560         prof_dump_buf_end = 0;
 561
 562         return (ret);
 563 }
 564
 565 static bool
 566 prof_write(const char *s, bool propagate_err)
 567 {
 568         unsigned i, slen, n;
 569
 570         i = 0;
 571         slen = strlen(s);
 572         while (i < slen) {
 573                 /* Flush the buffer if it is full. */
 574                 if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
 575                         if (prof_flush(propagate_err) && propagate_err)
 576                                 return (true);
 577
 578                 if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
 579                         /* Finish writing. */
 580                         n = slen - i;
 581                 } else {
 582                         /* Write as much of s as will fit. */
 583                         n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
 584                 }
 585                 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
 586                 prof_dump_buf_end += n;
 587                 i += n;
 588         }
 589
 590         return (false);
 591 }
 592
 593 static void
 594 prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 595 {
 596         prof_thr_cnt_t *thr_cnt;
 597         prof_cnt_t tcnt;
 598
 599         malloc_mutex_lock(&ctx->lock);
 600
 601         memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
 602         ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
 603                 volatile unsigned *epoch = &thr_cnt->epoch;
 604
 605                 while (true) {
 606                         unsigned epoch0 = *epoch;
 607
 608                         /* Make sure epoch is even. */
 609                         if (epoch0 & 1U)
 610                                 continue;
 611
 612                         memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
 613
 614                         /* Terminate if epoch didn't change while reading. */
 615                         if (*epoch == epoch0)
 616                                 break;
 617                 }
 618
 619                 ctx->cnt_summed.curobjs += tcnt.curobjs;
 620                 ctx->cnt_summed.curbytes += tcnt.curbytes;
 621                 if (opt_prof_accum) {
 622                         ctx->cnt_summed.accumobjs += tcnt.accumobjs;
 623                         ctx->cnt_summed.accumbytes += tcnt.accumbytes;
 624                 }
 625         }
 626
 627         if (ctx->cnt_summed.curobjs != 0)
 628                 (*leak_nctx)++;
 629
 630         /* Add to cnt_all. */
 631         cnt_all->curobjs += ctx->cnt_summed.curobjs;
 632         cnt_all->curbytes += ctx->cnt_summed.curbytes;
 633         if (opt_prof_accum) {
 634                 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
 635                 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
 636         }
 637
 638         malloc_mutex_unlock(&ctx->lock);
 639 }
 640
 641 static void
 642 prof_ctx_destroy(prof_ctx_t *ctx)
 643 {
 644
 645         /*
 646          * Check that ctx is still unused by any thread cache before destroying
 647          * it.  prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
 648          * avoid a race condition with this function, and prof_ctx_merge()
 649          * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
 650          * between the main body of prof_ctx_merge() and entry into this
 651          * function.
 652          */
 653         prof_enter();
 654         malloc_mutex_lock(&ctx->lock);
 655         if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
 656                 assert(ctx->cnt_merged.curbytes == 0);
 657                 assert(ctx->cnt_merged.accumobjs == 0);
 658                 assert(ctx->cnt_merged.accumbytes == 0);
 659                 /* Remove ctx from bt2ctx. */
 660                 ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
 661                 prof_leave();
 662                 /* Destroy ctx. */
 663                 malloc_mutex_unlock(&ctx->lock);
 664                 bt_destroy(ctx->bt);
 665                 malloc_mutex_destroy(&ctx->lock);
 666                 idalloc(ctx);
 667         } else {
 668                 /* Compensate for increment in prof_ctx_merge(). */
 669                 ctx->cnt_merged.curobjs--;
 670                 malloc_mutex_unlock(&ctx->lock);
 671                 prof_leave();
 672         }
 673 }
 674
 675 static void
 676 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 677 {
 678         bool destroy;
 679
 680         /* Merge cnt stats and detach from ctx. */
 681         malloc_mutex_lock(&ctx->lock);
 682         ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
 683         ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
 684         ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
 685         ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
 686         ql_remove(&ctx->cnts_ql, cnt, cnts_link);
 687         if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
 688             ctx->cnt_merged.curobjs == 0) {
 689                 /*
 690                  * Artificially raise ctx->cnt_merged.curobjs in order to keep
 691                  * another thread from winning the race to destroy ctx while
 692                  * this one has ctx->lock dropped.  Without this, it would be
 693                  * possible for another thread to:
 694                  *
 695                  * 1) Sample an allocation associated with ctx.
 696                  * 2) Deallocate the sampled object.
 697                  * 3) Successfully prof_ctx_destroy(ctx).
 698                  *
 699                  * The result would be that ctx no longer exists by the time
 700                  * this thread accesses it in prof_ctx_destroy().
 701                  */
 702                 ctx->cnt_merged.curobjs++;
 703                 destroy = true;
 704         } else
 705                 destroy = false;
 706         malloc_mutex_unlock(&ctx->lock);
 707         if (destroy)
 708                 prof_ctx_destroy(ctx);
 709 }
 710
 711 static bool
 712 prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
 713 {
 714         char buf[UMAX2S_BUFSIZE];
 715         unsigned i;
 716
 717         if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
 718                 assert(ctx->cnt_summed.curbytes == 0);
 719                 assert(ctx->cnt_summed.accumobjs == 0);
 720                 assert(ctx->cnt_summed.accumbytes == 0);
 721                 return (false);
 722         }
 723
 724         if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
 725             || prof_write(": ", propagate_err)
 726             || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
 727             propagate_err)
 728             || prof_write(" [", propagate_err)
 729             || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
 730             propagate_err)
 731             || prof_write(": ", propagate_err)
 732             || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
 733             propagate_err)
 734             || prof_write("] @", propagate_err))
 735                 return (true);
 736
 737         for (i = 0; i < bt->len; i++) {
 738                 if (prof_write(" 0x", propagate_err)
 739                     || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
 740                     propagate_err))
 741                         return (true);
 742         }
 743
 744         if (prof_write("\n", propagate_err))
 745                 return (true);
 746
 747         return (false);
 748 }
 749
 750 static bool
 751 prof_dump_maps(bool propagate_err)
 752 {
 753         int mfd;
 754         char buf[UMAX2S_BUFSIZE];
 755         char *s;
 756         unsigned i, slen;
 757         /*         /proc/<pid>/maps\0 */
 758         char mpath[6     + UMAX2S_BUFSIZE
 759                               + 5  + 1];
 760
 761         i = 0;
 762
 763         s = "/proc/";
 764         slen = strlen(s);
 765         memcpy(&mpath[i], s, slen);
 766         i += slen;
 767
 768         s = u2s(getpid(), 10, buf);
 769         slen = strlen(s);
 770         memcpy(&mpath[i], s, slen);
 771         i += slen;
 772
 773         s = "/maps";
 774         slen = strlen(s);
 775         memcpy(&mpath[i], s, slen);
 776         i += slen;
 777
 778         mpath[i] = '\0';
 779
 780         mfd = open(mpath, O_RDONLY);
 781         if (mfd != -1) {
 782                 ssize_t nread;
 783
 784                 if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
 785                     propagate_err)
 786                         return (true);
 787                 nread = 0;
 788                 do {
 789                         prof_dump_buf_end += nread;
 790                         if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
 791                                 /* Make space in prof_dump_buf before read(). */
 792                                 if (prof_flush(propagate_err) && propagate_err)
 793                                         return (true);
 794                         }
 795                         nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
 796                             PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
 797                 } while (nread > 0);
 798                 close(mfd);
 799         } else
 800                 return (true);
 801
 802         return (false);
 803 }
 804
 805 static bool
 806 prof_dump(const char *filename, bool leakcheck, bool propagate_err)
 807 {
 808         prof_cnt_t cnt_all;
 809         size_t tabind;
 810         union {
 811                 prof_bt_t       *p;
 812                 void            *v;
 813         } bt;
 814         union {
 815                 prof_ctx_t      *p;
 816                 void            *v;
 817         } ctx;
 818         char buf[UMAX2S_BUFSIZE];
 819         size_t leak_nctx;
 820
 821         prof_enter();
 822         prof_dump_fd = creat(filename, 0644);
 823         if (prof_dump_fd == -1) {
 824                 if (propagate_err == false) {
 825                         malloc_write("<jemalloc>: creat(\"");
 826                         malloc_write(filename);
 827                         malloc_write("\", 0644) failed\n");
 828                         if (opt_abort)
 829                                 abort();
 830                 }
 831                 goto ERROR;
 832         }
 833
 834         /* Merge per thread profile stats, and sum them in cnt_all. */
 835         memset(&cnt_all, 0, sizeof(prof_cnt_t));
 836         leak_nctx = 0;
 837         for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
 838                 prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
 839
 840         /* Dump profile header. */
 841         if (prof_write("heap profile: ", propagate_err)
 842             || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
 843             || prof_write(": ", propagate_err)
 844             || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
 845             || prof_write(" [", propagate_err)
 846             || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
 847             || prof_write(": ", propagate_err)
 848             || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
 849                 goto ERROR;
 850
 851         if (opt_lg_prof_sample == 0) {
 852                 if (prof_write("] @ heapprofile\n", propagate_err))
 853                         goto ERROR;
 854         } else {
 855                 if (prof_write("] @ heap_v2/", propagate_err)
 856                     || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
 857                     buf), propagate_err)
 858                     || prof_write("\n", propagate_err))
 859                         goto ERROR;
 860         }
 861
 862         /* Dump  per ctx profile stats. */
 863         for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
 864             == false;) {
 865                 if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
 866                         goto ERROR;
 867         }
 868
 869         /* Dump /proc/<pid>/maps if possible. */
 870         if (prof_dump_maps(propagate_err))
 871                 goto ERROR;
 872
 873         if (prof_flush(propagate_err))
 874                 goto ERROR;
 875         close(prof_dump_fd);
 876         prof_leave();
 877
 878         if (leakcheck && cnt_all.curbytes != 0) {
 879                 malloc_write("<jemalloc>: Leak summary: ");
 880                 malloc_write(u2s(cnt_all.curbytes, 10, buf));
 881                 malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
 882                 malloc_write(u2s(cnt_all.curobjs, 10, buf));
 883                 malloc_write((cnt_all.curobjs != 1) ? " objects, " :
 884                     " object, ");
 885                 malloc_write(u2s(leak_nctx, 10, buf));
 886                 malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
 887                 malloc_write("<jemalloc>: Run pprof on \"");
 888                 malloc_write(filename);
 889                 malloc_write("\" for leak detail\n");
 890         }
 891
 892         return (false);
 893 ERROR:
 894         prof_leave();
 895         return (true);
 896 }
 897
 898 #define DUMP_FILENAME_BUFSIZE   (PATH_MAX+ UMAX2S_BUFSIZE               \
 899                                                + 1                      \
 900                                                 + UMAX2S_BUFSIZE        \
 901                                                      + 2                \
 902                                                        + UMAX2S_BUFSIZE \
 903                                                              + 5  + 1)
 904 static void
 905 prof_dump_filename(char *filename, char v, int64_t vseq)
 906 {
 907         char buf[UMAX2S_BUFSIZE];
 908         char *s;
 909         unsigned i, slen;
 910
 911         /*
 912          * Construct a filename of the form:
 913          *
 914          *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
 915          */
 916
 917         i = 0;
 918
 919         s = opt_prof_prefix;
 920         slen = strlen(s);
 921         memcpy(&filename[i], s, slen);
 922         i += slen;
 923
 924         s = ".";
 925         slen = strlen(s);
 926         memcpy(&filename[i], s, slen);
 927         i += slen;
 928
 929         s = u2s(getpid(), 10, buf);
 930         slen = strlen(s);
 931         memcpy(&filename[i], s, slen);
 932         i += slen;
 933
 934         s = ".";
 935         slen = strlen(s);
 936         memcpy(&filename[i], s, slen);
 937         i += slen;
 938
 939         s = u2s(prof_dump_seq, 10, buf);
 940         prof_dump_seq++;
 941         slen = strlen(s);
 942         memcpy(&filename[i], s, slen);
 943         i += slen;
 944
 945         s = ".";
 946         slen = strlen(s);
 947         memcpy(&filename[i], s, slen);
 948         i += slen;
 949
 950         filename[i] = v;
 951         i++;
 952
 953         if (vseq != 0xffffffffffffffffLLU) {
 954                 s = u2s(vseq, 10, buf);
 955                 slen = strlen(s);
 956                 memcpy(&filename[i], s, slen);
 957                 i += slen;
 958         }
 959
 960         s = ".heap";
 961         slen = strlen(s);
 962         memcpy(&filename[i], s, slen);
 963         i += slen;
 964
 965         filename[i] = '\0';
 966 }
 967
 968 static void
 969 prof_fdump(void)
 970 {
 971         char filename[DUMP_FILENAME_BUFSIZE];
 972
 973         if (prof_booted == false)
 974                 return;
 975
 976         if (opt_prof_prefix[0] != '\0') {
 977                 malloc_mutex_lock(&prof_dump_seq_mtx);
 978                 prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
 979                 malloc_mutex_unlock(&prof_dump_seq_mtx);
 980                 prof_dump(filename, opt_prof_leak, false);
 981         }
 982 }
 983
 984 void
 985 prof_idump(void)
 986 {
 987         char filename[DUMP_FILENAME_BUFSIZE];
 988
 989         if (prof_booted == false)
 990                 return;
 991         malloc_mutex_lock(&enq_mtx);
 992         if (enq) {
 993                 enq_idump = true;
 994                 malloc_mutex_unlock(&enq_mtx);
 995                 return;
 996         }
 997         malloc_mutex_unlock(&enq_mtx);
 998
 999         if (opt_prof_prefix[0] != '\0') {
1000                 malloc_mutex_lock(&prof_dump_seq_mtx);
1001                 prof_dump_filename(filename, 'i', prof_dump_iseq);
1002                 prof_dump_iseq++;
1003                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1004                 prof_dump(filename, false, false);
1005         }
1006 }
1007
1008 bool
1009 prof_mdump(const char *filename)
1010 {
1011         char filename_buf[DUMP_FILENAME_BUFSIZE];
1012
1013         if (opt_prof == false || prof_booted == false)
1014                 return (true);
1015
1016         if (filename == NULL) {
1017                 /* No filename specified, so automatically generate one. */
1018                 if (opt_prof_prefix[0] == '\0')
1019                         return (true);
1020                 malloc_mutex_lock(&prof_dump_seq_mtx);
1021                 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1022                 prof_dump_mseq++;
1023                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1024                 filename = filename_buf;
1025         }
1026         return (prof_dump(filename, false, true));
1027 }
1028
1029 void
1030 prof_gdump(void)
1031 {
1032         char filename[DUMP_FILENAME_BUFSIZE];
1033
1034         if (prof_booted == false)
1035                 return;
1036         malloc_mutex_lock(&enq_mtx);
1037         if (enq) {
1038                 enq_gdump = true;
1039                 malloc_mutex_unlock(&enq_mtx);
1040                 return;
1041         }
1042         malloc_mutex_unlock(&enq_mtx);
1043
1044         if (opt_prof_prefix[0] != '\0') {
1045                 malloc_mutex_lock(&prof_dump_seq_mtx);
1046                 prof_dump_filename(filename, 'u', prof_dump_useq);
1047                 prof_dump_useq++;
1048                 malloc_mutex_unlock(&prof_dump_seq_mtx);
1049                 prof_dump(filename, false, false);
1050         }
1051 }
1052
1053 static void
1054 prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
1055 {
1056         size_t ret1, ret2;
1057         uint64_t h;
1058         prof_bt_t *bt = (prof_bt_t *)key;
1059
1060         assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
1061         assert(hash1 != NULL);
1062         assert(hash2 != NULL);
1063
1064         h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
1065         if (minbits <= 32) {
1066                 /*
1067                  * Avoid doing multiple hashes, since a single hash provides
1068                  * enough bits.
1069                  */
1070                 ret1 = h & ZU(0xffffffffU);
1071                 ret2 = h >> 32;
1072         } else {
1073                 ret1 = h;
1074                 ret2 = hash(bt->vec, bt->len * sizeof(void *),
1075                     0x8432a476666bbc13U);
1076         }
1077
1078         *hash1 = ret1;
1079         *hash2 = ret2;
1080 }
1081
1082 static bool
1083 prof_bt_keycomp(const void *k1, const void *k2)
1084 {
1085         const prof_bt_t *bt1 = (prof_bt_t *)k1;
1086         const prof_bt_t *bt2 = (prof_bt_t *)k2;
1087
1088         if (bt1->len != bt2->len)
1089                 return (false);
1090         return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1091 }
1092
1093 prof_tdata_t *
1094 prof_tdata_init(void)
1095 {
1096         prof_tdata_t *prof_tdata;
1097
1098         /* Initialize an empty cache for this thread. */
1099         prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1100         if (prof_tdata == NULL)
1101                 return (NULL);
1102
1103         if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1104             prof_bt_hash, prof_bt_keycomp)) {
1105                 idalloc(prof_tdata);
1106                 return (NULL);
1107         }
1108         ql_new(&prof_tdata->lru_ql);
1109
1110         prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
1111         if (prof_tdata->vec == NULL) {
1112
1113                 ckh_delete(&prof_tdata->bt2cnt);
1114                 idalloc(prof_tdata);
1115                 return (NULL);
1116         }
1117
1118         prof_tdata->prn_state = 0;
1119         prof_tdata->threshold = 0;
1120         prof_tdata->accum = 0;
1121
1122         PROF_TCACHE_SET(prof_tdata);
1123
1124         return (prof_tdata);
1125 }
1126
1127 static void
1128 prof_tdata_cleanup(void *arg)
1129 {
1130         prof_tdata_t *prof_tdata;
1131
1132         prof_tdata = PROF_TCACHE_GET();
1133         if (prof_tdata != NULL) {
1134                 prof_thr_cnt_t *cnt;
1135
1136                 /*
1137                  * Delete the hash table.  All of its contents can still be
1138                  * iterated over via the LRU.
1139                  */
1140                 ckh_delete(&prof_tdata->bt2cnt);
1141
1142                 /*
1143                  * Iteratively merge cnt's into the global stats and delete
1144                  * them.
1145                  */
1146                 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1147                         prof_ctx_merge(cnt->ctx, cnt);
1148                         ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1149                         idalloc(cnt);
1150                 }
1151
1152                 idalloc(prof_tdata->vec);
1153
1154                 idalloc(prof_tdata);
1155                 PROF_TCACHE_SET(NULL);
1156         }
1157 }
1158
1159 void
1160 prof_boot0(void)
1161 {
1162
1163         memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1164             sizeof(PROF_PREFIX_DEFAULT));
1165 }
1166
1167 void
1168 prof_boot1(void)
1169 {
1170
1171         /*
1172          * opt_prof and prof_promote must be in their final state before any
1173          * arenas are initialized, so this function must be executed early.
1174          */
1175
1176         if (opt_prof_leak && opt_prof == false) {
1177                 /*
1178                  * Enable opt_prof, but in such a way that profiles are never
1179                  * automatically dumped.
1180                  */
1181                 opt_prof = true;
1182                 opt_prof_gdump = false;
1183                 prof_interval = 0;
1184         } else if (opt_prof) {
1185                 if (opt_lg_prof_interval >= 0) {
1186                         prof_interval = (((uint64_t)1U) <<
1187                             opt_lg_prof_interval);
1188                 } else
1189                         prof_interval = 0;
1190         }
1191
1192         prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
1193 }
1194
1195 bool
1196 prof_boot2(void)
1197 {
1198
1199         if (opt_prof) {
1200                 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1201                     prof_bt_keycomp))
1202                         return (true);
1203                 if (malloc_mutex_init(&bt2ctx_mtx))
1204                         return (true);
1205                 if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
1206                     != 0) {
1207                         malloc_write(
1208                             "<jemalloc>: Error in pthread_key_create()\n");
1209                         abort();
1210                 }
1211
1212                 prof_bt_max = (1U << opt_lg_prof_bt_max);
1213                 if (malloc_mutex_init(&prof_dump_seq_mtx))
1214                         return (true);
1215
1216                 if (malloc_mutex_init(&enq_mtx))
1217                         return (true);
1218                 enq = false;
1219                 enq_idump = false;
1220                 enq_gdump = false;
1221
1222                 if (atexit(prof_fdump) != 0) {
1223                         malloc_write("<jemalloc>: Error in atexit()\n");
1224                         if (opt_abort)
1225                                 abort();
1226                 }
1227         }
1228
1229 #ifdef JEMALLOC_PROF_LIBGCC
1230         /*
1231          * Cause the backtracing machinery to allocate its internal state
1232          * before enabling profiling.
1233          */
1234         _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1235 #endif
1236
1237         prof_booted = true;
1238
1239         return (false);
1240 }
1241
1242 /******************************************************************************/
1243 #endif /* JEMALLOC_PROF */