#define JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */
bool        opt_tcache = true;
ssize_t     opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t     opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

tcache_bin_info_t   *tcache_bin_info;
static unsigned     stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
__thread tcache_t   *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));

/*
 * Same contents as tcache, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t       tcache_tsd;

size_t      tcache_maxclass;
unsigned    tcache_gc_incr;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void tcache_thread_cleanup(void *arg);

/******************************************************************************/
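/*
 * Slow path for small allocation: the bin's pointer stack was empty, so ask
 * the arena to refill it and then retry the cheap stack pop.
 */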
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
    void *ret;

    arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
        , tcache->prof_accumbytes
#endif
        );
#ifdef JEMALLOC_PROF
    tcache->prof_accumbytes = 0;
#endif
    ret = tcache_alloc_easy(tbin);

    return (ret);
}
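/*
 * Flush all but the last rem cached objects from a small bin back to their
 * owning arena bins.  Each pass locks the arena bin of the first object;
 * objects belonging to a different arena are stashed at the front of avail
 * and handled in a later pass, so at most one bin lock is held at a time.
 */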
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
    void *ptr;
    unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
    bool merged_stats = false;
#endif

    assert(binind < nbins);
    assert(rem <= tbin->ncached);

    for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
        /* Lock the arena bin associated with the first object. */
        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
            tbin->avail[0]);
        arena_t *arena = chunk->arena;
        arena_bin_t *bin = &arena->bins[binind];

#ifdef JEMALLOC_PROF
        if (arena == tcache->arena) {
            malloc_mutex_lock(&arena->lock);
            arena_prof_accum(arena, tcache->prof_accumbytes);
            malloc_mutex_unlock(&arena->lock);
            tcache->prof_accumbytes = 0;
        }
#endif

        malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
        if (arena == tcache->arena) {
            assert(merged_stats == false);
            merged_stats = true;
            bin->stats.nflushes++;
            bin->stats.nrequests += tbin->tstats.nrequests;
            tbin->tstats.nrequests = 0;
        }
#endif
        ndeferred = 0;
        for (i = 0; i < nflush; i++) {
            ptr = tbin->avail[i];
            assert(ptr != NULL);
            chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
            if (chunk->arena == arena) {
                size_t pageind = ((uintptr_t)ptr -
                    (uintptr_t)chunk) >> PAGE_SHIFT;
                arena_chunk_map_t *mapelm =
                    &chunk->map[pageind-map_bias];
                arena_dalloc_bin(arena, chunk, ptr, mapelm);
            } else {
                /*
                 * This object was allocated via a different
                 * arena bin than the one that is currently
                 * locked.  Stash the object, so that it can be
                 * handled in a future pass.
                 */
                tbin->avail[ndeferred] = ptr;
                ndeferred++;
            }
        }
        malloc_mutex_unlock(&bin->lock);
    }
#ifdef JEMALLOC_STATS
    if (merged_stats == false) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        arena_bin_t *bin = &tcache->arena->bins[binind];
        malloc_mutex_lock(&bin->lock);
        bin->stats.nflushes++;
        bin->stats.nrequests += tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
        malloc_mutex_unlock(&bin->lock);
    }
#endif

    memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
        rem * sizeof(void *));
    tbin->ncached = rem;
    if ((int)tbin->ncached < tbin->low_water)
        tbin->low_water = tbin->ncached;
}
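/*
 * Large-object analogue of tcache_bin_flush_small().  The same deferred-pass
 * scheme is used, but objects are returned under the owning arena's lock
 * rather than a per-bin lock.
 */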
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
    void *ptr;
    unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
    bool merged_stats = false;
#endif

    assert(binind < nhbins);
    assert(rem <= tbin->ncached);

    for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
        /* Lock the arena associated with the first object. */
        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
            tbin->avail[0]);
        arena_t *arena = chunk->arena;

        malloc_mutex_lock(&arena->lock);
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
        if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
            arena_prof_accum(arena, tcache->prof_accumbytes);
            tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
            merged_stats = true;
            arena->stats.nrequests_large += tbin->tstats.nrequests;
            arena->stats.lstats[binind - nbins].nrequests +=
                tbin->tstats.nrequests;
            tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
        }
#endif
        ndeferred = 0;
        for (i = 0; i < nflush; i++) {
            ptr = tbin->avail[i];
            assert(ptr != NULL);
            chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
            if (chunk->arena == arena)
                arena_dalloc_large(arena, chunk, ptr);
            else {
                /*
                 * This object was allocated via a different
                 * arena than the one that is currently locked.
                 * Stash the object, so that it can be handled
                 * in a future pass.
                 */
                tbin->avail[ndeferred] = ptr;
                ndeferred++;
            }
        }
        malloc_mutex_unlock(&arena->lock);
    }
#ifdef JEMALLOC_STATS
    if (merged_stats == false) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        arena_t *arena = tcache->arena;
        malloc_mutex_lock(&arena->lock);
        arena->stats.nrequests_large += tbin->tstats.nrequests;
        arena->stats.lstats[binind - nbins].nrequests +=
            tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
        malloc_mutex_unlock(&arena->lock);
    }
#endif

    memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
        rem * sizeof(void *));
    tbin->ncached = rem;
    if ((int)tbin->ncached < tbin->low_water)
        tbin->low_water = tbin->ncached;
}
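/*
 * Create a thread cache.  The tcache header, the per-bin metadata, and all of
 * the per-bin pointer stacks are carved out of one allocation:
 *
 *     [tcache_t header | nhbins tcache_bin_t | stack_nelms void * slots]
 *
 * stack_offset tracks where each bin's avail stack begins within that block,
 * and the total size is rounded up to a cacheline multiple to avoid false
 * sharing between threads.
 */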
tcache_t *
tcache_create(arena_t *arena)
{
    tcache_t *tcache;
    size_t size, stack_offset;
    unsigned i;

    size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
    /* Naturally align the pointer stacks. */
    size = PTR_CEILING(size);
    stack_offset = size;
    size += stack_nelms * sizeof(void *);
    /*
     * Round up to the nearest multiple of the cacheline size, in order to
     * avoid the possibility of false cacheline sharing.
     *
     * That this works relies on the same logic as in ipalloc(), but we
     * cannot directly call ipalloc() here due to tcache bootstrapping
     * issues.
     */
    size = (size + CACHELINE_MASK) & (-CACHELINE);

    if (size <= small_maxclass)
        tcache = (tcache_t *)arena_malloc_small(arena, size, true);
    else if (size <= tcache_maxclass)
        tcache = (tcache_t *)arena_malloc_large(arena, size, true);
    else
        tcache = (tcache_t *)icalloc(size);

    if (tcache == NULL)
        return (NULL);

#ifdef JEMALLOC_STATS
    /* Link into list of extant tcaches. */
    malloc_mutex_lock(&arena->lock);
    ql_elm_new(tcache, link);
    ql_tail_insert(&arena->tcache_ql, tcache, link);
    malloc_mutex_unlock(&arena->lock);
#endif

    tcache->arena = arena;
    assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
    for (i = 0; i < nhbins; i++) {
        tcache->tbins[i].lg_fill_div = 1;
        tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
            (uintptr_t)stack_offset);
        stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
    }

    TCACHE_SET(tcache);

    return (tcache);
}
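/*
 * Tear down a thread cache: flush every bin, fold residual stats and
 * profiling bytes into the owning arena, then release the tcache memory back
 * to the arena directly (or via idalloc() for oversized tcaches), since the
 * cache being destroyed can no longer be used to free itself.
 */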
void
tcache_destroy(tcache_t *tcache)
{
    unsigned i;
    size_t tcache_size;

#ifdef JEMALLOC_STATS
    /* Unlink from list of extant tcaches. */
    malloc_mutex_lock(&tcache->arena->lock);
    ql_remove(&tcache->arena->tcache_ql, tcache, link);
    malloc_mutex_unlock(&tcache->arena->lock);
    tcache_stats_merge(tcache, tcache->arena);
#endif

    for (i = 0; i < nbins; i++) {
        tcache_bin_t *tbin = &tcache->tbins[i];
        tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
            , tcache
#endif
            );

#ifdef JEMALLOC_STATS
        if (tbin->tstats.nrequests != 0) {
            arena_t *arena = tcache->arena;
            arena_bin_t *bin = &arena->bins[i];
            malloc_mutex_lock(&bin->lock);
            bin->stats.nrequests += tbin->tstats.nrequests;
            malloc_mutex_unlock(&bin->lock);
        }
#endif
    }

    for (; i < nhbins; i++) {
        tcache_bin_t *tbin = &tcache->tbins[i];
        tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
            , tcache
#endif
            );

#ifdef JEMALLOC_STATS
        if (tbin->tstats.nrequests != 0) {
            arena_t *arena = tcache->arena;
            malloc_mutex_lock(&arena->lock);
            arena->stats.nrequests_large += tbin->tstats.nrequests;
            arena->stats.lstats[i - nbins].nrequests +=
                tbin->tstats.nrequests;
            malloc_mutex_unlock(&arena->lock);
        }
#endif
    }

#ifdef JEMALLOC_PROF
    if (tcache->prof_accumbytes > 0) {
        malloc_mutex_lock(&tcache->arena->lock);
        arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
        malloc_mutex_unlock(&tcache->arena->lock);
    }
#endif

    tcache_size = arena_salloc(tcache);
    if (tcache_size <= small_maxclass) {
        arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
        arena_t *arena = chunk->arena;
        size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
            PAGE_SHIFT;
        arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
        arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
            (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
            PAGE_SHIFT));
        arena_bin_t *bin = run->bin;

        malloc_mutex_lock(&bin->lock);
        arena_dalloc_bin(arena, chunk, tcache, mapelm);
        malloc_mutex_unlock(&bin->lock);
    } else if (tcache_size <= tcache_maxclass) {
        arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
        arena_t *arena = chunk->arena;

        malloc_mutex_lock(&arena->lock);
        arena_dalloc_large(arena, chunk, tcache);
        malloc_mutex_unlock(&arena->lock);
    } else
        idalloc(tcache);
}
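/*
 * TSD destructor.  A key value of 1 means the tcache has already been
 * destroyed; 2 means another destructor touched the allocator after cleanup
 * already ran, so the key is reset to 1 to request one more callback; any
 * other non-NULL value is a live tcache that must be destroyed.
 */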
static void
tcache_thread_cleanup(void *arg)
{
    tcache_t *tcache = (tcache_t *)arg;

    if (tcache == (void *)(uintptr_t)1) {
        /*
         * The previous time this destructor was called, we set the key
         * to 1 so that other destructors wouldn't cause re-creation of
         * the tcache.  This time, do nothing, so that the destructor
         * will not be called again.
         */
    } else if (tcache == (void *)(uintptr_t)2) {
        /*
         * Another destructor called an allocator function after this
         * destructor was called.  Reset tcache to 1 in order to
         * receive another callback.
         */
        TCACHE_SET((uintptr_t)1);
    } else if (tcache != NULL) {
        assert(tcache != (void *)(uintptr_t)1);
        tcache_destroy(tcache);
        TCACHE_SET((uintptr_t)1);
    }
}
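/*
 * Fold this thread's per-bin and large-object request counters into the
 * arena-wide stats, zeroing the tcache-local counters as they are merged.
 */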
#ifdef JEMALLOC_STATS
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
    unsigned i;

    /* Merge and reset tcache stats. */
    for (i = 0; i < nbins; i++) {
        arena_bin_t *bin = &arena->bins[i];
        tcache_bin_t *tbin = &tcache->tbins[i];
        malloc_mutex_lock(&bin->lock);
        bin->stats.nrequests += tbin->tstats.nrequests;
        malloc_mutex_unlock(&bin->lock);
        tbin->tstats.nrequests = 0;
    }

    for (; i < nhbins; i++) {
        malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
        tcache_bin_t *tbin = &tcache->tbins[i];
        arena->stats.nrequests_large += tbin->tstats.nrequests;
        lstats->nrequests += tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
    }
}
#endif
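/*
 * Bootstrap the tcache machinery: clamp tcache_maxclass to the range
 * [small_maxclass, arena_maxclass], size the per-bin caches, and derive the
 * incremental GC threshold.  tcache_gc_incr works out to
 * ceil((1 << opt_lg_tcache_gc_sweep) / nbins), i.e. the number of allocation
 * events between GC passes over successive bins, so a full sweep of all bins
 * takes roughly 2^opt_lg_tcache_gc_sweep events.  For example, with a 2^13
 * event sweep and 36 small bins (illustrative values only), tcache_gc_incr
 * would be ceil(8192 / 36) == 228.
 */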
bool
tcache_boot(void)
{

    if (opt_tcache) {
        unsigned i;

        /*
         * If necessary, clamp opt_lg_tcache_max, now that
         * small_maxclass and arena_maxclass are known.
         */
        if (opt_lg_tcache_max < 0 || (1U <<
            opt_lg_tcache_max) < small_maxclass)
            tcache_maxclass = small_maxclass;
        else if ((1U << opt_lg_tcache_max) > arena_maxclass)
            tcache_maxclass = arena_maxclass;
        else
            tcache_maxclass = (1U << opt_lg_tcache_max);

        nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

        /* Initialize tcache_bin_info. */
        tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
            sizeof(tcache_bin_info_t));
        if (tcache_bin_info == NULL)
            return (true);
        for (i = 0; i < nbins; i++) {
            if ((arena_bin_info[i].nregs << 1) <=
                TCACHE_NSLOTS_SMALL_MAX) {
                tcache_bin_info[i].ncached_max =
                    (arena_bin_info[i].nregs << 1);
            } else {
                tcache_bin_info[i].ncached_max =
                    TCACHE_NSLOTS_SMALL_MAX;
            }
            stack_nelms += tcache_bin_info[i].ncached_max;
        }
        for (; i < nhbins; i++) {
            tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
            stack_nelms += tcache_bin_info[i].ncached_max;
        }

        /* Compute incremental GC event threshold. */
        if (opt_lg_tcache_gc_sweep >= 0) {
            tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
                nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
                0) ? 0 : 1);
        } else
            tcache_gc_incr = 0;

        if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
            0) {
            malloc_write(
                "<jemalloc>: Error in pthread_key_create()\n");
            abort();
        }
    }

    return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */