/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * Subpages are an artificially designated partitioning of pages.  Their only
 * purpose is to support subpage-spaced size classes.
 *
 * There must be at least 4 subpages per page, due to the way size classes are
 * handled.
 */
#define SUBPAGE		((size_t)(1U << LG_SUBPAGE))
#define SUBPAGE_MASK	(SUBPAGE - 1)

/* Return the smallest subpage multiple that is >= s. */
#define SUBPAGE_CEILING(s)						\
    (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
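/*
 * Worked example (illustrative only; assumes LG_SUBPAGE == 8, i.e. 256-byte
 * subpages):
 *
 *   SUBPAGE_CEILING(1)   == 256
 *   SUBPAGE_CEILING(256) == 256
 *   SUBPAGE_CEILING(257) == 512
 *
 * Adding SUBPAGE_MASK carries into the next multiple unless s is already a
 * multiple, and & ~SUBPAGE_MASK then clears the low-order bits.
 */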
/* Smallest size class to support. */
# define LG_TINY_MIN	LG_SIZEOF_PTR
# define TINY_MIN	(1U << LG_TINY_MIN)
/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define LG_QSPACE_MAX_DEFAULT	7
/*
 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define LG_CSPACE_MAX_DEFAULT	9
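/*
 * Illustrative layout of the small size classes implied by these defaults
 * (an assumption for a 64-bit system with a 16-byte quantum, 64-byte
 * cachelines, and 4 KiB pages; the exact list depends on configuration):
 * quantum-spaced classes run 16, 32, ..., 128 (2^LG_QSPACE_MAX_DEFAULT),
 * cacheline-spaced classes continue 192, 256, ..., 512
 * (2^LG_CSPACE_MAX_DEFAULT), and subpage-spaced classes fill in from there up
 * to the largest small size class.
 */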
/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since when heap profiling is enabled
 * there is one pointer of header overhead per object (plus a constant).  This
 * constraint is relaxed (ignored) for runs that are so small that the
 * per-region overhead is greater than:
 *
 *   RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 */
#define RUN_BFP			12
/*                                    \/   Implicit binary fixed point. */
#define RUN_MAX_OVRHD		0x0000003dU
#define RUN_MAX_OVRHD_RELAX	0x00001800U
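/*
 * Illustrative reading of the fixed point constants above (an interpretation,
 * not an additional constraint): with RUN_BFP == 12 a value v denotes the
 * ratio v/4096, so RUN_MAX_OVRHD == 0x3d corresponds to roughly 61/4096, i.e.
 * about 1.5% maximum header overhead per run, and RUN_MAX_OVRHD_RELAX ==
 * 0x1800 corresponds to 6144/4096 == 1.5 in the same fixed point
 * representation.
 */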
/* Maximum number of regions in one run. */
#define LG_RUN_MAXREGS	11
#define RUN_MAXREGS	(1U << LG_RUN_MAXREGS)
/*
 * The minimum ratio of active:dirty pages per arena is computed as:
 *
 *   (nactive >> opt_lg_dirty_mult) >= ndirty
 *
 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
 * times as many active pages as dirty pages.
 */
#define LG_DIRTY_MULT_DEFAULT	5
typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
    union {
        /*
         * Linkage for run trees.  There are two disjoint uses:
         *
         * 1) arena_t's runs_avail_{clean,dirty} trees.
         * 2) arena_run_t conceptually uses this linkage for in-use
         *    non-full runs, rather than directly embedding linkage.
         */
        rb_node(arena_chunk_map_t)	rb_link;

        /*
         * List of runs currently in purgatory.  arena_chunk_purge()
         * temporarily allocates runs that contain dirty pages while
         * purging, so that other threads cannot use the runs while the
         * purging thread is operating without the arena lock held.
         */
        ql_elm(arena_chunk_map_t)	ql_link;
    } u;

    /* Profile counters, used for large object runs. */
    prof_ctx_t	*prof_ctx;
    /*
     * Run address (or size) and various flags are stored together.  The bit
     * layout looks like (assuming 32-bit system):
     *
     *   ???????? ???????? ????---- ----dula
     *
     * ? : Unallocated: Run address for first/last pages, unset for internal
     *                  pages.
     *     Small: Run page offset.
     *     Large: Run size for first page, unset for trailing pages.
     * - : Unused.
     * d : dirty?
     * u : unzeroed?
     * l : large?
     * a : allocated?
     *
     * Following are example bit patterns for the three types of runs.
     *
     * p : run page offset
     * s : run size
     * c : (binind+1) for size class (used only if prof_promote is true)
     * x : don't care
     * - : 0
     * [DULA] : bit set
     * [dula] : bit unset
     *
     *   Unallocated (clean):
     *     ssssssss ssssssss ssss---- ----du-a
     *     xxxxxxxx xxxxxxxx xxxx---- -----Uxx
     *     ssssssss ssssssss ssss---- ----dU-a
     *
     *   Unallocated (dirty):
     *     ssssssss ssssssss ssss---- ----D--a
     *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
     *     ssssssss ssssssss ssss---- ----D--a
     *
     *   Small:
     *     pppppppp pppppppp pppp---- ----d--A
     *     pppppppp pppppppp pppp---- -------A
     *     pppppppp pppppppp pppp---- ----d--A
     *
     *   Large:
     *     ssssssss ssssssss ssss---- ----D-LA
     *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
     *     -------- -------- -------- ----D-LA
     *
     *   Large (sampled, size <= PAGE_SIZE):
     *     ssssssss ssssssss sssscccc ccccD-LA
     *
     *   Large (not sampled, size == PAGE_SIZE):
     *     ssssssss ssssssss ssss---- ----D-LA
     */
    size_t	bits;
#define CHUNK_MAP_CLASS_SHIFT	4
#define CHUNK_MAP_CLASS_MASK	((size_t)0xff0U)
#define CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
#define CHUNK_MAP_DIRTY		((size_t)0x8U)
#define CHUNK_MAP_UNZEROED	((size_t)0x4U)
#define CHUNK_MAP_LARGE		((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED	((size_t)0x1U)
#define CHUNK_MAP_KEY		CHUNK_MAP_ALLOCATED
};
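/*
 * Illustrative decoding of a map entry (a sketch that mirrors the inline
 * readers later in this header, not an additional API):
 *
 *   size_t bits = chunk->map[pageind-map_bias].bits;
 *   if ((bits & CHUNK_MAP_ALLOCATED) == 0) {
 *           // Unallocated: the high bits hold the run size.
 *   } else if ((bits & CHUNK_MAP_LARGE) == 0) {
 *           // Small: the high bits hold the run page offset, so the run
 *           // header is at page (pageind - (bits >> PAGE_SHIFT)).
 *   } else {
 *           // Large: for the first page, (bits & ~PAGE_MASK) is the run
 *           // size; CHUNK_MAP_DIRTY/CHUNK_MAP_UNZEROED qualify the page.
 *   }
 */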
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
/* Arena chunk header. */
struct arena_chunk_s {
    /* Arena that owns the chunk. */
    arena_t	*arena;

    /* Linkage for the arena's chunks_dirty list. */
    ql_elm(arena_chunk_t) link_dirty;

    /*
     * True if the chunk is currently in the chunks_dirty list, due to
     * having at some point contained one or more dirty pages.  Removal
     * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
     */
    bool	dirtied;

    /* Number of dirty pages. */
    size_t	ndirty;

    /*
     * Map of pages within chunk that keeps track of free/large/small.  The
     * first map_bias entries are omitted, since the chunk header does not
     * need to be tracked in the map.  This omission saves a header page
     * for common chunk sizes (e.g. 4 MiB).
     */
    arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
struct arena_run_s {
#ifdef JEMALLOC_DEBUG
    uint32_t	magic;
# define ARENA_RUN_MAGIC 0x384adf93
#endif

    /* Bin this run is associated with. */
    arena_bin_t	*bin;

    /* Index of next region that has never been allocated, or nregs. */
    uint32_t	nextind;

    /* Number of free regions in run. */
    unsigned	nfree;
};
/*
 * Read-only information associated with each element of arena_t's bins array
 * is stored separately, partly to reduce memory usage (only one copy, rather
 * than one per arena), but mainly to avoid false cacheline sharing.
 */
struct arena_bin_info_s {
    /* Size of regions in a run for this bin's size class. */
    size_t	reg_size;

    /* Total size of a run for this bin's size class. */
    size_t	run_size;

    /* Total number of regions in a run for this bin's size class. */
    uint32_t	nregs;

    /*
     * Offset of first bitmap_t element in a run header for this bin's size
     * class.
     */
    uint32_t	bitmap_offset;

    /*
     * Metadata used to manipulate bitmaps for runs associated with this
     * bin.
     */
    bitmap_info_t	bitmap_info;

    /*
     * Offset of first (prof_ctx_t *) in a run header for this bin's size
     * class, or 0 if (opt_prof == false).
     */
    uint32_t	ctx0_offset;

    /* Offset of first region in a run for this bin's size class. */
    uint32_t	reg0_offset;
};
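/*
 * Illustrative use of the offsets above (a sketch consistent with the inline
 * functions later in this header, not an additional API): for a run at
 * address "run" and region index "i",
 *
 *   void *reg = (void *)((uintptr_t)run + bin_info->reg0_offset +
 *       (size_t)i * bin_info->reg_size);
 *
 * and arena_run_regind() performs the inverse mapping from a region pointer
 * back to its index.
 */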
struct arena_bin_s {
    /*
     * All operations on runcur, runs, and stats require that lock be
     * locked.  Run allocation/deallocation are protected by the arena lock,
     * which may be acquired while holding one or more bin locks, but not
     * vice versa.
     */
    malloc_mutex_t	lock;

    /*
     * Current run being used to service allocations of this bin's size
     * class.
     */
    arena_run_t	*runcur;

    /*
     * Tree of non-full runs.  This tree is used when looking for an
     * existing run when runcur is no longer usable.  We choose the
     * non-full run that is lowest in memory; this policy tends to keep
     * objects packed well, and it can also help reduce the number of
     * almost-empty chunks.
     */
    arena_run_tree_t runs;

#ifdef JEMALLOC_STATS
    /* Bin statistics. */
    malloc_bin_stats_t stats;
#endif
};
struct arena_s {
#ifdef JEMALLOC_DEBUG
    uint32_t	magic;
# define ARENA_MAGIC 0x947d3d24
#endif

    /* This arena's index within the arenas array. */
    unsigned	ind;

    /*
     * Number of threads currently assigned to this arena.  This field is
     * protected by arenas_lock.
     */
    unsigned	nthreads;

    /*
     * There are three classes of arena operations from a locking
     * perspective:
     * 1) Thread assignment (modifies nthreads) is protected by
     *    arenas_lock.
     * 2) Bin-related operations are protected by bin locks.
     * 3) Chunk- and run-related operations are protected by this mutex.
     */
    malloc_mutex_t	lock;

#ifdef JEMALLOC_STATS
    arena_stats_t	stats;
# ifdef JEMALLOC_TCACHE
    /*
     * List of tcaches for extant threads associated with this arena.
     * Stats from these are merged incrementally, and at exit.
     */
    ql_head(tcache_t)	tcache_ql;
# endif
#endif

    uint64_t	prof_accumbytes;
    /* List of dirty-page-containing chunks this arena manages. */
    ql_head(arena_chunk_t)	chunks_dirty;

    /*
     * In order to avoid rapid chunk allocation/deallocation when an arena
     * oscillates right on the cusp of needing a new chunk, cache the most
     * recently freed chunk.  The spare is left in the arena's chunk trees
     * until it is deleted.
     *
     * There is one spare chunk per arena, rather than one spare total, in
     * order to avoid interactions between multiple threads that could make
     * a single spare inadequate.
     */
    arena_chunk_t	*spare;

    /* Number of pages in active runs. */
    size_t	nactive;

    /*
     * Current count of pages within unused runs that are potentially
     * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
     * By tracking this, we can institute a limit on how much dirty unused
     * memory is mapped for each arena.
     */
    size_t	ndirty;

    /*
     * Approximate number of pages being purged.  It is possible for
     * multiple threads to purge dirty pages concurrently, and they use
     * npurgatory to indicate the total number of pages all threads are
     * attempting to purge.
     */
    size_t	npurgatory;
    /*
     * Size/address-ordered trees of this arena's available runs.  The trees
     * are used for first-best-fit run allocation.  The dirty tree contains
     * runs with dirty pages (i.e. very likely to have been touched and
     * therefore have associated physical pages), whereas the clean tree
     * contains runs with pages that either have no associated physical
     * pages, or have pages that the kernel may recycle at any time due to
     * previous madvise(2) calls.  The dirty tree is used in preference to
     * the clean tree for allocations, because using dirty pages reduces
     * the amount of dirty purging necessary to keep the active:dirty page
     * ratio below the purge threshold.
     */
    arena_avail_tree_t	runs_avail_clean;
    arena_avail_tree_t	runs_avail_dirty;

    /*
     * bins is used to store trees of free regions of the following sizes,
     * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
     * default MALLOC_CONF.
     */
    arena_bin_t	bins[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern size_t	opt_lg_qspace_max;
extern size_t	opt_lg_cspace_max;
extern ssize_t	opt_lg_dirty_mult;
/*
 * small_size2bin is a compact lookup table that rounds request sizes up to
 * size classes.  In order to reduce cache footprint, the table is compressed,
 * and all accesses are via the SMALL_SIZE2BIN macro.
 */
extern uint8_t const	*small_size2bin;
#define SMALL_SIZE2BIN(s)	(small_size2bin[(s-1) >> LG_TINY_MIN])
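/*
 * Illustrative lookup (assuming LG_TINY_MIN == 3, i.e. LG_SIZEOF_PTR on a
 * 64-bit system): the table stores one entry per 8-byte step, so requests of
 * 1..8 bytes share small_size2bin[0], 9..16 bytes share small_size2bin[1],
 * and so on; SMALL_SIZE2BIN(s) therefore yields the index of the smallest
 * bin whose size class is >= s.
 */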
extern arena_bin_info_t	*arena_bin_info;
/* Various bin-related settings. */
#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
# define ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
#else
# define ntbins	0
#endif
extern unsigned	nqbins; /* Number of quantum-spaced bins. */
extern unsigned	ncbins; /* Number of cacheline-spaced bins. */
extern unsigned	nsbins; /* Number of subpage-spaced bins. */
extern unsigned	nbins;
# define tspace_max	((size_t)(QUANTUM >> 1))
#define qspace_min	QUANTUM
extern size_t	qspace_max;
extern size_t	cspace_min;
extern size_t	cspace_max;
extern size_t	sspace_min;
extern size_t	sspace_max;
#define small_maxclass	sspace_max

#define nlclasses	(chunk_npages - map_bias)
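/*
 * Note (illustrative): nlclasses is the number of large size classes, one per
 * page-multiple run size from one page up to the largest run that fits in a
 * chunk after the map_bias header pages.  For example, with 4 KiB pages and
 * 4 MiB chunks, chunk_npages is 1024 and a handful of those pages are
 * consumed by the chunk header.
 */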
void	arena_purge_all(arena_t *arena);
void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
#ifdef JEMALLOC_TCACHE
void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
    size_t binind
# ifdef JEMALLOC_PROF
    , uint64_t prof_accumbytes
# endif
    );
#endif
void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
void	*arena_malloc(size_t size, bool zero);
void	*arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
    size_t alignment, bool zero);
size_t	arena_salloc(const void *ptr);
void	arena_prof_promoted(const void *ptr, size_t size);
size_t	arena_salloc_demote(const void *ptr);
void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm);
void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#ifdef JEMALLOC_STATS
void	arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
    arena_stats_t *astats, malloc_bin_stats_t *bstats,
    malloc_large_stats_t *lstats);
#endif
void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
    size_t extra, bool zero);
void	*arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
    size_t alignment, bool zero);
bool	arena_new(arena_t *arena, unsigned ind);
bool	arena_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
    const void *ptr);
# ifdef JEMALLOC_PROF
prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
# endif
void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
    size_t binind = bin - arena->bins;
    assert(binind < nbins);
    return (binind);
}
JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
    unsigned shift, diff, regind;
    size_t size;

    dassert(run->magic == ARENA_RUN_MAGIC);
    /*
     * Freeing a pointer lower than region zero can cause assertion
     * failure.
     */
    assert((uintptr_t)ptr >= (uintptr_t)run +
        (uintptr_t)bin_info->reg0_offset);

    /*
     * Avoid doing division with a variable divisor if possible.  Using
     * actual division here can reduce allocator throughput by over 20%!
     */
    diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
        bin_info->reg0_offset);

    /* Rescale (factor powers of 2 out of the numerator and denominator). */
    size = bin_info->reg_size;
    shift = ffs(size) - 1;
    diff >>= shift;
    size >>= shift;

    if (size == 1) {
        /* The divisor was a power of 2. */
        regind = diff;
    } else {
        /*
         * To divide by a number D that is not a power of two we
         * multiply by (2^21 / D) and then right shift by 21 positions.
         *
         *   X / D
         *
         * becomes
         *
         *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
         *
         * We can omit the first three elements, because we never
         * divide by 0, and 1 and 2 are both powers of two, which are
         * handled above.
         */
#define SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
        static const unsigned size_invs[] = {
            SIZE_INV(3),
            SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
            SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
            SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
            SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
            SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
            SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
            SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
        };

        if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
            regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
        else
            regind = diff / size;
#undef SIZE_INV
#undef SIZE_INV_SHIFT
    }
    assert(diff == regind * size);
    assert(regind < bin_info->nregs);

    return (regind);
}
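/*
 * Worked example for the reciprocal-multiplication trick above (illustrative
 * numbers only): with 32-bit unsigned and LG_RUN_MAXREGS == 11,
 * SIZE_INV_SHIFT == 21.  For reg_size == 48, shift == ffs(48) - 1 == 4, so
 * the rescaled divisor is 48 >> 4 == 3 and size_invs[0] == (2^21 / 3) + 1 ==
 * 699051.  A pointer 144 bytes past region zero gives diff == 144 >> 4 == 9,
 * and (9 * 699051) >> 21 == 3, matching the exact result 144 / 48 == 3.
 */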
JEMALLOC_INLINE prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
    prof_ctx_t *ret;
    arena_chunk_t *chunk;
    size_t pageind, mapbits;

    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
    mapbits = chunk->map[pageind-map_bias].bits;
    assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
    if ((mapbits & CHUNK_MAP_LARGE) == 0) {
        if (prof_promote)
            ret = (prof_ctx_t *)(uintptr_t)1U;
        else {
            arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
                (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
                PAGE_SHIFT));
            size_t binind = arena_bin_index(chunk->arena, run->bin);
            arena_bin_info_t *bin_info = &arena_bin_info[binind];
            unsigned regind;

            dassert(run->magic == ARENA_RUN_MAGIC);
            regind = arena_run_regind(run, bin_info, ptr);
            ret = *(prof_ctx_t **)((uintptr_t)run +
                bin_info->ctx0_offset + (regind *
                sizeof(prof_ctx_t *)));
        }
    } else
        ret = chunk->map[pageind-map_bias].prof_ctx;

    return (ret);
}
JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
    arena_chunk_t *chunk;
    size_t pageind, mapbits;

    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
    mapbits = chunk->map[pageind-map_bias].bits;
    assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
    if ((mapbits & CHUNK_MAP_LARGE) == 0) {
        if (prof_promote == false) {
            arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
                (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
                PAGE_SHIFT));
            arena_bin_t *bin = run->bin;
            size_t binind;
            arena_bin_info_t *bin_info;
            unsigned regind;

            dassert(run->magic == ARENA_RUN_MAGIC);
            binind = arena_bin_index(chunk->arena, bin);
            bin_info = &arena_bin_info[binind];
            regind = arena_run_regind(run, bin_info, ptr);

            *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
                + (regind * sizeof(prof_ctx_t *)))) = ctx;
        } else
            assert((uintptr_t)ctx == (uintptr_t)1U);
    } else
        chunk->map[pageind-map_bias].prof_ctx = ctx;
}
JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
    size_t pageind;
    arena_chunk_map_t *mapelm;

    assert(arena != NULL);
    dassert(arena->magic == ARENA_MAGIC);
    assert(chunk->arena == arena);
    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
    mapelm = &chunk->map[pageind-map_bias];
    assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
    if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
        /* Small allocation. */
#ifdef JEMALLOC_TCACHE
        tcache_t *tcache;

        if ((tcache = tcache_get()) != NULL)
            tcache_dalloc_small(tcache, ptr);
        else {
#endif
            arena_run_t *run;
            arena_bin_t *bin;

            run = (arena_run_t *)((uintptr_t)chunk +
                (uintptr_t)((pageind - (mapelm->bits >>
                PAGE_SHIFT)) << PAGE_SHIFT));
            dassert(run->magic == ARENA_RUN_MAGIC);
            bin = run->bin;
#ifdef JEMALLOC_DEBUG
            {
                size_t binind = arena_bin_index(arena, bin);
                arena_bin_info_t *bin_info =
                    &arena_bin_info[binind];
                assert(((uintptr_t)ptr - ((uintptr_t)run +
                    (uintptr_t)bin_info->reg0_offset)) %
                    bin_info->reg_size == 0);
            }
#endif
            malloc_mutex_lock(&bin->lock);
            arena_dalloc_bin(arena, chunk, ptr, mapelm);
            malloc_mutex_unlock(&bin->lock);
#ifdef JEMALLOC_TCACHE
        }
#endif
    } else {
#ifdef JEMALLOC_TCACHE
        size_t size = mapelm->bits & ~PAGE_MASK;

        assert(((uintptr_t)ptr & PAGE_MASK) == 0);
        if (size <= tcache_maxclass) {
            tcache_t *tcache;

            if ((tcache = tcache_get()) != NULL)
                tcache_dalloc_large(tcache, ptr, size);
            else {
                malloc_mutex_lock(&arena->lock);
                arena_dalloc_large(arena, chunk, ptr);
                malloc_mutex_unlock(&arena->lock);
            }
        } else {
            malloc_mutex_lock(&arena->lock);
            arena_dalloc_large(arena, chunk, ptr);
            malloc_mutex_unlock(&arena->lock);
        }
#else
        assert(((uintptr_t)ptr & PAGE_MASK) == 0);
        malloc_mutex_lock(&arena->lock);
        arena_dalloc_large(arena, chunk, ptr);
        malloc_mutex_unlock(&arena->lock);
#endif
    }
}
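/*
 * Sketch of how a deallocation typically reaches arena_dalloc() (illustrative
 * only; the real dispatch lives in the internal idalloc() path, not in this
 * header):
 *
 *   arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 *   if ((void *)chunk != ptr) {
 *           // Non-chunk-aligned: a small or large allocation in some arena.
 *           arena_dalloc(chunk->arena, chunk, ptr);
 *   } else {
 *           // Chunk-aligned pointers belong to huge allocations, which are
 *           // handled by the huge_* code rather than by arenas.
 *   }
 */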
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/