1 /******************************************************************************/
2 #ifdef JEMALLOC_H_TYPES
3
4 /*
5 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
6 * as small as possible such that this setting is still honored, without
7 * violating other constraints. The goal is to make runs as small as possible
8 * without exceeding a per run external fragmentation threshold.
9 *
10 * We use binary fixed point math for overhead computations, where the binary
11 * point is implicitly RUN_BFP bits to the left.
12 *
13 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
14 * honored for some/all object sizes, since when heap profiling is enabled
15 * there is one pointer of header overhead per object (plus a constant). This
16 * constraint is relaxed (ignored) for runs that are so small that the
17 * per-region overhead is greater than:
18 *
19  *   RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP))
20 */
21 #define RUN_BFP 12
22 /* \/ Implicit binary fixed point. */
23 #define RUN_MAX_OVRHD 0x0000003dU
24 #define RUN_MAX_OVRHD_RELAX 0x00001800U
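/*
 * Fixed-point sketch (illustrative): with RUN_BFP == 12, a constant c encodes
 * the fraction c / 2^12, so
 *
 *   RUN_MAX_OVRHD       == 0x3d   == 61    ->   61/4096 ~= 1.5%
 *   RUN_MAX_OVRHD_RELAX == 0x1800 == 6144  -> 6144/4096  = 1.5
 *
 * i.e. a run of run_size bytes carrying hdr bytes of header overhead meets the
 * target when (hdr << RUN_BFP) <= RUN_MAX_OVRHD * run_size, equivalently
 * hdr/run_size <= ~1.5%.
 */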
25
26 /* Maximum number of regions in one run. */
27 #define LG_RUN_MAXREGS 11
28 #define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
29
30 /*
31 * Minimum redzone size. Redzones may be larger than this if necessary to
32 * preserve region alignment.
33 */
34 #define REDZONE_MINSIZE 16
35
36 /*
37 * The minimum ratio of active:dirty pages per arena is computed as:
38 *
39 * (nactive >> opt_lg_dirty_mult) >= ndirty
40 *
41 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
42 * times as many active pages as dirty pages.
43 */
44 #define LG_DIRTY_MULT_DEFAULT 5
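/*
 * Worked example (illustrative): with opt_lg_dirty_mult == 5, an arena with
 * nactive == 3200 pages in active runs satisfies the constraint only while
 * ndirty <= (3200 >> 5) == 100; accumulating more dirty pages than that
 * triggers purging.
 */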
45
46 typedef struct arena_chunk_map_s arena_chunk_map_t;
47 typedef struct arena_chunk_s arena_chunk_t;
48 typedef struct arena_run_s arena_run_t;
49 typedef struct arena_bin_info_s arena_bin_info_t;
50 typedef struct arena_bin_s arena_bin_t;
51 typedef struct arena_s arena_t;
52
53 #endif /* JEMALLOC_H_TYPES */
54 /******************************************************************************/
55 #ifdef JEMALLOC_H_STRUCTS
56
57 /* Each element of the chunk map corresponds to one page within the chunk. */
58 struct arena_chunk_map_s {
59 #ifndef JEMALLOC_PROF
60 /*
61 * Overlay prof_ctx in order to allow it to be referenced by dead code.
62 * Such antics aren't warranted for per arena data structures, but
63 * chunk map overhead accounts for a percentage of memory, rather than
64 * being just a fixed cost.
65 */
66 union {
67 #endif
68 union {
69 /*
70 * Linkage for run trees. There are two disjoint uses:
71 *
72 * 1) arena_t's runs_avail_{clean,dirty} trees.
73 * 2) arena_run_t conceptually uses this linkage for in-use
74 * non-full runs, rather than directly embedding linkage.
75 */
76 rb_node(arena_chunk_map_t) rb_link;
77 /*
78 * List of runs currently in purgatory. arena_chunk_purge()
79 * temporarily allocates runs that contain dirty pages while
80 * purging, so that other threads cannot use the runs while the
81 * purging thread is operating without the arena lock held.
82 */
83 ql_elm(arena_chunk_map_t) ql_link;
84 } u;
85
86 /* Profile counters, used for large object runs. */
87 prof_ctx_t *prof_ctx;
88 #ifndef JEMALLOC_PROF
89 }; /* union { ... }; */
90 #endif
91
92 /*
93 * Run address (or size) and various flags are stored together. The bit
94 * layout looks like (assuming 32-bit system):
95 *
96 * ???????? ???????? ????nnnn nnnndula
97 *
98 * ? : Unallocated: Run address for first/last pages, unset for internal
99 * pages.
100 * Small: Run page offset.
101 * Large: Run size for first page, unset for trailing pages.
102 * n : binind for small size class, BININD_INVALID for large size class.
103 * d : dirty?
104 * u : unzeroed?
105 * l : large?
106 * a : allocated?
107 *
108 * Following are example bit patterns for the three types of runs.
109 *
110 * p : run page offset
111 * s : run size
112 * n : binind for size class; large objects set these to BININD_INVALID
113 * except for promoted allocations (see prof_promote)
114 * x : don't care
115 * - : 0
116 * + : 1
117 * [DULA] : bit set
118 * [dula] : bit unset
119 *
120 * Unallocated (clean):
121 * ssssssss ssssssss ssss++++ ++++du-a
122 * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
123 * ssssssss ssssssss ssss++++ ++++dU-a
124 *
125 * Unallocated (dirty):
126 * ssssssss ssssssss ssss++++ ++++D--a
127 * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
128 * ssssssss ssssssss ssss++++ ++++D--a
129 *
130 * Small:
131 * pppppppp pppppppp ppppnnnn nnnnd--A
132 * pppppppp pppppppp ppppnnnn nnnn---A
133 * pppppppp pppppppp ppppnnnn nnnnd--A
134 *
135 * Large:
136 * ssssssss ssssssss ssss++++ ++++D-LA
137 * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
138 * -------- -------- ----++++ ++++D-LA
139 *
140 * Large (sampled, size <= PAGE):
141 * ssssssss ssssssss ssssnnnn nnnnD-LA
142 *
143 * Large (not sampled, size == PAGE):
144 * ssssssss ssssssss ssss++++ ++++D-LA
145 */
146 size_t bits;
147 #define CHUNK_MAP_BININD_SHIFT 4
148 #define BININD_INVALID ((size_t)0xffU)
149 /* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
150 #define CHUNK_MAP_BININD_MASK ((size_t)0xff0U)
151 #define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
152 #define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU)
153 #define CHUNK_MAP_DIRTY ((size_t)0x8U)
154 #define CHUNK_MAP_UNZEROED ((size_t)0x4U)
155 #define CHUNK_MAP_LARGE ((size_t)0x2U)
156 #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
157 #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
158 };
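/*
 * Decoding sketch (illustrative); the inline accessors later in this header
 * wrap exactly these masks and shifts:
 *
 *   allocated = (bits & CHUNK_MAP_ALLOCATED) != 0;
 *   large     = (bits & CHUNK_MAP_LARGE) != 0;
 *   dirty     = (bits & CHUNK_MAP_DIRTY) != 0;
 *   binind    = (bits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
 *
 * and (bits & ~PAGE_MASK) holds the run size for unallocated/large pages, or
 * the run page offset << LG_PAGE for small pages.
 */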
159 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
160 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
161
162 /* Arena chunk header. */
163 struct arena_chunk_s {
164 /* Arena that owns the chunk. */
165 arena_t *arena;
166
167 /* Linkage for the arena's chunks_dirty list. */
168 ql_elm(arena_chunk_t) link_dirty;
169
170 /*
171 * True if the chunk is currently in the chunks_dirty list, due to
172 * having at some point contained one or more dirty pages. Removal
173 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
174 */
175 bool dirtied;
176
177 /* Number of dirty pages. */
178 size_t ndirty;
179
180 /*
181 * Map of pages within chunk that keeps track of free/large/small. The
182 * first map_bias entries are omitted, since the chunk header does not
183 * need to be tracked in the map. This omission saves a header page
184 * for common chunk sizes (e.g. 4 MiB).
185 */
186 arena_chunk_map_t map[1]; /* Dynamically sized. */
187 };
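/*
 * Indexing sketch (illustrative): because the first map_bias pages hold the
 * chunk header (including this map), the map element for page pageind is
 *
 *   arena_chunk_map_t *mapelm = &chunk->map[pageind - map_bias];
 *
 * which is what arena_mapp_get() below evaluates. For example, with 4 KiB
 * pages a 4 MiB chunk spans 1024 pages, and only the chunk_npages - map_bias
 * trailing ones need map entries.
 */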
188 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
189
190 struct arena_run_s {
191 /* Bin this run is associated with. */
192 arena_bin_t *bin;
193
194 /* Index of next region that has never been allocated, or nregs. */
195 uint32_t nextind;
196
197 /* Number of free regions in run. */
198 unsigned nfree;
199 };
200
201 /*
202 * Read-only information associated with each element of arena_t's bins array
203 * is stored separately, partly to reduce memory usage (only one copy, rather
204 * than one per arena), but mainly to avoid false cacheline sharing.
205 *
206 * Each run has the following layout:
207 *
208 * /--------------------\
209 * | arena_run_t header |
210 * | ... |
211 * bitmap_offset | bitmap |
212 * | ... |
213 * ctx0_offset | ctx map |
214 * | ... |
215 * |--------------------|
216 * | redzone |
217 * reg0_offset | region 0 |
218 * | redzone |
219 * |--------------------| \
220 * | redzone | |
221 * | region 1 | > reg_interval
222 * | redzone | /
223 * |--------------------|
224 * | ... |
225 * | ... |
226 * | ... |
227 * |--------------------|
228 * | redzone |
229 * | region nregs-1 |
230 * | redzone |
231 * |--------------------|
232 * | alignment pad? |
233 * \--------------------/
234 *
235 * reg_interval has at least the same minimum alignment as reg_size; this
236 * preserves the alignment constraint that sa2u() depends on. Alignment pad is
237 * either 0 or redzone_size; it is present only if needed to align reg0_offset.
238 */
239 struct arena_bin_info_s {
240 /* Size of regions in a run for this bin's size class. */
241 size_t reg_size;
242
243 /* Redzone size. */
244 size_t redzone_size;
245
246 /* Interval between regions (reg_size + (redzone_size << 1)). */
247 size_t reg_interval;
248
249 /* Total size of a run for this bin's size class. */
250 size_t run_size;
251
252 /* Total number of regions in a run for this bin's size class. */
253 uint32_t nregs;
254
255 /*
256 * Offset of first bitmap_t element in a run header for this bin's size
257 * class.
258 */
259 uint32_t bitmap_offset;
260
261 /*
262 * Metadata used to manipulate bitmaps for runs associated with this
263 * bin.
264 */
265 bitmap_info_t bitmap_info;
266
267 /*
268 * Offset of first (prof_ctx_t *) in a run header for this bin's size
269 * class, or 0 if (config_prof == false || opt_prof == false).
270 */
271 uint32_t ctx0_offset;
272
273 /* Offset of first region in a run for this bin's size class. */
274 uint32_t reg0_offset;
275 };
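/*
 * Address arithmetic sketch (illustrative): for a run at address run managed
 * by bin_info, region regind begins at
 *
 *   (void *)((uintptr_t)run + bin_info->reg0_offset +
 *       (size_t)regind * bin_info->reg_interval);
 *
 * where reg_interval == reg_size + 2*redzone_size. arena_run_regind() below
 * inverts this mapping to recover regind from an arbitrary region pointer.
 */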
276
277 struct arena_bin_s {
278 /*
279 * All operations on runcur, runs, and stats require that lock be
280 * locked. Run allocation/deallocation are protected by the arena lock,
281 * which may be acquired while holding one or more bin locks, but not
282  * vice versa.
283 */
284 malloc_mutex_t lock;
285
286 /*
287 * Current run being used to service allocations of this bin's size
288 * class.
289 */
290 arena_run_t *runcur;
291
292 /*
293 * Tree of non-full runs. This tree is used when looking for an
294 * existing run when runcur is no longer usable. We choose the
295 * non-full run that is lowest in memory; this policy tends to keep
296 * objects packed well, and it can also help reduce the number of
297 * almost-empty chunks.
298 */
299 arena_run_tree_t runs;
300
301 /* Bin statistics. */
302 malloc_bin_stats_t stats;
303 };
304
305 struct arena_s {
306 /* This arena's index within the arenas array. */
307 unsigned ind;
308
309 /*
310 * Number of threads currently assigned to this arena. This field is
311 * protected by arenas_lock.
312 */
313 unsigned nthreads;
314
315 /*
316 * There are three classes of arena operations from a locking
317 * perspective:
318  * 1) Thread assignment (modifies nthreads) is protected by
319 * arenas_lock.
320 * 2) Bin-related operations are protected by bin locks.
321 * 3) Chunk- and run-related operations are protected by this mutex.
322 */
323 malloc_mutex_t lock;
324
325 arena_stats_t stats;
326 /*
327 * List of tcaches for extant threads associated with this arena.
328 * Stats from these are merged incrementally, and at exit.
329 */
330 ql_head(tcache_t) tcache_ql;
331
332 uint64_t prof_accumbytes;
333
334 /* List of dirty-page-containing chunks this arena manages. */
335 ql_head(arena_chunk_t) chunks_dirty;
336
337 /*
338 * In order to avoid rapid chunk allocation/deallocation when an arena
339 * oscillates right on the cusp of needing a new chunk, cache the most
340 * recently freed chunk. The spare is left in the arena's chunk trees
341 * until it is deleted.
342 *
343 * There is one spare chunk per arena, rather than one spare total, in
344 * order to avoid interactions between multiple threads that could make
345 * a single spare inadequate.
346 */
347 arena_chunk_t *spare;
348
349 /* Number of pages in active runs. */
350 size_t nactive;
351
352 /*
353 * Current count of pages within unused runs that are potentially
354 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
355 * By tracking this, we can institute a limit on how much dirty unused
356 * memory is mapped for each arena.
357 */
358 size_t ndirty;
359
360 /*
361 * Approximate number of pages being purged. It is possible for
362 * multiple threads to purge dirty pages concurrently, and they use
363 * npurgatory to indicate the total number of pages all threads are
364 * attempting to purge.
365 */
366 size_t npurgatory;
367
368 /*
369 * Size/address-ordered trees of this arena's available runs. The trees
370 * are used for first-best-fit run allocation. The dirty tree contains
371 * runs with dirty pages (i.e. very likely to have been touched and
372 * therefore have associated physical pages), whereas the clean tree
373 * contains runs with pages that either have no associated physical
374 * pages, or have pages that the kernel may recycle at any time due to
375 * previous madvise(2) calls. The dirty tree is used in preference to
376 * the clean tree for allocations, because using dirty pages reduces
377 * the amount of dirty purging necessary to keep the active:dirty page
378 * ratio below the purge threshold.
379 */
380 arena_avail_tree_t runs_avail_clean;
381 arena_avail_tree_t runs_avail_dirty;
382
383 /* bins is used to store trees of free regions. */
384 arena_bin_t bins[NBINS];
385 };
386
387 #endif /* JEMALLOC_H_STRUCTS */
388 /******************************************************************************/
389 #ifdef JEMALLOC_H_EXTERNS
390
391 extern ssize_t opt_lg_dirty_mult;
392 /*
393 * small_size2bin is a compact lookup table that rounds request sizes up to
394 * size classes. In order to reduce cache footprint, the table is compressed,
395 * and all accesses are via the SMALL_SIZE2BIN macro.
396 */
397 extern uint8_t const small_size2bin[];
398 #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
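/*
 * Usage sketch (illustrative; LG_TINY_MIN is configuration-dependent, 3 is
 * only an example): with LG_TINY_MIN == 3 the table holds one entry per
 * 8-byte quantum of request size, so a 13-byte request maps to
 *
 *   SMALL_SIZE2BIN(13) == small_size2bin[(13-1) >> 3] == small_size2bin[1]
 *
 * i.e. the index of the smallest size class that can hold 13 bytes.
 */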
399
400 extern arena_bin_info_t arena_bin_info[NBINS];
401
402 /* Number of large size classes. */
403 #define nlclasses (chunk_npages - map_bias)
404
405 void arena_purge_all(arena_t *arena);
406 void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
407 void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
408 size_t binind, uint64_t prof_accumbytes);
409 void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
410 bool zero);
411 void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
412 void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
413 void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
414 void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
415 void arena_prof_promoted(const void *ptr, size_t size);
416 void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
417 arena_chunk_map_t *mapelm);
418 void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
419 size_t pageind, arena_chunk_map_t *mapelm);
420 void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
421 size_t pageind);
422 void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
423 void *ptr);
424 void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
425 void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
426 arena_stats_t *astats, malloc_bin_stats_t *bstats,
427 malloc_large_stats_t *lstats);
428 void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
429 size_t extra, bool zero);
430 void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
431 size_t alignment, bool zero, bool try_tcache);
432 bool arena_new(arena_t *arena, unsigned ind);
433 void arena_boot(void);
434 void arena_prefork(arena_t *arena);
435 void arena_postfork_parent(arena_t *arena);
436 void arena_postfork_child(arena_t *arena);
437
438 #endif /* JEMALLOC_H_EXTERNS */
439 /******************************************************************************/
440 #ifdef JEMALLOC_H_INLINES
441
442 #ifndef JEMALLOC_ENABLE_INLINE
443 arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
444 size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
445 size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
446 size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
447 size_t pageind);
448 size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
449 size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
450 size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
451 size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
452 size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
453 size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
454 size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
455 void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
456 size_t size, size_t flags);
457 void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
458 size_t size);
459 void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
460 size_t size, size_t flags);
461 void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
462 size_t binind);
463 void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
464 size_t runind, size_t binind, size_t flags);
465 void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
466 size_t unzeroed);
467 size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
468 size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
469 unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
470 const void *ptr);
471 prof_ctx_t *arena_prof_ctx_get(const void *ptr);
472 void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
473 void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
474 size_t arena_salloc(const void *ptr, bool demote);
475 void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
476 bool try_tcache);
477 #endif
478
479 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
480 # ifdef JEMALLOC_ARENA_INLINE_A
481 JEMALLOC_INLINE arena_chunk_map_t *
482 arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
483 {
484
485 assert(pageind >= map_bias);
486 assert(pageind < chunk_npages);
487
488 return (&chunk->map[pageind-map_bias]);
489 }
490
491 JEMALLOC_INLINE size_t *
492 arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
493 {
494
495 return (&arena_mapp_get(chunk, pageind)->bits);
496 }
497
498 JEMALLOC_INLINE size_t
499 arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
500 {
501
502 return (*arena_mapbitsp_get(chunk, pageind));
503 }
504
505 JEMALLOC_INLINE size_t
506 arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
507 {
508 size_t mapbits;
509
510 mapbits = arena_mapbits_get(chunk, pageind);
511 assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
512 return (mapbits & ~PAGE_MASK);
513 }
514
515 JEMALLOC_INLINE size_t
516 arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
517 {
518 size_t mapbits;
519
520 mapbits = arena_mapbits_get(chunk, pageind);
521 assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
522 (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
523 return (mapbits & ~PAGE_MASK);
524 }
525
526 JEMALLOC_INLINE size_t
527 arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
528 {
529 size_t mapbits;
530
531 mapbits = arena_mapbits_get(chunk, pageind);
532 assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
533 CHUNK_MAP_ALLOCATED);
534 return (mapbits >> LG_PAGE);
535 }
536
537 JEMALLOC_INLINE size_t
538 arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
539 {
540 size_t mapbits;
541 size_t binind;
542
543 mapbits = arena_mapbits_get(chunk, pageind);
544 binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
545 assert(binind < NBINS || binind == BININD_INVALID);
546 return (binind);
547 }
548
549 JEMALLOC_INLINE size_t
550 arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
551 {
552 size_t mapbits;
553
554 mapbits = arena_mapbits_get(chunk, pageind);
555 return (mapbits & CHUNK_MAP_DIRTY);
556 }
557
558 JEMALLOC_INLINE size_t
559 arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
560 {
561 size_t mapbits;
562
563 mapbits = arena_mapbits_get(chunk, pageind);
564 return (mapbits & CHUNK_MAP_UNZEROED);
565 }
566
567 JEMALLOC_INLINE size_t
568 arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
569 {
570 size_t mapbits;
571
572 mapbits = arena_mapbits_get(chunk, pageind);
573 return (mapbits & CHUNK_MAP_LARGE);
574 }
575
576 JEMALLOC_INLINE size_t
577 arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
578 {
579 size_t mapbits;
580
581 mapbits = arena_mapbits_get(chunk, pageind);
582 return (mapbits & CHUNK_MAP_ALLOCATED);
583 }
584
585 JEMALLOC_INLINE void
586 arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
587 size_t flags)
588 {
589 size_t *mapbitsp;
590
591 mapbitsp = arena_mapbitsp_get(chunk, pageind);
592 assert((size & PAGE_MASK) == 0);
593 assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
594 assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
595 *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags;
596 }
597
598 JEMALLOC_INLINE void
599 arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
600 size_t size)
601 {
602 size_t *mapbitsp;
603
604 mapbitsp = arena_mapbitsp_get(chunk, pageind);
605 assert((size & PAGE_MASK) == 0);
606 assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
607 *mapbitsp = size | (*mapbitsp & PAGE_MASK);
608 }
609
610 JEMALLOC_INLINE void
611 arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
612 size_t flags)
613 {
614 size_t *mapbitsp;
615 size_t unzeroed;
616
617 mapbitsp = arena_mapbitsp_get(chunk, pageind);
618 assert((size & PAGE_MASK) == 0);
619 assert((flags & CHUNK_MAP_DIRTY) == flags);
620 unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
621 *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed |
622 CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
623 }
624
625 JEMALLOC_INLINE void
626 arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
627 size_t binind)
628 {
629 size_t *mapbitsp;
630
631 assert(binind <= BININD_INVALID);
632 mapbitsp = arena_mapbitsp_get(chunk, pageind);
633 assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
634 *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind <<
635 CHUNK_MAP_BININD_SHIFT);
636 }
637
638 JEMALLOC_INLINE void
639 arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
640 size_t binind, size_t flags)
641 {
642 size_t *mapbitsp;
643 size_t unzeroed;
644
645 assert(binind < BININD_INVALID);
646 mapbitsp = arena_mapbitsp_get(chunk, pageind);
647 assert(pageind - runind >= map_bias);
648 assert((flags & CHUNK_MAP_DIRTY) == flags);
649 unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
650 *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) |
651 flags | unzeroed | CHUNK_MAP_ALLOCATED;
652 }
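/*
 * Round-trip sketch (illustrative): a small-run page whose runind is i lies i
 * pages past the start of its run, so given any pointer into the run:
 *
 *   pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 *   mapbits = arena_mapbits_get(chunk, pageind);
 *   run = (arena_run_t *)((uintptr_t)chunk +
 *       (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE));
 *
 * This is the computation arena_ptr_small_binind_get() and arena_prof_ctx_get()
 * below perform.
 */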
653
654 JEMALLOC_INLINE void
655 arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
656 size_t unzeroed)
657 {
658 size_t *mapbitsp;
659
660 mapbitsp = arena_mapbitsp_get(chunk, pageind);
661 *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed;
662 }
663
664 JEMALLOC_INLINE size_t
665 arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
666 {
667 size_t binind;
668
669 binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
670
671 if (config_debug) {
672 arena_chunk_t *chunk;
673 arena_t *arena;
674 size_t pageind;
675 size_t actual_mapbits;
676 arena_run_t *run;
677 arena_bin_t *bin;
678 size_t actual_binind;
679 arena_bin_info_t *bin_info;
680
681 assert(binind != BININD_INVALID);
682 assert(binind < NBINS);
683 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
684 arena = chunk->arena;
685 pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
686 actual_mapbits = arena_mapbits_get(chunk, pageind);
687 assert(mapbits == actual_mapbits);
688 assert(arena_mapbits_large_get(chunk, pageind) == 0);
689 assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
690 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
691 (actual_mapbits >> LG_PAGE)) << LG_PAGE));
692 bin = run->bin;
693 actual_binind = bin - arena->bins;
694 assert(binind == actual_binind);
695 bin_info = &arena_bin_info[actual_binind];
696 assert(((uintptr_t)ptr - ((uintptr_t)run +
697 (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
698 == 0);
699 }
700
701 return (binind);
702 }
703 # endif /* JEMALLOC_ARENA_INLINE_A */
704
705 # ifdef JEMALLOC_ARENA_INLINE_B
706 JEMALLOC_INLINE size_t
707 arena_bin_index(arena_t *arena, arena_bin_t *bin)
708 {
709 size_t binind = bin - arena->bins;
710 assert(binind < NBINS);
711 return (binind);
712 }
713
714 JEMALLOC_INLINE unsigned
715 arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
716 {
717 unsigned shift, diff, regind;
718 size_t interval;
719
720 /*
721 * Freeing a pointer lower than region zero can cause assertion
722 * failure.
723 */
724 assert((uintptr_t)ptr >= (uintptr_t)run +
725 (uintptr_t)bin_info->reg0_offset);
726
727 /*
728 * Avoid doing division with a variable divisor if possible. Using
729 * actual division here can reduce allocator throughput by over 20%!
730 */
731 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
732 bin_info->reg0_offset);
733
734 /* Rescale (factor powers of 2 out of the numerator and denominator). */
735 interval = bin_info->reg_interval;
736 shift = ffs(interval) - 1;
737 diff >>= shift;
738 interval >>= shift;
739
740 if (interval == 1) {
741 /* The divisor was a power of 2. */
742 regind = diff;
743 } else {
744 /*
745 * To divide by a number D that is not a power of two we
746 * multiply by (2^21 / D) and then right shift by 21 positions.
747 *
748 * X / D
749 *
750 * becomes
751 *
752 * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
753 *
754 * We can omit the first three elements, because we never
755 * divide by 0, and 1 and 2 are both powers of two, which are
756 * handled above.
757 */
758 #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
759 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
760 static const unsigned interval_invs[] = {
761 SIZE_INV(3),
762 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
763 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
764 SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
765 SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
766 SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
767 SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
768 SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
769 };
770
771 if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
772 2)) {
773 regind = (diff * interval_invs[interval - 3]) >>
774 SIZE_INV_SHIFT;
775 } else
776 regind = diff / interval;
777 #undef SIZE_INV
778 #undef SIZE_INV_SHIFT
779 }
780 assert(diff == regind * interval);
781 assert(regind < bin_info->nregs);
782
783 return (regind);
784 }
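/*
 * Worked example (illustrative, assuming 32-bit unsigned): for
 * reg_interval == 48, ffs(48) - 1 == 4, so diff and interval are both scaled
 * down by 16, leaving interval == 3. SIZE_INV_SHIFT == 32 - 11 == 21 and
 * SIZE_INV(3) == (1U << 21)/3 + 1 == 699051, so
 *
 *   regind == ((diff >> 4) * 699051) >> 21
 *
 * e.g. region 10 has diff == 480 and ((480 >> 4) * 699051) >> 21 == 10. The
 * trailing assert(diff == regind * interval) verifies that the truncated
 * product reproduced the exact quotient.
 */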
785
786 JEMALLOC_INLINE prof_ctx_t *
787 arena_prof_ctx_get(const void *ptr)
788 {
789 prof_ctx_t *ret;
790 arena_chunk_t *chunk;
791 size_t pageind, mapbits;
792
793 cassert(config_prof);
794 assert(ptr != NULL);
795 assert(CHUNK_ADDR2BASE(ptr) != ptr);
796
797 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
798 pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
799 mapbits = arena_mapbits_get(chunk, pageind);
800 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
801 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
802 if (prof_promote)
803 ret = (prof_ctx_t *)(uintptr_t)1U;
804 else {
805 arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
806 (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
807 LG_PAGE));
808 size_t binind = arena_ptr_small_binind_get(ptr,
809 mapbits);
810 arena_bin_info_t *bin_info = &arena_bin_info[binind];
811 unsigned regind;
812
813 regind = arena_run_regind(run, bin_info, ptr);
814 ret = *(prof_ctx_t **)((uintptr_t)run +
815 bin_info->ctx0_offset + (regind *
816 sizeof(prof_ctx_t *)));
817 }
818 } else
819 ret = arena_mapp_get(chunk, pageind)->prof_ctx;
820
821 return (ret);
822 }
823
824 JEMALLOC_INLINE void
825 arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
826 {
827 arena_chunk_t *chunk;
828 size_t pageind, mapbits;
829
830 cassert(config_prof);
831 assert(ptr != NULL);
832 assert(CHUNK_ADDR2BASE(ptr) != ptr);
833
834 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
835 pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
836 mapbits = arena_mapbits_get(chunk, pageind);
837 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
838 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
839 if (prof_promote == false) {
840 arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
841 (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
842 LG_PAGE));
843 size_t binind;
844 arena_bin_info_t *bin_info;
845 unsigned regind;
846
847 binind = arena_ptr_small_binind_get(ptr, mapbits);
848 bin_info = &arena_bin_info[binind];
849 regind = arena_run_regind(run, bin_info, ptr);
850
851 *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
852 + (regind * sizeof(prof_ctx_t *)))) = ctx;
853 } else
854 assert((uintptr_t)ctx == (uintptr_t)1U);
855 } else
856 arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
857 }
858
859 JEMALLOC_INLINE void *
860 arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
861 {
862 tcache_t *tcache;
863
864 assert(size != 0);
865 assert(size <= arena_maxclass);
866
867 if (size <= SMALL_MAXCLASS) {
868 if (try_tcache && (tcache = tcache_get(true)) != NULL)
869 return (tcache_alloc_small(tcache, size, zero));
870 else {
871 return (arena_malloc_small(choose_arena(arena), size,
872 zero));
873 }
874 } else {
875 /*
876 * Initialize tcache after checking size in order to avoid
877 * infinite recursion during tcache initialization.
878 */
879 if (try_tcache && size <= tcache_maxclass && (tcache =
880 tcache_get(true)) != NULL)
881 return (tcache_alloc_large(tcache, size, zero));
882 else {
883 return (arena_malloc_large(choose_arena(arena), size,
884 zero));
885 }
886 }
887 }
888
889 /* Return the size of the allocation pointed to by ptr. */
890 JEMALLOC_INLINE size_t
891 arena_salloc(const void *ptr, bool demote)
892 {
893 size_t ret;
894 arena_chunk_t *chunk;
895 size_t pageind, binind;
896
897 assert(ptr != NULL);
898 assert(CHUNK_ADDR2BASE(ptr) != ptr);
899
900 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
901 pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
902 assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
903 binind = arena_mapbits_binind_get(chunk, pageind);
904 if (binind == BININD_INVALID || (config_prof && demote == false &&
905 prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
906 /*
907 * Large allocation. In the common case (demote == true), and
908 * as this is an inline function, most callers will only end up
909 * looking at binind to determine that ptr is a small
910 * allocation.
911 */
912 assert(((uintptr_t)ptr & PAGE_MASK) == 0);
913 ret = arena_mapbits_large_size_get(chunk, pageind);
914 assert(ret != 0);
915 assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
916 assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
917 pageind+(ret>>LG_PAGE)-1) == 0);
918 assert(binind == arena_mapbits_binind_get(chunk,
919 pageind+(ret>>LG_PAGE)-1));
920 assert(arena_mapbits_dirty_get(chunk, pageind) ==
921 arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
922 } else {
923 /*
924 * Small allocation (possibly promoted to a large object due to
925 * prof_promote).
926 */
927 assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
928 arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
929 pageind)) == binind);
930 ret = arena_bin_info[binind].reg_size;
931 }
932
933 return (ret);
934 }
935
936 JEMALLOC_INLINE void
937 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
938 {
939 size_t pageind, mapbits;
940 tcache_t *tcache;
941
942 assert(arena != NULL);
943 assert(chunk->arena == arena);
944 assert(ptr != NULL);
945 assert(CHUNK_ADDR2BASE(ptr) != ptr);
946
947 pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
948 mapbits = arena_mapbits_get(chunk, pageind);
949 assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
950 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
951 /* Small allocation. */
952 if (try_tcache && (tcache = tcache_get(false)) != NULL) {
953 size_t binind;
954
955 binind = arena_ptr_small_binind_get(ptr, mapbits);
956 tcache_dalloc_small(tcache, ptr, binind);
957 } else
958 arena_dalloc_small(arena, chunk, ptr, pageind);
959 } else {
960 size_t size = arena_mapbits_large_size_get(chunk, pageind);
961
962 assert(((uintptr_t)ptr & PAGE_MASK) == 0);
963
964 if (try_tcache && size <= tcache_maxclass && (tcache =
965 tcache_get(false)) != NULL) {
966 tcache_dalloc_large(tcache, ptr, size);
967 } else
968 arena_dalloc_large(arena, chunk, ptr);
969 }
970 }
971 # endif /* JEMALLOC_ARENA_INLINE_B */
972 #endif
973
974 #endif /* JEMALLOC_H_INLINES */
975 /******************************************************************************/