X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/20d7cd4c186bcbb50f0bb56ce882b5680664d965..224c70764cab4e0e39a26aaf3ad3016552f62f55:/gen/scalable_malloc.c diff --git a/gen/scalable_malloc.c b/gen/scalable_malloc.c index 386aab4..6ac0ae6 100644 --- a/gen/scalable_malloc.c +++ b/gen/scalable_malloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,6 +24,8 @@ /* Author: Bertrand Serlet, August 1999 */ #include "scalable_malloc.h" +#include "malloc_printf.h" +#include "_simple.h" #include @@ -31,6 +33,8 @@ #include #include #include +#include +#include /********************* DEFINITIONS ************************/ @@ -65,13 +69,14 @@ do { \ typedef unsigned short msize_t; // a size in multiples of SHIFT_SMALL_QUANTUM or SHIFT_TINY_QUANTUM -/* - * Note that in the LP64 case, this is 24 bytes, necessitating the 32-byte tiny grain size. - */ +typedef union { + void *p; + uintptr_t u; +} ptr_union; + typedef struct { - uintptr_t checksum; - void *previous; - void *next; + ptr_union previous; + ptr_union next; } free_list_t; typedef struct { @@ -86,12 +91,6 @@ typedef unsigned char grain_t; #define CHECK_REGIONS (1 << 31) -#ifdef __LP64__ -# define CHECKSUM_MAGIC 0xdeadbeef0badc0de -#else -# define CHECKSUM_MAGIC 0x357B -#endif - #define MAX_RECORDER_BUFFER 256 /********************* DEFINITIONS for tiny ************************/ @@ -100,17 +99,21 @@ typedef unsigned char grain_t; * Memory in the Tiny range is allocated from regions (heaps) pointed to by the szone's tiny_regions * pointer. * - * Each region is laid out as a heap (1MB in 32-bit mode, 2MB in 64-bit mode), followed by a header - * block. The header block is arranged: + * Each region is laid out as a heap, followed by a header block, all within + * a 1MB (2^20) block. This means there are 64520 16-byte blocks and the header is + * 16138 bytes, making the total 1048458 bytes, leaving 118 bytes unused. + * The header block is arranged: * - * 0x0 + * 0xfc080 * header bits - * 0x2000 + * 0xfe001 * 0xffffffff pad word - * 0x2004 + * 0xfe005 * in-use bits - * 0x4004 + * 0xfff86 * pad word (not written) + * 0xfff8a-0xfffff + * unused * * Each bitfield comprises NUM_TINY_BLOCKS bits, and refers to the corresponding TINY_QUANTUM block * within the heap. @@ -120,67 +123,58 @@ typedef unsigned char grain_t; * in-use bit is set for the header if the block has been handed out (allocated). If the header * bit is not set, the in-use bit is invalid. * - * The szone maintains an array of 32 freelists, each of which is used to hold free objects - * of the corresponding quantum size. + * The szone maintains an array of 32 freelists, each of which is used to hold + * free objects of the corresponding quantum size. * - * A free block is laid out as: + * A free block is laid out depending on its size, in order to fit all free + * blocks in 16 bytes, on both 32 and 64 bit platforms. One quantum blocks do + * not store their size in the block, instead relying on the header information + * to determine their size. Blocks of two or more quanta have room to store + * their size in the block, and store it both after the 'next' pointer, and in + * the last 2 bytes of the block. 
* + * 1-quantum block * Offset (32-bit mode) (64-bit mode) - * 0x0 0x0 - * checksum - * 0x4 0x08 - * previous - * 0x8 0x10 - * next - * 0xc 0x18 - * size (in quantum counts) - * end - 2 end - 2 - * size (in quantum counts) - * end end + * 0x0 0x0 : previous + * 0x4 0x08 : next + * end end + * + * >1-quantum block + * Offset (32-bit mode) (64-bit mode) + * 0x0 0x0 : previous + * 0x4 0x08 : next + * 0x8 0x10 : size (in quantum counts) + * end - 2 end - 2 : size (in quantum counts) + * end end * * All fields are pointer-sized, except for the size which is an unsigned short. * */ -#ifdef __LP64__ -# define SHIFT_TINY_QUANTUM 5 // Required to fit free_list_t -#else -# define SHIFT_TINY_QUANTUM 4 // Required for AltiVec -#endif +#define SHIFT_TINY_QUANTUM 4 // Required for AltiVec #define TINY_QUANTUM (1 << SHIFT_TINY_QUANTUM) #define FOLLOWING_TINY_PTR(ptr,msize) (((unsigned char *)(ptr)) + ((msize) << SHIFT_TINY_QUANTUM)) #define NUM_TINY_SLOTS 32 // number of slots for free-lists -#define SHIFT_NUM_TINY_BLOCKS 16 -#define NUM_TINY_BLOCKS (1 << SHIFT_NUM_TINY_BLOCKS) -#define TINY_BLOCKS_ALIGN (SHIFT_NUM_TINY_BLOCKS + SHIFT_TINY_QUANTUM) +#define NUM_TINY_BLOCKS 64520 +#define SHIFT_TINY_CEIL_BLOCKS 16 // ceil(log2(NUM_TINY_BLOCKS)) +#define NUM_TINY_CEIL_BLOCKS (1 << SHIFT_TINY_CEIL_BLOCKS) +#define TINY_BLOCKS_ALIGN (SHIFT_TINY_CEIL_BLOCKS + SHIFT_TINY_QUANTUM) /* * Enough room for the data, followed by the bit arrays (2-bits per block) plus 2 words of padding * as our bitmap operators overflow, plus rounding to the nearest page. */ -#define TINY_REGION_SIZE ((NUM_TINY_BLOCKS * TINY_QUANTUM + (NUM_TINY_BLOCKS >> 2) + 8 + vm_page_size - 1) & ~ (vm_page_size - 1)) - -/* - * Obtain the size of a free tiny block (in msize_t units). - */ -#ifdef __LP64__ -# define TINY_FREE_SIZE(ptr) (((msize_t *)(ptr))[14]) -#else -# define TINY_FREE_SIZE(ptr) (((msize_t *)(ptr))[6]) -#endif -/* - * The size is also kept at the very end of a free block. - */ -#define TINY_PREVIOUS_MSIZE(ptr) ((msize_t *)(ptr))[-1] +#define TINY_HEADER_SIZE ((NUM_TINY_BLOCKS >> 2) + 8) +#define TINY_REGION_SIZE ((NUM_TINY_BLOCKS * TINY_QUANTUM + TINY_HEADER_SIZE + vm_page_size - 1) & ~ (vm_page_size - 1)) /* * Beginning and end pointers for a region's heap. */ #define TINY_REGION_ADDRESS(region) ((void *)(region)) -#define TINY_REGION_END(region) (TINY_REGION_ADDRESS(region) + (1 << TINY_BLOCKS_ALIGN)) +#define TINY_REGION_END(region) (TINY_REGION_ADDRESS(region) + (NUM_TINY_BLOCKS * TINY_QUANTUM)) /* * Locate the heap base for a pointer known to be within a tiny region. @@ -193,10 +187,18 @@ typedef unsigned char grain_t; #define TINY_BYTES_FOR_MSIZE(_m) ((_m) << SHIFT_TINY_QUANTUM) #define TINY_MSIZE_FOR_BYTES(_b) ((_b) >> SHIFT_TINY_QUANTUM) +#ifdef __LP64__ +# define TINY_FREE_SIZE(ptr) (((msize_t *)(ptr))[8]) +#else +# define TINY_FREE_SIZE(ptr) (((msize_t *)(ptr))[4]) +#endif +#define TINY_PREVIOUS_MSIZE(ptr) ((msize_t *)(ptr))[-1] + /* * Locate the block header for a pointer known to be within a tiny region. */ -#define TINY_BLOCK_HEADER_FOR_PTR(_p) ((void *)(((((uintptr_t)(_p)) >> TINY_BLOCKS_ALIGN) + 1) << TINY_BLOCKS_ALIGN)) +#define TINY_HEADER_START (NUM_TINY_BLOCKS * TINY_QUANTUM) +#define TINY_BLOCK_HEADER_FOR_PTR(_p) ((void *)((uintptr_t)TINY_REGION_FOR_PTR(_p) + TINY_HEADER_START)) /* * Locate the inuse map for a given block header pointer. @@ -206,11 +208,7 @@ typedef unsigned char grain_t; /* * Compute the bitmap index for a pointer known to be within a tiny region. 
*/ -#define TINY_INDEX_FOR_PTR(_p) (((uintptr_t)(_p) >> SHIFT_TINY_QUANTUM) & (NUM_TINY_BLOCKS - 1)) - -typedef void *tiny_region_t; - -#define INITIAL_NUM_TINY_REGIONS 24 // must be even for szone to be aligned +#define TINY_INDEX_FOR_PTR(_p) (((uintptr_t)(_p) >> SHIFT_TINY_QUANTUM) & (NUM_TINY_CEIL_BLOCKS - 1)) #define TINY_CACHE 1 // This governs a last-free cache of 1 that bypasses the free-list @@ -224,8 +222,10 @@ typedef void *tiny_region_t; * Memory in the Small range is allocated from regions (heaps) pointed to by the szone's small_regions * pointer. * - * Each region is laid out as a heap (8MB in 32-bit and 64-bit mode), followed by the metadata array. + * Each region is laid out as a heap, followed by the metadata array, all within an 8MB (2^23) block. * The array is arranged as an array of shorts, one for each SMALL_QUANTUM in the heap. + * This means there are 16320 512-blocks and the array is 16320*2 bytes, which totals 8388480, leaving + * 128 bytes unused. * * The MSB of each short is set for the first quantum in a free block. The low 15 bits encode the * block size (in SMALL_QUANTUM units), or are zero if the quantum is not the first in a block. @@ -236,16 +236,10 @@ typedef void *tiny_region_t; * A free block is laid out as: * * Offset (32-bit mode) (64-bit mode) - * 0x0 0x0 - * checksum - * 0x4 0x08 - * previous - * 0x8 0x10 - * next - * 0xc 0x18 - * size (in quantum counts) - * end - 2 end - 2 - * size (in quantum counts) + * 0x0 0x0 : previous + * 0x4 0x08 : next + * 0x8 0x10 : size (in quantum counts) + * end - 2 end - 2 : size (in quantum counts) * end end * * All fields are pointer-sized, except for the size which is an unsigned short. @@ -265,10 +259,12 @@ typedef void *tiny_region_t; * We can only represent up to 1<<15 for msize; but we choose to stay even below that to avoid the * convention msize=0 => msize = (1<<15) */ -#define SHIFT_NUM_SMALL_BLOCKS 14 -#define NUM_SMALL_BLOCKS (1 << SHIFT_NUM_SMALL_BLOCKS) -#define SMALL_BLOCKS_ALIGN (SHIFT_NUM_SMALL_BLOCKS + SHIFT_SMALL_QUANTUM) // 23 -#define SMALL_REGION_SIZE (NUM_SMALL_BLOCKS * SMALL_QUANTUM + NUM_SMALL_BLOCKS * 2) // data + meta data +#define NUM_SMALL_BLOCKS 16320 +#define SHIFT_SMALL_CEIL_BLOCKS 14 // ceil(log2(NUM_SMALL_BLOCKs)) +#define NUM_SMALL_CEIL_BLOCKS (1 << SHIFT_SMALL_CEIL_BLOCKS) +#define SMALL_BLOCKS_ALIGN (SHIFT_SMALL_CEIL_BLOCKS + SHIFT_SMALL_QUANTUM) // 23 +#define SMALL_ARRAY_SIZE (NUM_SMALL_BLOCKS * 2) +#define SMALL_REGION_SIZE ((NUM_SMALL_BLOCKS * SMALL_QUANTUM + SMALL_ARRAY_SIZE + vm_page_size - 1) & ~ (vm_page_size - 1)) // data + meta data #define SMALL_PREVIOUS_MSIZE(ptr) ((msize_t *)(ptr))[-1] @@ -280,7 +276,7 @@ typedef void *tiny_region_t; #define SMALL_REGION_ADDRESS(region) ((unsigned char *)region) -#define SMALL_REGION_END(region) (SMALL_REGION_ADDRESS(region) + (1 << SMALL_BLOCKS_ALIGN)) +#define SMALL_REGION_END(region) (SMALL_REGION_ADDRESS(region) + (NUM_SMALL_BLOCKS * SMALL_QUANTUM)) /* * Locate the heap base for a pointer known to be within a small region. @@ -290,12 +286,13 @@ typedef void *tiny_region_t; /* * Locate the metadata base for a pointer known to be within a small region. */ -#define SMALL_META_HEADER_FOR_PTR(_p) ((msize_t *)(((((uintptr_t)(_p)) >> SMALL_BLOCKS_ALIGN) + 1) << SMALL_BLOCKS_ALIGN)) +#define SMALL_HEADER_START (NUM_SMALL_BLOCKS * SMALL_QUANTUM) +#define SMALL_META_HEADER_FOR_PTR(_p) ((msize_t *)((uintptr_t)SMALL_REGION_FOR_PTR(_p) + SMALL_HEADER_START)) /* * Compute the metadata index for a pointer known to be within a small region. 
*/ -#define SMALL_META_INDEX_FOR_PTR(_p) (((uintptr_t)(_p) >> SHIFT_SMALL_QUANTUM) & (NUM_SMALL_BLOCKS - 1)) +#define SMALL_META_INDEX_FOR_PTR(_p) (((uintptr_t)(_p) >> SHIFT_SMALL_QUANTUM) & (NUM_SMALL_CEIL_BLOCKS - 1)) /* * Find the metadata word for a pointer known to be within a small region. @@ -312,10 +309,6 @@ typedef void *tiny_region_t; */ #define SMALL_PTR_SIZE(_p) (*SMALL_METADATA_FOR_PTR(_p) & ~SMALL_IS_FREE) -typedef void * small_region_t; - -#define INITIAL_NUM_SMALL_REGIONS 6 // must be even for szone to be aligned - #define PROTECT_SMALL 0 // Should be 0: 1 is too slow for normal use #define SMALL_CACHE 1 @@ -323,7 +316,7 @@ typedef void * small_region_t; #warning SMALL_CACHE turned off #endif -/********************* DEFINITIONS for large ************************/ +/********************* DEFINITIONS for large and huge ***********************/ #define LARGE_THRESHOLD (15 * 1024) // at or above this use "large" @@ -359,10 +352,15 @@ typedef void * small_region_t; #define LARGE_ENTRY_IS_EMPTY(entry) (((entry).address_and_num_pages) == 0) typedef compact_range_t large_entry_t; +typedef vm_range_t huge_entry_t; -/********************* DEFINITIONS for huge ************************/ +/******************************************************************************* + * Definitions for region hash + ******************************************************************************/ -typedef vm_range_t huge_entry_t; +typedef void * region_t; + +#define INITIAL_NUM_REGIONS 63 // Must be odd to hash well /********************* zone itself ************************/ @@ -373,8 +371,10 @@ typedef struct { void *log_address; /* Regions for tiny objects */ - unsigned num_tiny_regions; - tiny_region_t *tiny_regions; + size_t num_tiny_regions; + size_t num_tiny_regions_allocated; + region_t *tiny_regions; // hashed by location + region_t *last_tiny_region; void *last_tiny_free; // low SHIFT_TINY_QUANTUM indicate the msize unsigned tiny_bitmap; // cache of the 32 free lists free_list_t *tiny_free_list[NUM_TINY_SLOTS]; // 31 free lists for 1*TINY_QUANTUM to 31*TINY_QUANTUM plus 1 for larger than 32*SMALL_QUANTUM @@ -383,8 +383,10 @@ typedef struct { size_t num_bytes_in_tiny_objects; /* Regions for small objects */ - unsigned num_small_regions; - small_region_t *small_regions; + size_t num_small_regions; + size_t num_small_regions_allocated; + region_t *small_regions; // hashed by location + region_t *last_small_region; void *last_small_free; // low SHIFT_SMALL_QUANTUM indicate the msize unsigned small_bitmap; // cache of the free list free_list_t *small_free_list[NUM_SMALL_SLOTS]; @@ -404,21 +406,29 @@ typedef struct { size_t num_bytes_in_huge_objects; /* Initial region list */ - tiny_region_t initial_tiny_regions[INITIAL_NUM_TINY_REGIONS]; - small_region_t initial_small_regions[INITIAL_NUM_SMALL_REGIONS]; + region_t initial_tiny_regions[INITIAL_NUM_REGIONS]; + region_t initial_small_regions[INITIAL_NUM_REGIONS]; } szone_t; +#define SZONE_PAGED_SIZE ((sizeof(szone_t) + vm_page_size - 1) & ~ (vm_page_size - 1)) + #if DEBUG_MALLOC || DEBUG_CLIENT static void szone_sleep(void); #endif -static void szone_error(szone_t *szone, const char *msg, const void *ptr); -static void protect(szone_t *szone, void *address, size_t size, unsigned protection, unsigned debug_flags); +__private_extern__ void malloc_error_break(void); + +// msg prints after fmt, ... +static void szone_error(szone_t *szone, const char *msg, const void *ptr, const char *fmt, ...) 
__printflike(4, 5); + +static void protect(void *address, size_t size, unsigned protection, unsigned debug_flags); static void *allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label); static void deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags); static kern_return_t _szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr); static INLINE void free_list_checksum(szone_t *szone, free_list_t *ptr, const char *msg) ALWAYSINLINE; static INLINE void free_list_set_checksum(szone_t *szone, free_list_t *ptr) ALWAYSINLINE; +static INLINE uintptr_t free_list_checksum_ptr(void *p) ALWAYSINLINE; +static INLINE void * free_list_unchecksum_ptr(ptr_union ptr) ALWAYSINLINE; static unsigned free_list_count(const free_list_t *ptr); static INLINE msize_t get_tiny_meta_header(const void *ptr, boolean_t *is_free) ALWAYSINLINE; @@ -427,19 +437,19 @@ static INLINE void set_tiny_meta_header_middle(const void *ptr) ALWAYSINLINE; static INLINE void set_tiny_meta_header_free(const void *ptr, msize_t msize) ALWAYSINLINE; static INLINE boolean_t tiny_meta_header_is_free(const void *ptr) ALWAYSINLINE; static INLINE void *tiny_previous_preceding_free(void *ptr, msize_t *prev_msize) ALWAYSINLINE; -static INLINE void tiny_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize) ALWAYSINLINE; -static INLINE void tiny_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize) ALWAYSINLINE; -static INLINE tiny_region_t *tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE; -static INLINE void tiny_free_no_lock(szone_t *szone, tiny_region_t *region, void *ptr, msize_t msize) ALWAYSINLINE; +static void tiny_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize); +static void tiny_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize); +static INLINE region_t *tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE; +static INLINE void tiny_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) ALWAYSINLINE; static void *tiny_malloc_from_region_no_lock(szone_t *szone, msize_t msize); static INLINE boolean_t try_realloc_tiny_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE; -static boolean_t tiny_check_region(szone_t *szone, tiny_region_t *region); -static kern_return_t tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t region_address, unsigned short num_regions, size_t tiny_bytes_free_at_end, memory_reader_t reader, vm_range_recorder_t recorder); -static INLINE void *tiny_malloc_from_free_list(szone_t *szone, msize_t msize) ALWAYSINLINE; +static boolean_t tiny_check_region(szone_t *szone, region_t region); +static kern_return_t tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder); +static void *tiny_malloc_from_free_list(szone_t *szone, msize_t msize); static INLINE void *tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE; -static INLINE void free_tiny(szone_t *szone, void *ptr, tiny_region_t *tiny_region) ALWAYSINLINE; +static INLINE void free_tiny(szone_t *szone, void *ptr, region_t *tiny_region) ALWAYSINLINE; static void print_tiny_free_list(szone_t *szone); -static void print_tiny_region(boolean_t verbose, tiny_region_t region, size_t bytes_at_end); +static void print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_end); static 
boolean_t tiny_free_list_check(szone_t *szone, grain_t slot); static INLINE void small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize) ALWAYSINLINE; @@ -447,20 +457,25 @@ static INLINE void small_meta_header_set_in_use(msize_t *meta_headers, msize_t i static INLINE void small_meta_header_set_middle(msize_t *meta_headers, msize_t index) ALWAYSINLINE; static void small_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize); static void small_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize); -static INLINE small_region_t *small_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE; -static INLINE void small_free_no_lock(szone_t *szone, small_region_t *region, void *ptr, msize_t msize) ALWAYSINLINE; +static INLINE region_t *small_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE; +static INLINE void small_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) ALWAYSINLINE; static void *small_malloc_from_region_no_lock(szone_t *szone, msize_t msize); static INLINE boolean_t try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE; -static boolean_t szone_check_small_region(szone_t *szone, small_region_t *region); -static kern_return_t small_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t region_address, unsigned short num_regions, size_t small_bytes_free_at_end, memory_reader_t reader, vm_range_recorder_t recorder); -static INLINE void *small_malloc_from_free_list(szone_t *szone, msize_t msize) ALWAYSINLINE; +static boolean_t szone_check_small_region(szone_t *szone, region_t region); +static kern_return_t small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder); +static void *small_malloc_from_free_list(szone_t *szone, msize_t msize); static INLINE void *small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE; static INLINE void *small_malloc_cleared_no_lock(szone_t *szone, msize_t msize) ALWAYSINLINE; -static INLINE void free_small(szone_t *szone, void *ptr, small_region_t *small_region) ALWAYSINLINE; +static INLINE void free_small(szone_t *szone, void *ptr, region_t *small_region) ALWAYSINLINE; static void print_small_free_list(szone_t *szone); -static void print_small_region(szone_t *szone, boolean_t verbose, small_region_t *region, size_t bytes_at_end); +static void print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_end); static boolean_t small_free_list_check(szone_t *szone, grain_t grain); +static region_t * hash_lookup_region_no_lock(region_t *regions, size_t num_entries, region_t r); +static void hash_region_insert_no_lock(region_t *regions, size_t num_entries, region_t r); +static region_t * hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries); +static region_t * hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size, size_t *new_size); + #if DEBUG_MALLOC static void large_debug_print(szone_t *szone); #endif @@ -469,13 +484,13 @@ static void large_entry_insert_no_lock(szone_t *szone, large_entry_t range); static INLINE void large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry) ALWAYSINLINE; static INLINE large_entry_t *large_entries_alloc_no_lock(szone_t *szone, unsigned num) ALWAYSINLINE; static void large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, vm_range_t 
*range_to_deallocate); -static void large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate); +static large_entry_t * large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate); static vm_range_t large_free_no_lock(szone_t *szone, large_entry_t *entry); static kern_return_t large_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t large_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder); static huge_entry_t *huge_entry_for_pointer_no_lock(szone_t *szone, const void *ptr); static boolean_t huge_entry_append(szone_t *szone, huge_entry_t huge); static kern_return_t huge_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t huge_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder); -static void *large_and_huge_malloc(szone_t *szone, unsigned num_pages); +static void *large_and_huge_malloc(szone_t *szone, size_t num_pages); static INLINE void free_large_or_huge(szone_t *szone, void *ptr) ALWAYSINLINE; static INLINE int try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE; @@ -546,23 +561,52 @@ szone_sleep(void) { if (getenv("MallocErrorSleep")) { - malloc_printf("*** sleeping to help debug\n"); + _malloc_printf(ASL_LEVEL_NOTICE, "*** sleeping to help debug\n"); sleep(3600); // to help debug } } #endif -static void -szone_error(szone_t *szone, const char *msg, const void *ptr) +// msg prints after fmt, ... +static __attribute__((noinline)) void +szone_error(szone_t *szone, const char *msg, const void *ptr, const char *fmt, ...) { + va_list ap; + _SIMPLE_STRING b = _simple_salloc(); if (szone) SZONE_UNLOCK(szone); - if (ptr) { - malloc_printf("*** error for object %p: %s\n", ptr, msg); + if (b) { + if (fmt) { + va_start(ap, fmt); + _simple_vsprintf(b, fmt, ap); + va_end(ap); + } + if (ptr) { + _simple_sprintf(b, "*** error for object %p: %s\n", ptr, msg); + } else { + _simple_sprintf(b, "*** error: %s\n", msg); + } + malloc_printf("%s*** set a breakpoint in malloc_error_break to debug\n", _simple_string(b)); + _simple_sfree(b); } else { - malloc_printf("*** error: %s\n", msg); + /* + * Should only get here if vm_allocate() can't get a single page of + * memory, implying _simple_asl_log() would also fail. So we just + * print to the file descriptor. 
+ */ + if (fmt) { + va_start(ap, fmt); + _malloc_vprintf(MALLOC_PRINTF_NOLOG, fmt, ap); + va_end(ap); + } + if (ptr) { + _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error for object %p: %s\n", ptr, msg); + } else { + _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error: %s\n", msg); + } + _malloc_printf(MALLOC_PRINTF_NOLOG, "*** set a breakpoint in malloc_error_break to debug\n"); } - malloc_printf("*** set a breakpoint in szone_error to debug\n"); + malloc_error_break(); #if DEBUG_MALLOC szone_print(szone, 1); szone_sleep(); @@ -570,10 +614,11 @@ szone_error(szone_t *szone, const char *msg, const void *ptr) #if DEBUG_CLIENT szone_sleep(); #endif + if (szone->debug_flags & SCALABLE_MALLOC_ABORT_ON_ERROR) abort(); } static void -protect(szone_t *szone, void *address, size_t size, unsigned protection, unsigned debug_flags) +protect(void *address, size_t size, unsigned protection, unsigned debug_flags) { kern_return_t err; @@ -597,43 +642,39 @@ static void * allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label) { // align specifies a desired alignment (as a log) or 0 if no alignment requested - kern_return_t err; - vm_address_t vm_addr; + void *vm_addr; uintptr_t addr, aligned_address; boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES; size_t allocation_size = round_page(size); size_t delta; - + if (align) add_guard_pages = 0; // too cumbersome to deal with that if (!allocation_size) allocation_size = 1 << vm_page_shift; if (add_guard_pages) allocation_size += 2 * (1 << vm_page_shift); if (align) allocation_size += (size_t)1 << align; - err = vm_allocate(mach_task_self(), &vm_addr, allocation_size, vm_page_label | 1); - if (err) { - malloc_printf("*** vm_allocate(size=%lld) failed (error code=%d)\n", (long long)size, err); - szone_error(szone, "can't allocate region", NULL); - return NULL; + vm_addr = mmap(0, allocation_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, VM_MAKE_TAG(vm_page_label), 0); + if ((uintptr_t)vm_addr == -1) { + szone_error(szone, "can't allocate region", NULL, "*** mmap(size=%lld) failed (error code=%d)\n", (long long)allocation_size, errno); + return NULL; } addr = (uintptr_t)vm_addr; if (align) { aligned_address = (addr + ((uintptr_t)1 << align) - 1) & ~ (((uintptr_t)1 << align) - 1); if (aligned_address != addr) { delta = aligned_address - addr; - err = vm_deallocate(mach_task_self(), (vm_address_t)addr, delta); - if (err) - malloc_printf("*** freeing unaligned header failed with %d\n", err); + if (munmap((void *)addr, delta) == -1) + malloc_printf("*** freeing unaligned header failed with %d\n", errno); addr = aligned_address; allocation_size -= delta; } if (allocation_size > size) { - err = vm_deallocate(mach_task_self(), (vm_address_t)addr + size, allocation_size - size); - if (err) - malloc_printf("*** freeing unaligned footer failed with %d\n", err); + if (munmap((void *)(addr + size), allocation_size - size) == -1) + malloc_printf("*** freeing unaligned footer failed with %d\n", errno); } } if (add_guard_pages) { addr += (uintptr_t)1 << vm_page_shift; - protect(szone, (void *)addr, size, 0, debug_flags); + protect((void *)addr, size, 0, debug_flags); } return (void *)addr; } @@ -641,16 +682,16 @@ allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_ static void deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags) { - kern_return_t err; - boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES; + int err; + boolean_t 
add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES; if (add_guard_pages) { addr -= 1 << vm_page_shift; size += 2 * (1 << vm_page_shift); } - err = vm_deallocate(mach_task_self(), (vm_address_t)addr, size); - if (err && szone) - szone_error(szone, "Can't deallocate_pages region", addr); + err = munmap(addr, size); + if ((err == -1) && szone) + szone_error(szone, "Can't deallocate_pages region", addr, NULL); } static kern_return_t @@ -660,28 +701,61 @@ _szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void ** return 0; } +/********************* FREE LIST UTILITIES ************************/ + +// A free list entry is comprised of a pair of pointers, previous and next. +// Because the free list entries are previously freed objects, there is a +// non-zero chance that a misbehaved program will write to an allocated object +// after it has called free() on the pointer. This write would then potentially +// corrupt the previous and next pointers, leading to a crash. In order to +// detect this case, we take advantage of the fact that pointers are known to +// be at least 16 byte aligned, and thus have at least 4 trailing zero bits. +// When an entry is added to the free list, the previous and next pointers are +// shifted right by 2 bits, and then have the high and low 2 bits set, to act +// as guard bits. Before accessing a free list object, we verify that these +// bits are still set, and log an error if they are not. + static INLINE void free_list_checksum(szone_t *szone, free_list_t *ptr, const char *msg) { - // We always checksum, as testing whether to do it (based on szone->debug_flags) is as fast as - // doing it - // XXX not necessarily true for LP64 case - if (ptr->checksum != (((uintptr_t)ptr->previous) ^ ((uintptr_t)ptr->next) ^ CHECKSUM_MAGIC)) { -#if DEBUG_MALLOC - malloc_printf("*** incorrect checksum: %s\n", msg); + uintptr_t ptrs = ptr->previous.u & ptr->next.u; + +#ifdef __LP64__ + ptrs = (ptrs << 2) | (ptrs >> (64-2)); +#else + ptrs = (ptrs << 2) | (ptrs >> (32-2)); #endif - szone_error(szone, "incorrect checksum for freed object " - "- object was probably modified after being freed, break at szone_error to debug", ptr); - } + + if ((ptrs & 15) != 15) + szone_error(szone, "incorrect checksum for freed object " + "- object was probably modified after being freed.", ptr, NULL); +} + +static INLINE uintptr_t +free_list_checksum_ptr(void *p) +{ + ptr_union ptr; + ptr.p = p; + +#ifdef __LP64__ + return (ptr.u >> 2) | 0xC000000000000003ULL; +#else + return (ptr.u >> 2) | 0xC0000003U; +#endif +} + +static INLINE void * +free_list_unchecksum_ptr(ptr_union ptr) +{ + uintptr_t u = (ptr.u >> 2) << 4; + return (void *)u; } static INLINE void free_list_set_checksum(szone_t *szone, free_list_t *ptr) { - // We always set checksum, as testing whether to do it (based on - // szone->debug_flags) is slower than just doing it - // XXX not necessarily true for LP64 case - ptr->checksum = ((uintptr_t)ptr->previous) ^ ((uintptr_t)ptr->next) ^ CHECKSUM_MAGIC; + ptr->previous.u = free_list_checksum_ptr(ptr->previous.p); + ptr->next.u = free_list_checksum_ptr(ptr->next.p); } static unsigned @@ -690,8 +764,8 @@ free_list_count(const free_list_t *ptr) unsigned count = 0; while (ptr) { - count++; - ptr = ptr->next; + count++; + ptr = free_list_unchecksum_ptr(ptr->next); } return count; } @@ -702,8 +776,8 @@ free_list_count(const free_list_t *ptr) #define BITMAP32_CLR(bitmap,bit) (bitmap &= ~ (1 << (bit))) #define BITMAP32_BIT(bitmap,bit) ((bitmap >> (bit)) & 1) -#define 
BITMAP32_FFS(bitmap) (ffs(bitmap)) - // returns bit # of first bit that's one, starting at 1 (returns 0 if !bitmap) +/* returns bit # of least-significant one bit, starting at 0 (undefined if !bitmap) */ +#define BITMAP32_CTZ(bitmap) (__builtin_ctz(bitmap)) /********************* TINY FREE LIST UTILITIES ************************/ @@ -721,40 +795,65 @@ free_list_count(const free_list_t *ptr) #define BITARRAY_BIT(bits,index) (((bits[index>>3]) >> (index & 7)) & 1) // Following is for start<8 and end<=start+32 -#define BITARRAY_MCLR_LESS_32(bits,start,end) \ -do { \ - unsigned char *_bits = (bits); \ - unsigned _end = (end); \ - switch (_end >> 3) { \ - case 4: _bits[4] &= ~ ((1 << (_end - 32)) - 1); _end = 32; \ - case 3: _bits[3] &= ~ ((1 << (_end - 24)) - 1); _end = 24; \ - case 2: _bits[2] &= ~ ((1 << (_end - 16)) - 1); _end = 16; \ - case 1: _bits[1] &= ~ ((1 << (_end - 8)) - 1); _end = 8; \ - case 0: _bits[0] &= ~ ((1 << _end) - (1 << (start))); \ - } \ -} while (0) +static void ALWAYSINLINE +bitarray_mclr(void *bits, unsigned start, unsigned end) { + unsigned word = OSReadLittleInt32(bits, 0); + unsigned mask = (0xFFFFFFFFU >> (31 - start)) >> 1; + + if (end > 31) { + unsigned char *bytes = (unsigned char *)bits; + bytes[4] &= ~((1 << (end - 32)) - 1); + } else { + mask |= (0xFFFFFFFF << end); + } + OSWriteLittleInt32(bits, 0, word & mask); +} -#if 0 // Simple but slow version -#warning Slow version in effect -#define BITARRAY_MCLR(bits,index,num) \ -do { \ - unsigned _ctr = (num); \ - unsigned _cur = (index); \ - \ - while (_ctr--) {BITARRAY_CLR(bits,_cur); _cur++; } \ -} while (0) -#else +/* + * Obtain the size of a free tiny block (in msize_t units). + */ +static msize_t +get_tiny_free_size(const void *ptr) +{ + void *next_block = (void *)((uintptr_t)ptr + TINY_QUANTUM); + void *region_end = TINY_REGION_END(TINY_REGION_FOR_PTR(ptr)); -// Following is for num <= 32 -#define BITARRAY_MCLR(bits,index,num) \ -do { \ - unsigned _index = (index); \ - unsigned char *_rebased = (bits) + (_index >> 3); \ - \ - _index &= 7; \ - BITARRAY_MCLR_LESS_32(_rebased, _index, _index + (num)); \ -} while (0) -#endif + // check whether the next block is outside the tiny region or a block header + // if so, then the size of this block is one, and there is no stored size. + if (next_block < region_end) + { + unsigned char *next_header = TINY_BLOCK_HEADER_FOR_PTR(next_block); + msize_t next_index = TINY_INDEX_FOR_PTR(next_block); + + if (!BITARRAY_BIT(next_header, next_index)) + return TINY_FREE_SIZE(ptr); + } + return 1; +} + +/* + * Get the size of the previous free block, which is stored in the last two + * bytes of the block. If the previous block is not free, then the result is + * undefined. + */ +static msize_t +get_tiny_previous_free_msize(const void *ptr) +{ + // check whether the previous block is in the tiny region and a block header + // if so, then the size of the previous block is one, and there is no stored + // size. 
+ if (ptr != TINY_REGION_FOR_PTR(ptr)) + { + void *prev_block = (void *)((uintptr_t)ptr - TINY_QUANTUM); + unsigned char *prev_header = TINY_BLOCK_HEADER_FOR_PTR(prev_block); + msize_t prev_index = TINY_INDEX_FOR_PTR(prev_block); + if (BITARRAY_BIT(prev_header, prev_index)) + return 1; + return TINY_PREVIOUS_MSIZE(ptr); + } + // don't read possibly unmapped memory before the beginning of the region + return 0; +} static INLINE msize_t get_tiny_meta_header(const void *ptr, boolean_t *is_free) @@ -774,11 +873,11 @@ get_tiny_meta_header(const void *ptr, boolean_t *is_free) index &= 7; *is_free = 0; if (!BITMAP32_BIT(*block_header, index)) - return 0; + return 0; in_use = TINY_INUSE_FOR_HEADER(block_header); if (!BITMAP32_BIT(*in_use, index)) { - *is_free = 1; - return TINY_FREE_SIZE(ptr); + *is_free = 1; + return get_tiny_free_size(ptr); } uint32_t *addr = (uint32_t *)((uintptr_t)block_header & ~3); uint32_t word0 = OSReadLittleInt32(addr, 0) >> index; @@ -821,8 +920,8 @@ set_tiny_meta_header_in_use(const void *ptr, msize_t msize) block_header += byte_index; in_use += byte_index; index &= 7; end_bit = index + clr_msize; - BITARRAY_MCLR_LESS_32(block_header, index, end_bit); - BITARRAY_MCLR_LESS_32(in_use, index, end_bit); + bitarray_mclr(block_header, index, end_bit); + bitarray_mclr(in_use, index, end_bit); } BITARRAY_SET(block_header, index+clr_msize); // we set the block_header bit for the following block to reaffirm next block is a block #if DEBUG_MALLOC @@ -853,7 +952,6 @@ set_tiny_meta_header_middle(const void *ptr) BITARRAY_CLR(block_header, index); BITARRAY_CLR(in_use, index); - TINY_FREE_SIZE(ptr) = 0; } static INLINE void @@ -873,19 +971,26 @@ set_tiny_meta_header_free(const void *ptr, msize_t msize) malloc_printf("setting header for tiny free %p msize too large: %d\n", ptr, msize); } #endif - BITARRAY_SET(block_header, index); BITARRAY_CLR(in_use, index); - TINY_FREE_SIZE(ptr) = msize; - // mark the end of this block - if (msize) { // msize==0 => the whole region is free - void *follower = FOLLOWING_TINY_PTR(ptr, msize); - TINY_PREVIOUS_MSIZE(follower) = msize; + BITARRAY_SET(block_header, index); + BITARRAY_CLR(in_use, index); + // mark the end of this block if msize is > 1. For msize == 0, the whole + // region is free, so there is no following block. For msize == 1, there is + // no space to write the size on 64 bit systems. The size for 1 quantum + // blocks is computed from the metadata bitmaps. 
+ if (msize > 1) { + void *follower = FOLLOWING_TINY_PTR(ptr, msize); + TINY_PREVIOUS_MSIZE(follower) = msize; + TINY_FREE_SIZE(ptr) = msize; + } + if (msize == 0) { + TINY_FREE_SIZE(ptr) = msize; } #if DEBUG_MALLOC boolean_t ff; msize_t mf = get_tiny_meta_header(ptr, &ff); if ((msize != mf) || !ff) { - malloc_printf("setting header for tiny free %p : %d\n", ptr, msize); - malloc_printf("reading header for tiny %p : %d %d\n", ptr, mf, ff); + malloc_printf("setting header for tiny free %p : %u\n", ptr, msize); + malloc_printf("reading header for tiny %p : %u %u\n", ptr, mf, ff); } #endif } @@ -893,8 +998,6 @@ set_tiny_meta_header_free(const void *ptr, msize_t msize) static INLINE boolean_t tiny_meta_header_is_free(const void *ptr) { - // returns msize and is_free shifted by 16 - // may return 0 for the msize component (meaning 65536) unsigned char *block_header; unsigned char *in_use; msize_t index; @@ -923,129 +1026,124 @@ tiny_previous_preceding_free(void *ptr, msize_t *prev_msize) index = TINY_INDEX_FOR_PTR(ptr); if (!index) - return NULL; - if ((previous_msize = TINY_PREVIOUS_MSIZE(ptr)) > index) - return NULL; + return NULL; + if ((previous_msize = get_tiny_previous_free_msize(ptr)) > index) + return NULL; previous_index = index - previous_msize; previous_ptr = (void *)(TINY_REGION_FOR_PTR(ptr) + TINY_BYTES_FOR_MSIZE(previous_index)); - if (TINY_FREE_SIZE(previous_ptr) != previous_msize) - return NULL; - if (!BITARRAY_BIT(block_header, previous_index)) - return NULL; + return NULL; if (BITARRAY_BIT(in_use, previous_index)) - return NULL; + return NULL; + if (get_tiny_free_size(previous_ptr) != previous_msize) + return NULL; // conservative check did match true check *prev_msize = previous_msize; return previous_ptr; } -static INLINE void +/* + * Adds an item to the proper free list, and also marks the meta-header of the + * block properly. + * Assumes szone has been locked + */ +static void tiny_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize) { - // Adds an item to the proper free list - // Also marks the meta-header of the block properly - // Assumes szone has been locked grain_t slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1; free_list_t *free_ptr = ptr; free_list_t *free_head = szone->tiny_free_list[slot]; #if DEBUG_MALLOC if (LOG(szone,ptr)) { - malloc_printf("in tiny_free_list_add_ptr(), ptr=%p, msize=%d\n", ptr, msize); + malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize); } - if (((unsigned)ptr) & (TINY_QUANTUM - 1)) { - szone_error(szone, "tiny_free_list_add_ptr: Unaligned ptr", ptr); + if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) { + szone_error(szone, "tiny_free_list_add_ptr: Unaligned ptr", ptr, NULL); } #endif set_tiny_meta_header_free(ptr, msize); if (free_head) { - free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); + free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); #if DEBUG_MALLOC - if (free_head->previous) { - malloc_printf("ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, free_head, free_head->previous); - szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr); - } - if (! 
tiny_meta_header_is_free(free_head)) { - malloc_printf("ptr=%p slot=%d free_head=%p\n", ptr, slot, free_head); - szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr); - } + if (free_list_unchecksum_ptr(free_head->previous)) { + szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr, + "ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, free_head, free_head->previous.p); + } + if (! tiny_meta_header_is_free(free_head)) { + szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr, + "ptr=%p slot=%d free_head=%p\n", ptr, slot, free_head); + } #endif - free_head->previous = free_ptr; - free_list_set_checksum(szone, free_head); + free_head->previous.u = free_list_checksum_ptr(free_ptr); } else { - BITMAP32_SET(szone->tiny_bitmap, slot); + BITMAP32_SET(szone->tiny_bitmap, slot); } - free_ptr->previous = NULL; - free_ptr->next = free_head; + free_ptr->previous.p = NULL; + free_ptr->next.p = free_head; free_list_set_checksum(szone, free_ptr); szone->tiny_free_list[slot] = free_ptr; } +/* + * Removes the item pointed to by ptr in the proper free list. + * Assumes szone has been locked + */ static INLINE void tiny_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize) { - // Removes item in the proper free list - // msize could be read, but all callers have it so we pass it in - // Assumes szone has been locked grain_t slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1; - free_list_t *free_ptr = ptr; - free_list_t *next = free_ptr->next; - free_list_t *previous = free_ptr->previous; + free_list_t *free_ptr = ptr, *next, *previous; + free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__); + + next = free_list_unchecksum_ptr(free_ptr->next); + previous = free_list_unchecksum_ptr(free_ptr->previous); #if DEBUG_MALLOC if (LOG(szone,ptr)) { - malloc_printf("In tiny_free_list_remove_ptr(), ptr=%p, msize=%d\n", ptr, msize); + malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize); } #endif - free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__); - if (!previous) { - // The block to remove is the head of the free list + if (!previous) { + // The block to remove is the head of the free list #if DEBUG_MALLOC - if (szone->tiny_free_list[slot] != ptr) { - malloc_printf("ptr=%p slot=%d msize=%d szone->tiny_free_list[slot]=%p\n", - ptr, slot, msize, szone->tiny_free_list[slot]); - szone_error(szone, "tiny_free_list_remove_ptr: Internal invariant broken (szone->tiny_free_list[slot])", ptr); - return; - } + if (szone->tiny_free_list[slot] != ptr) { + szone_error(szone, "tiny_free_list_remove_ptr: Internal invariant broken (szone->tiny_free_list[slot])", ptr, + "ptr=%p slot=%d msize=%d szone->tiny_free_list[slot]=%p\n", + ptr, slot, msize, szone->tiny_free_list[slot]); + return; + } #endif - szone->tiny_free_list[slot] = next; - if (!next) BITMAP32_CLR(szone->tiny_bitmap, slot); + szone->tiny_free_list[slot] = next; + if (!next) BITMAP32_CLR(szone->tiny_bitmap, slot); } else { - previous->next = next; - free_list_set_checksum(szone, previous); + // We know free_ptr is already checksummed, so we don't need to do it + // again. + previous->next = free_ptr->next; } if (next) { - next->previous = previous; - free_list_set_checksum(szone, next); + // We know free_ptr is already checksummed, so we don't need to do it + // again. 
+ next->previous = free_ptr->previous; } } /* - * Find the tiny region containing (ptr) (if any). - * - * We take advantage of the knowledge that tiny regions are always (1 << TINY_BLOCKS_ALIGN) aligned. + * tiny_region_for_ptr_no_lock - Returns the tiny region containing the pointer, + * or NULL if not found. */ -static INLINE tiny_region_t * +static INLINE region_t * tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr) { - tiny_region_t *region; - tiny_region_t rbase; - int i; - - /* mask off irrelevant lower bits */ - rbase = TINY_REGION_FOR_PTR(ptr); - /* iterate over allocated regions - XXX not terribly efficient for large number of regions */ - for (i = szone->num_tiny_regions, region = szone->tiny_regions; i > 0; i--, region++) - if (rbase == *region) - return(region); - return(NULL); + return hash_lookup_region_no_lock(szone->tiny_regions, + szone->num_tiny_regions_allocated, + TINY_REGION_FOR_PTR(ptr)); } static INLINE void -tiny_free_no_lock(szone_t *szone, tiny_region_t *region, void *ptr, msize_t msize) +tiny_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) { size_t original_size = TINY_BYTES_FOR_MSIZE(msize); void *next_block = ((char *)ptr + original_size); @@ -1061,63 +1159,71 @@ tiny_free_no_lock(szone_t *szone, tiny_region_t *region, void *ptr, msize_t msiz malloc_printf("in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); } if (! msize) { - malloc_printf("in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); - szone_error(szone, "trying to free tiny block that is too small", ptr); + szone_error(szone, "trying to free tiny block that is too small", ptr, + "in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); } #endif // We try to coalesce this block with the preceeding one previous = tiny_previous_preceding_free(ptr, &previous_msize); if (previous) { #if DEBUG_MALLOC - if (LOG(szone, ptr) || LOG(szone,previous)) { - malloc_printf("in tiny_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous); - } + if (LOG(szone, ptr) || LOG(szone,previous)) { + malloc_printf("in tiny_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous); + } #endif - tiny_free_list_remove_ptr(szone, previous, previous_msize); - ptr = previous; - msize += previous_msize; + // clear the meta_header since this is no longer the start of a block + set_tiny_meta_header_middle(ptr); + tiny_free_list_remove_ptr(szone, previous, previous_msize); + ptr = previous; + msize += previous_msize; } // We try to coalesce with the next block if ((next_block < TINY_REGION_END(*region)) && tiny_meta_header_is_free(next_block)) { - // The next block is free, we coalesce - next_msize = TINY_FREE_SIZE(next_block); + next_msize = get_tiny_free_size(next_block); #if DEBUG_MALLOC - if (LOG(szone, ptr) || LOG(szone, next_block)) { - malloc_printf("in tiny_free_no_lock(), for ptr=%p, msize=%d coalesced forward=%p next_msize=%d\n", - ptr, msize, next_block, next_msize); - } + if (LOG(szone, ptr) || LOG(szone, next_block)) { + malloc_printf("in tiny_free_no_lock(), for ptr=%p, msize=%d coalesced forward=%p next_msize=%d\n", + ptr, msize, next_block, next_msize); + } #endif - if (next_msize >= NUM_TINY_SLOTS) { - // we take a short cut here to avoid removing next_block from the slot 31 freelist and then adding ptr back to slot 31 - msize += next_msize; - big_free_block = (free_list_t *)next_block; - after_next_block = big_free_block->next; - before_next_block = big_free_block->previous; - free_list_checksum(szone, big_free_block, 
__PRETTY_FUNCTION__); - if (!before_next_block) { - szone->tiny_free_list[NUM_TINY_SLOTS-1] = ptr; - } else { - before_next_block->next = ptr; - free_list_set_checksum(szone, before_next_block); - } - if (after_next_block) { - after_next_block->previous = ptr; - free_list_set_checksum(szone, after_next_block); - } - ((free_list_t *)ptr)->previous = before_next_block; - ((free_list_t *)ptr)->next = after_next_block; - free_list_set_checksum(szone, ptr); - set_tiny_meta_header_free(ptr, msize); - set_tiny_meta_header_middle(big_free_block); // clear the meta_header to enable coalescing backwards - goto tiny_free_ending; - } - tiny_free_list_remove_ptr(szone, next_block, next_msize); - set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards - msize += next_msize; - } + // If we are coalescing with the next block, and the next block is in + // the last slot of the free list, then we optimize this case here to + // avoid removing next_block from the slot 31 and then adding ptr back + // to slot 31. + if (next_msize >= NUM_TINY_SLOTS) { + msize += next_msize; + big_free_block = (free_list_t *)next_block; + free_list_checksum(szone, big_free_block, __PRETTY_FUNCTION__); + after_next_block = free_list_unchecksum_ptr(big_free_block->next); + before_next_block = free_list_unchecksum_ptr(big_free_block->previous); + if (!before_next_block) { + szone->tiny_free_list[NUM_TINY_SLOTS-1] = ptr; + } else { + before_next_block->next.u = free_list_checksum_ptr(ptr); + } + if (after_next_block) { + after_next_block->previous.u = free_list_checksum_ptr(ptr); + } + // we don't need to checksum these since they are already checksummed + ((free_list_t *)ptr)->previous = big_free_block->previous; + ((free_list_t *)ptr)->next = big_free_block->next; + + // clear the meta_header to enable coalescing backwards + set_tiny_meta_header_middle(big_free_block); + set_tiny_meta_header_free(ptr, msize); + goto tiny_free_ending; + } + tiny_free_list_remove_ptr(szone, next_block, next_msize); + set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards + msize += next_msize; + } +#if !TINY_CACHE + // The tiny cache already scribbles free blocks as they go through the + // cache, so we do not need to do it here. if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize) { memset(ptr, 0x55, TINY_BYTES_FOR_MSIZE(msize)); } +#endif tiny_free_list_add_ptr(szone, ptr, msize); tiny_free_ending: // When in proper debug mode we write on the memory to help debug memory smashers @@ -1125,42 +1231,68 @@ tiny_free_no_lock(szone_t *szone, tiny_region_t *region, void *ptr, msize_t msiz szone->num_bytes_in_tiny_objects -= original_size; // we use original_size and not msize to avoid double counting the coalesced blocks } +// Allocates from the last region or a freshly allocated region static void * tiny_malloc_from_region_no_lock(szone_t *szone, msize_t msize) { - tiny_region_t last_region, *new_regions; - void *last_block, *ptr, *aligned_address; - - // Allocates from the last region or a freshly allocated region - // Before anything we transform the tiny_bytes_free_at_end - if any - to a regular free block + void *last_block, *ptr, *aligned_address; + unsigned char *last_header; + msize_t last_msize, last_index; + + // Before anything we transform any remaining tiny_bytes_free_at_end into a + // regular free block. We take special care here to update the bitfield + // information, since we are bypassing the normal free codepath. 
If there + // is more than one quanta worth of memory in tiny_bytes_free_at_end, then + // there will be two block headers: + // 1) header for the free space at end, msize = 1 + // 2) header inserted by set_tiny_meta_header_in_use after block + // We must clear the second one so that when the free block's size is + // queried, we do not think the block is only 1 quantum in size because + // of the second set header bit. if (szone->tiny_bytes_free_at_end) { - last_region = szone->tiny_regions[szone->num_tiny_regions-1]; - last_block = TINY_REGION_END(last_region) - szone->tiny_bytes_free_at_end; - tiny_free_list_add_ptr(szone, last_block, TINY_MSIZE_FOR_BYTES(szone->tiny_bytes_free_at_end)); - szone->tiny_bytes_free_at_end = 0; + last_block = TINY_REGION_END(szone->last_tiny_region) - szone->tiny_bytes_free_at_end; + last_msize = TINY_MSIZE_FOR_BYTES(szone->tiny_bytes_free_at_end); + last_header = TINY_BLOCK_HEADER_FOR_PTR(last_block); + last_index = TINY_INDEX_FOR_PTR(last_block); + + if (last_index != (NUM_TINY_BLOCKS - 1)) + BITARRAY_CLR(last_header, last_index + 1); + + tiny_free_list_add_ptr(szone, last_block, last_msize); + szone->tiny_bytes_free_at_end = 0; } // time to create a new region - aligned_address = allocate_pages(szone, TINY_REGION_SIZE, TINY_BLOCKS_ALIGN, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_TINY)); - if (!aligned_address) { - // out of memory! - return NULL; - } + aligned_address = allocate_pages(szone, TINY_REGION_SIZE, TINY_BLOCKS_ALIGN, 0, VM_MEMORY_MALLOC_TINY); + if (!aligned_address) // out of memory! + return NULL; // We set the padding after block_header to be all 1 - ((uint32_t *)(aligned_address + (1 << TINY_BLOCKS_ALIGN) + (NUM_TINY_BLOCKS >> 3)))[0] = ~0; - if (szone->num_tiny_regions == INITIAL_NUM_TINY_REGIONS) { - // XXX logic here fails after initial reallocation of tiny regions is exhausted (approx 4GB of - // tiny allocations) - new_regions = small_malloc_from_region_no_lock(szone, 16); // 16 * 512 bytes is plenty of tiny regions (more than 4,000) - if (!new_regions) return NULL; - memcpy(new_regions, szone->tiny_regions, INITIAL_NUM_TINY_REGIONS * sizeof(tiny_region_t)); - szone->tiny_regions = new_regions; // we set the pointer after it's all ready to enable enumeration from another thread without locking - } - szone->tiny_regions[szone->num_tiny_regions] = aligned_address; - szone->num_tiny_regions ++; // we set the number after the pointer is all ready to enable enumeration from another thread without taking the lock + ((uint32_t *)(aligned_address + TINY_HEADER_START + (NUM_TINY_BLOCKS >> 3)))[0] = ~0; + + // Check to see if the hash ring of tiny regions needs to grow. Try to + // avoid the hash ring becoming too dense. + if (szone->num_tiny_regions_allocated < (2 * szone->num_tiny_regions)) { + region_t *new_regions; + size_t new_size; + new_regions = hash_regions_grow_no_lock(szone, szone->tiny_regions, + szone->num_tiny_regions_allocated, + &new_size); + // Do not deallocate the current tiny_regions allocation since someone may + // be iterating it. Instead, just leak it. 
+ szone->tiny_regions = new_regions; + szone->num_tiny_regions_allocated = new_size; + } + // Insert the new region into the hash ring, and update malloc statistics + hash_region_insert_no_lock(szone->tiny_regions, + szone->num_tiny_regions_allocated, + aligned_address); + szone->last_tiny_region = aligned_address; + + szone->num_tiny_regions++; ptr = aligned_address; set_tiny_meta_header_in_use(ptr, msize); szone->num_tiny_objects++; szone->num_bytes_in_tiny_objects += TINY_BYTES_FOR_MSIZE(msize); + // We put a header on the last block so that it appears in use (for coalescing, etc...) set_tiny_meta_header_in_use(ptr + TINY_BYTES_FOR_MSIZE(msize), 1); szone->tiny_bytes_free_at_end = TINY_BYTES_FOR_MSIZE(NUM_TINY_BLOCKS - msize); @@ -1198,7 +1330,7 @@ try_realloc_tiny_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new SZONE_UNLOCK(szone); return 0; // next_block is in use; } - next_msize = TINY_FREE_SIZE(next_block); + next_msize = get_tiny_free_size(next_block); if (old_size + TINY_MSIZE_FOR_BYTES(next_msize) < new_size) { SZONE_UNLOCK(szone); return 0; // even with next block, not enough @@ -1224,24 +1356,25 @@ try_realloc_tiny_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new } static boolean_t -tiny_check_region(szone_t *szone, tiny_region_t *region) +tiny_check_region(szone_t *szone, region_t region) { - uintptr_t start, ptr, region_end, follower; - boolean_t prev_free = 0; - boolean_t is_free; - msize_t msize; - free_list_t *free_head; + uintptr_t start, ptr, region_end; + boolean_t prev_free = 0; + boolean_t is_free; + msize_t msize; + free_list_t *free_head; + void *follower, *previous, *next; /* establish region limits */ - start = (uintptr_t)TINY_REGION_ADDRESS(*region); + start = (uintptr_t)TINY_REGION_ADDRESS(region); ptr = start; - region_end = (uintptr_t)TINY_REGION_END(*region); + region_end = (uintptr_t)TINY_REGION_END(region); /* * The last region may have a trailing chunk which has not been converted into inuse/freelist * blocks yet. */ - if (region == szone->tiny_regions + szone->num_tiny_regions - 1) + if (region == szone->last_tiny_region) region_end -= szone->tiny_bytes_free_at_end; @@ -1296,28 +1429,30 @@ tiny_check_region(szone_t *szone, tiny_region_t *region) */ free_head = (free_list_t *)ptr; free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); - if (free_head->previous && !tiny_meta_header_is_free(free_head->previous)) { + previous = free_list_unchecksum_ptr(free_head->previous); + next = free_list_unchecksum_ptr(free_head->next); + if (previous && !tiny_meta_header_is_free(previous)) { malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n", - ptr, free_head->previous); + ptr, previous); return 0; } - if (free_head->next && !tiny_meta_header_is_free(free_head->next)) { + if (next && !tiny_meta_header_is_free(next)) { malloc_printf("*** invariant broken for %p (next in free list %p is not a free pointer)\n", - ptr, free_head->next); + ptr, next); return 0; } /* * Check the free block's trailing size value. 
*/ - follower = (uintptr_t)FOLLOWING_TINY_PTR(ptr, msize); - if ((follower != region_end) && (TINY_PREVIOUS_MSIZE(follower) != msize)) { + follower = FOLLOWING_TINY_PTR(ptr, msize); + if (((uintptr_t)follower != region_end) && (get_tiny_previous_free_msize(follower) != msize)) { malloc_printf("*** invariant broken for tiny free %p followed by %p in region [%p-%p] " "(end marker incorrect) should be %d; in fact %d\n", - ptr, follower, TINY_REGION_ADDRESS(*region), region_end, msize, TINY_PREVIOUS_MSIZE(follower)); + ptr, follower, TINY_REGION_ADDRESS(region), region_end, msize, get_tiny_previous_free_msize(follower)); return 0; } /* move to next block */ - ptr = follower; + ptr = (uintptr_t)follower; } } /* @@ -1330,7 +1465,7 @@ tiny_check_region(szone_t *szone, tiny_region_t *region) /* * Check the trailing block's integrity. */ - if (region == szone->tiny_regions + szone->num_tiny_regions - 1) { + if (region == szone->last_tiny_region) { if (szone->tiny_bytes_free_at_end) { msize = get_tiny_meta_header((void *)ptr, &is_free); if (is_free || (msize != 1)) { @@ -1342,215 +1477,254 @@ tiny_check_region(szone_t *szone, tiny_region_t *region) } static kern_return_t -tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t region_address, unsigned short num_regions, size_t tiny_bytes_free_at_end, memory_reader_t reader, vm_range_recorder_t recorder) +tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder) { - tiny_region_t *regions; - unsigned index = 0; - vm_range_t buffer[MAX_RECORDER_BUFFER]; - unsigned count = 0; - kern_return_t err; - tiny_region_t region; - vm_range_t range; - vm_range_t admin_range; - vm_range_t ptr_range; - unsigned char *mapped_region; - unsigned char *block_header; - unsigned char *in_use; - unsigned block_index; - unsigned block_limit; - boolean_t is_free; - msize_t msize; - void *mapped_ptr; - unsigned bit; - - err = reader(task, region_address, sizeof(tiny_region_t) * num_regions, (void **)®ions); - if (err) return err; - while (index < num_regions) { - // unsigned num_in_use = 0; - // unsigned num_free = 0; - region = regions[index]; - range.address = (vm_address_t)TINY_REGION_ADDRESS(region); - range.size = (vm_size_t)TINY_REGION_SIZE; - if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) { - admin_range.address = range.address + (1 << TINY_BLOCKS_ALIGN); - admin_range.size = range.size - (1 << TINY_BLOCKS_ALIGN); - recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1); - } - if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) { - ptr_range.address = range.address; - ptr_range.size = 1 << TINY_BLOCKS_ALIGN; - recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1); - } - if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) { - err = reader(task, range.address, range.size, (void **)&mapped_region); - if (err) - return err; - block_header = (unsigned char *)(mapped_region + (1 << TINY_BLOCKS_ALIGN)); - in_use = block_header + (NUM_TINY_BLOCKS >> 3) + 4; - block_index = 0; - block_limit = NUM_TINY_BLOCKS; - if (index == num_regions - 1) - block_limit -= TINY_MSIZE_FOR_BYTES(tiny_bytes_free_at_end); - while (block_index < block_limit) { - is_free = !BITARRAY_BIT(in_use, block_index); - if (is_free) { - mapped_ptr = mapped_region + TINY_BYTES_FOR_MSIZE(block_index); - msize = TINY_FREE_SIZE(mapped_ptr); - if (!msize) - break; - } else { - msize = 1; - bit = block_index + 1; - while (! 
BITARRAY_BIT(block_header, bit)) { - bit++; - msize ++; - } - buffer[count].address = range.address + TINY_BYTES_FOR_MSIZE(block_index); - buffer[count].size = TINY_BYTES_FOR_MSIZE(msize); - count++; - if (count >= MAX_RECORDER_BUFFER) { - recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); - count = 0; - } - } - block_index += msize; - } - } - index++; - } - if (count) { - recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); - } - return 0; + size_t num_regions = szone->num_tiny_regions_allocated; + void *last_tiny_free = szone->last_tiny_free; + size_t index; + region_t *regions; + vm_range_t buffer[MAX_RECORDER_BUFFER]; + unsigned count = 0; + kern_return_t err; + region_t region; + vm_range_t range; + vm_range_t admin_range; + vm_range_t ptr_range; + unsigned char *mapped_region; + unsigned char *block_header; + unsigned char *in_use; + unsigned block_index; + unsigned block_limit; + boolean_t is_free; + msize_t msize; + void *mapped_ptr; + unsigned bit; + vm_address_t last_tiny_free_ptr = 0; + msize_t last_tiny_free_msize = 0; + + if (last_tiny_free) { + last_tiny_free_ptr = (uintptr_t) last_tiny_free & ~(TINY_QUANTUM - 1); + last_tiny_free_msize = (uintptr_t) last_tiny_free & (TINY_QUANTUM - 1); + } + + err = reader(task, (vm_address_t)szone->tiny_regions, sizeof(region_t) * num_regions, (void **)®ions); + if (err) return err; + for (index = 0; index < num_regions; ++index) { + region = regions[index]; + if (region) { + range.address = (vm_address_t)TINY_REGION_ADDRESS(region); + range.size = (vm_size_t)TINY_REGION_SIZE; + if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) { + admin_range.address = range.address + TINY_HEADER_START; + admin_range.size = TINY_HEADER_SIZE; + recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1); + } + if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) { + ptr_range.address = range.address; + ptr_range.size = NUM_TINY_BLOCKS * TINY_QUANTUM; + recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1); + } + if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) { + err = reader(task, range.address, range.size, (void **)&mapped_region); + if (err) + return err; + + block_header = (unsigned char *)(mapped_region + TINY_HEADER_START); + in_use = TINY_INUSE_FOR_HEADER(block_header); + block_index = 0; + block_limit = NUM_TINY_BLOCKS; + if (region == szone->last_tiny_region) + block_limit -= TINY_MSIZE_FOR_BYTES(szone->tiny_bytes_free_at_end); + + while (block_index < block_limit) { + vm_size_t block_offset = TINY_BYTES_FOR_MSIZE(block_index); + is_free = !BITARRAY_BIT(in_use, block_index); + if (is_free) { + mapped_ptr = mapped_region + block_offset; + + // mapped_region, the address at which 'range' in 'task' has been + // mapped into our process, is not necessarily aligned to + // TINY_BLOCKS_ALIGN. + // + // Since the code in get_tiny_free_size() assumes the pointer came + // from a properly aligned tiny region, and mapped_region is not + // necessarily aligned, then do the size calculation directly. + // If the next bit is set in the header bitmap, then the size is one + // quantum. Otherwise, read the size field. + if (!BITARRAY_BIT(block_header, block_index+1)) + msize = TINY_FREE_SIZE(mapped_ptr); + else + msize = 1; + + if (!msize) + break; + } else if (range.address + block_offset != last_tiny_free_ptr) { + msize = 1; + bit = block_index + 1; + while (! 
BITARRAY_BIT(block_header, bit)) { + bit++; + msize ++; + } + buffer[count].address = range.address + block_offset; + buffer[count].size = TINY_BYTES_FOR_MSIZE(msize); + count++; + if (count >= MAX_RECORDER_BUFFER) { + recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); + count = 0; + } + } else { + // Block is not free but it matches last_tiny_free_ptr so even + // though it is not marked free in the bitmap, we treat it as if + // it is and move on + msize = last_tiny_free_msize; + } + block_index += msize; + } + } + } + } + if (count) { + recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); + } + return 0; } -static INLINE void * +static void * tiny_malloc_from_free_list(szone_t *szone, msize_t msize) { // Assumes we've locked the region free_list_t *ptr; - msize_t this_msize; - grain_t slot = msize - 1; + msize_t this_msize; + grain_t slot = msize - 1; free_list_t **free_list = szone->tiny_free_list; free_list_t **the_slot = free_list + slot; free_list_t *next; free_list_t **limit; unsigned bitmap; - msize_t leftover_msize; + msize_t leftover_msize; free_list_t *leftover_ptr; - /* - * Look for an exact match by checking the freelist for this msize. - */ + // Assumes locked + CHECK_LOCKED(szone, __PRETTY_FUNCTION__); + + // Look for an exact match by checking the freelist for this msize. + // ptr = *the_slot; if (ptr) { - next = ptr->next; - if (next) { - next->previous = NULL; - free_list_set_checksum(szone, next); - } - *the_slot = next; - this_msize = msize; + next = free_list_unchecksum_ptr(ptr->next); + if (next) { + next->previous = ptr->previous; + } else { + BITMAP32_CLR(szone->tiny_bitmap, slot); + } + *the_slot = next; + this_msize = msize; #if DEBUG_MALLOC - if (LOG(szone, ptr)) { - malloc_printf("in tiny_malloc_from_free_list(), exact match ptr=%p, this_msize=%d\n", ptr, this_msize); - } + if (LOG(szone, ptr)) { + malloc_printf("in tiny_malloc_from_free_list(), exact match ptr=%p, this_msize=%d\n", ptr, this_msize); + } #endif - goto return_tiny_alloc; + goto return_tiny_alloc; } - /* - * Iterate over freelists for larger blocks looking for the next-largest block. - */ + // Mask off the bits representing slots holding free blocks smaller than the + // size we need. If there are no larger free blocks, try allocating from + // the free space at the end of the tiny region. bitmap = szone->tiny_bitmap & ~ ((1 << slot) - 1); if (!bitmap) - goto try_tiny_malloc_from_end; - slot = BITMAP32_FFS(bitmap) - 1; + goto try_tiny_malloc_from_end; + + slot = BITMAP32_CTZ(bitmap); limit = free_list + NUM_TINY_SLOTS - 1; free_list += slot; + + // Iterate over freelists looking for free blocks, starting at first list + // which is not empty, and contains blocks which are large enough to satisfy + // our request. 
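//
// Aside on the slot selection just above: the diff swaps "slot = BITMAP32_FFS(bitmap) - 1"
// for "slot = BITMAP32_CTZ(bitmap)" -- presumably the same index, with the 1-based
// find-first-set adjustment folded away. A minimal standalone sketch of the
// mask-and-count-trailing-zeros trick, assuming BITMAP32_CTZ maps to a primitive
// such as GCC's __builtin_ctz (the demo_* names below are hypothetical, for
// illustration only):
//
//     #include <assert.h>
//     #include <stdint.h>
//
//     // Bit i set means "the freelist for (i+1)-quantum blocks is non-empty"
//     // (the last slot also collects anything larger, via coalescing).
//     // Clear the bits for slots too small for the request, then take the
//     // lowest bit that remains; -1 means fall back to the region end.
//     static int demo_slot_for_msize(uint32_t bitmap, unsigned msize) {
//         uint32_t usable = bitmap & ~((1U << (msize - 1)) - 1);
//         return usable ? (int)__builtin_ctz(usable) : -1;
//     }
//
//     int main(void) {
//         uint32_t bitmap = (1U << 2) | (1U << 7);      // 3- and 8-quantum lists non-empty
//         assert(demo_slot_for_msize(bitmap, 2) == 2);  // a 3-quantum block serves a 2-quantum request
//         assert(demo_slot_for_msize(bitmap, 4) == 7);  // next non-empty list that is large enough
//         assert(demo_slot_for_msize(bitmap, 9) == -1); // nothing big enough in this bitmap
//         return 0;
//     }
//
// A hit in a larger slot is split, with the remainder handled at
// add_leftover_and_proceed; the loop below walks the freelists upward from
// the first usable slot.
//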
while (free_list < limit) { - // try bigger grains - ptr = *free_list; - if (ptr) { - next = ptr->next; - if (next) { - next->previous = NULL; - free_list_set_checksum(szone, next); - } - *free_list = next; - this_msize = TINY_FREE_SIZE(ptr); -#if DEBUG_MALLOC - if (LOG(szone, ptr)) { - malloc_printf("in tiny_malloc_from_free_list(), bigger grain ptr=%p, msize=%d this_msize=%d\n", ptr, msize, this_msize); - } -#endif - goto add_leftover_and_proceed; - } - free_list++; - } - // we are now looking at the last slot (31) + ptr = *free_list; + if (ptr) { + next = free_list_unchecksum_ptr(ptr->next); + *free_list = next; + this_msize = get_tiny_free_size(ptr); + if (next) { + next->previous = ptr->previous; + } else { + BITMAP32_CLR(szone->tiny_bitmap, this_msize - 1); + } + goto add_leftover_and_proceed; + } + free_list++; + } + + // We are now looking at the last slot, which contains blocks equal to, or + // due to coalescing of free blocks, larger than 31 * tiny quantum size. + // If the last freelist is not empty, and the head contains a block that is + // larger than our request, then the remainder is put back on the free list. ptr = *limit; if (ptr) { - this_msize = TINY_FREE_SIZE(ptr); - next = ptr->next; - if (this_msize - msize >= NUM_TINY_SLOTS) { - // the leftover will go back to the free list, so we optimize by modifying the free list rather than removing the head and then adding back - leftover_msize = this_msize - msize; - leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize)); - *limit = leftover_ptr; - if (next) { - next->previous = leftover_ptr; - free_list_set_checksum(szone, next); - } - leftover_ptr->next = next; - leftover_ptr->previous = NULL; - free_list_set_checksum(szone, leftover_ptr); - set_tiny_meta_header_free(leftover_ptr, leftover_msize); + free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); + this_msize = get_tiny_free_size(ptr); + next = free_list_unchecksum_ptr(ptr->next); + if (this_msize - msize >= NUM_TINY_SLOTS) { + // the leftover will go back to the free list, so we optimize by + // modifying the free list rather than a pop and push of the head + leftover_msize = this_msize - msize; + leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize)); + *limit = leftover_ptr; + if (next) { + next->previous.u = free_list_checksum_ptr(leftover_ptr); + } + leftover_ptr->previous = ptr->previous; + leftover_ptr->next = ptr->next; + set_tiny_meta_header_free(leftover_ptr, leftover_msize); #if DEBUG_MALLOC - if (LOG(szone,ptr)) { - malloc_printf("in tiny_malloc_from_free_list(), last slot ptr=%p, msize=%d this_msize=%d\n", ptr, msize, this_msize); - } + if (LOG(szone,ptr)) { + malloc_printf("in tiny_malloc_from_free_list(), last slot ptr=%p, msize=%d this_msize=%d\n", ptr, msize, this_msize); + } #endif - this_msize = msize; - goto return_tiny_alloc; - } - *limit = next; - if (next) { - next->previous = NULL; - free_list_set_checksum(szone, next); - } - goto add_leftover_and_proceed; + this_msize = msize; + goto return_tiny_alloc; + } + if (next) { + next->previous = ptr->previous; + } + *limit = next; + goto add_leftover_and_proceed; } + try_tiny_malloc_from_end: // Let's see if we can use szone->tiny_bytes_free_at_end if (szone->tiny_bytes_free_at_end >= TINY_BYTES_FOR_MSIZE(msize)) { - ptr = (free_list_t *)(TINY_REGION_END(szone->tiny_regions[szone->num_tiny_regions-1]) - szone->tiny_bytes_free_at_end); - szone->tiny_bytes_free_at_end -= TINY_BYTES_FOR_MSIZE(msize); - if (szone->tiny_bytes_free_at_end) { - // let's 
add an in use block after ptr to serve as boundary - set_tiny_meta_header_in_use((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize), 1); - } - this_msize = msize; + ptr = (free_list_t *)(TINY_REGION_END(szone->last_tiny_region) - szone->tiny_bytes_free_at_end); + szone->tiny_bytes_free_at_end -= TINY_BYTES_FOR_MSIZE(msize); + if (szone->tiny_bytes_free_at_end) { + // let's add an in use block after ptr to serve as boundary + set_tiny_meta_header_in_use((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize), 1); + } + this_msize = msize; #if DEBUG_MALLOC - if (LOG(szone, ptr)) { - malloc_printf("in tiny_malloc_from_free_list(), from end ptr=%p, msize=%d\n", ptr, msize); - } + if (LOG(szone, ptr)) { + malloc_printf("in tiny_malloc_from_free_list(), from end ptr=%p, msize=%d\n", ptr, msize); + } #endif - goto return_tiny_alloc; + goto return_tiny_alloc; } return NULL; + add_leftover_and_proceed: if (!this_msize || (this_msize > msize)) { - leftover_msize = this_msize - msize; - leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize)); + leftover_msize = this_msize - msize; + leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize)); #if DEBUG_MALLOC - if (LOG(szone,ptr)) { - malloc_printf("in tiny_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize); - } + if (LOG(szone,ptr)) { + malloc_printf("in tiny_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize); + } #endif - tiny_free_list_add_ptr(szone, leftover_ptr, leftover_msize); - this_msize = msize; + tiny_free_list_add_ptr(szone, leftover_ptr, leftover_msize); + this_msize = msize; } + return_tiny_alloc: szone->num_tiny_objects++; szone->num_bytes_in_tiny_objects += TINY_BYTES_FOR_MSIZE(this_msize); @@ -1571,7 +1745,7 @@ tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_reques #if DEBUG_MALLOC if (!msize) { - szone_error(szone, "invariant broken (!msize) in allocation (region)", NULL); + szone_error(szone, "invariant broken (!msize) in allocation (region)", NULL, NULL); return(NULL); } #endif @@ -1616,7 +1790,7 @@ tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_reques } static INLINE void -free_tiny(szone_t *szone, void *ptr, tiny_region_t *tiny_region) +free_tiny(szone_t *szone, void *ptr, region_t *tiny_region) { msize_t msize; boolean_t is_free; @@ -1630,13 +1804,13 @@ free_tiny(szone_t *szone, void *ptr, tiny_region_t *tiny_region) ptr2 = szone->last_tiny_free; /* check that we don't already have this pointer in the cache */ if (ptr == (void *)((uintptr_t)ptr2 & ~ (TINY_QUANTUM - 1))) { - szone_error(szone, "double free", ptr); + szone_error(szone, "double free", ptr, NULL); return; } #endif /* TINY_CACHE */ msize = get_tiny_meta_header(ptr, &is_free); if (is_free) { - szone_error(szone, "double free", ptr); + szone_error(szone, "double free", ptr, NULL); return; } #if DEBUG_MALLOC @@ -1647,7 +1821,9 @@ free_tiny(szone_t *szone, void *ptr, tiny_region_t *tiny_region) #endif #if TINY_CACHE if (msize < TINY_QUANTUM) { // to see if the bits fit in the last 4 bits - szone->last_tiny_free = (void *)(((uintptr_t)ptr) | msize); + if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize) + memset(ptr, 0x55, TINY_BYTES_FOR_MSIZE(msize)); + szone->last_tiny_free = (void *)(((uintptr_t)ptr) | msize); if (!ptr2) { SZONE_UNLOCK(szone); CHECK(szone, __PRETTY_FUNCTION__); @@ -1657,7 +1833,7 @@ free_tiny(szone_t *szone, void *ptr, tiny_region_t *tiny_region) ptr = (void *)(((uintptr_t)ptr2) 
& ~(TINY_QUANTUM - 1)); tiny_region = tiny_region_for_ptr_no_lock(szone, ptr); if (!tiny_region) { - szone_error(szone, "double free (tiny cache)", ptr); + szone_error(szone, "double free (tiny cache)", ptr, NULL); } } #endif @@ -1671,19 +1847,24 @@ print_tiny_free_list(szone_t *szone) { grain_t slot = 0; free_list_t *ptr; - - malloc_printf("tiny free sizes: "); - while (slot < NUM_TINY_SLOTS) { - ptr = szone->tiny_free_list[slot]; - if (ptr) { - malloc_printf("%s%y[%d]; ", (slot == NUM_TINY_SLOTS-1) ? ">=" : "", (slot+1)*TINY_QUANTUM, free_list_count(ptr)); + _SIMPLE_STRING b = _simple_salloc(); + + if (b) { + _simple_sappend(b, "tiny free sizes: "); + while (slot < NUM_TINY_SLOTS) { + ptr = szone->tiny_free_list[slot]; + if (ptr) { + _simple_sprintf(b, "%s%y[%d]; ", (slot == NUM_TINY_SLOTS-1) ? ">=" : "", (slot+1)*TINY_QUANTUM, free_list_count(ptr)); + } + slot++; } - slot++; + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b)); + _simple_sfree(b); } } static void -print_tiny_region(boolean_t verbose, tiny_region_t region, size_t bytes_at_end) +print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_end) { unsigned counts[1024]; unsigned in_use = 0; @@ -1693,6 +1874,7 @@ print_tiny_region(boolean_t verbose, tiny_region_t region, size_t bytes_at_end) boolean_t is_free; msize_t msize; unsigned ci; + _SIMPLE_STRING b; memset(counts, 0, 1024 * sizeof(unsigned)); while (current < limit) { @@ -1715,16 +1897,19 @@ print_tiny_region(boolean_t verbose, tiny_region_t region, size_t bytes_at_end) } current += TINY_BYTES_FOR_MSIZE(msize); } - malloc_printf("Tiny region [%p-%p, %y]\t", (void *)start, TINY_REGION_END(region), (int)TINY_REGION_SIZE); - malloc_printf("In_use=%d ", in_use); - if (bytes_at_end) malloc_printf("untouched=%y ", bytes_at_end); - if (verbose && in_use) { - malloc_printf("\tSizes in use: "); - for (ci = 0; ci < 1024; ci++) - if (counts[ci]) - malloc_printf("%d[%d]", TINY_BYTES_FOR_MSIZE(ci), counts[ci]); + if ((b = _simple_salloc()) != NULL) { + _simple_sprintf(b, "Tiny region [%p-%p, %y]\t", (void *)start, TINY_REGION_END(region), (int)TINY_REGION_SIZE); + _simple_sprintf(b, "In_use=%d ", in_use); + if (bytes_at_end) _simple_sprintf(b, "untouched=%ly", bytes_at_end); + if (verbose && in_use) { + _simple_sappend(b, "\n\tSizes in use: "); + for (ci = 0; ci < 1024; ci++) + if (counts[ci]) + _simple_sprintf(b, "%d[%d]", TINY_BYTES_FOR_MSIZE(ci), counts[ci]); + } + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b)); + _simple_sfree(b); } - malloc_printf("\n"); } static boolean_t @@ -1737,27 +1922,27 @@ tiny_free_list_check(szone_t *szone, grain_t slot) CHECK_LOCKED(szone, __PRETTY_FUNCTION__); while (ptr) { - free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); - is_free = tiny_meta_header_is_free(ptr); - if (! 
is_free) { - malloc_printf("*** in-use ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr); - return 0; - } - if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) { - malloc_printf("*** inaligned ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr); - return 0; - } - if (!tiny_region_for_ptr_no_lock(szone, ptr)) { - malloc_printf("*** itr not in szone slot=%d count=%d ptr=%p\n", slot, count, ptr); - return 0; - } - if (ptr->previous != previous) { - malloc_printf("*** irevious incorrectly set slot=%d count=%d ptr=%p\n", slot, count, ptr); - return 0; - } - previous = ptr; - ptr = ptr->next; - count++; + free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); + is_free = tiny_meta_header_is_free(ptr); + if (! is_free) { + malloc_printf("*** in-use ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr); + return 0; + } + if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) { + malloc_printf("*** unaligned ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr); + return 0; + } + if (!tiny_region_for_ptr_no_lock(szone, ptr)) { + malloc_printf("*** ptr not in szone slot=%d count=%d ptr=%p\n", slot, count, ptr); + return 0; + } + if (free_list_unchecksum_ptr(ptr->previous) != previous) { + malloc_printf("*** previous incorrectly set slot=%d count=%d ptr=%p\n", slot, count, ptr); + return 0; + } + previous = ptr; + ptr = free_list_unchecksum_ptr(ptr->next); + count++; } return 1; } @@ -1771,7 +1956,6 @@ tiny_free_list_check(szone_t *szone, grain_t slot) static INLINE void small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize) { - meta_headers[index] = msize | SMALL_IS_FREE; } @@ -1782,7 +1966,6 @@ small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msi static INLINE void small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize) { - meta_headers[index] = msize; } @@ -1792,7 +1975,6 @@ small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize static INLINE void small_meta_header_set_middle(msize_t *meta_headers, msize_t index) { - meta_headers[index] = 0; } @@ -1802,42 +1984,39 @@ small_meta_header_set_middle(msize_t *meta_headers, msize_t index) static void small_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize) { - grain_t grain = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1; + grain_t slot = (msize <= NUM_SMALL_SLOTS) ? 
msize - 1 : NUM_SMALL_SLOTS - 1; free_list_t *free_ptr = ptr; - free_list_t *free_head = szone->small_free_list[grain]; + free_list_t *free_head = szone->small_free_list[slot]; void *follower; - CHECK_LOCKED(szone, __PRETTY_FUNCTION__); #if DEBUG_MALLOC if (LOG(szone,ptr)) { - malloc_printf("in small_free_list_add_ptr(), ptr=%p, msize=%d\n", ptr, msize); + malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize); } if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) { - szone_error(szone, "small_free_list_add_ptr: Unaligned ptr", ptr); + szone_error(szone, "small_free_list_add_ptr: Unaligned ptr", ptr, NULL); } #endif if (free_head) { - free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); + free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); #if DEBUG_MALLOC - if (free_head->previous) { - malloc_printf("ptr=%p grain=%d free_head=%p previous=%p\n", - ptr, grain, free_head, free_head->previous); - szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr); - } - if (!SMALL_PTR_IS_FREE(free_head)) { - malloc_printf("ptr=%p grain=%d free_head=%p\n", ptr, grain, free_head); - szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr); - } + if (free_list_unchecksum_ptr(free_head->previous)) { + szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr, + "ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, free_head, free_head->previous.p); + } + if (!SMALL_PTR_IS_FREE(free_head)) { + szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr, + "ptr=%p slot=%d free_head=%p\n", ptr, slot, free_head); + } #endif - free_head->previous = free_ptr; - free_list_set_checksum(szone, free_head); + free_head->previous.u = free_list_checksum_ptr(free_ptr); } else { - BITMAP32_SET(szone->small_bitmap, grain); + BITMAP32_SET(szone->small_bitmap, slot); } - free_ptr->previous = NULL; - free_ptr->next = free_head; + free_ptr->previous.p = NULL; + free_ptr->next.p = free_head; free_list_set_checksum(szone, free_ptr); - szone->small_free_list[grain] = free_ptr; + szone->small_free_list[slot] = free_ptr; follower = ptr + SMALL_BYTES_FOR_MSIZE(msize); SMALL_PREVIOUS_MSIZE(follower) = msize; } @@ -1848,58 +2027,52 @@ small_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize) static void small_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize) { - grain_t grain = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1; - free_list_t *free_ptr = ptr; - free_list_t *next = free_ptr->next; - free_list_t *previous = free_ptr->previous; + grain_t slot = (msize <= NUM_SMALL_SLOTS) ? 
msize - 1 : NUM_SMALL_SLOTS - 1; + free_list_t *free_ptr = ptr, *next, *previous; + free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__); + + next = free_list_unchecksum_ptr(free_ptr->next); + previous = free_list_unchecksum_ptr(free_ptr->previous); - CHECK_LOCKED(szone, __PRETTY_FUNCTION__); #if DEBUG_MALLOC if (LOG(szone,ptr)) { - malloc_printf("in small_free_list_remove_ptr(), ptr=%p, msize=%d\n", ptr, msize); + malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize); } #endif - free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__); - if (!previous) { + if (!previous) { + // The block to remove is the head of the free list #if DEBUG_MALLOC - if (szone->small_free_list[grain] != ptr) { - malloc_printf("ptr=%p grain=%d msize=%d szone->small_free_list[grain]=%p\n", - ptr, grain, msize, szone->small_free_list[grain]); - szone_error(szone, "small_free_list_remove_ptr: Internal invariant broken (szone->small_free_list[grain])", ptr); - return; - } + if (szone->small_free_list[slot] != ptr) { + szone_error(szone, "small_free_list_remove_ptr: Internal invariant broken (szone->small_free_list[grain])", ptr, + "ptr=%p slot=%d msize=%d szone->small_free_list[slot]=%p\n", + ptr, slot, msize, szone->small_free_list[slot]); + return; + } #endif - szone->small_free_list[grain] = next; - if (!next) BITMAP32_CLR(szone->small_bitmap, grain); + szone->small_free_list[slot] = next; + if (!next) BITMAP32_CLR(szone->small_bitmap, slot); } else { - previous->next = next; - free_list_set_checksum(szone, previous); + // We know free_ptr is already checksummed, so we don't need to do it + // again. + previous->next = free_ptr->next; } if (next) { - next->previous = previous; - free_list_set_checksum(szone, next); + // We know free_ptr is already checksummed, so we don't need to do it + // again. + next->previous = free_ptr->previous; } } -static INLINE small_region_t * +static INLINE region_t * small_region_for_ptr_no_lock(szone_t *szone, const void *ptr) { - small_region_t *region; - small_region_t rbase; - int i; - - /* find assumed heap/region base */ - rbase = SMALL_REGION_FOR_PTR(ptr); - - /* scan existing regions for a match */ - for (i = szone->num_small_regions, region = szone->small_regions; i > 0; i--, region++) - if (rbase == *region) - return(region); - return(NULL); + return hash_lookup_region_no_lock(szone->small_regions, + szone->num_small_regions_allocated, + SMALL_REGION_FOR_PTR(ptr)); } static INLINE void -small_free_no_lock(szone_t *szone, small_region_t *region, void *ptr, msize_t msize) +small_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) { msize_t *meta_headers = SMALL_META_HEADER_FOR_PTR(ptr); unsigned index = SMALL_META_INDEX_FOR_PTR(ptr); @@ -1916,8 +2089,8 @@ small_free_no_lock(szone_t *szone, small_region_t *region, void *ptr, msize_t ms malloc_printf("in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); } if (! 
msize) { - malloc_printf("in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); - szone_error(szone, "trying to free small block that is too small", ptr); + szone_error(szone, "trying to free small block that is too small", ptr, + "in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize); } #endif // We try to coalesce this block with the preceeding one @@ -1951,7 +2124,7 @@ small_free_no_lock(szone_t *szone, small_region_t *region, void *ptr, msize_t ms } if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) { if (!msize) { - szone_error(szone, "incorrect size information - block header was damaged", ptr); + szone_error(szone, "incorrect size information - block header was damaged", ptr, NULL); } else { memset(ptr, 0x55, SMALL_BYTES_FOR_MSIZE(msize)); } @@ -1965,55 +2138,57 @@ small_free_no_lock(szone_t *szone, small_region_t *region, void *ptr, msize_t ms static void * small_malloc_from_region_no_lock(szone_t *szone, msize_t msize) { - small_region_t last_region; void *last_block; void *ptr; void *new_address; msize_t *meta_headers; msize_t index ; - size_t region_capacity; - msize_t new_msize; - small_region_t *new_regions; msize_t msize_left; // Allocates from the last region or a freshly allocated region CHECK_LOCKED(szone, __PRETTY_FUNCTION__); // Before anything we transform the small_bytes_free_at_end - if any - to a regular free block if (szone->small_bytes_free_at_end) { - last_region = szone->small_regions[szone->num_small_regions - 1]; - last_block = (void *)(SMALL_REGION_END(last_region) - szone->small_bytes_free_at_end); - small_free_list_add_ptr(szone, last_block, SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end)); - *SMALL_METADATA_FOR_PTR(last_block) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end) | SMALL_IS_FREE; - szone->small_bytes_free_at_end = 0; + last_block = (void *)(SMALL_REGION_END(szone->last_small_region) - szone->small_bytes_free_at_end); + small_free_list_add_ptr(szone, last_block, SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end)); + *SMALL_METADATA_FOR_PTR(last_block) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end) | SMALL_IS_FREE; + szone->small_bytes_free_at_end = 0; } // time to create a new region - new_address = allocate_pages(szone, SMALL_REGION_SIZE, SMALL_BLOCKS_ALIGN, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL)); - if (!new_address) { - // out of memory! - return NULL; - } + new_address = allocate_pages(szone, SMALL_REGION_SIZE, SMALL_BLOCKS_ALIGN, + 0, VM_MEMORY_MALLOC_SMALL); + if (!new_address) + return NULL; + ptr = new_address; meta_headers = SMALL_META_HEADER_FOR_PTR(ptr); index = 0; - if (szone->num_small_regions == INITIAL_NUM_SMALL_REGIONS) { - // time to grow the number of regions - region_capacity = (1 << (32 - SMALL_BLOCKS_ALIGN)) - 20; // that is for sure the maximum number of small regions we can have - new_msize = (region_capacity * sizeof(small_region_t) + SMALL_QUANTUM - 1) / SMALL_QUANTUM; - new_regions = ptr; - small_meta_header_set_in_use(meta_headers, index, new_msize); - szone->num_small_objects++; - szone->num_bytes_in_small_objects += SMALL_BYTES_FOR_MSIZE(new_msize); - memcpy(new_regions, szone->small_regions, INITIAL_NUM_SMALL_REGIONS * sizeof(small_region_t)); - // We intentionally leak the previous regions pointer to avoid multi-threading crashes if - // another thread was reading it (unlocked) while we are changing it. 
- szone->small_regions = new_regions; // note we set this pointer after it's all set - ptr += SMALL_BYTES_FOR_MSIZE(new_msize); - index = new_msize; - } - szone->small_regions[szone->num_small_regions] = new_address; - // we bump the number of regions AFTER we have changes the regions pointer to enable finding a - // small region without taking the lock - // XXX naive assumption assumes memory ordering coherence between this and other CPUs + + // Check to see if the hash ring of small regions needs to grow. Try to + // avoid the hash ring becoming too dense. + if (szone->num_small_regions_allocated < (2 * szone->num_small_regions)) { + region_t *new_regions; + size_t new_size; + new_regions = hash_regions_grow_no_lock(szone, szone->small_regions, + szone->num_small_regions_allocated, + &new_size); + // Do not deallocate the current small_regions allocation since someone + // may be iterating it. Instead, just leak it. + szone->small_regions = new_regions; + szone->num_small_regions_allocated = new_size; + } + // Insert the new region into the hash ring, and update malloc statistics + hash_region_insert_no_lock(szone->small_regions, + szone->num_small_regions_allocated, + new_address); + szone->last_small_region = new_address; + + // we bump the number of regions AFTER we have changes the regions pointer + // to enable finding a small region without taking the lock + // + // FIXME: naive assumption assumes memory ordering coherence between this + // and other CPUs. This also applies to the near-identical code in + // tiny_malloc_from_region_no_lock. szone->num_small_regions++; small_meta_header_set_in_use(meta_headers, index, msize); msize_left = NUM_SMALL_BLOCKS - index; @@ -2047,7 +2222,7 @@ try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t ne } #if DEBUG_MALLOC if ((uintptr_t)next_block & (SMALL_QUANTUM - 1)) { - szone_error(szone, "internal invariant broken in realloc(next_block)", next_block); + szone_error(szone, "internal invariant broken in realloc(next_block)", next_block, NULL); } if (meta_headers[index] != old_msize) malloc_printf("*** try_realloc_small_in_place incorrect old %d %d\n", @@ -2096,20 +2271,21 @@ try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t ne } static boolean_t -szone_check_small_region(szone_t *szone, small_region_t *region) +szone_check_small_region(szone_t *szone, region_t region) { - unsigned char *ptr = SMALL_REGION_ADDRESS(*region); + unsigned char *ptr = SMALL_REGION_ADDRESS(region); msize_t *meta_headers = SMALL_META_HEADER_FOR_PTR(ptr); - unsigned char *region_end = SMALL_REGION_END(*region); + unsigned char *region_end = SMALL_REGION_END(region); msize_t prev_free = 0; unsigned index; msize_t msize_and_free; msize_t msize; free_list_t *free_head; + void *previous, *next; msize_t *follower; CHECK_LOCKED(szone, __PRETTY_FUNCTION__); - if (region == szone->small_regions + szone->num_small_regions - 1) region_end -= szone->small_bytes_free_at_end; + if (region == szone->last_small_region) region_end -= szone->small_bytes_free_at_end; while (ptr < region_end) { index = SMALL_META_INDEX_FOR_PTR(ptr); msize_and_free = meta_headers[index]; @@ -2117,8 +2293,8 @@ szone_check_small_region(szone_t *szone, small_region_t *region) // block is in use msize = msize_and_free; if (!msize) { - malloc_printf("*** invariant broken: null msize ptr=%p region#=%d num_small_regions=%d end=%p\n", - ptr, region - szone->small_regions, szone->num_small_regions, (void *)region_end); + malloc_printf("*** invariant broken: 
null msize ptr=%p num_small_regions=%d end=%p\n", + ptr, szone->num_small_regions, region_end); return 0; } if (msize > (LARGE_THRESHOLD / SMALL_QUANTUM)) { @@ -2142,19 +2318,21 @@ szone_check_small_region(szone_t *szone, small_region_t *region) return 0; } free_list_checksum(szone, free_head, __PRETTY_FUNCTION__); - if (free_head->previous && !SMALL_PTR_IS_FREE(free_head->previous)) { + previous = free_list_unchecksum_ptr(free_head->previous); + next = free_list_unchecksum_ptr(free_head->next); + if (previous && !SMALL_PTR_IS_FREE(previous)) { malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n", ptr, free_head->previous); return 0; } - if (free_head->next && !SMALL_PTR_IS_FREE(free_head->next)) { + if (next && !SMALL_PTR_IS_FREE(next)) { malloc_printf("*** invariant broken for %p (next is not a free pointer)\n", ptr); return 0; } if (SMALL_PREVIOUS_MSIZE(follower) != msize) { malloc_printf("*** invariant broken for small free %p followed by %p in region [%p-%p] " "(end marker incorrect) should be %d; in fact %d\n", - ptr, follower, SMALL_REGION_ADDRESS(*region), region_end, msize, SMALL_PREVIOUS_MSIZE(follower)); + ptr, follower, SMALL_REGION_ADDRESS(region), region_end, msize, SMALL_PREVIOUS_MSIZE(follower)); return 0; } ptr = (unsigned char *)follower; @@ -2165,148 +2343,183 @@ szone_check_small_region(szone_t *szone, small_region_t *region) } static kern_return_t -small_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t region_address, unsigned short num_regions, size_t small_bytes_free_at_end, memory_reader_t reader, vm_range_recorder_t recorder) +small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder) { - small_region_t *regions; - unsigned index = 0; - vm_range_t buffer[MAX_RECORDER_BUFFER]; - unsigned count = 0; - kern_return_t err; - small_region_t region; - vm_range_t range; - vm_range_t admin_range; - vm_range_t ptr_range; - unsigned char *mapped_region; - msize_t *block_header; - unsigned block_index; - unsigned block_limit; - msize_t msize_and_free; - msize_t msize; - - err = reader(task, region_address, sizeof(small_region_t) * num_regions, (void **)®ions); - if (err) return err; - while (index < num_regions) { - region = regions[index]; - range.address = (vm_address_t)SMALL_REGION_ADDRESS(region); - range.size = SMALL_REGION_SIZE; - if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) { - admin_range.address = range.address + (1 << SMALL_BLOCKS_ALIGN); - admin_range.size = range.size - (1 << SMALL_BLOCKS_ALIGN); - recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1); - } - if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) { - ptr_range.address = range.address; - ptr_range.size = 1 << SMALL_BLOCKS_ALIGN; - recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1); - } - if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) { - err = reader(task, range.address, range.size, (void **)&mapped_region); - if (err) return err; - block_header = (msize_t *)(mapped_region + (1 << SMALL_BLOCKS_ALIGN)); - block_index = 0; - block_limit = NUM_SMALL_BLOCKS; - if (index == num_regions - 1) - block_limit -= SMALL_MSIZE_FOR_BYTES(small_bytes_free_at_end); - while (block_index < block_limit) { - msize_and_free = block_header[block_index]; - msize = msize_and_free & ~ SMALL_IS_FREE; - if (! 
(msize_and_free & SMALL_IS_FREE)) { - // Block in use - buffer[count].address = range.address + SMALL_BYTES_FOR_MSIZE(block_index); - buffer[count].size = SMALL_BYTES_FOR_MSIZE(msize); - count++; - if (count >= MAX_RECORDER_BUFFER) { - recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); - count = 0; - } - } - block_index += msize; - } - } - index++; - } - if (count) { - recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); - } - return 0; + size_t num_regions = szone->num_small_regions_allocated; + void *last_small_free = szone->last_small_free; + size_t index; + region_t *regions; + vm_range_t buffer[MAX_RECORDER_BUFFER]; + unsigned count = 0; + kern_return_t err; + region_t region; + vm_range_t range; + vm_range_t admin_range; + vm_range_t ptr_range; + unsigned char *mapped_region; + msize_t *block_header; + unsigned block_index; + unsigned block_limit; + msize_t msize_and_free; + msize_t msize; + vm_address_t last_small_free_ptr = 0; + msize_t last_small_free_msize = 0; + + if (last_small_free) { + last_small_free_ptr = (uintptr_t)last_small_free & ~(SMALL_QUANTUM - 1); + last_small_free_msize = (uintptr_t)last_small_free & (SMALL_QUANTUM - 1); + } + + err = reader(task, (vm_address_t)szone->small_regions, sizeof(region_t) * num_regions, (void **)®ions); + if (err) return err; + for (index = 0; index < num_regions; ++index) { + region = regions[index]; + if (region) { + range.address = (vm_address_t)SMALL_REGION_ADDRESS(region); + range.size = SMALL_REGION_SIZE; + if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) { + admin_range.address = range.address + SMALL_HEADER_START; + admin_range.size = SMALL_ARRAY_SIZE; + recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1); + } + if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) { + ptr_range.address = range.address; + ptr_range.size = NUM_SMALL_BLOCKS * SMALL_QUANTUM; + recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1); + } + if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) { + err = reader(task, range.address, range.size, (void **)&mapped_region); + if (err) return err; + block_header = (msize_t *)(mapped_region + SMALL_HEADER_START); + block_index = 0; + block_limit = NUM_SMALL_BLOCKS; + if (region == szone->last_small_region) + block_limit -= SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end); + while (block_index < block_limit) { + msize_and_free = block_header[block_index]; + msize = msize_and_free & ~ SMALL_IS_FREE; + if (! (msize_and_free & SMALL_IS_FREE) && + range.address + SMALL_BYTES_FOR_MSIZE(block_index) != last_small_free_ptr) { + // Block in use + buffer[count].address = range.address + SMALL_BYTES_FOR_MSIZE(block_index); + buffer[count].size = SMALL_BYTES_FOR_MSIZE(msize); + count++; + if (count >= MAX_RECORDER_BUFFER) { + recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); + count = 0; + } + } + block_index += msize; + } + } + } + } + if (count) { + recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count); + } + return 0; } -static INLINE void * +static void * small_malloc_from_free_list(szone_t *szone, msize_t msize) { - grain_t grain = (msize <= NUM_SMALL_SLOTS) ? 
msize - 1 : NUM_SMALL_SLOTS - 1; - unsigned bitmap = szone->small_bitmap & ~ ((1 << grain) - 1); - void *ptr; - msize_t this_msize; - free_list_t **free_list; - free_list_t **limit; - free_list_t *next; - msize_t leftover_msize; - void *leftover_ptr; - msize_t *meta_headers; - unsigned leftover_index; + free_list_t *ptr; + msize_t this_msize; + grain_t slot = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1; + free_list_t **free_list = szone->small_free_list; + free_list_t *next; + free_list_t **limit; + unsigned bitmap = szone->small_bitmap & ~ ((1 << slot) - 1); + msize_t leftover_msize; + free_list_t *leftover_ptr; + msize_t *meta_headers; + unsigned leftover_index; // Assumes locked CHECK_LOCKED(szone, __PRETTY_FUNCTION__); - if (!bitmap) goto try_small_from_end; - grain = BITMAP32_FFS(bitmap) - 1; - // first try the small grains - limit = szone->small_free_list + NUM_SMALL_SLOTS - 1; - free_list = szone->small_free_list + grain; + // Mask off the bits representing slots holding free blocks smaller than the + // size we need. If there are no larger free blocks, try allocating from + // the free space at the end of the tiny region. + if (!bitmap) + goto try_small_from_end; + + slot = BITMAP32_CTZ(bitmap); + limit = free_list + NUM_SMALL_SLOTS - 1; + free_list += slot; + + // Iterate over freelists looking for free blocks, starting at first list + // which is not empty, and contains blocks which are large enough to satisfy + // our request. while (free_list < limit) { - // try bigger grains - ptr = *free_list; - if (ptr) { - next = ((free_list_t *)ptr)->next; - if (next) { - next->previous = NULL; - free_list_set_checksum(szone, next); - } - *free_list = next; - this_msize = SMALL_PTR_SIZE(ptr); - goto add_leftover_and_proceed; - } - free_list++; - } - // We now check the large grains for one that is big enough - ptr = *free_list; + ptr = *free_list; + if (ptr) { + next = free_list_unchecksum_ptr(ptr->next); + *free_list = next; + this_msize = SMALL_PTR_SIZE(ptr); + if (next) { + next->previous = ptr->previous; + } else { + BITMAP32_CLR(szone->small_bitmap, this_msize - 1); + } + goto add_leftover_and_proceed; + } + free_list++; + } + + // We are now looking at the last slot, which contains blocks equal to, or + // due to coalescing of free blocks, larger than 31 * small quantum size. + // If the last freelist is not empty, and the head contains a block that is + // larger than our request, then the remainder is put back on the free list. + // + // FIXME: This code doesn't have the optimization from the 'tiny' codepath + // that optimizes for the this_msize >= 2 * num slots + // FIXME: this code also seems somewhat bogus. There's a check for + // this_msize >= msize, but by definition we can't ask for a small + // block larger than 31 small quanta, and every free block in this + // slot has to be at least that large. 
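//
// Aside on the first FIXME above: the tiny-path optimization it refers to
// splits the oversized head block in place -- the leftover inherits the head's
// list links and becomes the new head -- instead of popping the head and
// pushing the remainder back. A minimal standalone sketch of that idea with
// toy types (hypothetical demo_* names, no pointer checksumming, no block
// metadata headers), independent of the szone structures:
//
//     #include <assert.h>
//     #include <stddef.h>
//     #include <stdlib.h>
//
//     // Toy free-list node living at the start of each free block.
//     typedef struct demo_node {
//         struct demo_node *prev, *next;
//         size_t            size;          // block size in bytes
//     } demo_node;
//
//     // Carve 'want' bytes off the head block in place; the leftover (at
//     // head + want) takes over the head's list links, so no pop + push.
//     static void *demo_carve_from_head(demo_node **head, size_t want, size_t min_leftover) {
//         demo_node *blk = *head;
//         if (!blk || blk->size < want + min_leftover)
//             return NULL;
//         demo_node *leftover = (demo_node *)((char *)blk + want);
//         leftover->size = blk->size - want;
//         leftover->prev = blk->prev;      // NULL, since blk was the head
//         leftover->next = blk->next;
//         if (leftover->next)
//             leftover->next->prev = leftover;
//         *head = leftover;                // leftover replaces blk as the head
//         return blk;
//     }
//
//     int main(void) {
//         enum { BLOCK = 4096, WANT = 256 };
//         void *raw = malloc(BLOCK);
//         demo_node *head = raw;
//         head->prev = head->next = NULL;
//         head->size = BLOCK;
//         void *p = demo_carve_from_head(&head, WANT, sizeof(demo_node));
//         assert(p == raw);                          // allocation comes from the old head
//         assert((char *)head == (char *)raw + WANT);
//         assert(head->size == BLOCK - WANT);        // remainder stays on the list
//         free(raw);
//         return 0;
//     }
//
// The real tiny code additionally re-checksums the neighbour's 'previous'
// link and rewrites the block metadata header for the leftover; the small
// path below simply scans the last slot and pops the first block that fits.
//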
+ ptr = *limit; while (ptr) { - this_msize = SMALL_PTR_SIZE(ptr); - if (this_msize >= msize) { - small_free_list_remove_ptr(szone, ptr, this_msize); - goto add_leftover_and_proceed; - } - ptr = ((free_list_t *)ptr)->next; + free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); + next = free_list_unchecksum_ptr(ptr->next); + this_msize = SMALL_PTR_SIZE(ptr); + if (this_msize >= msize) { + small_free_list_remove_ptr(szone, ptr, this_msize); + goto add_leftover_and_proceed; + } + ptr = next; } + try_small_from_end: // Let's see if we can use szone->small_bytes_free_at_end if (szone->small_bytes_free_at_end >= SMALL_BYTES_FOR_MSIZE(msize)) { - ptr = (void *)(SMALL_REGION_END(szone->small_regions[szone->num_small_regions-1]) - szone->small_bytes_free_at_end); - szone->small_bytes_free_at_end -= SMALL_BYTES_FOR_MSIZE(msize); - if (szone->small_bytes_free_at_end) { - // let's mark this block as in use to serve as boundary - *SMALL_METADATA_FOR_PTR(ptr + SMALL_BYTES_FOR_MSIZE(msize)) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end); - } - this_msize = msize; - goto return_small_alloc; + ptr = (free_list_t *)(SMALL_REGION_END(szone->last_small_region) - szone->small_bytes_free_at_end); + szone->small_bytes_free_at_end -= SMALL_BYTES_FOR_MSIZE(msize); + if (szone->small_bytes_free_at_end) { + // let's mark this block as in use to serve as boundary + *SMALL_METADATA_FOR_PTR((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize)) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end); + } + this_msize = msize; + goto return_small_alloc; } return NULL; + add_leftover_and_proceed: if (this_msize > msize) { - leftover_msize = this_msize - msize; - leftover_ptr = ptr + SMALL_BYTES_FOR_MSIZE(msize); + leftover_msize = this_msize - msize; + leftover_ptr = (free_list_t *)((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize)); #if DEBUG_MALLOC - if (LOG(szone,ptr)) { - malloc_printf("in small_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize); - } + if (LOG(szone,ptr)) { + malloc_printf("in small_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize); + } #endif - small_free_list_add_ptr(szone, leftover_ptr, leftover_msize); - meta_headers = SMALL_META_HEADER_FOR_PTR(leftover_ptr); - leftover_index = SMALL_META_INDEX_FOR_PTR(leftover_ptr); - small_meta_header_set_is_free(meta_headers, leftover_index, leftover_msize); - this_msize = msize; + small_free_list_add_ptr(szone, leftover_ptr, leftover_msize); + meta_headers = SMALL_META_HEADER_FOR_PTR(leftover_ptr); + leftover_index = SMALL_META_INDEX_FOR_PTR(leftover_ptr); + small_meta_header_set_is_free(meta_headers, leftover_index, leftover_msize); + this_msize = msize; } + return_small_alloc: szone->num_small_objects++; szone->num_bytes_in_small_objects += SMALL_BYTES_FOR_MSIZE(this_msize); @@ -2383,38 +2596,43 @@ small_malloc_cleared_no_lock(szone_t *szone, msize_t msize) } static INLINE void -free_small(szone_t *szone, void *ptr, small_region_t *small_region) +free_small(szone_t *szone, void *ptr, region_t *small_region) { - msize_t msize_and_free; + msize_t msize = SMALL_PTR_SIZE(ptr); #if SMALL_CACHE void *ptr2; #endif // ptr is known to be in small_region - msize_and_free = *SMALL_METADATA_FOR_PTR(ptr); - if (msize_and_free & SMALL_IS_FREE) { - szone_error(szone, "Object already freed being freed", ptr); - return; - } - CHECK(szone, __PRETTY_FUNCTION__); SZONE_LOCK(szone); #if SMALL_CACHE ptr2 = szone->last_small_free; - szone->last_small_free = (void *)(((uintptr_t)ptr) | msize_and_free); + /* 
check that we don't already have this pointer in the cache */ + if (ptr == (void *)((uintptr_t)ptr2 & ~ (SMALL_QUANTUM - 1))) { + szone_error(szone, "double free", ptr, NULL); + return; + } +#endif + if (SMALL_PTR_IS_FREE(ptr)) { + szone_error(szone, "double free", ptr, NULL); + return; + } +#if SMALL_CACHE + szone->last_small_free = (void *)(((uintptr_t)ptr) | msize); if (!ptr2) { SZONE_UNLOCK(szone); CHECK(szone, __PRETTY_FUNCTION__); return; } - msize_and_free = (uintptr_t)ptr2 & (SMALL_QUANTUM - 1); + msize = (uintptr_t)ptr2 & (SMALL_QUANTUM - 1); ptr = (void *)(((uintptr_t)ptr2) & ~ (SMALL_QUANTUM - 1)); small_region = small_region_for_ptr_no_lock(szone, ptr); if (!small_region) { - szone_error(szone, "double free (small cache)", ptr); + szone_error(szone, "double free (small cache)", ptr, NULL); return; } #endif - small_free_no_lock(szone, small_region, ptr, msize_and_free); + small_free_no_lock(szone, small_region, ptr, msize); SZONE_UNLOCK(szone); CHECK(szone, __PRETTY_FUNCTION__); } @@ -2424,28 +2642,33 @@ print_small_free_list(szone_t *szone) { grain_t grain = 0; free_list_t *ptr; + _SIMPLE_STRING b = _simple_salloc(); - malloc_printf("small free sizes: "); - while (grain < NUM_SMALL_SLOTS) { - ptr = szone->small_free_list[grain]; - if (ptr) { - malloc_printf("%s%y[%d]; ", (grain == NUM_SMALL_SLOTS-1) ? ">=" : "", (grain + 1) * SMALL_QUANTUM, free_list_count(ptr)); + if (b) { + _simple_sappend(b, "small free sizes: "); + while (grain < NUM_SMALL_SLOTS) { + ptr = szone->small_free_list[grain]; + if (ptr) { + _simple_sprintf(b, "%s%y[%d]; ", (grain == NUM_SMALL_SLOTS-1) ? ">=" : "", (grain + 1) * SMALL_QUANTUM, free_list_count(ptr)); + } + grain++; } - grain++; + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b)); + _simple_sfree(b); } - malloc_printf("\n"); } static void -print_small_region(szone_t *szone, boolean_t verbose, small_region_t *region, size_t bytes_at_end) +print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_end) { unsigned counts[1024]; unsigned in_use = 0; - void *start = SMALL_REGION_ADDRESS(*region); - void *limit = SMALL_REGION_END(*region) - bytes_at_end; + void *start = SMALL_REGION_ADDRESS(region); + void *limit = SMALL_REGION_END(region) - bytes_at_end; msize_t msize_and_free; msize_t msize; unsigned ci; + _SIMPLE_STRING b; memset(counts, 0, 1024 * sizeof(unsigned)); while (start < limit) { @@ -2459,17 +2682,20 @@ print_small_region(szone_t *szone, boolean_t verbose, small_region_t *region, si } start += SMALL_BYTES_FOR_MSIZE(msize); } - malloc_printf("Small region [%p-%p, %y]\tIn_use=%d ", - SMALL_REGION_ADDRESS(*region), SMALL_REGION_END(*region), (int)SMALL_REGION_SIZE, in_use); - if (bytes_at_end) - malloc_printf("Untouched=%y ", bytes_at_end); - if (verbose && in_use) { - malloc_printf("\n\tSizes in use: "); - for (ci = 0; ci < 1024; ci++) - if (counts[ci]) - malloc_printf("%d[%d] ", SMALL_BYTES_FOR_MSIZE(ci), counts[ci]); + if ((b = _simple_salloc()) != NULL) { + _simple_sprintf(b, "Small region [%p-%p, %y]\tIn_use=%d ", + SMALL_REGION_ADDRESS(region), SMALL_REGION_END(region), (int)SMALL_REGION_SIZE, in_use); + if (bytes_at_end) + _simple_sprintf(b, "Untouched=%ly", bytes_at_end); + if (verbose && in_use) { + _simple_sappend(b, "\n\tSizes in use: "); + for (ci = 0; ci < 1024; ci++) + if (counts[ci]) + _simple_sprintf(b, "%d[%d] ", SMALL_BYTES_FOR_MSIZE(ci), counts[ci]); + } + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b)); + 
_simple_sfree(b); } - malloc_printf("\n"); } static boolean_t @@ -2482,32 +2708,127 @@ small_free_list_check(szone_t *szone, grain_t grain) CHECK_LOCKED(szone, __PRETTY_FUNCTION__); while (ptr) { - msize_and_free = *SMALL_METADATA_FOR_PTR(ptr); - count++; - if (!(msize_and_free & SMALL_IS_FREE)) { - malloc_printf("*** in-use ptr in free list grain=%d count=%d ptr=%p\n", grain, count, ptr); - return 0; - } - if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) { - malloc_printf("*** unaligned ptr in free list grain=%d count=%d ptr=%p\n", grain, count, ptr); - return 0; - } - if (!small_region_for_ptr_no_lock(szone, ptr)) { - malloc_printf("*** ptr not in szone grain=%d count=%d ptr=%p\n", grain, count, ptr); - return 0; - } - free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); - if (ptr->previous != previous) { - malloc_printf("*** previous incorrectly set grain=%d count=%d ptr=%p\n", grain, count, ptr); - return 0; - } - previous = ptr; - ptr = ptr->next; + msize_and_free = *SMALL_METADATA_FOR_PTR(ptr); + count++; + if (!(msize_and_free & SMALL_IS_FREE)) { + malloc_printf("*** in-use ptr in free list grain=%d count=%d ptr=%p\n", grain, count, ptr); + return 0; + } + if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) { + malloc_printf("*** unaligned ptr in free list grain=%d count=%d ptr=%p\n", grain, count, ptr); + return 0; + } + if (!small_region_for_ptr_no_lock(szone, ptr)) { + malloc_printf("*** ptr not in szone grain=%d count=%d ptr=%p\n", grain, count, ptr); + return 0; + } + free_list_checksum(szone, ptr, __PRETTY_FUNCTION__); + if (free_list_unchecksum_ptr(ptr->previous) != previous) { + malloc_printf("*** previous incorrectly set grain=%d count=%d ptr=%p\n", grain, count, ptr); + return 0; + } + previous = ptr; + ptr = free_list_unchecksum_ptr(ptr->next); } return 1; } -/********************* LARGE ENTRY UTILITIES ************************/ +/******************************************************************************* + * Region hash implementation + * + * This is essentially a duplicate of the existing Large allocator hash, minus + * the ability to remove entries. The two should be combined eventually. + ******************************************************************************/ +#pragma mark region hash + +/* + * hash_lookup_region_no_lock - Scan a hash ring looking for an entry for a + * given region. + * + * FIXME: If consecutive queries of the same region are likely, a one-entry + * cache would likely be a significant performance win here. + */ +static region_t * +hash_lookup_region_no_lock(region_t *regions, size_t num_entries, region_t r) { + size_t index, hash_index; + region_t *entry; + + if (!num_entries) + return 0; + + index = hash_index = ((uintptr_t)r >> 20) % num_entries; + do { + entry = regions + index; + if (*entry == 0) + return 0; + if (*entry == r) + return entry; + if (++index == num_entries) + index = 0; + } while (index != hash_index); + return 0; +} + +/* + * hash_region_insert_no_lock - Insert a region into the hash ring. + */ +static void +hash_region_insert_no_lock(region_t *regions, size_t num_entries, region_t r) { + size_t index, hash_index; + region_t *entry; + + index = hash_index = ((uintptr_t)r >> 20) % num_entries; + do { + entry = regions + index; + if (*entry == 0) { + *entry = r; + return; + } + if (++index == num_entries) + index = 0; + } while (index != hash_index); +} + +/* + * hash_regions_alloc_no_lock - Allocate space for a number of entries. 
This + * must be a VM allocation as to avoid recursing between allocating a new small + * region, and asking the small region to allocate space for the new list of + * regions. + */ +static region_t * +hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries) +{ + size_t size = num_entries * sizeof(region_t); + return allocate_pages(szone, round_page(size), 0, 0, VM_MEMORY_MALLOC); +} + +/* + * hash_regions_grow_no_lock - Grow the hash ring, and rehash the entries. + * Return the new region and new size to update the szone. Do not deallocate + * the old entries since someone may still be allocating them. + */ +static region_t * +hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size, + size_t *new_size) +{ + // double in size and allocate memory for the regions + *new_size = old_size * 2 + 1; + region_t *new_regions = hash_regions_alloc_no_lock(szone, *new_size); + + // rehash the entries into the new list + size_t index; + for (index = 0; index < old_size; ++index) { + region_t r = regions[index]; + if (r != 0) + hash_region_insert_no_lock(new_regions, *new_size, r); + } + return new_regions; +} + +/******************************************************************************* + * Large allocator implementation + ******************************************************************************/ +#pragma mark large allocator #if DEBUG_MALLOC @@ -2517,12 +2838,16 @@ large_debug_print(szone_t *szone) unsigned num_large_entries = szone->num_large_entries; unsigned index = num_large_entries; large_entry_t *range; + _SIMPLE_STRING b = _simple_salloc(); - for (index = 0, range = szone->large_entries; index < szone->num_large_entries; index++, range++) - if (!LARGE_ENTRY_IS_EMPTY(*range)) - malloc_printf("%d: %p(%y); ", index, LARGE_ENTRY_ADDRESS(*range), LARGE_ENTRY_SIZE(*range)); + if (b) { + for (index = 0, range = szone->large_entries; index < szone->num_large_entries; index++, range++) + if (!LARGE_ENTRY_IS_EMPTY(*range)) + _simple_sprintf(b, "%d: %p(%y); ", index, LARGE_ENTRY_ADDRESS(*range), LARGE_ENTRY_SIZE(*range)); - malloc_printf("\n"); + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b)); + _simple_sfree(b); + } } #endif @@ -2606,7 +2931,7 @@ large_entries_alloc_no_lock(szone_t *szone, unsigned num) // Note that we allocate memory (via a system call) under a spin lock // That is certainly evil, however it's very rare in the lifetime of a process // The alternative would slow down the normal case - return (void *)allocate_pages(szone, round_page(size), 0, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_LARGE)); + return allocate_pages(szone, round_page(size), 0, 0, VM_MEMORY_MALLOC_LARGE); } else { return small_malloc_cleared_no_lock(szone, SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1)); } @@ -2618,7 +2943,7 @@ large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, // returns range to deallocate size_t size = num * sizeof(large_entry_t); boolean_t is_vm_allocation = size >= LARGE_THRESHOLD; - small_region_t *region; + region_t *region; msize_t msize_and_free; if (is_vm_allocation) { @@ -2629,14 +2954,14 @@ large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, region = small_region_for_ptr_no_lock(szone, entries); msize_and_free = *SMALL_METADATA_FOR_PTR(entries); if (msize_and_free & SMALL_IS_FREE) { - szone_error(szone, "object already freed being freed", entries); + szone_error(szone, "object already freed being freed", entries, NULL); return; } small_free_no_lock(szone, region, 
entries, msize_and_free); } } -static void +static large_entry_t * large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate) { // sets range_to_deallocate @@ -2647,6 +2972,10 @@ large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate) unsigned index = old_num_entries; large_entry_t oldRange; + // if the allocation of new entries failed, bail + if (new_entries == NULL) + return NULL; + szone->num_large_entries = new_num_entries; szone->large_entries = new_entries; @@ -2662,6 +2991,7 @@ large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate) } else { range_to_deallocate->size = 0; } + return new_entries; } // frees the specific entry in the size table @@ -2676,7 +3006,7 @@ large_free_no_lock(szone_t *szone, large_entry_t *entry) szone->num_large_objects_in_use--; szone->num_bytes_in_large_objects -= range.size; if (szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) { - protect(szone, (void *)range.address, range.size, VM_PROT_READ | VM_PROT_WRITE, szone->debug_flags); + protect((void *)range.address, range.size, VM_PROT_READ | VM_PROT_WRITE, szone->debug_flags); range.address -= vm_page_size; range.size += 2 * vm_page_size; } @@ -2808,7 +3138,7 @@ huge_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_addres } static void * -large_and_huge_malloc(szone_t *szone, unsigned num_pages) +large_and_huge_malloc(szone_t *szone, size_t num_pages) { void *addr; vm_range_t range_to_deallocate; @@ -2821,7 +3151,7 @@ large_and_huge_malloc(szone_t *szone, unsigned num_pages) size = (size_t)num_pages << vm_page_shift; range_to_deallocate.size = 0; if (num_pages >= (1 << vm_page_shift)) { - addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MAKE_TAG(VM_MEMORY_MALLOC_HUGE)); + addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MEMORY_MALLOC_HUGE); if (addr == NULL) return NULL; huge_entry.size = size; @@ -2832,10 +3162,10 @@ large_and_huge_malloc(szone_t *szone, unsigned num_pages) szone->num_bytes_in_huge_objects += size; } else { - addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MAKE_TAG(VM_MEMORY_MALLOC_LARGE)); + addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MEMORY_MALLOC_LARGE); #if DEBUG_MALLOC if (LOG(szone, addr)) - malloc_printf("in szone_malloc true large allocation at %p for %y\n", (void *)addr, size); + malloc_printf("in szone_malloc true large allocation at %p for %ly\n", (void *)addr, size); #endif SZONE_LOCK(szone); if (addr == NULL) { @@ -2852,7 +3182,11 @@ large_and_huge_malloc(szone_t *szone, unsigned num_pages) if ((szone->num_large_objects_in_use + 1) * 4 > szone->num_large_entries) { // density of hash table too high; grow table // we do that under lock to avoid a race - large_entries_grow_no_lock(szone, &range_to_deallocate); + large_entry_t *entries = large_entries_grow_no_lock(szone, &range_to_deallocate); + if (entries == NULL) { + SZONE_UNLOCK(szone); + return NULL; + } } large_entry.address_and_num_pages = (uintptr_t)addr | num_pages; #if DEBUG_MALLOC @@ -2907,7 +3241,7 @@ free_large_or_huge(szone_t *szone, void *ptr) #if DEBUG_MALLOC large_debug_print(szone); #endif - szone_error(szone, "pointer being freed was not allocated", ptr); + szone_error(szone, "pointer being freed was not allocated", ptr, NULL); return; } SZONE_UNLOCK(szone); // we release the lock asap @@ -2969,7 +3303,7 @@ try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, s /* extend existing large entry */ large_entry = large_entry_for_pointer_no_lock(szone, ptr); 
if (!large_entry) { - szone_error(szone, "large entry reallocated is not properly in table", ptr); + szone_error(szone, "large entry reallocated is not properly in table", ptr, NULL); /* XXX will cause fault on next reference to entry */ } large_entry->address_and_num_pages = (uintptr_t)ptr | (new_size >> vm_page_shift); @@ -2978,7 +3312,7 @@ try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, s /* extend existing huge entry */ huge_entry = huge_entry_for_pointer_no_lock(szone, ptr); if (!huge_entry) { - szone_error(szone, "huge entry reallocated is not properly in table", ptr); + szone_error(szone, "huge entry reallocated is not properly in table", ptr, NULL); /* XXX will cause fault on next reference to huge_entry */ } huge_entry->size = new_size; @@ -2990,7 +3324,6 @@ try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, s large_entry = large_entry_for_pointer_no_lock(szone, ptr); saved_entry = *large_entry; // in case we need to put it back large_free_no_lock(szone, large_entry); - szone->num_bytes_in_large_objects -= old_size; /* and get a huge entry */ huge.address = (vm_address_t)ptr; @@ -3013,8 +3346,8 @@ try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, s static void szone_free(szone_t *szone, void *ptr) { - tiny_region_t *tiny_region; - small_region_t *small_region; + region_t *tiny_region; + region_t *small_region; #if DEBUG_MALLOC if (LOG(szone, ptr)) @@ -3026,10 +3359,14 @@ szone_free(szone_t *szone, void *ptr) * Try to free to a tiny region. */ if ((uintptr_t)ptr & (TINY_QUANTUM - 1)) { - szone_error(szone, "Non-aligned pointer being freed", ptr); + szone_error(szone, "Non-aligned pointer being freed", ptr, NULL); return; } if ((tiny_region = tiny_region_for_ptr_no_lock(szone, ptr)) != NULL) { + if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) { + szone_error(szone, "Pointer to metadata being freed", ptr, NULL); + return; + } free_tiny(szone, ptr, tiny_region); return; } @@ -3038,17 +3375,21 @@ szone_free(szone_t *szone, void *ptr) * Try to free to a small region. 
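
[Editor's sketch] The new guards in szone_free refuse to free any pointer whose quantum index falls at or beyond NUM_TINY_BLOCKS, i.e. a pointer into the region's trailing metadata rather than its data blocks. A hedged sketch of that index arithmetic follows, using the layout figures given earlier (1 MB regions, 16-byte quanta, 64520 data blocks) and assuming, for illustration only, that TINY_INDEX_FOR_PTR is simply the pointer's quantum offset within its region.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SKETCH_SHIFT_TINY_QUANTUM   4
#define SKETCH_TINY_BLOCKS_ALIGN    20              /* 1 MB regions */
#define SKETCH_NUM_TINY_BLOCKS      64520

/* Region base: mask off the low 20 bits, as TINY_REGION_FOR_PTR does. */
static uintptr_t region_for_ptr(uintptr_t p)
{
    return p & ~(((uintptr_t)1 << SKETCH_TINY_BLOCKS_ALIGN) - 1);
}

/* Quantum index of a pointer inside its region (assumed form of TINY_INDEX_FOR_PTR). */
static unsigned tiny_index_for_ptr(uintptr_t p)
{
    return (unsigned)((p - region_for_ptr(p)) >> SKETCH_SHIFT_TINY_QUANTUM);
}

/* Mirror of the new guard: anything at or past block 64520 is metadata. */
static bool points_into_metadata(uintptr_t p)
{
    return tiny_index_for_ptr(p) >= SKETCH_NUM_TINY_BLOCKS;
}

int main(void)
{
    uintptr_t region = 0x40000000u;                       /* hypothetical region base */
    uintptr_t data   = region + 32 * 16;                  /* 32nd quantum: ordinary block */
    uintptr_t header = region + (uintptr_t)64520 * 16;    /* first byte of the header bits */

    printf("data block  -> metadata? %d\n", points_into_metadata(data));    /* 0 */
    printf("header area -> metadata? %d\n", points_into_metadata(header));  /* 1 */
    return 0;
}
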
*/ if ((uintptr_t)ptr & (SMALL_QUANTUM - 1)) { - szone_error(szone, "Non-aligned pointer being freed (2)", ptr); + szone_error(szone, "Non-aligned pointer being freed (2)", ptr, NULL); return; } if ((small_region = small_region_for_ptr_no_lock(szone, ptr)) != NULL) { + if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS) { + szone_error(szone, "Pointer to metadata being freed (2)", ptr, NULL); + return; + } free_small(szone, ptr, small_region); return; } /* check that it's a legal large/huge allocation */ if ((uintptr_t)ptr & (vm_page_size - 1)) { - szone_error(szone, "non-page-aligned, non-allocated pointer being freed", ptr); + szone_error(szone, "non-page-aligned, non-allocated pointer being freed", ptr, NULL); return; } free_large_or_huge(szone, ptr); @@ -3059,7 +3400,6 @@ szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_request { void *ptr; msize_t msize; - unsigned num_pages; if (size <= 31*TINY_QUANTUM) { // think tiny @@ -3074,11 +3414,11 @@ szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_request ptr = small_malloc_should_clear(szone, msize, cleared_requested); } else { // large or huge - num_pages = round_page(size) >> vm_page_shift; + size_t num_pages = round_page(size) >> vm_page_shift; if (num_pages == 0) /* Overflowed */ ptr = 0; else - ptr = large_and_huge_malloc(szone, num_pages); + ptr = large_and_huge_malloc(szone, num_pages); } #if DEBUG_MALLOC if (LOG(szone, ptr)) @@ -3101,14 +3441,17 @@ szone_malloc(szone_t *szone, size_t size) { static void * szone_calloc(szone_t *szone, size_t num_items, size_t size) { - return szone_malloc_should_clear(szone, num_items * size, 1); + size_t total_bytes = num_items * size; + if ((num_items > 1) && (size != 0) && ((total_bytes / size) != num_items)) + return NULL; + return szone_malloc_should_clear(szone, total_bytes, 1); } static void * szone_valloc(szone_t *szone, size_t size) { void *ptr; - unsigned num_pages; + size_t num_pages; num_pages = round_page(size) >> vm_page_shift; ptr = large_and_huge_malloc(szone, num_pages); @@ -3142,6 +3485,8 @@ szone_size(szone_t *szone, const void *ptr) if ((uintptr_t)ptr & (TINY_QUANTUM - 1)) return 0; if (tiny_region_for_ptr_no_lock(szone, ptr)) { + if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) + return 0; msize = get_tiny_meta_header(ptr, &is_free); return (is_free) ? 0 : TINY_BYTES_FOR_MSIZE(msize); } @@ -3152,6 +3497,8 @@ szone_size(szone_t *szone, const void *ptr) if ((uintptr_t)ptr & (SMALL_QUANTUM - 1)) return 0; if (small_region_for_ptr_no_lock(szone, ptr)) { + if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS) + return 0; msize_and_free = *SMALL_METADATA_FOR_PTR(ptr); return (msize_and_free & SMALL_IS_FREE) ? 0 : SMALL_BYTES_FOR_MSIZE(msize_and_free); } @@ -3198,7 +3545,7 @@ szone_realloc(szone_t *szone, void *ptr, size_t new_size) } old_size = szone_size(szone, ptr); if (!old_size) { - szone_error(szone, "pointer being reallocated was not allocated", ptr); + szone_error(szone, "pointer being reallocated was not allocated", ptr, NULL); return NULL; } /* we never shrink an allocation */ @@ -3257,45 +3604,42 @@ szone_realloc(szone_t *szone, void *ptr, size_t new_size) return new_ptr; } -// given a size, returns pointers capable of holding that size -// returns the number of pointers allocated -// may return 0 - this function will do best attempts, but just that +// given a size, returns the number of pointers allocated capable of holding +// that size, up to the limit specified by the 'count' argument. 
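
[Editor's sketch] szone_calloc now rejects requests whose element count times element size would wrap size_t, using the divide-back test shown in the hunk above. The same check in isolation, with an illustrative helper name:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Returns true and stores the product only when num_items * size fits in a
 * size_t, using the same divide-back test as the szone_calloc change above. */
static bool checked_mul(size_t num_items, size_t size, size_t *total)
{
    size_t t = num_items * size;               /* may wrap; that is what we detect */
    if (num_items > 1 && size != 0 && t / size != num_items)
        return false;                          /* wrapped: reject the request */
    *total = t;
    return true;
}

int main(void)
{
    size_t total;
    printf("1000 x 16      : %s\n", checked_mul(1000, 16, &total) ? "ok" : "overflow");
    printf("SIZE_MAX/2 x 3 : %s\n", checked_mul(SIZE_MAX / 2, 3, &total) ? "ok" : "overflow");
    return 0;
}
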
These pointers +// are stored in the 'results' array, which must be allocated by the caller. +// may return zero, since this function is only a best attempt at allocating +// the pointers. clients should be prepared to call malloc for any additional +// blocks they need. static unsigned szone_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count) { - msize_t msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1); - size_t chunk_size = TINY_BYTES_FOR_MSIZE(msize); - free_list_t **free_list = szone->tiny_free_list + msize - 1; - free_list_t *ptr = *free_list; - unsigned found = 0; + msize_t msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1); + unsigned found = 0; + // only bother implementing this for tiny if (size > 31*TINY_QUANTUM) - return 0; // only bother implementing this for tiny + return 0; + // make sure to return objects at least one quantum in size if (!msize) - msize = 1; + msize = 1; + CHECK(szone, __PRETTY_FUNCTION__); - SZONE_LOCK(szone); // might as well lock right here to avoid concurrency issues - while (found < count) { - if (!ptr) - break; - *results++ = ptr; - found++; - set_tiny_meta_header_in_use(ptr, msize); - ptr = ptr->next; + + // We must lock the zone now, since tiny_malloc_from_free_list assumes that + // the caller has done so. + SZONE_LOCK(szone); + + // with the zone locked, allocate objects from the free list until all + // sufficiently large objects have been exhausted, or we have met our quota + // of objects to allocate. + while (found < count) { + void *ptr = tiny_malloc_from_free_list(szone, msize); + if (!ptr) + break; + + *results++ = ptr; + found++; } - if (ptr) { - ptr->previous = NULL; - free_list_set_checksum(szone, ptr); - } - *free_list = ptr; - - // Note that we could allocate from the free lists for larger msize - // But that may un-necessarily fragment - so we might as well let the client do that - // We could also allocate from szone->tiny_bytes_free_at_end - // But that means we'll "eat-up" the untouched area faster, increasing the working set - // So we just return what we have and just that - szone->num_tiny_objects += found; - szone->num_bytes_in_tiny_objects += chunk_size * found; SZONE_UNLOCK(szone); return found; } @@ -3305,7 +3649,7 @@ szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count) { unsigned cc = 0; void *ptr; - tiny_region_t *tiny_region; + region_t *tiny_region; boolean_t is_free; msize_t msize; @@ -3317,15 +3661,19 @@ szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count) SZONE_LOCK(szone); while (cc < count) { ptr = to_be_freed[cc]; - /* XXX this really slows us down */ - tiny_region = tiny_region_for_ptr_no_lock(szone, ptr); - if (tiny_region) { - // this is a tiny pointer - msize = get_tiny_meta_header(ptr, &is_free); - if (is_free) - break; // a double free; let the standard free deal with it - tiny_free_no_lock(szone, tiny_region, ptr, msize); - to_be_freed[cc] = NULL; + if (ptr) { + /* XXX this really slows us down */ + tiny_region = tiny_region_for_ptr_no_lock(szone, ptr); + if (tiny_region) { + // this is a tiny pointer + if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) + break; // pointer to metadata; let the standard free deal with it + msize = get_tiny_meta_header(ptr, &is_free); + if (is_free) + break; // a double free; let the standard free deal with it + tiny_free_no_lock(szone, tiny_region, ptr, msize); + to_be_freed[cc] = NULL; + } } cc++; } @@ -3341,13 +3689,10 @@ szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count) static void 
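
[Editor's sketch] As the rewritten comment says, szone_batch_malloc is only a best-effort allocator from the tiny free lists, so callers top up any shortfall themselves. A usage sketch against the public batch entry points in <malloc/malloc.h>, assuming the default zone is backed by this scalable zone:

#include <malloc/malloc.h>
#include <stdio.h>
#include <stdlib.h>

#define WANTED 64u

int main(void)
{
    void *blocks[WANTED];
    malloc_zone_t *zone = malloc_default_zone();

    /* Best effort: the zone may hand back fewer than WANTED pointers. */
    unsigned got = malloc_zone_batch_malloc(zone, 48, blocks, WANTED);
    printf("batch supplied %u of %u blocks\n", got, WANTED);

    /* Top up the shortfall with ordinary malloc, as the comment advises. */
    for (unsigned i = got; i < WANTED; i++)
        blocks[i] = malloc(48);

    /* Everything here came from the default zone, so one batch free covers it. */
    malloc_zone_batch_free(zone, blocks, WANTED);
    return 0;
}
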
szone_destroy(szone_t *szone) { - unsigned index; - small_region_t pended_region = 0; + size_t index; large_entry_t *large; vm_range_t range_to_deallocate; huge_entry_t *huge; - tiny_region_t tiny_region; - small_region_t small_region; /* destroy large entries */ index = szone->num_large_entries; @@ -3373,28 +3718,26 @@ szone_destroy(szone_t *szone) } /* destroy tiny regions */ - index = szone->num_tiny_regions; - while (index--) { - tiny_region = szone->tiny_regions[index]; - deallocate_pages(szone, TINY_REGION_ADDRESS(tiny_region), TINY_REGION_SIZE, 0); - } - /* destroy small regions; region 0 must go last as it contains the szone */ - index = szone->num_small_regions; - while (index--) { - small_region = szone->small_regions[index]; - /* - * If we've allocated more than the basic set of small regions, avoid destroying the - * region that contains the array. - */ - if ((index > 0) && - (SMALL_REGION_FOR_PTR(szone->small_regions) == SMALL_REGION_ADDRESS(small_region))) { - pended_region = small_region; - } else { - deallocate_pages(szone, (void *)SMALL_REGION_ADDRESS(small_region), SMALL_REGION_SIZE, 0); - } - } - if (pended_region) - deallocate_pages(NULL, (void *)SMALL_REGION_ADDRESS(pended_region), SMALL_REGION_SIZE, 0); + for (index = 0; index < szone->num_tiny_regions_allocated; ++index) + if (szone->tiny_regions[index]) + deallocate_pages(szone, szone->tiny_regions[index], TINY_REGION_SIZE, 0); + + /* destroy small regions */ + for (index = 0; index < szone->num_small_regions_allocated; ++index) + if (szone->small_regions[index]) + deallocate_pages(szone, szone->small_regions[index], SMALL_REGION_SIZE, 0); + + /* destroy region hash rings, if any */ + if (szone->tiny_regions != szone->initial_tiny_regions) { + size_t size = round_page(szone->num_tiny_regions_allocated * sizeof(region_t)); + deallocate_pages(szone, szone->tiny_regions, size, 0); + } + if (szone->small_regions != szone->initial_small_regions) { + size_t size = round_page(szone->num_small_regions_allocated * sizeof(region_t)); + deallocate_pages(szone, szone->small_regions, size, 0); + } + /* Now destroy the separate szone region */ + deallocate_pages(szone, (void *)szone, SZONE_PAGED_SIZE, SCALABLE_MALLOC_ADD_GUARD_PAGES); } static size_t @@ -3429,71 +3772,66 @@ unsigned szone_check_modulo = 1; static boolean_t szone_check_all(szone_t *szone, const char *function) { - int index; - tiny_region_t *tiny; - small_region_t *small; - + size_t index; + SZONE_LOCK(szone); CHECK_LOCKED(szone, __PRETTY_FUNCTION__); /* check tiny regions - chould check region count */ - for (index = szone->num_tiny_regions - 1, tiny = szone->tiny_regions; - index >= 0; - index--, tiny++) { - if (!tiny_check_region(szone, tiny)) { - SZONE_UNLOCK(szone); - szone->debug_flags &= ~ CHECK_REGIONS; - malloc_printf("*** tiny region %d incorrect szone_check_all(%s) counter=%d\n", - szone->num_tiny_regions - index, function, szone_check_counter); - szone_error(szone, "check: tiny region incorrect", NULL); - return 0; - } + for (index = 0; index < szone->num_tiny_regions_allocated; ++index) { + region_t tiny = szone->tiny_regions[index]; + if (tiny && !tiny_check_region(szone, tiny)) { + SZONE_UNLOCK(szone); + szone->debug_flags &= ~ CHECK_REGIONS; + szone_error(szone, "check: tiny region incorrect", NULL, + "*** tiny region %d incorrect szone_check_all(%s) counter=%d\n", + index, function, szone_check_counter); + return 0; + } + } + /* check tiny free lists */ + for (index = 0; index < NUM_TINY_SLOTS; ++index) { + if (!tiny_free_list_check(szone, index)) 
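
[Editor's sketch] szone_destroy now walks the possibly sparse region hash slots, releases each live region, and releases the hash ring itself only when it has been grown off the inline initial array. A minimal sketch of that ownership pattern, with made-up names and plain malloc/free standing in for the zone's page allocator:

#include <stdlib.h>
#include <string.h>

#define INITIAL_NUM_REGIONS_SKETCH 64            /* illustrative initial table size */

typedef void *region_sketch_t;

struct zone_sketch {
    region_sketch_t  initial_regions[INITIAL_NUM_REGIONS_SKETCH]; /* inline storage */
    region_sketch_t *regions;            /* points at initial_regions until grown */
    size_t           num_regions_allocated;
};

static void zone_sketch_destroy(struct zone_sketch *z)
{
    /* Slots may be empty (NULL): the table is a hash ring, not a dense array. */
    for (size_t i = 0; i < z->num_regions_allocated; ++i)
        if (z->regions[i])
            free(z->regions[i]);

    /* Only free the ring if it was grown out of the inline initial array. */
    if (z->regions != z->initial_regions)
        free(z->regions);
}

int main(void)
{
    struct zone_sketch z;
    memset(&z, 0, sizeof(z));
    z.regions = z.initial_regions;
    z.num_regions_allocated = INITIAL_NUM_REGIONS_SKETCH;

    z.regions[3]  = malloc(1024);        /* pretend these are mapped regions */
    z.regions[17] = malloc(1024);

    zone_sketch_destroy(&z);
    return 0;
}
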
{ + SZONE_UNLOCK(szone); + szone->debug_flags &= ~ CHECK_REGIONS; + szone_error(szone, "check: tiny free list incorrect", NULL, + "*** tiny free list incorrect (slot=%d) szone_check_all(%s) counter=%d\n", + index, function, szone_check_counter); + return 0; + } } - - for (index = NUM_TINY_SLOTS - 1; index >= 0; index--) { - if (!tiny_free_list_check(szone, index)) { - SZONE_UNLOCK(szone); - szone->debug_flags &= ~ CHECK_REGIONS; - malloc_printf("*** tiny free list incorrect (slot=%d) szone_check_all(%s) counter=%d\n", - index, function, szone_check_counter); - szone_error(szone, "check: tiny free list incorrect", NULL); - return 0; - } - } - /* check small regions - could check region count */ - for (index = szone->num_small_regions - 1, small = szone->small_regions; - index >= 0; - index--, small++) { - if (!szone_check_small_region(szone, small)) { - SZONE_UNLOCK(szone); - szone->debug_flags &= ~ CHECK_REGIONS; - malloc_printf("*** small region %d incorrect szone_check_all(%s) counter=%d\n", - szone->num_small_regions, index, function, szone_check_counter); - szone_error(szone, "check: small region incorrect", NULL); - return 0; - } - } - for (index = NUM_SMALL_SLOTS - 1; index >= 0; index--) { - if (!small_free_list_check(szone, index)) { - SZONE_UNLOCK(szone); - szone->debug_flags &= ~ CHECK_REGIONS; - malloc_printf("*** small free list incorrect (grain=%d) szone_check_all(%s) counter=%d\n", index, function, szone_check_counter); - szone_error(szone, "check: small free list incorrect", NULL); - return 0; - } + for (index = 0; index < szone->num_small_regions_allocated; ++index) { + region_t small = szone->small_regions[index]; + if (small && !szone_check_small_region(szone, small)) { + SZONE_UNLOCK(szone); + szone->debug_flags &= ~ CHECK_REGIONS; + szone_error(szone, "check: small region incorrect", NULL, + "*** small region %d incorrect szone_check_all(%s) counter=%d\n", + index, function, szone_check_counter); + return 0; + } + } + /* check small free lists */ + for (index = 0; index < NUM_SMALL_SLOTS; ++index) { + if (!small_free_list_check(szone, index)) { + SZONE_UNLOCK(szone); + szone->debug_flags &= ~ CHECK_REGIONS; + szone_error(szone, "check: small free list incorrect", NULL, + "*** small free list incorrect (grain=%d) szone_check_all(%s) counter=%d\n", + index, function, szone_check_counter); + return 0; + } } SZONE_UNLOCK(szone); - // szone_print(szone, 1); return 1; } static boolean_t szone_check(szone_t *szone) { - if ((++szone_check_counter % 10000) == 0) - malloc_printf("at szone_check counter=%d\n", szone_check_counter); + _malloc_printf(ASL_LEVEL_NOTICE, "at szone_check counter=%d\n", szone_check_counter); if (szone_check_counter < szone_check_start) return 1; if (szone_check_counter % szone_check_modulo) @@ -3510,11 +3848,9 @@ szone_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_a if (!reader) reader = _szone_default_reader; err = reader(task, zone_address, sizeof(szone_t), (void **)&szone); if (err) return err; - err = tiny_in_use_enumerator(task, context, type_mask, - (vm_address_t)szone->tiny_regions, szone->num_tiny_regions, szone->tiny_bytes_free_at_end , reader, recorder); + err = tiny_in_use_enumerator(task, context, type_mask, szone, reader, recorder); if (err) return err; - err = small_in_use_enumerator(task, context, type_mask, - (vm_address_t)szone->small_regions, szone->num_small_regions, szone->small_bytes_free_at_end , reader, recorder); + err = small_in_use_enumerator(task, context, type_mask, szone, reader, recorder); if (err) 
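
[Editor's sketch] szone_check gates the expensive szone_check_all walk behind a counter: nothing is validated before szone_check_start calls have been made, and after that only every szone_check_modulo-th call is checked. A sketch of that throttling, with a generic stand-in for the region and free-list walk:

#include <stdio.h>
#include <stdbool.h>

static unsigned check_counter = 0;
static unsigned check_start   = 0;     /* don't start checking before this many calls */
static unsigned check_modulo  = 1;     /* then check only every Nth call */

static bool expensive_check(void)
{
    /* Stand-in for walking every region and every free list. */
    return true;
}

static bool throttled_check(void)
{
    ++check_counter;
    if (check_counter < check_start)
        return true;                   /* too early: report success without checking */
    if (check_counter % check_modulo)
        return true;                   /* off-cycle: skip this call */
    return expensive_check();
}

int main(void)
{
    check_start  = 5;
    check_modulo = 3;
    for (int i = 1; i <= 12; i++)
        printf("call %2d: %s\n", i, throttled_check() ? "ok (or skipped)" : "corrupt");
    return 0;
}
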
return err; err = large_in_use_enumerator(task, context, type_mask, (vm_address_t)szone->large_entries, szone->num_large_entries, reader, @@ -3553,35 +3889,42 @@ scalable_zone_info(malloc_zone_t *zone, unsigned *info_to_fill, unsigned count) static void szone_print(szone_t *szone, boolean_t verbose) { - unsigned info[13]; - unsigned index = 0; - tiny_region_t *region; - - SZONE_LOCK(szone); - scalable_zone_info((void *)szone, info, 13); - malloc_printf("Scalable zone %p: inUse=%d(%y) touched=%y allocated=%y flags=%d\n", - szone, info[0], info[1], info[2], info[3], info[12]); - malloc_printf("\ttiny=%d(%y) small=%d(%y) large=%d(%y) huge=%d(%y)\n", - info[4], info[5], info[6], info[7], info[8], info[9], info[10], info[11]); - // tiny - malloc_printf("%d tiny regions: \n", szone->num_tiny_regions); - while (index < szone->num_tiny_regions) { - region = szone->tiny_regions + index; - print_tiny_region(verbose, *region, (index == szone->num_tiny_regions - 1) ? szone->tiny_bytes_free_at_end : 0); - index++; - } - if (verbose) print_tiny_free_list(szone); - // small - malloc_printf("%d small regions: \n", szone->num_small_regions); - index = 0; - while (index < szone->num_small_regions) { - region = szone->small_regions + index; - print_small_region(szone, verbose, region, (index == szone->num_small_regions - 1) ? szone->small_bytes_free_at_end : 0); - index++; - } - if (verbose) - print_small_free_list(szone); - SZONE_UNLOCK(szone); + unsigned info[13]; + size_t index; + region_t region; + + SZONE_LOCK(szone); + scalable_zone_info((void *)szone, info, 13); + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, + "Scalable zone %p: inUse=%d(%y) touched=%y allocated=%y flags=%d\n", + szone, info[0], info[1], info[2], info[3], info[12]); + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, + "\ttiny=%d(%y) small=%d(%y) large=%d(%y) huge=%d(%y)\n", + info[4], info[5], info[6], info[7], info[8], info[9], info[10], info[11]); + // tiny + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, + "%d tiny regions:\n", szone->num_tiny_regions); + for (index = 0; index < szone->num_tiny_regions_allocated; ++index) { + region = szone->tiny_regions[index]; + if (region) + print_tiny_region(verbose, region, (region == szone->last_tiny_region) ? + szone->tiny_bytes_free_at_end : 0); + } + if (verbose) + print_tiny_free_list(szone); + // small + _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, + "%d small regions:\n", szone->num_small_regions); + for (index = 0; index < szone->num_small_regions_allocated; ++index) { + region = szone->small_regions[index]; + if (region) + print_small_region(szone, verbose, region, + (region == szone->last_small_region) ? + szone->small_bytes_free_at_end : 0); + } + if (verbose) + print_small_free_list(szone); + SZONE_UNLOCK(szone); } static void @@ -3673,9 +4016,6 @@ malloc_zone_t * create_scalable_zone(size_t initial_size, unsigned debug_flags) { szone_t *szone; - size_t msize; - size_t msize_used; - msize_t free_msize; /* * Sanity-check our build-time assumptions about the size of a page. @@ -3687,14 +4027,19 @@ create_scalable_zone(size_t initial_size, unsigned debug_flags) exit(-1); } - /* get memory for the zone */ - szone = allocate_pages(NULL, SMALL_REGION_SIZE, SMALL_BLOCKS_ALIGN, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC)); + /* get memory for the zone, which is now separate from any region. + add guard pages to prevent walking from any other vm allocations + to here and overwriting the function pointers in basic_zone. 
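
[Editor's sketch] create_scalable_zone is normally reached through the public malloc_create_zone wrapper, and the zone structure it returns now lives in its own guarded pages rather than inside the first small region. A usage sketch that drives such a zone through the <malloc/malloc.h> interface and exercises the tiny, small, and large paths:

#include <malloc/malloc.h>
#include <stdio.h>

int main(void)
{
    /* start_size is only a hint; flags map onto the zone's debug_flags. */
    malloc_zone_t *zone = malloc_create_zone(0, 0);
    if (!zone)
        return 1;

    void *tiny  = malloc_zone_malloc(zone, 48);        /* tiny path  */
    void *small = malloc_zone_malloc(zone, 4096);      /* small path */
    void *large = malloc_zone_malloc(zone, 1 << 20);   /* large path */

    printf("sizes as seen by the zone: %zu %zu %zu\n",
           malloc_size(tiny), malloc_size(small), malloc_size(large));

    malloc_zone_free(zone, tiny);
    malloc_zone_free(zone, small);
    malloc_zone_free(zone, large);

    /* Tears the zone down via szone_destroy, including its region hash rings. */
    malloc_destroy_zone(zone);
    return 0;
}
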
*/ + szone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, + SCALABLE_MALLOC_ADD_GUARD_PAGES, + VM_MEMORY_MALLOC); if (!szone) - return NULL; + return NULL; /* set up the szone structure */ szone->tiny_regions = szone->initial_tiny_regions; szone->small_regions = szone->initial_small_regions; - msize_used = msize; szone->num_small_objects++; + szone->num_tiny_regions_allocated = INITIAL_NUM_REGIONS; + szone->num_small_regions_allocated = INITIAL_NUM_REGIONS; szone->basic_zone.version = 3; szone->basic_zone.size = (void *)szone_size; szone->basic_zone.malloc = (void *)szone_malloc; @@ -3707,17 +4052,8 @@ create_scalable_zone(size_t initial_size, unsigned debug_flags) szone->basic_zone.batch_free = (void *)szone_batch_free; szone->basic_zone.introspect = (struct malloc_introspection_t *)&szone_introspect; szone->debug_flags = debug_flags; - - /* as the szone is allocated out of the first tiny, region, reflect that allocation */ - szone->small_regions[0] = szone; - szone->num_small_regions = 1; - msize = SMALL_MSIZE_FOR_BYTES(sizeof(szone_t) + SMALL_QUANTUM - 1); - free_msize = NUM_SMALL_BLOCKS - msize; - *SMALL_METADATA_FOR_PTR(szone) = msize; - *(SMALL_METADATA_FOR_PTR(szone) + msize) = free_msize; - szone->small_bytes_free_at_end = SMALL_BYTES_FOR_MSIZE(free_msize); - LOCK_INIT(szone->lock); + #if 0 #warning CHECK_REGIONS enabled debug_flags |= CHECK_REGIONS; @@ -3744,12 +4080,13 @@ create_scalable_zone(size_t initial_size, unsigned debug_flags) * 3) Original szone-based freezedrying code. * 4) Fresher malloc with tiny zone * 5) 32/64bit compatible malloc + * 6) Metadata within 1MB and 8MB region for tiny and small * * No version backward compatibility is provided, but the version number does * make it possible for malloc_jumpstart() to return an error if the application * was freezedried with an older version of malloc. */ -#define MALLOC_FREEZEDRY_VERSION 5 +#define MALLOC_FREEZEDRY_VERSION 6 typedef struct { unsigned version;
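
[Editor's sketch] The freezedry format version is bumped to 6 because the region layout changed, and the comment above states that malloc_jumpstart() may only refuse older images rather than interpret them. A sketch of that version gate, with illustrative names:

#include <stdio.h>

#define SKETCH_FREEZEDRY_VERSION 6   /* metadata now lives inside the 1 MB / 8 MB regions */

/* Mirrors the idea behind malloc_jumpstart(): no backward compatibility is
 * promised, so anything but an exact version match is treated as an error. */
static int jumpstart_sketch(unsigned saved_version)
{
    if (saved_version != SKETCH_FREEZEDRY_VERSION) {
        fprintf(stderr, "freezedry image version %u not supported (want %u)\n",
                saved_version, SKETCH_FREEZEDRY_VERSION);
        return 1;   /* caller falls back to a fresh zone */
    }
    /* ... otherwise the saved szone state would be rehydrated here ... */
    return 0;
}

int main(void)
{
    /* An image written by the previous (version 5) format is rejected. */
    printf("old image accepted? %s\n", jumpstart_sketch(5) == 0 ? "yes" : "no");
    return 0;
}
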