X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c0fea4742e91338fffdcf79f86a7c1d5e2b97eb1..cc8bc92ae4a8e9f1a1ab61bf83d34ad8150b3405:/osfmk/kern/kalloc.c diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 3d1d0c899..97b04c739 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -66,92 +72,214 @@ #include #include #include -#include +#include #include #include #include #include +#include + +#include #ifdef MACH_BSD zone_t kalloc_zone(vm_size_t); #endif +#define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024) +#define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024) vm_map_t kalloc_map; -vm_size_t kalloc_map_size = 16 * 1024 * 1024; vm_size_t kalloc_max; vm_size_t kalloc_max_prerounded; vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */ +/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */ +unsigned long kalloc_fallback_count; + unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; vm_size_t kalloc_large_max; +vm_size_t kalloc_largest_allocated = 0; +uint64_t kalloc_large_sum; + +int kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */ + +vm_offset_t kalloc_map_min; +vm_offset_t kalloc_map_max; + +#ifdef MUTEX_ZONE +/* + * Diagnostic code to track mutexes separately rather than via the 2^ zones + */ + zone_t lck_mtx_zone; +#endif + +static void +KALLOC_ZINFO_SALLOC(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} + +static void +KALLOC_ZINFO_SFREE(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} /* * All allocations of size less than kalloc_max are rounded to the - * next highest power of 2. This allocator is built on top of + * next nearest sized zone. This allocator is built on top of * the zone allocator. A zone is created for each potential size * that we are willing to get in small blocks. * * We assume that kalloc_max is not greater than 64K; - * thus 16 is a safe array size for k_zone and k_zone_name. * * Note that kalloc_max is somewhat confusingly named. * It represents the first power of two for which no zone exists. * kalloc_max_prerounded is the smallest allocation size, before * rounding, for which no zone exists. - * Also if the allocation size is more than kalloc_kernmap_size - * then allocate from kernel map rather than kalloc_map. + * + * Also if the allocation size is more than kalloc_kernmap_size + * then allocate from kernel map rather than kalloc_map. */ -int first_k_zone = -1; -struct zone *k_zone[16]; -static const char *k_zone_name[16] = { - "kalloc.1", "kalloc.2", - "kalloc.4", "kalloc.8", - "kalloc.16", "kalloc.32", - "kalloc.64", "kalloc.128", - "kalloc.256", "kalloc.512", - "kalloc.1024", "kalloc.2048", - "kalloc.4096", "kalloc.8192", - "kalloc.16384", "kalloc.32768" -}; +#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4 + +#define K_ZONE_SIZES \ + 16, \ + 32, \ + 48, \ +/* 3 */ 64, \ + 80, \ + 96, \ +/* 6 */ 128, \ + 160, 192, \ + 256, \ +/* 9 */ 288, \ + 512, 576, \ + 1024, 1152, \ +/* C */ 1280, \ + 2048, \ + 4096 + +#define K_ZONE_NAMES \ + "kalloc.16", \ + "kalloc.32", \ + "kalloc.48", \ +/* 3 */ "kalloc.64", \ + "kalloc.80", \ + "kalloc.96", \ +/* 6 */ "kalloc.128", \ + "kalloc.160", \ + "kalloc.192", \ + "kalloc.256", \ +/* 9 */ "kalloc.288", \ + "kalloc.512", \ + "kalloc.576", \ + "kalloc.1024", \ + "kalloc.1152", \ +/* C */ "kalloc.1280", \ + "kalloc.2048", \ + "kalloc.4096" + +#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3 /* - * Max number of elements per zone. zinit rounds things up correctly - * Doing things this way permits each zone to have a different maximum size - * based on need, rather than just guessing; it also - * means its patchable in case you're wrong! + * Tweaked for ARM (and x64) in 04/2011 */ -unsigned long k_zone_max[16] = { - 1024, /* 1 Byte */ - 1024, /* 2 Byte */ - 1024, /* 4 Byte */ - 1024, /* 8 Byte */ - 1024, /* 16 Byte */ - 4096, /* 32 Byte */ - 4096, /* 64 Byte */ - 4096, /* 128 Byte */ - 4096, /* 256 Byte */ - 1024, /* 512 Byte */ - 1024, /* 1024 Byte */ - 1024, /* 2048 Byte */ - 1024, /* 4096 Byte */ - 4096, /* 8192 Byte */ - 64, /* 16384 Byte */ - 64, /* 32768 Byte */ + +#define K_ZONE_SIZES \ +/* 3 */ 8, \ + 16, 24, \ + 32, 40, 48, \ +/* 6 */ 64, 72, 88, 112, \ + 128, 192, \ + 256, 288, 384, 440, \ +/* 9 */ 512, 576, 768, \ + 1024, 1152, 1536, \ + 2048, 2128, 3072, \ + 4096, 6144 + +#define K_ZONE_NAMES \ +/* 3 */ "kalloc.8", \ + "kalloc.16", "kalloc.24", \ + "kalloc.32", "kalloc.40", "kalloc.48", \ +/* 6 */ "kalloc.64", "kalloc.72", "kalloc.88", "kalloc.112", \ + "kalloc.128", "kalloc.192", \ + "kalloc.256", "kalloc.288", "kalloc.384", "kalloc.440", \ +/* 9 */ "kalloc.512", "kalloc.576", "kalloc.768", \ + "kalloc.1024", "kalloc.1152", "kalloc.1536", \ + "kalloc.2048", "kalloc.2128", "kalloc.3072", \ + "kalloc.4096", "kalloc.6144" + +#else +#error missing zone size parameters for kalloc +#endif + +#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) +#define KiB(x) (1024 * (x)) + +static const int k_zone_size[] = { + K_ZONE_SIZES, + KiB(8), + KiB(16), + KiB(32) +}; + +#define MAX_K_ZONE (sizeof (k_zone_size) / sizeof (k_zone_size[0])) + +static const char *k_zone_name[MAX_K_ZONE] = { + K_ZONE_NAMES, + "kalloc.8192", + "kalloc.16384", + "kalloc.32768" }; + +/* + * Many kalloc() allocations are for small structures containing a few + * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by + * size normalized to the minimum alignment, finds the right zone index + * for them in one dereference. + */ + +#define INDEX_ZDLUT(size) \ + (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN) +#define N_K_ZDLUT (2048 / KALLOC_MINALIGN) + /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */ +#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN) + +static int8_t k_zone_dlut[N_K_ZDLUT]; /* table of indices into k_zone[] */ + +/* + * If there's no hit in the DLUT, then start searching from k_zindex_start. + */ +static int k_zindex_start; + +static zone_t k_zone[MAX_K_ZONE]; + +/* #define KALLOC_DEBUG 1 */ + /* forward declarations */ -void * kalloc_canblock( - vm_size_t size, - boolean_t canblock); + +lck_grp_t kalloc_lck_grp; +lck_mtx_t kalloc_lock; + +#define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock) +#define kalloc_unlock() lck_mtx_unlock(&kalloc_lock) /* OSMalloc local data declarations */ static queue_head_t OSMalloc_tag_list; -decl_simple_lock_data(static,OSMalloc_tag_lock) +lck_grp_t *OSMalloc_tag_lck_grp; +lck_mtx_t OSMalloc_tag_lock; + +#define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock) +#define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock) + /* OSMalloc forward declarations */ void OSMalloc_init(void); @@ -172,305 +300,511 @@ kalloc_init( { kern_return_t retval; vm_offset_t min; - vm_size_t size; - register int i; + vm_size_t size, kalloc_map_size; + int i; + vm_map_kernel_flags_t vmk_flags; + + /* + * Scale the kalloc_map_size to physical memory size: stay below + * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel). + */ + kalloc_map_size = (vm_size_t)(sane_size >> 5); +#if !__LP64__ + if (kalloc_map_size > KALLOC_MAP_SIZE_MAX) + kalloc_map_size = KALLOC_MAP_SIZE_MAX; +#endif /* !__LP64__ */ + if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) + kalloc_map_size = KALLOC_MAP_SIZE_MIN; + + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, VM_FLAGS_ANYWHERE, &kalloc_map); + FALSE, + (VM_FLAGS_ANYWHERE), + vmk_flags, + VM_KERN_MEMORY_KALLOC, + &kalloc_map); if (retval != KERN_SUCCESS) panic("kalloc_init: kmem_suballoc failed"); + kalloc_map_min = min; + kalloc_map_max = min + kalloc_map_size - 1; + /* - * Ensure that zones up to size 8192 bytes exist. - * This is desirable because messages are allocated - * with kalloc, and messages up through size 8192 are common. + * Create zones up to a least 2 pages because small page-multiples are common + * allocations. Also ensure that zones up to size 8192 bytes exist. This is + * desirable because messages are allocated with kalloc(), and messages up + * through size 8192 are common. */ + kalloc_max = PAGE_SIZE << 2; + if (kalloc_max < KiB(16)) { + kalloc_max = KiB(16); + } + assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */ - if (PAGE_SIZE < 16*1024) - kalloc_max = 16*1024; - else - kalloc_max = PAGE_SIZE; kalloc_max_prerounded = kalloc_max / 2 + 1; - /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */ + /* allocations larger than 16 times kalloc_max go directly to kernel map */ kalloc_kernmap_size = (kalloc_max * 16) + 1; + kalloc_largest_allocated = kalloc_kernmap_size; /* - * Allocate a zone for each size we are going to handle. - * We specify non-paged memory. + * Allocate a zone for each size we are going to handle. Don't charge the + * caller for the allocation, as we aren't sure how the memory will be + * handled. */ - for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) { - if (size < KALLOC_MINSIZE) { - k_zone[i] = 0; - continue; - } - if (size == KALLOC_MINSIZE) { - first_k_zone = i; + for (i = 0; i < (int)MAX_K_ZONE && (size = k_zone_size[i]) < kalloc_max; i++) { + k_zone[i] = zinit(size, size, size, k_zone_name[i]); + zone_change(k_zone[i], Z_CALLERACCT, FALSE); +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE); +#endif + zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE); + } + + /* + * Build the Direct LookUp Table for small allocations + */ + for (i = 0, size = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) { + int zindex = 0; + + while ((vm_size_t)k_zone_size[zindex] < size) + zindex++; + + if (i == N_K_ZDLUT) { + k_zindex_start = zindex; + break; } - k_zone[i] = zinit(size, k_zone_max[i] * size, size, - k_zone_name[i]); + k_zone_dlut[i] = (int8_t)zindex; } - OSMalloc_init(); -} -void * -kalloc_canblock( - vm_size_t size, - boolean_t canblock) -{ - register int zindex; - register vm_size_t allocsize; - vm_map_t alloc_map = VM_MAP_NULL; +#ifdef KALLOC_DEBUG + printf("kalloc_init: k_zindex_start %d\n", k_zindex_start); /* - * If size is too large for a zone, then use kmem_alloc. - * (We use kmem_alloc instead of kmem_alloc_wired so that - * krealloc can use kmem_realloc.) + * Do a quick synthesis to see how well/badly we can + * find-a-zone for a given size. + * Useful when debugging/tweaking the array of zone sizes. + * Cache misses probably more critical than compare-branches! */ + for (i = 0; i < (int)MAX_K_ZONE; i++) { + vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1; + int compare = 0; + int zindex; - if (size >= kalloc_max_prerounded) { - void *addr; + if (testsize < MAX_SIZE_ZDLUT) { + compare += 1; /* 'if' (T) */ - /* kmem_alloc could block so we return if noblock */ - if (!canblock) { - return(0); - } + long dindex = INDEX_ZDLUT(testsize); + zindex = (int)k_zone_dlut[dindex]; - if (size >= kalloc_kernmap_size) - alloc_map = kernel_map; - else - alloc_map = kalloc_map; + } else if (testsize < kalloc_max_prerounded) { - if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) - addr = 0; + compare += 2; /* 'if' (F), 'if' (T) */ - if (addr) { - kalloc_large_inuse++; - kalloc_large_total += size; + zindex = k_zindex_start; + while ((vm_size_t)k_zone_size[zindex] < testsize) { + zindex++; + compare++; /* 'while' (T) */ + } + compare++; /* 'while' (F) */ + } else + break; /* not zone-backed */ - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } - return(addr); + zone_t z = k_zone[zindex]; + printf("kalloc_init: req size %4lu: %11s took %d compare%s\n", + (unsigned long)testsize, z->zone_name, compare, + compare == 1 ? "" : "s"); } +#endif + + lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL); + lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL); + OSMalloc_init(); +#ifdef MUTEX_ZONE + lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx"); +#endif +} + +/* + * Given an allocation size, return the kalloc zone it belongs to. + * Direct LookUp Table variant. + */ +static __inline zone_t +get_zone_dlut(vm_size_t size) +{ + long dindex = INDEX_ZDLUT(size); + int zindex = (int)k_zone_dlut[dindex]; + return (k_zone[zindex]); +} - /* compute the size of the block that we will actually allocate */ +/* As above, but linear search k_zone_size[] for the next zone that fits. */ - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; +static __inline zone_t +get_zone_search(vm_size_t size, int zindex) +{ + assert(size < kalloc_max_prerounded); + + while ((vm_size_t)k_zone_size[zindex] < size) zindex++; - } - /* allocate from the appropriate zone */ - assert(allocsize < kalloc_max); - return(zalloc_canblock(k_zone[zindex], canblock)); + assert((unsigned)zindex < MAX_K_ZONE && + (vm_size_t)k_zone_size[zindex] < kalloc_max); + + return (k_zone[zindex]); } -void * -kalloc( - vm_size_t size) +static vm_size_t +vm_map_lookup_kalloc_entry_locked( + vm_map_t map, + void *addr) { - return( kalloc_canblock(size, TRUE) ); + boolean_t ret; + vm_map_entry_t vm_entry = NULL; + + ret = vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry); + if (!ret) { + panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n", + map, addr); + } + if (vm_entry->vme_start != (vm_map_offset_t)addr) { + panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + if (!vm_entry->vme_atomic) { + panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + return (vm_entry->vme_end - vm_entry->vme_start); } -void * -kalloc_noblock( - vm_size_t size) +#if KASAN_KALLOC +/* + * KASAN kalloc stashes the original user-requested size away in the poisoned + * area. Return that directly. + */ +vm_size_t +kalloc_size(void *addr) { - return( kalloc_canblock(size, FALSE) ); + (void)vm_map_lookup_kalloc_entry_locked; /* silence warning */ + return kasan_user_size((vm_offset_t)addr); } +#else +vm_size_t +kalloc_size( + void *addr) +{ + vm_map_t map; + vm_size_t size; + size = zone_element_size(addr, NULL); + if (size) { + return size; + } + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + vm_map_lock_read(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + vm_map_unlock_read(map); + return size; +} +#endif -void -krealloc( - void **addrp, - vm_size_t old_size, - vm_size_t new_size, - simple_lock_t lock) +vm_size_t +kalloc_bucket_size( + vm_size_t size) { - register int zindex; - register vm_size_t allocsize; - void *naddr; - vm_map_t alloc_map = VM_MAP_NULL; + zone_t z; + vm_map_t map; + + if (size < MAX_SIZE_ZDLUT) { + z = get_zone_dlut(size); + return z->elem_size; + } + + if (size < kalloc_max_prerounded) { + z = get_zone_search(size, k_zindex_start); + return z->elem_size; + } - /* can only be used for increasing allocation size */ + if (size >= kalloc_kernmap_size) + map = kernel_map; + else + map = kalloc_map; + + return vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); +} - assert(new_size > old_size); +#if KASAN_KALLOC +vm_size_t +kfree_addr(void *addr) +{ + vm_size_t origsz = kalloc_size(addr); + kfree(addr, origsz); + return origsz; +} +#else +vm_size_t +kfree_addr( + void *addr) +{ + vm_map_t map; + vm_size_t size = 0; + kern_return_t ret; + zone_t z; + + size = zone_element_size(addr, &z); + if (size) { + zfree(z, addr); + return size; + } - /* if old_size is zero, then we are simply allocating */ + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) { + panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr); + } - if (old_size == 0) { - simple_unlock(lock); - naddr = kalloc(new_size); - simple_lock(lock); - *addrp = naddr; - return; + vm_map_lock(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + ret = vm_map_remove_locked(map, + vm_map_trunc_page((vm_map_offset_t)addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((vm_map_offset_t)addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); + if (ret != KERN_SUCCESS) { + panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n", + addr, map, ret); } + vm_map_unlock(map); + + kalloc_spin_lock(); + kalloc_large_total -= size; + kalloc_large_inuse--; + kalloc_unlock(); + + KALLOC_ZINFO_SFREE(size); + return size; +} +#endif - /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */ +void * +kalloc_canblock( + vm_size_t * psize, + boolean_t canblock, + vm_allocation_site_t * site) +{ + zone_t z; + vm_size_t size; + void *addr; + vm_tag_t tag; - if (old_size >= kalloc_max_prerounded) { - if (old_size >= kalloc_kernmap_size) - alloc_map = kernel_map; - else - alloc_map = kalloc_map; + tag = VM_KERN_MEMORY_KALLOC; + size = *psize; - old_size = round_page(old_size); - new_size = round_page(new_size); - if (new_size > old_size) { +#if KASAN_KALLOC + /* expand the allocation to accomodate redzones */ + vm_size_t req_size = size; + size = kasan_alloc_resize(req_size); +#endif - if (KERN_SUCCESS != kmem_realloc(alloc_map, - (vm_offset_t)*addrp, old_size, - (vm_offset_t *)&naddr, new_size)) { - panic("krealloc: kmem_realloc"); - naddr = 0; - } + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* + * If size is too large for a zone, then use kmem_alloc. + * (We use kmem_alloc instead of kmem_alloc_kobject so that + * krealloc can use kmem_realloc.) + */ + vm_map_t alloc_map; - simple_lock(lock); - *addrp = (void *) naddr; + /* kmem_alloc could block so we return if noblock */ + if (!canblock) { + return(NULL); + } - /* kmem_realloc() doesn't free old page range. */ - kmem_free(alloc_map, (vm_offset_t)*addrp, old_size); +#if KASAN_KALLOC + /* large allocation - use guard pages instead of small redzones */ + size = round_page(req_size + 2 * PAGE_SIZE); + assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded); +#endif - kalloc_large_total += (new_size - old_size); + if (size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; + if (site) tag = vm_tag_alloc(site); + if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) { + if (alloc_map != kernel_map) { + if (kalloc_fallback_count++ == 0) { + printf("%s: falling back to kernel_map\n", __func__); + } + if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) + addr = NULL; + } + else + addr = NULL; } - return; - } - - /* compute the size of the block that we actually allocated */ - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < old_size) { - allocsize <<= 1; - zindex++; - } - - /* if new size fits in old block, then return */ - - if (new_size <= allocsize) { - return; - } + if (addr != NULL) { + kalloc_spin_lock(); + /* + * Thread-safe version of the workaround for 4740071 + * (a double FREE()) + */ + if (size > kalloc_largest_allocated) + kalloc_largest_allocated = size; - /* if new size does not fit in zone, kmem_alloc it, else zalloc it */ + kalloc_large_inuse++; + kalloc_large_total += size; + kalloc_large_sum += size; - simple_unlock(lock); - if (new_size >= kalloc_max_prerounded) { - if (new_size >= kalloc_kernmap_size) - alloc_map = kernel_map; - else - alloc_map = kalloc_map; - if (KERN_SUCCESS != kmem_alloc(alloc_map, - (vm_offset_t *)&naddr, new_size)) { - panic("krealloc: kmem_alloc"); - simple_lock(lock); - *addrp = NULL; - return; - } - kalloc_large_inuse++; - kalloc_large_total += new_size; + if (kalloc_large_total > kalloc_large_max) + kalloc_large_max = kalloc_large_total; - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } else { - register int new_zindex; + kalloc_unlock(); - allocsize <<= 1; - new_zindex = zindex + 1; - while (allocsize < new_size) { - allocsize <<= 1; - new_zindex++; + KALLOC_ZINFO_SALLOC(size); } - naddr = zalloc(k_zone[new_zindex]); +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE); +#else + *psize = round_page(size); +#endif + return(addr); } - simple_lock(lock); +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif - /* copy existing data */ + assert(size <= z->elem_size); - bcopy((const char *)*addrp, (char *)naddr, old_size); +#if VM_MAX_TAG_ZONES + if (z->tags && site) + { + tag = vm_tag_alloc(site); + if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC; + } +#endif - /* free old block, and return */ + addr = zalloc_canblock_tag(z, canblock, size, tag); - zfree(k_zone[zindex], *addrp); +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE); - /* set up new address */ + /* For KASan, the redzone lives in any additional space, so don't + * expand the allocation. */ +#else + *psize = z->elem_size; +#endif - *addrp = (void *) naddr; + return addr; } - void * -kget( - vm_size_t size) +kalloc_external( + vm_size_t size); +void * +kalloc_external( + vm_size_t size) { - register int zindex; - register vm_size_t allocsize; - - /* size must not be too large for a zone */ - - if (size >= kalloc_max_prerounded) { - /* This will never work, so we might as well panic */ - panic("kget"); - } - - /* compute the size of the block that we will actually allocate */ - - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - - /* allocate from the appropriate zone */ - - assert(allocsize < kalloc_max); - return(zget(k_zone[zindex])); + return( kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC) ); } +volatile SInt32 kfree_nop_count = 0; + void kfree( void *data, vm_size_t size) { - register int zindex; - register vm_size_t freesize; - vm_map_t alloc_map = VM_MAP_NULL; + zone_t z; + +#if KASAN_KALLOC + /* + * Resize back to the real allocation size and hand off to the KASan + * quarantine. `data` may then point to a different allocation. + */ + vm_size_t user_size = size; + kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC); + data = (void *)kasan_dealloc((vm_address_t)data, &size); + kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true); + if (!data) { + return; + } +#endif - /* if size was too large for a zone, then use kmem_free */ + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* if size was too large for a zone, then use kmem_free */ - if (size >= kalloc_max_prerounded) { - if (size >= kalloc_kernmap_size) - alloc_map = kernel_map; - else + vm_map_t alloc_map = kernel_map; + + if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) alloc_map = kalloc_map; + if (size > kalloc_largest_allocated) { + /* + * work around double FREEs of small MALLOCs + * this used to end up being a nop + * since the pointer being freed from an + * alloc backed by the zalloc world could + * never show up in the kalloc_map... however, + * the kernel_map is a different issue... since it + * was released back into the zalloc pool, a pointer + * would have gotten written over the 'size' that + * the MALLOC was retaining in the first 4 bytes of + * the underlying allocation... that pointer ends up + * looking like a really big size on the 2nd FREE and + * pushes the kfree into the kernel_map... we + * end up removing a ton of virtual space before we panic + * this check causes us to ignore the kfree for a size + * that must be 'bogus'... note that it might not be due + * to the above scenario, but it would still be wrong and + * cause serious damage. + */ + + OSAddAtomic(1, &kfree_nop_count); + return; + } kmem_free(alloc_map, (vm_offset_t)data, size); + kalloc_spin_lock(); kalloc_large_total -= size; kalloc_large_inuse--; - return; - } - - /* compute the size of the block that we actually allocated from */ + kalloc_unlock(); - freesize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (freesize < size) { - freesize <<= 1; - zindex++; + KALLOC_ZINFO_SFREE(size); + return; } /* free to the appropriate zone */ - - assert(freesize < kalloc_max); - zfree(k_zone[zindex], data); +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif + assert(size <= z->elem_size); + zfree(z, data); } #ifdef MACH_BSD @@ -478,46 +812,22 @@ zone_t kalloc_zone( vm_size_t size) { - register int zindex = 0; - register vm_size_t allocsize; - - /* compute the size of the block that we will actually allocate */ - - allocsize = size; - if (size <= kalloc_max) { - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - return (k_zone[zindex]); - } + if (size < MAX_SIZE_ZDLUT) + return (get_zone_dlut(size)); + if (size <= kalloc_max) + return (get_zone_search(size, k_zindex_start)); return (ZONE_NULL); } #endif - -void -kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, - vm_size_t *alloc_size, int *collectable, int *exhaustable) -{ - *count = kalloc_large_inuse; - *cur_size = kalloc_large_total; - *max_size = kalloc_large_max; - *elem_size = kalloc_large_total / kalloc_large_inuse; - *alloc_size = kalloc_large_total / kalloc_large_inuse; - *collectable = 0; - *exhaustable = 0; -} - - void OSMalloc_init( void) { queue_init(&OSMalloc_tag_list); - simple_lock_init(&OSMalloc_tag_lock, 0); + + OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL); + lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL); } OSMallocTag @@ -536,11 +846,11 @@ OSMalloc_Tagalloc( OSMTag->OSMT_refcnt = 1; - strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); + strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); - simple_lock(&OSMalloc_tag_lock); + OSMalloc_tag_spin_lock(); enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); OSMTag->OSMT_state = OSMT_VALID; return(OSMTag); } @@ -550,9 +860,9 @@ OSMalloc_Tagref( OSMallocTag tag) { if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) - panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); - (void)hw_atomic_add((uint32_t *)(&tag->OSMT_refcnt), 1); + (void)hw_atomic_add(&tag->OSMT_refcnt, 1); } void @@ -560,16 +870,16 @@ OSMalloc_Tagrele( OSMallocTag tag) { if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) - panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); - if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) { - simple_lock(&OSMalloc_tag_lock); + OSMalloc_tag_spin_lock(); (void)remque((queue_entry_t)tag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); kfree((void*)tag, sizeof(*tag)); } else - panic("OSMalloc_Tagrele(): refcnt 0\n"); + panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name); } } @@ -578,12 +888,12 @@ OSMalloc_Tagfree( OSMallocTag tag) { if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) - panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state); + panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state); - if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { - simple_lock(&OSMalloc_tag_lock); + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { + OSMalloc_tag_spin_lock(); (void)remque((queue_entry_t)tag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); kfree((void*)tag, sizeof(*tag)); } } @@ -599,11 +909,13 @@ OSMalloc( OSMalloc_Tagref(tag); if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) { - - if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) - panic("OSMalloc(): kmem_alloc_pageable() failed 0x%08X\n", kr); + if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) + addr = NULL; } else - addr = kalloc((vm_size_t)size); + addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); + + if (!addr) + OSMalloc_Tagrele(tag); return(addr); } @@ -620,7 +932,7 @@ OSMalloc_nowait( OSMalloc_Tagref(tag); /* XXX: use non-blocking kalloc for now */ - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -638,7 +950,7 @@ OSMalloc_noblock( return(NULL); OSMalloc_Tagref(tag); - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -655,7 +967,15 @@ OSFree( && (size & ~PAGE_MASK)) { kmem_free(kernel_map, (vm_offset_t)addr, size); } else - kfree((void*)addr, size); + kfree((void *)addr, size); OSMalloc_Tagrele(tag); } + +uint32_t +OSMalloc_size( + void *addr) +{ + return (uint32_t)kalloc_size(addr); +} +