X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/91447636331957f3d9b5ca5b508f07c526b0074d..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/osfmk/kern/kalloc.c?ds=sidebyside diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 57161e0bb..65e9df392 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -61,94 +67,201 @@ #include #include +#include #include #include #include #include #include -#include +#include #include #include #include #include +#include + +#include #ifdef MACH_BSD zone_t kalloc_zone(vm_size_t); #endif +#define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024) +#define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024) vm_map_t kalloc_map; -vm_size_t kalloc_map_size = 16 * 1024 * 1024; vm_size_t kalloc_max; vm_size_t kalloc_max_prerounded; +vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */ + +/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */ +unsigned long kalloc_fallback_count; unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; vm_size_t kalloc_large_max; +vm_size_t kalloc_largest_allocated = 0; +uint64_t kalloc_large_sum; +int kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */ + +vm_offset_t kalloc_map_min; +vm_offset_t kalloc_map_max; + +#ifdef MUTEX_ZONE /* - * All allocations of size less than kalloc_max are rounded to the - * next highest power of 2. This allocator is built on top of - * the zone allocator. A zone is created for each potential size - * that we are willing to get in small blocks. + * Diagnostic code to track mutexes separately rather than via the 2^ zones + */ + zone_t lck_mtx_zone; +#endif + +static void +KALLOC_ZINFO_SALLOC(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} + +static void +KALLOC_ZINFO_SFREE(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} + +/* + * All allocations of size less than kalloc_max are rounded to the next nearest + * sized zone. This allocator is built on top of the zone allocator. A zone + * is created for each potential size that we are willing to get in small + * blocks. * - * We assume that kalloc_max is not greater than 64K; - * thus 16 is a safe array size for k_zone and k_zone_name. + * We assume that kalloc_max is not greater than 64K; * - * Note that kalloc_max is somewhat confusingly named. - * It represents the first power of two for which no zone exists. - * kalloc_max_prerounded is the smallest allocation size, before - * rounding, for which no zone exists. + * Note that kalloc_max is somewhat confusingly named. It represents the first + * power of two for which no zone exists. kalloc_max_prerounded is the + * smallest allocation size, before rounding, for which no zone exists. + * + * Also if the allocation size is more than kalloc_kernmap_size then allocate + * from kernel map rather than kalloc_map. */ -int first_k_zone = -1; -struct zone *k_zone[16]; -static const char *k_zone_name[16] = { - "kalloc.1", "kalloc.2", - "kalloc.4", "kalloc.8", - "kalloc.16", "kalloc.32", - "kalloc.64", "kalloc.128", - "kalloc.256", "kalloc.512", - "kalloc.1024", "kalloc.2048", - "kalloc.4096", "kalloc.8192", - "kalloc.16384", "kalloc.32768" +#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) +#define KiB(x) (1024 * (x)) + +static const struct kalloc_zone_config { + int kzc_size; + const char *kzc_name; +} k_zone_config[] = { +#define KZC_ENTRY(SIZE) { .kzc_size = (SIZE), .kzc_name = "kalloc." #SIZE } + +#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4 + /* 64-bit targets, generally */ + KZC_ENTRY(16), + KZC_ENTRY(32), + KZC_ENTRY(48), + KZC_ENTRY(64), + KZC_ENTRY(80), + KZC_ENTRY(96), + KZC_ENTRY(128), + KZC_ENTRY(160), + KZC_ENTRY(192), + KZC_ENTRY(224), + KZC_ENTRY(256), + KZC_ENTRY(288), + KZC_ENTRY(368), + KZC_ENTRY(400), + KZC_ENTRY(512), + KZC_ENTRY(576), + KZC_ENTRY(768), + KZC_ENTRY(1024), + KZC_ENTRY(1152), + KZC_ENTRY(1280), + KZC_ENTRY(1664), + KZC_ENTRY(2048), +#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3 + /* 32-bit targets, generally */ + KZC_ENTRY(8), + KZC_ENTRY(16), + KZC_ENTRY(24), + KZC_ENTRY(32), + KZC_ENTRY(40), + KZC_ENTRY(48), + KZC_ENTRY(64), + KZC_ENTRY(72), + KZC_ENTRY(88), + KZC_ENTRY(112), + KZC_ENTRY(128), + KZC_ENTRY(192), + KZC_ENTRY(256), + KZC_ENTRY(288), + KZC_ENTRY(384), + KZC_ENTRY(440), + KZC_ENTRY(512), + KZC_ENTRY(576), + KZC_ENTRY(768), + KZC_ENTRY(1024), + KZC_ENTRY(1152), + KZC_ENTRY(1536), + KZC_ENTRY(2048), + KZC_ENTRY(2128), + KZC_ENTRY(3072), +#else +#error missing or invalid zone size parameters for kalloc +#endif + + /* all configurations get these zones */ + KZC_ENTRY(4096), + KZC_ENTRY(6144), + KZC_ENTRY(8192), + KZC_ENTRY(16384), + KZC_ENTRY(32768), +#undef KZC_ENTRY }; +#define MAX_K_ZONE (int)(sizeof(k_zone_config) / sizeof(k_zone_config[0])) + /* - * Max number of elements per zone. zinit rounds things up correctly - * Doing things this way permits each zone to have a different maximum size - * based on need, rather than just guessing; it also - * means its patchable in case you're wrong! + * Many kalloc() allocations are for small structures containing a few + * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by + * size normalized to the minimum alignment, finds the right zone index + * for them in one dereference. */ -unsigned long k_zone_max[16] = { - 1024, /* 1 Byte */ - 1024, /* 2 Byte */ - 1024, /* 4 Byte */ - 1024, /* 8 Byte */ - 1024, /* 16 Byte */ - 4096, /* 32 Byte */ - 4096, /* 64 Byte */ - 4096, /* 128 Byte */ - 4096, /* 256 Byte */ - 1024, /* 512 Byte */ - 1024, /* 1024 Byte */ - 1024, /* 2048 Byte */ - 1024, /* 4096 Byte */ - 4096, /* 8192 Byte */ - 64, /* 16384 Byte */ - 64, /* 32768 Byte */ -}; + +#define INDEX_ZDLUT(size) \ + (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN) +#define N_K_ZDLUT (2048 / KALLOC_MINALIGN) + /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */ +#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN) + +static int8_t k_zone_dlut[N_K_ZDLUT]; /* table of indices into k_zone[] */ + +/* + * If there's no hit in the DLUT, then start searching from k_zindex_start. + */ +static int k_zindex_start; + +static zone_t k_zone[MAX_K_ZONE]; + +/* #define KALLOC_DEBUG 1 */ /* forward declarations */ -void * kalloc_canblock( - vm_size_t size, - boolean_t canblock); + +lck_grp_t kalloc_lck_grp; +lck_mtx_t kalloc_lock; + +#define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock) +#define kalloc_unlock() lck_mtx_unlock(&kalloc_lock) /* OSMalloc local data declarations */ static queue_head_t OSMalloc_tag_list; -decl_simple_lock_data(static,OSMalloc_tag_lock) +lck_grp_t *OSMalloc_tag_lck_grp; +lck_mtx_t OSMalloc_tag_lock; + +#define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock) +#define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock) + /* OSMalloc forward declarations */ void OSMalloc_init(void); @@ -169,281 +282,523 @@ kalloc_init( { kern_return_t retval; vm_offset_t min; - vm_size_t size; - register int i; + vm_size_t size, kalloc_map_size; + vm_map_kernel_flags_t vmk_flags; + + /* + * Scale the kalloc_map_size to physical memory size: stay below + * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel). + */ + kalloc_map_size = (vm_size_t)(sane_size >> 5); +#if !__LP64__ + if (kalloc_map_size > KALLOC_MAP_SIZE_MAX) + kalloc_map_size = KALLOC_MAP_SIZE_MAX; +#endif /* !__LP64__ */ + if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) + kalloc_map_size = KALLOC_MAP_SIZE_MIN; + + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, VM_FLAGS_ANYWHERE, &kalloc_map); + FALSE, + (VM_FLAGS_ANYWHERE), + vmk_flags, + VM_KERN_MEMORY_KALLOC, + &kalloc_map); if (retval != KERN_SUCCESS) panic("kalloc_init: kmem_suballoc failed"); + kalloc_map_min = min; + kalloc_map_max = min + kalloc_map_size - 1; + /* - * Ensure that zones up to size 8192 bytes exist. - * This is desirable because messages are allocated - * with kalloc, and messages up through size 8192 are common. + * Create zones up to a least 4 pages because small page-multiples are + * common allocations. Also ensure that zones up to size 16KB bytes exist. + * This is desirable because messages are allocated with kalloc(), and + * messages up through size 8192 are common. */ + kalloc_max = PAGE_SIZE << 2; + if (kalloc_max < KiB(16)) { + kalloc_max = KiB(16); + } + assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */ - if (PAGE_SIZE < 16*1024) - kalloc_max = 16*1024; - else - kalloc_max = PAGE_SIZE; kalloc_max_prerounded = kalloc_max / 2 + 1; + /* allocations larger than 16 times kalloc_max go directly to kernel map */ + kalloc_kernmap_size = (kalloc_max * 16) + 1; + kalloc_largest_allocated = kalloc_kernmap_size; /* - * Allocate a zone for each size we are going to handle. - * We specify non-paged memory. + * Allocate a zone for each size we are going to handle. */ - for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) { - if (size < KALLOC_MINSIZE) { - k_zone[i] = 0; - continue; - } - if (size == KALLOC_MINSIZE) { - first_k_zone = i; + for (int i = 0; i < MAX_K_ZONE && (size = k_zone_config[i].kzc_size) < kalloc_max; i++) { + k_zone[i] = zinit(size, size, size, k_zone_config[i].kzc_name); + + /* + * Don't charge the caller for the allocation, as we aren't sure how + * the memory will be handled. + */ + zone_change(k_zone[i], Z_CALLERACCT, FALSE); +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE); +#endif + zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE); + } + + /* + * Build the Direct LookUp Table for small allocations + */ + size = 0; + for (int i = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) { + int zindex = 0; + + while ((vm_size_t)k_zone_config[zindex].kzc_size < size) + zindex++; + + if (i == N_K_ZDLUT) { + k_zindex_start = zindex; + break; } - k_zone[i] = zinit(size, k_zone_max[i] * size, size, - k_zone_name[i]); + k_zone_dlut[i] = (int8_t)zindex; } - OSMalloc_init(); -} -void * -kalloc_canblock( - vm_size_t size, - boolean_t canblock) -{ - register int zindex; - register vm_size_t allocsize; +#ifdef KALLOC_DEBUG + printf("kalloc_init: k_zindex_start %d\n", k_zindex_start); /* - * If size is too large for a zone, then use kmem_alloc. - * (We use kmem_alloc instead of kmem_alloc_wired so that - * krealloc can use kmem_realloc.) + * Do a quick synthesis to see how well/badly we can + * find-a-zone for a given size. + * Useful when debugging/tweaking the array of zone sizes. + * Cache misses probably more critical than compare-branches! */ + for (int i = 0; i < MAX_K_ZONE; i++) { + vm_size_t testsize = (vm_size_t)k_zone_config[i].kzc_size - 1; + int compare = 0; + int zindex; - if (size >= kalloc_max_prerounded) { - void *addr; + if (testsize < MAX_SIZE_ZDLUT) { + compare += 1; /* 'if' (T) */ - /* kmem_alloc could block so we return if noblock */ - if (!canblock) { - return(0); - } - if (kmem_alloc(kalloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) - addr = 0; + long dindex = INDEX_ZDLUT(testsize); + zindex = (int)k_zone_dlut[dindex]; - if (addr) { - kalloc_large_inuse++; - kalloc_large_total += size; + } else if (testsize < kalloc_max_prerounded) { - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } - return(addr); - } + compare += 2; /* 'if' (F), 'if' (T) */ - /* compute the size of the block that we will actually allocate */ + zindex = k_zindex_start; + while ((vm_size_t)k_zone_config[zindex].kzc_size < testsize) { + zindex++; + compare++; /* 'while' (T) */ + } + compare++; /* 'while' (F) */ + } else + break; /* not zone-backed */ - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; + zone_t z = k_zone[zindex]; + printf("kalloc_init: req size %4lu: %11s took %d compare%s\n", + (unsigned long)testsize, z->zone_name, compare, + compare == 1 ? "" : "s"); } +#endif - /* allocate from the appropriate zone */ - assert(allocsize < kalloc_max); - return(zalloc_canblock(k_zone[zindex], canblock)); + lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL); + lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL); + OSMalloc_init(); +#ifdef MUTEX_ZONE + lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx"); +#endif } -void * -kalloc( - vm_size_t size) +/* + * Given an allocation size, return the kalloc zone it belongs to. + * Direct LookUp Table variant. + */ +static __inline zone_t +get_zone_dlut(vm_size_t size) { - return( kalloc_canblock(size, TRUE) ); + long dindex = INDEX_ZDLUT(size); + int zindex = (int)k_zone_dlut[dindex]; + return (k_zone[zindex]); } -void * -kalloc_noblock( - vm_size_t size) +/* As above, but linear search k_zone_config[] for the next zone that fits. */ + +static __inline zone_t +get_zone_search(vm_size_t size, int zindex) { - return( kalloc_canblock(size, FALSE) ); + assert(size < kalloc_max_prerounded); + + while ((vm_size_t)k_zone_config[zindex].kzc_size < size) + zindex++; + + assert(zindex < MAX_K_ZONE && + (vm_size_t)k_zone_config[zindex].kzc_size < kalloc_max); + + return (k_zone[zindex]); } +static vm_size_t +vm_map_lookup_kalloc_entry_locked( + vm_map_t map, + void *addr) +{ + boolean_t ret; + vm_map_entry_t vm_entry = NULL; + + ret = vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry); + if (!ret) { + panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n", + map, addr); + } + if (vm_entry->vme_start != (vm_map_offset_t)addr) { + panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + if (!vm_entry->vme_atomic) { + panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + return (vm_entry->vme_end - vm_entry->vme_start); +} -void -krealloc( - void **addrp, - vm_size_t old_size, - vm_size_t new_size, - simple_lock_t lock) +#if KASAN_KALLOC +/* + * KASAN kalloc stashes the original user-requested size away in the poisoned + * area. Return that directly. + */ +vm_size_t +kalloc_size(void *addr) { - register int zindex; - register vm_size_t allocsize; - void *naddr; + (void)vm_map_lookup_kalloc_entry_locked; /* silence warning */ + return kasan_user_size((vm_offset_t)addr); +} +#else +vm_size_t +kalloc_size( + void *addr) +{ + vm_map_t map; + vm_size_t size; - /* can only be used for increasing allocation size */ + size = zone_element_size(addr, NULL); + if (size) { + return size; + } + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + vm_map_lock_read(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + vm_map_unlock_read(map); + return size; +} +#endif - assert(new_size > old_size); +vm_size_t +kalloc_bucket_size( + vm_size_t size) +{ + zone_t z; + vm_map_t map; + + if (size < MAX_SIZE_ZDLUT) { + z = get_zone_dlut(size); + return z->elem_size; + } + + if (size < kalloc_max_prerounded) { + z = get_zone_search(size, k_zindex_start); + return z->elem_size; + } - /* if old_size is zero, then we are simply allocating */ + if (size >= kalloc_kernmap_size) + map = kernel_map; + else + map = kalloc_map; + + return vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); +} - if (old_size == 0) { - simple_unlock(lock); - naddr = kalloc(new_size); - simple_lock(lock); - *addrp = naddr; - return; +#if KASAN_KALLOC +vm_size_t +kfree_addr(void *addr) +{ + vm_size_t origsz = kalloc_size(addr); + kfree(addr, origsz); + return origsz; +} +#else +vm_size_t +kfree_addr( + void *addr) +{ + vm_map_t map; + vm_size_t size = 0; + kern_return_t ret; + zone_t z; + + size = zone_element_size(addr, &z); + if (size) { + DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, z->elem_size, void*, addr); + zfree(z, addr); + return size; } - /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */ - - if (old_size >= kalloc_max_prerounded) { - old_size = round_page(old_size); - new_size = round_page(new_size); - if (new_size > old_size) { + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) { + panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr); + } - if (KERN_SUCCESS != kmem_realloc(kalloc_map, - (vm_offset_t)*addrp, old_size, - (vm_offset_t *)&naddr, new_size)) { - panic("krealloc: kmem_realloc"); - naddr = 0; - } + vm_map_lock(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + ret = vm_map_remove_locked(map, + vm_map_trunc_page((vm_map_offset_t)addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((vm_map_offset_t)addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); + if (ret != KERN_SUCCESS) { + panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n", + addr, map, ret); + } + vm_map_unlock(map); + DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, size, void*, addr); + + kalloc_spin_lock(); + kalloc_large_total -= size; + kalloc_large_inuse--; + kalloc_unlock(); + + KALLOC_ZINFO_SFREE(size); + return size; +} +#endif - simple_lock(lock); - *addrp = (void *) naddr; +void * +kalloc_canblock( + vm_size_t * psize, + boolean_t canblock, + vm_allocation_site_t * site) +{ + zone_t z; + vm_size_t size; + void *addr; + vm_tag_t tag; - /* kmem_realloc() doesn't free old page range. */ - kmem_free(kalloc_map, (vm_offset_t)*addrp, old_size); + tag = VM_KERN_MEMORY_KALLOC; + size = *psize; - kalloc_large_total += (new_size - old_size); +#if KASAN_KALLOC + /* expand the allocation to accomodate redzones */ + vm_size_t req_size = size; + size = kasan_alloc_resize(req_size); +#endif - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* + * If size is too large for a zone, then use kmem_alloc. + * (We use kmem_alloc instead of kmem_alloc_kobject so that + * krealloc can use kmem_realloc.) + */ + vm_map_t alloc_map; + /* kmem_alloc could block so we return if noblock */ + if (!canblock) { + return(NULL); } - return; - } - /* compute the size of the block that we actually allocated */ +#if KASAN_KALLOC + /* large allocation - use guard pages instead of small redzones */ + size = round_page(req_size + 2 * PAGE_SIZE); + assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded); +#endif - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < old_size) { - allocsize <<= 1; - zindex++; - } + if (size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; - /* if new size fits in old block, then return */ + if (site) tag = vm_tag_alloc(site); - if (new_size <= allocsize) { - return; - } + if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) { + if (alloc_map != kernel_map) { + if (kalloc_fallback_count++ == 0) { + printf("%s: falling back to kernel_map\n", __func__); + } + if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) + addr = NULL; + } + else + addr = NULL; + } - /* if new size does not fit in zone, kmem_alloc it, else zalloc it */ + if (addr != NULL) { + kalloc_spin_lock(); + /* + * Thread-safe version of the workaround for 4740071 + * (a double FREE()) + */ + if (size > kalloc_largest_allocated) + kalloc_largest_allocated = size; - simple_unlock(lock); - if (new_size >= kalloc_max_prerounded) { - if (KERN_SUCCESS != kmem_alloc(kalloc_map, - (vm_offset_t *)&naddr, new_size)) { - panic("krealloc: kmem_alloc"); - simple_lock(lock); - *addrp = NULL; - return; - } - kalloc_large_inuse++; - kalloc_large_total += new_size; + kalloc_large_inuse++; + kalloc_large_total += size; + kalloc_large_sum += size; - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } else { - register int new_zindex; + if (kalloc_large_total > kalloc_large_max) + kalloc_large_max = kalloc_large_total; + + kalloc_unlock(); - allocsize <<= 1; - new_zindex = zindex + 1; - while (allocsize < new_size) { - allocsize <<= 1; - new_zindex++; + KALLOC_ZINFO_SALLOC(size); } - naddr = zalloc(k_zone[new_zindex]); +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE); +#else + *psize = round_page(size); +#endif + DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr); + return(addr); } - simple_lock(lock); +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif - /* copy existing data */ + assert(size <= z->elem_size); - bcopy((const char *)*addrp, (char *)naddr, old_size); +#if VM_MAX_TAG_ZONES + if (z->tags && site) + { + tag = vm_tag_alloc(site); + if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC; + } +#endif - /* free old block, and return */ + addr = zalloc_canblock_tag(z, canblock, size, tag); - zfree(k_zone[zindex], *addrp); +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE); - /* set up new address */ + /* For KASan, the redzone lives in any additional space, so don't + * expand the allocation. */ +#else + *psize = z->elem_size; +#endif - *addrp = (void *) naddr; + DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr); + return addr; } - void * -kget( - vm_size_t size) +kalloc_external( + vm_size_t size); +void * +kalloc_external( + vm_size_t size) { - register int zindex; - register vm_size_t allocsize; - - /* size must not be too large for a zone */ - - if (size >= kalloc_max_prerounded) { - /* This will never work, so we might as well panic */ - panic("kget"); - } - - /* compute the size of the block that we will actually allocate */ - - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - - /* allocate from the appropriate zone */ - - assert(allocsize < kalloc_max); - return(zget(k_zone[zindex])); + return( kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC) ); } +volatile SInt32 kfree_nop_count = 0; + void kfree( void *data, vm_size_t size) { - register int zindex; - register vm_size_t freesize; + zone_t z; - /* if size was too large for a zone, then use kmem_free */ +#if KASAN_KALLOC + /* + * Resize back to the real allocation size and hand off to the KASan + * quarantine. `data` may then point to a different allocation. + */ + vm_size_t user_size = size; + kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC); + data = (void *)kasan_dealloc((vm_address_t)data, &size); + kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true); + if (!data) { + return; + } +#endif - if (size >= kalloc_max_prerounded) { - kmem_free(kalloc_map, (vm_offset_t)data, size); + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* if size was too large for a zone, then use kmem_free */ + + vm_map_t alloc_map = kernel_map; + + if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) + alloc_map = kalloc_map; + if (size > kalloc_largest_allocated) { + /* + * work around double FREEs of small MALLOCs + * this used to end up being a nop + * since the pointer being freed from an + * alloc backed by the zalloc world could + * never show up in the kalloc_map... however, + * the kernel_map is a different issue... since it + * was released back into the zalloc pool, a pointer + * would have gotten written over the 'size' that + * the MALLOC was retaining in the first 4 bytes of + * the underlying allocation... that pointer ends up + * looking like a really big size on the 2nd FREE and + * pushes the kfree into the kernel_map... we + * end up removing a ton of virtual space before we panic + * this check causes us to ignore the kfree for a size + * that must be 'bogus'... note that it might not be due + * to the above scenario, but it would still be wrong and + * cause serious damage. + */ + + OSAddAtomic(1, &kfree_nop_count); + return; + } + kmem_free(alloc_map, (vm_offset_t)data, size); + kalloc_spin_lock(); kalloc_large_total -= size; kalloc_large_inuse--; - return; - } + kalloc_unlock(); - /* compute the size of the block that we actually allocated from */ +#if !KASAN_KALLOC + DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, data); +#endif - freesize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (freesize < size) { - freesize <<= 1; - zindex++; + KALLOC_ZINFO_SFREE(size); + return; } /* free to the appropriate zone */ - - assert(freesize < kalloc_max); - zfree(k_zone[zindex], data); +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif + assert(size <= z->elem_size); + DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, z->elem_size, void*, data); + zfree(z, data); } #ifdef MACH_BSD @@ -451,46 +806,22 @@ zone_t kalloc_zone( vm_size_t size) { - register int zindex = 0; - register vm_size_t allocsize; - - /* compute the size of the block that we will actually allocate */ - - allocsize = size; - if (size <= kalloc_max) { - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - return (k_zone[zindex]); - } + if (size < MAX_SIZE_ZDLUT) + return (get_zone_dlut(size)); + if (size <= kalloc_max) + return (get_zone_search(size, k_zindex_start)); return (ZONE_NULL); } #endif - -void -kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, - vm_size_t *alloc_size, int *collectable, int *exhaustable) -{ - *count = kalloc_large_inuse; - *cur_size = kalloc_large_total; - *max_size = kalloc_large_max; - *elem_size = kalloc_large_total / kalloc_large_inuse; - *alloc_size = kalloc_large_total / kalloc_large_inuse; - *collectable = 0; - *exhaustable = 0; -} - - void OSMalloc_init( void) { queue_init(&OSMalloc_tag_list); - simple_lock_init(&OSMalloc_tag_lock, 0); + + OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL); + lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL); } OSMallocTag @@ -509,11 +840,11 @@ OSMalloc_Tagalloc( OSMTag->OSMT_refcnt = 1; - strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); + strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); - simple_lock(&OSMalloc_tag_lock); + OSMalloc_tag_spin_lock(); enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); OSMTag->OSMT_state = OSMT_VALID; return(OSMTag); } @@ -523,9 +854,9 @@ OSMalloc_Tagref( OSMallocTag tag) { if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) - panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); - (void)hw_atomic_add((uint32_t *)(&tag->OSMT_refcnt), 1); + (void)hw_atomic_add(&tag->OSMT_refcnt, 1); } void @@ -533,16 +864,16 @@ OSMalloc_Tagrele( OSMallocTag tag) { if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) - panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); - if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) { - simple_lock(&OSMalloc_tag_lock); + OSMalloc_tag_spin_lock(); (void)remque((queue_entry_t)tag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); kfree((void*)tag, sizeof(*tag)); } else - panic("OSMalloc_Tagrele(): refcnt 0\n"); + panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name); } } @@ -551,12 +882,12 @@ OSMalloc_Tagfree( OSMallocTag tag) { if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) - panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state); + panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state); - if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { - simple_lock(&OSMalloc_tag_lock); + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { + OSMalloc_tag_spin_lock(); (void)remque((queue_entry_t)tag); - simple_unlock(&OSMalloc_tag_lock); + OSMalloc_tag_unlock(); kfree((void*)tag, sizeof(*tag)); } } @@ -572,11 +903,13 @@ OSMalloc( OSMalloc_Tagref(tag); if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) { - - if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) - panic("OSMalloc(): kmem_alloc_pageable() failed 0x%08X\n", kr); + if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) + addr = NULL; } else - addr = kalloc((vm_size_t)size); + addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); + + if (!addr) + OSMalloc_Tagrele(tag); return(addr); } @@ -593,7 +926,7 @@ OSMalloc_nowait( OSMalloc_Tagref(tag); /* XXX: use non-blocking kalloc for now */ - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -611,7 +944,7 @@ OSMalloc_noblock( return(NULL); OSMalloc_Tagref(tag); - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -628,7 +961,15 @@ OSFree( && (size & ~PAGE_MASK)) { kmem_free(kernel_map, (vm_offset_t)addr, size); } else - kfree((void*)addr, size); + kfree((void *)addr, size); OSMalloc_Tagrele(tag); } + +uint32_t +OSMalloc_size( + void *addr) +{ + return (uint32_t)kalloc_size(addr); +} +