X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/de355530ae67247cbd0da700edb3a2a1dae884c2..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/osfmk/kern/kalloc.c diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index a67c71d14..65e9df392 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -1,182 +1,33 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.19.5 1995/02/24 15:20:29 alanl - * Lock package cleanup. - * [95/02/15 alanl] - * - * Merge with DIPC2_SHARED. - * [1995/01/05 15:11:02 alanl] - * - * Revision 1.2.28.2 1994/11/10 06:12:50 dwm - * mk6 CR764 - s/spinlock/simple_lock/ (name change only) - * [1994/11/10 05:28:35 dwm] - * - * Revision 1.2.28.1 1994/11/04 10:07:40 dwm - * mk6 CR668 - 1.3b26 merge - * * Revision 1.2.2.4 1993/11/08 15:04:18 gm - * CR9710: Updated to new zinit() and zone_change() interfaces. - * * End1.3merge - * [1994/11/04 09:25:48 dwm] - * - * Revision 1.2.19.3 1994/09/23 02:20:52 ezf - * change marker to not FREE - * [1994/09/22 21:33:57 ezf] - * - * Revision 1.2.19.2 1994/06/14 18:36:36 bolinger - * NMK17.2 merge: Replace simple_lock ops. - * [1994/06/14 18:35:17 bolinger] - * - * Revision 1.2.19.1 1994/06/14 17:04:23 bolinger - * Merge up to NMK17.2. - * [1994/06/14 16:54:19 bolinger] - * - * Revision 1.2.23.3 1994/10/14 12:24:33 sjs - * Removed krealloc_spinl routine: the newer locking scheme makes it - * obsolete. - * [94/10/13 sjs] - * - * Revision 1.2.23.2 1994/08/11 14:42:46 rwd - * Post merge cleanup - * [94/08/09 rwd] - * - * Changed zcollectable to use zchange. - * [94/08/04 rwd] - * - * Revision 1.2.17.2 1994/07/08 01:58:45 alanl - * Change comment to match function name. - * [1994/07/08 01:47:59 alanl] - * - * Revision 1.2.17.1 1994/05/26 16:20:38 sjs - * Added krealloc_spinl: same as krealloc but uses spin locks. - * [94/05/25 sjs] - * - * Revision 1.2.23.1 1994/08/04 02:24:55 mmp - * Added krealloc_spinl: same as krealloc but uses spin locks. - * [94/05/25 sjs] - * - * Revision 1.2.13.1 1994/02/11 14:27:12 paire - * Changed krealloc() to make it work on a MP system. Added a new parameter - * which is the simple lock that should be held while modifying the memory - * area already initialized. - * Change from NMK16.1 [93/09/02 paire] - * - * Do not set debug for kalloc zones as default. It wastes - * to much space. - * Change from NMK16.1 [93/08/16 bernadat] - * [94/02/07 paire] - * - * Revision 1.2.2.3 1993/07/28 17:15:44 bernard - * CR9523 -- Prototypes. - * [1993/07/27 20:14:12 bernard] - * - * Revision 1.2.2.2 1993/06/02 23:37:46 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:12:59 jeffc] - * - * Revision 1.2 1992/12/07 21:28:42 robert - * integrate any changes below for 14.0 (branch from 13.16 base) - * - * Joseph Barrera (jsb) at Carnegie-Mellon University 11-Sep-92 - * Added krealloc. Added kalloc_max_prerounded for quicker choice between - * zalloc and kmem_alloc. Renamed MINSIZE to KALLOC_MINSIZE. - * [1992/12/06 19:47:16 robert] - * - * Revision 1.1 1992/09/30 02:09:23 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.9 91/05/14 16:43:17 mrt - * Correcting copyright - * - * Revision 2.8 91/03/16 14:50:37 rpd - * Updated for new kmem_alloc interface. - * [91/03/03 rpd] - * - * Revision 2.7 91/02/05 17:27:22 mrt - * Changed to new Mach copyright - * [91/02/01 16:14:12 mrt] - * - * Revision 2.6 90/06/19 22:59:06 rpd - * Made the big kalloc zones collectable. - * [90/06/05 rpd] - * - * Revision 2.5 90/06/02 14:54:47 rpd - * Added kalloc_max, kalloc_map_size. - * [90/03/26 22:06:39 rpd] - * - * Revision 2.4 90/01/11 11:43:13 dbg - * De-lint. - * [89/12/06 dbg] - * - * Revision 2.3 89/09/08 11:25:51 dbg - * MACH_KERNEL: remove non-MACH data types. - * [89/07/11 dbg] - * - * Revision 2.2 89/08/31 16:18:59 rwd - * First Checkin - * [89/08/23 15:41:37 rwd] - * - * Revision 2.6 89/08/02 08:03:28 jsb - * Make all kalloc zones 8 MB big. (No more kalloc panics!) - * [89/08/01 14:10:17 jsb] - * - * Revision 2.4 89/04/05 13:03:10 rvb - * Guarantee a zone max of at least 100 elements or 10 pages - * which ever is greater. Afs (AllocDouble()) puts a great demand - * on the 2048 zone and used to blow away. - * [89/03/09 rvb] - * - * Revision 2.3 89/02/25 18:04:39 gm0w - * Changes for cleanup. - * - * Revision 2.2 89/01/18 02:07:04 jsb - * Give each kalloc zone a meaningful name (for panics); - * create a zone for each power of 2 between MINSIZE - * and PAGE_SIZE, instead of using (obsoleted) NQUEUES. - * [89/01/17 10:16:33 jsb] - * - * - * 13-Feb-88 John Seamons (jks) at NeXT - * Updated to use kmem routines instead of vmem routines. - * - * 21-Jun-85 Avadis Tevanian (avie) at Carnegie-Mellon University - * Created. - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -216,81 +67,206 @@ #include #include +#include #include #include #include #include #include -#include +#include #include #include #include +#include +#include + +#include #ifdef MACH_BSD zone_t kalloc_zone(vm_size_t); #endif +#define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024) +#define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024) vm_map_t kalloc_map; -vm_size_t kalloc_map_size = 16 * 1024 * 1024; vm_size_t kalloc_max; vm_size_t kalloc_max_prerounded; +vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */ + +/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */ +unsigned long kalloc_fallback_count; unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; vm_size_t kalloc_large_max; +vm_size_t kalloc_largest_allocated = 0; +uint64_t kalloc_large_sum; + +int kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */ + +vm_offset_t kalloc_map_min; +vm_offset_t kalloc_map_max; + +#ifdef MUTEX_ZONE +/* + * Diagnostic code to track mutexes separately rather than via the 2^ zones + */ + zone_t lck_mtx_zone; +#endif + +static void +KALLOC_ZINFO_SALLOC(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} + +static void +KALLOC_ZINFO_SFREE(vm_size_t bytes) +{ + thread_t thr = current_thread(); + ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes); +} /* - * All allocations of size less than kalloc_max are rounded to the - * next highest power of 2. This allocator is built on top of - * the zone allocator. A zone is created for each potential size - * that we are willing to get in small blocks. + * All allocations of size less than kalloc_max are rounded to the next nearest + * sized zone. This allocator is built on top of the zone allocator. A zone + * is created for each potential size that we are willing to get in small + * blocks. * - * We assume that kalloc_max is not greater than 64K; - * thus 16 is a safe array size for k_zone and k_zone_name. + * We assume that kalloc_max is not greater than 64K; * - * Note that kalloc_max is somewhat confusingly named. - * It represents the first power of two for which no zone exists. - * kalloc_max_prerounded is the smallest allocation size, before - * rounding, for which no zone exists. + * Note that kalloc_max is somewhat confusingly named. It represents the first + * power of two for which no zone exists. kalloc_max_prerounded is the + * smallest allocation size, before rounding, for which no zone exists. + * + * Also if the allocation size is more than kalloc_kernmap_size then allocate + * from kernel map rather than kalloc_map. */ -int first_k_zone = -1; -struct zone *k_zone[16]; -static char *k_zone_name[16] = { - "kalloc.1", "kalloc.2", - "kalloc.4", "kalloc.8", - "kalloc.16", "kalloc.32", - "kalloc.64", "kalloc.128", - "kalloc.256", "kalloc.512", - "kalloc.1024", "kalloc.2048", - "kalloc.4096", "kalloc.8192", - "kalloc.16384", "kalloc.32768" +#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) +#define KiB(x) (1024 * (x)) + +static const struct kalloc_zone_config { + int kzc_size; + const char *kzc_name; +} k_zone_config[] = { +#define KZC_ENTRY(SIZE) { .kzc_size = (SIZE), .kzc_name = "kalloc." #SIZE } + +#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4 + /* 64-bit targets, generally */ + KZC_ENTRY(16), + KZC_ENTRY(32), + KZC_ENTRY(48), + KZC_ENTRY(64), + KZC_ENTRY(80), + KZC_ENTRY(96), + KZC_ENTRY(128), + KZC_ENTRY(160), + KZC_ENTRY(192), + KZC_ENTRY(224), + KZC_ENTRY(256), + KZC_ENTRY(288), + KZC_ENTRY(368), + KZC_ENTRY(400), + KZC_ENTRY(512), + KZC_ENTRY(576), + KZC_ENTRY(768), + KZC_ENTRY(1024), + KZC_ENTRY(1152), + KZC_ENTRY(1280), + KZC_ENTRY(1664), + KZC_ENTRY(2048), +#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3 + /* 32-bit targets, generally */ + KZC_ENTRY(8), + KZC_ENTRY(16), + KZC_ENTRY(24), + KZC_ENTRY(32), + KZC_ENTRY(40), + KZC_ENTRY(48), + KZC_ENTRY(64), + KZC_ENTRY(72), + KZC_ENTRY(88), + KZC_ENTRY(112), + KZC_ENTRY(128), + KZC_ENTRY(192), + KZC_ENTRY(256), + KZC_ENTRY(288), + KZC_ENTRY(384), + KZC_ENTRY(440), + KZC_ENTRY(512), + KZC_ENTRY(576), + KZC_ENTRY(768), + KZC_ENTRY(1024), + KZC_ENTRY(1152), + KZC_ENTRY(1536), + KZC_ENTRY(2048), + KZC_ENTRY(2128), + KZC_ENTRY(3072), +#else +#error missing or invalid zone size parameters for kalloc +#endif + + /* all configurations get these zones */ + KZC_ENTRY(4096), + KZC_ENTRY(6144), + KZC_ENTRY(8192), + KZC_ENTRY(16384), + KZC_ENTRY(32768), +#undef KZC_ENTRY }; +#define MAX_K_ZONE (int)(sizeof(k_zone_config) / sizeof(k_zone_config[0])) + /* - * Max number of elements per zone. zinit rounds things up correctly - * Doing things this way permits each zone to have a different maximum size - * based on need, rather than just guessing; it also - * means its patchable in case you're wrong! + * Many kalloc() allocations are for small structures containing a few + * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by + * size normalized to the minimum alignment, finds the right zone index + * for them in one dereference. */ -unsigned long k_zone_max[16] = { - 1024, /* 1 Byte */ - 1024, /* 2 Byte */ - 1024, /* 4 Byte */ - 1024, /* 8 Byte */ - 1024, /* 16 Byte */ - 4096, /* 32 Byte */ - 4096, /* 64 Byte */ - 4096, /* 128 Byte */ - 4096, /* 256 Byte */ - 1024, /* 512 Byte */ - 1024, /* 1024 Byte */ - 1024, /* 2048 Byte */ - 1024, /* 4096 Byte */ - 4096, /* 8192 Byte */ - 64, /* 16384 Byte */ - 64, /* 32768 Byte */ -}; + +#define INDEX_ZDLUT(size) \ + (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN) +#define N_K_ZDLUT (2048 / KALLOC_MINALIGN) + /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */ +#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN) + +static int8_t k_zone_dlut[N_K_ZDLUT]; /* table of indices into k_zone[] */ + +/* + * If there's no hit in the DLUT, then start searching from k_zindex_start. + */ +static int k_zindex_start; + +static zone_t k_zone[MAX_K_ZONE]; + +/* #define KALLOC_DEBUG 1 */ + +/* forward declarations */ + +lck_grp_t kalloc_lck_grp; +lck_mtx_t kalloc_lock; + +#define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock) +#define kalloc_unlock() lck_mtx_unlock(&kalloc_lock) + + +/* OSMalloc local data declarations */ +static +queue_head_t OSMalloc_tag_list; + +lck_grp_t *OSMalloc_tag_lck_grp; +lck_mtx_t OSMalloc_tag_lock; + +#define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock) +#define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock) + + +/* OSMalloc forward declarations */ +void OSMalloc_init(void); +void OSMalloc_Tagref(OSMallocTag tag); +void OSMalloc_Tagrele(OSMallocTag tag); /* * Initialize the memory allocator. This should be called only @@ -306,314 +282,694 @@ kalloc_init( { kern_return_t retval; vm_offset_t min; - vm_size_t size; - register int i; + vm_size_t size, kalloc_map_size; + vm_map_kernel_flags_t vmk_flags; + + /* + * Scale the kalloc_map_size to physical memory size: stay below + * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel). + */ + kalloc_map_size = (vm_size_t)(sane_size >> 5); +#if !__LP64__ + if (kalloc_map_size > KALLOC_MAP_SIZE_MAX) + kalloc_map_size = KALLOC_MAP_SIZE_MAX; +#endif /* !__LP64__ */ + if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) + kalloc_map_size = KALLOC_MAP_SIZE_MIN; + + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, TRUE, &kalloc_map); + FALSE, + (VM_FLAGS_ANYWHERE), + vmk_flags, + VM_KERN_MEMORY_KALLOC, + &kalloc_map); + if (retval != KERN_SUCCESS) panic("kalloc_init: kmem_suballoc failed"); + kalloc_map_min = min; + kalloc_map_max = min + kalloc_map_size - 1; + /* - * Ensure that zones up to size 8192 bytes exist. - * This is desirable because messages are allocated - * with kalloc, and messages up through size 8192 are common. + * Create zones up to a least 4 pages because small page-multiples are + * common allocations. Also ensure that zones up to size 16KB bytes exist. + * This is desirable because messages are allocated with kalloc(), and + * messages up through size 8192 are common. */ + kalloc_max = PAGE_SIZE << 2; + if (kalloc_max < KiB(16)) { + kalloc_max = KiB(16); + } + assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */ - if (PAGE_SIZE < 16*1024) - kalloc_max = 16*1024; - else - kalloc_max = PAGE_SIZE; kalloc_max_prerounded = kalloc_max / 2 + 1; + /* allocations larger than 16 times kalloc_max go directly to kernel map */ + kalloc_kernmap_size = (kalloc_max * 16) + 1; + kalloc_largest_allocated = kalloc_kernmap_size; /* - * Allocate a zone for each size we are going to handle. - * We specify non-paged memory. + * Allocate a zone for each size we are going to handle. */ - for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) { - if (size < KALLOC_MINSIZE) { - k_zone[i] = 0; - continue; - } - if (size == KALLOC_MINSIZE) { - first_k_zone = i; + for (int i = 0; i < MAX_K_ZONE && (size = k_zone_config[i].kzc_size) < kalloc_max; i++) { + k_zone[i] = zinit(size, size, size, k_zone_config[i].kzc_name); + + /* + * Don't charge the caller for the allocation, as we aren't sure how + * the memory will be handled. + */ + zone_change(k_zone[i], Z_CALLERACCT, FALSE); +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE); +#endif + zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE); + } + + /* + * Build the Direct LookUp Table for small allocations + */ + size = 0; + for (int i = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) { + int zindex = 0; + + while ((vm_size_t)k_zone_config[zindex].kzc_size < size) + zindex++; + + if (i == N_K_ZDLUT) { + k_zindex_start = zindex; + break; } - k_zone[i] = zinit(size, k_zone_max[i] * size, size, - k_zone_name[i]); + k_zone_dlut[i] = (int8_t)zindex; } -} -vm_offset_t -kalloc_canblock( - vm_size_t size, - boolean_t canblock) -{ - register int zindex; - register vm_size_t allocsize; +#ifdef KALLOC_DEBUG + printf("kalloc_init: k_zindex_start %d\n", k_zindex_start); /* - * If size is too large for a zone, then use kmem_alloc. - * (We use kmem_alloc instead of kmem_alloc_wired so that - * krealloc can use kmem_realloc.) + * Do a quick synthesis to see how well/badly we can + * find-a-zone for a given size. + * Useful when debugging/tweaking the array of zone sizes. + * Cache misses probably more critical than compare-branches! */ + for (int i = 0; i < MAX_K_ZONE; i++) { + vm_size_t testsize = (vm_size_t)k_zone_config[i].kzc_size - 1; + int compare = 0; + int zindex; - if (size >= kalloc_max_prerounded) { - vm_offset_t addr; + if (testsize < MAX_SIZE_ZDLUT) { + compare += 1; /* 'if' (T) */ - /* kmem_alloc could block so we return if noblock */ - if (!canblock) { - return(0); - } - if (kmem_alloc(kalloc_map, &addr, size) != KERN_SUCCESS) - addr = 0; + long dindex = INDEX_ZDLUT(testsize); + zindex = (int)k_zone_dlut[dindex]; - if (addr) { - kalloc_large_inuse++; - kalloc_large_total += size; + } else if (testsize < kalloc_max_prerounded) { - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } - return(addr); + compare += 2; /* 'if' (F), 'if' (T) */ + + zindex = k_zindex_start; + while ((vm_size_t)k_zone_config[zindex].kzc_size < testsize) { + zindex++; + compare++; /* 'while' (T) */ + } + compare++; /* 'while' (F) */ + } else + break; /* not zone-backed */ + + zone_t z = k_zone[zindex]; + printf("kalloc_init: req size %4lu: %11s took %d compare%s\n", + (unsigned long)testsize, z->zone_name, compare, + compare == 1 ? "" : "s"); } +#endif + + lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL); + lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL); + OSMalloc_init(); +#ifdef MUTEX_ZONE + lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx"); +#endif +} - /* compute the size of the block that we will actually allocate */ +/* + * Given an allocation size, return the kalloc zone it belongs to. + * Direct LookUp Table variant. + */ +static __inline zone_t +get_zone_dlut(vm_size_t size) +{ + long dindex = INDEX_ZDLUT(size); + int zindex = (int)k_zone_dlut[dindex]; + return (k_zone[zindex]); +} - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; +/* As above, but linear search k_zone_config[] for the next zone that fits. */ + +static __inline zone_t +get_zone_search(vm_size_t size, int zindex) +{ + assert(size < kalloc_max_prerounded); + + while ((vm_size_t)k_zone_config[zindex].kzc_size < size) zindex++; - } - /* allocate from the appropriate zone */ + assert(zindex < MAX_K_ZONE && + (vm_size_t)k_zone_config[zindex].kzc_size < kalloc_max); - assert(allocsize < kalloc_max); - return(zalloc_canblock(k_zone[zindex], canblock)); + return (k_zone[zindex]); } -vm_offset_t -kalloc( - vm_size_t size) +static vm_size_t +vm_map_lookup_kalloc_entry_locked( + vm_map_t map, + void *addr) { - return( kalloc_canblock(size, TRUE) ); + boolean_t ret; + vm_map_entry_t vm_entry = NULL; + + ret = vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry); + if (!ret) { + panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n", + map, addr); + } + if (vm_entry->vme_start != (vm_map_offset_t)addr) { + panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + if (!vm_entry->vme_atomic) { + panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + return (vm_entry->vme_end - vm_entry->vme_start); } -vm_offset_t -kalloc_noblock( - vm_size_t size) +#if KASAN_KALLOC +/* + * KASAN kalloc stashes the original user-requested size away in the poisoned + * area. Return that directly. + */ +vm_size_t +kalloc_size(void *addr) { - return( kalloc_canblock(size, FALSE) ); + (void)vm_map_lookup_kalloc_entry_locked; /* silence warning */ + return kasan_user_size((vm_offset_t)addr); } +#else +vm_size_t +kalloc_size( + void *addr) +{ + vm_map_t map; + vm_size_t size; + size = zone_element_size(addr, NULL); + if (size) { + return size; + } + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + vm_map_lock_read(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + vm_map_unlock_read(map); + return size; +} +#endif -void -krealloc( - vm_offset_t *addrp, - vm_size_t old_size, - vm_size_t new_size, - simple_lock_t lock) +vm_size_t +kalloc_bucket_size( + vm_size_t size) { - register int zindex; - register vm_size_t allocsize; - vm_offset_t naddr; + zone_t z; + vm_map_t map; + + if (size < MAX_SIZE_ZDLUT) { + z = get_zone_dlut(size); + return z->elem_size; + } + + if (size < kalloc_max_prerounded) { + z = get_zone_search(size, k_zindex_start); + return z->elem_size; + } - /* can only be used for increasing allocation size */ + if (size >= kalloc_kernmap_size) + map = kernel_map; + else + map = kalloc_map; + + return vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); +} - assert(new_size > old_size); +#if KASAN_KALLOC +vm_size_t +kfree_addr(void *addr) +{ + vm_size_t origsz = kalloc_size(addr); + kfree(addr, origsz); + return origsz; +} +#else +vm_size_t +kfree_addr( + void *addr) +{ + vm_map_t map; + vm_size_t size = 0; + kern_return_t ret; + zone_t z; + + size = zone_element_size(addr, &z); + if (size) { + DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, z->elem_size, void*, addr); + zfree(z, addr); + return size; + } - /* if old_size is zero, then we are simply allocating */ + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) { + panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr); + } - if (old_size == 0) { - simple_unlock(lock); - naddr = kalloc(new_size); - simple_lock(lock); - *addrp = naddr; - return; + vm_map_lock(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + ret = vm_map_remove_locked(map, + vm_map_trunc_page((vm_map_offset_t)addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((vm_map_offset_t)addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); + if (ret != KERN_SUCCESS) { + panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n", + addr, map, ret); } + vm_map_unlock(map); + DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, size, void*, addr); + + kalloc_spin_lock(); + kalloc_large_total -= size; + kalloc_large_inuse--; + kalloc_unlock(); + + KALLOC_ZINFO_SFREE(size); + return size; +} +#endif + +void * +kalloc_canblock( + vm_size_t * psize, + boolean_t canblock, + vm_allocation_site_t * site) +{ + zone_t z; + vm_size_t size; + void *addr; + vm_tag_t tag; - /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */ + tag = VM_KERN_MEMORY_KALLOC; + size = *psize; - if (old_size >= kalloc_max_prerounded) { - old_size = round_page(old_size); - new_size = round_page(new_size); - if (new_size > old_size) { +#if KASAN_KALLOC + /* expand the allocation to accomodate redzones */ + vm_size_t req_size = size; + size = kasan_alloc_resize(req_size); +#endif - if (kmem_realloc(kalloc_map, *addrp, old_size, &naddr, - new_size) != KERN_SUCCESS) { - panic("krealloc: kmem_realloc"); - naddr = 0; - } + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* + * If size is too large for a zone, then use kmem_alloc. + * (We use kmem_alloc instead of kmem_alloc_kobject so that + * krealloc can use kmem_realloc.) + */ + vm_map_t alloc_map; + + /* kmem_alloc could block so we return if noblock */ + if (!canblock) { + return(NULL); + } + +#if KASAN_KALLOC + /* large allocation - use guard pages instead of small redzones */ + size = round_page(req_size + 2 * PAGE_SIZE); + assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded); +#endif + + if (size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; + + if (site) tag = vm_tag_alloc(site); - simple_lock(lock); - *addrp = naddr; + if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) { + if (alloc_map != kernel_map) { + if (kalloc_fallback_count++ == 0) { + printf("%s: falling back to kernel_map\n", __func__); + } + if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) + addr = NULL; + } + else + addr = NULL; + } - /* kmem_realloc() doesn't free old page range. */ - kmem_free(kalloc_map, *addrp, old_size); + if (addr != NULL) { + kalloc_spin_lock(); + /* + * Thread-safe version of the workaround for 4740071 + * (a double FREE()) + */ + if (size > kalloc_largest_allocated) + kalloc_largest_allocated = size; - kalloc_large_total += (new_size - old_size); + kalloc_large_inuse++; + kalloc_large_total += size; + kalloc_large_sum += size; if (kalloc_large_total > kalloc_large_max) kalloc_large_max = kalloc_large_total; + + kalloc_unlock(); + + KALLOC_ZINFO_SALLOC(size); } - return; +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE); +#else + *psize = round_page(size); +#endif + DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr); + return(addr); } +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif - /* compute the size of the block that we actually allocated */ + assert(size <= z->elem_size); - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < old_size) { - allocsize <<= 1; - zindex++; - } +#if VM_MAX_TAG_ZONES + if (z->tags && site) + { + tag = vm_tag_alloc(site); + if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC; + } +#endif - /* if new size fits in old block, then return */ + addr = zalloc_canblock_tag(z, canblock, size, tag); - if (new_size <= allocsize) { - return; - } +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE); + + /* For KASan, the redzone lives in any additional space, so don't + * expand the allocation. */ +#else + *psize = z->elem_size; +#endif - /* if new size does not fit in zone, kmem_alloc it, else zalloc it */ + DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr); + return addr; +} - simple_unlock(lock); - if (new_size >= kalloc_max_prerounded) { - if (kmem_alloc(kalloc_map, &naddr, new_size) != KERN_SUCCESS) { - panic("krealloc: kmem_alloc"); - simple_lock(lock); - *addrp = 0; - return; - } - kalloc_large_inuse++; - kalloc_large_total += new_size; +void * +kalloc_external( + vm_size_t size); +void * +kalloc_external( + vm_size_t size) +{ + return( kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC) ); +} - if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; - } else { - register int new_zindex; +volatile SInt32 kfree_nop_count = 0; - allocsize <<= 1; - new_zindex = zindex + 1; - while (allocsize < new_size) { - allocsize <<= 1; - new_zindex++; - } - naddr = zalloc(k_zone[new_zindex]); +void +kfree( + void *data, + vm_size_t size) +{ + zone_t z; + +#if KASAN_KALLOC + /* + * Resize back to the real allocation size and hand off to the KASan + * quarantine. `data` may then point to a different allocation. + */ + vm_size_t user_size = size; + kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC); + data = (void *)kasan_dealloc((vm_address_t)data, &size); + kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true); + if (!data) { + return; } - simple_lock(lock); +#endif + + if (size < MAX_SIZE_ZDLUT) + z = get_zone_dlut(size); + else if (size < kalloc_max_prerounded) + z = get_zone_search(size, k_zindex_start); + else { + /* if size was too large for a zone, then use kmem_free */ + + vm_map_t alloc_map = kernel_map; + + if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) + alloc_map = kalloc_map; + if (size > kalloc_largest_allocated) { + /* + * work around double FREEs of small MALLOCs + * this used to end up being a nop + * since the pointer being freed from an + * alloc backed by the zalloc world could + * never show up in the kalloc_map... however, + * the kernel_map is a different issue... since it + * was released back into the zalloc pool, a pointer + * would have gotten written over the 'size' that + * the MALLOC was retaining in the first 4 bytes of + * the underlying allocation... that pointer ends up + * looking like a really big size on the 2nd FREE and + * pushes the kfree into the kernel_map... we + * end up removing a ton of virtual space before we panic + * this check causes us to ignore the kfree for a size + * that must be 'bogus'... note that it might not be due + * to the above scenario, but it would still be wrong and + * cause serious damage. + */ + + OSAddAtomic(1, &kfree_nop_count); + return; + } + kmem_free(alloc_map, (vm_offset_t)data, size); + kalloc_spin_lock(); - /* copy existing data */ + kalloc_large_total -= size; + kalloc_large_inuse--; - bcopy((const char *)*addrp, (char *)naddr, old_size); + kalloc_unlock(); - /* free old block, and return */ +#if !KASAN_KALLOC + DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, data); +#endif - zfree(k_zone[zindex], *addrp); + KALLOC_ZINFO_SFREE(size); + return; + } - /* set up new address */ + /* free to the appropriate zone */ +#ifdef KALLOC_DEBUG + if (size > z->elem_size) + panic("%s: z %p (%s) but requested size %lu", __func__, + z, z->zone_name, (unsigned long)size); +#endif + assert(size <= z->elem_size); + DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, z->elem_size, void*, data); + zfree(z, data); +} - *addrp = naddr; +#ifdef MACH_BSD +zone_t +kalloc_zone( + vm_size_t size) +{ + if (size < MAX_SIZE_ZDLUT) + return (get_zone_dlut(size)); + if (size <= kalloc_max) + return (get_zone_search(size, k_zindex_start)); + return (ZONE_NULL); } +#endif + +void +OSMalloc_init( + void) +{ + queue_init(&OSMalloc_tag_list); + OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL); + lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL); +} -vm_offset_t -kget( - vm_size_t size) +OSMallocTag +OSMalloc_Tagalloc( + const char *str, + uint32_t flags) { - register int zindex; - register vm_size_t allocsize; + OSMallocTag OSMTag; - /* size must not be too large for a zone */ + OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag)); - if (size >= kalloc_max_prerounded) { - /* This will never work, so we might as well panic */ - panic("kget"); - } + bzero((void *)OSMTag, sizeof(*OSMTag)); - /* compute the size of the block that we will actually allocate */ + if (flags & OSMT_PAGEABLE) + OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE; - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } + OSMTag->OSMT_refcnt = 1; - /* allocate from the appropriate zone */ + strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); - assert(allocsize < kalloc_max); - return(zget(k_zone[zindex])); + OSMalloc_tag_spin_lock(); + enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag); + OSMalloc_tag_unlock(); + OSMTag->OSMT_state = OSMT_VALID; + return(OSMTag); } void -kfree( - vm_offset_t data, - vm_size_t size) +OSMalloc_Tagref( + OSMallocTag tag) { - register int zindex; - register vm_size_t freesize; - - /* if size was too large for a zone, then use kmem_free */ + if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); - if (size >= kalloc_max_prerounded) { - kmem_free(kalloc_map, data, size); + (void)hw_atomic_add(&tag->OSMT_refcnt, 1); +} - kalloc_large_total -= size; - kalloc_large_inuse--; +void +OSMalloc_Tagrele( + OSMallocTag tag) +{ + if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) + panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state); + + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { + if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) { + OSMalloc_tag_spin_lock(); + (void)remque((queue_entry_t)tag); + OSMalloc_tag_unlock(); + kfree((void*)tag, sizeof(*tag)); + } else + panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name); + } +} - return; +void +OSMalloc_Tagfree( + OSMallocTag tag) +{ + if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) + panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state); + + if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) { + OSMalloc_tag_spin_lock(); + (void)remque((queue_entry_t)tag); + OSMalloc_tag_unlock(); + kfree((void*)tag, sizeof(*tag)); } +} - /* compute the size of the block that we actually allocated from */ +void * +OSMalloc( + uint32_t size, + OSMallocTag tag) +{ + void *addr=NULL; + kern_return_t kr; - freesize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (freesize < size) { - freesize <<= 1; - zindex++; - } + OSMalloc_Tagref(tag); + if ((tag->OSMT_attr & OSMT_PAGEABLE) + && (size & ~PAGE_MASK)) { + if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) + addr = NULL; + } else + addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); - /* free to the appropriate zone */ + if (!addr) + OSMalloc_Tagrele(tag); - assert(freesize < kalloc_max); - zfree(k_zone[zindex], data); + return(addr); } -#ifdef MACH_BSD -zone_t -kalloc_zone( - vm_size_t size) +void * +OSMalloc_nowait( + uint32_t size, + OSMallocTag tag) { - register int zindex = 0; - register vm_size_t allocsize; + void *addr=NULL; - /* compute the size of the block that we will actually allocate */ + if (tag->OSMT_attr & OSMT_PAGEABLE) + return(NULL); - allocsize = size; - if (size <= kalloc_max) { - allocsize = KALLOC_MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - return (k_zone[zindex]); - } - return (ZONE_NULL); + OSMalloc_Tagref(tag); + /* XXX: use non-blocking kalloc for now */ + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); + if (addr == NULL) + OSMalloc_Tagrele(tag); + + return(addr); +} + +void * +OSMalloc_noblock( + uint32_t size, + OSMallocTag tag) +{ + void *addr=NULL; + + if (tag->OSMT_attr & OSMT_PAGEABLE) + return(NULL); + + OSMalloc_Tagref(tag); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); + if (addr == NULL) + OSMalloc_Tagrele(tag); + + return(addr); } -#endif +void +OSFree( + void *addr, + uint32_t size, + OSMallocTag tag) +{ + if ((tag->OSMT_attr & OSMT_PAGEABLE) + && (size & ~PAGE_MASK)) { + kmem_free(kernel_map, (vm_offset_t)addr, size); + } else + kfree((void *)addr, size); + OSMalloc_Tagrele(tag); +} -kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, - vm_size_t *alloc_size, int *collectable, int *exhaustable) +uint32_t +OSMalloc_size( + void *addr) { - *count = kalloc_large_inuse; - *cur_size = kalloc_large_total; - *max_size = kalloc_large_max; - *elem_size = kalloc_large_total / kalloc_large_inuse; - *alloc_size = kalloc_large_total / kalloc_large_inuse; - *collectable = 0; - *exhaustable = 0; + return (uint32_t)kalloc_size(addr); }