/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
- *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
-/*
- * HISTORY
- *
- * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.2.19.5 1995/02/24 15:20:29 alanl
- * Lock package cleanup.
- * [95/02/15 alanl]
- *
- * Merge with DIPC2_SHARED.
- * [1995/01/05 15:11:02 alanl]
- *
- * Revision 1.2.28.2 1994/11/10 06:12:50 dwm
- * mk6 CR764 - s/spinlock/simple_lock/ (name change only)
- * [1994/11/10 05:28:35 dwm]
- *
- * Revision 1.2.28.1 1994/11/04 10:07:40 dwm
- * mk6 CR668 - 1.3b26 merge
- * * Revision 1.2.2.4 1993/11/08 15:04:18 gm
- * CR9710: Updated to new zinit() and zone_change() interfaces.
- * * End1.3merge
- * [1994/11/04 09:25:48 dwm]
- *
- * Revision 1.2.19.3 1994/09/23 02:20:52 ezf
- * change marker to not FREE
- * [1994/09/22 21:33:57 ezf]
- *
- * Revision 1.2.19.2 1994/06/14 18:36:36 bolinger
- * NMK17.2 merge: Replace simple_lock ops.
- * [1994/06/14 18:35:17 bolinger]
- *
- * Revision 1.2.19.1 1994/06/14 17:04:23 bolinger
- * Merge up to NMK17.2.
- * [1994/06/14 16:54:19 bolinger]
- *
- * Revision 1.2.23.3 1994/10/14 12:24:33 sjs
- * Removed krealloc_spinl routine: the newer locking scheme makes it
- * obsolete.
- * [94/10/13 sjs]
- *
- * Revision 1.2.23.2 1994/08/11 14:42:46 rwd
- * Post merge cleanup
- * [94/08/09 rwd]
- *
- * Changed zcollectable to use zchange.
- * [94/08/04 rwd]
- *
- * Revision 1.2.17.2 1994/07/08 01:58:45 alanl
- * Change comment to match function name.
- * [1994/07/08 01:47:59 alanl]
- *
- * Revision 1.2.17.1 1994/05/26 16:20:38 sjs
- * Added krealloc_spinl: same as krealloc but uses spin locks.
- * [94/05/25 sjs]
- *
- * Revision 1.2.23.1 1994/08/04 02:24:55 mmp
- * Added krealloc_spinl: same as krealloc but uses spin locks.
- * [94/05/25 sjs]
- *
- * Revision 1.2.13.1 1994/02/11 14:27:12 paire
- * Changed krealloc() to make it work on a MP system. Added a new parameter
- * which is the simple lock that should be held while modifying the memory
- * area already initialized.
- * Change from NMK16.1 [93/09/02 paire]
- *
- * Do not set debug for kalloc zones as default. It wastes
- * to much space.
- * Change from NMK16.1 [93/08/16 bernadat]
- * [94/02/07 paire]
- *
- * Revision 1.2.2.3 1993/07/28 17:15:44 bernard
- * CR9523 -- Prototypes.
- * [1993/07/27 20:14:12 bernard]
- *
- * Revision 1.2.2.2 1993/06/02 23:37:46 jeffc
- * Added to OSF/1 R1.3 from NMK15.0.
- * [1993/06/02 21:12:59 jeffc]
- *
- * Revision 1.2 1992/12/07 21:28:42 robert
- * integrate any changes below for 14.0 (branch from 13.16 base)
- *
- * Joseph Barrera (jsb) at Carnegie-Mellon University 11-Sep-92
- * Added krealloc. Added kalloc_max_prerounded for quicker choice between
- * zalloc and kmem_alloc. Renamed MINSIZE to KALLOC_MINSIZE.
- * [1992/12/06 19:47:16 robert]
- *
- * Revision 1.1 1992/09/30 02:09:23 robert
- * Initial revision
- *
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.9 91/05/14 16:43:17 mrt
- * Correcting copyright
- *
- * Revision 2.8 91/03/16 14:50:37 rpd
- * Updated for new kmem_alloc interface.
- * [91/03/03 rpd]
- *
- * Revision 2.7 91/02/05 17:27:22 mrt
- * Changed to new Mach copyright
- * [91/02/01 16:14:12 mrt]
- *
- * Revision 2.6 90/06/19 22:59:06 rpd
- * Made the big kalloc zones collectable.
- * [90/06/05 rpd]
- *
- * Revision 2.5 90/06/02 14:54:47 rpd
- * Added kalloc_max, kalloc_map_size.
- * [90/03/26 22:06:39 rpd]
- *
- * Revision 2.4 90/01/11 11:43:13 dbg
- * De-lint.
- * [89/12/06 dbg]
- *
- * Revision 2.3 89/09/08 11:25:51 dbg
- * MACH_KERNEL: remove non-MACH data types.
- * [89/07/11 dbg]
- *
- * Revision 2.2 89/08/31 16:18:59 rwd
- * First Checkin
- * [89/08/23 15:41:37 rwd]
- *
- * Revision 2.6 89/08/02 08:03:28 jsb
- * Make all kalloc zones 8 MB big. (No more kalloc panics!)
- * [89/08/01 14:10:17 jsb]
- *
- * Revision 2.4 89/04/05 13:03:10 rvb
- * Guarantee a zone max of at least 100 elements or 10 pages
- * which ever is greater. Afs (AllocDouble()) puts a great demand
- * on the 2048 zone and used to blow away.
- * [89/03/09 rvb]
- *
- * Revision 2.3 89/02/25 18:04:39 gm0w
- * Changes for cleanup.
- *
- * Revision 2.2 89/01/18 02:07:04 jsb
- * Give each kalloc zone a meaningful name (for panics);
- * create a zone for each power of 2 between MINSIZE
- * and PAGE_SIZE, instead of using (obsoleted) NQUEUES.
- * [89/01/17 10:16:33 jsb]
- *
- *
- * 13-Feb-88 John Seamons (jks) at NeXT
- * Updated to use kmem routines instead of vmem routines.
- *
- * 21-Jun-85 Avadis Tevanian (avie) at Carnegie-Mellon University
- * Created.
- */
-/* CMU_ENDHIST */
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
#include <zone_debug.h>
#include <mach/boolean.h>
+#include <mach/sdt.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
-#include <kern/lock.h>
+#include <kern/ledger.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
+#include <libkern/OSMalloc.h>
+#include <sys/kdebug.h>
+
+#include <san/kasan.h>
#ifdef MACH_BSD
zone_t kalloc_zone(vm_size_t);
#endif
+#define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024)
+#define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024)
vm_map_t kalloc_map;
-vm_size_t kalloc_map_size = 16 * 1024 * 1024;
vm_size_t kalloc_max;
vm_size_t kalloc_max_prerounded;
+vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */
+
+/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */
+unsigned long kalloc_fallback_count;
unsigned int kalloc_large_inuse;
vm_size_t kalloc_large_total;
vm_size_t kalloc_large_max;
+vm_size_t kalloc_largest_allocated = 0;
+uint64_t kalloc_large_sum;
+
+int kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */
+
+vm_offset_t kalloc_map_min;
+vm_offset_t kalloc_map_max;
+
+#ifdef MUTEX_ZONE
+/*
+ * Diagnostic code to track mutexes separately rather than via the 2^ zones
+ */
+ zone_t lck_mtx_zone;
+#endif
+
+/*
+ * Ledger bookkeeping hook run after a large (non-zone) kalloc allocation:
+ * debits `bytes` from the tkm_shared entry of the current thread's ledger.
+ */
+static void
+KALLOC_ZINFO_SALLOC(vm_size_t bytes)
+{
+	ledger_debit(current_thread()->t_ledger, task_ledgers.tkm_shared, bytes);
+}
+
+/*
+ * Ledger bookkeeping hook run after a large (non-zone) kalloc allocation is
+ * released: credits `bytes` to the tkm_shared entry of the current thread's
+ * ledger.
+ */
+static void
+KALLOC_ZINFO_SFREE(vm_size_t bytes)
+{
+	ledger_credit(current_thread()->t_ledger, task_ledgers.tkm_shared, bytes);
+}
/*
- * All allocations of size less than kalloc_max are rounded to the
- * next highest power of 2. This allocator is built on top of
- * the zone allocator. A zone is created for each potential size
- * that we are willing to get in small blocks.
+ * All allocations of size less than kalloc_max are rounded to the next nearest
+ * sized zone. This allocator is built on top of the zone allocator. A zone
+ * is created for each potential size that we are willing to get in small
+ * blocks.
*
- * We assume that kalloc_max is not greater than 64K;
- * thus 16 is a safe array size for k_zone and k_zone_name.
+ * We assume that kalloc_max is not greater than 64K.
*
- * Note that kalloc_max is somewhat confusingly named.
- * It represents the first power of two for which no zone exists.
- * kalloc_max_prerounded is the smallest allocation size, before
- * rounding, for which no zone exists.
+ * Note that kalloc_max is somewhat confusingly named. It represents the first
+ * power of two for which no zone exists. kalloc_max_prerounded is the
+ * smallest allocation size, before rounding, for which no zone exists.
+ *
+ * Also if the allocation size is more than kalloc_kernmap_size then allocate
+ * from kernel map rather than kalloc_map.
*/
-int first_k_zone = -1;
-struct zone *k_zone[16];
-static char *k_zone_name[16] = {
- "kalloc.1", "kalloc.2",
- "kalloc.4", "kalloc.8",
- "kalloc.16", "kalloc.32",
- "kalloc.64", "kalloc.128",
- "kalloc.256", "kalloc.512",
- "kalloc.1024", "kalloc.2048",
- "kalloc.4096", "kalloc.8192",
- "kalloc.16384", "kalloc.32768"
+#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
+#define KiB(x) (1024 * (x))
+
+static const struct kalloc_zone_config { /* one row per candidate kalloc zone; rows are listed in ascending size order */
+ int kzc_size; /* zone element size in bytes; lookups pick the first row whose size is >= the request */
+ const char *kzc_name; /* "kalloc.<size>", passed to zinit() as the zone name */
+} k_zone_config[] = {
+#define KZC_ENTRY(SIZE) { .kzc_size = (SIZE), .kzc_name = "kalloc." #SIZE }
+
+#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
+ /* 64-bit targets, generally */
+ KZC_ENTRY(16),
+ KZC_ENTRY(32),
+ KZC_ENTRY(48),
+ KZC_ENTRY(64),
+ KZC_ENTRY(80),
+ KZC_ENTRY(96),
+ KZC_ENTRY(128),
+ KZC_ENTRY(160),
+ KZC_ENTRY(192),
+ KZC_ENTRY(224),
+ KZC_ENTRY(256),
+ KZC_ENTRY(288),
+ KZC_ENTRY(368),
+ KZC_ENTRY(400),
+ KZC_ENTRY(512),
+ KZC_ENTRY(576),
+ KZC_ENTRY(768),
+ KZC_ENTRY(1024),
+ KZC_ENTRY(1152),
+ KZC_ENTRY(1280),
+ KZC_ENTRY(1664),
+ KZC_ENTRY(2048),
+#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
+ /* 32-bit targets, generally */
+ KZC_ENTRY(8),
+ KZC_ENTRY(16),
+ KZC_ENTRY(24),
+ KZC_ENTRY(32),
+ KZC_ENTRY(40),
+ KZC_ENTRY(48),
+ KZC_ENTRY(64),
+ KZC_ENTRY(72),
+ KZC_ENTRY(88),
+ KZC_ENTRY(112),
+ KZC_ENTRY(128),
+ KZC_ENTRY(192),
+ KZC_ENTRY(256),
+ KZC_ENTRY(288),
+ KZC_ENTRY(384),
+ KZC_ENTRY(440),
+ KZC_ENTRY(512),
+ KZC_ENTRY(576),
+ KZC_ENTRY(768),
+ KZC_ENTRY(1024),
+ KZC_ENTRY(1152),
+ KZC_ENTRY(1536),
+ KZC_ENTRY(2048),
+ KZC_ENTRY(2128),
+ KZC_ENTRY(3072),
+#else
+#error missing or invalid zone size parameters for kalloc
+#endif
+
+ /* all configurations get these zones (kalloc_init() only creates the ones smaller than kalloc_max) */
+ KZC_ENTRY(4096),
+ KZC_ENTRY(6144),
+ KZC_ENTRY(8192),
+ KZC_ENTRY(16384),
+ KZC_ENTRY(32768),
+#undef KZC_ENTRY
};
+#define MAX_K_ZONE (int)(sizeof(k_zone_config) / sizeof(k_zone_config[0]))
+
/*
- * Max number of elements per zone. zinit rounds things up correctly
- * Doing things this way permits each zone to have a different maximum size
- * based on need, rather than just guessing; it also
- * means its patchable in case you're wrong!
+ * Many kalloc() allocations are for small structures containing a few
+ * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by
+ * size normalized to the minimum alignment, finds the right zone index
+ * for them in one dereference.
*/
-unsigned long k_zone_max[16] = {
- 1024, /* 1 Byte */
- 1024, /* 2 Byte */
- 1024, /* 4 Byte */
- 1024, /* 8 Byte */
- 1024, /* 16 Byte */
- 4096, /* 32 Byte */
- 4096, /* 64 Byte */
- 4096, /* 128 Byte */
- 4096, /* 256 Byte */
- 1024, /* 512 Byte */
- 1024, /* 1024 Byte */
- 1024, /* 2048 Byte */
- 1024, /* 4096 Byte */
- 4096, /* 8192 Byte */
- 64, /* 16384 Byte */
- 64, /* 32768 Byte */
-};
+
+#define INDEX_ZDLUT(size) \
+ (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
+#define N_K_ZDLUT (2048 / KALLOC_MINALIGN)
+ /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */
+#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN)
+
+static int8_t k_zone_dlut[N_K_ZDLUT]; /* table of indices into k_zone[] */
+
+/*
+ * If there's no hit in the DLUT, then start searching from k_zindex_start.
+ */
+static int k_zindex_start;
+
+static zone_t k_zone[MAX_K_ZONE];
+
+/* #define KALLOC_DEBUG 1 */
+
+/* forward declarations */
+
+lck_grp_t kalloc_lck_grp;
+lck_mtx_t kalloc_lock;
+
+#define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock)
+#define kalloc_unlock() lck_mtx_unlock(&kalloc_lock)
+
+
+/* OSMalloc local data declarations */
+static
+queue_head_t OSMalloc_tag_list;
+
+lck_grp_t *OSMalloc_tag_lck_grp;
+lck_mtx_t OSMalloc_tag_lock;
+
+#define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock)
+#define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock)
+
+
+/* OSMalloc forward declarations */
+void OSMalloc_init(void);
+void OSMalloc_Tagref(OSMallocTag tag);
+void OSMalloc_Tagrele(OSMallocTag tag);
/*
* Initialize the memory allocator. This should be called only
{
kern_return_t retval;
vm_offset_t min;
- vm_size_t size;
- register int i;
+ vm_size_t size, kalloc_map_size;
+ vm_map_kernel_flags_t vmk_flags;
+
+ /*
+ * Scale the kalloc_map_size to physical memory size: stay below
+ * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
+ */
+ kalloc_map_size = (vm_size_t)(sane_size >> 5);
+#if !__LP64__
+ if (kalloc_map_size > KALLOC_MAP_SIZE_MAX)
+ kalloc_map_size = KALLOC_MAP_SIZE_MAX;
+#endif /* !__LP64__ */
+ if (kalloc_map_size < KALLOC_MAP_SIZE_MIN)
+ kalloc_map_size = KALLOC_MAP_SIZE_MIN;
+
+ vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
- FALSE, TRUE, &kalloc_map);
+ FALSE,
+ (VM_FLAGS_ANYWHERE),
+ vmk_flags,
+ VM_KERN_MEMORY_KALLOC,
+ &kalloc_map);
+
if (retval != KERN_SUCCESS)
panic("kalloc_init: kmem_suballoc failed");
+ kalloc_map_min = min;
+ kalloc_map_max = min + kalloc_map_size - 1;
+
/*
- * Ensure that zones up to size 8192 bytes exist.
- * This is desirable because messages are allocated
- * with kalloc, and messages up through size 8192 are common.
+ * Create zones up to at least 4 pages because small page-multiples are
+ * common allocations. Also ensure that zones up to size 16KB exist.
+ * This is desirable because messages are allocated with kalloc(), and
+ * messages up through size 8192 are common.
*/
+ kalloc_max = PAGE_SIZE << 2;
+ if (kalloc_max < KiB(16)) {
+ kalloc_max = KiB(16);
+ }
+ assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */
- if (PAGE_SIZE < 16*1024)
- kalloc_max = 16*1024;
- else
- kalloc_max = PAGE_SIZE;
kalloc_max_prerounded = kalloc_max / 2 + 1;
+ /* allocations larger than 16 times kalloc_max go directly to kernel map */
+ kalloc_kernmap_size = (kalloc_max * 16) + 1;
+ kalloc_largest_allocated = kalloc_kernmap_size;
/*
- * Allocate a zone for each size we are going to handle.
- * We specify non-paged memory.
+ * Allocate a zone for each size we are going to handle.
*/
- for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) {
- if (size < KALLOC_MINSIZE) {
- k_zone[i] = 0;
- continue;
- }
- if (size == KALLOC_MINSIZE) {
- first_k_zone = i;
+ for (int i = 0; i < MAX_K_ZONE && (size = k_zone_config[i].kzc_size) < kalloc_max; i++) {
+ k_zone[i] = zinit(size, size, size, k_zone_config[i].kzc_name);
+
+ /*
+ * Don't charge the caller for the allocation, as we aren't sure how
+ * the memory will be handled.
+ */
+ zone_change(k_zone[i], Z_CALLERACCT, FALSE);
+#if VM_MAX_TAG_ZONES
+ if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE);
+#endif
+ zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE);
+ }
+
+ /*
+ * Build the Direct LookUp Table for small allocations
+ */
+ size = 0;
+ for (int i = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) {
+ int zindex = 0;
+
+ while ((vm_size_t)k_zone_config[zindex].kzc_size < size)
+ zindex++;
+
+ if (i == N_K_ZDLUT) {
+ k_zindex_start = zindex;
+ break;
}
- k_zone[i] = zinit(size, k_zone_max[i] * size, size,
- k_zone_name[i]);
+ k_zone_dlut[i] = (int8_t)zindex;
}
-}
-vm_offset_t
-kalloc_canblock(
- vm_size_t size,
- boolean_t canblock)
-{
- register int zindex;
- register vm_size_t allocsize;
+#ifdef KALLOC_DEBUG
+ printf("kalloc_init: k_zindex_start %d\n", k_zindex_start);
/*
- * If size is too large for a zone, then use kmem_alloc.
- * (We use kmem_alloc instead of kmem_alloc_wired so that
- * krealloc can use kmem_realloc.)
+ * Do a quick synthesis to see how well/badly we can
+ * find-a-zone for a given size.
+ * Useful when debugging/tweaking the array of zone sizes.
+ * Cache misses probably more critical than compare-branches!
*/
+ for (int i = 0; i < MAX_K_ZONE; i++) {
+ vm_size_t testsize = (vm_size_t)k_zone_config[i].kzc_size - 1;
+ int compare = 0;
+ int zindex;
- if (size >= kalloc_max_prerounded) {
- vm_offset_t addr;
+ if (testsize < MAX_SIZE_ZDLUT) {
+ compare += 1; /* 'if' (T) */
- /* kmem_alloc could block so we return if noblock */
- if (!canblock) {
- return(0);
- }
- if (kmem_alloc(kalloc_map, &addr, size) != KERN_SUCCESS)
- addr = 0;
+ long dindex = INDEX_ZDLUT(testsize);
+ zindex = (int)k_zone_dlut[dindex];
- if (addr) {
- kalloc_large_inuse++;
- kalloc_large_total += size;
+ } else if (testsize < kalloc_max_prerounded) {
- if (kalloc_large_total > kalloc_large_max)
- kalloc_large_max = kalloc_large_total;
- }
- return(addr);
+ compare += 2; /* 'if' (F), 'if' (T) */
+
+ zindex = k_zindex_start;
+ while ((vm_size_t)k_zone_config[zindex].kzc_size < testsize) {
+ zindex++;
+ compare++; /* 'while' (T) */
+ }
+ compare++; /* 'while' (F) */
+ } else
+ break; /* not zone-backed */
+
+ zone_t z = k_zone[zindex];
+ printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
+ (unsigned long)testsize, z->zone_name, compare,
+ compare == 1 ? "" : "s");
}
+#endif
+
+ lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL);
+ OSMalloc_init();
+#ifdef MUTEX_ZONE
+ lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
+#endif
+}
- /* compute the size of the block that we will actually allocate */
+/*
+ * Map a small allocation size to its kalloc zone with one table lookup.
+ * The size is rounded up to a multiple of KALLOC_MINALIGN to pick a slot in
+ * k_zone_dlut[]; that slot holds the index into k_zone[].  Every caller in
+ * this file checks size < MAX_SIZE_ZDLUT before calling.
+ */
+static __inline zone_t
+get_zone_dlut(vm_size_t size)
+{
+	long slot = INDEX_ZDLUT(size);
+
+	return (k_zone[(int)k_zone_dlut[slot]]);
+}
- allocsize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
+/*
+ * Linear-search variant of the size-to-zone lookup: walk k_zone_config[]
+ * upward from `zindex` to the first entry whose size is >= `size` and return
+ * the k_zone[] slot with that index.  `size` must be below
+ * kalloc_max_prerounded (asserted on entry); the resulting index is only
+ * checked by the assert that follows the walk.
+ */
+
+static __inline zone_t
+get_zone_search(vm_size_t size, int zindex)
+{
+ assert(size < kalloc_max_prerounded);
+
+ while ((vm_size_t)k_zone_config[zindex].kzc_size < size)
zindex++;
- }
- /* allocate from the appropriate zone */
+ assert(zindex < MAX_K_ZONE &&
+ (vm_size_t)k_zone_config[zindex].kzc_size < kalloc_max);
- assert(allocsize < kalloc_max);
- return(zalloc_canblock(k_zone[zindex], canblock));
+ return (k_zone[zindex]);
}
-vm_offset_t
-kalloc(
- vm_size_t size)
+static vm_size_t /* size in bytes (vme_end - vme_start) of the map entry that starts at addr; panics otherwise */
+vm_map_lookup_kalloc_entry_locked(
+ vm_map_t map, /* map to search; both callers in this file take the map lock around the call */
+ void *addr) /* must be the exact start address of the entry */
{
- return( kalloc_canblock(size, TRUE) );
+ boolean_t ret;
+ vm_map_entry_t vm_entry = NULL;
+
+ ret = vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry);
+ if (!ret) { /* the lookup reported no entry for addr */
+ panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n",
+ map, addr);
+ }
+ if (vm_entry->vme_start != (vm_map_offset_t)addr) { /* addr points into the middle of the entry */
+ panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n",
+ map, addr, vm_entry);
+ }
+ if (!vm_entry->vme_atomic) { /* large kallocs are created with KMA_ATOMIC; reject entries without the atomic flag */
+ panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n",
+ map, addr, vm_entry);
+ }
+ return (vm_entry->vme_end - vm_entry->vme_start);
}
-vm_offset_t
-kalloc_noblock(
- vm_size_t size)
+#if KASAN_KALLOC
+/*
+ * KASAN kalloc stashes the original user-requested size away in the poisoned
+ * area. Return that directly.
+ *
+ * The bare reference to vm_map_lookup_kalloc_entry_locked in the body exists
+ * only to silence the compiler warning about that static helper in this
+ * configuration; the helper is not called here.
+ */
+vm_size_t
+kalloc_size(void *addr)
{
- return( kalloc_canblock(size, FALSE) );
+ (void)vm_map_lookup_kalloc_entry_locked; /* silence warning */
+ return kasan_user_size((vm_offset_t)addr);
}
+#else
+/*
+ * Return the size of the allocation at `addr`.  If zone_element_size()
+ * reports a non-zero size, that value is returned.  Otherwise the address is
+ * treated as a large allocation and its map entry is looked up under the
+ * map's read lock: in kalloc_map when the address lies inside
+ * [kalloc_map_min, kalloc_map_max), in kernel_map otherwise.
+ */
+vm_size_t
+kalloc_size(
+	void *addr)
+{
+	vm_offset_t	where = (vm_offset_t)addr;
+	vm_size_t	bytes = zone_element_size(addr, NULL);
+	vm_map_t	owner;
+
+	if (bytes != 0)
+		return bytes;
+
+	owner = (where >= kalloc_map_min && where < kalloc_map_max) ? kalloc_map : kernel_map;
+
+	vm_map_lock_read(owner);
+	bytes = vm_map_lookup_kalloc_entry_locked(owner, addr);
+	vm_map_unlock_read(owner);
+	return bytes;
+}
+#endif
-void
-krealloc(
- vm_offset_t *addrp,
- vm_size_t old_size,
- vm_size_t new_size,
- simple_lock_t lock)
+vm_size_t /* size of the bucket (zone element or page-rounded region) that serves a request of `size` */
+kalloc_bucket_size(
+ vm_size_t size) /* requested allocation size in bytes */
{
- register int zindex;
- register vm_size_t allocsize;
- vm_offset_t naddr;
+ zone_t z;
+ vm_map_t map;
+
+ if (size < MAX_SIZE_ZDLUT) { /* small request: direct lookup table */
+ z = get_zone_dlut(size);
+ return z->elem_size;
+ }
+
+ if (size < kalloc_max_prerounded) { /* still zone-backed: linear search from k_zindex_start */
+ z = get_zone_search(size, k_zindex_start);
+ return z->elem_size;
+ }
- /* can only be used for increasing allocation size */
+ if (size >= kalloc_kernmap_size) /* mirrors the map selection kalloc_canblock() makes for large requests */
+ map = kernel_map;
+ else
+ map = kalloc_map;
+
+ return vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); /* large requests: rounded up using the chosen map's page mask */
+}
- assert(new_size > old_size);
+#if KASAN_KALLOC
+/*
+ * KASAN flavour of kfree_addr(): obtain the size with kalloc_size(), release
+ * the allocation through kfree() using that size, and return the size.
+ */
+vm_size_t
+kfree_addr(void *addr)
+{
+	vm_size_t released = kalloc_size(addr);
+
+	kfree(addr, released);
+	return released;
+}
+#else
+vm_size_t /* frees the allocation at addr without the caller supplying a size; returns the size that was released */
+kfree_addr(
+ void *addr) /* address previously returned by kalloc */
+{
+ vm_map_t map;
+ vm_size_t size = 0;
+ kern_return_t ret;
+ zone_t z;
+
+ size = zone_element_size(addr, &z); /* a non-zero result is treated as: addr is an element of zone z */
+ if (size) {
+ DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, z->elem_size, void*, addr);
+ zfree(z, addr);
+ return size;
+ }
- /* if old_size is zero, then we are simply allocating */
+ if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { /* large allocation: decide which map owns the address */
+ map = kalloc_map;
+ } else {
+ map = kernel_map;
+ }
+ if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) { /* reject addresses below the kernel & kext range before touching the map */
+ panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr);
+ }
- if (old_size == 0) {
- simple_unlock(lock);
- naddr = kalloc(new_size);
- simple_lock(lock);
- *addrp = naddr;
- return;
+ vm_map_lock(map); /* held across both the size lookup and the removal */
+ size = vm_map_lookup_kalloc_entry_locked(map, addr); /* panics unless addr is the exact start of an atomic entry */
+ ret = vm_map_remove_locked(map,
+ vm_map_trunc_page((vm_map_offset_t)addr,
+ VM_MAP_PAGE_MASK(map)),
+ vm_map_round_page((vm_map_offset_t)addr + size,
+ VM_MAP_PAGE_MASK(map)),
+ VM_MAP_REMOVE_KUNWIRE);
+ if (ret != KERN_SUCCESS) {
+ panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n",
+ addr, map, ret);
}
+ vm_map_unlock(map);
+ DTRACE_VM3(kfree, vm_size_t, -1, vm_size_t, size, void*, addr);
+
+ kalloc_spin_lock(); /* the large-allocation counters are updated under kalloc_lock */
+ kalloc_large_total -= size;
+ kalloc_large_inuse--;
+ kalloc_unlock();
+
+ KALLOC_ZINFO_SFREE(size); /* ledger bookkeeping for the current thread */
+ return size;
+}
+#endif
+
+void * /* returns the new allocation; NULL when a large request may not block or cannot be satisfied */
+kalloc_canblock(
+ vm_size_t * psize, /* in: requested size; out (non-KASAN builds): size actually reserved */
+ boolean_t canblock, /* FALSE: large (non-zone) requests return NULL immediately; also forwarded to the zone allocator */
+ vm_allocation_site_t * site) /* optional; when non-NULL it may replace the default tag via vm_tag_alloc() */
+{
+ zone_t z;
+ vm_size_t size;
+ void *addr;
+ vm_tag_t tag;
- /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */
+ tag = VM_KERN_MEMORY_KALLOC;
+ size = *psize;
- if (old_size >= kalloc_max_prerounded) {
- old_size = round_page_32(old_size);
- new_size = round_page_32(new_size);
- if (new_size > old_size) {
+#if KASAN_KALLOC
+ /* expand the allocation to accommodate redzones */
+ vm_size_t req_size = size;
+ size = kasan_alloc_resize(req_size);
+#endif
- if (kmem_realloc(kalloc_map, *addrp, old_size, &naddr,
- new_size) != KERN_SUCCESS) {
- panic("krealloc: kmem_realloc");
- naddr = 0;
- }
+ if (size < MAX_SIZE_ZDLUT)
+ z = get_zone_dlut(size);
+ else if (size < kalloc_max_prerounded)
+ z = get_zone_search(size, k_zindex_start);
+ else {
+ /*
+ * If size is too large for a zone, then use kmem_alloc.
+ * (We use kmem_alloc instead of kmem_alloc_kobject so that
+ * krealloc can use kmem_realloc.)
+ */
+ vm_map_t alloc_map;
+
+ /* kmem_alloc could block so we return if noblock */
+ if (!canblock) {
+ return(NULL);
+ }
+
+#if KASAN_KALLOC
+ /* large allocation - use guard pages instead of small redzones */
+ size = round_page(req_size + 2 * PAGE_SIZE);
+ assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded);
+#endif
+
+ if (size >= kalloc_kernmap_size) /* very large requests skip kalloc_map entirely */
+ alloc_map = kernel_map;
+ else
+ alloc_map = kalloc_map;
+
+ if (site) tag = vm_tag_alloc(site);
- simple_lock(lock);
- *addrp = naddr;
+ if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) { /* first attempt, in the chosen map */
+ if (alloc_map != kernel_map) { /* kalloc_map attempt failed: retry once in kernel_map */
+ if (kalloc_fallback_count++ == 0) { /* log only the first fallback */
+ printf("%s: falling back to kernel_map\n", __func__);
+ }
+ if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS)
+ addr = NULL;
+ }
+ else
+ addr = NULL;
+ }
- /* kmem_realloc() doesn't free old page range. */
- kmem_free(kalloc_map, *addrp, old_size);
+ if (addr != NULL) {
+ kalloc_spin_lock();
+ /*
+ * Thread-safe version of the workaround for 4740071
+ * (a double FREE())
+ */
+ if (size > kalloc_largest_allocated)
+ kalloc_largest_allocated = size;
- kalloc_large_total += (new_size - old_size);
+ kalloc_large_inuse++;
+ kalloc_large_total += size;
+ kalloc_large_sum += size;
if (kalloc_large_total > kalloc_large_max)
kalloc_large_max = kalloc_large_total;
+
+ kalloc_unlock();
+
+ KALLOC_ZINFO_SALLOC(size); /* ledger bookkeeping for the current thread */
}
- return;
+#if KASAN_KALLOC
+ /* fixup the return address to skip the redzone */
+ addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE);
+#else
+ *psize = round_page(size); /* report the page-rounded size back to the caller */
+#endif
+ DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr);
+ return(addr);
}
+#ifdef KALLOC_DEBUG
+ if (size > z->elem_size)
+ panic("%s: z %p (%s) but requested size %lu", __func__,
+ z, z->zone_name, (unsigned long)size);
+#endif
- /* compute the size of the block that we actually allocated */
+ assert(size <= z->elem_size);
- allocsize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (allocsize < old_size) {
- allocsize <<= 1;
- zindex++;
- }
+#if VM_MAX_TAG_ZONES
+ if (z->tags && site)
+ {
+ tag = vm_tag_alloc(site);
+ if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC; /* non-blocking call with no totals entry for this tag yet: keep the default tag */
+ }
+#endif
- /* if new size fits in old block, then return */
+ addr = zalloc_canblock_tag(z, canblock, size, tag);
- if (new_size <= allocsize) {
- return;
- }
+#if KASAN_KALLOC
+ /* fixup the return address to skip the redzone */
+ addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE);
+
+ /* For KASan, the redzone lives in any additional space, so don't
+ * expand the allocation. */
+#else
+ *psize = z->elem_size; /* report the zone's element size back to the caller */
+#endif
- /* if new size does not fit in zone, kmem_alloc it, else zalloc it */
+ DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, *psize, void*, addr);
+ return addr;
+}
- simple_unlock(lock);
- if (new_size >= kalloc_max_prerounded) {
- if (kmem_alloc(kalloc_map, &naddr, new_size) != KERN_SUCCESS) {
- panic("krealloc: kmem_alloc");
- simple_lock(lock);
- *addrp = 0;
- return;
- }
- kalloc_large_inuse++;
- kalloc_large_total += new_size;
+/*
+ * Function-form kalloc entry point: allocates `size` bytes through
+ * kalloc_tag_bt() using the generic VM_KERN_MEMORY_KALLOC tag.  The
+ * prototype is declared here, immediately ahead of the definition.
+ */
+void *kalloc_external(vm_size_t size);
+
+void *
+kalloc_external(vm_size_t size)
+{
+	return kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC);
+}
- if (kalloc_large_total > kalloc_large_max)
- kalloc_large_max = kalloc_large_total;
- } else {
- register int new_zindex;
+volatile SInt32 kfree_nop_count = 0;
- allocsize <<= 1;
- new_zindex = zindex + 1;
- while (allocsize < new_size) {
- allocsize <<= 1;
- new_zindex++;
- }
- naddr = zalloc(k_zone[new_zindex]);
+void /* release an allocation made by kalloc; the caller supplies the allocation's size */
+kfree(
+ void *data, /* address being freed */
+ vm_size_t size) /* size used to pick the zone, or to select the large (kmem_free) path */
+{
+ zone_t z;
+
+#if KASAN_KALLOC
+ /*
+ * Resize back to the real allocation size and hand off to the KASan
+ * quarantine. `data` may then point to a different allocation.
+ */
+ vm_size_t user_size = size;
+ kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC);
+ data = (void *)kasan_dealloc((vm_address_t)data, &size);
+ kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true);
+ if (!data) {
+ return;
}
- simple_lock(lock);
+#endif
+
+ if (size < MAX_SIZE_ZDLUT)
+ z = get_zone_dlut(size);
+ else if (size < kalloc_max_prerounded)
+ z = get_zone_search(size, k_zindex_start);
+ else {
+ /* if size was too large for a zone, then use kmem_free */
+
+ vm_map_t alloc_map = kernel_map;
+
+ if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) /* kalloc_map_max is the last byte of the map, so the bound is inclusive */
+ alloc_map = kalloc_map;
+ if (size > kalloc_largest_allocated) {
+ /*
+ * work around double FREEs of small MALLOCs
+ * this used to end up being a nop
+ * since the pointer being freed from an
+ * alloc backed by the zalloc world could
+ * never show up in the kalloc_map... however,
+ * the kernel_map is a different issue... since it
+ * was released back into the zalloc pool, a pointer
+ * would have gotten written over the 'size' that
+ * the MALLOC was retaining in the first 4 bytes of
+ * the underlying allocation... that pointer ends up
+ * looking like a really big size on the 2nd FREE and
+ * pushes the kfree into the kernel_map... we
+ * end up removing a ton of virtual space before we panic
+ * this check causes us to ignore the kfree for a size
+ * that must be 'bogus'... note that it might not be due
+ * to the above scenario, but it would still be wrong and
+ * cause serious damage.
+ */
+
+ OSAddAtomic(1, &kfree_nop_count); /* count the ignored free, then drop the request */
+ return;
+ }
+ kmem_free(alloc_map, (vm_offset_t)data, size);
+ kalloc_spin_lock(); /* the large-allocation counters are updated under kalloc_lock */
- /* copy existing data */
+ kalloc_large_total -= size;
+ kalloc_large_inuse--;
- bcopy((const char *)*addrp, (char *)naddr, old_size);
+ kalloc_unlock();
- /* free old block, and return */
+#if !KASAN_KALLOC
+ DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, data);
+#endif
- zfree(k_zone[zindex], *addrp);
+ KALLOC_ZINFO_SFREE(size); /* ledger bookkeeping for the current thread */
+ return;
+ }
- /* set up new address */
+ /* free to the appropriate zone */
+#ifdef KALLOC_DEBUG
+ if (size > z->elem_size)
+ panic("%s: z %p (%s) but requested size %lu", __func__,
+ z, z->zone_name, (unsigned long)size);
+#endif
+ assert(size <= z->elem_size);
+ DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, z->elem_size, void*, data);
+ zfree(z, data);
+}
- *addrp = naddr;
+#ifdef MACH_BSD
+/*
+ * Return the kalloc zone that backs allocations of `size` bytes, or
+ * ZONE_NULL when requests of that size are not zone-backed.
+ *
+ * The cut-off is kalloc_max_prerounded: it is the bound kalloc_canblock(),
+ * kfree() and kalloc_bucket_size() use, and the precondition that
+ * get_zone_search() asserts.  Sizes at or above it have no zone; searching
+ * for them would trip that assert and walk k_zone_config[] toward (or, for
+ * larger kalloc_max values, past) its last entry.
+ */
+zone_t
+kalloc_zone(
+	vm_size_t size)
+{
+	if (size < MAX_SIZE_ZDLUT)
+		return (get_zone_dlut(size));
+	if (size < kalloc_max_prerounded)
+		return (get_zone_search(size, k_zindex_start));
+	return (ZONE_NULL);
}
+#endif
+
+/*
+ * Set up the OSMalloc tag bookkeeping (called from kalloc_init()): an empty
+ * tag list, the "OSMalloc_tag" lock group, and the mutex that guards the list.
+ */
+void
+OSMalloc_init(void)
+{
+	queue_init(&OSMalloc_tag_list);
+
+	OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
+	lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);
+}
-vm_offset_t
-kget(
- vm_size_t size)
+OSMallocTag /* creates a new tag, links it onto OSMalloc_tag_list and returns it */
+OSMalloc_Tagalloc(
+ const char *str, /* tag name; copied into the tag with strlcpy(), bounded by OSMT_MAX_NAME */
+ uint32_t flags) /* only OSMT_PAGEABLE is examined here */
{
- register int zindex;
- register vm_size_t allocsize;
+ OSMallocTag OSMTag;
- /* size must not be too large for a zone */
+ OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag)); /* NOTE(review): result is not checked for NULL before the bzero below - confirm kalloc cannot fail here */
- if (size >= kalloc_max_prerounded) {
- /* This will never work, so we might as well panic */
- panic("kget");
- }
+ bzero((void *)OSMTag, sizeof(*OSMTag));
- /* compute the size of the block that we will actually allocate */
+ if (flags & OSMT_PAGEABLE)
+ OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE;
- allocsize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
- zindex++;
- }
+ OSMTag->OSMT_refcnt = 1; /* initial reference; OSMalloc_Tagfree() drops one reference */
- /* allocate from the appropriate zone */
+ strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);
- assert(allocsize < kalloc_max);
- return(zget(k_zone[zindex]));
+ OSMalloc_tag_spin_lock(); /* the tag list is modified under OSMalloc_tag_lock */
+ enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
+ OSMalloc_tag_unlock();
+ OSMTag->OSMT_state = OSMT_VALID; /* marked valid only after the tag is already on the list */
+ return(OSMTag);
}
+/*
+ * OSMalloc_Tagref:
+ *
+ * Take one additional reference on `tag`.  Panics when the tag's state,
+ * masked with OSMT_VALID_MASK, is not OSMT_VALID.
+ */
void
-kfree(
- vm_offset_t data,
- vm_size_t size)
+OSMalloc_Tagref(
+ OSMallocTag tag)
{
- register int zindex;
- register vm_size_t freesize;
-
- /* if size was too large for a zone, then use kmem_free */
+ if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
+ panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
- if (size >= kalloc_max_prerounded) {
- kmem_free(kalloc_map, data, size);
+ /* atomic increment; the returned value is deliberately ignored */
+ (void)hw_atomic_add(&tag->OSMT_refcnt, 1);
+}
- kalloc_large_total -= size;
- kalloc_large_inuse--;
+/*
+ * OSMalloc_Tagrele:
+ *
+ * Drop one reference on `tag`.  Panics when the tag's state, masked
+ * with OSMT_VALID_MASK, is not OSMT_VALID.  When the count drops to
+ * zero the tag must already be in the OSMT_VALID|OSMT_RELEASED state;
+ * it is then unlinked from the tag list under the tag lock and freed.
+ * Reaching zero in any other state panics.
+ */
+void
+OSMalloc_Tagrele(
+	OSMallocTag		tag)
+{
+	/* report this function's own name, not OSMalloc_Tagref() */
+	if ((tag->OSMT_state & OSMT_VALID_MASK) != OSMT_VALID)
+		panic("OSMalloc_Tagrele():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
+
+	/* a non-zero result means other references are still outstanding */
+	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) != 0)
+		return;
+
+	/*
+	 * Compare-and-store with identical old and new values: it leaves
+	 * the state unchanged and only succeeds if the tag is already
+	 * marked OSMT_VALID|OSMT_RELEASED.
+	 */
+	if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
+		OSMalloc_tag_spin_lock();
+		(void)remque((queue_entry_t)tag);
+		OSMalloc_tag_unlock();
+		kfree((void*)tag, sizeof(*tag));
+	} else {
+		panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name);
+	}
+}
- return;
+/*
+ * OSMalloc_Tagfree:
+ *
+ * Mark `tag` as released and drop one reference.  The state must be
+ * exactly OSMT_VALID on entry (it is switched atomically to
+ * OSMT_VALID|OSMT_RELEASED); any other state panics.  If the dropped
+ * reference was the last one, the tag is unlinked from the tag list
+ * under the tag lock and freed here.
+ */
+void
+OSMalloc_Tagfree(OSMallocTag tag)
+{
+	uint32_t refs_left;
+
+	if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state))
+		panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state);
+
+	refs_left = hw_atomic_sub(&tag->OSMT_refcnt, 1);
+	if (refs_left == 0) {
+		/* no references remain: unlink and free the tag */
+		OSMalloc_tag_spin_lock();
+		(void)remque((queue_entry_t)tag);
+		OSMalloc_tag_unlock();
+		kfree((void*)tag, sizeof(*tag));
}
+}
- /* compute the size of the block that we actually allocated from */
+/*
+ * OSMalloc:
+ *
+ * Allocate `size` bytes on behalf of `tag` and return the address, or
+ * NULL on failure.  A reference is taken on the tag first and is
+ * dropped again only when the allocation fails, so each successful
+ * allocation keeps one tag reference (OSFree() releases one).
+ *
+ * When the tag attribute has OSMT_PAGEABLE set and `size` has a bit set
+ * outside PAGE_MASK (presumably: at least one page -- confirm PAGE_MASK
+ * covers only the in-page offset bits), the memory comes from
+ * kmem_alloc_pageable_external() on kernel_map; otherwise it comes from
+ * kalloc_tag_bt().
+ */
+void *
+OSMalloc(
+ uint32_t size,
+ OSMallocTag tag)
+{
+ void *addr=NULL;
+ /* kr only carries the kmem_alloc_pageable_external() status for the test below */
+ kern_return_t kr;
- freesize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (freesize < size) {
- freesize <<= 1;
- zindex++;
- }
+ OSMalloc_Tagref(tag);
+ if ((tag->OSMT_attr & OSMT_PAGEABLE)
+ && (size & ~PAGE_MASK)) {
+ if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
+ addr = NULL;
+ } else
+ addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
- /* free to the appropriate zone */
+ /* allocation failed: give back the reference taken above */
+ if (!addr)
+ OSMalloc_Tagrele(tag);
- assert(freesize < kalloc_max);
- zfree(k_zone[zindex], data);
+ return(addr);
}
-#ifdef MACH_BSD
-zone_t
-kalloc_zone(
- vm_size_t size)
+/*
+ * OSMalloc_nowait:
+ *
+ * Returns NULL immediately, without touching the reference count, when
+ * the tag attribute has OSMT_PAGEABLE set.  Otherwise takes a tag
+ * reference, allocates `size` bytes with kalloc_noblock_tag_bt(), and
+ * drops the reference again if that call returns NULL.  Returns the
+ * allocated address or NULL.
+ */
+void *
+OSMalloc_nowait(
+ uint32_t size,
+ OSMallocTag tag)
{
- register int zindex = 0;
- register vm_size_t allocsize;
+ void *addr=NULL;
- /* compute the size of the block that we will actually allocate */
+ if (tag->OSMT_attr & OSMT_PAGEABLE)
+ return(NULL);
- allocsize = size;
- if (size <= kalloc_max) {
- allocsize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
- zindex++;
- }
- return (k_zone[zindex]);
- }
- return (ZONE_NULL);
+ OSMalloc_Tagref(tag);
+ /* XXX: use non-blocking kalloc for now */
+ addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
+ /* allocation failed: give back the reference taken above */
+ if (addr == NULL)
+ OSMalloc_Tagrele(tag);
+
+ return(addr);
+}
+
+/*
+ * OSMalloc_noblock:
+ *
+ * Allocate `size` bytes for `tag` with kalloc_noblock_tag_bt().
+ * A tag whose attribute has OSMT_PAGEABLE set is refused with NULL
+ * before any reference is taken.  On success the tag reference taken
+ * here stays held; on failure it is released and NULL is returned.
+ */
+void *
+OSMalloc_noblock(uint32_t size, OSMallocTag tag)
+{
+	void *mem = NULL;
+
+	if (tag->OSMT_attr & OSMT_PAGEABLE) {
+		return (NULL);
+	}
+
+	OSMalloc_Tagref(tag);
+	mem = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
+	if (mem != NULL) {
+		return (mem);
+	}
+
+	/* nothing was allocated, so the tag must not stay referenced */
+	OSMalloc_Tagrele(tag);
+	return (mem);
}
-#endif
+/*
+ * OSFree:
+ *
+ * Release `size` bytes at `addr` and drop one reference on `tag`.
+ * The memory goes back through kmem_free() on kernel_map when the tag
+ * attribute has OSMT_PAGEABLE set and `size` has a bit set outside
+ * PAGE_MASK (the same test OSMalloc() uses to pick its allocator);
+ * otherwise it goes back through kfree().
+ */
+void
+OSFree(void *addr, uint32_t size, OSMallocTag tag)
+{
+	int from_kernel_map = (tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK);
+
+	if (!from_kernel_map) {
+		kfree((void *)addr, size);
+	} else {
+		kmem_free(kernel_map, (vm_offset_t)addr, size);
+	}
+
+	OSMalloc_Tagrele(tag);
+}
-kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
- vm_size_t *alloc_size, int *collectable, int *exhaustable)
+/*
+ * OSMalloc_size:
+ *
+ * Return kalloc_size(addr) converted to uint32_t.
+ *
+ * NOTE(review): the cast narrows whatever type kalloc_size() returns;
+ * confirm a result above UINT32_MAX cannot occur here.
+ */
+uint32_t
+OSMalloc_size(
+ void *addr)
{
- *count = kalloc_large_inuse;
- *cur_size = kalloc_large_total;
- *max_size = kalloc_large_max;
- *elem_size = kalloc_large_total / kalloc_large_inuse;
- *alloc_size = kalloc_large_total / kalloc_large_inuse;
- *collectable = 0;
- *exhaustable = 0;
+ return (uint32_t)kalloc_size(addr);
}