/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Zone-based memory allocator.  A zone is a collection of fixed size
 *	data blocks for which quick allocation/deallocation is possible.
 */
#define ZALLOC_ALLOW_DEPRECATED 1
#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach/task_server.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_map.h>

#include <kern/bits.h>
#include <kern/startup.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/host.h>
#include <kern/macro_help.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/thread_call.h>
#include <kern/zalloc_internal.h>
#include <kern/kalloc.h>

#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_compressor.h> /* C_SLOT_PACKED_PTR* */

#include <pexpert/pexpert.h>

#include <machine/machparam.h>
#include <machine/machine_routines.h>  /* ml_cpu_get_info */

#include <os/atomic.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>

#include <san/kasan.h>
#if KASAN_ZALLOC
#define ZONE_ENABLE_LOGGING 0
#elif DEBUG || DEVELOPMENT
#define ZONE_ENABLE_LOGGING 1
#else
#define ZONE_ENABLE_LOGGING 0
#endif
extern void vm_pageout_garbage_collect(int collect);

/* Returns pid of the task with the largest number of VM map entries. */
extern pid_t find_largest_process_vm_map_entries(void);

/*
 * Callout to jetsam. If pid is -1, we wake up the memorystatus thread to do asynchronous kills.
 * For any other pid we try to kill that process synchronously.
 */
extern boolean_t memorystatus_kill_on_zone_map_exhaustion(pid_t pid);

extern zone_t vm_map_entry_zone;
extern zone_t vm_object_zone;
extern vm_offset_t kmapoff_kaddr;
extern unsigned int kmapoff_pgcnt;
extern unsigned int stack_total;
extern unsigned long long stack_allocs;
/*
 * The max # of elements in a chunk should fit into
 * zone_page_metadata.free_count (uint16_t).
 *
 * Update this if the type of free_count changes.
 */
#define ZONE_CHUNK_MAXELEMENTS  (UINT16_MAX)

#define ZONE_PAGECOUNT_BITS     14

/* Zone elements must fit both a next pointer and a backup pointer */
#define ZONE_MIN_ELEM_SIZE      (2 * sizeof(vm_offset_t))
#define ZONE_MAX_ALLOC_SIZE     (32 * 1024)

/* per-cpu zones are special because of counters */
#define ZONE_MIN_PCPU_ELEM_SIZE (1 * sizeof(vm_offset_t))

struct zone_map_range {
	vm_offset_t min_address;
	vm_offset_t max_address;
};
struct zone_page_metadata {
	/* The index of the zone this metadata page belongs to */
	zone_id_t       zm_index;

	/*
	 * zm_secondary_page == 0: number of pages in this run
	 * zm_secondary_page == 1: offset to the chunk start
	 */
	uint16_t        zm_page_count : ZONE_PAGECOUNT_BITS;

	/* Whether this page is part of a chunk run */
	uint16_t        zm_percpu : 1;
	uint16_t        zm_secondary_page : 1;

	/*
	 * The start of the freelist can be maintained as a 16-bit
	 * offset instead of a pointer because the free elements would
	 * be at max ZONE_MAX_ALLOC_SIZE bytes away from the start
	 * of the allocation chunk.
	 *
	 * Offset from start of the allocation chunk to free element.
	 */
	uint16_t        zm_freelist_offs;

	/*
	 * zm_secondary_page == 0: number of allocated elements in the chunk
	 * zm_secondary_page == 1: unused
	 *
	 * PAGE_METADATA_EMPTY_FREELIST indicates an empty freelist
	 */
	uint16_t        zm_alloc_count;
#define PAGE_METADATA_EMPTY_FREELIST  UINT16_MAX

	zone_pva_t      zm_page_next;
	zone_pva_t      zm_page_prev;

	/*
	 * This is only for the sake of debuggers
	 */
#define ZONE_FOREIGN_COOKIE           0x123456789abcdef
	uint64_t        zm_foreign_cookie[];
};

/* Align elements that use the zone page list to 32 byte boundaries. */
#define ZONE_PAGE_FIRST_OFFSET(kind)  ((kind) == ZONE_ADDR_NATIVE ? 0 : 32)

static_assert(sizeof(struct zone_page_metadata) == 16, "validate packing");
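/*
 * Illustrative sketch (not part of the allocator, values hypothetical):
 * for a native 3-page allocation chunk, the metadata entries line up as
 *
 *     meta[0]: zm_secondary_page = 0, zm_page_count = 3   // chunk head
 *     meta[1]: zm_secondary_page = 1, zm_page_count = 1   // offset back to head
 *     meta[2]: zm_secondary_page = 1, zm_page_count = 2   // offset back to head
 *
 * which is what lets the resolve path below walk back to the chunk start
 * with `meta -= meta->zm_page_count`.
 */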
static __security_const_late struct {
	struct zone_map_range zi_map_range;
	struct zone_map_range zi_general_range;
	struct zone_map_range zi_meta_range;
	struct zone_map_range zi_foreign_range;

	/*
	 * The metadata lives within the zi_meta_range address range.
	 *
	 * The correct formula to find a metadata index is:
	 *     absolute_page_index - page_index(zi_meta_range.min_address)
	 *
	 * And then this index is used to dereference zi_meta_range.min_address
	 * as a `struct zone_page_metadata` array.
	 *
	 * To avoid redoing that subtraction in the various fast-paths,
	 * zi_array_base is offset by `page_index(zi_meta_range.min_address)`
	 * ahead of time.
	 */
	struct zone_page_metadata *zi_array_base;
} zone_info;
/*
 * The zone_locks_grp allows for collecting lock statistics.
 * All locks are associated to this group in zinit.
 * Look at tools/lockstat for debugging lock contention.
 */
LCK_GRP_DECLARE(zone_locks_grp, "zone_locks");
LCK_MTX_EARLY_DECLARE(zone_metadata_region_lck, &zone_locks_grp);

/*
 * Exclude more than one concurrent garbage collection
 */
LCK_GRP_DECLARE(zone_gc_lck_grp, "zone_gc");
LCK_MTX_EARLY_DECLARE(zone_gc_lock, &zone_gc_lck_grp);

boolean_t panic_include_zprint = FALSE;
mach_memory_info_t *panic_kext_memory_info = NULL;
vm_size_t panic_kext_memory_size = 0;
/*
 * Protects zone_array, num_zones, num_zones_in_use, and
 * zone_destroyed_bitmap
 */
static SIMPLE_LOCK_DECLARE(all_zones_lock, 0);
static unsigned int num_zones_in_use;
unsigned int _Atomic num_zones;
SECURITY_READ_ONLY_LATE(unsigned int) zone_view_count;

#if KASAN_ZALLOC
#define MAX_ZONES 566
#else /* !KASAN_ZALLOC */
#define MAX_ZONES 402
#endif /* !KASAN_ZALLOC */
struct zone zone_array[MAX_ZONES];
/* Initialized in zone_bootstrap(), how many "copies" the per-cpu system does */
static SECURITY_READ_ONLY_LATE(unsigned) zpercpu_early_count;

/* Used to keep track of destroyed slots in the zone_array */
static bitmap_t zone_destroyed_bitmap[BITMAP_LEN(MAX_ZONES)];

/* number of pages used by all zones */
static long _Atomic zones_phys_page_count;

/* number of zone mapped pages used by all zones */
static long _Atomic zones_phys_page_mapped_count;
#if CONFIG_ZALLOC_SEQUESTER
#define ZSECURITY_OPTIONS_SEQUESTER_DEFAULT ZSECURITY_OPTIONS_SEQUESTER
#else
#define ZSECURITY_OPTIONS_SEQUESTER_DEFAULT 0
#endif

/*
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as to not
 * break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if XNU_TARGET_OS_OSX && __x86_64__
#define ZSECURITY_OPTIONS_STRICT_IOKIT_FREE_DEFAULT 0
#else
#define ZSECURITY_OPTIONS_STRICT_IOKIT_FREE_DEFAULT \
	ZSECURITY_OPTIONS_STRICT_IOKIT_FREE
#endif

#define ZSECURITY_DEFAULT ( \
	ZSECURITY_OPTIONS_SEQUESTER_DEFAULT | \
	ZSECURITY_OPTIONS_SUBMAP_USER_DATA | \
	ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC | \
	ZSECURITY_OPTIONS_STRICT_IOKIT_FREE_DEFAULT | \
	0)
TUNABLE(zone_security_options_t, zsecurity_options, "zs", ZSECURITY_DEFAULT);

#if VM_MAX_TAG_ZONES
/* enable tags for zones that ask for it */
TUNABLE(bool, zone_tagging_on, "-zt", false);
#endif /* VM_MAX_TAG_ZONES */
#if DEBUG || DEVELOPMENT
TUNABLE(bool, zalloc_disable_copyio_check, "-no-copyio-zalloc-check", false);

__options_decl(zalloc_debug_t, uint32_t, {
	ZALLOC_DEBUG_ZONEGC     = 0x00000001,
	ZALLOC_DEBUG_ZCRAM      = 0x00000002,
});

TUNABLE(zalloc_debug_t, zalloc_debug, "zalloc_debug", 0);
#endif /* DEBUG || DEVELOPMENT */

/* Making pointer scanning leaks detection possible for all zones */
#if CONFIG_ZLEAKS
TUNABLE(bool, zone_leaks_scan_enable, "-zl", false);
#else
#define zone_leaks_scan_enable false
#endif
/*
 * Async allocation of zones
 * This mechanism allows for bootstrapping an empty zone which is setup with
 * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
 * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
 * This will prime the zone for the next use.
 *
 * Currently the thread_callout function (zalloc_async) will loop through all zones
 * looking for any zone with async_pending set and do the work for it.
 *
 * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
 * then zalloc_noblock to an empty zone may succeed.
 */
static void zalloc_async(thread_call_param_t p0, thread_call_param_t p1);
static thread_call_data_t call_async_alloc;
static void zcram_and_lock(zone_t zone, vm_offset_t newmem, vm_size_t size);
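/*
 * Illustrative sketch of the priming step described above, as performed by
 * zalloc_async() for a zone whose async_pending flag is set (hypothetical
 * zone `z`, not a verbatim copy of the implementation):
 *
 *     void *elem = zalloc(z);      // may block and grow the zone
 *     if (elem != NULL) {
 *         zfree(z, elem);          // freed element now sits on the freelist
 *     }
 *
 * so that a later zalloc_noblock(z) can succeed without blocking.
 */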
/*
 * Zone Corruption Debugging
 *
 * We use four techniques to detect modification of a zone element
 * after it's been freed.
 *
 * (1) Check the freelist next pointer for sanity.
 * (2) Store a backup of the next pointer at the end of the element,
 *     and compare it to the primary next pointer when the element is allocated
 *     to detect corruption of the freelist due to use-after-free bugs.
 *     The backup pointer is also XORed with a per-boot random cookie.
 * (3) Poison the freed element by overwriting it with 0xdeadbeef,
 *     and check for that value when the element is being reused to make sure
 *     no part of the element has been modified while it was on the freelist.
 *     This will also help catch read-after-frees, as code will now dereference
 *     0xdeadbeef instead of a valid but freed pointer.
 * (4) If the zfree_clear_mem flag is set, clear the element on free and
 *     assert that it is still clear when alloc-ed.
 *
 * (1) and (2) occur for every allocation and free to a zone.
 * This is done to make it slightly more difficult for an attacker to
 * manipulate the freelist to behave in a specific way.
 *
 * Poisoning (3) occurs periodically for every N frees (counted per-zone).
 * If -zp is passed as a boot arg, poisoning occurs for every free.
 *
 * Zeroing (4) is done for those zones that pass the ZC_ZFREE_CLEARMEM
 * flag on creation or if the element size is less than one cacheline.
 *
 * Performance slowdown is inversely proportional to the frequency of poisoning,
 * with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32
 * and higher. You can expect to find a 100% reproducible bug in an average of
 * N tries, with a standard deviation of about N, but you will want to set
 * "-zp" to always poison every free if you are attempting to reproduce
 * a known bug.
 *
 * For a more heavyweight, but finer-grained method of detecting misuse
 * of zone memory, look up the "Guard mode" zone allocator in gzalloc.c.
 *
 * Zone Corruption Logging
 *
 * You can also track where corruptions come from by using the boot-arguments
 * "zlog=<zone name to log> -zc". Search for "Zone corruption logging" later
 * in this document for more implementation and usage information.
 *
 * Zone Leak Detection
 *
 * To debug leaks of zone memory, use the zone leak detection tool 'zleaks'
 * found later in this file via the showtopztrace and showz* macros in kgmacros,
 * or use zlog without the -zc argument.
 */
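/*
 * Illustrative sketch of technique (2) above (cookie names are the ones used
 * in this file, layout simplified): when an element of size `esize` sits on
 * the freelist, both ends of it encode the next pointer:
 *
 *     vm_offset_t *primary = (vm_offset_t *)elem;
 *     vm_offset_t *backup  = (vm_offset_t *)(elem + esize - sizeof(vm_offset_t));
 *
 *     *primary = next;
 *     *backup  = next ^ zp_nopoison_cookie;   // or ^ zp_poisoned_cookie
 *                                             // when the element was poisoned
 *
 * At allocation time the element is only considered sane if
 * `*primary == (*backup ^ cookie)`; a mismatch ends up in
 * backup_ptr_mismatch_panic() further down in this file.
 */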
#define ZP_DEFAULT_SAMPLING_FACTOR 16
#define ZP_DEFAULT_SCALE_FACTOR 4

/*
 * set by zp-factor=N boot arg
 *
 * A zp_factor of 0 indicates zone poisoning is disabled and can also be set by
 * passing the -no-zp boot-arg.
 *
 * A zp_factor of 1 indicates zone poisoning is on for all elements and can be
 * set by passing the -zp boot-arg.
 */
static TUNABLE(uint32_t, zp_factor, "zp-factor", ZP_DEFAULT_SAMPLING_FACTOR);

/* set by zp-scale=N boot arg, scales zp_factor by zone size */
static TUNABLE(uint32_t, zp_scale, "zp-scale", ZP_DEFAULT_SCALE_FACTOR);

/* initialized to a per-boot random value in zp_bootstrap */
static SECURITY_READ_ONLY_LATE(uintptr_t) zp_poisoned_cookie;
static SECURITY_READ_ONLY_LATE(uintptr_t) zp_nopoison_cookie;
static SECURITY_READ_ONLY_LATE(uintptr_t) zp_min_size;
static SECURITY_READ_ONLY_LATE(uint64_t)  zone_phys_mapped_max;
static SECURITY_READ_ONLY_LATE(vm_map_t) zone_submaps[Z_SUBMAP_IDX_COUNT];
static SECURITY_READ_ONLY_LATE(uint32_t) zone_last_submap_idx;

static struct bool_gen zone_bool_gen;
static zone_t zone_find_largest(void);
static void zone_drop_free_elements(zone_t z);

#define submap_for_zone(z)  zone_submaps[(z)->submap_idx]
#define MAX_SUBMAP_NAME     16

/* Globals for random boolean generator for elements in free list */
#define MAX_ENTROPY_PER_ZCRAM 4
#if CONFIG_ZCACHE
/*
 * Specifies a single zone to enable CPU caching for.
 * Can be set using boot-args: zcc_enable_for_zone_name=<zone>
 */
static char cache_zone_name[MAX_ZONE_NAME];
static TUNABLE(bool, zcc_kalloc, "zcc_kalloc", false);

__header_always_inline bool
zone_caching_enabled(zone_t z)
{
	return z->zcache.zcc_depot != NULL;
}
#else
__header_always_inline bool
zone_caching_enabled(zone_t z __unused)
{
	return false;
}
#endif /* CONFIG_ZCACHE */
#pragma mark Zone metadata

__enum_closed_decl(zone_addr_kind_t, bool, {
	ZONE_ADDR_NATIVE,
	ZONE_ADDR_FOREIGN,
});

static inline zone_id_t
zone_index(zone_t z)
{
	return (zone_id_t)(z - zone_array);
}

static inline bool
zone_has_index(zone_t z, zone_id_t zid)
{
	return zone_array + zid == z;
}
static inline vm_size_t
zone_elem_count(zone_t zone, vm_size_t alloc_size, zone_addr_kind_t kind)
{
	if (kind == ZONE_ADDR_NATIVE) {
		if (zone->percpu) {
			return PAGE_SIZE / zone_elem_size(zone);
		}
		return alloc_size / zone_elem_size(zone);
	} else {
		assert(alloc_size == PAGE_SIZE);
		return (PAGE_SIZE - ZONE_PAGE_FIRST_OFFSET(kind)) / zone_elem_size(zone);
	}
}
__abortlike
static void
zone_metadata_corruption(zone_t zone, struct zone_page_metadata *meta,
    const char *kind)
{
	panic("zone metadata corruption: %s (meta %p, zone %s%s)",
	    kind, meta, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_invalid_element_addr_panic(zone_t zone, vm_offset_t addr)
{
	panic("zone element pointer validation failed (addr: %p, zone %s%s)",
	    (void *)addr, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_page_metadata_index_confusion_panic(zone_t zone, vm_offset_t addr,
    struct zone_page_metadata *meta)
{
	panic("%p not in the expected zone %s%s (%d != %d)",
	    (void *)addr, zone_heap_name(zone), zone->z_name,
	    meta->zm_index, zone_index(zone));
}

__abortlike
static void
zone_page_metadata_native_queue_corruption(zone_t zone, zone_pva_t *queue)
{
	panic("foreign metadata index %d enqueued in native head %p from zone %s%s",
	    queue->packed_address, queue, zone_heap_name(zone),
	    zone->z_name);
}

__abortlike
static void
zone_page_metadata_list_corruption(zone_t zone, struct zone_page_metadata *meta)
{
	panic("metadata list corruption through element %p detected in zone %s%s",
	    meta, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_page_metadata_foreign_queue_corruption(zone_t zone, zone_pva_t *queue)
{
	panic("native metadata index %d enqueued in foreign head %p from zone %s%s",
	    queue->packed_address, queue, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_page_metadata_foreign_confusion_panic(zone_t zone, vm_offset_t addr)
{
	panic("manipulating foreign address %p in a native-only zone %s%s",
	    (void *)addr, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_invalid_foreign_addr_panic(zone_t zone, vm_offset_t addr)
{
	panic("addr %p being freed to foreign zone %s%s not from foreign range",
	    (void *)addr, zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_page_meta_accounting_panic(zone_t zone, struct zone_page_metadata *meta,
    const char *kind)
{
	panic("accounting mismatch (%s) for zone %s%s, meta %p", kind,
	    zone_heap_name(zone), zone->z_name, meta);
}

__abortlike
static void
zone_accounting_panic(zone_t zone, const char *kind)
{
	panic("accounting mismatch (%s) for zone %s%s", kind,
	    zone_heap_name(zone), zone->z_name);
}

__abortlike
static void
zone_nofail_panic(zone_t zone)
{
	panic("zalloc(Z_NOFAIL) can't be satisfied for zone %s%s (potential leak)",
	    zone_heap_name(zone), zone->z_name);
}
#if defined(__arm64__)
// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
#define zone_range_load(r, rmin, rmax) \
	asm("ldp %[rmin], %[rmax], [%[range]]" \
	    : [rmin] "=r"(rmin), [rmax] "=r"(rmax) \
	    : [range] "r"(r))
#else
#define zone_range_load(r, rmin, rmax) \
	({ rmin = (r)->min_address; rmax = (r)->max_address; })
#endif
__header_always_inline bool
zone_range_contains(const struct zone_map_range *r, vm_offset_t addr, vm_offset_t size)
{
	vm_offset_t rmin, rmax;

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	zone_range_load(r, rmin, rmax);
	return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
}

__header_always_inline vm_size_t
zone_range_size(const struct zone_map_range *r)
{
	vm_offset_t rmin, rmax;

	zone_range_load(r, rmin, rmax);
	return rmax - rmin;
}

#define from_zone_map(addr, size) \
	zone_range_contains(&zone_info.zi_map_range, (vm_offset_t)(addr), size)

#define from_general_submap(addr, size) \
	zone_range_contains(&zone_info.zi_general_range, (vm_offset_t)(addr), size)

#define from_foreign_range(addr, size) \
	zone_range_contains(&zone_info.zi_foreign_range, (vm_offset_t)(addr), size)

#define from_native_meta_map(addr) \
	zone_range_contains(&zone_info.zi_meta_range, (vm_offset_t)(addr), \
	sizeof(struct zone_page_metadata))

#define zone_addr_kind(addr, size) \
	(from_zone_map(addr, size) ? ZONE_ADDR_NATIVE : ZONE_ADDR_FOREIGN)
__header_always_inline bool
zone_pva_is_null(zone_pva_t page)
{
	return page.packed_address == 0;
}

__header_always_inline bool
zone_pva_is_queue(zone_pva_t page)
{
	// actual kernel pages have the top bit set
	return (int32_t)page.packed_address > 0;
}

__header_always_inline bool
zone_pva_is_equal(zone_pva_t pva1, zone_pva_t pva2)
{
	return pva1.packed_address == pva2.packed_address;
}

__header_always_inline void
zone_queue_set_head(zone_t z, zone_pva_t queue, zone_pva_t oldv,
    struct zone_page_metadata *meta)
{
	zone_pva_t *queue_head = &((zone_pva_t *)zone_array)[queue.packed_address];

	if (!zone_pva_is_equal(*queue_head, oldv)) {
		zone_page_metadata_list_corruption(z, meta);
	}
	*queue_head = meta->zm_page_next;
}

__header_always_inline zone_pva_t
zone_queue_encode(zone_pva_t *headp)
{
	return (zone_pva_t){ (uint32_t)(headp - (zone_pva_t *)zone_array) };
}

__header_always_inline zone_pva_t
zone_pva_from_addr(vm_address_t addr)
{
	// cannot use atop() because we want to maintain the sign bit
	return (zone_pva_t){ (uint32_t)((intptr_t)addr >> PAGE_SHIFT) };
}

__header_always_inline vm_address_t
zone_pva_to_addr(zone_pva_t page)
{
	// cause sign extension so that we end up with the right address
	return (vm_offset_t)(int32_t)page.packed_address << PAGE_SHIFT;
}
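/*
 * Illustrative note on the zone_pva_t encoding above (a sketch, not part of
 * the implementation): packing shifts a kernel page address right by
 * PAGE_SHIFT while preserving the sign bit, so the packed 32-bit value of a
 * real page is "negative", while queue heads are small positive indexes into
 * zone_array (which is what zone_pva_is_queue() tests). Unpacking
 * sign-extends before shifting left, restoring the all-ones high bits of the
 * kernel address, so for addresses inside the zone map the round trip holds:
 *
 *     zone_pva_t pva = zone_pva_from_addr(addr);
 *     assert(zone_pva_to_addr(pva) == trunc_page(addr));
 */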
__header_always_inline struct zone_page_metadata *
zone_pva_to_meta(zone_pva_t page, zone_addr_kind_t kind)
{
	if (kind == ZONE_ADDR_NATIVE) {
		return &zone_info.zi_array_base[page.packed_address];
	}
	return (struct zone_page_metadata *)zone_pva_to_addr(page);
}

__header_always_inline zone_pva_t
zone_pva_from_meta(struct zone_page_metadata *meta, zone_addr_kind_t kind)
{
	if (kind == ZONE_ADDR_NATIVE) {
		uint32_t index = (uint32_t)(meta - zone_info.zi_array_base);
		return (zone_pva_t){ index };
	}
	return zone_pva_from_addr((vm_address_t)meta);
}

__header_always_inline struct zone_page_metadata *
zone_meta_from_addr(vm_offset_t addr, zone_addr_kind_t kind)
{
	if (kind == ZONE_ADDR_NATIVE) {
		return zone_pva_to_meta(zone_pva_from_addr(addr), kind);
	}
	return (struct zone_page_metadata *)trunc_page(addr);
}

#define zone_native_meta_from_addr(addr) \
	zone_meta_from_addr((vm_offset_t)(addr), ZONE_ADDR_NATIVE)

__header_always_inline vm_offset_t
zone_meta_to_addr(struct zone_page_metadata *meta, zone_addr_kind_t kind)
{
	if (kind == ZONE_ADDR_NATIVE) {
		return ptoa((int)(meta - zone_info.zi_array_base));
	}
	return (vm_offset_t)meta;
}
__header_always_inline void
zone_meta_queue_push(zone_t z, zone_pva_t *headp,
    struct zone_page_metadata *meta, zone_addr_kind_t kind)
{
	zone_pva_t head = *headp;
	zone_pva_t queue_pva = zone_queue_encode(headp);
	struct zone_page_metadata *tmp;

	meta->zm_page_next = head;
	if (!zone_pva_is_null(head)) {
		tmp = zone_pva_to_meta(head, kind);
		if (!zone_pva_is_equal(tmp->zm_page_prev, queue_pva)) {
			zone_page_metadata_list_corruption(z, meta);
		}
		tmp->zm_page_prev = zone_pva_from_meta(meta, kind);
	}
	meta->zm_page_prev = queue_pva;
	*headp = zone_pva_from_meta(meta, kind);
}
__header_always_inline struct zone_page_metadata *
zone_meta_queue_pop(zone_t z, zone_pva_t *headp, zone_addr_kind_t kind,
    vm_offset_t *page_addrp)
{
	zone_pva_t head = *headp;
	struct zone_page_metadata *meta = zone_pva_to_meta(head, kind);
	vm_offset_t page_addr = zone_pva_to_addr(head);
	struct zone_page_metadata *tmp;

	if (kind == ZONE_ADDR_NATIVE && !from_native_meta_map(meta)) {
		zone_page_metadata_native_queue_corruption(z, headp);
	}
	if (kind == ZONE_ADDR_FOREIGN && from_zone_map(meta, sizeof(*meta))) {
		zone_page_metadata_foreign_queue_corruption(z, headp);
	}

	if (!zone_pva_is_null(meta->zm_page_next)) {
		tmp = zone_pva_to_meta(meta->zm_page_next, kind);
		if (!zone_pva_is_equal(tmp->zm_page_prev, head)) {
			zone_page_metadata_list_corruption(z, meta);
		}
		tmp->zm_page_prev = meta->zm_page_prev;
	}
	*headp = meta->zm_page_next;

	*page_addrp = page_addr;
	return meta;
}
__header_always_inline void
zone_meta_requeue(zone_t z, zone_pva_t *headp,
    struct zone_page_metadata *meta, zone_addr_kind_t kind)
{
	zone_pva_t meta_pva = zone_pva_from_meta(meta, kind);
	struct zone_page_metadata *tmp;

	if (!zone_pva_is_null(meta->zm_page_next)) {
		tmp = zone_pva_to_meta(meta->zm_page_next, kind);
		if (!zone_pva_is_equal(tmp->zm_page_prev, meta_pva)) {
			zone_page_metadata_list_corruption(z, meta);
		}
		tmp->zm_page_prev = meta->zm_page_prev;
	}
	if (zone_pva_is_queue(meta->zm_page_prev)) {
		zone_queue_set_head(z, meta->zm_page_prev, meta_pva, meta);
	} else {
		tmp = zone_pva_to_meta(meta->zm_page_prev, kind);
		if (!zone_pva_is_equal(tmp->zm_page_next, meta_pva)) {
			zone_page_metadata_list_corruption(z, meta);
		}
		tmp->zm_page_next = meta->zm_page_next;
	}

	zone_meta_queue_push(z, headp, meta, kind);
}
/*
 * Routine to populate a page backing metadata in the zone_metadata_region.
 * Must be called without the zone lock held as it might potentially block.
 */
static void
zone_meta_populate(struct zone_page_metadata *from, struct zone_page_metadata *to)
{
	vm_offset_t page_addr = trunc_page(from);

	for (; page_addr < (vm_offset_t)to; page_addr += PAGE_SIZE) {
#if !KASAN_ZALLOC
		/*
		 * This can race with another thread doing a populate on the same metadata
		 * page, where we see an updated pmap but unmapped KASan shadow, causing a
		 * fault in the shadow when we first access the metadata page. Avoid this
		 * by always synchronizing on the zone_metadata_region lock with KASan.
		 */
		if (pmap_find_phys(kernel_pmap, page_addr)) {
			continue;
		}
#endif

		for (;;) {
			kern_return_t ret = KERN_SUCCESS;

			/* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */
			lck_mtx_lock(&zone_metadata_region_lck);
			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
				ret = kernel_memory_populate(kernel_map, page_addr,
				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
				    VM_KERN_MEMORY_OSFMK);
			}
			lck_mtx_unlock(&zone_metadata_region_lck);

			if (ret == KERN_SUCCESS) {
				break;
			}

			/*
			 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
			 * to bad system deadlocks, so if the allocation failed,
			 * we need to do the VM_PAGE_WAIT() outside of the lock.
			 */
			VM_PAGE_WAIT();
		}
	}
}
__attribute__((always_inline))
static bool
zone_allocated_element_offset_is_valid(zone_t zone, vm_offset_t addr,
    vm_offset_t page, zone_addr_kind_t kind)
{
	vm_offset_t offs = addr - page - ZONE_PAGE_FIRST_OFFSET(kind);
	vm_offset_t esize = zone_elem_size(zone);

	if (esize & (esize - 1)) { /* not a power of 2 */
		return (offs % esize) == 0;
	}
	return (offs & (esize - 1)) == 0;
}
__attribute__((always_inline))
static struct zone_page_metadata *
zone_allocated_element_resolve(zone_t zone, vm_offset_t addr,
    vm_offset_t *pagep, zone_addr_kind_t *kindp)
{
	struct zone_page_metadata *meta;
	zone_addr_kind_t kind;
	vm_offset_t page;
	vm_offset_t esize = zone_elem_size(zone);

	kind = zone_addr_kind(addr, esize);
	page = trunc_page(addr);
	meta = zone_meta_from_addr(addr, kind);

	if (kind == ZONE_ADDR_NATIVE) {
		if (meta->zm_secondary_page) {
			if (meta->zm_percpu) {
				zone_invalid_element_addr_panic(zone, addr);
			}
			page -= ptoa(meta->zm_page_count);
			meta -= meta->zm_page_count;
		}
	} else if (!zone->allows_foreign) {
		zone_page_metadata_foreign_confusion_panic(zone, addr);
#if __LP64__
	} else if (!from_foreign_range(addr, esize)) {
		zone_invalid_foreign_addr_panic(zone, addr);
#else
	} else if (!pmap_kernel_va(addr)) {
		zone_invalid_element_addr_panic(zone, addr);
#endif
	}

	if (!zone_allocated_element_offset_is_valid(zone, addr, page, kind)) {
		zone_invalid_element_addr_panic(zone, addr);
	}

	if (!zone_has_index(zone, meta->zm_index)) {
		zone_page_metadata_index_confusion_panic(zone, addr, meta);
	}

	if (pagep) {
		*pagep = page;
	}
	if (kindp) {
		*kindp = kind;
	}
	return meta;
}

__attribute__((always_inline))
void
zone_allocated_element_validate(zone_t zone, vm_offset_t addr)
{
	zone_allocated_element_resolve(zone, addr, NULL, NULL);
}
__header_always_inline vm_offset_t
zone_page_meta_get_freelist(zone_t zone, struct zone_page_metadata *meta,
    vm_offset_t page)
{
	assert(!meta->zm_secondary_page);
	if (meta->zm_freelist_offs == PAGE_METADATA_EMPTY_FREELIST) {
		return 0;
	}

	vm_size_t size = ptoa(meta->zm_percpu ? 1 : meta->zm_page_count);
	if (meta->zm_freelist_offs + zone_elem_size(zone) > size) {
		zone_metadata_corruption(zone, meta, "freelist corruption");
	}

	return page + meta->zm_freelist_offs;
}

__header_always_inline void
zone_page_meta_set_freelist(struct zone_page_metadata *meta,
    vm_offset_t page, vm_offset_t addr)
{
	assert(!meta->zm_secondary_page);
	if (addr) {
		meta->zm_freelist_offs = (uint16_t)(addr - page);
	} else {
		meta->zm_freelist_offs = PAGE_METADATA_EMPTY_FREELIST;
	}
}

static bool
zone_page_meta_is_sane_element(zone_t zone, struct zone_page_metadata *meta,
    vm_offset_t page, vm_offset_t element, zone_addr_kind_t kind)
{
	if (element == 0) {
		/* ends of the freelist are NULL */
		return true;
	}
	if (element < page + ZONE_PAGE_FIRST_OFFSET(kind)) {
		return false;
	}
	vm_size_t size = ptoa(meta->zm_percpu ? 1 : meta->zm_page_count);
	if (element > page + size - zone_elem_size(zone)) {
		return false;
	}
	return true;
}
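/*
 * Illustrative sketch of the freelist encoding handled above (offsets
 * hypothetical): for a chunk starting at `page` whose first free element
 * sits 0x140 bytes in,
 *
 *     meta->zm_freelist_offs == 0x140
 *     zone_page_meta_get_freelist(zone, meta, page) == page + 0x140
 *
 * and an empty freelist is encoded as
 *
 *     meta->zm_freelist_offs == PAGE_METADATA_EMPTY_FREELIST   // UINT16_MAX
 *
 * which get_freelist() reports as 0 (no free element).
 */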
/*
 * Routine to get the size of a zone allocated address.
 * If the address doesn't belong to the zone maps, returns 0.
 */
vm_size_t
zone_element_size(void *addr, zone_t *z)
{
	struct zone_page_metadata *meta;
	struct zone *src_zone;

	if (from_zone_map(addr, sizeof(void *))) {
		meta = zone_native_meta_from_addr(addr);
		src_zone = &zone_array[meta->zm_index];
		if (z) {
			*z = src_zone;
		}
		return zone_elem_size(src_zone);
	}
#if CONFIG_GZALLOC
	if (__improbable(gzalloc_enabled())) {
		vm_size_t gzsize;
		if (gzalloc_element_size(addr, z, &gzsize)) {
			return gzsize;
		}
	}
#endif /* CONFIG_GZALLOC */

	return 0;
}
/* This function just formats the reason for the panics by redoing the checks */
__abortlike
static void
zone_require_panic(zone_t zone, void *addr)
{
	uint32_t zindex;
	zone_t other;

	if (!from_zone_map(addr, zone_elem_size(zone))) {
		panic("zone_require failed: address not in a zone (addr: %p)", addr);
	}

	zindex = zone_native_meta_from_addr(addr)->zm_index;
	other = &zone_array[zindex];
	if (zindex >= os_atomic_load(&num_zones, relaxed) || !other->z_self) {
		panic("zone_require failed: invalid zone index %d "
		    "(addr: %p, expected: %s%s)", zindex,
		    addr, zone_heap_name(zone), zone->z_name);
	} else {
		panic("zone_require failed: address in unexpected zone id %d (%s%s) "
		    "(addr: %p, expected: %s%s)",
		    zindex, zone_heap_name(other), other->z_name,
		    addr, zone_heap_name(zone), zone->z_name);
	}
}

__abortlike
static void
zone_id_require_panic(zone_id_t zid, void *addr)
{
	zone_require_panic(&zone_array[zid], addr);
}
/*
 * Routines to panic if a pointer is not mapped to an expected zone.
 * This can be used as a means of pinning an object to the zone it is expected
 * to be a part of.  Causes a panic if the address does not belong to any
 * specified zone, does not belong to any zone, has been freed and therefore
 * unmapped from the zone, or the pointer contains an uninitialized value that
 * does not belong to any zone.
 *
 * Note that this can only work with collectable zones without foreign pages.
 */
void
zone_require(zone_t zone, void *addr)
{
	if (__probable(from_general_submap(addr, zone_elem_size(zone)) &&
	    (zone_has_index(zone, zone_native_meta_from_addr(addr)->zm_index)))) {
		return;
	}
#if CONFIG_GZALLOC
	if (__probable(gzalloc_enabled())) {
		return;
	}
#endif
	zone_require_panic(zone, addr);
}

void
zone_id_require(zone_id_t zid, vm_size_t esize, void *addr)
{
	if (__probable(from_general_submap(addr, esize) &&
	    (zid == zone_native_meta_from_addr(addr)->zm_index))) {
		return;
	}
#if CONFIG_GZALLOC
	if (__probable(gzalloc_enabled())) {
		return;
	}
#endif
	zone_id_require_panic(zid, addr);
}

bool
zone_owns(zone_t zone, void *addr)
{
	if (__probable(from_general_submap(addr, zone_elem_size(zone)) &&
	    (zone_has_index(zone, zone_native_meta_from_addr(addr)->zm_index)))) {
		return true;
	}
#if CONFIG_GZALLOC
	if (__probable(gzalloc_enabled())) {
		return true;
	}
#endif
	return false;
}
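/*
 * Illustrative usage sketch (hypothetical zone and type names, not part of
 * the kernel): a subsystem that keeps `struct widget` objects in
 * `widget_zone` can pin pointers it is handed before trusting them:
 *
 *     void
 *     widget_consume(struct widget *w)
 *     {
 *         zone_require(widget_zone, w);   // panics unless w is a live
 *                                         // widget_zone element
 *         ...
 *     }
 *
 * zone_id_require() performs the same check from a zone id and element size,
 * which is convenient when the zone_t itself is not at hand.
 */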
#if VM_MAX_TAG_ZONES

// for zones with tagging enabled:

// calculate a pointer to the tag base entry,
// holding either a uint32_t, the first tag offset for a page in the zone map,
// or two uint16_t tags if the page can only hold one or two elements

#define ZTAGBASE(zone, element) \
	(&((uint32_t *)zone_tagbase_min)[atop((element) - zone_info.zi_map_range.min_address)])

// pointer to the tag for an element
#define ZTAG(zone, element)                                                 \
	({                                                                  \
	        vm_tag_t *result;                                           \
	        if ((zone)->tags_inline) {                                  \
	                result = (vm_tag_t *)ZTAGBASE((zone), (element));   \
	                if ((page_mask & element) >= zone_elem_size(zone)) result++;    \
	        } else {                                                    \
	                result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / zone_elem_size((zone))];   \
	        }                                                           \
	        result;                                                     \
	})


static vm_offset_t  zone_tagbase_min;
static vm_offset_t  zone_tagbase_max;
static vm_offset_t  zone_tagbase_map_size;
static vm_map_t     zone_tagbase_map;

static vm_offset_t  zone_tags_min;
static vm_offset_t  zone_tags_max;
static vm_offset_t  zone_tags_map_size;
static vm_map_t     zone_tags_map;

// simple heap allocator for allocating the tags for new memory

LCK_MTX_EARLY_DECLARE(ztLock, &zone_locks_grp); /* heap lock */
enum {
	ztFreeIndexCount = 8,
	ztFreeIndexMax   = (ztFreeIndexCount - 1),
	ztTagsPerBlock   = 4
};

struct ztBlock {
#if __LITTLE_ENDIAN__
	uint64_t free:1,
	    next:21,
	    prev:21,
	    size:21;
#else
// ztBlock needs free bit least significant
#error !__LITTLE_ENDIAN__
#endif
};
typedef struct ztBlock ztBlock;

static ztBlock *ztBlocks;
static uint32_t ztBlocksCount;
static uint32_t ztBlocksFree;
static uint32_t
ztLog2up(uint32_t size)
{
	if (1 == size) {
		size = 0;
	} else {
		size = 32 - __builtin_clz(size - 1);
	}
	return size;
}

static uint32_t
ztLog2down(uint32_t size)
{
	size = 31 - __builtin_clz(size);
	return size;
}
static void
ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
{
	vm_map_offset_t addr = (vm_map_offset_t) address;
	vm_map_offset_t page, end;

	page = trunc_page(addr);
	end  = round_page(addr + size);

	for (; page < end; page += page_size) {
		if (!pmap_find_phys(kernel_pmap, page)) {
			kern_return_t __unused
			ret = kernel_memory_populate(map, page, PAGE_SIZE,
			    KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
			assert(ret == KERN_SUCCESS);
		}
	}
}
static boolean_t
ztPresent(const void * address, size_t size)
{
	vm_map_offset_t addr = (vm_map_offset_t) address;
	vm_map_offset_t page, end;
	boolean_t result;

	page = trunc_page(addr);
	end  = round_page(addr + size);
	for (result = TRUE; (page < end); page += page_size) {
		result = pmap_find_phys(kernel_pmap, page);
		if (!result) {
			break;
		}
	}
	return result;
}
void __unused
ztDump(boolean_t sanity);
void __unused
ztDump(boolean_t sanity)
{
	uint32_t q, cq, p;

	for (q = 0; q <= ztFreeIndexMax; q++) {
		p = q;
		do{
			if (sanity) {
				cq = ztLog2down(ztBlocks[p].size);
				if (cq > ztFreeIndexMax) {
					cq = ztFreeIndexMax;
				}
				if (!ztBlocks[p].free
				    || ((p != q) && (q != cq))
				    || (ztBlocks[ztBlocks[p].next].prev != p)
				    || (ztBlocks[ztBlocks[p].prev].next != p)) {
					kprintf("zterror at %d", p);
					ztDump(FALSE);
					kprintf("zterror at %d", p);
					assert(FALSE);
				}
			} else {
				kprintf("zt[%03d]%c %d, %d, %d\n",
				    p, ztBlocks[p].free ? 'F' : 'A',
				    ztBlocks[p].next, ztBlocks[p].prev,
				    ztBlocks[p].size);
			}
			p = ztBlocks[p].next;
		}while (p != q);
	}
	if (!sanity) {
		printf("-----------------------\n");
	}
}
#define ZTBDEQ(idx)                                                 \
	ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
	ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;
static void
ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
{
	uint32_t q, w, p, size, merge;

	assert(count);
	ztBlocksFree += count;

	// merge with preceding
	merge = (index + count);
	if ((merge < ztBlocksCount)
	    && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
	    && ztBlocks[merge].free) {
		ZTBDEQ(merge);
		count += ztBlocks[merge].size;
	}

	// merge with following
	merge = (index - 1);
	if ((merge > ztFreeIndexMax)
	    && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
	    && ztBlocks[merge].free) {
		size = ztBlocks[merge].size;
		count += size;
		index -= size;
		ZTBDEQ(index);
	}

	q = ztLog2down(count);
	if (q > ztFreeIndexMax) {
		q = ztFreeIndexMax;
	}
	w = q;
	// queue in order of size
	while (TRUE) {
		p = ztBlocks[w].next;
		if (p == q) {
			break;
		}
		if (ztBlocks[p].size >= count) {
			break;
		}
		w = p;
	}
	ztBlocks[p].prev = index;
	ztBlocks[w].next = index;

	// fault in first
	ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);

	// mark first & last with free flag and size
	ztBlocks[index].free = TRUE;
	ztBlocks[index].size = count;
	ztBlocks[index].prev = w;
	ztBlocks[index].next = p;
	if (count > 1) {
		index += (count - 1);
		// fault in last
		ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
		ztBlocks[index].free = TRUE;
		ztBlocks[index].size = count;
	}
}
static uint32_t
ztAlloc(zone_t zone, uint32_t count)
{
	uint32_t q, w, p, leftover;

	assert(count);

	q = ztLog2up(count);
	if (q > ztFreeIndexMax) {
		q = ztFreeIndexMax;
	}
	do{
		w = q;
		while (TRUE) {
			p = ztBlocks[w].next;
			if (p == q) {
				break;
			}
			if (ztBlocks[p].size >= count) {
				// dequeue, mark both ends allocated
				ztBlocks[w].next = ztBlocks[p].next;
				ztBlocks[ztBlocks[p].next].prev = w;
				ztBlocks[p].free = FALSE;
				ztBlocksFree -= ztBlocks[p].size;
				if (ztBlocks[p].size > 1) {
					ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
				}

				// fault all the allocation
				ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
				// mark last as allocated
				if (count > 1) {
					ztBlocks[p + count - 1].free = FALSE;
				}
				// free any leftover
				leftover = ztBlocks[p].size - count;
				if (leftover) {
					ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
				}

				return p;
			}
			w = p;
		}
		q++;
	}while (q <= ztFreeIndexMax);

	return -1U;
}
static void
zone_tagging_init(vm_size_t max_zonemap_size)
{
	kern_return_t ret;
	vm_map_kernel_flags_t vmk_flags;
	uint32_t idx;

	// allocate submaps VM_KERN_MEMORY_DIAG

	zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
	    &zone_tagbase_map);

	if (ret != KERN_SUCCESS) {
		panic("zone_init: kmem_suballoc failed");
	}
	zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);

	zone_tags_map_size = 2048 * 1024 * sizeof(vm_tag_t);
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
	    &zone_tags_map);

	if (ret != KERN_SUCCESS) {
		panic("zone_init: kmem_suballoc failed");
	}
	zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);

	ztBlocks = (ztBlock *) zone_tags_min;
	ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));

	// initialize the qheads
	lck_mtx_lock(&ztLock);

	ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
	for (idx = 0; idx < ztFreeIndexCount; idx++) {
		ztBlocks[idx].free = TRUE;
		ztBlocks[idx].next = idx;
		ztBlocks[idx].prev = idx;
		ztBlocks[idx].size = 0;
	}
	// free remaining space
	ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);

	lck_mtx_unlock(&ztLock);
}
static void
ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
{
	uint32_t *tagbase;
	uint32_t count, block, blocks, idx;
	size_t   pages;

	pages = atop(size);
	tagbase = ZTAGBASE(zone, mem);

	lck_mtx_lock(&ztLock);

	// fault the tagbase
	ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);

	if (!zone->tags_inline) {
		// allocate tags
		count = (uint32_t)(size / zone_elem_size(zone));
		blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
		block = ztAlloc(zone, blocks);
		if (-1U == block) {
			ztDump(false);
		}
		assert(-1U != block);
	}

	lck_mtx_unlock(&ztLock);

	if (!zone->tags_inline) {
		// set tag base for each page
		block *= ztTagsPerBlock;
		for (idx = 0; idx < pages; idx++) {
			vm_offset_t esize = zone_elem_size(zone);
			tagbase[idx] = block + (uint32_t)((ptoa(idx) + esize - 1) / esize);
		}
	}
}
static void
ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
{
	uint32_t *tagbase;
	uint32_t count, block, blocks, idx;
	size_t   pages;

	// set tag base for each page
	pages = atop(size);
	tagbase = ZTAGBASE(zone, mem);
	block = tagbase[0];
	for (idx = 0; idx < pages; idx++) {
		tagbase[idx] = 0xFFFFFFFF;
	}

	lck_mtx_lock(&ztLock);
	if (!zone->tags_inline) {
		count = (uint32_t)(size / zone_elem_size(zone));
		blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
		assert(block != 0xFFFFFFFF);
		block /= ztTagsPerBlock;
		ztFree(NULL /* zone is unlocked */, block, blocks);
	}

	lck_mtx_unlock(&ztLock);
}
uint32_t
zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
{
	simple_lock(&all_zones_lock, &zone_locks_grp);

	zone_index_foreach(idx) {
		zone_t z = &zone_array[idx];
		if (!z->tags) {
			continue;
		}
		if (tag_zone_index != z->tag_zone_index) {
			continue;
		}

		*elem_size = zone_elem_size(z);
		simple_unlock(&all_zones_lock);
		return idx;
	}

	simple_unlock(&all_zones_lock);

	return -1U;
}

#endif /* VM_MAX_TAG_ZONES */
#pragma mark zalloc helpers

const char *
zone_heap_name(zone_t z)
{
	if (__probable(z->kalloc_heap < KHEAP_ID_COUNT)) {
		return kalloc_heap_names[z->kalloc_heap];
	}
	return "invalid";
}
static inline vm_size_t
zone_submaps_approx_size(void)
{
	vm_size_t size = 0;

	for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
		size += zone_submaps[idx]->size;
	}

	return size;
}
, vm_size_t size
)
1539 return from_zone_map(addr
, size
);
1544 vm_map_size_t
*psize
,
1545 vm_map_size_t
*pfree
,
1546 vm_map_size_t
*plargest_free
)
1548 vm_map_sizes(zone_submaps
[Z_SUBMAP_IDX_GENERAL_MAP
], psize
, pfree
, plargest_free
);
1552 zone_submap(zone_t zone
)
1554 return submap_for_zone(zone
);
1560 return zpercpu_early_count
;
bool
track_this_zone(const char *zonename, const char *logname)
{
	unsigned int len;
	const char *zc = zonename;
	const char *lc = logname;

	/*
	 * Compare the strings.  We bound the compare by MAX_ZONE_NAME.
	 */

	for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
		/*
		 * If the current characters don't match, check for a space in
		 * the zone name and a corresponding period in the log name.
		 * If that's not there, then the strings don't match.
		 */

		if (*zc != *lc && !(*zc == ' ' && *lc == '.')) {
			break;
		}

		/*
		 * The strings are equal so far.  If we're at the end, then it's a match.
		 */

		if (*zc == '\0') {
			return true;
		}
	}

	return false;
}
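/*
 * Illustrative example (hypothetical zone name): boot-args cannot contain
 * spaces, so a zone named "data packets" is selected with "zlog=data.packets":
 *
 *     track_this_zone("data packets", "data.packets")  -> true
 *     track_this_zone("data packets", "data_packets")  -> false
 */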
#if DEBUG || DEVELOPMENT

vm_size_t
zone_element_info(void *addr, vm_tag_t * ptag)
{
	vm_size_t     size = 0;
	vm_tag_t      tag = VM_KERN_MEMORY_NONE;
	struct zone_page_metadata *meta;
	struct zone   *src_zone;

	if (from_zone_map(addr, sizeof(void *))) {
		meta = zone_native_meta_from_addr(addr);
		src_zone = &zone_array[meta->zm_index];
#if VM_MAX_TAG_ZONES
		if (__improbable(src_zone->tags)) {
			tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
		}
#endif /* VM_MAX_TAG_ZONES */
		size = zone_elem_size(src_zone);
	} else {
#if CONFIG_GZALLOC
		gzalloc_element_size(addr, NULL, &size);
#endif /* CONFIG_GZALLOC */
	}
	*ptag = tag;
	return size;
}

#endif /* DEBUG || DEVELOPMENT */
/* Someone wrote to freed memory. */
__abortlike
static void
zone_element_was_modified_panic(
	zone_t        zone,
	vm_offset_t   element,
	vm_offset_t   found,
	vm_offset_t   expected,
	vm_offset_t   offset)
{
	panic("a freed zone element has been modified in zone %s%s: "
	    "expected %p but found %p, bits changed %p, "
	    "at offset %d of %d in element %p, cookies %p %p",
	    zone_heap_name(zone),
	    zone->z_name,
	    (void *)   expected,
	    (void *)   found,
	    (void *)   (expected ^ found),
	    (uint32_t) offset,
	    (uint32_t) zone_elem_size(zone),
	    (void *)   element,
	    (void *)   zp_nopoison_cookie,
	    (void *)   zp_poisoned_cookie);
}
/* The backup pointer is stored in the last pointer-sized location in an element. */
__header_always_inline vm_offset_t *
get_backup_ptr(vm_size_t elem_size, vm_offset_t *element)
{
	return (vm_offset_t *)((vm_offset_t)element + elem_size - sizeof(vm_offset_t));
}
/*
 * The primary and backup pointers don't match.
 * Determine which one was likely the corrupted pointer, find out what it
 * probably should have been, and panic.
 */
__abortlike
static void
backup_ptr_mismatch_panic(
	zone_t        zone,
	struct zone_page_metadata *page_meta,
	vm_offset_t   page,
	vm_offset_t   element)
{
	vm_offset_t primary = *(vm_offset_t *)element;
	vm_offset_t backup  = *get_backup_ptr(zone_elem_size(zone), &element);
	vm_offset_t likely_backup;
	vm_offset_t likely_primary;
	zone_addr_kind_t kind = zone_addr_kind(page, zone_elem_size(zone));

	likely_primary = primary ^ zp_nopoison_cookie;
	boolean_t sane_backup;
	boolean_t sane_primary = zone_page_meta_is_sane_element(zone, page_meta,
	    page, likely_primary, kind);
	boolean_t element_was_poisoned = (backup & 0x1);

#if defined(__LP64__)
	/* We can inspect the tag in the upper bits for additional confirmation */
	if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) {
		element_was_poisoned = TRUE;
	} else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) {
		element_was_poisoned = FALSE;
	}
#endif

	if (element_was_poisoned) {
		likely_backup = backup ^ zp_poisoned_cookie;
	} else {
		likely_backup = backup ^ zp_nopoison_cookie;
	}
	sane_backup = zone_page_meta_is_sane_element(zone, page_meta,
	    page, likely_backup, kind);

	/* The primary is definitely the corrupted one */
	if (!sane_primary && sane_backup) {
		zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
	}

	/* The backup is definitely the corrupted one */
	if (sane_primary && !sane_backup) {
		zone_element_was_modified_panic(zone, element, backup,
		    (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
		    zone_elem_size(zone) - sizeof(vm_offset_t));
	}

	/*
	 * Not sure which is the corrupted one.
	 * It's less likely that the backup pointer was overwritten with
	 * ( (sane address) ^ (valid cookie) ), so we'll guess that the
	 * primary pointer has been overwritten with a sane but incorrect address.
	 */
	if (sane_primary && sane_backup) {
		zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
	}

	/* Neither are sane, so just guess. */
	zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
}
/*
 * zone_sequestered_page_get
 */
static struct zone_page_metadata *
zone_sequestered_page_get(zone_t z, vm_offset_t *page)
{
	const zone_addr_kind_t kind = ZONE_ADDR_NATIVE;

	if (!zone_pva_is_null(z->pages_sequester)) {
		if (os_sub_overflow(z->sequester_page_count, z->alloc_pages,
		    &z->sequester_page_count)) {
			zone_accounting_panic(z, "sequester_page_count wrap-around");
		}
		return zone_meta_queue_pop(z, &z->pages_sequester, kind, page);
	}

	return NULL;
}
/*
 * zone_sequestered_page_populate
 *
 * page_meta is invalid on failure
 */
static kern_return_t
zone_sequestered_page_populate(zone_t z, struct zone_page_metadata *page_meta,
    vm_offset_t space, vm_size_t alloc_size, int zflags)
{
	kern_return_t retval;

	assert(alloc_size == ptoa(z->alloc_pages));
	retval = kernel_memory_populate(submap_for_zone(z), space, alloc_size,
	    zflags, VM_KERN_MEMORY_ZONE);
	if (retval != KERN_SUCCESS) {
		lock_zone(z);
		zone_meta_queue_push(z, &z->pages_sequester, page_meta, ZONE_ADDR_NATIVE);
		z->sequester_page_count += z->alloc_pages;
		unlock_zone(z);
	}
	return retval;
}
#pragma mark Zone poisoning/zeroing

/*
 * Initialize zone poisoning
 * called from zone_bootstrap before any allocations are made from zalloc
 */
__startup_func
static void
zp_bootstrap(void)
{
	char temp_buf[16];

	/*
	 * Initialize backup pointer random cookie for poisoned elements
	 * Try not to call early_random() back to back, it may return
	 * the same value if mach_absolute_time doesn't have sufficient time
	 * to tick over between calls.  <rdar://problem/11597395>
	 * (This is only a problem on embedded devices)
	 */
	zp_poisoned_cookie = (uintptr_t) early_random();

	/* -zp: enable poisoning for every alloc and free */
	if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
		zp_factor = 1;
	}

	/* -no-zp: disable poisoning */
	if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
		zp_factor = 0;
		printf("Zone poisoning disabled\n");
	}

	/* Initialize backup pointer random cookie for unpoisoned elements */
	zp_nopoison_cookie = (uintptr_t) early_random();

	if (zp_poisoned_cookie == zp_nopoison_cookie) {
		panic("early_random() is broken: %p and %p are not random\n",
		    (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie);
	}

	/*
	 * Use the last bit in the backup pointer to hint poisoning state
	 * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so
	 * the low bits are zero.
	 */
	zp_poisoned_cookie |= (uintptr_t)0x1ULL;
	zp_nopoison_cookie &= ~((uintptr_t)0x1ULL);

#if defined(__LP64__)
	/*
	 * Make backup pointers more obvious in GDB for 64 bit
	 * by making OxFFFFFF... ^ cookie = 0xFACADE...
	 * (0xFACADE = 0xFFFFFF ^ 0x053521)
	 * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011)
	 * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked
	 * by the sanity check, so it's OK for that part of the cookie to be predictable.
	 *
	 * TODO: Use #defines, xors, and shifts
	 */
	zp_poisoned_cookie &= 0x000000FFFFFFFFFF;
	zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */

	zp_nopoison_cookie &= 0x000000FFFFFFFFFF;
	zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */
#endif

	/*
	 * Initialize zp_min_size to two cachelines. Elements smaller than this will
	 * always be zero-ed.
	 */
	ml_cpu_info_t cpu_info;
	ml_cpu_get_info(&cpu_info);
	zp_min_size = 2 * cpu_info.cache_line_size;
}
static inline uint32_t
zone_poison_count_init(zone_t zone)
{
	return zp_factor + (((uint32_t)zone_elem_size(zone)) >> zp_scale) ^
	       (mach_absolute_time() & 0x7);
}
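/*
 * Worked example with the default boot-args (zp-factor=16, zp-scale=4) and a
 * hypothetical 128-byte element: 16 + (128 >> 4) = 24, then XORed with the
 * low 3 bits of mach_absolute_time(), giving an initial count between 24 and
 * 31.  Roughly one free in every couple dozen therefore gets poisoned, with a
 * little per-boot/per-zone jitter so the sampling doesn't align across zones.
 */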
#if ZALLOC_ENABLE_POISONING
static bool
zfree_poison_element(zone_t zone, uint32_t *zp_count, vm_offset_t elem)
{
	bool poison = false;
	uint32_t zp_count_local;

	assert(!zone->percpu);
	if (zp_factor != 0) {
		/*
		 * Poison the memory of every zp_count-th element before it ends up
		 * on the freelist to catch use-after-free and use of uninitialized
		 * memory.
		 *
		 * Every element is poisoned when zp_factor is set to 1.
		 */
		zp_count_local = os_atomic_load(zp_count, relaxed);
		if (__improbable(zp_count_local == 0 || zp_factor == 1)) {
			poison = true;

			os_atomic_store(zp_count, zone_poison_count_init(zone), relaxed);

			/* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
			vm_offset_t *element_cursor = ((vm_offset_t *) elem);
			vm_offset_t *end_cursor = (vm_offset_t *)(elem + zone_elem_size(zone));

			for (; element_cursor < end_cursor; element_cursor++) {
				*element_cursor = ZONE_POISON;
			}
		} else {
			os_atomic_store(zp_count, zp_count_local - 1, relaxed);
			/*
			 * Zero first zp_min_size bytes of elements that aren't being poisoned.
			 * Element size is larger than zp_min_size in this path as elements
			 * that are smaller will always be zero-ed.
			 */
			bzero((void *) elem, zp_min_size);
		}
	}
	return poison;
}
#else
static bool
zfree_poison_element(zone_t zone, uint32_t *zp_count, vm_offset_t elem)
{
#pragma unused(zone, zp_count, elem)
	assert(!zone->percpu);
	return false;
}
#endif
__attribute__((always_inline))
static bool
zfree_clear(zone_t zone, vm_offset_t addr, vm_size_t elem_size)
{
	assert(zone->zfree_clear_mem);
	if (zone->percpu) {
		zpercpu_foreach_cpu(i) {
			bzero((void *)(addr + ptoa(i)), elem_size);
		}
	} else {
		bzero((void *)addr, elem_size);
	}

	return true;
}
/*
 * Zero the element if zone has zfree_clear_mem flag set else poison
 * the element if zp_count hits 0.
 */
__attribute__((always_inline))
bool
zfree_clear_or_poison(zone_t zone, uint32_t *zp_count, vm_offset_t addr)
{
	vm_size_t elem_size = zone_elem_size(zone);

	if (zone->zfree_clear_mem) {
		return zfree_clear(zone, addr, elem_size);
	}

	return zfree_poison_element(zone, zp_count, (vm_offset_t)addr);
}
/*
 * Clear out the old next pointer and backup to avoid leaking the zone
 * poisoning cookie and so that only values on the freelist have a valid
 * cookie.
 */
void
zone_clear_freelist_pointers(zone_t zone, vm_offset_t addr)
{
	vm_offset_t perm_value = 0;

	if (!zone->zfree_clear_mem) {
		perm_value = ZONE_POISON;
	}

	vm_offset_t *primary = (vm_offset_t *) addr;
	vm_offset_t *backup  = get_backup_ptr(zone_elem_size(zone), primary);

	*primary = perm_value;
	*backup  = perm_value;
}
#if ZALLOC_ENABLE_POISONING
__abortlike
static void
zone_element_not_clear_panic(zone_t zone, void *addr)
{
	panic("Zone element %p was modified after free for zone %s%s: "
	    "Expected element to be cleared", addr, zone_heap_name(zone),
	    zone->z_name);
}
/*
 * Validate that the element was not tampered with while it was in the
 * freelist.
 */
void
zalloc_validate_element(zone_t zone, vm_offset_t addr, vm_size_t size, bool validate)
{
	if (zone->percpu) {
		assert(zone->zfree_clear_mem);
		zpercpu_foreach_cpu(i) {
			if (memcmp_zero_ptr_aligned((void *)(addr + ptoa(i)), size)) {
				zone_element_not_clear_panic(zone, (void *)(addr + ptoa(i)));
			}
		}
	} else if (zone->zfree_clear_mem) {
		if (memcmp_zero_ptr_aligned((void *)addr, size)) {
			zone_element_not_clear_panic(zone, (void *)addr);
		}
	} else if (__improbable(validate)) {
		const vm_offset_t *p = (vm_offset_t *)addr;
		const vm_offset_t *end = (vm_offset_t *)(addr + size);

		for (; p < end; p++) {
			if (*p != ZONE_POISON) {
				zone_element_was_modified_panic(zone, addr,
				    *p, ZONE_POISON, (vm_offset_t)p - addr);
			}
		}
	} else {
		/*
		 * If element wasn't poisoned or entirely cleared, validate that the
		 * minimum bytes that were cleared on free haven't been corrupted.
		 * addr is advanced by ptr size as we have already validated and cleared
		 * the freelist pointer/zcache canary.
		 */
		if (memcmp_zero_ptr_aligned((void *) (addr + sizeof(vm_offset_t)),
		    zp_min_size - sizeof(vm_offset_t))) {
			zone_element_not_clear_panic(zone, (void *)addr);
		}
	}
}
2014 #pragma mark Zone Leak Detection
2017 * Zone leak debugging code
2019 * When enabled, this code keeps a log to track allocations to a particular zone that have not
2020 * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
2021 * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
2024 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
2025 * is the name of the zone you wish to log.
2027 * This code only tracks one zone, so you need to identify which one is leaking first.
2028 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
2029 * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
2030 * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
2031 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
2032 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
2033 * See the help in the kgmacros for usage info.
2036 * Zone corruption logging
2038 * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
2039 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
2040 * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
2041 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
2042 * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
2043 * corrupted to examine its history. This should lead to the source of the corruption.
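/*
 * Example boot-args (zone name hypothetical; see zone_setup_logging() below
 * for the zlog<n> and zrecs variants):
 *
 *     zlog=data.packets              # leak-style logging of "data packets"
 *     zlog=data.packets -zc          # corruption-style logging (allocs + frees)
 *     zlog1=data.packets zrecs=2000  # one of the zlog<n> slots, larger log
 *
 * Periods stand in for spaces since boot-args cannot contain them.
 */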
/* Returns TRUE if we rolled over the counter at factor */
__header_always_inline bool
sample_counter(volatile uint32_t *count_p, uint32_t factor)
{
	uint32_t old_count, new_count = 0;
	if (count_p != NULL) {
		os_atomic_rmw_loop(count_p, old_count, new_count, relaxed, {
			new_count = old_count + 1;
			if (new_count >= factor) {
			        new_count = 0;
			}
		});
	}

	return new_count == 0;
}
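/*
 * Illustrative usage sketch (hypothetical counter, not from this file):
 * callers use sample_counter() to act on every Nth event, e.g.
 *
 *     static uint32_t counter;
 *     if (sample_counter(&counter, 100)) {
 *         // the counter just rolled over after 100 increments: sample here
 *     }
 */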
#if ZONE_ENABLE_LOGGING
/* Log allocations and frees to help debug a zone element corruption */
TUNABLE(bool, corruption_debug_flag, "-zc", false);

#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */

static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
static int num_zones_logged = 0;

/*
 * The number of records in the log is configurable via the zrecs parameter in boot-args.  Set this to
 * the number of records you want in the log.  For example, "zrecs=10" sets it to 10 records.  Since this
 * is the number of stacks suspected of leaking, we don't need many records.
 */

#if defined(__LP64__)
#define ZRECORDS_MAX            2560            /* Max records allowed in the log */
#else
#define ZRECORDS_MAX            1536            /* Max records allowed in the log */
#endif
#define ZRECORDS_DEFAULT        1024            /* default records in log if zrecs is not specified in boot-args */

static TUNABLE(uint32_t, log_records, "zrecs", ZRECORDS_DEFAULT);

static void
zone_enable_logging(zone_t z)
{
	z->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH,
	    (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */);

	if (z->zlog_btlog) {
		printf("zone: logging started for zone %s%s\n",
		    zone_heap_name(z), z->z_name);
	} else {
		printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
		z->zone_logging = false;
	}
}
2103 * @function zone_setup_logging
2106 * Optionally sets up a zone for logging.
2109 * We recognized two boot-args:
2111 * zlog=<zone_to_log>
2112 * zrecs=<num_records_in_log>
2114 * The zlog arg is used to specify the zone name that should be logged,
2115 * and zrecs is used to control the size of the log.
2117 * If zrecs is not specified, a default value is used.
2120 zone_setup_logging(zone_t z
)
2122 char zone_name
[MAX_ZONE_NAME
]; /* Temp. buffer for the zone name */
2123 char zlog_name
[MAX_ZONE_NAME
]; /* Temp. buffer to create the strings zlog1, zlog2 etc... */
2124 char zlog_val
[MAX_ZONE_NAME
]; /* the zone name we're logging, if any */
2127 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
2129 * This prevents accidentally hogging too much kernel memory
2130 * and making the system unusable.
2132 if (log_records
> ZRECORDS_MAX
) {
2133 log_records
= ZRECORDS_MAX
;
2137 * Append kalloc heap name to zone name (if zone is used by kalloc)
2139 snprintf(zone_name
, MAX_ZONE_NAME
, "%s%s", zone_heap_name(z
), z
->z_name
);
2141 /* zlog0 isn't allowed. */
2142 for (int i
= 1; i
<= max_num_zones_to_log
; i
++) {
2143 snprintf(zlog_name
, MAX_ZONE_NAME
, "zlog%d", i
);
2145 if (PE_parse_boot_argn(zlog_name
, zlog_val
, sizeof(zlog_val
)) &&
2146 track_this_zone(zone_name
, zlog_val
)) {
2147 z
->zone_logging
= true;
2154 * Backwards compat. with the old boot-arg used to specify single zone
2155 * logging i.e. zlog Needs to happen after the newer zlogn checks
2156 * because the prefix will match all the zlogn
2159 if (!z
->zone_logging
&&
2160 PE_parse_boot_argn("zlog", zlog_val
, sizeof(zlog_val
)) &&
2161 track_this_zone(zone_name
, zlog_val
)) {
2162 z
->zone_logging
= true;
2168 * If we want to log a zone, see if we need to allocate buffer space for
2171 * Some vm related zones are zinit'ed before we can do a kmem_alloc, so
2172 * we have to defer allocation in that case.
2174 * zone_init() will finish the job.
2176 * If we want to log one of the VM related zones that's set up early on,
2177 * we will skip allocation of the log until zinit is called again later
2178 * on some other zone.
2180 if (z
->zone_logging
&& startup_phase
>= STARTUP_SUB_KMEM_ALLOC
) {
2181 zone_enable_logging(z
);
2186 * Each record in the log contains a pointer to the zone element it refers to,
2187 * and a small array to hold the pc's from the stack trace. A
2188 * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
2189 * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
2190 * If the log fills, old records are replaced as if it were a circular buffer.
2195 * Decide if we want to log this zone by doing a string compare between a zone name and the name
2196 * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
2197 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
2198 * match a space in the zone name.
2202 * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
2203 * the buffer for the records has been allocated.
2206 #define DO_LOGGING(z) (z->zlog_btlog != NULL)
2207 #else /* !ZONE_ENABLE_LOGGING */
2208 #define DO_LOGGING(z) 0
2209 #endif /* !ZONE_ENABLE_LOGGING */
2214 * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
2215 * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
2216 * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
2217 * and stop tracking it if it was being tracked.
2219 * We track the allocations in the zallocations hash table, which stores the address that was returned from
2220 * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
2221 * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
2222 * backtraces - we don't store them more than once.
2224 * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
2225 * a large amount of virtual space.
2227 #define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
2228 #define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
2229 #define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
2230 #define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
2231 uint32_t zleak_state
= 0; /* State of collection, as above */
2233 boolean_t panic_include_ztrace
= FALSE
; /* Enable zleak logging on panic */
2234 vm_size_t zleak_global_tracking_threshold
; /* Size of zone map at which to start collecting data */
2235 vm_size_t zleak_per_zone_tracking_threshold
; /* Size a zone will have before we will collect data on it */
2236 unsigned int zleak_sample_factor
= 1000; /* Allocations per sample attempt */
2239 * Counters for allocation statistics.
2242 /* Times two active records want to occupy the same spot */
2243 unsigned int z_alloc_collisions
= 0;
2244 unsigned int z_trace_collisions
= 0;
2246 /* Times a new record lands on a spot previously occupied by a freed allocation */
2247 unsigned int z_alloc_overwrites
= 0;
2248 unsigned int z_trace_overwrites
= 0;
2250 /* Times a new alloc or trace is put into the hash table */
2251 unsigned int z_alloc_recorded
= 0;
2252 unsigned int z_trace_recorded
= 0;
2254 /* Times zleak_log returned false due to not being able to acquire the lock */
2255 unsigned int z_total_conflicts
= 0;
2258 * Structure for keeping track of an allocation
2259 * An allocation bucket is in use if its element is not NULL
2261 struct zallocation
{
2262 uintptr_t za_element
; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
2263 vm_size_t za_size
; /* how much memory did this allocation take up? */
2264 uint32_t za_trace_index
; /* index into ztraces for backtrace associated with allocation */
2265 /* TODO: #if this out */
2266 uint32_t za_hit_count
; /* for determining effectiveness of hash function */
2269 /* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
2270 uint32_t zleak_alloc_buckets
= CONFIG_ZLEAK_ALLOCATION_MAP_NUM
;
2271 uint32_t zleak_trace_buckets
= CONFIG_ZLEAK_TRACE_MAP_NUM
;
2273 vm_size_t zleak_max_zonemap_size
;
2275 /* Hashmaps of allocations and their corresponding traces */
2276 static struct zallocation
* zallocations
;
2277 static struct ztrace
* ztraces
;
2279 /* not static so that panic can see this, see kern/debug.c */
2280 struct ztrace
* top_ztrace
;
2282 /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
2283 LCK_GRP_DECLARE(zleak_lock_grp
, "zleak_lock");
2284 LCK_SPIN_DECLARE(zleak_lock
, &zleak_lock_grp
);
2287 * Initializes the zone leak monitor. Called from zone_init()
2291 zleak_init(vm_size_t max_zonemap_size
)
2293 char scratch_buf
[16];
2294 boolean_t zleak_enable_flag
= FALSE
;
2296 zleak_max_zonemap_size
= max_zonemap_size
;
2297 zleak_global_tracking_threshold
= max_zonemap_size
/ 2;
2298 zleak_per_zone_tracking_threshold
= zleak_global_tracking_threshold
/ 8;
2301 if (PE_parse_boot_argn("-zleakon", scratch_buf
, sizeof(scratch_buf
))) {
2302 zleak_enable_flag
= TRUE
;
2303 printf("zone leak detection enabled\n");
2305 zleak_enable_flag
= FALSE
;
2306 printf("zone leak detection disabled\n");
2308 #else /* CONFIG_EMBEDDED */
2309 /* -zleakoff (flag to disable zone leak monitor) */
2310 if (PE_parse_boot_argn("-zleakoff", scratch_buf
, sizeof(scratch_buf
))) {
2311 zleak_enable_flag
= FALSE
;
2312 printf("zone leak detection disabled\n");
2314 zleak_enable_flag
= TRUE
;
2315 printf("zone leak detection enabled\n");
2317 #endif /* CONFIG_EMBEDDED */
2319 /* zfactor=XXXX (override how often to sample the zone allocator) */
2320 if (PE_parse_boot_argn("zfactor", &zleak_sample_factor
, sizeof(zleak_sample_factor
))) {
2321 printf("Zone leak factor override: %u\n", zleak_sample_factor
);
2324 /* zleak-allocs=XXXX (override number of buckets in zallocations) */
2325 if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets
, sizeof(zleak_alloc_buckets
))) {
2326 printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets
);
2327 /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
2328 if (zleak_alloc_buckets
== 0 || (zleak_alloc_buckets
& (zleak_alloc_buckets
- 1))) {
2329 printf("Override isn't a power of two, bad things might happen!\n");
2333 /* zleak-traces=XXXX (override number of buckets in ztraces) */
2334 if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets
, sizeof(zleak_trace_buckets
))) {
2335 printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets
);
2336 /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
2337 if (zleak_trace_buckets
== 0 || (zleak_trace_buckets
& (zleak_trace_buckets
- 1))) {
2338 printf("Override isn't a power of two, bad things might happen!\n");
2342 if (zleak_enable_flag
) {
2343 zleak_state
= ZLEAK_STATE_ENABLED
;
2348 * Support for kern.zleak.active sysctl - a simplified
2349 * version of the zleak_state variable.
2352 get_zleak_state(void)
2354 if (zleak_state
& ZLEAK_STATE_FAILED
) {
2357 if (zleak_state
& ZLEAK_STATE_ACTIVE
) {
2364 zleak_activate(void)
2366 kern_return_t retval
;
2367 vm_size_t z_alloc_size
= zleak_alloc_buckets
* sizeof(struct zallocation
);
2368 vm_size_t z_trace_size
= zleak_trace_buckets
* sizeof(struct ztrace
);
2369 void *allocations_ptr
= NULL
;
2370 void *traces_ptr
= NULL
;
2372 /* Only one thread attempts to activate at a time */
2373 if (zleak_state
& (ZLEAK_STATE_ACTIVE
| ZLEAK_STATE_ACTIVATING
| ZLEAK_STATE_FAILED
)) {
2374 return KERN_SUCCESS
;
2377 /* Indicate that we're doing the setup */
2378 lck_spin_lock(&zleak_lock
);
2379 if (zleak_state
& (ZLEAK_STATE_ACTIVE
| ZLEAK_STATE_ACTIVATING
| ZLEAK_STATE_FAILED
)) {
2380 lck_spin_unlock(&zleak_lock
);
2381 return KERN_SUCCESS
;
2384 zleak_state
|= ZLEAK_STATE_ACTIVATING
;
2385 lck_spin_unlock(&zleak_lock
);
2387 /* Allocate and zero tables */
2388 retval
= kmem_alloc_kobject(kernel_map
, (vm_offset_t
*)&allocations_ptr
, z_alloc_size
, VM_KERN_MEMORY_OSFMK
);
2389 if (retval
!= KERN_SUCCESS
) {
2393 retval
= kmem_alloc_kobject(kernel_map
, (vm_offset_t
*)&traces_ptr
, z_trace_size
, VM_KERN_MEMORY_OSFMK
);
2394 if (retval
!= KERN_SUCCESS
) {
2398 bzero(allocations_ptr
, z_alloc_size
);
2399 bzero(traces_ptr
, z_trace_size
);
2401 /* Everything's set. Install tables, mark active. */
2402 zallocations
= allocations_ptr
;
2403 ztraces
= traces_ptr
;
2406 * Initialize the top_ztrace to the first entry in ztraces,
2407 * so we don't have to check for null in zleak_log
2409 top_ztrace
= &ztraces
[0];
2412 * Note that we do need a barrier between installing
2413 * the tables and setting the active flag, because the zfree()
2414 * path accesses the table without a lock if we're active.
2416 lck_spin_lock(&zleak_lock
);
2417 zleak_state
|= ZLEAK_STATE_ACTIVE
;
2418 zleak_state
&= ~ZLEAK_STATE_ACTIVATING
;
2419 lck_spin_unlock(&zleak_lock
);
2425 * If we fail to allocate memory, don't further tax
2426 * the system by trying again.
2428 lck_spin_lock(&zleak_lock
);
2429 zleak_state
|= ZLEAK_STATE_FAILED
;
2430 zleak_state
&= ~ZLEAK_STATE_ACTIVATING
;
2431 lck_spin_unlock(&zleak_lock
);
2433 if (allocations_ptr
!= NULL
) {
2434 kmem_free(kernel_map
, (vm_offset_t
)allocations_ptr
, z_alloc_size
);
2437 if (traces_ptr
!= NULL
) {
2438 kmem_free(kernel_map
, (vm_offset_t
)traces_ptr
, z_trace_size
);
2445 * TODO: What about allocations that never get deallocated,
2446 * especially ones with unique backtraces? Should we wait to record
2447 * until after boot has completed?
2448 * (How many persistent zallocs are there?)
2452 * This function records the allocation in the allocations table,
2453 * and stores the associated backtrace in the traces table
2454 * (or just increments the refcount if the trace is already recorded)
2455 * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
2456 * the associated trace's refcount is decremented.
2457 * If the trace slot is in use, it returns.
2458 * The refcount is incremented by the amount of memory the allocation consumes.
2459 * The return value indicates whether to try again next time.
2462 zleak_log(uintptr_t* bt
,
2465 vm_size_t allocation_size
)
2467 /* Quit if there's someone else modifying the hash tables */
2468 if (!lck_spin_try_lock(&zleak_lock
)) {
2469 z_total_conflicts
++;
2473 struct zallocation
* allocation
= &zallocations
[hashaddr(addr
, zleak_alloc_buckets
)];
2475 uint32_t trace_index
= hashbacktrace(bt
, depth
, zleak_trace_buckets
);
2476 struct ztrace
* trace
= &ztraces
[trace_index
];
2478 allocation
->za_hit_count
++;
2479 trace
->zt_hit_count
++;
2482 * If the allocation bucket we want to be in is occupied, and if the occupier
2483 * has the same trace as us, just bail.
2485 if (allocation
->za_element
!= (uintptr_t) 0 && trace_index
== allocation
->za_trace_index
) {
2486 z_alloc_collisions
++;
2488 lck_spin_unlock(&zleak_lock
);
2492 /* STEP 1: Store the backtrace in the traces array. */
2493 /* A size of zero indicates that the trace bucket is free. */
2495 if (trace
->zt_size
> 0 && bcmp(trace
->zt_stack
, bt
, (depth
* sizeof(uintptr_t))) != 0) {
2497 * Different unique trace with same hash!
2498 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
2499 * and get out of the way for later chances
2501 trace
->zt_collisions
++;
2502 z_trace_collisions
++;
2504 lck_spin_unlock(&zleak_lock
);
2506 } else if (trace
->zt_size
> 0) {
2507 /* Same trace, already added, so increment refcount */
2508 trace
->zt_size
+= allocation_size
;
2510 /* Found an unused trace bucket, record the trace here! */
2511 if (trace
->zt_depth
!= 0) { /* if this slot was previously used but not currently in use */
2512 z_trace_overwrites
++;
2516 trace
->zt_size
= allocation_size
;
2517 memcpy(trace
->zt_stack
, bt
, (depth
* sizeof(uintptr_t)));
2519 trace
->zt_depth
= depth
;
2520 trace
->zt_collisions
= 0;
2523 /* STEP 2: Store the allocation record in the allocations array. */
2525 if (allocation
->za_element
!= (uintptr_t) 0) {
2527 * Straight up replace any allocation record that was there. We don't want to do the work
2528 * to preserve the allocation entries that were there, because we only record a subset of the
2529 * allocations anyways.
2532 z_alloc_collisions
++;
2534 struct ztrace
* associated_trace
= &ztraces
[allocation
->za_trace_index
];
2535 /* Knock off old allocation's size, not the new allocation */
2536 associated_trace
->zt_size
-= allocation
->za_size
;
2537 } else if (allocation
->za_trace_index
!= 0) {
2538 /* Slot previously used but not currently in use */
2539 z_alloc_overwrites
++;
2542 allocation
->za_element
= addr
;
2543 allocation
->za_trace_index
= trace_index
;
2544 allocation
->za_size
= allocation_size
;
2548 if (top_ztrace
->zt_size
< trace
->zt_size
) {
2552 lck_spin_unlock(&zleak_lock
);
2557 * Free the allocation record and release the stacktrace.
2558 * This should be as fast as possible because it will be called for every free.
2560 __attribute__((noinline
))
2562 zleak_free(uintptr_t addr
,
2563 vm_size_t allocation_size
)
2565 if (addr
== (uintptr_t) 0) {
2569 struct zallocation
* allocation
= &zallocations
[hashaddr(addr
, zleak_alloc_buckets
)];
2571 /* Double-checked locking: check to find out if we're interested, lock, check to make
2572 * sure it hasn't changed, then modify it, and release the lock.
2575 if (allocation
->za_element
== addr
&& allocation
->za_trace_index
< zleak_trace_buckets
) {
2576 /* if the allocation was the one, grab the lock, check again, then delete it */
2577 lck_spin_lock(&zleak_lock
);
2579 if (allocation
->za_element
== addr
&& allocation
->za_trace_index
< zleak_trace_buckets
) {
2580 struct ztrace
*trace
;
2582 /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
2583 if (allocation
->za_size
!= allocation_size
) {
2584 panic("Freeing as size %lu memory that was allocated with size %lu\n",
2585 (uintptr_t)allocation_size
, (uintptr_t)allocation
->za_size
);
2588 trace
= &ztraces
[allocation
->za_trace_index
];
2590 /* size of 0 indicates trace bucket is unused */
2591 if (trace
->zt_size
> 0) {
2592 trace
->zt_size
-= allocation_size
;
2595 /* A NULL element means the allocation bucket is unused */
2596 allocation
->za_element
= 0;
2598 lck_spin_unlock(&zleak_lock
);
2602 #endif /* CONFIG_ZLEAKS */
2604 /* These functions outside of CONFIG_ZLEAKS because they are also used in
2605 * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
2608 /* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
2610 hash_mix(uintptr_t x
)
2633 hashbacktrace(uintptr_t* bt
, uint32_t depth
, uint32_t max_size
)
2636 uintptr_t mask
= max_size
- 1;
2639 hash
+= bt
[--depth
];
2642 hash
= hash_mix(hash
) & mask
;
2644 assert(hash
< max_size
);
2646 return (uint32_t) hash
;
2650 * TODO: Determine how well distributed this is
2651 * max_size must be a power of 2. i.e 0x10000 because 0x10000-1 is 0x0FFFF which is a great bitmask
2654 hashaddr(uintptr_t pt
, uint32_t max_size
)
2657 uintptr_t mask
= max_size
- 1;
2659 hash
= hash_mix(pt
) & mask
;
2661 assert(hash
< max_size
);
2663 return (uint32_t) hash
;
2666 /* End of all leak-detection code */
2667 #pragma mark zone creation, configuration, destruction
2670 zone_init_defaults(zone_id_t zid
)
2672 zone_t z
= &zone_array
[zid
];
2674 z
->page_count_max
= ~0u;
2675 z
->collectable
= true;
2676 z
->expandable
= true;
2677 z
->submap_idx
= Z_SUBMAP_IDX_GENERAL_MAP
;
2679 simple_lock_init(&z
->lock
, 0);
2685 zone_is_initializing(zone_t z
)
2687 return !z
->z_self
&& !z
->destroyed
;
2691 zone_set_max(zone_t z
, vm_size_t max
)
2694 if (z
->kasan_redzone
) {
2696 * Adjust the max memory for the kasan redzones
2698 max
+= (max
/ z
->pcpu_elem_size
) * z
->kasan_redzone
* 2;
2701 if (max
< z
->percpu
? 1 : z
->alloc_pages
) {
2702 max
= z
->percpu
? 1 : z
->alloc_pages
;
2704 max
= atop(round_page(max
));
2706 z
->page_count_max
= max
;
2710 zone_set_submap_idx(zone_t zone
, unsigned int sub_map_idx
)
2712 if (!zone_is_initializing(zone
)) {
2713 panic("%s: called after zone_create()", __func__
);
2715 if (sub_map_idx
> zone_last_submap_idx
) {
2716 panic("zone_set_submap_idx(%d) > %d", sub_map_idx
, zone_last_submap_idx
);
2718 zone
->submap_idx
= sub_map_idx
;
2726 if (!zone_is_initializing(zone
)) {
2727 panic("%s: called after zone_create()", __func__
);
2729 zone
->expandable
= false;
2730 zone_set_max(zone
, max
);
2734 zone_set_exhaustible(
2738 if (!zone_is_initializing(zone
)) {
2739 panic("%s: called after zone_create()", __func__
);
2741 zone
->expandable
= false;
2742 zone
->exhaustible
= true;
2743 zone_set_max(zone
, max
);
2747 * @function zone_create_find
2750 * Finds an unused zone for the given name and element size.
2752 * @param name the zone name
2753 * @param size the element size (including redzones, ...)
2754 * @param flags the flags passed to @c zone_create*
2755 * @param zid the desired zone ID or ZONE_ID_ANY
2757 * @returns a zone to initialize further.
2763 zone_create_flags_t flags
,
2769 simple_lock(&all_zones_lock
, &zone_locks_grp
);
2771 nzones
= (zone_id_t
)os_atomic_load(&num_zones
, relaxed
);
2772 assert(num_zones_in_use
<= nzones
&& nzones
< MAX_ZONES
);
2774 if (__improbable(nzones
< ZONE_ID__FIRST_DYNAMIC
)) {
2776 * The first time around, make sure the reserved zone IDs
2777 * have an initialized lock as zone_index_foreach() will
2780 while (nzones
< ZONE_ID__FIRST_DYNAMIC
) {
2781 zone_init_defaults(nzones
++);
2784 os_atomic_store(&num_zones
, nzones
, release
);
2787 if (zid
!= ZONE_ID_ANY
) {
2788 if (zid
>= ZONE_ID__FIRST_DYNAMIC
) {
2789 panic("zone_create: invalid desired zone ID %d for %s",
2792 if (flags
& ZC_DESTRUCTIBLE
) {
2793 panic("zone_create: ID %d (%s) must be permanent", zid
, name
);
2795 if (zone_array
[zid
].z_self
) {
2796 panic("zone_create: creating zone ID %d (%s) twice", zid
, name
);
2798 z
= &zone_array
[zid
];
2800 if (flags
& ZC_DESTRUCTIBLE
) {
2802 * If possible, find a previously zdestroy'ed zone in the
2803 * zone_array that we can reuse.
2805 for (int i
= bitmap_first(zone_destroyed_bitmap
, MAX_ZONES
);
2806 i
>= 0; i
= bitmap_next(zone_destroyed_bitmap
, i
)) {
2810 * If the zone name and the element size are the
2811 * same, we can just reuse the old zone struct.
2813 if (strcmp(z
->z_name
, name
) || zone_elem_size(z
) != size
) {
2816 bitmap_clear(zone_destroyed_bitmap
, i
);
2817 z
->destroyed
= false;
2825 z
= zone_init_defaults(zid
);
2828 * The release barrier pairs with the acquire in
2829 * zone_index_foreach() and makes sure that enumeration loops
2830 * always see an initialized zone lock.
2832 os_atomic_store(&num_zones
, nzones
, release
);
2837 simple_unlock(&all_zones_lock
);
2844 zone_create_panic(const char *name
, const char *f1
, const char *f2
)
2846 panic("zone_create: creating zone %s: flag %s and %s are incompatible",
2849 #define zone_create_assert_not_both(name, flags, current_flag, forbidden_flag) \
2850 if ((flags) & forbidden_flag) { \
2851 zone_create_panic(name, #current_flag, #forbidden_flag); \
2855 * Adjusts the size of the element based on minimum size, alignment
2856 * and kasan redzones
2859 zone_elem_adjust_size(
2860 const char *name __unused
,
2861 vm_size_t elem_size
,
2862 zone_create_flags_t flags
,
2863 vm_size_t
*redzone __unused
)
2867 * Adjust element size for minimum size and pointer alignment
2869 size
= (elem_size
+ sizeof(vm_offset_t
) - 1) & -sizeof(vm_offset_t
);
2870 if (((flags
& ZC_PERCPU
) == 0) && size
< ZONE_MIN_ELEM_SIZE
) {
2871 size
= ZONE_MIN_ELEM_SIZE
;
2876 * Expand the zone allocation size to include the redzones.
2878 * For page-multiple zones add a full guard page because they
2879 * likely require alignment.
2881 vm_size_t redzone_tmp
;
2882 if (flags
& (ZC_KASAN_NOREDZONE
| ZC_PERCPU
)) {
2884 } else if ((size
& PAGE_MASK
) == 0) {
2885 if (size
!= PAGE_SIZE
&& (flags
& ZC_ALIGNMENT_REQUIRED
)) {
2886 panic("zone_create: zone %s can't provide more than PAGE_SIZE"
2889 redzone_tmp
= PAGE_SIZE
;
2890 } else if (flags
& ZC_ALIGNMENT_REQUIRED
) {
2893 redzone_tmp
= KASAN_GUARD_SIZE
;
2895 size
+= redzone_tmp
* 2;
2897 *redzone
= redzone_tmp
;
2904 * Returns the allocation chunk size that has least framentation
2907 zone_get_min_alloc_granule(
2908 vm_size_t elem_size
,
2909 zone_create_flags_t flags
)
2911 vm_size_t alloc_granule
= PAGE_SIZE
;
2912 if (flags
& ZC_PERCPU
) {
2913 alloc_granule
= PAGE_SIZE
* zpercpu_count();
2914 if (PAGE_SIZE
% elem_size
> 256) {
2915 panic("zone_create: per-cpu zone has too much fragmentation");
2917 } else if ((elem_size
& PAGE_MASK
) == 0) {
2918 /* zero fragmentation by definition */
2919 alloc_granule
= elem_size
;
2920 } else if (alloc_granule
% elem_size
== 0) {
2921 /* zero fragmentation by definition */
2923 vm_size_t frag
= (alloc_granule
% elem_size
) * 100 / alloc_granule
;
2924 vm_size_t alloc_tmp
= PAGE_SIZE
;
2925 while ((alloc_tmp
+= PAGE_SIZE
) <= ZONE_MAX_ALLOC_SIZE
) {
2926 vm_size_t frag_tmp
= (alloc_tmp
% elem_size
) * 100 / alloc_tmp
;
2927 if (frag_tmp
< frag
) {
2929 alloc_granule
= alloc_tmp
;
2933 return alloc_granule
;
2937 zone_get_foreign_alloc_size(
2938 const char *name __unused
,
2939 vm_size_t elem_size
,
2940 zone_create_flags_t flags
,
2943 vm_size_t adjusted_size
= zone_elem_adjust_size(name
, elem_size
, flags
,
2945 vm_size_t alloc_granule
= zone_get_min_alloc_granule(adjusted_size
,
2947 vm_size_t min_size
= min_pages
* PAGE_SIZE
;
2949 * Round up min_size to a multiple of alloc_granule
2951 return ((min_size
+ alloc_granule
- 1) / alloc_granule
)
2959 zone_create_flags_t flags
,
2960 zone_id_t desired_zid
,
2961 void (^extra_setup
)(zone_t
))
2967 if (size
> ZONE_MAX_ALLOC_SIZE
) {
2968 panic("zone_create: element size too large: %zd", (size_t)size
);
2971 size
= zone_elem_adjust_size(name
, size
, flags
, &redzone
);
2973 * Allocate the zone slot, return early if we found an older match.
2975 z
= zone_create_find(name
, size
, flags
, desired_zid
);
2976 if (__improbable(z
->z_self
)) {
2977 /* We found a zone to reuse */
2982 * Initialize the zone properly.
2986 * If the kernel is post lockdown, copy the zone name passed in.
2987 * Else simply maintain a pointer to the name string as it can only
2988 * be a core XNU zone (no unloadable kext exists before lockdown).
2990 if (startup_phase
>= STARTUP_SUB_LOCKDOWN
) {
2991 size_t nsz
= MIN(strlen(name
) + 1, MACH_ZONE_NAME_MAX_LEN
);
2992 char *buf
= zalloc_permanent(nsz
, ZALIGN_NONE
);
2993 strlcpy(buf
, name
, nsz
);
2999 * If zone_init() hasn't run yet, the permanent zones do not exist.
3000 * We can limp along without properly initialized stats for a while,
3001 * zone_init() will rebuild the missing stats when it runs.
3003 if (__probable(zone_array
[ZONE_ID_PERCPU_PERMANENT
].z_self
)) {
3004 z
->z_stats
= zalloc_percpu_permanent_type(struct zone_stats
);
3007 alloc
= zone_get_min_alloc_granule(size
, flags
);
3009 if (flags
& ZC_KALLOC_HEAP
) {
3010 size_t rem
= (alloc
% size
) / (alloc
/ size
);
3013 * Try to grow the elements size and spread them more if the remaining
3014 * space is large enough.
3016 size
+= rem
& ~(KALLOC_MINALIGN
- 1);
3019 z
->pcpu_elem_size
= z
->z_elem_size
= (uint16_t)size
;
3020 z
->alloc_pages
= (uint16_t)atop(alloc
);
3022 z
->kasan_redzone
= redzone
;
3023 if (strncmp(name
, "fakestack.", sizeof("fakestack.") - 1) == 0) {
3024 z
->kasan_fakestacks
= true;
3032 if (flags
& ZC_SEQUESTER
) {
3033 z
->va_sequester
= true;
3036 /* ZC_CACHING applied after all configuration is done */
3038 if (flags
& ZC_PERCPU
) {
3040 * ZC_CACHING is disallowed because it uses per-cpu zones for its
3041 * implementation and it would be circular. These allocations are
3042 * also quite expensive, so caching feels dangerous memory wise too.
3044 * ZC_ZFREE_CLEARMEM is forced because per-cpu zones allow for
3045 * pointer-sized allocations which poisoning doesn't support.
3047 zone_create_assert_not_both(name
, flags
, ZC_PERCPU
, ZC_CACHING
);
3048 zone_create_assert_not_both(name
, flags
, ZC_PERCPU
, ZC_ALLOW_FOREIGN
);
3050 z
->gzalloc_exempt
= true;
3051 z
->zfree_clear_mem
= true;
3052 z
->pcpu_elem_size
*= zpercpu_count();
3054 if (flags
& ZC_ZFREE_CLEARMEM
) {
3055 z
->zfree_clear_mem
= true;
3057 if (flags
& ZC_NOGC
) {
3058 z
->collectable
= false;
3060 if (flags
& ZC_NOENCRYPT
) {
3061 z
->noencrypt
= true;
3063 if (flags
& ZC_ALIGNMENT_REQUIRED
) {
3064 z
->alignment_required
= true;
3066 if (flags
& ZC_NOGZALLOC
) {
3067 z
->gzalloc_exempt
= true;
3069 if (flags
& ZC_NOCALLOUT
) {
3070 z
->no_callout
= true;
3072 if (flags
& ZC_DESTRUCTIBLE
) {
3073 zone_create_assert_not_both(name
, flags
, ZC_DESTRUCTIBLE
, ZC_CACHING
);
3074 zone_create_assert_not_both(name
, flags
, ZC_DESTRUCTIBLE
, ZC_ALLOW_FOREIGN
);
3075 z
->destructible
= true;
3079 * Handle Internal flags
3081 if (flags
& ZC_ALLOW_FOREIGN
) {
3082 z
->allows_foreign
= true;
3084 if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA
& zsecurity_options
) &&
3085 (flags
& ZC_DATA_BUFFERS
)) {
3086 z
->submap_idx
= Z_SUBMAP_IDX_BAG_OF_BYTES_MAP
;
3088 if (flags
& ZC_KASAN_NOQUARANTINE
) {
3089 z
->kasan_noquarantine
= true;
3091 /* ZC_KASAN_NOREDZONE already handled */
3094 * Then if there's extra tuning, do it
3101 * Configure debugging features
3104 gzalloc_zone_init(z
); /* might set z->gzalloc_tracked */
3106 #if ZONE_ENABLE_LOGGING
3107 if (!z
->gzalloc_tracked
&& num_zones_logged
< max_num_zones_to_log
) {
3109 * Check for and set up zone leak detection if requested via boot-args.
3110 * might set z->zone_logging
3112 zone_setup_logging(z
);
3114 #endif /* ZONE_ENABLE_LOGGING */
3115 #if VM_MAX_TAG_ZONES
3116 if (!z
->gzalloc_tracked
&& z
->kalloc_heap
&& zone_tagging_on
) {
3117 static int tag_zone_index
;
3118 vm_offset_t esize
= zone_elem_size(z
);
3120 z
->tags_inline
= (((page_size
+ esize
- 1) / esize
) <=
3121 (sizeof(uint32_t) / sizeof(uint16_t)));
3122 z
->tag_zone_index
= os_atomic_inc_orig(&tag_zone_index
, relaxed
);
3123 assert(z
->tag_zone_index
< VM_MAX_TAG_ZONES
);
3128 * Finally, fixup properties based on security policies, boot-args, ...
3130 if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA
& zsecurity_options
) &&
3131 z
->kalloc_heap
== KHEAP_ID_DATA_BUFFERS
) {
3132 z
->submap_idx
= Z_SUBMAP_IDX_BAG_OF_BYTES_MAP
;
3135 if ((ZSECURITY_OPTIONS_SEQUESTER
& zsecurity_options
) &&
3136 (flags
& ZC_NOSEQUESTER
) == 0 &&
3137 z
->submap_idx
== Z_SUBMAP_IDX_GENERAL_MAP
) {
3138 z
->va_sequester
= true;
3142 * Always clear zone elements smaller than a cacheline,
3143 * because it's pretty close to free.
3145 if (size
<= zp_min_size
) {
3146 z
->zfree_clear_mem
= true;
3148 if (zp_factor
!= 0 && !z
->zfree_clear_mem
) {
3149 z
->zp_count
= zone_poison_count_init(z
);
3153 if ((flags
& ZC_NOCACHING
) == 0) {
3155 * Append kalloc heap name to zone name (if zone is used by kalloc)
3157 char temp_zone_name
[MAX_ZONE_NAME
] = "";
3158 snprintf(temp_zone_name
, MAX_ZONE_NAME
, "%s%s", zone_heap_name(z
), z
->z_name
);
3160 /* Check if boot-arg specified it should have a cache */
3161 if (track_this_zone(temp_zone_name
, cache_zone_name
)) {
3162 flags
|= ZC_CACHING
;
3163 } else if (zcc_kalloc
&& z
->kalloc_heap
) {
3164 flags
|= ZC_CACHING
;
3167 if ((flags
& ZC_CACHING
) &&
3168 !z
->tags
&& !z
->zone_logging
&& !z
->gzalloc_tracked
) {
3171 #endif /* CONFIG_ZCACHE */
3182 zone_create_startup(struct zone_create_startup_spec
*spec
)
3184 *spec
->z_var
= zone_create_ext(spec
->z_name
, spec
->z_size
,
3185 spec
->z_flags
, spec
->z_zid
, spec
->z_setup
);
3189 * The 4 first field of a zone_view and a zone alias, so that the zone_or_view_t
3190 * union works. trust but verify.
3192 #define zalloc_check_zov_alias(f1, f2) \
3193 static_assert(offsetof(struct zone, f1) == offsetof(struct zone_view, f2))
3194 zalloc_check_zov_alias(z_self
, zv_zone
);
3195 zalloc_check_zov_alias(z_stats
, zv_stats
);
3196 zalloc_check_zov_alias(z_name
, zv_name
);
3197 zalloc_check_zov_alias(z_views
, zv_next
);
3198 #undef zalloc_check_zov_alias
3202 zone_view_startup_init(struct zone_view_startup_spec
*spec
)
3204 struct kalloc_heap
*heap
= NULL
;
3205 zone_view_t zv
= spec
->zv_view
;
3208 switch (spec
->zv_heapid
) {
3209 case KHEAP_ID_DEFAULT
:
3210 heap
= KHEAP_DEFAULT
;
3212 case KHEAP_ID_DATA_BUFFERS
:
3213 heap
= KHEAP_DATA_BUFFERS
;
3223 z
= kalloc_heap_zone_for_size(heap
, spec
->zv_size
);
3227 assert(spec
->zv_size
<= zone_elem_size(z
));
3231 zv
->zv_stats
= zalloc_percpu_permanent_type(struct zone_stats
);
3232 zv
->zv_next
= z
->z_views
;
3233 if (z
->z_views
== NULL
&& z
->kalloc_heap
== KHEAP_ID_NONE
) {
3235 * count the raw view for zones not in a heap,
3236 * kalloc_heap_init() already counts it for its members.
3238 zone_view_count
+= 2;
3240 zone_view_count
+= 1;
3249 zone_create_flags_t flags
)
3251 return zone_create_ext(name
, size
, flags
, ZONE_ID_ANY
, NULL
);
3256 vm_size_t size
, /* the size of an element */
3257 vm_size_t max
, /* maximum memory to use */
3258 vm_size_t alloc __unused
, /* allocation size */
3259 const char *name
) /* a name for the zone */
3261 zone_t z
= zone_create(name
, size
, ZC_DESTRUCTIBLE
);
3262 zone_set_max(z
, max
);
3269 unsigned int zindex
= zone_index(z
);
3273 if (!z
->destructible
|| zone_caching_enabled(z
) || z
->allows_foreign
) {
3274 panic("zdestroy: Zone %s%s isn't destructible",
3275 zone_heap_name(z
), z
->z_name
);
3278 if (!z
->z_self
|| z
->expanding_no_vm_priv
|| z
->expanding_vm_priv
||
3279 z
->async_pending
|| z
->waiting
) {
3280 panic("zdestroy: Zone %s%s in an invalid state for destruction",
3281 zone_heap_name(z
), z
->z_name
);
3286 * Unset the valid bit. We'll hit an assert failure on further operations
3287 * on this zone, until zinit() is called again.
3289 * Leave the zone valid for KASan as we will see zfree's on quarantined free
3290 * elements even after the zone is destroyed.
3294 z
->destroyed
= true;
3297 /* Dump all the free elements */
3298 zone_drop_free_elements(z
);
3301 if (__improbable(z
->gzalloc_tracked
)) {
3302 /* If the zone is gzalloc managed dump all the elements in the free cache */
3303 gzalloc_empty_free_cache(z
);
3309 while (!zone_pva_is_null(z
->pages_sequester
)) {
3310 struct zone_page_metadata
*page_meta
;
3311 vm_offset_t free_addr
;
3313 page_meta
= zone_sequestered_page_get(z
, &free_addr
);
3315 kmem_free(submap_for_zone(z
), free_addr
, ptoa(z
->alloc_pages
));
3320 /* Assert that all counts are zero */
3321 if (z
->countavail
|| z
->countfree
|| zone_size_wired(z
) ||
3322 z
->allfree_page_count
|| z
->sequester_page_count
) {
3323 panic("zdestroy: Zone %s%s isn't empty at zdestroy() time",
3324 zone_heap_name(z
), z
->z_name
);
3327 /* consistency check: make sure everything is indeed empty */
3328 assert(zone_pva_is_null(z
->pages_any_free_foreign
));
3329 assert(zone_pva_is_null(z
->pages_all_used_foreign
));
3330 assert(zone_pva_is_null(z
->pages_all_free
));
3331 assert(zone_pva_is_null(z
->pages_intermediate
));
3332 assert(zone_pva_is_null(z
->pages_all_used
));
3333 assert(zone_pva_is_null(z
->pages_sequester
));
3338 simple_lock(&all_zones_lock
, &zone_locks_grp
);
3340 assert(!bitmap_test(zone_destroyed_bitmap
, zindex
));
3341 /* Mark the zone as empty in the bitmap */
3342 bitmap_set(zone_destroyed_bitmap
, zindex
);
3344 assert(num_zones_in_use
> 0);
3346 simple_unlock(&all_zones_lock
);
3349 #pragma mark zone (re)fill, jetsam
3352 * Dealing with zone allocations from the mach VM code.
3354 * The implementation of the mach VM itself uses the zone allocator
3355 * for things like the vm_map_entry data structure. In order to prevent
3356 * an infinite recursion problem when adding more pages to a zone, zalloc
3357 * uses a replenish thread to refill the VM layer's zones before they have
3358 * too few remaining free entries. The reserved remaining free entries
3359 * guarantee that the VM routines can get entries from already mapped pages.
3361 * In order for that to work, the amount of allocations in the nested
3362 * case have to be bounded. There are currently 2 replenish zones, and
3363 * if each needs 1 element of each zone to add a new page to itself, that
3364 * gives us a minumum reserve of 2 elements.
3366 * There is also a deadlock issue with the zone garbage collection thread,
3367 * or any thread that is trying to free zone pages. While holding
3368 * the kernel's map lock they may need to allocate new VM map entries, hence
3369 * we need enough reserve to allow them to get past the point of holding the
3370 * map lock. After freeing that page, the GC thread will wait in drop_free_elements()
3371 * until the replenish threads can finish. Since there's only 1 GC thread at a time,
3372 * that adds a minimum of 1 to the reserve size.
3374 * Since the minumum amount you can add to a zone is 1 page, we'll use 16K (from ARM)
3375 * as the refill size on all platforms.
3377 * When a refill zone drops to half that available, i.e. REFILL_SIZE / 2,
3378 * zalloc_ext() will wake the replenish thread. The replenish thread runs
3379 * until at least REFILL_SIZE worth of free elements exist, before sleeping again.
3380 * In the meantime threads may continue to use the reserve until there are only REFILL_SIZE / 4
3381 * elements left. Below that point only the replenish threads themselves and the GC
3382 * thread may continue to use from the reserve.
3384 static unsigned zone_replenish_loops
;
3385 static unsigned zone_replenish_wakeups
;
3386 static unsigned zone_replenish_wakeups_initiated
;
3387 static unsigned zone_replenish_throttle_count
;
3389 #define ZONE_REPLENISH_TARGET (16 * 1024)
3390 static unsigned zone_replenish_active
= 0; /* count of zones currently replenishing */
3391 static unsigned zone_replenish_max_threads
= 0;
3393 LCK_GRP_DECLARE(zone_replenish_lock_grp
, "zone_replenish_lock");
3394 LCK_SPIN_DECLARE(zone_replenish_lock
, &zone_replenish_lock_grp
);
3398 zone_replenish_panic(zone_t zone
, kern_return_t kr
)
3400 panic_include_zprint
= TRUE
;
3402 if ((zleak_state
& ZLEAK_STATE_ACTIVE
)) {
3403 panic_include_ztrace
= TRUE
;
3405 #endif /* CONFIG_ZLEAKS */
3406 if (kr
== KERN_NO_SPACE
) {
3407 zone_t zone_largest
= zone_find_largest();
3408 panic("zalloc: zone map exhausted while allocating from zone %s%s, "
3409 "likely due to memory leak in zone %s%s "
3410 "(%lu total bytes, %d elements allocated)",
3411 zone_heap_name(zone
), zone
->z_name
,
3412 zone_heap_name(zone_largest
), zone_largest
->z_name
,
3413 (unsigned long)zone_size_wired(zone_largest
),
3414 zone_count_allocated(zone_largest
));
3416 panic("zalloc: %s%s (%d elements) retry fail %d",
3417 zone_heap_name(zone
), zone
->z_name
,
3418 zone_count_allocated(zone
), kr
);
3422 zone_replenish_locked(zone_t z
, zalloc_flags_t flags
, bool asynchronously
)
3424 int kmaflags
= KMA_KOBJECT
| KMA_ZERO
;
3425 vm_offset_t space
, alloc_size
;
3430 kmaflags
|= KMA_NOENCRYPT
;
3432 if (flags
& Z_NOPAGEWAIT
) {
3433 kmaflags
|= KMA_NOPAGEWAIT
;
3436 kmaflags
|= KMA_PERMANENT
;
3440 struct zone_page_metadata
*page_meta
= NULL
;
3443 * Try to allocate our regular chunk of pages,
3444 * unless the system is under massive pressure
3445 * and we're looking for more than 2 pages.
3447 if (!z
->percpu
&& z
->alloc_pages
> 2 && (vm_pool_low() || retry
> 0)) {
3448 alloc_size
= round_page(zone_elem_size(z
));
3450 alloc_size
= ptoa(z
->alloc_pages
);
3451 page_meta
= zone_sequestered_page_get(z
, &space
);
3458 * Do the zone leak activation here because zleak_activate()
3459 * may block, and can't be done on the way out.
3461 if (__improbable(zleak_state
& ZLEAK_STATE_ENABLED
)) {
3462 if (!(zleak_state
& ZLEAK_STATE_ACTIVE
) &&
3463 zone_submaps_approx_size() >= zleak_global_tracking_threshold
) {
3464 kr
= zleak_activate();
3465 if (kr
!= KERN_SUCCESS
) {
3466 printf("Failed to activate live zone leak debugging (%d).\n", kr
);
3470 #endif /* CONFIG_ZLEAKS */
3473 * Trigger jetsams via the vm_pageout_garbage_collect thread if
3474 * we're running out of zone memory
3476 if (is_zone_map_nearing_exhaustion()) {
3477 thread_wakeup((event_t
) &vm_pageout_garbage_collect
);
3481 kr
= zone_sequestered_page_populate(z
, page_meta
, space
,
3482 alloc_size
, kmaflags
);
3484 if (z
->submap_idx
== Z_SUBMAP_IDX_GENERAL_MAP
&& z
->kalloc_heap
!= KHEAP_ID_NONE
) {
3485 kmaflags
|= KMA_KHEAP
;
3487 kr
= kernel_memory_allocate(submap_for_zone(z
),
3488 &space
, alloc_size
, 0, kmaflags
, VM_KERN_MEMORY_ZONE
);
3492 if (kr
== KERN_NO_SPACE
&& z
->allows_foreign
) {
3494 * For zones allowing foreign pages, fallback to the kernel map
3496 kr
= kernel_memory_allocate(kernel_map
, &space
,
3497 alloc_size
, 0, kmaflags
, VM_KERN_MEMORY_ZONE
);
3501 if (kr
== KERN_SUCCESS
) {
3505 if (flags
& Z_NOPAGEWAIT
) {
3510 if (asynchronously
) {
3511 assert_wait_timeout(&z
->prio_refill_count
,
3512 THREAD_UNINT
, 1, 100 * NSEC_PER_USEC
);
3513 thread_block(THREAD_CONTINUE_NULL
);
3514 } else if (++retry
== 3) {
3515 zone_replenish_panic(z
, kr
);
3521 zcram_and_lock(z
, space
, alloc_size
);
3524 if (__improbable(zleak_state
& ZLEAK_STATE_ACTIVE
)) {
3526 zone_size_wired(z
) >= zleak_per_zone_tracking_threshold
) {
3530 #endif /* CONFIG_ZLEAKS */
3534 * High priority VM privileged thread used to asynchronously refill a given zone.
3535 * These are needed for data structures used by the lower level VM itself. The
3536 * replenish thread maintains a reserve of elements, so that the VM will never
3537 * block in the zone allocator.
3541 zone_replenish_thread(void *_z
, wait_result_t __unused wr
)
3545 current_thread()->options
|= (TH_OPT_VMPRIV
| TH_OPT_ZONE_PRIV
);
3549 assert(z
->z_self
== z
);
3550 assert(z
->zone_replenishing
);
3551 assert(z
->prio_refill_count
!= 0);
3553 while (z
->countfree
< z
->prio_refill_count
) {
3554 assert(!z
->expanding_no_vm_priv
);
3555 assert(!z
->expanding_vm_priv
);
3557 zone_replenish_locked(z
, Z_WAITOK
, true);
3559 assert(z
->z_self
== z
);
3560 zone_replenish_loops
++;
3563 /* Wakeup any potentially throttled allocations. */
3566 assert_wait(&z
->prio_refill_count
, THREAD_UNINT
);
3569 * We finished refilling the zone, so decrement the active count
3570 * and wake up any waiting GC threads.
3572 lck_spin_lock(&zone_replenish_lock
);
3573 assert(zone_replenish_active
> 0);
3574 if (--zone_replenish_active
== 0) {
3575 thread_wakeup((event_t
)&zone_replenish_active
);
3577 lck_spin_unlock(&zone_replenish_lock
);
3579 z
->zone_replenishing
= false;
3582 thread_block(THREAD_CONTINUE_NULL
);
3583 zone_replenish_wakeups
++;
3588 zone_prio_refill_configure(zone_t z
)
3594 assert(!z
->prio_refill_count
&& !z
->destructible
);
3595 z
->prio_refill_count
= (uint16_t)(ZONE_REPLENISH_TARGET
/ zone_elem_size(z
));
3596 z
->zone_replenishing
= true;
3599 lck_spin_lock(&zone_replenish_lock
);
3600 ++zone_replenish_max_threads
;
3601 ++zone_replenish_active
;
3602 lck_spin_unlock(&zone_replenish_lock
);
3605 tres
= kernel_thread_start_priority(zone_replenish_thread
, z
,
3606 MAXPRI_KERNEL
, &th
);
3607 if (tres
!= KERN_SUCCESS
) {
3608 panic("zone_prio_refill_configure, thread create: 0x%x", tres
);
3611 thread_deallocate(th
);
3615 zone_randomize_freelist(zone_t zone
, struct zone_page_metadata
*meta
,
3616 vm_offset_t size
, zone_addr_kind_t kind
, unsigned int *entropy_buffer
)
3618 const vm_size_t elem_size
= zone_elem_size(zone
);
3619 vm_offset_t left
, right
, head
, base
;
3620 vm_offset_t element
;
3622 left
= ZONE_PAGE_FIRST_OFFSET(kind
);
3623 right
= size
- ((size
- left
) % elem_size
);
3625 base
= zone_meta_to_addr(meta
, kind
);
3627 while (left
< right
) {
3628 if (zone_leaks_scan_enable
|| __improbable(zone
->tags
) ||
3629 random_bool_gen_bits(&zone_bool_gen
, entropy_buffer
, MAX_ENTROPY_PER_ZCRAM
, 1)) {
3630 element
= base
+ left
;
3634 element
= base
+ right
;
3637 vm_offset_t
*primary
= (vm_offset_t
*)element
;
3638 vm_offset_t
*backup
= get_backup_ptr(elem_size
, primary
);
3640 *primary
= *backup
= head
^ zp_nopoison_cookie
;
3644 meta
->zm_freelist_offs
= (uint16_t)(head
- base
);
3648 * Cram the given memory into the specified zone. Update the zone page count accordingly.
3651 zcram_and_lock(zone_t zone
, vm_offset_t newmem
, vm_size_t size
)
3653 unsigned int entropy_buffer
[MAX_ENTROPY_PER_ZCRAM
] = { 0 };
3654 struct zone_page_metadata
*meta
;
3655 zone_addr_kind_t kind
;
3656 uint32_t pg_count
= (uint32_t)atop(size
);
3657 uint32_t zindex
= zone_index(zone
);
3658 uint32_t free_count
;
3659 uint16_t empty_freelist_offs
= PAGE_METADATA_EMPTY_FREELIST
;
3661 /* Basic sanity checks */
3662 assert(zone
!= ZONE_NULL
&& newmem
!= (vm_offset_t
)0);
3663 assert((newmem
& PAGE_MASK
) == 0);
3664 assert((size
& PAGE_MASK
) == 0);
3666 KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC
, ZALLOC_ZCRAM
) | DBG_FUNC_START
,
3669 kind
= zone_addr_kind(newmem
, size
);
3670 #if DEBUG || DEVELOPMENT
3671 if (zalloc_debug
& ZALLOC_DEBUG_ZCRAM
) {
3672 kprintf("zcram(%p[%s%s], 0x%lx%s, 0x%lx)\n", zone
,
3673 zone_heap_name(zone
), zone
->z_name
, (uintptr_t)newmem
,
3674 kind
== ZONE_ADDR_FOREIGN
? "[F]" : "", (uintptr_t)size
);
3676 #endif /* DEBUG || DEVELOPMENT */
3679 * Initialize the metadata for all pages. We dont need the zone lock
3680 * here because we are not manipulating any zone related state yet.
3682 * This includes randomizing the freelists as the metadata isn't
3686 if (kind
== ZONE_ADDR_NATIVE
) {
3688 * We're being called by zfill,
3689 * zone_replenish_thread or vm_page_more_fictitious,
3691 * which will only either allocate a single page, or `alloc_pages`
3694 assert(pg_count
<= zone
->alloc_pages
);
3697 * Make sure the range of metadata entries we're about to init
3698 * have proper physical backing, then initialize them.
3700 meta
= zone_meta_from_addr(newmem
, kind
);
3701 zone_meta_populate(meta
, meta
+ pg_count
);
3703 if (zone
->permanent
) {
3704 empty_freelist_offs
= 0;
3707 meta
[0] = (struct zone_page_metadata
){
3709 .zm_page_count
= pg_count
,
3710 .zm_percpu
= zone
->percpu
,
3711 .zm_freelist_offs
= empty_freelist_offs
,
3714 for (uint32_t i
= 1; i
< pg_count
; i
++) {
3715 meta
[i
] = (struct zone_page_metadata
){
3718 .zm_percpu
= zone
->percpu
,
3719 .zm_secondary_page
= true,
3720 .zm_freelist_offs
= empty_freelist_offs
,
3724 if (!zone
->permanent
) {
3725 zone_randomize_freelist(zone
, meta
,
3726 zone
->percpu
? PAGE_SIZE
: size
, kind
, entropy_buffer
);
3729 if (!zone
->allows_foreign
|| !from_foreign_range(newmem
, size
)) {
3730 panic("zcram_and_lock: foreign memory [%lx] being crammed is "
3731 "outside of foreign range", (uintptr_t)newmem
);
3735 * We cannot support elements larger than page size for foreign
3736 * memory because we put metadata on the page itself for each
3737 * page of foreign memory.
3739 * We need to do this in order to be able to reach the metadata
3740 * when any element is freed.
3742 assert(!zone
->percpu
&& !zone
->permanent
);
3743 assert(zone_elem_size(zone
) <= PAGE_SIZE
- sizeof(struct zone_page_metadata
));
3745 bzero((void *)newmem
, size
);
3747 for (vm_offset_t offs
= 0; offs
< size
; offs
+= PAGE_SIZE
) {
3748 meta
= (struct zone_page_metadata
*)(newmem
+ offs
);
3749 *meta
= (struct zone_page_metadata
){
3752 .zm_freelist_offs
= empty_freelist_offs
,
3754 meta
->zm_foreign_cookie
[0] = ZONE_FOREIGN_COOKIE
;
3755 zone_randomize_freelist(zone
, meta
, PAGE_SIZE
, kind
,
3760 #if VM_MAX_TAG_ZONES
3761 if (__improbable(zone
->tags
)) {
3762 assert(kind
== ZONE_ADDR_NATIVE
&& !zone
->percpu
);
3763 ztMemoryAdd(zone
, newmem
, size
);
3765 #endif /* VM_MAX_TAG_ZONES */
3768 * Insert the initialized pages / metadatas into the right lists.
3772 assert(zone
->z_self
== zone
);
3774 zone
->page_count
+= pg_count
;
3775 if (zone
->page_count_hwm
< zone
->page_count
) {
3776 zone
->page_count_hwm
= zone
->page_count
;
3778 os_atomic_add(&zones_phys_page_count
, pg_count
, relaxed
);
3780 if (kind
== ZONE_ADDR_NATIVE
) {
3781 os_atomic_add(&zones_phys_page_mapped_count
, pg_count
, relaxed
);
3782 if (zone
->permanent
) {
3783 zone_meta_queue_push(zone
, &zone
->pages_intermediate
, meta
, kind
);
3785 zone_meta_queue_push(zone
, &zone
->pages_all_free
, meta
, kind
);
3786 zone
->allfree_page_count
+= meta
->zm_page_count
;
3788 free_count
= zone_elem_count(zone
, size
, kind
);
3789 zone
->countfree
+= free_count
;
3790 zone
->countavail
+= free_count
;
3792 free_count
= zone_elem_count(zone
, PAGE_SIZE
, kind
);
3793 for (vm_offset_t offs
= 0; offs
< size
; offs
+= PAGE_SIZE
) {
3794 meta
= (struct zone_page_metadata
*)(newmem
+ offs
);
3795 zone_meta_queue_push(zone
, &zone
->pages_any_free_foreign
, meta
, kind
);
3796 zone
->countfree
+= free_count
;
3797 zone
->countavail
+= free_count
;
3801 KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC
, ZALLOC_ZCRAM
) | DBG_FUNC_END
, zindex
);
3805 zcram(zone_t zone
, vm_offset_t newmem
, vm_size_t size
)
3807 zcram_and_lock(zone
, newmem
, size
);
3812 * Fill a zone with enough memory to contain at least nelem elements.
3813 * Return the number of elements actually put into the zone, which may
3814 * be more than the caller asked for since the memory allocation is
3815 * rounded up to the next zone allocation size.
3825 vm_size_t alloc_size
= ptoa(zone
->alloc_pages
);
3826 vm_size_t nalloc_inc
= zone_elem_count(zone
, alloc_size
, ZONE_ADDR_NATIVE
);
3827 vm_size_t nalloc
= 0, goal
= MAX(0, nelem
);
3828 int kmaflags
= KMA_KOBJECT
| KMA_ZERO
;
3830 if (zone
->noencrypt
) {
3831 kmaflags
|= KMA_NOENCRYPT
;
3834 assert(!zone
->allows_foreign
&& !zone
->permanent
);
3837 * Trigger jetsams via the vm_pageout_garbage_collect thread if we're
3838 * running out of zone memory
3840 if (is_zone_map_nearing_exhaustion()) {
3841 thread_wakeup((event_t
) &vm_pageout_garbage_collect
);
3844 if (zone
->va_sequester
) {
3848 struct zone_page_metadata
*page_meta
;
3849 page_meta
= zone_sequestered_page_get(zone
, &memory
);
3850 if (NULL
== page_meta
) {
3855 kr
= zone_sequestered_page_populate(zone
, page_meta
,
3856 memory
, alloc_size
, kmaflags
);
3857 if (KERN_SUCCESS
!= kr
) {
3861 zcram_and_lock(zone
, memory
, alloc_size
);
3862 nalloc
+= nalloc_inc
;
3863 } while (nalloc
< goal
);
3869 while (nalloc
< goal
) {
3870 kr
= kernel_memory_allocate(submap_for_zone(zone
), &memory
,
3871 alloc_size
, 0, kmaflags
, VM_KERN_MEMORY_ZONE
);
3872 if (kr
!= KERN_SUCCESS
) {
3873 printf("%s: kernel_memory_allocate() of %lu bytes failed\n",
3874 __func__
, (unsigned long)(nalloc
* alloc_size
));
3878 zcram(zone
, memory
, alloc_size
);
3879 nalloc
+= nalloc_inc
;
3886 * We're being very conservative here and picking a value of 95%. We might need to lower this if
3887 * we find that we're not catching the problem and are still hitting zone map exhaustion panics.
3889 #define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95
3892 * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit.
3893 * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default.
3895 TUNABLE_WRITEABLE(unsigned int, zone_map_jetsam_limit
, "zone_map_jetsam_limit",
3896 ZONE_MAP_JETSAM_LIMIT_DEFAULT
);
3899 get_zone_map_size(uint64_t *current_size
, uint64_t *capacity
)
3901 vm_offset_t phys_pages
= os_atomic_load(&zones_phys_page_mapped_count
, relaxed
);
3902 *current_size
= ptoa_64(phys_pages
);
3903 *capacity
= zone_phys_mapped_max
;
3907 get_largest_zone_info(char *zone_name
, size_t zone_name_len
, uint64_t *zone_size
)
3909 zone_t largest_zone
= zone_find_largest();
3912 * Append kalloc heap name to zone name (if zone is used by kalloc)
3914 snprintf(zone_name
, zone_name_len
, "%s%s",
3915 zone_heap_name(largest_zone
), largest_zone
->z_name
);
3917 *zone_size
= zone_size_wired(largest_zone
);
3921 is_zone_map_nearing_exhaustion(void)
3923 vm_offset_t phys_pages
= os_atomic_load(&zones_phys_page_mapped_count
, relaxed
);
3924 return ptoa_64(phys_pages
) > (zone_phys_mapped_max
* zone_map_jetsam_limit
) / 100;
3928 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
3931 * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread
3932 * to walk through the jetsam priority bands and kill processes.
3935 kill_process_in_largest_zone(void)
3938 zone_t largest_zone
= zone_find_largest();
3940 printf("zone_map_exhaustion: Zone mapped %lld of %lld, used %lld, map size %lld, capacity %lld [jetsam limit %d%%]\n",
3941 ptoa_64(os_atomic_load(&zones_phys_page_mapped_count
, relaxed
)), ptoa_64(zone_phys_mapped_max
),
3942 ptoa_64(os_atomic_load(&zones_phys_page_count
, relaxed
)),
3943 (uint64_t)zone_submaps_approx_size(),
3944 (uint64_t)zone_range_size(&zone_info
.zi_map_range
),
3945 zone_map_jetsam_limit
);
3946 printf("zone_map_exhaustion: Largest zone %s%s, size %lu\n", zone_heap_name(largest_zone
),
3947 largest_zone
->z_name
, (uintptr_t)zone_size_wired(largest_zone
));
3950 * We want to make sure we don't call this function from userspace.
3951 * Or we could end up trying to synchronously kill the process
3952 * whose context we're in, causing the system to hang.
3954 assert(current_task() == kernel_task
);
3957 * If vm_object_zone is the largest, check to see if the number of
3958 * elements in vm_map_entry_zone is comparable.
3960 * If so, consider vm_map_entry_zone as the largest. This lets us target
3961 * a specific process to jetsam to quickly recover from the zone map
3964 if (largest_zone
== vm_object_zone
) {
3965 unsigned int vm_object_zone_count
= zone_count_allocated(vm_object_zone
);
3966 unsigned int vm_map_entry_zone_count
= zone_count_allocated(vm_map_entry_zone
);
3967 /* Is the VM map entries zone count >= 98% of the VM objects zone count? */
3968 if (vm_map_entry_zone_count
>= ((vm_object_zone_count
* VMENTRY_TO_VMOBJECT_COMPARISON_RATIO
) / 100)) {
3969 largest_zone
= vm_map_entry_zone
;
3970 printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n",
3971 (uintptr_t)zone_size_wired(largest_zone
));
3975 /* TODO: Extend this to check for the largest process in other zones as well. */
3976 if (largest_zone
== vm_map_entry_zone
) {
3977 pid
= find_largest_process_vm_map_entries();
3979 printf("zone_map_exhaustion: Nothing to do for the largest zone [%s%s]. "
3980 "Waking up memorystatus thread.\n", zone_heap_name(largest_zone
),
3981 largest_zone
->z_name
);
3983 if (!memorystatus_kill_on_zone_map_exhaustion(pid
)) {
3984 printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid
);
3988 #pragma mark zalloc module init
3991 * Initialize the "zone of zones" which uses fixed memory allocated
3992 * earlier in memory initialization. zone_bootstrap is called
3997 zone_bootstrap(void)
3999 /* Validate struct zone_page_metadata expectations */
4000 if ((1U << ZONE_PAGECOUNT_BITS
) <
4001 atop(ZONE_MAX_ALLOC_SIZE
) * sizeof(struct zone_page_metadata
)) {
4002 panic("ZONE_PAGECOUNT_BITS is not large enough to hold page counts");
4005 /* Validate struct zone_packed_virtual_address expectations */
4006 static_assert((intptr_t)VM_MIN_KERNEL_ADDRESS
< 0, "the top bit must be 1");
4007 if (VM_KERNEL_POINTER_SIGNIFICANT_BITS
- PAGE_SHIFT
> 31) {
4008 panic("zone_pva_t can't pack a kernel page address in 31 bits");
4011 zpercpu_early_count
= ml_early_cpu_max_number() + 1;
4013 /* Set up zone element poisoning */
4016 random_bool_init(&zone_bool_gen
);
4019 * the KASAN quarantine for kalloc doesn't understand heaps
4020 * and trips the heap confusion panics. At the end of the day,
4021 * all these security measures are double duty with KASAN.
4023 * On 32bit kernels, these protections are just too expensive.
4025 #if !defined(__LP64__) || KASAN_ZALLOC
4026 zsecurity_options
&= ~ZSECURITY_OPTIONS_SEQUESTER
;
4027 zsecurity_options
&= ~ZSECURITY_OPTIONS_SUBMAP_USER_DATA
;
4028 zsecurity_options
&= ~ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC
;
4031 thread_call_setup(&call_async_alloc
, zalloc_async
, NULL
);
4034 /* zcc_enable_for_zone_name=<zone>: enable per-cpu zone caching for <zone>. */
4035 if (PE_parse_boot_arg_str("zcc_enable_for_zone_name", cache_zone_name
, sizeof(cache_zone_name
))) {
4036 printf("zcache: caching enabled for zone %s\n", cache_zone_name
);
4038 #endif /* CONFIG_ZCACHE */
4043 #define ZONE_MAP_VIRTUAL_SIZE_LP64 (32ULL * 1024ULL * 1024 * 1024)
4045 #define ZONE_MAP_VIRTUAL_SIZE_LP64 (128ULL * 1024ULL * 1024 * 1024)
4047 #endif /* __LP64__ */
4049 #define SINGLE_GUARD 16384
4050 #define MULTI_GUARD (3 * SINGLE_GUARD)
4053 static inline vm_offset_t
4054 zone_restricted_va_max(void)
4056 vm_offset_t compressor_max
= VM_PACKING_MAX_PACKABLE(C_SLOT_PACKED_PTR
);
4057 vm_offset_t vm_page_max
= VM_PACKING_MAX_PACKABLE(VM_PAGE_PACKED_PTR
);
4059 return trunc_page(MIN(compressor_max
, vm_page_max
));
4065 zone_tunables_fixup(void)
4067 if (zone_map_jetsam_limit
== 0 || zone_map_jetsam_limit
> 100) {
4068 zone_map_jetsam_limit
= ZONE_MAP_JETSAM_LIMIT_DEFAULT
;
4071 STARTUP(TUNABLES
, STARTUP_RANK_MIDDLE
, zone_tunables_fixup
);
4075 zone_phys_size_max(void)
4077 mach_vm_size_t zsize
;
4080 if (PE_parse_boot_argn("zsize", &zsizearg
, sizeof(zsizearg
))) {
4081 zsize
= zsizearg
* (1024ULL * 1024);
4083 zsize
= sane_size
>> 2; /* Set target zone size as 1/4 of physical memory */
4084 #if defined(__LP64__)
4085 zsize
+= zsize
>> 1;
4086 #endif /* __LP64__ */
4089 if (zsize
< CONFIG_ZONE_MAP_MIN
) {
4090 zsize
= CONFIG_ZONE_MAP_MIN
; /* Clamp to min */
4092 if (zsize
> sane_size
>> 1) {
4093 zsize
= sane_size
>> 1; /* Clamp to half of RAM max */
4095 if (zsizearg
== 0 && zsize
> ZONE_MAP_MAX
) {
4096 /* if zsize boot-arg not present and zsize exceeds platform maximum, clip zsize */
4097 vm_size_t orig_zsize
= zsize
;
4098 zsize
= ZONE_MAP_MAX
;
4099 printf("NOTE: zonemap size reduced from 0x%lx to 0x%lx\n",
4100 (uintptr_t)orig_zsize
, (uintptr_t)zsize
);
4103 assert((vm_size_t
) zsize
== zsize
);
4104 return (vm_size_t
)trunc_page(zsize
);
4108 static struct zone_map_range
4109 zone_init_allocate_va(vm_offset_t
*submap_min
, vm_size_t size
, bool guard
)
4111 struct zone_map_range r
;
4115 vm_map_offset_t addr
= *submap_min
;
4116 vm_map_kernel_flags_t vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4118 vmk_flags
.vmkf_permanent
= TRUE
;
4119 kr
= vm_map_enter(kernel_map
, &addr
, size
, 0,
4120 VM_FLAGS_FIXED
, vmk_flags
, VM_KERN_MEMORY_ZONE
, kernel_object
,
4121 0, FALSE
, VM_PROT_NONE
, VM_PROT_NONE
, VM_INHERIT_DEFAULT
);
4122 *submap_min
= (vm_offset_t
)addr
;
4124 kr
= kernel_memory_allocate(kernel_map
, submap_min
, size
,
4125 0, KMA_KOBJECT
| KMA_PAGEABLE
| KMA_VAONLY
, VM_KERN_MEMORY_ZONE
);
4127 if (kr
!= KERN_SUCCESS
) {
4128 panic("zone_init_allocate_va(0x%lx:0x%zx) failed: %d",
4129 (uintptr_t)*submap_min
, (size_t)size
, kr
);
4132 r
.min_address
= *submap_min
;
4133 *submap_min
+= size
;
4134 r
.max_address
= *submap_min
;
4142 vm_offset_t
*submap_min
,
4144 uint64_t zone_sub_map_numer
,
4145 uint64_t *remaining_denom
,
4146 vm_offset_t
*remaining_size
,
4147 vm_size_t guard_size
)
4149 vm_offset_t submap_start
, submap_end
;
4150 vm_size_t submap_size
;
4154 submap_size
= trunc_page(zone_sub_map_numer
* *remaining_size
/
4156 submap_start
= *submap_min
;
4157 submap_end
= submap_start
+ submap_size
;
4159 #if defined(__LP64__)
4160 if (idx
== Z_SUBMAP_IDX_VA_RESTRICTED_MAP
) {
4161 vm_offset_t restricted_va_max
= zone_restricted_va_max();
4162 if (submap_end
> restricted_va_max
) {
4163 #if DEBUG || DEVELOPMENT
4164 printf("zone_init: submap[%d] clipped to %zdM of %zdM\n", idx
,
4165 (size_t)(restricted_va_max
- submap_start
) >> 20,
4166 (size_t)submap_size
>> 20);
4167 #endif /* DEBUG || DEVELOPMENT */
4168 guard_size
+= submap_end
- restricted_va_max
;
4169 *remaining_size
-= submap_end
- restricted_va_max
;
4170 submap_end
= restricted_va_max
;
4171 submap_size
= restricted_va_max
- submap_start
;
4174 vm_packing_verify_range("vm_compressor",
4175 submap_start
, submap_end
, VM_PACKING_PARAMS(C_SLOT_PACKED_PTR
));
4176 vm_packing_verify_range("vm_page",
4177 submap_start
, submap_end
, VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR
));
4179 #endif /* defined(__LP64__) */
4181 vm_map_kernel_flags_t vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4182 vmk_flags
.vmkf_permanent
= TRUE
;
4183 kr
= kmem_suballoc(kernel_map
, submap_min
, submap_size
,
4184 FALSE
, VM_FLAGS_FIXED
, vmk_flags
,
4185 VM_KERN_MEMORY_ZONE
, &submap
);
4186 if (kr
!= KERN_SUCCESS
) {
4187 panic("kmem_suballoc(kernel_map[%d] %p:%p) failed: %d",
4188 idx
, (void *)submap_start
, (void *)submap_end
, kr
);
4191 #if DEBUG || DEVELOPMENT
4192 printf("zone_init: submap[%d] %p:%p (%zuM)\n",
4193 idx
, (void *)submap_start
, (void *)submap_end
,
4194 (size_t)submap_size
>> 20);
4195 #endif /* DEBUG || DEVELOPMENT */
4197 zone_submaps
[idx
] = submap
;
4198 *submap_min
= submap_end
;
4199 *remaining_size
-= submap_size
;
4200 *remaining_denom
-= zone_sub_map_numer
;
4202 zone_init_allocate_va(submap_min
, guard_size
, true);
4205 /* Global initialization of Zone Allocator.
4206 * Runs after zone_bootstrap.
4212 vm_size_t zone_meta_size
;
4213 vm_size_t zone_map_size
;
4214 vm_size_t remaining_size
;
4215 vm_offset_t submap_min
= 0;
4217 if (ZSECURITY_OPTIONS_SUBMAP_USER_DATA
& zsecurity_options
) {
4218 zone_last_submap_idx
= Z_SUBMAP_IDX_BAG_OF_BYTES_MAP
;
4220 zone_last_submap_idx
= Z_SUBMAP_IDX_GENERAL_MAP
;
4222 zone_phys_mapped_max
= zone_phys_size_max();
4225 zone_map_size
= ZONE_MAP_VIRTUAL_SIZE_LP64
;
4227 zone_map_size
= zone_phys_mapped_max
;
4229 zone_meta_size
= round_page(atop(zone_map_size
) *
4230 sizeof(struct zone_page_metadata
));
4235 * [ VA_RESTRICTED ] <-- LP64 only
4236 * [ SINGLE_GUARD ] <-- LP64 only
4239 * [ map<i> ] \ for each extra map
4242 remaining_size
= zone_map_size
;
4243 #if defined(__LP64__)
4244 remaining_size
-= SINGLE_GUARD
;
4246 remaining_size
-= zone_meta_size
+ SINGLE_GUARD
;
4247 remaining_size
-= MULTI_GUARD
* (zone_last_submap_idx
-
4248 Z_SUBMAP_IDX_GENERAL_MAP
+ 1);
4250 #if VM_MAX_TAG_ZONES
4251 if (zone_tagging_on
) {
4252 zone_tagging_init(zone_map_size
);
4256 uint64_t remaining_denom
= 0;
4257 uint64_t zone_sub_map_numer
[Z_SUBMAP_IDX_COUNT
] = {
4259 [Z_SUBMAP_IDX_VA_RESTRICTED_MAP
] = 20,
4260 #endif /* defined(__LP64__) */
4261 [Z_SUBMAP_IDX_GENERAL_MAP
] = 40,
4262 [Z_SUBMAP_IDX_BAG_OF_BYTES_MAP
] = 40,
4265 for (unsigned idx
= 0; idx
<= zone_last_submap_idx
; idx
++) {
4266 #if DEBUG || DEVELOPMENT
4267 char submap_name
[MAX_SUBMAP_NAME
];
4268 snprintf(submap_name
, MAX_SUBMAP_NAME
, "submap%d", idx
);
4269 PE_parse_boot_argn(submap_name
, &zone_sub_map_numer
[idx
], sizeof(uint64_t));
4271 remaining_denom
+= zone_sub_map_numer
[idx
];
4275 * And now allocate the various pieces of VA and submaps.
4277 * Make a first allocation of contiguous VA, that we'll deallocate,
4278 * and we'll carve-out memory in that range again linearly.
4279 * The kernel is stil single threaded at this stage.
4282 struct zone_map_range
*map_range
= &zone_info
.zi_map_range
;
4284 *map_range
= zone_init_allocate_va(&submap_min
, zone_map_size
, false);
4285 submap_min
= map_range
->min_address
;
4286 kmem_free(kernel_map
, submap_min
, zone_map_size
);
4288 #if defined(__LP64__)
4290 * Allocate `Z_SUBMAP_IDX_VA_RESTRICTED_MAP` first because its VA range
4291 * can't go beyond RESTRICTED_VA_MAX for the vm_page_t packing to work.
4293 zone_submap_init(&submap_min
, Z_SUBMAP_IDX_VA_RESTRICTED_MAP
,
4294 zone_sub_map_numer
[Z_SUBMAP_IDX_VA_RESTRICTED_MAP
], &remaining_denom
,
4295 &remaining_size
, SINGLE_GUARD
);
4296 #endif /* defined(__LP64__) */
4299 * Allocate metadata array
4301 zone_info
.zi_meta_range
=
4302 zone_init_allocate_va(&submap_min
, zone_meta_size
, true);
4303 zone_init_allocate_va(&submap_min
, SINGLE_GUARD
, true);
4305 zone_info
.zi_array_base
=
4306 (struct zone_page_metadata
*)zone_info
.zi_meta_range
.min_address
-
4307 zone_pva_from_addr(map_range
->min_address
).packed_address
;
4310 * Allocate other submaps
4312 for (unsigned idx
= Z_SUBMAP_IDX_GENERAL_MAP
; idx
<= zone_last_submap_idx
; idx
++) {
4313 zone_submap_init(&submap_min
, idx
, zone_sub_map_numer
[idx
],
4314 &remaining_denom
, &remaining_size
, MULTI_GUARD
);
4317 vm_map_t general_map
= zone_submaps
[Z_SUBMAP_IDX_GENERAL_MAP
];
4318 zone_info
.zi_general_range
.min_address
= vm_map_min(general_map
);
4319 zone_info
.zi_general_range
.max_address
= vm_map_max(general_map
);
4321 assert(submap_min
== map_range
->max_address
);
4324 gzalloc_init(zone_map_size
);
4327 zone_create_flags_t kma_flags
= ZC_NOCACHING
|
4328 ZC_NOGC
| ZC_NOENCRYPT
| ZC_NOGZALLOC
| ZC_NOCALLOUT
|
4329 ZC_KASAN_NOQUARANTINE
| ZC_KASAN_NOREDZONE
;
4331 (void)zone_create_ext("vm.permanent", 1, kma_flags
,
4332 ZONE_ID_PERMANENT
, ^(zone_t z
){
4333 z
->permanent
= true;
4335 z
->pcpu_elem_size
= 1;
4336 #if defined(__LP64__)
4337 z
->submap_idx
= Z_SUBMAP_IDX_VA_RESTRICTED_MAP
;
4340 (void)zone_create_ext("vm.permanent.percpu", 1, kma_flags
| ZC_PERCPU
,
4341 ZONE_ID_PERCPU_PERMANENT
, ^(zone_t z
){
4342 z
->permanent
= true;
4344 z
->pcpu_elem_size
= zpercpu_count();
4345 #if defined(__LP64__)
4346 z
->submap_idx
= Z_SUBMAP_IDX_VA_RESTRICTED_MAP
;
4351 * Now fix the zones that are missing their zone stats
4352 * we don't really know if zfree()s happened so our stats
4353 * are slightly off for early boot. ¯\_(ツ)_/¯
4355 zone_index_foreach(idx
) {
4356 zone_t tz
= &zone_array
[idx
];
4359 zone_stats_t zs
= zalloc_percpu_permanent_type(struct zone_stats
);
4361 zpercpu_get_cpu(zs
, 0)->zs_mem_allocated
+=
4362 (tz
->countavail
- tz
->countfree
) *
4364 assert(tz
->z_stats
== NULL
);
4366 #if ZONE_ENABLE_LOGGING
4367 if (tz
->zone_logging
&& !tz
->zlog_btlog
) {
4368 zone_enable_logging(tz
);
4376 * Initialize the zone leak monitor
4378 zleak_init(zone_map_size
);
4379 #endif /* CONFIG_ZLEAKS */
4381 #if VM_MAX_TAG_ZONES
4382 if (zone_tagging_on
) {
4383 vm_allocation_zones_init();
4387 STARTUP(ZALLOC
, STARTUP_RANK_FIRST
, zone_init
);
4391 zone_set_foreign_range(
4392 vm_offset_t range_min
,
4393 vm_offset_t range_max
)
4395 zone_info
.zi_foreign_range
.min_address
= range_min
;
4396 zone_info
.zi_foreign_range
.max_address
= range_max
;
4401 zone_foreign_mem_init(vm_size_t size
)
4403 vm_offset_t mem
= (vm_offset_t
) pmap_steal_memory(size
);
4404 zone_set_foreign_range(mem
, mem
+ size
);
4412 * Called from zfree() to add the element being freed to the KASan quarantine.
4414 * Returns true if the newly-freed element made it into the quarantine without
4415 * displacing another, false otherwise. In the latter case, addrp points to the
4416 * address of the displaced element, which will be freed by the zone.
4419 kasan_quarantine_freed_element(
4420 zone_t
*zonep
, /* the zone the element is being freed to */
4421 void **addrp
) /* address of the element being freed */
4423 zone_t zone
= *zonep
;
4424 void *addr
= *addrp
;
4427 * Resize back to the real allocation size and hand off to the KASan
4428 * quarantine. `addr` may then point to a different allocation, if the
4429 * current element replaced another in the quarantine. The zone then
4430 * takes ownership of the swapped out free element.
4432 vm_size_t usersz
= zone_elem_size(zone
) - 2 * zone
->kasan_redzone
;
4433 vm_size_t sz
= usersz
;
4435 if (addr
&& zone
->kasan_redzone
) {
4436 kasan_check_free((vm_address_t
)addr
, usersz
, KASAN_HEAP_ZALLOC
);
4437 addr
= (void *)kasan_dealloc((vm_address_t
)addr
, &sz
);
4438 assert(sz
== zone_elem_size(zone
));
4440 if (addr
&& !zone
->kasan_noquarantine
) {
4441 kasan_free(&addr
, &sz
, KASAN_HEAP_ZALLOC
, zonep
, usersz
, true);
4446 if (addr
&& zone
->kasan_noquarantine
) {
4447 kasan_unpoison(addr
, zone_elem_size(zone
));
4453 #endif /* KASAN_ZALLOC */
4456 zone_needs_async_refill(zone_t zone
)
4458 if (zone
->countfree
!= 0 || zone
->async_pending
|| zone
->no_callout
) {
4462 return zone
->expandable
|| zone
->page_count
< zone
->page_count_max
;
4465 __attribute__((noinline
))
4467 zone_refill_synchronously_locked(
4469 zalloc_flags_t flags
)
4471 thread_t thr
= current_thread();
4472 bool set_expanding_vm_priv
= false;
4473 zone_pva_t orig
= zone
->pages_intermediate
;
4475 while ((flags
& Z_NOWAIT
) == 0 && (zone
->permanent
4476 ? zone_pva_is_equal(zone
->pages_intermediate
, orig
)
4477 : zone
->countfree
== 0)) {
4479 * zone is empty, try to expand it
4481 * Note that we now allow up to 2 threads (1 vm_privliged and
4482 * 1 non-vm_privliged) to expand the zone concurrently...
4484 * this is necessary to avoid stalling vm_privileged threads
4485 * running critical code necessary to continue
4486 * compressing/swapping pages (i.e. making new free pages) from
4487 * stalling behind non-vm_privileged threads waiting to acquire
4488 * free pages when the vm_page_free_count is below the
4489 * vm_page_free_reserved limit.
4491 if ((zone
->expanding_no_vm_priv
|| zone
->expanding_vm_priv
) &&
4492 (((thr
->options
& TH_OPT_VMPRIV
) == 0) || zone
->expanding_vm_priv
)) {
4494 * This is a non-vm_privileged thread and a non-vm_privileged or
4495 * a vm_privileged thread is already expanding the zone...
4497 * this is a vm_privileged thread and a vm_privileged thread is
4498 * already expanding the zone...
4500 * In either case wait for a thread to finish, then try again.
4502 zone
->waiting
= true;
4503 assert_wait(zone
, THREAD_UNINT
);
4505 thread_block(THREAD_CONTINUE_NULL
);
4510 if (zone
->page_count
>= zone
->page_count_max
) {
4511 if (zone
->exhaustible
) {
4514 if (zone
->expandable
) {
4516 * If we're expandable, just don't go through this again.
4518 zone
->page_count_max
= ~0u;
4522 panic_include_zprint
= true;
4524 if (zleak_state
& ZLEAK_STATE_ACTIVE
) {
4525 panic_include_ztrace
= true;
4527 #endif /* CONFIG_ZLEAKS */
4528 panic("zalloc: zone \"%s\" empty.", zone
->z_name
);
4533 * It is possible that a BG thread is refilling/expanding the zone
4534 * and gets pre-empted during that operation. That blocks all other
4535 * threads from making progress leading to a watchdog timeout. To
4536 * avoid that, boost the thread priority using the rwlock boost
4538 set_thread_rwlock_boost();
4540 if ((thr
->options
& TH_OPT_VMPRIV
)) {
4541 zone
->expanding_vm_priv
= true;
4542 set_expanding_vm_priv
= true;
4544 zone
->expanding_no_vm_priv
= true;
4547 zone_replenish_locked(zone
, flags
, false);
4549 if (set_expanding_vm_priv
== true) {
4550 zone
->expanding_vm_priv
= false;
4552 zone
->expanding_no_vm_priv
= false;
4555 if (zone
->waiting
) {
4556 zone
->waiting
= false;
4557 thread_wakeup(zone
);
4559 clear_thread_rwlock_boost();
4561 if (zone
->countfree
== 0) {
4562 assert(flags
& Z_NOPAGEWAIT
);
4567 if ((flags
& (Z_NOWAIT
| Z_NOPAGEWAIT
)) &&
4568 zone_needs_async_refill(zone
) && !vm_pool_low()) {
4569 zone
->async_pending
= true;
4571 thread_call_enter(&call_async_alloc
);
4573 assert(zone
->z_self
== zone
);
4577 __attribute__((noinline
))
4579 zone_refill_asynchronously_locked(zone_t zone
)
4581 uint32_t min_free
= zone
->prio_refill_count
/ 2;
4582 uint32_t resv_free
= zone
->prio_refill_count
/ 4;
4583 thread_t thr
= current_thread();
4586 * Nothing to do if there are plenty of elements.
4588 while (zone
->countfree
<= min_free
) {
4590 * Wakeup the replenish thread if not running.
4592 if (!zone
->zone_replenishing
) {
4593 lck_spin_lock(&zone_replenish_lock
);
4594 assert(zone_replenish_active
< zone_replenish_max_threads
);
4595 ++zone_replenish_active
;
4596 lck_spin_unlock(&zone_replenish_lock
);
4597 zone
->zone_replenishing
= true;
4598 zone_replenish_wakeups_initiated
++;
4599 thread_wakeup(&zone
->prio_refill_count
);
4603 * We'll let VM_PRIV threads to continue to allocate until the
4604 * reserve drops to 25%. After that only TH_OPT_ZONE_PRIV threads
4607 * TH_OPT_ZONE_PRIV threads are the GC thread and a replenish thread itself.
4608 * Replenish threads *need* to use the reserve. GC threads need to
4609 * get through the current allocation, but then will wait at a higher
4610 * level after they've dropped any locks which would deadlock the
4613 if ((zone
->countfree
> resv_free
&& (thr
->options
& TH_OPT_VMPRIV
)) ||
4614 (thr
->options
& TH_OPT_ZONE_PRIV
)) {
4619 * Wait for the replenish threads to add more elements for us to allocate from.
4621 zone_replenish_throttle_count
++;
4623 assert_wait_timeout(zone
, THREAD_UNINT
, 1, NSEC_PER_MSEC
);
4624 thread_block(THREAD_CONTINUE_NULL
);
4627 assert(zone
->z_self
== zone
);
4631 * If we're here because of zone_gc(), we didn't wait for
4632 * zone_replenish_thread to finish. So we need to ensure that
4633 * we will successfully grab an element.
4635 * zones that have a replenish thread configured.
4636 * The value of (refill_level / 2) in the previous bit of code should have
4637 * given us headroom even though this thread didn't wait.
4639 if (thr
->options
& TH_OPT_ZONE_PRIV
) {
4640 assert(zone
->countfree
!= 0);
4644 #if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
4645 __attribute__((noinline
))
4647 zalloc_log_or_trace_leaks(zone_t zone
, vm_offset_t addr
)
4649 uintptr_t zbt
[MAX_ZTRACE_DEPTH
]; /* used in zone leak logging and zone leak detection */
4650 unsigned int numsaved
= 0;
4652 #if ZONE_ENABLE_LOGGING
4653 if (DO_LOGGING(zone
)) {
4654 numsaved
= backtrace(zbt
, MAX_ZTRACE_DEPTH
, NULL
);
4655 btlog_add_entry(zone
->zlog_btlog
, (void *)addr
,
4656 ZOP_ALLOC
, (void **)zbt
, numsaved
);
4662 * Zone leak detection: capture a backtrace every zleak_sample_factor
4663 * allocations in this zone.
4665 if (__improbable(zone
->zleak_on
)) {
4666 if (sample_counter(&zone
->zleak_capture
, zleak_sample_factor
)) {
4667 /* Avoid backtracing twice if zone logging is on */
4668 if (numsaved
== 0) {
4669 numsaved
= backtrace(zbt
, MAX_ZTRACE_DEPTH
, NULL
);
4671 /* Sampling can fail if another sample is happening at the same time in a different zone. */
4672 if (!zleak_log(zbt
, addr
, numsaved
, zone_elem_size(zone
))) {
4673 /* If it failed, roll back the counter so we sample the next allocation instead. */
4674 zone
->zleak_capture
= zleak_sample_factor
;
4679 if (__improbable(zone_leaks_scan_enable
&&
4680 !(zone_elem_size(zone
) & (sizeof(uintptr_t) - 1)))) {
4681 unsigned int count
, idx
;
4682 /* Fill element, from tail, with backtrace in reverse order */
4683 if (numsaved
== 0) {
4684 numsaved
= backtrace(zbt
, MAX_ZTRACE_DEPTH
, NULL
);
4686 count
= (unsigned int)(zone_elem_size(zone
) / sizeof(uintptr_t));
4687 if (count
>= numsaved
) {
4688 count
= numsaved
- 1;
4690 for (idx
= 0; idx
< count
; idx
++) {
4691 ((uintptr_t *)addr
)[count
- 1 - idx
] = zbt
[idx
+ 1];
4694 #endif /* CONFIG_ZLEAKS */
4698 zalloc_should_log_or_trace_leaks(zone_t zone
, vm_size_t elem_size
)
4700 #if ZONE_ENABLE_LOGGING
4701 if (DO_LOGGING(zone
)) {
4707 * Zone leak detection: capture a backtrace every zleak_sample_factor
4708 * allocations in this zone.
4710 if (zone
->zleak_on
) {
4713 if (zone_leaks_scan_enable
&& !(elem_size
& (sizeof(uintptr_t) - 1))) {
4716 #endif /* CONFIG_ZLEAKS */
4719 #endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
4720 #if ZONE_ENABLE_LOGGING
4722 __attribute__((noinline
))
4724 zfree_log_trace(zone_t zone
, vm_offset_t addr
)
4727 * See if we're doing logging on this zone.
4729 * There are two styles of logging used depending on
4730 * whether we're trying to catch a leak or corruption.
4732 if (__improbable(DO_LOGGING(zone
))) {
4733 if (corruption_debug_flag
) {
4734 uintptr_t zbt
[MAX_ZTRACE_DEPTH
];
4735 unsigned int numsaved
;
4737 * We're logging to catch a corruption.
4739 * Add a record of this zfree operation to log.
4741 numsaved
= backtrace(zbt
, MAX_ZTRACE_DEPTH
, NULL
);
4742 btlog_add_entry(zone
->zlog_btlog
, (void *)addr
, ZOP_FREE
,
4743 (void **)zbt
, numsaved
);
4746 * We're logging to catch a leak.
4748 * Remove any record we might have for this element
4749 * since it's being freed. Note that we may not find it
4750 * if the buffer overflowed and that's OK.
4752 * Since the log is of a limited size, old records get
4753 * overwritten if there are more zallocs than zfrees.
4755 btlog_remove_entries_for_element(zone
->zlog_btlog
, (void *)addr
);
4759 #endif /* ZONE_ENABLE_LOGGING */
4762 * Removes an element from the zone's free list, returning 0 if the free list is empty.
4763 * Verifies that the next-pointer and backup next-pointer are intact,
4764 * and verifies that a poisoned element hasn't been modified.
4767 zalloc_direct_locked(
4769 zalloc_flags_t flags __unused
,
4770 vm_size_t waste __unused
)
4772 struct zone_page_metadata
*page_meta
;
4773 zone_addr_kind_t kind
= ZONE_ADDR_NATIVE
;
4774 vm_offset_t element
, page
, validate_bit
= 0;
4776 /* if zone is empty, bail */
4777 if (!zone_pva_is_null(zone
->pages_any_free_foreign
)) {
4778 kind
= ZONE_ADDR_FOREIGN
;
4779 page_meta
= zone_pva_to_meta(zone
->pages_any_free_foreign
, kind
);
4780 page
= (vm_offset_t
)page_meta
;
4781 } else if (!zone_pva_is_null(zone
->pages_intermediate
)) {
4782 page_meta
= zone_pva_to_meta(zone
->pages_intermediate
, kind
);
4783 page
= zone_pva_to_addr(zone
->pages_intermediate
);
4784 } else if (!zone_pva_is_null(zone
->pages_all_free
)) {
4785 page_meta
= zone_pva_to_meta(zone
->pages_all_free
, kind
);
4786 page
= zone_pva_to_addr(zone
->pages_all_free
);
4787 if (os_sub_overflow(zone
->allfree_page_count
,
4788 page_meta
->zm_page_count
, &zone
->allfree_page_count
)) {
4789 zone_accounting_panic(zone
, "allfree_page_count wrap-around");
4792 zone_accounting_panic(zone
, "countfree corruption");
4795 if (!zone_has_index(zone
, page_meta
->zm_index
)) {
4796 zone_page_metadata_index_confusion_panic(zone
, page
, page_meta
);
4799 element
= zone_page_meta_get_freelist(zone
, page_meta
, page
);
4801 vm_offset_t
*primary
= (vm_offset_t
*) element
;
4802 vm_offset_t
*backup
= get_backup_ptr(zone_elem_size(zone
), primary
);
4805 * since the primary next pointer is xor'ed with zp_nopoison_cookie
4806 * for obfuscation, retrieve the original value back
4808 vm_offset_t next_element
= *primary
^ zp_nopoison_cookie
;
4809 vm_offset_t next_element_primary
= *primary
;
4810 vm_offset_t next_element_backup
= *backup
;
4813 * backup_ptr_mismatch_panic will determine what next_element
4814 * should have been, and print it appropriately
4816 if (!zone_page_meta_is_sane_element(zone
, page_meta
, page
, next_element
, kind
)) {
4817 backup_ptr_mismatch_panic(zone
, page_meta
, page
, element
);
4820 /* Check the backup pointer for the regular cookie */
4821 if (__improbable(next_element_primary
!= next_element_backup
)) {
4822 /* Check for the poisoned cookie instead */
4823 if (__improbable(next_element
!= (next_element_backup
^ zp_poisoned_cookie
))) {
4824 /* Neither cookie is valid, corruption has occurred */
4825 backup_ptr_mismatch_panic(zone
, page_meta
, page
, element
);
4829 * Element was marked as poisoned, so check its integrity before using it.
4831 validate_bit
= ZALLOC_ELEMENT_NEEDS_VALIDATION
;
4832 } else if (zone
->zfree_clear_mem
) {
4833 validate_bit
= ZALLOC_ELEMENT_NEEDS_VALIDATION
;
4836 /* Remove this element from the free list */
4837 zone_page_meta_set_freelist(page_meta
, page
, next_element
);
4839 if (kind
== ZONE_ADDR_FOREIGN
) {
4840 if (next_element
== 0) {
4841 /* last foreign element allocated on page, move to all_used_foreign */
4842 zone_meta_requeue(zone
, &zone
->pages_all_used_foreign
, page_meta
, kind
);
4844 } else if (next_element
== 0) {
4845 zone_meta_requeue(zone
, &zone
->pages_all_used
, page_meta
, kind
);
4846 } else if (page_meta
->zm_alloc_count
== 0) {
4847 /* remove from free, move to intermediate */
4848 zone_meta_requeue(zone
, &zone
->pages_intermediate
, page_meta
, kind
);
4851 if (os_add_overflow(page_meta
->zm_alloc_count
, 1,
4852 &page_meta
->zm_alloc_count
)) {
4854 * This will not catch a lot of errors, the proper check
4855 * would be against the number of elements this run should
4856 * have which is expensive to count.
4858 * But zm_alloc_count is a 16 bit number which could
4859 * theoretically be valuable to cause to wrap around,
4862 zone_page_meta_accounting_panic(zone
, page_meta
,
4863 "zm_alloc_count overflow");
4865 if (os_sub_overflow(zone
->countfree
, 1, &zone
->countfree
)) {
4866 zone_accounting_panic(zone
, "countfree wrap-around");
4869 #if VM_MAX_TAG_ZONES
4870 if (__improbable(zone
->tags
)) {
4871 vm_tag_t tag
= zalloc_flags_get_tag(flags
);
4872 // set the tag with b0 clear so the block remains inuse
4873 ZTAG(zone
, element
)[0] = (vm_tag_t
)(tag
<< 1);
4874 vm_tag_update_zone_size(tag
, zone
->tag_zone_index
,
4875 zone_elem_size(zone
), waste
);
4877 #endif /* VM_MAX_TAG_ZONES */
4880 zpercpu_foreach_cpu(i
) {
4881 kasan_poison_range(element
+ ptoa(i
),
4882 zone_elem_size(zone
), ASAN_VALID
);
4885 kasan_poison_range(element
, zone_elem_size(zone
), ASAN_VALID
);
4889 return element
| validate_bit
;
4893 * zalloc returns an element from the specified zone.
4898 zone_stats_t zstats
,
4899 zalloc_flags_t flags
,
4902 vm_offset_t addr
= 0;
4903 vm_size_t elem_size
= zone_elem_size(zone
);
4906 * KASan uses zalloc() for fakestack, which can be called anywhere.
4907 * However, we make sure these calls can never block.
4909 assert(zone
->kasan_fakestacks
||
4910 ml_get_interrupts_enabled() ||
4911 ml_is_quiescing() ||
4912 debug_mode_active() ||
4913 startup_phase
< STARTUP_SUB_EARLY_BOOT
);
4916 * Make sure Z_NOFAIL was not obviously misused
4918 if ((flags
& Z_NOFAIL
) && !zone
->prio_refill_count
) {
4919 assert(!zone
->exhaustible
&& (flags
& (Z_NOWAIT
| Z_NOPAGEWAIT
)) == 0);
4924 * Note: if zone caching is on, gzalloc and tags aren't used
4925 * so we can always check this first
4927 if (zone_caching_enabled(zone
)) {
4928 addr
= zcache_alloc_from_cpu_cache(zone
, zstats
, waste
);
4929 if (__probable(addr
)) {
4930 goto allocated_from_cache
;
4933 #endif /* CONFIG_ZCACHE */
4936 if (__improbable(zone
->gzalloc_tracked
)) {
4937 addr
= gzalloc_alloc(zone
, zstats
, flags
);
4938 goto allocated_from_gzalloc
;
4940 #endif /* CONFIG_GZALLOC */
4941 #if VM_MAX_TAG_ZONES
4942 if (__improbable(zone
->tags
)) {
4943 vm_tag_t tag
= zalloc_flags_get_tag(flags
);
4944 if (tag
== VM_KERN_MEMORY_NONE
) {
4946 * zone views into heaps can lead to a site-less call
4947 * and we fallback to KALLOC as a tag for those.
4949 tag
= VM_KERN_MEMORY_KALLOC
;
4950 flags
|= Z_VM_TAG(tag
);
4952 vm_tag_will_update_zone(tag
, zone
->tag_zone_index
);
4954 #endif /* VM_MAX_TAG_ZONES */
4957 assert(zone
->z_self
== zone
);
4960 * Check if we need another thread to replenish the zone or
4961 * if we have to wait for a replenish thread to finish.
4962 * This is used for elements, like vm_map_entry, which are
4963 * needed themselves to implement zalloc().
4965 if (__improbable(zone
->prio_refill_count
&&
4966 zone
->countfree
<= zone
->prio_refill_count
/ 2)) {
4967 zone_refill_asynchronously_locked(zone
);
4968 } else if (__improbable(zone
->countfree
== 0)) {
4969 zone_refill_synchronously_locked(zone
, flags
);
4970 if (__improbable(zone
->countfree
== 0)) {
4972 if (__improbable(flags
& Z_NOFAIL
)) {
4973 zone_nofail_panic(zone
);
4979 addr
= zalloc_direct_locked(zone
, flags
, waste
);
4980 if (__probable(zstats
!= NULL
)) {
4982 * The few vm zones used before zone_init() runs do not have
4985 int cpu
= cpu_number();
4986 zpercpu_get_cpu(zstats
, cpu
)->zs_mem_allocated
+= elem_size
;
4987 #if ZALLOC_DETAILED_STATS
4989 zpercpu_get_cpu(zstats
, cpu
)->zs_mem_wasted
+= waste
;
4991 #endif /* ZALLOC_DETAILED_STATS */
4996 #if ZALLOC_ENABLE_POISONING
4997 bool validate
= addr
& ZALLOC_ELEMENT_NEEDS_VALIDATION
;
4999 addr
&= ~ZALLOC_ELEMENT_NEEDS_VALIDATION
;
5000 zone_clear_freelist_pointers(zone
, addr
);
5001 #if ZALLOC_ENABLE_POISONING
5003 * Note: percpu zones do not respect ZONE_MIN_ELEM_SIZE,
5004 * so we will check the first word even if we just
5007 zalloc_validate_element(zone
, addr
, elem_size
- sizeof(vm_offset_t
),
5009 #endif /* ZALLOC_ENABLE_POISONING */
5011 allocated_from_cache
:
5012 #if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
5013 if (__improbable(zalloc_should_log_or_trace_leaks(zone
, elem_size
))) {
5014 zalloc_log_or_trace_leaks(zone
, addr
);
5016 #endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
5019 allocated_from_gzalloc
:
5022 if (zone
->kasan_redzone
) {
5023 addr
= kasan_alloc(addr
, elem_size
,
5024 elem_size
- 2 * zone
->kasan_redzone
, zone
->kasan_redzone
);
5025 elem_size
-= 2 * zone
->kasan_redzone
;
5028 * Initialize buffer with unique pattern only if memory
5029 * wasn't expected to be zeroed.
5031 if (!zone
->zfree_clear_mem
&& !(flags
& Z_ZERO
)) {
5032 kasan_leak_init(addr
, elem_size
);
5034 #endif /* KASAN_ZALLOC */
5035 if ((flags
& Z_ZERO
) && !zone
->zfree_clear_mem
) {
5036 bzero((void *)addr
, elem_size
);
5039 TRACE_MACHLEAKS(ZALLOC_CODE
, ZALLOC_CODE_2
, elem_size
, addr
);
5042 DTRACE_VM2(zalloc
, zone_t
, zone
, void*, addr
);
5043 return (void *)addr
;
5047 zalloc(union zone_or_view zov
)
5049 return zalloc_flags(zov
, Z_WAITOK
);
5053 zalloc_noblock(union zone_or_view zov
)
5055 return zalloc_flags(zov
, Z_NOWAIT
);
5059 zalloc_flags(union zone_or_view zov
, zalloc_flags_t flags
)
5061 zone_t zone
= zov
.zov_view
->zv_zone
;
5062 zone_stats_t zstats
= zov
.zov_view
->zv_stats
;
5063 assert(!zone
->percpu
);
5064 return zalloc_ext(zone
, zstats
, flags
, 0);
5068 zalloc_percpu(union zone_or_view zov
, zalloc_flags_t flags
)
5070 zone_t zone
= zov
.zov_view
->zv_zone
;
5071 zone_stats_t zstats
= zov
.zov_view
->zv_stats
;
5072 assert(zone
->percpu
);
5073 return (void *)__zpcpu_mangle(zalloc_ext(zone
, zstats
, flags
, 0));
5077 _zalloc_permanent(zone_t zone
, vm_size_t size
, vm_offset_t mask
)
5079 const zone_addr_kind_t kind
= ZONE_ADDR_NATIVE
;
5080 struct zone_page_metadata
*page_meta
;
5081 vm_offset_t offs
, addr
;
5084 assert(ml_get_interrupts_enabled() ||
5085 ml_is_quiescing() ||
5086 debug_mode_active() ||
5087 startup_phase
< STARTUP_SUB_EARLY_BOOT
);
5089 size
= (size
+ mask
) & ~mask
;
5090 assert(size
<= PAGE_SIZE
);
5093 assert(zone
->z_self
== zone
);
5096 pva
= zone
->pages_intermediate
;
5097 while (!zone_pva_is_null(pva
)) {
5098 page_meta
= zone_pva_to_meta(pva
, kind
);
5099 if (page_meta
->zm_freelist_offs
+ size
<= PAGE_SIZE
) {
5102 pva
= page_meta
->zm_page_next
;
5105 zone_refill_synchronously_locked(zone
, Z_WAITOK
);
5109 offs
= (page_meta
->zm_freelist_offs
+ mask
) & ~mask
;
5110 page_meta
->zm_freelist_offs
= offs
+ size
;
5111 page_meta
->zm_alloc_count
+= size
;
5112 zone
->countfree
-= size
;
5113 if (__probable(zone
->z_stats
)) {
5114 zpercpu_get(zone
->z_stats
)->zs_mem_allocated
+= size
;
5117 if (page_meta
->zm_alloc_count
>= PAGE_SIZE
- sizeof(vm_offset_t
)) {
5118 zone_meta_requeue(zone
, &zone
->pages_all_used
, page_meta
, kind
);
5123 addr
= offs
+ zone_pva_to_addr(pva
);
5125 DTRACE_VM2(zalloc
, zone_t
, zone
, void*, addr
);
5126 return (void *)addr
;
5130 _zalloc_permanent_large(size_t size
, vm_offset_t mask
)
5135 kr
= kernel_memory_allocate(kernel_map
, &addr
, size
, mask
,
5136 KMA_KOBJECT
| KMA_PERMANENT
| KMA_ZERO
,
5137 VM_KERN_MEMORY_KALLOC
);
5139 panic("zalloc_permanent: unable to allocate %zd bytes (%d)",
5142 return (void *)addr
;
5146 zalloc_permanent(vm_size_t size
, vm_offset_t mask
)
5148 if (size
<= PAGE_SIZE
) {
5149 zone_t zone
= &zone_array
[ZONE_ID_PERMANENT
];
5150 return _zalloc_permanent(zone
, size
, mask
);
5152 return _zalloc_permanent_large(size
, mask
);
5156 zalloc_percpu_permanent(vm_size_t size
, vm_offset_t mask
)
5158 zone_t zone
= &zone_array
[ZONE_ID_PERCPU_PERMANENT
];
5159 return (void *)__zpcpu_mangle(_zalloc_permanent(zone
, size
, mask
));
5163 zalloc_async(__unused thread_call_param_t p0
, __unused thread_call_param_t p1
)
5165 zone_index_foreach(i
) {
5166 zone_t z
= &zone_array
[i
];
5168 if (z
->no_callout
) {
5169 /* async_pending will never be set */
5174 if (z
->z_self
&& z
->async_pending
) {
5175 z
->async_pending
= false;
5176 zone_refill_synchronously_locked(z
, Z_WAITOK
);
5183 * Adds the element to the head of the zone's free list
5184 * Keeps a backup next-pointer at the end of the element
5187 zfree_direct_locked(zone_t zone
, vm_offset_t element
, bool poison
)
5189 struct zone_page_metadata
*page_meta
;
5190 vm_offset_t page
, old_head
;
5191 zone_addr_kind_t kind
;
5192 vm_size_t elem_size
= zone_elem_size(zone
);
5194 vm_offset_t
*primary
= (vm_offset_t
*) element
;
5195 vm_offset_t
*backup
= get_backup_ptr(elem_size
, primary
);
5197 page_meta
= zone_allocated_element_resolve(zone
, element
, &page
, &kind
);
5198 old_head
= zone_page_meta_get_freelist(zone
, page_meta
, page
);
5200 if (__improbable(old_head
== element
)) {
5201 panic("zfree: double free of %p to zone %s%s\n",
5202 (void *) element
, zone_heap_name(zone
), zone
->z_name
);
5205 #if ZALLOC_ENABLE_POISONING
5206 if (poison
&& elem_size
< ZONE_MIN_ELEM_SIZE
) {
5207 assert(zone
->percpu
);
5215 * Always write a redundant next pointer
5216 * So that it is more difficult to forge, xor it with a random cookie
5217 * A poisoned element is indicated by using zp_poisoned_cookie
5218 * instead of zp_nopoison_cookie
5221 *backup
= old_head
^ (poison
? zp_poisoned_cookie
: zp_nopoison_cookie
);
5224 * Insert this element at the head of the free list. We also xor the
5225 * primary pointer with the zp_nopoison_cookie to make sure a free
5226 * element does not provide the location of the next free element directly.
5228 *primary
= old_head
^ zp_nopoison_cookie
;
5230 #if VM_MAX_TAG_ZONES
5231 if (__improbable(zone
->tags
)) {
5232 vm_tag_t tag
= (ZTAG(zone
, element
)[0] >> 1);
5233 // set the tag with b0 clear so the block remains inuse
5234 ZTAG(zone
, element
)[0] = 0xFFFE;
5235 vm_tag_update_zone_size(tag
, zone
->tag_zone_index
,
5236 -((int64_t)elem_size
), 0);
5238 #endif /* VM_MAX_TAG_ZONES */
5240 zone_page_meta_set_freelist(page_meta
, page
, element
);
5241 if (os_sub_overflow(page_meta
->zm_alloc_count
, 1,
5242 &page_meta
->zm_alloc_count
)) {
5243 zone_page_meta_accounting_panic(zone
, page_meta
,
5244 "alloc_count wrap-around");
5248 if (kind
== ZONE_ADDR_FOREIGN
) {
5249 if (old_head
== 0) {
5250 /* first foreign element freed on page, move from all_used_foreign */
5251 zone_meta_requeue(zone
, &zone
->pages_any_free_foreign
, page_meta
, kind
);
5253 } else if (page_meta
->zm_alloc_count
== 0) {
5254 /* whether the page was on the intermediate or all_used, queue, move it to free */
5255 zone_meta_requeue(zone
, &zone
->pages_all_free
, page_meta
, kind
);
5256 zone
->allfree_page_count
+= page_meta
->zm_page_count
;
5257 } else if (old_head
== 0) {
5258 /* first free element on page, move from all_used */
5259 zone_meta_requeue(zone
, &zone
->pages_intermediate
, page_meta
, kind
);
5264 zpercpu_foreach_cpu(i
) {
5265 kasan_poison_range(element
+ ptoa(i
), elem_size
,
5269 kasan_poison_range(element
, elem_size
, ASAN_HEAP_FREED
);
5275 zfree_ext(zone_t zone
, zone_stats_t zstats
, void *addr
)
5277 vm_offset_t elem
= (vm_offset_t
)addr
;
5278 vm_size_t elem_size
= zone_elem_size(zone
);
5279 bool poison
= false;
5281 DTRACE_VM2(zfree
, zone_t
, zone
, void*, addr
);
5282 TRACE_MACHLEAKS(ZFREE_CODE
, ZFREE_CODE_2
, elem_size
, elem
);
5285 if (kasan_quarantine_freed_element(&zone
, &addr
)) {
5289 * kasan_quarantine_freed_element() might return a different
5290 * {zone, addr} than the one being freed for kalloc heaps.
5292 * Make sure we reload everything.
5294 elem
= (vm_offset_t
)addr
;
5295 elem_size
= zone_elem_size(zone
);
5300 * Zone leak detection: un-track the allocation
5302 if (__improbable(zone
->zleak_on
)) {
5303 zleak_free(elem
, elem_size
);
5305 #endif /* CONFIG_ZLEAKS */
5309 * Note: if zone caching is on, gzalloc and tags aren't used
5310 * so we can always check this first
5312 if (zone_caching_enabled(zone
)) {
5313 return zcache_free_to_cpu_cache(zone
, zstats
, (vm_offset_t
)addr
);
5315 #endif /* CONFIG_ZCACHE */
5318 if (__improbable(zone
->gzalloc_tracked
)) {
5319 return gzalloc_free(zone
, zstats
, addr
);
5321 #endif /* CONFIG_GZALLOC */
5323 #if ZONE_ENABLE_LOGGING
5324 if (__improbable(DO_LOGGING(zone
))) {
5325 zfree_log_trace(zone
, elem
);
5327 #endif /* ZONE_ENABLE_LOGGING */
5329 if (zone
->zfree_clear_mem
) {
5330 poison
= zfree_clear(zone
, elem
, elem_size
);
5334 assert(zone
->z_self
== zone
);
5337 poison
= zfree_poison_element(zone
, &zone
->zp_count
, elem
);
5340 if (__probable(zstats
!= NULL
)) {
5342 * The few vm zones used before zone_init() runs do not have
5345 zpercpu_get(zstats
)->zs_mem_freed
+= elem_size
;
5348 zfree_direct_locked(zone
, elem
, poison
);
5354 (zfree
)(union zone_or_view zov
, void *addr
)
5356 zone_t zone
= zov
.zov_view
->zv_zone
;
5357 zone_stats_t zstats
= zov
.zov_view
->zv_stats
;
5358 assert(!zone
->percpu
);
5359 zfree_ext(zone
, zstats
, addr
);
5363 zfree_percpu(union zone_or_view zov
, void *addr
)
5365 zone_t zone
= zov
.zov_view
->zv_zone
;
5366 zone_stats_t zstats
= zov
.zov_view
->zv_stats
;
5367 assert(zone
->percpu
);
5368 zfree_ext(zone
, zstats
, (void *)__zpcpu_demangle(addr
));
5371 #pragma mark vm integration, MIG routines
5374 * Drops (i.e. frees) the elements in the all free pages queue of a zone.
5375 * Called by zone_gc() on each zone and when a zone is zdestroy()ed.
5378 zone_drop_free_elements(zone_t z
)
5380 const zone_addr_kind_t kind
= ZONE_ADDR_NATIVE
;
5381 unsigned int total_freed_pages
= 0;
5382 struct zone_page_metadata
*page_meta
, *seq_meta
;
5383 vm_address_t page_addr
;
5384 vm_size_t size_to_free
;
5385 vm_size_t free_count
;
5386 uint32_t page_count
;
5388 current_thread()->options
|= TH_OPT_ZONE_PRIV
;
5391 while (!zone_pva_is_null(z
->pages_all_free
)) {
5393 * If any replenishment threads are running, defer to them,
5394 * so that we don't deplete reserved zones.
5396 * The timing of the check isn't super important, as there are
5397 * enough reserves to allow freeing an extra page_meta.
5399 * Hence, we can check without grabbing the lock every time
5400 * through the loop. We do need the lock however to avoid
5401 * missing a wakeup when we decide to block.
5403 if (zone_replenish_active
> 0) {
5404 lck_spin_lock(&zone_replenish_lock
);
5405 if (zone_replenish_active
> 0) {
5406 assert_wait(&zone_replenish_active
, THREAD_UNINT
);
5407 lck_spin_unlock(&zone_replenish_lock
);
5409 thread_block(THREAD_CONTINUE_NULL
);
5413 lck_spin_unlock(&zone_replenish_lock
);
5416 page_meta
= zone_pva_to_meta(z
->pages_all_free
, kind
);
5417 page_count
= page_meta
->zm_page_count
;
5418 free_count
= zone_elem_count(z
, ptoa(page_count
), kind
);
5421 * Don't drain zones with async refill to below the refill
5422 * threshold, as they need some reserve to function properly.
5424 if (!z
->destroyed
&& z
->prio_refill_count
&&
5425 (vm_size_t
)(z
->countfree
- free_count
) < z
->prio_refill_count
) {
5429 zone_meta_queue_pop(z
, &z
->pages_all_free
, kind
, &page_addr
);
5431 if (os_sub_overflow(z
->countfree
, free_count
, &z
->countfree
)) {
5432 zone_accounting_panic(z
, "countfree wrap-around");
5434 if (os_sub_overflow(z
->countavail
, free_count
, &z
->countavail
)) {
5435 zone_accounting_panic(z
, "countavail wrap-around");
5437 if (os_sub_overflow(z
->allfree_page_count
, page_count
,
5438 &z
->allfree_page_count
)) {
5439 zone_accounting_panic(z
, "allfree_page_count wrap-around");
5441 if (os_sub_overflow(z
->page_count
, page_count
, &z
->page_count
)) {
5442 zone_accounting_panic(z
, "page_count wrap-around");
5445 os_atomic_sub(&zones_phys_page_count
, page_count
, relaxed
);
5446 os_atomic_sub(&zones_phys_page_mapped_count
, page_count
, relaxed
);
5448 bzero(page_meta
, sizeof(*page_meta
) * page_count
);
5449 seq_meta
= page_meta
;
5450 page_meta
= NULL
; /* page_meta fields are zeroed, prevent reuse */
5454 /* Free the pages for metadata and account for them */
5455 total_freed_pages
+= page_count
;
5456 size_to_free
= ptoa(page_count
);
5458 kasan_poison_range(page_addr
, size_to_free
, ASAN_VALID
);
5460 #if VM_MAX_TAG_ZONES
5462 ztMemoryRemove(z
, page_addr
, size_to_free
);
5464 #endif /* VM_MAX_TAG_ZONES */
5466 if (z
->va_sequester
&& z
->alloc_pages
== page_count
) {
5467 kernel_memory_depopulate(submap_for_zone(z
), page_addr
,
5468 size_to_free
, KMA_KOBJECT
, VM_KERN_MEMORY_ZONE
);
5470 kmem_free(submap_for_zone(z
), page_addr
, size_to_free
);
5473 thread_yield_to_preemption();
5478 zone_meta_queue_push(z
, &z
->pages_sequester
, seq_meta
, kind
);
5479 z
->sequester_page_count
+= page_count
;
5483 assert(zone_pva_is_null(z
->pages_all_free
));
5484 assert(z
->allfree_page_count
== 0);
5487 current_thread()->options
&= ~TH_OPT_ZONE_PRIV
;
5489 #if DEBUG || DEVELOPMENT
5490 if (zalloc_debug
& ZALLOC_DEBUG_ZONEGC
) {
5491 kprintf("zone_gc() of zone %s%s freed %lu elements, %d pages\n",
5492 zone_heap_name(z
), z
->z_name
,
5493 (unsigned long)(ptoa(total_freed_pages
) / z
->pcpu_elem_size
),
5496 #endif /* DEBUG || DEVELOPMENT */
5499 /* Zone garbage collection
5501 * zone_gc will walk through all the free elements in all the
5502 * zones that are marked collectable looking for reclaimable
5503 * pages. zone_gc is called by consider_zone_gc when the system
5504 * begins to run out of memory.
5506 * We should ensure that zone_gc never blocks.
5509 zone_gc(boolean_t consider_jetsams
)
5511 if (consider_jetsams
) {
5512 kill_process_in_largest_zone();
5514 * If we do end up jetsamming something, we need to do a zone_gc so that
5515 * we can reclaim free zone elements and update the zone map size.
5520 lck_mtx_lock(&zone_gc_lock
);
5522 #if DEBUG || DEVELOPMENT
5523 if (zalloc_debug
& ZALLOC_DEBUG_ZONEGC
) {
5524 kprintf("zone_gc() starting...\n");
5526 #endif /* DEBUG || DEVELOPMENT */
5528 zone_index_foreach(i
) {
5529 zone_t z
= &zone_array
[i
];
5531 if (!z
->collectable
) {
5535 if (zone_caching_enabled(z
)) {
5536 zcache_drain_depot(z
);
5538 #endif /* CONFIG_ZCACHE */
5539 if (zone_pva_is_null(z
->pages_all_free
)) {
5543 zone_drop_free_elements(z
);
5546 lck_mtx_unlock(&zone_gc_lock
);
5552 * Called by the pageout daemon when the system needs more free pages.
5556 consider_zone_gc(boolean_t consider_jetsams
)
5559 * One-time reclaim of kernel_map resources we allocated in
5562 * Use atomic exchange in case multiple threads race into here.
5564 vm_offset_t deallocate_kaddr
;
5565 if (kmapoff_kaddr
!= 0 &&
5566 (deallocate_kaddr
= os_atomic_xchg(&kmapoff_kaddr
, 0, relaxed
)) != 0) {
5567 vm_deallocate(kernel_map
, deallocate_kaddr
, ptoa_64(kmapoff_pgcnt
));
5570 zone_gc(consider_jetsams
);
5574 * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls
5575 * requesting zone information.
5576 * Frees unused pages towards the end of the region, and zero'es out unused
5577 * space on the last page.
5579 static vm_map_copy_t
5581 vm_offset_t start_addr
,
5582 vm_size_t total_size
,
5583 vm_size_t used_size
)
5586 vm_offset_t end_addr
;
5587 vm_size_t free_size
;
5590 if (used_size
!= total_size
) {
5591 end_addr
= start_addr
+ used_size
;
5592 free_size
= total_size
- (round_page(end_addr
) - start_addr
);
5594 if (free_size
>= PAGE_SIZE
) {
5595 kmem_free(ipc_kernel_map
,
5596 round_page(end_addr
), free_size
);
5598 bzero((char *) end_addr
, round_page(end_addr
) - end_addr
);
5601 kr
= vm_map_copyin(ipc_kernel_map
, (vm_map_address_t
)start_addr
,
5602 (vm_map_size_t
)used_size
, TRUE
, ©
);
5603 assert(kr
== KERN_SUCCESS
);
5611 mach_zone_name_t
*zn
,
5612 mach_zone_info_t
*zi
)
5616 assert(z
!= ZONE_NULL
);
5627 * Append kalloc heap name to zone name (if zone is used by kalloc)
5629 char temp_zone_name
[MAX_ZONE_NAME
] = "";
5630 snprintf(temp_zone_name
, MAX_ZONE_NAME
, "%s%s",
5631 zone_heap_name(z
), z
->z_name
);
5633 /* assuming here the name data is static */
5634 (void) __nosan_strlcpy(zn
->mzn_name
, temp_zone_name
,
5635 strlen(temp_zone_name
) + 1);
5639 *zi
= (mach_zone_info_t
) {
5640 .mzi_count
= zone_count_allocated(&zcopy
),
5641 .mzi_cur_size
= ptoa_64(zcopy
.page_count
),
5642 // max_size for zprint is now high-watermark of pages used
5643 .mzi_max_size
= ptoa_64(zcopy
.page_count_hwm
),
5644 .mzi_elem_size
= zcopy
.pcpu_elem_size
,
5645 .mzi_alloc_size
= ptoa_64(zcopy
.alloc_pages
),
5646 .mzi_exhaustible
= (uint64_t)zcopy
.exhaustible
,
5648 zpercpu_foreach(zs
, zcopy
.z_stats
) {
5649 zi
->mzi_sum_size
+= zs
->zs_mem_allocated
;
5651 if (zcopy
.collectable
) {
5652 SET_MZI_COLLECTABLE_BYTES(zi
->mzi_collectable
,
5653 ptoa_64(zcopy
.allfree_page_count
));
5654 SET_MZI_COLLECTABLE_FLAG(zi
->mzi_collectable
, TRUE
);
5663 __unused task_t task
,
5664 __unused mach_zone_name_array_t
*namesp
,
5665 __unused mach_msg_type_number_t
*namesCntp
,
5666 __unused task_zone_info_array_t
*infop
,
5667 __unused mach_msg_type_number_t
*infoCntp
)
5669 return KERN_FAILURE
;
5675 mach_zone_name_array_t
*namesp
,
5676 mach_msg_type_number_t
*namesCntp
,
5677 mach_zone_info_array_t
*infop
,
5678 mach_msg_type_number_t
*infoCntp
)
5680 return mach_memory_info(host
, namesp
, namesCntp
, infop
, infoCntp
, NULL
, NULL
);
5687 mach_zone_name_array_t
*namesp
,
5688 mach_msg_type_number_t
*namesCntp
,
5689 mach_zone_info_array_t
*infop
,
5690 mach_msg_type_number_t
*infoCntp
,
5691 mach_memory_info_array_t
*memoryInfop
,
5692 mach_msg_type_number_t
*memoryInfoCntp
)
5694 mach_zone_name_t
*names
;
5695 vm_offset_t names_addr
;
5696 vm_size_t names_size
;
5698 mach_zone_info_t
*info
;
5699 vm_offset_t info_addr
;
5700 vm_size_t info_size
;
5702 mach_memory_info_t
*memory_info
;
5703 vm_offset_t memory_info_addr
;
5704 vm_size_t memory_info_size
;
5705 vm_size_t memory_info_vmsize
;
5706 unsigned int num_info
;
5708 unsigned int max_zones
, used_zones
, i
;
5709 mach_zone_name_t
*zn
;
5710 mach_zone_info_t
*zi
;
5713 uint64_t zones_collectable_bytes
= 0;
5715 if (host
== HOST_NULL
) {
5716 return KERN_INVALID_HOST
;
5718 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
5719 if (!PE_i_can_has_debugger(NULL
)) {
5720 return KERN_INVALID_HOST
;
5725 * We assume that zones aren't freed once allocated.
5726 * We won't pick up any zones that are allocated later.
5729 max_zones
= os_atomic_load(&num_zones
, relaxed
);
5731 names_size
= round_page(max_zones
* sizeof *names
);
5732 kr
= kmem_alloc_pageable(ipc_kernel_map
,
5733 &names_addr
, names_size
, VM_KERN_MEMORY_IPC
);
5734 if (kr
!= KERN_SUCCESS
) {
5737 names
= (mach_zone_name_t
*) names_addr
;
5739 info_size
= round_page(max_zones
* sizeof *info
);
5740 kr
= kmem_alloc_pageable(ipc_kernel_map
,
5741 &info_addr
, info_size
, VM_KERN_MEMORY_IPC
);
5742 if (kr
!= KERN_SUCCESS
) {
5743 kmem_free(ipc_kernel_map
,
5744 names_addr
, names_size
);
5747 info
= (mach_zone_info_t
*) info_addr
;
5752 used_zones
= max_zones
;
5753 for (i
= 0; i
< max_zones
; i
++) {
5754 if (!get_zone_info(&(zone_array
[i
]), zn
, zi
)) {
5758 zones_collectable_bytes
+= GET_MZI_COLLECTABLE_BYTES(zi
->mzi_collectable
);
5763 *namesp
= (mach_zone_name_t
*) create_vm_map_copy(names_addr
, names_size
, used_zones
* sizeof *names
);
5764 *namesCntp
= used_zones
;
5766 *infop
= (mach_zone_info_t
*) create_vm_map_copy(info_addr
, info_size
, used_zones
* sizeof *info
);
5767 *infoCntp
= used_zones
;
5770 memory_info_addr
= 0;
5772 if (memoryInfop
&& memoryInfoCntp
) {
5774 num_info
= vm_page_diagnose_estimate();
5775 memory_info_size
= num_info
* sizeof(*memory_info
);
5776 memory_info_vmsize
= round_page(memory_info_size
);
5777 kr
= kmem_alloc_pageable(ipc_kernel_map
,
5778 &memory_info_addr
, memory_info_vmsize
, VM_KERN_MEMORY_IPC
);
5779 if (kr
!= KERN_SUCCESS
) {
5783 kr
= vm_map_wire_kernel(ipc_kernel_map
, memory_info_addr
, memory_info_addr
+ memory_info_vmsize
,
5784 VM_PROT_READ
| VM_PROT_WRITE
, VM_KERN_MEMORY_IPC
, FALSE
);
5785 assert(kr
== KERN_SUCCESS
);
5787 memory_info
= (mach_memory_info_t
*) memory_info_addr
;
5788 vm_page_diagnose(memory_info
, num_info
, zones_collectable_bytes
);
5790 kr
= vm_map_unwire(ipc_kernel_map
, memory_info_addr
, memory_info_addr
+ memory_info_vmsize
, FALSE
);
5791 assert(kr
== KERN_SUCCESS
);
5793 kr
= vm_map_copyin(ipc_kernel_map
, (vm_map_address_t
)memory_info_addr
,
5794 (vm_map_size_t
)memory_info_size
, TRUE
, ©
);
5795 assert(kr
== KERN_SUCCESS
);
5797 *memoryInfop
= (mach_memory_info_t
*) copy
;
5798 *memoryInfoCntp
= num_info
;
5801 return KERN_SUCCESS
;
5805 mach_zone_info_for_zone(
5807 mach_zone_name_t name
,
5808 mach_zone_info_t
*infop
)
5812 if (host
== HOST_NULL
) {
5813 return KERN_INVALID_HOST
;
5815 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
5816 if (!PE_i_can_has_debugger(NULL
)) {
5817 return KERN_INVALID_HOST
;
5821 if (infop
== NULL
) {
5822 return KERN_INVALID_ARGUMENT
;
5825 zone_ptr
= ZONE_NULL
;
5826 zone_index_foreach(i
) {
5827 zone_t z
= &(zone_array
[i
]);
5828 assert(z
!= ZONE_NULL
);
5831 * Append kalloc heap name to zone name (if zone is used by kalloc)
5833 char temp_zone_name
[MAX_ZONE_NAME
] = "";
5834 snprintf(temp_zone_name
, MAX_ZONE_NAME
, "%s%s",
5835 zone_heap_name(z
), z
->z_name
);
5837 /* Find the requested zone by name */
5838 if (track_this_zone(temp_zone_name
, name
.mzn_name
)) {
5844 /* No zones found with the requested zone name */
5845 if (zone_ptr
== ZONE_NULL
) {
5846 return KERN_INVALID_ARGUMENT
;
5849 if (get_zone_info(zone_ptr
, NULL
, infop
)) {
5850 return KERN_SUCCESS
;
5852 return KERN_FAILURE
;
5856 mach_zone_info_for_largest_zone(
5858 mach_zone_name_t
*namep
,
5859 mach_zone_info_t
*infop
)
5861 if (host
== HOST_NULL
) {
5862 return KERN_INVALID_HOST
;
5864 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
5865 if (!PE_i_can_has_debugger(NULL
)) {
5866 return KERN_INVALID_HOST
;
5870 if (namep
== NULL
|| infop
== NULL
) {
5871 return KERN_INVALID_ARGUMENT
;
5874 if (get_zone_info(zone_find_largest(), namep
, infop
)) {
5875 return KERN_SUCCESS
;
5877 return KERN_FAILURE
;
5881 get_zones_collectable_bytes(void)
5883 uint64_t zones_collectable_bytes
= 0;
5884 mach_zone_info_t zi
;
5886 zone_index_foreach(i
) {
5887 if (get_zone_info(&zone_array
[i
], NULL
, &zi
)) {
5888 zones_collectable_bytes
+=
5889 GET_MZI_COLLECTABLE_BYTES(zi
.mzi_collectable
);
5893 return zones_collectable_bytes
;
5897 mach_zone_get_zlog_zones(
5899 mach_zone_name_array_t
*namesp
,
5900 mach_msg_type_number_t
*namesCntp
)
5902 #if ZONE_ENABLE_LOGGING
5903 unsigned int max_zones
, logged_zones
, i
;
5906 mach_zone_name_t
*names
;
5907 vm_offset_t names_addr
;
5908 vm_size_t names_size
;
5910 if (host
== HOST_NULL
) {
5911 return KERN_INVALID_HOST
;
5914 if (namesp
== NULL
|| namesCntp
== NULL
) {
5915 return KERN_INVALID_ARGUMENT
;
5918 max_zones
= os_atomic_load(&num_zones
, relaxed
);
5920 names_size
= round_page(max_zones
* sizeof *names
);
5921 kr
= kmem_alloc_pageable(ipc_kernel_map
,
5922 &names_addr
, names_size
, VM_KERN_MEMORY_IPC
);
5923 if (kr
!= KERN_SUCCESS
) {
5926 names
= (mach_zone_name_t
*) names_addr
;
5928 zone_ptr
= ZONE_NULL
;
5930 for (i
= 0; i
< max_zones
; i
++) {
5931 zone_t z
= &(zone_array
[i
]);
5932 assert(z
!= ZONE_NULL
);
5934 /* Copy out the zone name if zone logging is enabled */
5935 if (z
->zlog_btlog
) {
5936 get_zone_info(z
, &names
[logged_zones
], NULL
);
5941 *namesp
= (mach_zone_name_t
*) create_vm_map_copy(names_addr
, names_size
, logged_zones
* sizeof *names
);
5942 *namesCntp
= logged_zones
;
5944 return KERN_SUCCESS
;
5946 #else /* ZONE_ENABLE_LOGGING */
5947 #pragma unused(host, namesp, namesCntp)
5948 return KERN_FAILURE
;
5949 #endif /* ZONE_ENABLE_LOGGING */
5953 mach_zone_get_btlog_records(
5955 mach_zone_name_t name
,
5956 zone_btrecord_array_t
*recsp
,
5957 mach_msg_type_number_t
*recsCntp
)
5959 #if DEBUG || DEVELOPMENT
5960 unsigned int numrecs
= 0;
5961 zone_btrecord_t
*recs
;
5964 vm_offset_t recs_addr
;
5965 vm_size_t recs_size
;
5967 if (host
== HOST_NULL
) {
5968 return KERN_INVALID_HOST
;
5971 if (recsp
== NULL
|| recsCntp
== NULL
) {
5972 return KERN_INVALID_ARGUMENT
;
5975 zone_ptr
= ZONE_NULL
;
5976 zone_index_foreach(i
) {
5977 zone_t z
= &zone_array
[i
];
5980 * Append kalloc heap name to zone name (if zone is used by kalloc)
5982 char temp_zone_name
[MAX_ZONE_NAME
] = "";
5983 snprintf(temp_zone_name
, MAX_ZONE_NAME
, "%s%s",
5984 zone_heap_name(z
), z
->z_name
);
5986 /* Find the requested zone by name */
5987 if (track_this_zone(temp_zone_name
, name
.mzn_name
)) {
5993 /* No zones found with the requested zone name */
5994 if (zone_ptr
== ZONE_NULL
) {
5995 return KERN_INVALID_ARGUMENT
;
5998 /* Logging not turned on for the requested zone */
5999 if (!DO_LOGGING(zone_ptr
)) {
6000 return KERN_FAILURE
;
6003 /* Allocate memory for btlog records */
6004 numrecs
= (unsigned int)(get_btlog_records_count(zone_ptr
->zlog_btlog
));
6005 recs_size
= round_page(numrecs
* sizeof *recs
);
6007 kr
= kmem_alloc_pageable(ipc_kernel_map
, &recs_addr
, recs_size
, VM_KERN_MEMORY_IPC
);
6008 if (kr
!= KERN_SUCCESS
) {
6013 * We will call get_btlog_records() below which populates this region while holding a spinlock
6014 * (the btlog lock). So these pages need to be wired.
6016 kr
= vm_map_wire_kernel(ipc_kernel_map
, recs_addr
, recs_addr
+ recs_size
,
6017 VM_PROT_READ
| VM_PROT_WRITE
, VM_KERN_MEMORY_IPC
, FALSE
);
6018 assert(kr
== KERN_SUCCESS
);
6020 recs
= (zone_btrecord_t
*)recs_addr
;
6021 get_btlog_records(zone_ptr
->zlog_btlog
, recs
, &numrecs
);
6023 kr
= vm_map_unwire(ipc_kernel_map
, recs_addr
, recs_addr
+ recs_size
, FALSE
);
6024 assert(kr
== KERN_SUCCESS
);
6026 *recsp
= (zone_btrecord_t
*) create_vm_map_copy(recs_addr
, recs_size
, numrecs
* sizeof *recs
);
6027 *recsCntp
= numrecs
;
6029 return KERN_SUCCESS
;
6031 #else /* DEBUG || DEVELOPMENT */
6032 #pragma unused(host, name, recsp, recsCntp)
6033 return KERN_FAILURE
;
6034 #endif /* DEBUG || DEVELOPMENT */
6038 #if DEBUG || DEVELOPMENT
6041 mach_memory_info_check(void)
6043 mach_memory_info_t
* memory_info
;
6044 mach_memory_info_t
* info
;
6045 unsigned int num_info
;
6046 vm_offset_t memory_info_addr
;
6048 size_t memory_info_size
, memory_info_vmsize
;
6049 uint64_t top_wired
, zonestotal
, total
;
6051 num_info
= vm_page_diagnose_estimate();
6052 memory_info_size
= num_info
* sizeof(*memory_info
);
6053 memory_info_vmsize
= round_page(memory_info_size
);
6054 kr
= kmem_alloc(kernel_map
, &memory_info_addr
, memory_info_vmsize
, VM_KERN_MEMORY_DIAG
);
6055 assert(kr
== KERN_SUCCESS
);
6057 memory_info
= (mach_memory_info_t
*) memory_info_addr
;
6058 vm_page_diagnose(memory_info
, num_info
, 0);
6060 top_wired
= total
= zonestotal
= 0;
6061 zone_index_foreach(idx
) {
6062 zonestotal
+= zone_size_wired(&zone_array
[idx
]);
6065 for (uint32_t idx
= 0; idx
< num_info
; idx
++) {
6066 info
= &memory_info
[idx
];
6070 if (VM_KERN_COUNT_WIRED
== info
->site
) {
6071 top_wired
= info
->size
;
6073 if (VM_KERN_SITE_HIDE
& info
->flags
) {
6076 if (!(VM_KERN_SITE_WIRED
& info
->flags
)) {
6079 total
+= info
->size
;
6081 total
+= zonestotal
;
6083 printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n",
6084 total
, top_wired
, zonestotal
, top_wired
- total
);
6086 kmem_free(kernel_map
, memory_info_addr
, memory_info_vmsize
);
6091 extern boolean_t(*volatile consider_buffer_cache_collect
)(int);
6093 #endif /* DEBUG || DEVELOPMENT */
6099 if (host
== HOST_NULL
) {
6100 return KERN_INVALID_HOST
;
6103 #if DEBUG || DEVELOPMENT
6104 /* Callout to buffer cache GC to drop elements in the apfs zones */
6105 if (consider_buffer_cache_collect
!= NULL
) {
6106 (void)(*consider_buffer_cache_collect
)(0);
6108 consider_zone_gc(FALSE
);
6109 #endif /* DEBUG || DEVELOPMENT */
6110 return KERN_SUCCESS
;
6114 zone_find_largest(void)
6116 uint32_t largest_idx
= 0;
6117 vm_offset_t largest_size
= zone_size_wired(&zone_array
[0]);
6119 zone_index_foreach(i
) {
6120 vm_offset_t size
= zone_size_wired(&zone_array
[i
]);
6121 if (size
> largest_size
) {
6123 largest_size
= size
;
6127 return &zone_array
[largest_idx
];
6130 #pragma mark - tests
6131 #if DEBUG || DEVELOPMENT
6134 * Used for sysctl kern.run_zone_test which is not thread-safe. Ensure only one
6135 * thread goes through at a time. Or we can end up with multiple test zones (if
6136 * a second zinit() comes through before zdestroy()), which could lead us to
6139 SIMPLE_LOCK_DECLARE(zone_test_lock
, 0);
6140 static boolean_t zone_test_running
= FALSE
;
6141 static zone_t test_zone_ptr
= NULL
;
6144 zone_copy_allocations(zone_t z
, uintptr_t *elems
, bitmap_t
*bits
,
6145 zone_pva_t page_index
, zone_addr_kind_t kind
)
6147 vm_offset_t free
, first
, end
, page
;
6148 struct zone_page_metadata
*meta
;
6150 while (!zone_pva_is_null(page_index
)) {
6151 page
= zone_pva_to_addr(page_index
);
6152 meta
= zone_pva_to_meta(page_index
, kind
);
6153 end
= page
+ ptoa(meta
->zm_percpu
? 1 : meta
->zm_page_count
);
6154 first
= page
+ ZONE_PAGE_FIRST_OFFSET(kind
);
6156 bitmap_clear(bits
, (uint32_t)((end
- first
) / zone_elem_size(z
)));
6158 // construct bitmap of all freed elements
6159 free
= zone_page_meta_get_freelist(z
, meta
, page
);
6161 bitmap_set(bits
, (uint32_t)((free
- first
) / zone_elem_size(z
)));
6163 // next free element
6164 free
= *(vm_offset_t
*)free
^ zp_nopoison_cookie
;
6167 for (unsigned i
= 0; first
< end
; i
++, first
+= zone_elem_size(z
)) {
6168 if (!bitmap_test(bits
, i
)) {
6169 *elems
++ = INSTANCE_PUT(first
);
6173 page_index
= meta
->zm_page_next
;

kern_return_t
zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * refCon)
{
	uintptr_t     zbt[MAX_ZTRACE_DEPTH];
	zone_t        zone = NULL;
	uintptr_t    *array;
	uintptr_t    *next;
	uintptr_t     element, bt;
	uint32_t      idx, count, found;
	uint32_t      btidx, btcount, nobtcount, btfound;
	uint32_t      elemSize;
	uint64_t      maxElems;
	kern_return_t kr;
	bitmap_t     *bits;

	zone_index_foreach(i) {
		if (!strncmp(zoneName, zone_array[i].z_name, nameLen)) {
			zone = &zone_array[i];
			break;
		}
	}
	if (zone == NULL) {
		return KERN_INVALID_NAME;
	}

	elemSize = zone_elem_size(zone);
	maxElems = (zone->countavail + 1) & ~1ul;

	if ((ptoa(zone->percpu ? 1 : zone->alloc_pages) % elemSize) &&
	    !zone_leaks_scan_enable) {
		return KERN_INVALID_CAPABILITY;
	}

	kr = kmem_alloc_kobject(kernel_map, (vm_offset_t *) &array,
	    maxElems * sizeof(uintptr_t) + BITMAP_LEN(ZONE_CHUNK_MAXELEMENTS),
	    VM_KERN_MEMORY_DIAG);
	if (KERN_SUCCESS != kr) {
		return kr;
	}

	/* maxElems is a 2-multiple so we're always aligned */
	bits = CAST_DOWN_EXPLICIT(bitmap_t *, array + maxElems);

	lock_zone(zone);

	next = array;
	next = zone_copy_allocations(zone, next, bits,
	    zone->pages_any_free_foreign, ZONE_ADDR_FOREIGN);
	next = zone_copy_allocations(zone, next, bits,
	    zone->pages_all_used_foreign, ZONE_ADDR_FOREIGN);
	next = zone_copy_allocations(zone, next, bits,
	    zone->pages_intermediate, ZONE_ADDR_NATIVE);
	next = zone_copy_allocations(zone, next, bits,
	    zone->pages_all_used, ZONE_ADDR_NATIVE);
	count = (uint32_t)(next - array);

	unlock_zone(zone);

	zone_leaks_scan(array, count, zone_elem_size(zone), &found);
	assert(found <= count);

	for (idx = 0; idx < count; idx++) {
		element = array[idx];
		if (kInstanceFlagReferenced & element) {
			continue;
		}
		element = INSTANCE_PUT(element) & ~kInstanceFlags;
	}

#if ZONE_ENABLE_LOGGING
	if (zone->zlog_btlog && !corruption_debug_flag) {
		// btlog_copy_backtraces_for_elements will set kInstanceFlagReferenced on elements it found
		btlog_copy_backtraces_for_elements(zone->zlog_btlog, array, &count, elemSize, proc, refCon);
	}
#endif /* ZONE_ENABLE_LOGGING */

	for (nobtcount = idx = 0; idx < count; idx++) {
		element = array[idx];
		if (!element) {
			continue;
		}
		if (kInstanceFlagReferenced & element) {
			continue;
		}
		element = INSTANCE_PUT(element) & ~kInstanceFlags;

		// see if we can find any backtrace left in the element
		btcount = (typeof(btcount))(zone_elem_size(zone) / sizeof(uintptr_t));
		if (btcount >= MAX_ZTRACE_DEPTH) {
			btcount = MAX_ZTRACE_DEPTH - 1;
		}
		for (btfound = btidx = 0; btidx < btcount; btidx++) {
			bt = ((uintptr_t *)element)[btcount - 1 - btidx];
			if (!VM_KERNEL_IS_SLID(bt)) {
				break;
			}
			zbt[btfound++] = bt;
		}
		if (btfound) {
			(*proc)(refCon, 1, elemSize, &zbt[0], btfound);
		} else {
			nobtcount++;
		}
	}
	if (nobtcount) {
		// fake backtrace when we found nothing
		zbt[0] = (uintptr_t) &zalloc;
		(*proc)(refCon, nobtcount, elemSize, &zbt[0], 1);
	}

	kmem_free(kernel_map, (vm_offset_t) array, maxElems * sizeof(uintptr_t));

	return KERN_SUCCESS;
}
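
/*
 * Illustrative sketch (not compiled): a leak_site_proc callback receives
 * (refCon, siteCount, zoneSize, backtrace, btCount) tuples, matching the way
 * zone_leaks() invokes `proc` above; the exact typedef lives elsewhere, so
 * the signature here is inferred from those call sites, and the callback and
 * context structure names are hypothetical.
 */
#if 0
struct leak_log_ctx {
	uint32_t sites_seen;
};

static void
log_leak_site(void *refCon, uint32_t siteCount, uint32_t zoneSize,
    uintptr_t *bt, uint32_t btCount)
{
	struct leak_log_ctx *ctx = refCon;

	ctx->sites_seen += siteCount;
	printf("leak site: %u element(s) of %u bytes, %u frame(s), top pc %p\n",
	    siteCount, zoneSize, btCount, btCount ? (void *)bt[0] : NULL);
}
#endif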

boolean_t
run_zone_test(void)
{
	unsigned int i = 0, max_iter = 5;
	void * test_ptr;
	zone_t test_zone;

	simple_lock(&zone_test_lock, &zone_locks_grp);
	if (!zone_test_running) {
		zone_test_running = TRUE;
	} else {
		simple_unlock(&zone_test_lock);
		printf("run_zone_test: Test already running.\n");
		return FALSE;
	}
	simple_unlock(&zone_test_lock);

	printf("run_zone_test: Testing zinit(), zalloc(), zfree() and zdestroy() on zone \"test_zone_sysctl\"\n");

	/* zinit() and zdestroy() a zone with the same name a bunch of times, verify that we get back the same zone each time */
	do {
		test_zone = zinit(sizeof(uint64_t), 100 * sizeof(uint64_t), sizeof(uint64_t), "test_zone_sysctl");
		if (test_zone == NULL) {
			printf("run_zone_test: zinit() failed\n");
			return FALSE;
		}

#if KASAN_ZALLOC
		if (test_zone_ptr == NULL && test_zone->countfree != 0) {
#else
		if (test_zone->countfree != 0) {
#endif
			printf("run_zone_test: free count is not zero\n");
			return FALSE;
		}

		if (test_zone_ptr == NULL) {
			/* Stash the zone pointer returned on the first zinit */
			printf("run_zone_test: zone created for the first time\n");
			test_zone_ptr = test_zone;
		} else if (test_zone != test_zone_ptr) {
			printf("run_zone_test: old zone pointer and new zone pointer don't match\n");
			return FALSE;
		}

		test_ptr = zalloc(test_zone);
		if (test_ptr == NULL) {
			printf("run_zone_test: zalloc() failed\n");
			return FALSE;
		}
		zfree(test_zone, test_ptr);

		zdestroy(test_zone);
		i++;

		printf("run_zone_test: Iteration %d successful\n", i);
	} while (i < max_iter);

	/* test Z_VA_SEQUESTER */
	if (zsecurity_options & ZSECURITY_OPTIONS_SEQUESTER) {
		int idx, num_allocs = 8;
		vm_size_t elem_size = 2 * PAGE_SIZE / num_allocs;
		void *allocs[num_allocs];
		vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_count, relaxed);
		vm_size_t zone_map_size = zone_range_size(&zone_info.zi_map_range);

		test_zone = zone_create("test_zone_sysctl", elem_size,
		    ZC_DESTRUCTIBLE | ZC_SEQUESTER);
		if (test_zone == NULL) {
			printf("run_zone_test: zinit() failed\n");
			return FALSE;
		}

		for (idx = 0; idx < num_allocs; idx++) {
			allocs[idx] = zalloc(test_zone);
			assert(NULL != allocs[idx]);
			printf("alloc[%d] %p\n", idx, allocs[idx]);
		}
		for (idx = 0; idx < num_allocs; idx++) {
			zfree(test_zone, allocs[idx]);
		}
		assert(!zone_pva_is_null(test_zone->pages_all_free));

		printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %qd%%\n",
		    vm_page_wire_count, vm_page_free_count,
		    (100ULL * ptoa_64(phys_pages)) / zone_map_size);
		zone_gc(FALSE);
		printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %qd%%\n",
		    vm_page_wire_count, vm_page_free_count,
		    (100ULL * ptoa_64(phys_pages)) / zone_map_size);
		unsigned int allva = 0;
		zone_index_foreach(zidx) {
			zone_t z = &zone_array[zidx];
			lock_zone(z);
			allva += z->page_count;
			if (!z->sequester_page_count) {
				unlock_zone(z);
				continue;
			}
			unsigned int count = 0;
			uint64_t size;
			zone_pva_t pg = z->pages_sequester;
			struct zone_page_metadata *page_meta;
			while (pg.packed_address) {
				page_meta = zone_pva_to_meta(pg, ZONE_ADDR_NATIVE);
				count += z->alloc_pages;
				pg = page_meta->zm_page_next;
			}
			assert(count == z->sequester_page_count);
			size = zone_size_wired(z);
			if (!size) {
				size = 1;
			}
			printf("%s%s: seq %d, res %d, %qd %%\n",
			    zone_heap_name(z), z->z_name, z->sequester_page_count,
			    z->page_count, zone_size_allocated(z) * 100ULL / size);
			unlock_zone(z);
		}

		printf("total va: %d\n", allva);

		assert(zone_pva_is_null(test_zone->pages_all_free));
		assert(!zone_pva_is_null(test_zone->pages_sequester));
		assert(2 == test_zone->sequester_page_count);
		for (idx = 0; idx < num_allocs; idx++) {
			assert(0 == pmap_find_phys(kernel_pmap, (addr64_t)(uintptr_t) allocs[idx]));
		}
		for (idx = 0; idx < num_allocs; idx++) {
			allocs[idx] = zalloc(test_zone);
			assert(allocs[idx]);
			printf("alloc[%d] %p\n", idx, allocs[idx]);
		}
		assert(zone_pva_is_null(test_zone->pages_sequester));
		assert(0 == test_zone->sequester_page_count);
		for (idx = 0; idx < num_allocs; idx++) {
			zfree(test_zone, allocs[idx]);
		}
		zdestroy(test_zone);
	} else {
		printf("run_zone_test: skipping sequester test (not enabled)\n");
	}

	printf("run_zone_test: Test passed\n");

	simple_lock(&zone_test_lock, &zone_locks_grp);
	zone_test_running = FALSE;
	simple_unlock(&zone_test_lock);

	return TRUE;
}
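
/*
 * Illustrative sketch (not compiled): run_zone_test() reports success as a
 * boolean_t, so a caller such as a debug-only sysctl handler only has to map
 * that onto its own convention. The wrapper name below is hypothetical.
 */
#if 0
static kern_return_t
run_zone_test_checked(void)
{
	return run_zone_test() ? KERN_SUCCESS : KERN_FAILURE;
}
#endif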

/*
 * Routines to test that zone garbage collection and zone replenish threads
 * running at the same time don't cause problems.
 */

void
zone_gc_replenish_test(void)
{
	zone_gc(FALSE);
}


void
zone_alloc_replenish_test(void)
{
	zone_t z = NULL;
	struct data { struct data *next; } *node, *list = NULL;

	/*
	 * Find a zone that has a replenish thread
	 */
	zone_index_foreach(i) {
		z = &zone_array[i];
		if (z->prio_refill_count &&
		    zone_elem_size(z) >= sizeof(struct data)) {
			break;
		}
		z = NULL;
	}
	if (z == NULL) {
		printf("Couldn't find a replenish zone\n");
		return;
	}

	for (uint32_t i = 0; i < 2000; ++i) { /* something big enough to go past replenishment */
		node = zalloc(z);
		node->next = list;
		list = node;
	}

	/*
	 * release the memory we allocated
	 */
	while (list != NULL) {
		node = list;
		list = list->next;
		zfree(z, node);
	}
}
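
/*
 * Illustrative sketch (not compiled): one way to drive the two routines above
 * concurrently is to push the GC side onto its own kernel thread while the
 * caller loops on the allocation side. This is only a sketch; it assumes
 * kernel_thread_start()/thread_deallocate()/thread_terminate() from
 * <kern/thread.h>, and the function names below are hypothetical.
 */
#if 0
static void
zone_gc_replenish_thread(void *arg __unused, wait_result_t wr __unused)
{
	for (int i = 0; i < 10; i++) {
		zone_gc_replenish_test();
	}
	thread_terminate(current_thread());
}

static void
zone_replenish_race_test(void)
{
	thread_t th;

	if (kernel_thread_start(zone_gc_replenish_thread, NULL, &th) == KERN_SUCCESS) {
		thread_deallocate(th);
	}
	for (int i = 0; i < 10; i++) {
		zone_alloc_replenish_test();
	}
}
#endif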

#endif /* DEBUG || DEVELOPMENT */