[apple/xnu.git] / osfmk / kern / zalloc.c (xnu-4903.270.47)
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65 #include <zone_debug.h>
66
67 #include <mach/mach_types.h>
68 #include <mach/vm_param.h>
69 #include <mach/kern_return.h>
70 #include <mach/mach_host_server.h>
71 #include <mach/task_server.h>
72 #include <mach/machine/vm_types.h>
73 #include <mach/vm_map.h>
74 #include <mach/sdt.h>
75
76 #include <kern/bits.h>
77 #include <kern/kern_types.h>
78 #include <kern/assert.h>
79 #include <kern/backtrace.h>
80 #include <kern/host.h>
81 #include <kern/macro_help.h>
82 #include <kern/sched.h>
83 #include <kern/locks.h>
84 #include <kern/sched_prim.h>
85 #include <kern/misc_protos.h>
86 #include <kern/thread_call.h>
87 #include <kern/zalloc.h>
88 #include <kern/kalloc.h>
89
90 #include <prng/random.h>
91
92 #include <vm/pmap.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_kern.h>
95 #include <vm/vm_page.h>
96
97 #include <pexpert/pexpert.h>
98
99 #include <machine/machparam.h>
100 #include <machine/machine_routines.h> /* ml_cpu_get_info */
101
102 #include <libkern/OSDebug.h>
103 #include <libkern/OSAtomic.h>
104 #include <libkern/section_keywords.h>
105 #include <sys/kdebug.h>
106
107 #include <san/kasan.h>
108
109 /*
110 * The zone_locks_grp allows for collecting lock statistics.
111  * All locks are associated with this group in zinit.
112 * Look at tools/lockstat for debugging lock contention.
113 */
114
115 lck_grp_t zone_locks_grp;
116 lck_grp_attr_t zone_locks_grp_attr;
117
118 /*
119 * ZONE_ALIAS_ADDR (deprecated)
120 */
121
122 #define from_zone_map(addr, size) \
123 ((vm_offset_t)(addr) >= zone_map_min_address && \
124 ((vm_offset_t)(addr) + size - 1) < zone_map_max_address )
125
126 /*
127 * Zone Corruption Debugging
128 *
129 * We use three techniques to detect modification of a zone element
130 * after it's been freed.
131 *
132 * (1) Check the freelist next pointer for sanity.
133 * (2) Store a backup of the next pointer at the end of the element,
134 * and compare it to the primary next pointer when the element is allocated
135 * to detect corruption of the freelist due to use-after-free bugs.
136 * The backup pointer is also XORed with a per-boot random cookie.
137 * (3) Poison the freed element by overwriting it with 0xdeadbeef,
138 * and check for that value when the element is being reused to make sure
139 * no part of the element has been modified while it was on the freelist.
140 * This will also help catch read-after-frees, as code will now dereference
141 * 0xdeadbeef instead of a valid but freed pointer.
142 *
143 * (1) and (2) occur for every allocation and free to a zone.
144 * This is done to make it slightly more difficult for an attacker to
145 * manipulate the freelist to behave in a specific way.
146 *
147 * Poisoning (3) occurs periodically for every N frees (counted per-zone)
148 * and on every free for zones smaller than a cacheline. If -zp
149 * is passed as a boot arg, poisoning occurs for every free.
150 *
151 * Performance slowdown is inversely proportional to the frequency of poisoning,
152 * with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32
153 * and higher. You can expect to find a 100% reproducible bug in an average of
154 * N tries, with a standard deviation of about N, but you will want to set
155 * "-zp" to always poison every free if you are attempting to reproduce
156 * a known bug.
157 *
158 * For a more heavyweight, but finer-grained method of detecting misuse
159 * of zone memory, look up the "Guard mode" zone allocator in gzalloc.c.
160 *
161 * Zone Corruption Logging
162 *
163 * You can also track where corruptions come from by using the boot-arguments
164 * "zlog=<zone name to log> -zc". Search for "Zone corruption logging" later
165 * in this document for more implementation and usage information.
166 *
167 * Zone Leak Detection
168 *
169 * To debug leaks of zone memory, use the zone leak detection tool 'zleaks'
170 * found later in this file via the showtopztrace and showz* macros in kgmacros,
171 * or use zlog without the -zc argument.
172 *
173 */
174
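/*
 * Illustrative sketch (not part of the original source) of how checks (1) and (2)
 * above fit together. When an element whose next free element is `next` is freed,
 * the code later in this file effectively does:
 *
 *      *primary = next ^ zp_nopoison_cookie;                    // obfuscated freelist link
 *      *backup  = next ^ (poison ? zp_poisoned_cookie
 *                                : zp_nopoison_cookie);         // redundant copy at element end
 *
 * On allocation, the element is accepted only if un-xoring the backup with one of
 * the two cookies reproduces the primary's next pointer; any other combination is
 * reported via backup_ptr_mismatch_panic().
 */
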
175 /* Returns TRUE if we rolled over the counter at factor */
176 static inline boolean_t
177 sample_counter(volatile uint32_t * count_p, uint32_t factor)
178 {
179 uint32_t old_count, new_count;
180 boolean_t rolled_over;
181
182 do {
183 new_count = old_count = *count_p;
184
185 if (++new_count >= factor) {
186 rolled_over = TRUE;
187 new_count = 0;
188 } else {
189 rolled_over = FALSE;
190 }
191 } while (!OSCompareAndSwap(old_count, new_count, count_p));
192
193 return rolled_over;
194 }
195
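/*
 * Usage sketch (illustration only): the poisoning path later in this file uses
 * sample_counter() to poison roughly one free in every zp_factor frees, along
 * the lines of:
 *
 *      poison = sample_counter(&zone->zp_count, zp_factor);
 *
 * where zone->zp_count is the per-zone sampling counter used by
 * zfree_poison_element(); the real code also folds zp_scale and the zone's
 * element size into the factor.
 */
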
196 #if defined(__LP64__)
197 #define ZP_POISON 0xdeadbeefdeadbeef
198 #else
199 #define ZP_POISON 0xdeadbeef
200 #endif
201
202 boolean_t zfree_poison_element(zone_t zone, vm_offset_t elem);
203 void zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr);
204
205 #define ZP_DEFAULT_SAMPLING_FACTOR 16
206 #define ZP_DEFAULT_SCALE_FACTOR 4
207
208 /*
209  * A zp_factor of 0 indicates zone poisoning is disabled; however, we still
210  * poison zones smaller than zp_tiny_zone_limit (a cacheline).
211 * Passing the -no-zp boot-arg disables even this behavior.
212 * In all cases, we record and check the integrity of a backup pointer.
213 */
214
215 /* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */
216 #if DEBUG
217 #define DEFAULT_ZP_FACTOR (1)
218 #else
219 #define DEFAULT_ZP_FACTOR (0)
220 #endif
221 uint32_t zp_factor = DEFAULT_ZP_FACTOR;
222
223 /* set by zp-scale=N boot arg, scales zp_factor by zone size */
224 uint32_t zp_scale = 0;
225
226 /* set in zp_init, zero indicates -no-zp boot-arg */
227 vm_size_t zp_tiny_zone_limit = 0;
228
229 /* initialized to a per-boot random value in zp_init */
230 uintptr_t zp_poisoned_cookie = 0;
231 uintptr_t zp_nopoison_cookie = 0;
232
233 #if VM_MAX_TAG_ZONES
234 boolean_t zone_tagging_on;
235 #endif /* VM_MAX_TAG_ZONES */
236
237 SECURITY_READ_ONLY_LATE(boolean_t) copyio_zalloc_check = TRUE;
238 static struct bool_gen zone_bool_gen;
239
240 /*
241 * initialize zone poisoning
242 * called from zone_bootstrap before any allocations are made from zalloc
243 */
244 static inline void
245 zp_init(void)
246 {
247 char temp_buf[16];
248
249 /*
250 * Initialize backup pointer random cookie for poisoned elements
251  * Try not to call early_random() back to back; it may return
252 * the same value if mach_absolute_time doesn't have sufficient time
253 * to tick over between calls. <rdar://problem/11597395>
254 * (This is only a problem on embedded devices)
255 */
256 zp_poisoned_cookie = (uintptr_t) early_random();
257
258 /*
259 * Always poison zones smaller than a cacheline,
260 * because it's pretty close to free
261 */
262 ml_cpu_info_t cpu_info;
263 ml_cpu_get_info(&cpu_info);
264 zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size;
265
266 zp_factor = ZP_DEFAULT_SAMPLING_FACTOR;
267 zp_scale = ZP_DEFAULT_SCALE_FACTOR;
268
269 //TODO: Bigger permutation?
270 /*
271 * Permute the default factor +/- 1 to make it less predictable
272 * This adds or subtracts ~4 poisoned objects per 1000 frees.
273 */
274 if (zp_factor != 0) {
275 uint32_t rand_bits = early_random() & 0x3;
276
277 if (rand_bits == 0x1) {
278 zp_factor += 1;
279 } else if (rand_bits == 0x2) {
280 zp_factor -= 1;
281 }
282 /* if 0x0 or 0x3, leave it alone */
283 }
284
285 /* -zp: enable poisoning for every alloc and free */
286 if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
287 zp_factor = 1;
288 }
289
290 /* -no-zp: disable poisoning completely even for tiny zones */
291 if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
292 zp_factor = 0;
293 zp_tiny_zone_limit = 0;
294 printf("Zone poisoning disabled\n");
295 }
296
297 /* zp-factor=XXXX: override how often to poison freed zone elements */
298 if (PE_parse_boot_argn("zp-factor", &zp_factor, sizeof(zp_factor))) {
299 printf("Zone poisoning factor override: %u\n", zp_factor);
300 }
301
302 /* zp-scale=XXXX: override how much zone size scales zp-factor by */
303 if (PE_parse_boot_argn("zp-scale", &zp_scale, sizeof(zp_scale))) {
304 printf("Zone poisoning scale factor override: %u\n", zp_scale);
305 }
306
307 /* Initialize backup pointer random cookie for unpoisoned elements */
308 zp_nopoison_cookie = (uintptr_t) early_random();
309
310 #if MACH_ASSERT
311 if (zp_poisoned_cookie == zp_nopoison_cookie) {
312 panic("early_random() is broken: %p and %p are not random\n",
313 (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie);
314 }
315 #endif
316
317 /*
318 * Use the last bit in the backup pointer to hint poisoning state
319 * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so
320 * the low bits are zero.
321 */
322 zp_poisoned_cookie |= (uintptr_t)0x1ULL;
323 zp_nopoison_cookie &= ~((uintptr_t)0x1ULL);
324
325 #if defined(__LP64__)
326 /*
327 * Make backup pointers more obvious in GDB for 64 bit
328  * by making 0xFFFFFF... ^ cookie = 0xFACADE...
329 * (0xFACADE = 0xFFFFFF ^ 0x053521)
330 * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011)
331 * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked
332 * by the sanity check, so it's OK for that part of the cookie to be predictable.
333 *
334 * TODO: Use #defines, xors, and shifts
335 */
336
337 zp_poisoned_cookie &= 0x000000FFFFFFFFFF;
338 zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */
339
340 zp_nopoison_cookie &= 0x000000FFFFFFFFFF;
341 zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */
342 #endif
343 }
344
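/*
 * Worked example of the LP64 cookie tagging above (illustration only, with a
 * made-up element address): zone pointers carry 0xFFFFFF in their top 3 bytes,
 * so for a freed element whose next pointer is 0xFFFFFF80DEADF000 the stored
 * backup pointer reads:
 *
 *      0xFFFFFF80DEADF000 ^ zp_poisoned_cookie  ->  0xFACADE...   (poisoned)
 *      0xFFFFFF80DEADF000 ^ zp_nopoison_cookie  ->  0xC0FFEE...   (not poisoned)
 *
 * because the cookies' top 3 bytes were just forced to 0x053521 and 0x3f0011
 * (0xFFFFFF ^ 0x053521 == 0xFACADE, 0xFFFFFF ^ 0x3f0011 == 0xC0FFEE).
 */
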
345 /*
346 * These macros are used to keep track of the number
347 * of pages being used by the zone currently. The
348 * z->page_count is not protected by the zone lock.
349 */
350 #define ZONE_PAGE_COUNT_INCR(z, count) \
351 { \
352 OSAddAtomic64(count, &(z->page_count)); \
353 }
354
355 #define ZONE_PAGE_COUNT_DECR(z, count) \
356 { \
357 OSAddAtomic64(-count, &(z->page_count)); \
358 }
359
360 vm_map_t zone_map = VM_MAP_NULL;
361
362 /* for is_sane_zone_element and garbage collection */
363
364 vm_offset_t zone_map_min_address = 0; /* initialized in zone_init */
365 vm_offset_t zone_map_max_address = 0;
366
367 /* Globals for random boolean generator for elements in free list */
368 #define MAX_ENTROPY_PER_ZCRAM 4
369
370 /* VM region for all metadata structures */
371 vm_offset_t zone_metadata_region_min = 0;
372 vm_offset_t zone_metadata_region_max = 0;
373 decl_lck_mtx_data(static, zone_metadata_region_lck)
374 lck_attr_t zone_metadata_lock_attr;
375 lck_mtx_ext_t zone_metadata_region_lck_ext;
376
377 /* Helpful for walking through a zone's free element list. */
378 struct zone_free_element {
379 struct zone_free_element *next;
380 /* ... */
381 /* void *backup_ptr; */
382 };
383
384 #if CONFIG_ZCACHE
385
386 #if !CONFIG_GZALLOC
387 bool use_caching = TRUE;
388 #else
389 bool use_caching = FALSE;
390 #endif /* !CONFIG_GZALLOC */
391
392 /*
393 * Decides whether per-cpu zone caching is to be enabled for all zones.
394 * Can be set to TRUE via the boot-arg '-zcache_all'.
395 */
396 bool cache_all_zones = FALSE;
397
398 /*
399 * Specifies a single zone to enable CPU caching for.
400 * Can be set using boot-args: zcc_enable_for_zone_name=<zone>
401 */
402 static char cache_zone_name[MAX_ZONE_NAME];
403
404 static inline bool
405 zone_caching_enabled(zone_t z)
406 {
407 return z->cpu_cache_enabled && !z->tags && !z->zleak_on;
408 }
409
410 #endif /* CONFIG_ZCACHE */
411
412 /*
413 * Protects zone_array, num_zones, num_zones_in_use, and zone_empty_bitmap
414 */
415 decl_simple_lock_data(, all_zones_lock)
416 unsigned int num_zones_in_use;
417 unsigned int num_zones;
418
419 #define MAX_ZONES 320
420 struct zone zone_array[MAX_ZONES];
421
422 /* Used to keep track of empty slots in the zone_array */
423 bitmap_t zone_empty_bitmap[BITMAP_LEN(MAX_ZONES)];
424
425 #if DEBUG || DEVELOPMENT
426 /*
427  * Used for the sysctl kern.run_zone_test, which is not thread-safe. Ensure only one thread goes through at a time;
428  * otherwise we can end up with multiple test zones (if a second zinit() comes through before zdestroy()), which could
429  * lead us to run out of zones.
430 */
431 decl_simple_lock_data(, zone_test_lock)
432 static boolean_t zone_test_running = FALSE;
433 static zone_t test_zone_ptr = NULL;
434 #endif /* DEBUG || DEVELOPMENT */
435
436 #define PAGE_METADATA_GET_ZINDEX(page_meta) \
437 (page_meta->zindex)
438
439 #define PAGE_METADATA_GET_ZONE(page_meta) \
440 (&(zone_array[page_meta->zindex]))
441
442 #define PAGE_METADATA_SET_ZINDEX(page_meta, index) \
443 page_meta->zindex = (index);
444
445 struct zone_page_metadata {
446 queue_chain_t pages; /* linkage pointer for metadata lists */
447
448 /* Union for maintaining start of element free list and real metadata (for multipage allocations) */
449 union {
450 /*
451 * The start of the freelist can be maintained as a 32-bit offset instead of a pointer because
452 * the free elements would be at max ZONE_MAX_ALLOC_SIZE bytes away from the metadata. Offset
453 * from start of the allocation chunk to free element list head.
454 */
455 uint32_t freelist_offset;
456 /*
457 * This field is used to lookup the real metadata for multipage allocations, where we mark the
458 * metadata for all pages except the first as "fake" metadata using MULTIPAGE_METADATA_MAGIC.
459 * Offset from this fake metadata to real metadata of allocation chunk (-ve offset).
460 */
461 uint32_t real_metadata_offset;
462 };
463
464 /*
465 * For the first page in the allocation chunk, this represents the total number of free elements in
466 * the chunk.
467 */
468 uint16_t free_count;
469 unsigned zindex : ZINDEX_BITS; /* Zone index within the zone_array */
470 unsigned page_count : PAGECOUNT_BITS; /* Count of pages within the allocation chunk */
471 };
472
473 /* Macro to get page index (within zone_map) of page containing element */
474 #define PAGE_INDEX_FOR_ELEMENT(element) \
475 (((vm_offset_t)trunc_page(element) - zone_map_min_address) / PAGE_SIZE)
476
477 /* Macro to get metadata structure given a page index in zone_map */
478 #define PAGE_METADATA_FOR_PAGE_INDEX(index) \
479 (zone_metadata_region_min + ((index) * sizeof(struct zone_page_metadata)))
480
481 /* Macro to get index (within zone_map) for given metadata */
482 #define PAGE_INDEX_FOR_METADATA(page_meta) \
483 (((vm_offset_t)page_meta - zone_metadata_region_min) / sizeof(struct zone_page_metadata))
484
485 /* Macro to get page for given page index in zone_map */
486 #define PAGE_FOR_PAGE_INDEX(index) \
487 (zone_map_min_address + (PAGE_SIZE * (index)))
488
489 /* Macro to get the actual metadata for a given address */
490 #define PAGE_METADATA_FOR_ELEMENT(element) \
491 (struct zone_page_metadata *)(PAGE_METADATA_FOR_PAGE_INDEX(PAGE_INDEX_FOR_ELEMENT(element)))
492
493 /* Magic value to indicate empty element free list */
494 #define PAGE_METADATA_EMPTY_FREELIST ((uint32_t)(~0))
495
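/*
 * Putting the macros above together (illustrative sketch, not original source):
 * for an element allocated out of the zone_map, its metadata lives at a fixed
 * slot in the metadata region, computed purely from its page index:
 *
 *      idx  = (trunc_page(element) - zone_map_min_address) / PAGE_SIZE;
 *      meta = (struct zone_page_metadata *)
 *                 (zone_metadata_region_min + idx * sizeof(struct zone_page_metadata));
 *
 * Foreign (non-zone_map) elements instead keep their metadata at the start of
 * their own page, which is why get_zone_page_metadata() below special-cases
 * from_zone_map().
 */
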
496 vm_map_copy_t create_vm_map_copy(vm_offset_t start_addr, vm_size_t total_size, vm_size_t used_size);
497 boolean_t get_zone_info(zone_t z, mach_zone_name_t *zn, mach_zone_info_t *zi);
498 boolean_t is_zone_map_nearing_exhaustion(void);
499 extern void vm_pageout_garbage_collect(int collect);
500
501 static inline void *
502 page_metadata_get_freelist(struct zone_page_metadata *page_meta)
503 {
504 assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
505 if (page_meta->freelist_offset == PAGE_METADATA_EMPTY_FREELIST) {
506 return NULL;
507 } else {
508 if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) {
509 return (void *)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)) + page_meta->freelist_offset);
510 } else {
511 return (void *)((vm_offset_t)page_meta + page_meta->freelist_offset);
512 }
513 }
514 }
515
516 static inline void
517 page_metadata_set_freelist(struct zone_page_metadata *page_meta, void *addr)
518 {
519 assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
520 if (addr == NULL) {
521 page_meta->freelist_offset = PAGE_METADATA_EMPTY_FREELIST;
522 } else {
523 if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) {
524 page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
525 } else {
526 page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - (vm_offset_t)page_meta);
527 }
528 }
529 }
530
531 static inline struct zone_page_metadata *
532 page_metadata_get_realmeta(struct zone_page_metadata *page_meta)
533 {
534 assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
535 return (struct zone_page_metadata *)((vm_offset_t)page_meta - page_meta->real_metadata_offset);
536 }
537
538 static inline void
539 page_metadata_set_realmeta(struct zone_page_metadata *page_meta, struct zone_page_metadata *real_meta)
540 {
541 assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
542 assert(PAGE_METADATA_GET_ZINDEX(real_meta) != MULTIPAGE_METADATA_MAGIC);
543 assert((vm_offset_t)page_meta > (vm_offset_t)real_meta);
544 vm_offset_t offset = (vm_offset_t)page_meta - (vm_offset_t)real_meta;
545 assert(offset <= UINT32_MAX);
546 page_meta->real_metadata_offset = (uint32_t)offset;
547 }
548
549 /* The backup pointer is stored in the last pointer-sized location in an element. */
550 static inline vm_offset_t *
551 get_backup_ptr(vm_size_t elem_size,
552 vm_offset_t *element)
553 {
554 return (vm_offset_t *) ((vm_offset_t)element + elem_size - sizeof(vm_offset_t));
555 }
556
557 /*
558 * Routine to populate a page backing metadata in the zone_metadata_region.
559 * Must be called without the zone lock held as it might potentially block.
560 */
561 static inline void
562 zone_populate_metadata_page(struct zone_page_metadata *page_meta)
563 {
564 vm_offset_t page_metadata_begin = trunc_page(page_meta);
565 vm_offset_t page_metadata_end = trunc_page((vm_offset_t)page_meta + sizeof(struct zone_page_metadata));
566
567 for (; page_metadata_begin <= page_metadata_end; page_metadata_begin += PAGE_SIZE) {
568 #if !KASAN
569 /*
570 * This can race with another thread doing a populate on the same metadata
571 * page, where we see an updated pmap but unmapped KASan shadow, causing a
572 * fault in the shadow when we first access the metadata page. Avoid this
573 * by always synchronizing on the zone_metadata_region lock with KASan.
574 */
575 if (pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) {
576 continue;
577 }
578 #endif
579 /* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */
580 lck_mtx_lock(&zone_metadata_region_lck);
581 if (0 == pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) {
582 kern_return_t __assert_only ret = kernel_memory_populate(zone_map,
583 page_metadata_begin,
584 PAGE_SIZE,
585 KMA_KOBJECT,
586 VM_KERN_MEMORY_OSFMK);
587
588 /* should not fail with the given arguments */
589 assert(ret == KERN_SUCCESS);
590 }
591 lck_mtx_unlock(&zone_metadata_region_lck);
592 }
593 return;
594 }
595
596 static inline uint16_t
597 get_metadata_alloc_count(struct zone_page_metadata *page_meta)
598 {
599 assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
600 struct zone *z = PAGE_METADATA_GET_ZONE(page_meta);
601 return (page_meta->page_count * PAGE_SIZE) / z->elem_size;
602 }
603
604 /*
605 * Routine to lookup metadata for any given address.
606 * If init is marked as TRUE, this should be called without holding the zone lock
607 * since the initialization might block.
608 */
609 static inline struct zone_page_metadata *
610 get_zone_page_metadata(struct zone_free_element *element, boolean_t init)
611 {
612 struct zone_page_metadata *page_meta = 0;
613
614 if (from_zone_map(element, sizeof(struct zone_free_element))) {
615 page_meta = (struct zone_page_metadata *)(PAGE_METADATA_FOR_ELEMENT(element));
616 if (init) {
617 zone_populate_metadata_page(page_meta);
618 }
619 } else {
620 page_meta = (struct zone_page_metadata *)(trunc_page((vm_offset_t)element));
621 }
622 if (init) {
623 bzero((char *)page_meta, sizeof(struct zone_page_metadata));
624 }
625 return (PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC) ? page_meta : page_metadata_get_realmeta(page_meta);
626 }
627
628 /* Routine to get the page for a given metadata */
629 static inline vm_offset_t
630 get_zone_page(struct zone_page_metadata *page_meta)
631 {
632 if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) {
633 return (vm_offset_t)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
634 } else {
635 return (vm_offset_t)(trunc_page(page_meta));
636 }
637 }
638
639 /*
640 * ZTAGS
641 */
642
643 #if VM_MAX_TAG_ZONES
644
645 // for zones with tagging enabled:
646
647 // calculate a pointer to the tag base entry,
648 // holding either a uint32_t (the first tag offset for a page in the zone map),
649 // or two uint16_t tags stored inline if the page can only hold one or two elements
650
651 #define ZTAGBASE(zone, element) \
652 (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)])
653
654 // pointer to the tag for an element
655 #define ZTAG(zone, element) \
656 ({ \
657 vm_tag_t * result; \
658 if ((zone)->tags_inline) { \
659 result = (vm_tag_t *) ZTAGBASE((zone), (element)); \
660 if ((page_mask & element) >= (zone)->elem_size) result++; \
661 } else { \
662 result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \
663 } \
664 result; \
665 })
666
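/*
 * Illustration (not part of the original source) of how ZTAG() resolves a tag
 * for an element `elem` of zone `z`:
 *
 *   inline (z->tags_inline):  the per-page uint32_t at ZTAGBASE(z, elem) is
 *       reinterpreted as two vm_tag_t slots; the second slot is used when elem
 *       is the second element on the page.
 *   out of line:              ZTAGBASE(z, elem)[0] is the page's base index into
 *       the zone_tags heap, and the element's slot within the page
 *       (((elem) & page_mask) / z->elem_size) is added to it.
 *
 * Tags are stored shifted left by one so that bit 0 (the ztBlock free bit, which
 * shares this storage) stays clear; see try_alloc_from_zone() and
 * zone_element_info().
 */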
667
668 static vm_offset_t zone_tagbase_min;
669 static vm_offset_t zone_tagbase_max;
670 static vm_offset_t zone_tagbase_map_size;
671 static vm_map_t zone_tagbase_map;
672
673 static vm_offset_t zone_tags_min;
674 static vm_offset_t zone_tags_max;
675 static vm_offset_t zone_tags_map_size;
676 static vm_map_t zone_tags_map;
677
678 // simple heap allocator for allocating the tags for new memory
679
680 decl_lck_mtx_data(, ztLock) /* heap lock */
681 enum{
682 ztFreeIndexCount = 8,
683 ztFreeIndexMax = (ztFreeIndexCount - 1),
684 ztTagsPerBlock = 4
685 };
686
687 struct ztBlock {
688 #if __LITTLE_ENDIAN__
689 uint64_t free:1,
690 next:21,
691 prev:21,
692 size:21;
693 #else
694 // ztBlock needs free bit least significant
695 #error !__LITTLE_ENDIAN__
696 #endif
697 };
698 typedef struct ztBlock ztBlock;
699
700 static ztBlock * ztBlocks;
701 static uint32_t ztBlocksCount;
702 static uint32_t ztBlocksFree;
703
704 static uint32_t
705 ztLog2up(uint32_t size)
706 {
707 if (1 == size) {
708 size = 0;
709 } else {
710 size = 32 - __builtin_clz(size - 1);
711 }
712 return size;
713 }
714
715 static uint32_t
716 ztLog2down(uint32_t size)
717 {
718 size = 31 - __builtin_clz(size);
719 return size;
720 }
721
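/*
 * Quick reference (illustration only):
 *
 *      ztLog2up(1) == 0,   ztLog2up(4) == 2,   ztLog2up(5) == 3
 *      ztLog2down(1) == 0, ztLog2down(4) == 2, ztLog2down(5) == 2
 *
 * i.e. ceiling and floor of log2, used below to pick a free-queue index that is
 * then clamped to ztFreeIndexMax.
 */
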
722 static void
723 ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
724 {
725 vm_map_offset_t addr = (vm_map_offset_t) address;
726 vm_map_offset_t page, end;
727
728 page = trunc_page(addr);
729 end = round_page(addr + size);
730
731 for (; page < end; page += page_size) {
732 if (!pmap_find_phys(kernel_pmap, page)) {
733 kern_return_t __unused
734 ret = kernel_memory_populate(map, page, PAGE_SIZE,
735 KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
736 assert(ret == KERN_SUCCESS);
737 }
738 }
739 }
740
741 static boolean_t
742 ztPresent(const void * address, size_t size)
743 {
744 vm_map_offset_t addr = (vm_map_offset_t) address;
745 vm_map_offset_t page, end;
746 boolean_t result;
747
748 page = trunc_page(addr);
749 end = round_page(addr + size);
750 for (result = TRUE; (page < end); page += page_size) {
751 result = pmap_find_phys(kernel_pmap, page);
752 if (!result) {
753 break;
754 }
755 }
756 return result;
757 }
758
759
760 void __unused
761 ztDump(boolean_t sanity);
762 void __unused
763 ztDump(boolean_t sanity)
764 {
765 uint32_t q, cq, p;
766
767 for (q = 0; q <= ztFreeIndexMax; q++) {
768 p = q;
769 do{
770 if (sanity) {
771 cq = ztLog2down(ztBlocks[p].size);
772 if (cq > ztFreeIndexMax) {
773 cq = ztFreeIndexMax;
774 }
775 if (!ztBlocks[p].free
776 || ((p != q) && (q != cq))
777 || (ztBlocks[ztBlocks[p].next].prev != p)
778 || (ztBlocks[ztBlocks[p].prev].next != p)) {
779 kprintf("zterror at %d", p);
780 ztDump(FALSE);
781 kprintf("zterror at %d", p);
782 assert(FALSE);
783 }
784 continue;
785 }
786 kprintf("zt[%03d]%c %d, %d, %d\n",
787 p, ztBlocks[p].free ? 'F' : 'A',
788 ztBlocks[p].next, ztBlocks[p].prev,
789 ztBlocks[p].size);
790 p = ztBlocks[p].next;
791 if (p == q) {
792 break;
793 }
794 }while (p != q);
795 if (!sanity) {
796 printf("\n");
797 }
798 }
799 if (!sanity) {
800 printf("-----------------------\n");
801 }
802 }
803
804
805
806 #define ZTBDEQ(idx) \
807 ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
808 ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;
809
810 static void
811 ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
812 {
813 uint32_t q, w, p, size, merge;
814
815 assert(count);
816 ztBlocksFree += count;
817
818     // merge with the following free block
819 merge = (index + count);
820 if ((merge < ztBlocksCount)
821 && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
822 && ztBlocks[merge].free) {
823 ZTBDEQ(merge);
824 count += ztBlocks[merge].size;
825 }
826
827     // merge with the preceding free block
828 merge = (index - 1);
829 if ((merge > ztFreeIndexMax)
830 && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
831 && ztBlocks[merge].free) {
832 size = ztBlocks[merge].size;
833 count += size;
834 index -= size;
835 ZTBDEQ(index);
836 }
837
838 q = ztLog2down(count);
839 if (q > ztFreeIndexMax) {
840 q = ztFreeIndexMax;
841 }
842 w = q;
843 // queue in order of size
844 while (TRUE) {
845 p = ztBlocks[w].next;
846 if (p == q) {
847 break;
848 }
849 if (ztBlocks[p].size >= count) {
850 break;
851 }
852 w = p;
853 }
854 ztBlocks[p].prev = index;
855 ztBlocks[w].next = index;
856
857 // fault in first
858 ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
859
860 // mark first & last with free flag and size
861 ztBlocks[index].free = TRUE;
862 ztBlocks[index].size = count;
863 ztBlocks[index].prev = w;
864 ztBlocks[index].next = p;
865 if (count > 1) {
866 index += (count - 1);
867 // fault in last
868 ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
869 ztBlocks[index].free = TRUE;
870 ztBlocks[index].size = count;
871 }
872 }
873
874 static uint32_t
875 ztAlloc(zone_t zone, uint32_t count)
876 {
877 uint32_t q, w, p, leftover;
878
879 assert(count);
880
881 q = ztLog2up(count);
882 if (q > ztFreeIndexMax) {
883 q = ztFreeIndexMax;
884 }
885 do{
886 w = q;
887 while (TRUE) {
888 p = ztBlocks[w].next;
889 if (p == q) {
890 break;
891 }
892 if (ztBlocks[p].size >= count) {
893 // dequeue, mark both ends allocated
894 ztBlocks[w].next = ztBlocks[p].next;
895 ztBlocks[ztBlocks[p].next].prev = w;
896 ztBlocks[p].free = FALSE;
897 ztBlocksFree -= ztBlocks[p].size;
898 if (ztBlocks[p].size > 1) {
899 ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
900 }
901
902 // fault all the allocation
903 ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
904 // mark last as allocated
905 if (count > 1) {
906 ztBlocks[p + count - 1].free = FALSE;
907 }
908 // free remainder
909 leftover = ztBlocks[p].size - count;
910 if (leftover) {
911 ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
912 }
913
914 return p;
915 }
916 w = p;
917 }
918 q++;
919 }while (q <= ztFreeIndexMax);
920
921 return -1U;
922 }
923
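/*
 * Design note and usage sketch (not part of the original source): ztAlloc() and
 * ztFree() implement a small segregated-fit heap over the ztBlocks array. Free
 * runs live on ztFreeIndexCount circular queues indexed by log2(size) clamped to
 * ztFreeIndexMax, kept sorted by size, and both the first and last entry of a
 * run carry the free bit and size so ztFree() can coalesce with either neighbor.
 * ztMemoryAdd()/ztMemoryRemove() below use the heap roughly as:
 *
 *      blocks = (count + ztTagsPerBlock - 1) / ztTagsPerBlock;
 *      block  = ztAlloc(zone, blocks);         // -1U on failure
 *      ...
 *      ztFree(NULL, block, blocks);
 */
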
924 static void
925 ztInit(vm_size_t max_zonemap_size, lck_grp_t * group)
926 {
927 kern_return_t ret;
928 vm_map_kernel_flags_t vmk_flags;
929 uint32_t idx;
930
931 lck_mtx_init(&ztLock, group, LCK_ATTR_NULL);
932
933 // allocate submaps VM_KERN_MEMORY_DIAG
934
935 zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
936 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
937 vmk_flags.vmkf_permanent = TRUE;
938 ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
939 FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
940 &zone_tagbase_map);
941
942 if (ret != KERN_SUCCESS) {
943 panic("zone_init: kmem_suballoc failed");
944 }
945 zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);
946
947 zone_tags_map_size = 2048 * 1024 * sizeof(vm_tag_t);
948 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
949 vmk_flags.vmkf_permanent = TRUE;
950 ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
951 FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
952 &zone_tags_map);
953
954 if (ret != KERN_SUCCESS) {
955 panic("zone_init: kmem_suballoc failed");
956 }
957 zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);
958
959 ztBlocks = (ztBlock *) zone_tags_min;
960 ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));
961
962 // initialize the qheads
963 lck_mtx_lock(&ztLock);
964
965 ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
966 for (idx = 0; idx < ztFreeIndexCount; idx++) {
967 ztBlocks[idx].free = TRUE;
968 ztBlocks[idx].next = idx;
969 ztBlocks[idx].prev = idx;
970 ztBlocks[idx].size = 0;
971 }
972 // free remaining space
973 ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);
974
975 lck_mtx_unlock(&ztLock);
976 }
977
978 static void
979 ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
980 {
981 uint32_t * tagbase;
982 uint32_t count, block, blocks, idx;
983 size_t pages;
984
985 pages = atop(size);
986 tagbase = ZTAGBASE(zone, mem);
987
988 lck_mtx_lock(&ztLock);
989
990 // fault tagbase
991 ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);
992
993 if (!zone->tags_inline) {
994 // allocate tags
995 count = (uint32_t)(size / zone->elem_size);
996 blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
997 block = ztAlloc(zone, blocks);
998 if (-1U == block) {
999 ztDump(false);
1000 }
1001 assert(-1U != block);
1002 }
1003
1004 lck_mtx_unlock(&ztLock);
1005
1006 if (!zone->tags_inline) {
1007 // set tag base for each page
1008 block *= ztTagsPerBlock;
1009 for (idx = 0; idx < pages; idx++) {
1010 tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size);
1011 }
1012 }
1013 }
1014
1015 static void
1016 ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
1017 {
1018 uint32_t * tagbase;
1019 uint32_t count, block, blocks, idx;
1020 size_t pages;
1021
1022 // set tag base for each page
1023 pages = atop(size);
1024 tagbase = ZTAGBASE(zone, mem);
1025 block = tagbase[0];
1026 for (idx = 0; idx < pages; idx++) {
1027 tagbase[idx] = 0xFFFFFFFF;
1028 }
1029
1030 lck_mtx_lock(&ztLock);
1031 if (!zone->tags_inline) {
1032 count = (uint32_t)(size / zone->elem_size);
1033 blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
1034 assert(block != 0xFFFFFFFF);
1035 block /= ztTagsPerBlock;
1036 ztFree(NULL /* zone is unlocked */, block, blocks);
1037 }
1038
1039 lck_mtx_unlock(&ztLock);
1040 }
1041
1042 uint32_t
1043 zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
1044 {
1045 zone_t z;
1046 uint32_t idx;
1047
1048 simple_lock(&all_zones_lock, &zone_locks_grp);
1049
1050 for (idx = 0; idx < num_zones; idx++) {
1051 z = &(zone_array[idx]);
1052 if (!z->tags) {
1053 continue;
1054 }
1055 if (tag_zone_index != z->tag_zone_index) {
1056 continue;
1057 }
1058 *elem_size = z->elem_size;
1059 break;
1060 }
1061
1062 simple_unlock(&all_zones_lock);
1063
1064 if (idx == num_zones) {
1065 idx = -1U;
1066 }
1067
1068 return idx;
1069 }
1070
1071 #endif /* VM_MAX_TAG_ZONES */
1072
1073 /* Routine to get the size of a zone-allocated address. If the address doesn't belong to the
1074  * zone_map (and, with CONFIG_GZALLOC, is not a guard-mode allocation), returns 0.
1075 */
1076 vm_size_t
1077 zone_element_size(void *addr, zone_t *z)
1078 {
1079 struct zone *src_zone;
1080 if (from_zone_map(addr, sizeof(void *))) {
1081 struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
1082 src_zone = PAGE_METADATA_GET_ZONE(page_meta);
1083 if (z) {
1084 *z = src_zone;
1085 }
1086 return src_zone->elem_size;
1087 } else {
1088 #if CONFIG_GZALLOC
1089 vm_size_t gzsize;
1090 if (gzalloc_element_size(addr, z, &gzsize)) {
1091 return gzsize;
1092 }
1093 #endif /* CONFIG_GZALLOC */
1094
1095 return 0;
1096 }
1097 }
1098
1099 #if DEBUG || DEVELOPMENT
1100
1101 vm_size_t
1102 zone_element_info(void *addr, vm_tag_t * ptag)
1103 {
1104 vm_size_t size = 0;
1105 vm_tag_t tag = VM_KERN_MEMORY_NONE;
1106 struct zone * src_zone;
1107
1108 if (from_zone_map(addr, sizeof(void *))) {
1109 struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
1110 src_zone = PAGE_METADATA_GET_ZONE(page_meta);
1111 #if VM_MAX_TAG_ZONES
1112 if (__improbable(src_zone->tags)) {
1113 tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
1114 }
1115 #endif /* VM_MAX_TAG_ZONES */
1116 size = src_zone->elem_size;
1117 } else {
1118 #if CONFIG_GZALLOC
1119 gzalloc_element_size(addr, NULL, &size);
1120 #endif /* CONFIG_GZALLOC */
1121 }
1122 *ptag = tag;
1123 return size;
1124 }
1125
1126 #endif /* DEBUG || DEVELOPMENT */
1127
1128 /*
1129 * Zone checking helper function.
1130 * A pointer that satisfies these conditions is OK to be a freelist next pointer
1131 * A pointer that doesn't satisfy these conditions indicates corruption
1132 */
1133 static inline boolean_t
1134 is_sane_zone_ptr(zone_t zone,
1135 vm_offset_t addr,
1136 size_t obj_size)
1137 {
1138 /* Must be aligned to pointer boundary */
1139 if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0)) {
1140 return FALSE;
1141 }
1142
1143 /* Must be a kernel address */
1144 if (__improbable(!pmap_kernel_va(addr))) {
1145 return FALSE;
1146 }
1147
1148 /* Must be from zone map if the zone only uses memory from the zone_map */
1149 /*
1150 * TODO: Remove the zone->collectable check when every
1151 * zone using foreign memory is properly tagged with allows_foreign
1152 */
1153 if (zone->collectable && !zone->allows_foreign) {
1154 /* check if addr is from zone map */
1155 if (addr >= zone_map_min_address &&
1156 (addr + obj_size - 1) < zone_map_max_address) {
1157 return TRUE;
1158 }
1159
1160 return FALSE;
1161 }
1162
1163 return TRUE;
1164 }
1165
1166 static inline boolean_t
1167 is_sane_zone_page_metadata(zone_t zone,
1168 vm_offset_t page_meta)
1169 {
1170 /* NULL page metadata structures are invalid */
1171 if (page_meta == 0) {
1172 return FALSE;
1173 }
1174 return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata));
1175 }
1176
1177 static inline boolean_t
1178 is_sane_zone_element(zone_t zone,
1179 vm_offset_t addr)
1180 {
1181 /* NULL is OK because it indicates the tail of the list */
1182 if (addr == 0) {
1183 return TRUE;
1184 }
1185 return is_sane_zone_ptr(zone, addr, zone->elem_size);
1186 }
1187
1188 /* Someone wrote to freed memory. */
1189 static inline void
1190 /* noreturn */
1191 zone_element_was_modified_panic(zone_t zone,
1192 vm_offset_t element,
1193 vm_offset_t found,
1194 vm_offset_t expected,
1195 vm_offset_t offset)
1196 {
1197 panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p",
1198 zone->zone_name,
1199 (void *) expected,
1200 (void *) found,
1201 (void *) (expected ^ found),
1202 (uint32_t) offset,
1203 (uint32_t) zone->elem_size,
1204 (void *) element,
1205 (void *) zp_nopoison_cookie,
1206 (void *) zp_poisoned_cookie);
1207 }
1208
1209 /*
1210 * The primary and backup pointers don't match.
1211 * Determine which one was likely the corrupted pointer, find out what it
1212 * probably should have been, and panic.
1213 * I would like to mark this as noreturn, but panic() isn't marked noreturn.
1214 */
1215 static void
1216 /* noreturn */
1217 backup_ptr_mismatch_panic(zone_t zone,
1218 vm_offset_t element,
1219 vm_offset_t primary,
1220 vm_offset_t backup)
1221 {
1222 vm_offset_t likely_backup;
1223 vm_offset_t likely_primary;
1224
1225 likely_primary = primary ^ zp_nopoison_cookie;
1226 boolean_t sane_backup;
1227 boolean_t sane_primary = is_sane_zone_element(zone, likely_primary);
1228 boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE;
1229
1230 #if defined(__LP64__)
1231 /* We can inspect the tag in the upper bits for additional confirmation */
1232 if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) {
1233 element_was_poisoned = TRUE;
1234 } else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) {
1235 element_was_poisoned = FALSE;
1236 }
1237 #endif
1238
1239 if (element_was_poisoned) {
1240 likely_backup = backup ^ zp_poisoned_cookie;
1241 sane_backup = is_sane_zone_element(zone, likely_backup);
1242 } else {
1243 likely_backup = backup ^ zp_nopoison_cookie;
1244 sane_backup = is_sane_zone_element(zone, likely_backup);
1245 }
1246
1247 /* The primary is definitely the corrupted one */
1248 if (!sane_primary && sane_backup) {
1249 zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
1250 }
1251
1252 /* The backup is definitely the corrupted one */
1253 if (sane_primary && !sane_backup) {
1254 zone_element_was_modified_panic(zone, element, backup,
1255 (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
1256 zone->elem_size - sizeof(vm_offset_t));
1257 }
1258
1259 /*
1260 * Not sure which is the corrupted one.
1261 * It's less likely that the backup pointer was overwritten with
1262 * ( (sane address) ^ (valid cookie) ), so we'll guess that the
1263 * primary pointer has been overwritten with a sane but incorrect address.
1264 */
1265 if (sane_primary && sane_backup) {
1266 zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
1267 }
1268
1269 /* Neither are sane, so just guess. */
1270 zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
1271 }
1272
1273 /*
1274 * Adds the element to the head of the zone's free list
1275 * Keeps a backup next-pointer at the end of the element
1276 */
1277 static inline void
1278 free_to_zone(zone_t zone,
1279 vm_offset_t element,
1280 boolean_t poison)
1281 {
1282 vm_offset_t old_head;
1283 struct zone_page_metadata *page_meta;
1284
1285 vm_offset_t *primary = (vm_offset_t *) element;
1286 vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
1287
1288 page_meta = get_zone_page_metadata((struct zone_free_element *)element, FALSE);
1289 assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
1290 old_head = (vm_offset_t)page_metadata_get_freelist(page_meta);
1291
1292 if (__improbable(!is_sane_zone_element(zone, old_head))) {
1293 panic("zfree: invalid head pointer %p for freelist of zone %s\n",
1294 (void *) old_head, zone->zone_name);
1295 }
1296
1297 if (__improbable(!is_sane_zone_element(zone, element))) {
1298 panic("zfree: freeing invalid pointer %p to zone %s\n",
1299 (void *) element, zone->zone_name);
1300 }
1301
1302 if (__improbable(old_head == element)) {
1303 panic("zfree: double free of %p to zone %s\n",
1304 (void *) element, zone->zone_name);
1305 }
1306 /*
1307 * Always write a redundant next pointer
1308 * So that it is more difficult to forge, xor it with a random cookie
1309 * A poisoned element is indicated by using zp_poisoned_cookie
1310 * instead of zp_nopoison_cookie
1311 */
1312
1313 *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);
1314
1315 /*
1316 * Insert this element at the head of the free list. We also xor the
1317 * primary pointer with the zp_nopoison_cookie to make sure a free
1318 * element does not provide the location of the next free element directly.
1319 */
1320 *primary = old_head ^ zp_nopoison_cookie;
1321 page_metadata_set_freelist(page_meta, (struct zone_free_element *)element);
1322 page_meta->free_count++;
1323 if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) {
1324 if (page_meta->free_count == 1) {
1325 /* first foreign element freed on page, move from all_used */
1326 re_queue_tail(&zone->pages.any_free_foreign, &(page_meta->pages));
1327 } else {
1328 /* no other list transitions */
1329 }
1330 } else if (page_meta->free_count == get_metadata_alloc_count(page_meta)) {
1331                 /* whether the page was on the intermediate or all_used queue, move it to all_free */
1332 re_queue_tail(&zone->pages.all_free, &(page_meta->pages));
1333 zone->count_all_free_pages += page_meta->page_count;
1334 } else if (page_meta->free_count == 1) {
1335 /* first free element on page, move from all_used */
1336 re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
1337 }
1338 zone->count--;
1339 zone->countfree++;
1340
1341 #if KASAN_ZALLOC
1342 kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED);
1343 #endif
1344 }
1345
1346
1347 /*
1348 * Removes an element from the zone's free list, returning 0 if the free list is empty.
1349 * Verifies that the next-pointer and backup next-pointer are intact,
1350 * and verifies that a poisoned element hasn't been modified.
1351 */
1352 static inline vm_offset_t
1353 try_alloc_from_zone(zone_t zone,
1354 vm_tag_t tag __unused,
1355 boolean_t* check_poison)
1356 {
1357 vm_offset_t element;
1358 struct zone_page_metadata *page_meta;
1359
1360 *check_poison = FALSE;
1361
1362 /* if zone is empty, bail */
1363 if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign)) {
1364 page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
1365 } else if (!queue_empty(&zone->pages.intermediate)) {
1366 page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
1367 } else if (!queue_empty(&zone->pages.all_free)) {
1368 page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
1369 assert(zone->count_all_free_pages >= page_meta->page_count);
1370 zone->count_all_free_pages -= page_meta->page_count;
1371 } else {
1372 return 0;
1373 }
1374 /* Check if page_meta passes is_sane_zone_element */
1375 if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta))) {
1376 panic("zalloc: invalid metadata structure %p for freelist of zone %s\n",
1377 (void *) page_meta, zone->zone_name);
1378 }
1379 assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
1380 element = (vm_offset_t)page_metadata_get_freelist(page_meta);
1381
1382 if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size))) {
1383 panic("zfree: invalid head pointer %p for freelist of zone %s\n",
1384 (void *) element, zone->zone_name);
1385 }
1386
1387 vm_offset_t *primary = (vm_offset_t *) element;
1388 vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
1389
1390 /*
1391 * Since the primary next pointer is xor'ed with zp_nopoison_cookie
1392 * for obfuscation, retrieve the original value back
1393 */
1394 vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
1395 vm_offset_t next_element_primary = *primary;
1396 vm_offset_t next_element_backup = *backup;
1397
1398 /*
1399 * backup_ptr_mismatch_panic will determine what next_element
1400 * should have been, and print it appropriately
1401 */
1402 if (__improbable(!is_sane_zone_element(zone, next_element))) {
1403 backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
1404 }
1405
1406 /* Check the backup pointer for the regular cookie */
1407 if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) {
1408 /* Check for the poisoned cookie instead */
1409 if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie))) {
1410 /* Neither cookie is valid, corruption has occurred */
1411 backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
1412 }
1413
1414 /*
1415 * Element was marked as poisoned, so check its integrity before using it.
1416 */
1417 *check_poison = TRUE;
1418 }
1419
1420 /* Make sure the page_meta is at the correct offset from the start of page */
1421 if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE))) {
1422 panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n",
1423 page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE));
1424 }
1425
1426 /* Make sure next_element belongs to the same page as page_meta */
1427 if (next_element) {
1428 if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE))) {
1429 panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n",
1430 (void *)next_element, (void *)element, zone->zone_name);
1431 }
1432 }
1433
1434 /* Remove this element from the free list */
1435 page_metadata_set_freelist(page_meta, (struct zone_free_element *)next_element);
1436 page_meta->free_count--;
1437
1438 if (page_meta->free_count == 0) {
1439 /* move to all used */
1440 re_queue_tail(&zone->pages.all_used, &(page_meta->pages));
1441 } else {
1442 if (!zone->allows_foreign || from_zone_map(element, zone->elem_size)) {
1443 if (get_metadata_alloc_count(page_meta) == page_meta->free_count + 1) {
1444 /* remove from free, move to intermediate */
1445 re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
1446 }
1447 }
1448 }
1449 zone->countfree--;
1450 zone->count++;
1451 zone->sum_count++;
1452
1453 #if VM_MAX_TAG_ZONES
1454 if (__improbable(zone->tags)) {
1455 // set the tag with b0 clear so the block remains inuse
1456 ZTAG(zone, element)[0] = (tag << 1);
1457 }
1458 #endif /* VM_MAX_TAG_ZONES */
1459
1460
1461 #if KASAN_ZALLOC
1462 kasan_poison_range(element, zone->elem_size, ASAN_VALID);
1463 #endif
1464
1465 return element;
1466 }
1467
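/*
 * Summary of the per-zone page queue transitions implemented by free_to_zone()
 * and try_alloc_from_zone() above (illustration, not original source):
 *
 *      all_used      --(first element freed)-->      intermediate
 *      intermediate  --(last element freed)-->       all_free
 *      all_free      --(an element allocated)-->     intermediate or all_used
 *      intermediate  --(last free element taken)-->  all_used
 *
 * Foreign pages (zone->allows_foreign, backed by memory outside the zone_map)
 * move only between all_used and any_free_foreign.
 */
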
1468 /*
1469 * End of zone poisoning
1470 */
1471
1472 /*
1473 * Zone info options
1474 */
1475 #define ZINFO_SLOTS MAX_ZONES /* for now */
1476
1477 zone_t zone_find_largest(void);
1478
1479 /*
1480 * Async allocation of zones
1481 * This mechanism allows for bootstrapping an empty zone which is setup with
1482 * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
1483 * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
1484 * This will prime the zone for the next use.
1485 *
1486  * Currently the thread_call callout function (zalloc_async) will loop through all zones
1487 * looking for any zone with async_pending set and do the work for it.
1488 *
1489 * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
1490 * then zalloc_noblock to an empty zone may succeed.
1491 */
1492 void zalloc_async(
1493 thread_call_param_t p0,
1494 thread_call_param_t p1);
1495
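/*
 * Minimal sketch of the priming step described above (illustration only, using
 * the public zalloc()/zfree() interfaces; the real zalloc_async() later in this
 * file walks all zones looking for async_pending and does the equivalent):
 *
 *      void *tmp = zalloc(z);          // may block; grows the zone
 *      if (tmp != NULL) {
 *              zfree(z, tmp);          // return it immediately, leaving the zone primed
 *      }
 */
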
1496 static thread_call_data_t call_async_alloc;
1497
1498 /*
1499 * Align elements that use the zone page list to 32 byte boundaries.
1500 */
1501 #define ZONE_ELEMENT_ALIGNMENT 32
1502
1503 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
1504 #define zone_sleep(zone) \
1505 (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT);
1506
1507
1508 #define lock_zone_init(zone) \
1509 MACRO_BEGIN \
1510 lck_attr_setdefault(&(zone)->lock_attr); \
1511 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
1512 &zone_locks_grp, &(zone)->lock_attr); \
1513 MACRO_END
1514
1515 #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock)
1516
1517 /*
1518 * Exclude more than one concurrent garbage collection
1519 */
1520 decl_lck_mtx_data(, zone_gc_lock)
1521
1522 lck_attr_t zone_gc_lck_attr;
1523 lck_grp_t zone_gc_lck_grp;
1524 lck_grp_attr_t zone_gc_lck_grp_attr;
1525 lck_mtx_ext_t zone_gc_lck_ext;
1526
1527 boolean_t zone_gc_allowed = TRUE;
1528 boolean_t panic_include_zprint = FALSE;
1529
1530 mach_memory_info_t *panic_kext_memory_info = NULL;
1531 vm_size_t panic_kext_memory_size = 0;
1532
1533 #define ZALLOC_DEBUG_ZONEGC 0x00000001
1534 #define ZALLOC_DEBUG_ZCRAM 0x00000002
1535 uint32_t zalloc_debug = 0;
1536
1537 /*
1538 * Zone leak debugging code
1539 *
1540 * When enabled, this code keeps a log to track allocations to a particular zone that have not
1541 * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
1542 * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
1543 * off by default.
1544 *
1545 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
1546 * is the name of the zone you wish to log.
1547 *
1548 * This code only tracks one zone, so you need to identify which one is leaking first.
1549 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
1550 * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
1551 * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
1552 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
1553 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
1554 * See the help in the kgmacros for usage info.
1555 *
1556 *
1557 * Zone corruption logging
1558 *
1559 * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
1560 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
1561 * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
1562 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
1563 * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
1564 * corrupted to examine its history. This should lead to the source of the corruption.
1565 */
1566
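/*
 * Example boot-args (illustration only; "kalloc.48" is just a sample zone name):
 *
 *      zlog=kalloc.48                   leak-style logging for one zone
 *      zlog=kalloc.48 -zc               corruption-style logging (allocs and frees)
 *      zlog=kalloc.48 -zc zrecs=8000    same, with a larger log (bounded by ZRECORDS_MAX)
 *
 * A '.' in the zlog value matches a ' ' in the zone name (see track_this_zone()
 * below), since spaces cannot be passed in boot-args.
 */
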
1567 static boolean_t log_records_init = FALSE;
1568 static int log_records; /* size of the log, expressed in number of records */
1569
1570 #define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */
1571
1572 static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
1573 static int num_zones_logged = 0;
1574
1575 static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
1576
1577 /* Log allocations and frees to help debug a zone element corruption */
1578 boolean_t corruption_debug_flag = DEBUG; /* enabled by "-zc" boot-arg */
1579 /* Making pointer scanning leaks detection possible for all zones */
1580
1581 #if DEBUG || DEVELOPMENT
1582 boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-arg */
1583 #endif /* DEBUG || DEVELOPMENT */
1584
1585
1586 /*
1587 * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
1588 * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this
1589 * is the number of stacks suspected of leaking, we don't need many records.
1590 */
1591
1592 #if defined(__LP64__)
1593 #define ZRECORDS_MAX 2560 /* Max records allowed in the log */
1594 #else
1595 #define ZRECORDS_MAX 1536 /* Max records allowed in the log */
1596 #endif
1597 #define ZRECORDS_DEFAULT 1024            /* default records in log if zrecs is not specified in boot-args */
1598
1599 /*
1600 * Each record in the log contains a pointer to the zone element it refers to,
1601 * and a small array to hold the pc's from the stack trace. A
1602 * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
1603 * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
1604 * If the log fills, old records are replaced as if it were a circular buffer.
1605 */
1606
1607
1608 /*
1609 * Decide if we want to log this zone by doing a string compare between a zone name and the name
1610 * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
1611 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
1612 * match a space in the zone name.
1613 */
1614
1615 int
1616 track_this_zone(const char *zonename, const char *logname)
1617 {
1618 unsigned int len;
1619 const char *zc = zonename;
1620 const char *lc = logname;
1621
1622 /*
1623 * Compare the strings. We bound the compare by MAX_ZONE_NAME.
1624 */
1625
1626 for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
1627 /*
1628                  * If the current characters don't match, check for a space
1629                  * in the zone name and a corresponding period in the log name.
1630 * If that's not there, then the strings don't match.
1631 */
1632
1633 if (*zc != *lc && !(*zc == ' ' && *lc == '.')) {
1634 break;
1635 }
1636
1637 /*
1638 * The strings are equal so far. If we're at the end, then it's a match.
1639 */
1640
1641 if (*zc == '\0') {
1642 return TRUE;
1643 }
1644 }
1645
1646 return FALSE;
1647 }
1648
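/*
 * Example (illustration only; "vm objects" is just a sample zone name). Because
 * a '.' in the boot-arg stands in for a ' ' in the zone name:
 *
 *      track_this_zone("vm objects", "vm.objects")              -> TRUE
 *      track_this_zone("vm objects", "vm objects")              -> TRUE
 *      track_this_zone("vm object hash entries", "vm.objects")  -> FALSE
 */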
1649
1650 /*
1651 * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
1652 * the buffer for the records has been allocated.
1653 */
1654
1655 #define DO_LOGGING(z) (z->zone_logging == TRUE && z->zlog_btlog)
1656
1657 extern boolean_t kmem_alloc_ready;
1658
1659 #if CONFIG_ZLEAKS
1660 #pragma mark -
1661 #pragma mark Zone Leak Detection
1662
1663 /*
1664 * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
1665 * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
1666 * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
1667 * and stop tracking it if it was being tracked.
1668 *
1669 * We track the allocations in the zallocations hash table, which stores the address that was returned from
1670 * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
1671 * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
1672 * backtraces - we don't store them more than once.
1673 *
1674 * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
1675 * a large amount of virtual space.
1676 */
1677 #define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
1678 #define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
1679 #define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
1680 #define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
1681 uint32_t zleak_state = 0; /* State of collection, as above */
1682
1683 boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
1684 vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
1685 vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
1686 unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */
1687
1688 /*
1689 * Counters for allocation statistics.
1690 */
1691
1692 /* Times two active records want to occupy the same spot */
1693 unsigned int z_alloc_collisions = 0;
1694 unsigned int z_trace_collisions = 0;
1695
1696 /* Times a new record lands on a spot previously occupied by a freed allocation */
1697 unsigned int z_alloc_overwrites = 0;
1698 unsigned int z_trace_overwrites = 0;
1699
1700 /* Times a new alloc or trace is put into the hash table */
1701 unsigned int z_alloc_recorded = 0;
1702 unsigned int z_trace_recorded = 0;
1703
1704 /* Times zleak_log returned false due to not being able to acquire the lock */
1705 unsigned int z_total_conflicts = 0;
1706
1707
1708 #pragma mark struct zallocation
1709 /*
1710 * Structure for keeping track of an allocation
1711 * An allocation bucket is in use if its element is not NULL
1712 */
1713 struct zallocation {
1714 uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
1715 vm_size_t za_size; /* how much memory did this allocation take up? */
1716 uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
1717 /* TODO: #if this out */
1718 uint32_t za_hit_count; /* for determining effectiveness of hash function */
1719 };
1720
1721 /* Size must be a power of two so the hash functions can just mask off bits instead of taking a modulus */
1722 uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
1723 uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
1724
1725 vm_size_t zleak_max_zonemap_size;
1726
1727 /* Hashmaps of allocations and their corresponding traces */
1728 static struct zallocation* zallocations;
1729 static struct ztrace* ztraces;
1730
1731 /* not static so that panic can see this, see kern/debug.c */
1732 struct ztrace* top_ztrace;
1733
1734 /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
1735 static lck_spin_t zleak_lock;
1736 static lck_attr_t zleak_lock_attr;
1737 static lck_grp_t zleak_lock_grp;
1738 static lck_grp_attr_t zleak_lock_grp_attr;
1739
1740 /*
1741 * Initializes the zone leak monitor. Called from zone_init()
1742 */
1743 static void
1744 zleak_init(vm_size_t max_zonemap_size)
1745 {
1746 char scratch_buf[16];
1747 boolean_t zleak_enable_flag = FALSE;
1748
1749 zleak_max_zonemap_size = max_zonemap_size;
1750 zleak_global_tracking_threshold = max_zonemap_size / 2;
1751 zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
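	/*
	 * Worked example (numbers illustrative only): with a 512 MB zone map,
	 * collection is armed once the map is 256 MB full, and an individual
	 * zone is only sampled once it occupies at least 32 MB of virtual space.
	 */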
1752
1753 #if CONFIG_EMBEDDED
1754 if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
1755 zleak_enable_flag = TRUE;
1756 printf("zone leak detection enabled\n");
1757 } else {
1758 zleak_enable_flag = FALSE;
1759 printf("zone leak detection disabled\n");
1760 }
1761 #else /* CONFIG_EMBEDDED */
1762 /* -zleakoff (flag to disable zone leak monitor) */
1763 if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
1764 zleak_enable_flag = FALSE;
1765 printf("zone leak detection disabled\n");
1766 } else {
1767 zleak_enable_flag = TRUE;
1768 printf("zone leak detection enabled\n");
1769 }
1770 #endif /* CONFIG_EMBEDDED */
1771
1772 /* zfactor=XXXX (override how often to sample the zone allocator) */
1773 if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
1774 printf("Zone leak factor override: %u\n", zleak_sample_factor);
1775 }
1776
1777 /* zleak-allocs=XXXX (override number of buckets in zallocations) */
1778 if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
1779 printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets);
1780 /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
1781 if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets - 1))) {
1782 printf("Override isn't a power of two, bad things might happen!\n");
1783 }
1784 }
1785
1786 /* zleak-traces=XXXX (override number of buckets in ztraces) */
1787 if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
1788 printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets);
1789 /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
1790 if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets - 1))) {
1791 printf("Override isn't a power of two, bad things might happen!\n");
1792 }
1793 }
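	/*
	 * Illustrative boot-args (values are examples only, not recommendations):
	 *
	 *   zfactor=500 zleak-allocs=32768 zleak-traces=16384
	 *
	 * The bucket counts must remain powers of two so the hash functions
	 * below can mask the hash value instead of taking a modulus.
	 */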
1794
1795 /* allocate the zleak_lock */
1796 lck_grp_attr_setdefault(&zleak_lock_grp_attr);
1797 lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
1798 lck_attr_setdefault(&zleak_lock_attr);
1799 lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
1800
1801 if (zleak_enable_flag) {
1802 zleak_state = ZLEAK_STATE_ENABLED;
1803 }
1804 }
1805
1806 #if CONFIG_ZLEAKS
1807
1808 /*
1809 * Support for kern.zleak.active sysctl - a simplified
1810 * version of the zleak_state variable.
1811 */
1812 int
1813 get_zleak_state(void)
1814 {
1815 if (zleak_state & ZLEAK_STATE_FAILED) {
1816 return -1;
1817 }
1818 if (zleak_state & ZLEAK_STATE_ACTIVE) {
1819 return 1;
1820 }
1821 return 0;
1822 }
1823
1824 #endif
1825
1826
1827 kern_return_t
1828 zleak_activate(void)
1829 {
1830 kern_return_t retval;
1831 vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
1832 vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
1833 void *allocations_ptr = NULL;
1834 void *traces_ptr = NULL;
1835
1836 /* Only one thread attempts to activate at a time */
1837 if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
1838 return KERN_SUCCESS;
1839 }
1840
1841 /* Indicate that we're doing the setup */
1842 lck_spin_lock(&zleak_lock);
1843 if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
1844 lck_spin_unlock(&zleak_lock);
1845 return KERN_SUCCESS;
1846 }
1847
1848 zleak_state |= ZLEAK_STATE_ACTIVATING;
1849 lck_spin_unlock(&zleak_lock);
1850
1851 /* Allocate and zero tables */
1852 retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
1853 if (retval != KERN_SUCCESS) {
1854 goto fail;
1855 }
1856
1857 retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK);
1858 if (retval != KERN_SUCCESS) {
1859 goto fail;
1860 }
1861
1862 bzero(allocations_ptr, z_alloc_size);
1863 bzero(traces_ptr, z_trace_size);
1864
1865 /* Everything's set. Install tables, mark active. */
1866 zallocations = allocations_ptr;
1867 ztraces = traces_ptr;
1868
1869 /*
1870 * Initialize the top_ztrace to the first entry in ztraces,
1871 * so we don't have to check for null in zleak_log
1872 */
1873 top_ztrace = &ztraces[0];
1874
1875 /*
1876 * Note that we do need a barrier between installing
1877 * the tables and setting the active flag, because the zfree()
1878 * path accesses the table without a lock if we're active.
1879 */
1880 lck_spin_lock(&zleak_lock);
1881 zleak_state |= ZLEAK_STATE_ACTIVE;
1882 zleak_state &= ~ZLEAK_STATE_ACTIVATING;
1883 lck_spin_unlock(&zleak_lock);
1884
1885 	return KERN_SUCCESS;
1886
1887 fail:
1888 /*
1889 * If we fail to allocate memory, don't further tax
1890 * the system by trying again.
1891 */
1892 lck_spin_lock(&zleak_lock);
1893 zleak_state |= ZLEAK_STATE_FAILED;
1894 zleak_state &= ~ZLEAK_STATE_ACTIVATING;
1895 lck_spin_unlock(&zleak_lock);
1896
1897 if (allocations_ptr != NULL) {
1898 kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
1899 }
1900
1901 if (traces_ptr != NULL) {
1902 kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
1903 }
1904
1905 return retval;
1906 }
1907
1908 /*
1909 * TODO: What about allocations that never get deallocated,
1910 * especially ones with unique backtraces? Should we wait to record
1911 * until after boot has completed?
1912 * (How many persistent zallocs are there?)
1913 */
1914
1915 /*
1916 * This function records the allocation in the allocations table
1917 * and stores the associated backtrace in the traces table
1918 * (or just bumps the trace's refcount if the trace is already recorded).
1919 * If the allocation slot is in use, the old allocation there is replaced with this one, and
1920 * the old allocation's trace has its refcount (bytes outstanding) decremented.
1921 * If the trace slot is occupied by a different trace, the function bails without recording.
1922 * The refcount tracks the amount of memory the trace's outstanding allocations consume.
1923 * The return value indicates whether the caller should try again next time.
1924 */
1925 static boolean_t
1926 zleak_log(uintptr_t* bt,
1927 uintptr_t addr,
1928 uint32_t depth,
1929 vm_size_t allocation_size)
1930 {
1931 /* Quit if there's someone else modifying the hash tables */
1932 if (!lck_spin_try_lock(&zleak_lock)) {
1933 z_total_conflicts++;
1934 return FALSE;
1935 }
1936
1937 struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
1938
1939 uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
1940 struct ztrace* trace = &ztraces[trace_index];
1941
1942 allocation->za_hit_count++;
1943 trace->zt_hit_count++;
1944
1945 /*
1946 * If the allocation bucket we want to be in is occupied, and if the occupier
1947 * has the same trace as us, just bail.
1948 */
1949 if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
1950 z_alloc_collisions++;
1951
1952 lck_spin_unlock(&zleak_lock);
1953 return TRUE;
1954 }
1955
1956 /* STEP 1: Store the backtrace in the traces array. */
1957 /* A size of zero indicates that the trace bucket is free. */
1958
1959 if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0) {
1960 /*
1961 * Different unique trace with same hash!
1962 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
1963 * and get out of the way for later chances
1964 */
1965 trace->zt_collisions++;
1966 z_trace_collisions++;
1967
1968 lck_spin_unlock(&zleak_lock);
1969 return TRUE;
1970 } else if (trace->zt_size > 0) {
1971 /* Same trace, already added, so increment refcount */
1972 trace->zt_size += allocation_size;
1973 } else {
1974 /* Found an unused trace bucket, record the trace here! */
1975 if (trace->zt_depth != 0) { /* if this slot was previously used but not currently in use */
1976 z_trace_overwrites++;
1977 }
1978
1979 z_trace_recorded++;
1980 trace->zt_size = allocation_size;
1981 memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)));
1982
1983 trace->zt_depth = depth;
1984 trace->zt_collisions = 0;
1985 }
1986
1987 /* STEP 2: Store the allocation record in the allocations array. */
1988
1989 if (allocation->za_element != (uintptr_t) 0) {
1990 /*
1991 * Straight up replace any allocation record that was there. It isn't worth the work
1992 * to preserve the old entry, because we only record a subset of the
1993 * allocations anyway.
1994 */
1995
1996 z_alloc_collisions++;
1997
1998 struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
1999 /* Knock off old allocation's size, not the new allocation */
2000 associated_trace->zt_size -= allocation->za_size;
2001 } else if (allocation->za_trace_index != 0) {
2002 /* Slot previously used but not currently in use */
2003 z_alloc_overwrites++;
2004 }
2005
2006 allocation->za_element = addr;
2007 allocation->za_trace_index = trace_index;
2008 allocation->za_size = allocation_size;
2009
2010 z_alloc_recorded++;
2011
2012 if (top_ztrace->zt_size < trace->zt_size) {
2013 top_ztrace = trace;
2014 }
2015
2016 lck_spin_unlock(&zleak_lock);
2017 return TRUE;
2018 }
2019
2020 /*
2021 * Free the allocation record and release the stacktrace.
2022 * This should be as fast as possible because it will be called for every free.
2023 */
2024 static void
2025 zleak_free(uintptr_t addr,
2026 vm_size_t allocation_size)
2027 {
2028 if (addr == (uintptr_t) 0) {
2029 return;
2030 }
2031
2032 struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
2033
2034 /* Double-checked locking: check to find out if we're interested, lock, check to make
2035 * sure it hasn't changed, then modify it, and release the lock.
2036 */
2037
2038 if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
2039 /* if the allocation was the one, grab the lock, check again, then delete it */
2040 lck_spin_lock(&zleak_lock);
2041
2042 if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
2043 struct ztrace *trace;
2044
2045 /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
2046 if (allocation->za_size != allocation_size) {
2047 panic("Freeing as size %lu memory that was allocated with size %lu\n",
2048 (uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
2049 }
2050
2051 trace = &ztraces[allocation->za_trace_index];
2052
2053 /* size of 0 indicates trace bucket is unused */
2054 if (trace->zt_size > 0) {
2055 trace->zt_size -= allocation_size;
2056 }
2057
2058 /* A NULL element means the allocation bucket is unused */
2059 allocation->za_element = 0;
2060 }
2061 lck_spin_unlock(&zleak_lock);
2062 }
2063 }
2064
2065 #endif /* CONFIG_ZLEAKS */
2066
2067 /* These functions live outside of CONFIG_ZLEAKS because they are also used in
2068 * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
2069 */
2070
2071 /* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
2072 uintptr_t
2073 hash_mix(uintptr_t x)
2074 {
2075 #ifndef __LP64__
2076 x += ~(x << 15);
2077 x ^= (x >> 10);
2078 x += (x << 3);
2079 x ^= (x >> 6);
2080 x += ~(x << 11);
2081 x ^= (x >> 16);
2082 #else
2083 x += ~(x << 32);
2084 x ^= (x >> 22);
2085 x += ~(x << 13);
2086 x ^= (x >> 8);
2087 x += (x << 3);
2088 x ^= (x >> 15);
2089 x += ~(x << 27);
2090 x ^= (x >> 31);
2091 #endif
2092 return x;
2093 }
2094
2095 uint32_t
2096 hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
2097 {
2098 uintptr_t hash = 0;
2099 uintptr_t mask = max_size - 1;
2100
2101 while (depth) {
2102 hash += bt[--depth];
2103 }
2104
2105 hash = hash_mix(hash) & mask;
2106
2107 assert(hash < max_size);
2108
2109 return (uint32_t) hash;
2110 }
2111
2112 /*
2113 * TODO: Determine how well distributed this is
2114 * max_size must be a power of 2, e.g. 0x10000, because 0x10000 - 1 is 0x0FFFF, which makes a good bitmask
2115 */
2116 uint32_t
2117 hashaddr(uintptr_t pt, uint32_t max_size)
2118 {
2119 uintptr_t hash = 0;
2120 uintptr_t mask = max_size - 1;
2121
2122 hash = hash_mix(pt) & mask;
2123
2124 assert(hash < max_size);
2125
2126 return (uint32_t) hash;
2127 }
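/*
 * Example (illustrative): with max_size == 0x10000 the mask is 0xFFFF, so both
 * hashbacktrace() and hashaddr() return the low 16 bits of the mixed value,
 * which is always a valid bucket index below max_size.
 */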
2128
2129 /* End of all leak-detection code */
2130 #pragma mark -
2131
2132 #define ZONE_MAX_ALLOC_SIZE (32 * 1024)
2133 #define ZONE_ALLOC_FRAG_PERCENT(alloc_size, ele_size) (((alloc_size % ele_size) * 100) / alloc_size)
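/*
 * Worked example (illustrative, assuming a 4K page size): for a 1536-byte element,
 * a 2-page (8192-byte) chunk wastes 8192 % 1536 = 512 bytes, so
 * ZONE_ALLOC_FRAG_PERCENT(8192, 1536) = (512 * 100) / 8192 = 6 (percent, truncated).
 */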
2134
2135 /* Used to manage copying in of new zone names */
2136 static vm_offset_t zone_names_start;
2137 static vm_offset_t zone_names_next;
2138
2139 static vm_size_t
2140 compute_element_size(vm_size_t requested_size)
2141 {
2142 vm_size_t element_size = requested_size;
2143
2144 /* Zone elements must fit both a next pointer and a backup pointer */
2145 vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2;
2146 if (element_size < minimum_element_size) {
2147 element_size = minimum_element_size;
2148 }
2149
2150 /*
2151 * Round the element size up to a multiple of sizeof(pointer).
2152 * This also enforces that allocations will be aligned on pointer boundaries.
2153 */
2154 element_size = ((element_size - 1) + sizeof(vm_offset_t)) -
2155 ((element_size - 1) % sizeof(vm_offset_t));
2156
2157 return element_size;
2158 }
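/*
 * compute_element_size() examples (illustrative, assuming sizeof(vm_offset_t) == 8):
 * a 5-byte request is bumped to the 16-byte two-pointer minimum, and a 20-byte
 * request is rounded up to 24 bytes, the next multiple of the pointer size.
 */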
2159
2160 #if KASAN_ZALLOC
2161
2162 /*
2163 * Called from zinit().
2164 *
2165 * Fixes up the zone's element size to incorporate the redzones.
2166 */
2167 static void
2168 kasan_update_element_size_for_redzone(
2169 zone_t zone, /* the zone that needs to be updated */
2170 vm_size_t *size, /* requested zone element size */
2171 vm_size_t *max, /* maximum memory to use */
2172 const char *name) /* zone name */
2173 {
2174 /* Expand the zone allocation size to include the redzones. For page-multiple
2175 * zones add a full guard page because they likely require alignment. kalloc
2176 * and fakestack handle their own KASan state, so ignore those zones. */
2177 /* XXX: remove this when zinit_with_options() is a thing */
2178 const char *kalloc_name = "kalloc.";
2179 const char *fakestack_name = "fakestack.";
2180 if (strncmp(name, kalloc_name, strlen(kalloc_name)) == 0) {
2181 zone->kasan_redzone = 0;
2182 } else if (strncmp(name, fakestack_name, strlen(fakestack_name)) == 0) {
2183 zone->kasan_redzone = 0;
2184 } else {
2185 if ((*size % PAGE_SIZE) != 0) {
2186 zone->kasan_redzone = KASAN_GUARD_SIZE;
2187 } else {
2188 zone->kasan_redzone = PAGE_SIZE;
2189 }
2190 *max = (*max / *size) * (*size + zone->kasan_redzone * 2);
2191 *size += zone->kasan_redzone * 2;
2192 }
2193 }
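/*
 * Worked example (illustrative; the actual KASAN_GUARD_SIZE is platform-defined):
 * if KASAN_GUARD_SIZE were 16 and the caller requested 64-byte elements with
 * max = 6400, the stored element grows to 64 + 2*16 = 96 bytes and max is
 * rescaled to (6400 / 64) * 96 = 9600, so the zone still holds ~100 elements.
 */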
2194
2195 /*
2196 * Called from zalloc_internal() to fix up the address of the newly
2197 * allocated element.
2198 *
2199 * Returns the element address skipping over the redzone on the left.
2200 */
2201 static vm_offset_t
2202 kasan_fixup_allocated_element_address(
2203 zone_t zone, /* the zone the element belongs to */
2204 vm_offset_t addr) /* address of the element, including the redzone */
2205 {
2206 /* Fixup the return address to skip the redzone */
2207 if (zone->kasan_redzone) {
2208 addr = kasan_alloc(addr, zone->elem_size,
2209 zone->elem_size - 2 * zone->kasan_redzone, zone->kasan_redzone);
2210 }
2211 return addr;
2212 }
2213
2214 /*
2215 * Called from zfree() to add the element being freed to the KASan quarantine.
2216 *
2217 * Returns true if the newly-freed element made it into the quarantine without
2218 * displacing another, false otherwise. In the latter case, addrp points to the
2219 * address of the displaced element, which will be freed by the zone.
2220 */
2221 static bool
2222 kasan_quarantine_freed_element(
2223 zone_t *zonep, /* the zone the element is being freed to */
2224 void **addrp) /* address of the element being freed */
2225 {
2226 zone_t zone = *zonep;
2227 void *addr = *addrp;
2228
2229 /*
2230 * Resize back to the real allocation size and hand off to the KASan
2231 * quarantine. `addr` may then point to a different allocation, if the
2232 * current element replaced another in the quarantine. The zone then
2233 * takes ownership of the swapped out free element.
2234 */
2235 vm_size_t usersz = zone->elem_size - 2 * zone->kasan_redzone;
2236 vm_size_t sz = usersz;
2237
2238 if (addr && zone->kasan_redzone) {
2239 kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC);
2240 addr = (void *)kasan_dealloc((vm_address_t)addr, &sz);
2241 assert(sz == zone->elem_size);
2242 }
2243 if (addr && zone->kasan_quarantine) {
2244 kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, zonep, usersz, true);
2245 if (!addr) {
2246 return TRUE;
2247 }
2248 }
2249 *addrp = addr;
2250 return FALSE;
2251 }
2252
2253 #endif /* KASAN_ZALLOC */
2254
2255 /*
2256 * zinit initializes a new zone. The zone data structures themselves
2257 * are stored in a zone, which is initially a static structure that
2258 * is initialized by zone_init.
2259 */
2260
2261 zone_t
2262 zinit(
2263 vm_size_t size, /* the size of an element */
2264 vm_size_t max, /* maximum memory to use */
2265 vm_size_t alloc, /* allocation size */
2266 const char *name) /* a name for the zone */
2267 {
2268 zone_t z;
2269
2270 size = compute_element_size(size);
2271
2272 simple_lock(&all_zones_lock, &zone_locks_grp);
2273
2274 assert(num_zones < MAX_ZONES);
2275 assert(num_zones_in_use <= num_zones);
2276
2277 /* If possible, find a previously zdestroy'ed zone in the zone_array that we can reuse instead of initializing a new zone. */
2278 for (int index = bitmap_first(zone_empty_bitmap, MAX_ZONES);
2279 index >= 0 && index < (int)num_zones;
2280 index = bitmap_next(zone_empty_bitmap, index)) {
2281 z = &(zone_array[index]);
2282
2283 /*
2284 * If the zone name and the element size are the same, we can just reuse the old zone struct.
2285 * Otherwise hand out a new zone from the zone_array.
2286 */
2287 if (!strcmp(z->zone_name, name)) {
2288 vm_size_t old_size = z->elem_size;
2289 #if KASAN_ZALLOC
2290 old_size -= z->kasan_redzone * 2;
2291 #endif
2292 if (old_size == size) {
2293 /* Clear the empty bit for this zone, increment num_zones_in_use, and mark the zone as valid again. */
2294 bitmap_clear(zone_empty_bitmap, index);
2295 num_zones_in_use++;
2296 z->zone_valid = TRUE;
2297
2298 /* All other state is already set up since the zone was previously in use. Return early. */
2299 simple_unlock(&all_zones_lock);
2300 return z;
2301 }
2302 }
2303 }
2304
2305 /* If we're here, it means we didn't find a zone above that we could simply reuse. Set up a new zone. */
2306
2307 /* Clear the empty bit for the new zone */
2308 bitmap_clear(zone_empty_bitmap, num_zones);
2309
2310 z = &(zone_array[num_zones]);
2311 z->index = num_zones;
2312
2313 num_zones++;
2314 num_zones_in_use++;
2315
2316 /*
2317 * Initialize the zone lock here before dropping the all_zones_lock. Otherwise we could race with
2318 * zalloc_async() and try to grab the zone lock before it has been initialized, causing a panic.
2319 */
2320 lock_zone_init(z);
2321
2322 simple_unlock(&all_zones_lock);
2323
2324 #if KASAN_ZALLOC
2325 kasan_update_element_size_for_redzone(z, &size, &max, name);
2326 #endif
2327
2328 max = round_page(max);
2329
2330 vm_size_t best_alloc = PAGE_SIZE;
2331
2332 if ((size % PAGE_SIZE) == 0) {
2333 /* zero fragmentation by definition */
2334 best_alloc = size;
2335 } else {
2336 vm_size_t alloc_size;
2337 for (alloc_size = (2 * PAGE_SIZE); alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) {
2338 if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, size) < ZONE_ALLOC_FRAG_PERCENT(best_alloc, size)) {
2339 best_alloc = alloc_size;
2340 }
2341 }
2342 }
2343
2344 alloc = best_alloc;
2345 if (max && (max < alloc)) {
2346 max = alloc;
2347 }
2348
2349 z->free_elements = NULL;
2350 queue_init(&z->pages.any_free_foreign);
2351 queue_init(&z->pages.all_free);
2352 queue_init(&z->pages.intermediate);
2353 queue_init(&z->pages.all_used);
2354 z->cur_size = 0;
2355 z->page_count = 0;
2356 z->max_size = max;
2357 z->elem_size = size;
2358 z->alloc_size = alloc;
2359 z->count = 0;
2360 z->countfree = 0;
2361 z->count_all_free_pages = 0;
2362 z->sum_count = 0LL;
2363 z->doing_alloc_without_vm_priv = FALSE;
2364 z->doing_alloc_with_vm_priv = FALSE;
2365 z->exhaustible = FALSE;
2366 z->collectable = TRUE;
2367 z->allows_foreign = FALSE;
2368 z->expandable = TRUE;
2369 z->waiting = FALSE;
2370 z->async_pending = FALSE;
2371 z->caller_acct = TRUE;
2372 z->noencrypt = FALSE;
2373 z->no_callout = FALSE;
2374 z->async_prio_refill = FALSE;
2375 z->gzalloc_exempt = FALSE;
2376 z->alignment_required = FALSE;
2377 z->zone_replenishing = FALSE;
2378 z->prio_refill_watermark = 0;
2379 z->zone_replenish_thread = NULL;
2380 z->zp_count = 0;
2381 z->kasan_quarantine = TRUE;
2382 z->zone_valid = TRUE;
2383 z->cpu_cache_enabled = FALSE;
2384
2385 #if CONFIG_ZLEAKS
2386 z->zleak_capture = 0;
2387 z->zleak_on = FALSE;
2388 #endif /* CONFIG_ZLEAKS */
2389
2390 /*
2391 * If the VM is ready to handle kmem_alloc requests, copy the zone name passed in.
2392 *
2393 * Else simply maintain a pointer to the name string. The only zones we'll actually have
2394 * to do this for would be the VM-related zones that are created very early on before any
2395 * kexts can be loaded (or unloaded). So we should be fine with just a pointer in this case.
2396 */
2397 if (kmem_alloc_ready) {
2398 size_t len = MIN(strlen(name) + 1, MACH_ZONE_NAME_MAX_LEN);
2399
2400 if (zone_names_start == 0 || ((zone_names_next - zone_names_start) + len) > PAGE_SIZE) {
2401 printf("zalloc: allocating memory for zone names buffer\n");
2402 kern_return_t retval = kmem_alloc_kobject(kernel_map, &zone_names_start,
2403 PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
2404 if (retval != KERN_SUCCESS) {
2405 panic("zalloc: zone_names memory allocation failed");
2406 }
2407 bzero((char *)zone_names_start, PAGE_SIZE);
2408 zone_names_next = zone_names_start;
2409 }
2410
2411 strlcpy((char *)zone_names_next, name, len);
2412 z->zone_name = (char *)zone_names_next;
2413 zone_names_next += len;
2414 } else {
2415 z->zone_name = name;
2416 }
2417
2418 /*
2419 * Check for and set up zone logging if requested via boot-args. We recognize two
2420 * boot-args:
2421 *
2422 * zlog=<zone_to_log>
2423 * zrecs=<num_records_in_log>
2424 *
2425 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
2426 * control the size of the log. If zrecs is not specified, a default value is used.
2427 */
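/*
 * Illustrative boot-args (the zone name and record count are examples only):
 *
 *   zlog1=kalloc.32 zrecs=8000
 *
 * The zrecs value is clamped to ZRECORDS_MAX below, so asking for more than
 * that simply caps the log size.
 */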
2428
2429 if (num_zones_logged < max_num_zones_to_log) {
2430 int i = 1; /* zlog0 isn't allowed. */
2431 boolean_t zone_logging_enabled = FALSE;
2432 char zlog_name[MAX_ZONE_NAME] = ""; /* Temp. buffer to create the strings zlog1, zlog2 etc... */
2433
2434 while (i <= max_num_zones_to_log) {
2435 snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d", i);
2436
2437 if (PE_parse_boot_argn(zlog_name, zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
2438 if (track_this_zone(z->zone_name, zone_name_to_log)) {
2439 if (z->zone_valid) {
2440 z->zone_logging = TRUE;
2441 zone_logging_enabled = TRUE;
2442 num_zones_logged++;
2443 break;
2444 }
2445 }
2446 }
2447 i++;
2448 }
2449
2450 if (zone_logging_enabled == FALSE) {
2451 /*
2452 * Backwards compatibility with the old boot-arg used to specify single-zone logging, i.e. zlog.
2453 * This check needs to happen after the newer zlog<n> checks because the "zlog" prefix matches
2454 * all of the zlog<n> boot-args.
2455 */
2456 if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
2457 if (track_this_zone(z->zone_name, zone_name_to_log)) {
2458 if (z->zone_valid) {
2459 z->zone_logging = TRUE;
2460 zone_logging_enabled = TRUE;
2461 num_zones_logged++;
2462 }
2463 }
2464 }
2465 }
2466
2467 if (log_records_init == FALSE && zone_logging_enabled == TRUE) {
2468 if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
2469 /*
2470 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
2471 * This prevents accidentally hogging too much kernel memory and making the system
2472 * unusable.
2473 */
2474
2475 log_records = MIN(ZRECORDS_MAX, log_records);
2476 log_records_init = TRUE;
2477 } else {
2478 log_records = ZRECORDS_DEFAULT;
2479 log_records_init = TRUE;
2480 }
2481 }
2482
2483 /*
2484 * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
2485 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. kmem_alloc_ready is set to
2486 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
2487 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
2488 * later on some other zone. Note that we may therefore be allocating a buffer to log a zone other than the one being initialized
2489 * right now.
2490 */
2491 if (kmem_alloc_ready) {
2492 zone_t curr_zone = NULL;
2493 unsigned int max_zones = 0, zone_idx = 0;
2494
2495 simple_lock(&all_zones_lock, &zone_locks_grp);
2496 max_zones = num_zones;
2497 simple_unlock(&all_zones_lock);
2498
2499 for (zone_idx = 0; zone_idx < max_zones; zone_idx++) {
2500 curr_zone = &(zone_array[zone_idx]);
2501
2502 if (!curr_zone->zone_valid) {
2503 continue;
2504 }
2505
2506 /*
2507 * We work with the zone unlocked here because we could end up needing the zone lock to
2508 * enable logging for this zone, e.g. we may need a VM object in order to allocate memory to enable logging for the
2509 * VM objects zone.
2510 *
2511 * We don't expect these zones to be needed at this early a time in boot and so take this chance.
2512 */
2513 if (curr_zone->zone_logging && curr_zone->zlog_btlog == NULL) {
2514 curr_zone->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH, (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */);
2515
2516 if (curr_zone->zlog_btlog) {
2517 printf("zone: logging started for zone %s\n", curr_zone->zone_name);
2518 } else {
2519 printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
2520 curr_zone->zone_logging = FALSE;
2521 }
2522 }
2523 }
2524 }
2525 }
2526
2527 #if CONFIG_GZALLOC
2528 gzalloc_zone_init(z);
2529 #endif
2530
2531 #if CONFIG_ZCACHE
2532 /* Check if boot-arg specified it should have a cache */
2533 if (cache_all_zones || track_this_zone(name, cache_zone_name)) {
2534 zone_change(z, Z_CACHING_ENABLED, TRUE);
2535 }
2536 #endif
2537
2538 return z;
2539 }
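/*
 * Illustrative zinit() usage (the struct and zone name here are hypothetical):
 *
 *   zone_t widget_zone = zinit(sizeof(struct widget),
 *                              1024 * sizeof(struct widget),  (maximum memory)
 *                              PAGE_SIZE,                     (allocation size hint)
 *                              "widget zone");
 *
 * Note that the alloc argument is effectively advisory: zinit() recomputes the
 * allocation chunk size above to minimize per-chunk fragmentation.
 */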
2540 unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated, zone_replenish_throttle_count;
2541
2542 static void zone_replenish_thread(zone_t);
2543
2544 /* High priority VM privileged thread used to asynchronously refill a designated
2545 * zone, such as the reserved VM map entry zone.
2546 */
2547 __attribute__((noreturn))
2548 static void
2549 zone_replenish_thread(zone_t z)
2550 {
2551 vm_size_t free_size;
2552 current_thread()->options |= TH_OPT_VMPRIV;
2553
2554 for (;;) {
2555 lock_zone(z);
2556 assert(z->zone_valid);
2557 z->zone_replenishing = TRUE;
2558 assert(z->prio_refill_watermark != 0);
2559 while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
2560 assert(z->doing_alloc_without_vm_priv == FALSE);
2561 assert(z->doing_alloc_with_vm_priv == FALSE);
2562 assert(z->async_prio_refill == TRUE);
2563
2564 unlock_zone(z);
2565 int zflags = KMA_KOBJECT | KMA_NOPAGEWAIT;
2566 vm_offset_t space, alloc_size;
2567 kern_return_t kr;
2568
2569 if (vm_pool_low()) {
2570 alloc_size = round_page(z->elem_size);
2571 } else {
2572 alloc_size = z->alloc_size;
2573 }
2574
2575 if (z->noencrypt) {
2576 zflags |= KMA_NOENCRYPT;
2577 }
2578
2579 /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
2580 if (is_zone_map_nearing_exhaustion()) {
2581 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2582 }
2583
2584 kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
2585
2586 if (kr == KERN_SUCCESS) {
2587 zcram(z, space, alloc_size);
2588 } else if (kr == KERN_RESOURCE_SHORTAGE) {
2589 VM_PAGE_WAIT();
2590 } else if (kr == KERN_NO_SPACE) {
2591 kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
2592 if (kr == KERN_SUCCESS) {
2593 zcram(z, space, alloc_size);
2594 } else {
2595 assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC);
2596 thread_block(THREAD_CONTINUE_NULL);
2597 }
2598 }
2599
2600 lock_zone(z);
2601 assert(z->zone_valid);
2602 zone_replenish_loops++;
2603 }
2604
2605 z->zone_replenishing = FALSE;
2606 /* Signal any potentially throttled consumers, terminating
2607 * their timer-bounded waits.
2608 */
2609 thread_wakeup(z);
2610
2611 assert_wait(&z->zone_replenish_thread, THREAD_UNINT);
2612 unlock_zone(z);
2613 thread_block(THREAD_CONTINUE_NULL);
2614 zone_replenish_wakeups++;
2615 }
2616 }
2617
2618 void
2619 zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark)
2620 {
2621 z->prio_refill_watermark = low_water_mark;
2622
2623 z->async_prio_refill = TRUE;
2624 OSMemoryBarrier();
2625 kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread);
2626
2627 if (tres != KERN_SUCCESS) {
2628 panic("zone_prio_refill_configure, thread create: 0x%x", tres);
2629 }
2630
2631 thread_deallocate(z->zone_replenish_thread);
2632 }
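/*
 * Illustrative usage (zone and watermark are hypothetical): a caller that needs
 * a reserve, such as the reserved VM map entry zone mentioned above, would do
 * something like:
 *
 *   zone_prio_refill_configure(reserved_zone, 128);
 *
 * after which the replenish thread tries to keep roughly 128 elements free.
 */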
2633
2634 void
2635 zdestroy(zone_t z)
2636 {
2637 unsigned int zindex;
2638
2639 assert(z != NULL);
2640
2641 lock_zone(z);
2642 assert(z->zone_valid);
2643
2644 /* Assert that the zone does not have any allocations in flight */
2645 assert(z->doing_alloc_without_vm_priv == FALSE);
2646 assert(z->doing_alloc_with_vm_priv == FALSE);
2647 assert(z->async_pending == FALSE);
2648 assert(z->waiting == FALSE);
2649 assert(z->async_prio_refill == FALSE);
2650
2651 #if !KASAN_ZALLOC
2652 /*
2653 * Unset the valid bit. We'll hit an assert failure on further operations on this zone, until zinit() is called again.
2654 * Leave the zone valid for KASan as we will see zfree's on quarantined free elements even after the zone is destroyed.
2655 */
2656 z->zone_valid = FALSE;
2657 #endif
2658 unlock_zone(z);
2659
2660 #if CONFIG_ZCACHE
2661 /* zdestroy() does not support zones that still have per-cpu caching enabled. */
2662 if (zone_caching_enabled(z)) {
2663 panic("zdestroy: Zone caching enabled for zone %s", z->zone_name);
2664 }
2665 #endif /* CONFIG_ZCACHE */
2666
2667 /* Dump all the free elements */
2668 drop_free_elements(z);
2669
2670 #if CONFIG_GZALLOC
2671 /* If the zone is gzalloc managed dump all the elements in the free cache */
2672 gzalloc_empty_free_cache(z);
2673 #endif
2674
2675 lock_zone(z);
2676
2677 #if !KASAN_ZALLOC
2678 /* Assert that all counts are zero */
2679 assert(z->count == 0);
2680 assert(z->countfree == 0);
2681 assert(z->cur_size == 0);
2682 assert(z->page_count == 0);
2683 assert(z->count_all_free_pages == 0);
2684
2685 /* Assert that all queues except the foreign queue are empty. The zone allocator doesn't know how to free up foreign memory. */
2686 assert(queue_empty(&z->pages.all_used));
2687 assert(queue_empty(&z->pages.intermediate));
2688 assert(queue_empty(&z->pages.all_free));
2689 #endif
2690
2691 zindex = z->index;
2692
2693 unlock_zone(z);
2694
2695 simple_lock(&all_zones_lock, &zone_locks_grp);
2696
2697 assert(!bitmap_test(zone_empty_bitmap, zindex));
2698 /* Mark the zone as empty in the bitmap */
2699 bitmap_set(zone_empty_bitmap, zindex);
2700 num_zones_in_use--;
2701 assert(num_zones_in_use > 0);
2702
2703 simple_unlock(&all_zones_lock);
2704 }
2705
2706 /* Initialize the metadata for an allocation chunk */
2707 static inline void
2708 zcram_metadata_init(vm_offset_t newmem, vm_size_t size, struct zone_page_metadata *chunk_metadata)
2709 {
2710 struct zone_page_metadata *page_metadata;
2711
2712 /* The first page is the real metadata for this allocation chunk. We mark the others as fake metadata */
2713 size -= PAGE_SIZE;
2714 newmem += PAGE_SIZE;
2715
2716 for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
2717 page_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE);
2718 assert(page_metadata != chunk_metadata);
2719 PAGE_METADATA_SET_ZINDEX(page_metadata, MULTIPAGE_METADATA_MAGIC);
2720 page_metadata_set_realmeta(page_metadata, chunk_metadata);
2721 page_metadata->free_count = 0;
2722 }
2723 return;
2724 }
2725
2726
2727 static void
2728 random_free_to_zone(
2729 zone_t zone,
2730 vm_offset_t newmem,
2731 vm_offset_t first_element_offset,
2732 int element_count,
2733 unsigned int *entropy_buffer)
2734 {
2735 vm_offset_t last_element_offset;
2736 vm_offset_t element_addr;
2737 vm_size_t elem_size;
2738 int index;
2739
2740 assert(element_count && element_count <= ZONE_CHUNK_MAXELEMENTS);
2741 elem_size = zone->elem_size;
2742 last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size);
2743 for (index = 0; index < element_count; index++) {
2744 assert(first_element_offset <= last_element_offset);
2745 if (
2746 #if DEBUG || DEVELOPMENT
2747 leak_scan_debug_flag || __improbable(zone->tags) ||
2748 #endif /* DEBUG || DEVELOPMENT */
2749 random_bool_gen_bits(&zone_bool_gen, entropy_buffer, MAX_ENTROPY_PER_ZCRAM, 1)) {
2750 element_addr = newmem + first_element_offset;
2751 first_element_offset += elem_size;
2752 } else {
2753 element_addr = newmem + last_element_offset;
2754 last_element_offset -= elem_size;
2755 }
2756 if (element_addr != (vm_offset_t)zone) {
2757 zone->count++; /* compensate for free_to_zone */
2758 free_to_zone(zone, element_addr, FALSE);
2759 }
2760 zone->cur_size += elem_size;
2761 }
2762 }
2763
2764 /*
2765 * Cram the given memory into the specified zone. Update the zone page count accordingly.
2766 */
2767 void
2768 zcram(
2769 zone_t zone,
2770 vm_offset_t newmem,
2771 vm_size_t size)
2772 {
2773 vm_size_t elem_size;
2774 boolean_t from_zm = FALSE;
2775 int element_count;
2776 unsigned int entropy_buffer[MAX_ENTROPY_PER_ZCRAM] = { 0 };
2777
2778 /* Basic sanity checks */
2779 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
2780 assert(!zone->collectable || zone->allows_foreign
2781 || (from_zone_map(newmem, size)));
2782
2783 elem_size = zone->elem_size;
2784
2785 KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, zone->index, size);
2786
2787 if (from_zone_map(newmem, size)) {
2788 from_zm = TRUE;
2789 }
2790
2791 if (!from_zm) {
2792 /* We cannot support elements larger than page size for foreign memory because we
2793 * put metadata on the page itself for each page of foreign memory. We need to do
2794 * this in order to be able to reach the metadata when any element is freed
2795 */
2796 assert((zone->allows_foreign == TRUE) && (zone->elem_size <= (PAGE_SIZE - sizeof(struct zone_page_metadata))));
2797 }
2798
2799 if (zalloc_debug & ZALLOC_DEBUG_ZCRAM) {
2800 kprintf("zcram(%p[%s], 0x%lx%s, 0x%lx)\n", zone, zone->zone_name,
2801 (unsigned long)newmem, from_zm ? "" : "[F]", (unsigned long)size);
2802 }
2803
2804 ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
2805
2806 /*
2807 * Initialize the metadata for all pages. We don't need the zone lock
2808 * here because we are not manipulating any zone related state yet.
2809 */
2810
2811 struct zone_page_metadata *chunk_metadata;
2812 size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);
2813
2814 assert((newmem & PAGE_MASK) == 0);
2815 assert((size & PAGE_MASK) == 0);
2816
2817 chunk_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE);
2818 chunk_metadata->pages.next = NULL;
2819 chunk_metadata->pages.prev = NULL;
2820 page_metadata_set_freelist(chunk_metadata, 0);
2821 PAGE_METADATA_SET_ZINDEX(chunk_metadata, zone->index);
2822 chunk_metadata->free_count = 0;
2823 assert((size / PAGE_SIZE) <= ZONE_CHUNK_MAXPAGES);
2824 chunk_metadata->page_count = (unsigned)(size / PAGE_SIZE);
2825
2826 zcram_metadata_init(newmem, size, chunk_metadata);
2827
2828 #if VM_MAX_TAG_ZONES
2829 if (__improbable(zone->tags)) {
2830 assert(from_zm);
2831 ztMemoryAdd(zone, newmem, size);
2832 }
2833 #endif /* VM_MAX_TAG_ZONES */
2834
2835 lock_zone(zone);
2836 assert(zone->zone_valid);
2837 enqueue_tail(&zone->pages.all_used, &(chunk_metadata->pages));
2838
2839 if (!from_zm) {
2840 /* Foreign memory keeps its metadata at the start of each page, so carve
2841 * elements out of each page individually, starting just past the per-page
2842 * metadata (rounded up to the element alignment).
2843 */
2844
2845 for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
2846 vm_offset_t first_element_offset = 0;
2847 if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0) {
2848 first_element_offset = zone_page_metadata_size;
2849 } else {
2850 first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
2851 }
2852 element_count = (unsigned int)((PAGE_SIZE - first_element_offset) / elem_size);
2853 random_free_to_zone(zone, newmem, first_element_offset, element_count, entropy_buffer);
2854 }
2855 } else {
2856 element_count = (unsigned int)(size / elem_size);
2857 random_free_to_zone(zone, newmem, 0, element_count, entropy_buffer);
2858 }
2859 unlock_zone(zone);
2860
2861 KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, zone->index);
2862 }
2863
2864 /*
2865 * Fill a zone with enough memory to contain at least nelem elements.
2866 * Return the number of elements actually put into the zone, which may
2867 * be more than the caller asked for since the memory allocation is
2868 * rounded up to the next zone allocation size.
2869 */
2870 int
2871 zfill(
2872 zone_t zone,
2873 int nelem)
2874 {
2875 kern_return_t kr;
2876 vm_offset_t memory;
2877
2878 vm_size_t alloc_size = zone->alloc_size;
2879 vm_size_t elem_per_alloc = alloc_size / zone->elem_size;
2880 vm_size_t nalloc = (nelem + elem_per_alloc - 1) / elem_per_alloc;
2881
2882 /* Don't mix-and-match zfill with foreign memory */
2883 assert(!zone->allows_foreign);
2884
2885 /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
2886 if (is_zone_map_nearing_exhaustion()) {
2887 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2888 }
2889
2890 kr = kernel_memory_allocate(zone_map, &memory, nalloc * alloc_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_ZONE);
2891 if (kr != KERN_SUCCESS) {
2892 printf("%s: kernel_memory_allocate() of %lu bytes failed\n",
2893 __func__, (unsigned long)(nalloc * alloc_size));
2894 return 0;
2895 }
2896
2897 for (vm_size_t i = 0; i < nalloc; i++) {
2898 zcram(zone, memory + i * alloc_size, alloc_size);
2899 }
2900
2901 return (int)(nalloc * elem_per_alloc);
2902 }
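/*
 * Worked example (illustrative): for a zone with a 16 KB alloc_size and
 * 256-byte elements, elem_per_alloc is 64, so zfill(zone, 100) rounds up to
 * two 16 KB allocations and returns 128.
 */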
2903
2904 /*
2905 * Initialize the "zone of zones" which uses fixed memory allocated
2906 * earlier in memory initialization. zone_bootstrap is called
2907 * before zone_init.
2908 */
2909 void
2910 zone_bootstrap(void)
2911 {
2912 char temp_buf[16];
2913
2914 if (!PE_parse_boot_argn("zalloc_debug", &zalloc_debug, sizeof(zalloc_debug))) {
2915 zalloc_debug = 0;
2916 }
2917
2918 /* Set up zone element poisoning */
2919 zp_init();
2920
2921 random_bool_init(&zone_bool_gen);
2922
2923 /* should zlog log to debug zone corruption instead of leaks? */
2924 if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
2925 corruption_debug_flag = TRUE;
2926 }
2927
2928 #if DEBUG || DEVELOPMENT
2929 /* should perform zone element size checking in copyin/copyout? */
2930 if (PE_parse_boot_argn("-no-copyio-zalloc-check", temp_buf, sizeof(temp_buf))) {
2931 copyio_zalloc_check = FALSE;
2932 }
2933 #if VM_MAX_TAG_ZONES
2934 /* enable tags for zones that ask for it */
2935 if (PE_parse_boot_argn("-zt", temp_buf, sizeof(temp_buf))) {
2936 zone_tagging_on = TRUE;
2937 }
2938 #endif /* VM_MAX_TAG_ZONES */
2939 /* disable element location randomization in a page */
2940 if (PE_parse_boot_argn("-zl", temp_buf, sizeof(temp_buf))) {
2941 leak_scan_debug_flag = TRUE;
2942 }
2943 #endif
2944
2945 simple_lock_init(&all_zones_lock, 0);
2946
2947 num_zones_in_use = 0;
2948 num_zones = 0;
2949 /* Mark all zones as empty */
2950 bitmap_full(zone_empty_bitmap, BITMAP_LEN(MAX_ZONES));
2951 zone_names_next = zone_names_start = 0;
2952
2953 #if DEBUG || DEVELOPMENT
2954 simple_lock_init(&zone_test_lock, 0);
2955 #endif /* DEBUG || DEVELOPMENT */
2956
2957 thread_call_setup(&call_async_alloc, zalloc_async, NULL);
2958
2959 /* initializing global lock group for zones */
2960 lck_grp_attr_setdefault(&zone_locks_grp_attr);
2961 lck_grp_init(&zone_locks_grp, "zone_locks", &zone_locks_grp_attr);
2962
2963 lck_attr_setdefault(&zone_metadata_lock_attr);
2964 lck_mtx_init_ext(&zone_metadata_region_lck, &zone_metadata_region_lck_ext, &zone_locks_grp, &zone_metadata_lock_attr);
2965
2966 #if CONFIG_ZCACHE
2967 /* zcc_enable_for_zone_name=<zone>: enable per-cpu zone caching for <zone>. */
2968 if (PE_parse_boot_arg_str("zcc_enable_for_zone_name", cache_zone_name, sizeof(cache_zone_name))) {
2969 printf("zcache: caching enabled for zone %s\n", cache_zone_name);
2970 }
2971
2972 /* -zcache_all: enable per-cpu zone caching for all zones, overrides 'zcc_enable_for_zone_name'. */
2973 if (PE_parse_boot_argn("-zcache_all", temp_buf, sizeof(temp_buf))) {
2974 cache_all_zones = TRUE;
2975 printf("zcache: caching enabled for all zones\n");
2976 }
2977 #endif /* CONFIG_ZCACHE */
2978 }
2979
2980 /*
2981 * We're being very conservative here and picking a value of 95%. We might need to lower this if
2982 * we find that we're not catching the problem and are still hitting zone map exhaustion panics.
2983 */
2984 #define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95
2985
2986 /*
2987 * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit.
2988 * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default.
2989 */
2990 unsigned int zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT;
2991
2992 /*
2993 * Returns pid of the task with the largest number of VM map entries.
2994 */
2995 extern pid_t find_largest_process_vm_map_entries(void);
2996
2997 /*
2998 * Callout to jetsam. If pid is -1, we wake up the memorystatus thread to do asynchronous kills.
2999 * For any other pid we try to kill that process synchronously.
3000 */
3001 boolean_t memorystatus_kill_on_zone_map_exhaustion(pid_t pid);
3002
3003 void
3004 get_zone_map_size(uint64_t *current_size, uint64_t *capacity)
3005 {
3006 *current_size = zone_map->size;
3007 *capacity = vm_map_max(zone_map) - vm_map_min(zone_map);
3008 }
3009
3010 void
3011 get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size)
3012 {
3013 zone_t largest_zone = zone_find_largest();
3014 strlcpy(zone_name, largest_zone->zone_name, zone_name_len);
3015 *zone_size = largest_zone->cur_size;
3016 }
3017
3018 boolean_t
3019 is_zone_map_nearing_exhaustion(void)
3020 {
3021 uint64_t size = zone_map->size;
3022 uint64_t capacity = vm_map_max(zone_map) - vm_map_min(zone_map);
3023 if (size > ((capacity * zone_map_jetsam_limit) / 100)) {
3024 return TRUE;
3025 }
3026 return FALSE;
3027 }
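/*
 * Worked example (illustrative): with the default 95% limit and a 1 GB zone
 * map, jetsams start being considered once more than roughly 972 MB of the
 * map is in use.
 */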
3028
3029 extern zone_t vm_map_entry_zone;
3030 extern zone_t vm_object_zone;
3031
3032 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
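/*
 * Illustrative: with this 98% ratio, if the VM objects zone holds 1,000,000
 * elements, the VM map entries zone is treated as the largest whenever it
 * holds at least 980,000 elements.
 */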
3033
3034 /*
3035 * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread
3036 * to walk through the jetsam priority bands and kill processes.
3037 */
3038 static void
3039 kill_process_in_largest_zone(void)
3040 {
3041 pid_t pid = -1;
3042 zone_t largest_zone = zone_find_largest();
3043
3044 printf("zone_map_exhaustion: Zone map size %lld, capacity %lld [jetsam limit %d%%]\n", (uint64_t)zone_map->size,
3045 (uint64_t)(vm_map_max(zone_map) - vm_map_min(zone_map)), zone_map_jetsam_limit);
3046 printf("zone_map_exhaustion: Largest zone %s, size %lu\n", largest_zone->zone_name, (uintptr_t)largest_zone->cur_size);
3047
3048 /*
3049 * Make sure we don't call this function from a user task's context, or we could end up trying to synchronously kill the process
3050 * whose context we're in, causing the system to hang.
3051 */
3052 assert(current_task() == kernel_task);
3053
3054 /*
3055 * If vm_object_zone is the largest, check to see if the number of elements in vm_map_entry_zone is comparable. If so, consider
3056 * vm_map_entry_zone as the largest. This lets us target a specific process to jetsam to quickly recover from the zone map bloat.
3057 */
3058 if (largest_zone == vm_object_zone) {
3059 unsigned int vm_object_zone_count = vm_object_zone->count;
3060 unsigned int vm_map_entry_zone_count = vm_map_entry_zone->count;
3061 /* Is the VM map entries zone count >= 98% of the VM objects zone count? */
3062 if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
3063 largest_zone = vm_map_entry_zone;
3064 printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n", (uintptr_t)largest_zone->cur_size);
3065 }
3066 }
3067
3068 /* TODO: Extend this to check for the largest process in other zones as well. */
3069 if (largest_zone == vm_map_entry_zone) {
3070 pid = find_largest_process_vm_map_entries();
3071 } else {
3072 printf("zone_map_exhaustion: Nothing to do for the largest zone [%s]. Waking up memorystatus thread.\n", largest_zone->zone_name);
3073 }
3074 if (!memorystatus_kill_on_zone_map_exhaustion(pid)) {
3075 printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid);
3076 }
3077 }
3078
3079 /* Global initialization of Zone Allocator.
3080 * Runs after zone_bootstrap.
3081 */
3082 void
3083 zone_init(
3084 vm_size_t max_zonemap_size)
3085 {
3086 kern_return_t retval;
3087 vm_offset_t zone_min;
3088 vm_offset_t zone_max;
3089 vm_offset_t zone_metadata_space;
3090 unsigned int zone_pages;
3091 vm_map_kernel_flags_t vmk_flags;
3092
3093 #if VM_MAX_TAG_ZONES
3094 if (zone_tagging_on) {
3095 ztInit(max_zonemap_size, &zone_locks_grp);
3096 }
3097 #endif
3098
3099 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
3100 vmk_flags.vmkf_permanent = TRUE;
3101 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
3102 FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_ZONE,
3103 &zone_map);
3104
3105 if (retval != KERN_SUCCESS) {
3106 panic("zone_init: kmem_suballoc failed");
3107 }
3108 zone_max = zone_min + round_page(max_zonemap_size);
3109
3110 #if CONFIG_GZALLOC
3111 gzalloc_init(max_zonemap_size);
3112 #endif
3113
3114 /*
3115 * Setup garbage collection information:
3116 */
3117 zone_map_min_address = zone_min;
3118 zone_map_max_address = zone_max;
3119
3120 zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
3121 zone_metadata_space = round_page(zone_pages * sizeof(struct zone_page_metadata));
3122 retval = kernel_memory_allocate(zone_map, &zone_metadata_region_min, zone_metadata_space,
3123 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_OSFMK);
3124 if (retval != KERN_SUCCESS) {
3125 panic("zone_init: zone_metadata_region initialization failed!");
3126 }
3127 zone_metadata_region_max = zone_metadata_region_min + zone_metadata_space;
3128
3129 #if defined(__LP64__)
3130 /*
3131 * ensure that any vm_page_t that gets created from
3132 * the vm_page zone can be packed properly (see vm_page.h
3133 * for the packing requirements).
3134 */
3135 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_metadata_region_max))) != (vm_page_t)zone_metadata_region_max) {
3136 panic("VM_PAGE_PACK_PTR failed on zone_metadata_region_max - %p", (void *)zone_metadata_region_max);
3137 }
3138
3139 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_max_address))) != (vm_page_t)zone_map_max_address) {
3140 panic("VM_PAGE_PACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);
3141 }
3142 #endif
3143
3144 lck_grp_attr_setdefault(&zone_gc_lck_grp_attr);
3145 lck_grp_init(&zone_gc_lck_grp, "zone_gc", &zone_gc_lck_grp_attr);
3146 lck_attr_setdefault(&zone_gc_lck_attr);
3147 lck_mtx_init_ext(&zone_gc_lock, &zone_gc_lck_ext, &zone_gc_lck_grp, &zone_gc_lck_attr);
3148
3149 #if CONFIG_ZLEAKS
3150 /*
3151 * Initialize the zone leak monitor
3152 */
3153 zleak_init(max_zonemap_size);
3154 #endif /* CONFIG_ZLEAKS */
3155
3156 #if VM_MAX_TAG_ZONES
3157 if (zone_tagging_on) {
3158 vm_allocation_zones_init();
3159 }
3160 #endif
3161
3162 int jetsam_limit_temp = 0;
3163 if (PE_parse_boot_argn("zone_map_jetsam_limit", &jetsam_limit_temp, sizeof(jetsam_limit_temp)) &&
3164 jetsam_limit_temp > 0 && jetsam_limit_temp <= 100) {
3165 zone_map_jetsam_limit = jetsam_limit_temp;
3166 }
3167 }
3168
3169 #pragma mark -
3170 #pragma mark zalloc_canblock
3171
3172 extern boolean_t early_boot_complete;
3173
3174 void
3175 zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr)
3176 {
3177 vm_offset_t inner_size = zone->elem_size;
3178 if (__improbable(check_poison && addr)) {
3179 vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1;
3180 vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr);
3181
3182 for (; element_cursor < backup; element_cursor++) {
3183 if (__improbable(*element_cursor != ZP_POISON)) {
3184 zone_element_was_modified_panic(zone,
3185 addr,
3186 *element_cursor,
3187 ZP_POISON,
3188 ((vm_offset_t)element_cursor) - addr);
3189 }
3190 }
3191 }
3192
3193 if (addr) {
3194 /*
3195 * Clear out the old next pointer and backup to avoid leaking the cookie
3196 * and so that only values on the freelist have a valid cookie
3197 */
3198
3199 vm_offset_t *primary = (vm_offset_t *) addr;
3200 vm_offset_t *backup = get_backup_ptr(inner_size, primary);
3201
3202 *primary = ZP_POISON;
3203 *backup = ZP_POISON;
3204 }
3205 }
3206
3207 /*
3208 * zalloc returns an element from the specified zone.
3209 */
3210 static void *
3211 zalloc_internal(
3212 zone_t zone,
3213 boolean_t canblock,
3214 boolean_t nopagewait,
3215 vm_size_t
3216 #if !VM_MAX_TAG_ZONES
3217 __unused
3218 #endif
3219 reqsize,
3220 vm_tag_t tag)
3221 {
3222 vm_offset_t addr = 0;
3223 kern_return_t retval;
3224 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
3225 unsigned int numsaved = 0;
3226 boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE;
3227 thread_t thr = current_thread();
3228 boolean_t check_poison = FALSE;
3229 boolean_t set_doing_alloc_with_vm_priv = FALSE;
3230
3231 #if CONFIG_ZLEAKS
3232 uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
3233 #endif /* CONFIG_ZLEAKS */
3234
3235 #if KASAN
3236 /*
3237 * KASan uses zalloc() for fakestack, which can be called anywhere. However,
3238 * we make sure these calls can never block.
3239 */
3240 boolean_t irq_safe = FALSE;
3241 const char *fakestack_name = "fakestack.";
3242 if (strncmp(zone->zone_name, fakestack_name, strlen(fakestack_name)) == 0) {
3243 irq_safe = TRUE;
3244 }
3245 #elif MACH_ASSERT
3246 /* In every other case, zalloc() from interrupt context is unsafe. */
3247 const boolean_t irq_safe = FALSE;
3248 #endif
3249
3250 assert(zone != ZONE_NULL);
3251 assert(irq_safe || ml_get_interrupts_enabled() || ml_is_quiescing() || debug_mode_active() || !early_boot_complete);
3252
3253 #if CONFIG_GZALLOC
3254 addr = gzalloc_alloc(zone, canblock);
3255 #endif
3256 /*
3257 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
3258 */
3259 if (__improbable(DO_LOGGING(zone))) {
3260 numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);
3261 }
3262
3263 #if CONFIG_ZLEAKS
3264 /*
3265 * Zone leak detection: capture a backtrace every zleak_sample_factor
3266 * allocations in this zone.
3267 */
3268 if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) {
3269 /* Avoid backtracing twice if zone logging is on */
3270 if (numsaved == 0) {
3271 zleak_tracedepth = backtrace(zbt, MAX_ZTRACE_DEPTH);
3272 } else {
3273 zleak_tracedepth = numsaved;
3274 }
3275 }
3276 #endif /* CONFIG_ZLEAKS */
3277
3278 #if VM_MAX_TAG_ZONES
3279 if (__improbable(zone->tags)) {
3280 vm_tag_will_update_zone(tag, zone->tag_zone_index);
3281 }
3282 #endif /* VM_MAX_TAG_ZONES */
3283
3284 #if CONFIG_ZCACHE
3285 if (__probable(addr == 0)) {
3286 if (zone_caching_enabled(zone)) {
3287 addr = zcache_alloc_from_cpu_cache(zone);
3288 if (addr) {
3289 #if KASAN_ZALLOC
3290 addr = kasan_fixup_allocated_element_address(zone, addr);
3291 #endif
3292 DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
3293 return (void *)addr;
3294 }
3295 }
3296 }
3297 #endif /* CONFIG_ZCACHE */
3298
3299 lock_zone(zone);
3300 assert(zone->zone_valid);
3301
3302 if (zone->async_prio_refill && zone->zone_replenish_thread) {
3303 vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size));
3304 vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size;
3305 zone_replenish_wakeup = (zfreec < zrefillwm);
3306 zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0));
3307
3308 do {
3309 if (zone_replenish_wakeup) {
3310 zone_replenish_wakeups_initiated++;
3311 /* Signal the potentially waiting
3312 * refill thread.
3313 */
3314 thread_wakeup(&zone->zone_replenish_thread);
3315
3316 /* We don't want to wait around for zone_replenish_thread to bump up the free count
3317 * if we're in zone_gc(). This keeps us from deadlocking with zone_replenish_thread.
3318 */
3319 if (thr->options & TH_OPT_ZONE_GC) {
3320 break;
3321 }
3322
3323 unlock_zone(zone);
3324 /* Scheduling latencies etc. may prevent
3325 * the refill thread from keeping up
3326 * with demand. Throttle consumers
3327 * when we fall below half the
3328 * watermark, unless VM privileged
3329 */
3330 if (zone_alloc_throttle) {
3331 zone_replenish_throttle_count++;
3332 assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC);
3333 thread_block(THREAD_CONTINUE_NULL);
3334 }
3335 lock_zone(zone);
3336 assert(zone->zone_valid);
3337 }
3338
3339 zfreec = (zone->cur_size - (zone->count * zone->elem_size));
3340 zrefillwm = zone->prio_refill_watermark * zone->elem_size;
3341 zone_replenish_wakeup = (zfreec < zrefillwm);
3342 zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0));
3343 } while (zone_alloc_throttle == TRUE);
3344 }
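/*
 * Worked example of the refill/throttle thresholds above (illustrative
 * numbers only, not taken from any particular zone): for a zone with
 * prio_refill_watermark = 100 and elem_size = 64, the replenish thread is
 * woken whenever fewer than 100 * 64 = 6400 free bytes remain; callers
 * without TH_OPT_VMPRIV are throttled once free space drops below half of
 * that (3200 bytes), and any caller not in zone_gc() is throttled when it
 * reaches zero, each throttle pass sleeping up to 1 ms before re-checking.
 */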
3345
3346 if (__probable(addr == 0)) {
3347 addr = try_alloc_from_zone(zone, tag, &check_poison);
3348 }
3349
3350 /* If we're here because of zone_gc(), we didn't wait for zone_replenish_thread to finish.
3351 * So we need to ensure that we did successfully grab an element. And we only need to assert
3352 * this for zones that have a replenish thread configured (in this case, the Reserved VM map
3353 * entries zone).
3354 */
3355 if (thr->options & TH_OPT_ZONE_GC && zone->async_prio_refill) {
3356 assert(addr != 0);
3357 }
3358
3359 while ((addr == 0) && canblock) {
3360 /*
3361 * zone is empty, try to expand it
3362 *
3363 * Note that we now allow up to 2 threads (1 vm_privileged and 1 non-vm_privileged)
3364 * to expand the zone concurrently... this is necessary to keep
3365 * vm_privileged threads running critical code needed to continue compressing/swapping
3366 * pages (i.e. making new free pages) from stalling behind non-vm_privileged threads
3367 * waiting to acquire free pages when the vm_page_free_count is below the
3368 * vm_page_free_reserved limit.
3369 */
3370 if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) &&
3371 (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) {
3372 /*
3373 * This is a non-vm_privileged thread and a non-vm_privileged or
3374 * a vm_privileged thread is already expanding the zone...
3375 * OR
3376 * this is a vm_privileged thread and a vm_privileged thread is
3377 * already expanding the zone...
3378 *
3379 * In either case wait for a thread to finish, then try again.
3380 */
3381 zone->waiting = TRUE;
3382 zone_sleep(zone);
3383 } else {
3384 vm_offset_t space;
3385 vm_size_t alloc_size;
3386 int retry = 0;
3387
3388 if ((zone->cur_size + zone->elem_size) >
3389 zone->max_size) {
3390 if (zone->exhaustible) {
3391 break;
3392 }
3393 if (zone->expandable) {
3394 /*
3395 * We're willing to overflow certain
3396 * zones, but not without complaining.
3397 *
3398 * This is best used in conjunction
3399 * with the collectable flag. What we
3400 * want is an assurance we can get the
3401 * memory back, assuming there's no
3402 * leak.
3403 */
3404 zone->max_size += (zone->max_size >> 1);
3405 } else {
3406 unlock_zone(zone);
3407
3408 panic_include_zprint = TRUE;
3409 #if CONFIG_ZLEAKS
3410 if (zleak_state & ZLEAK_STATE_ACTIVE) {
3411 panic_include_ztrace = TRUE;
3412 }
3413 #endif /* CONFIG_ZLEAKS */
3414 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
3415 }
3416 }
3417 /*
3418 * It is possible that a background (BG) thread is refilling/expanding the zone
3419 * and gets pre-empted during that operation. That blocks all other
3420 * threads from making progress, leading to a watchdog timeout. To
3421 * avoid that, boost the thread priority using the rwlock boost.
3422 */
3423 set_thread_rwlock_boost();
3424
3425 if ((thr->options & TH_OPT_VMPRIV)) {
3426 zone->doing_alloc_with_vm_priv = TRUE;
3427 set_doing_alloc_with_vm_priv = TRUE;
3428 } else {
3429 zone->doing_alloc_without_vm_priv = TRUE;
3430 }
3431 unlock_zone(zone);
3432
3433 for (;;) {
3434 int zflags = KMA_KOBJECT | KMA_NOPAGEWAIT;
3435
3436 if (vm_pool_low() || retry >= 1) {
3437 alloc_size =
3438 round_page(zone->elem_size);
3439 } else {
3440 alloc_size = zone->alloc_size;
3441 }
3442
3443 if (zone->noencrypt) {
3444 zflags |= KMA_NOENCRYPT;
3445 }
3446
3447 /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
3448 if (is_zone_map_nearing_exhaustion()) {
3449 thread_wakeup((event_t) &vm_pageout_garbage_collect);
3450 }
3451
3452 retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
3453 if (retval == KERN_SUCCESS) {
3454 #if CONFIG_ZLEAKS
3455 if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
3456 if (zone_map->size >= zleak_global_tracking_threshold) {
3457 kern_return_t kr;
3458
3459 kr = zleak_activate();
3460 if (kr != KERN_SUCCESS) {
3461 printf("Failed to activate live zone leak debugging (%d).\n", kr);
3462 }
3463 }
3464 }
3465
3466 if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
3467 if (zone->cur_size > zleak_per_zone_tracking_threshold) {
3468 zone->zleak_on = TRUE;
3469 }
3470 }
3471 #endif /* CONFIG_ZLEAKS */
3472 zcram(zone, space, alloc_size);
3473
3474 break;
3475 } else if (retval != KERN_RESOURCE_SHORTAGE) {
3476 retry++;
3477
3478 if (retry == 3) {
3479 panic_include_zprint = TRUE;
3480 #if CONFIG_ZLEAKS
3481 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
3482 panic_include_ztrace = TRUE;
3483 }
3484 #endif /* CONFIG_ZLEAKS */
3485 if (retval == KERN_NO_SPACE) {
3486 zone_t zone_largest = zone_find_largest();
3487 panic("zalloc: zone map exhausted while allocating from zone %s, likely due to memory leak in zone %s (%lu total bytes, %d elements allocated)",
3488 zone->zone_name, zone_largest->zone_name,
3489 (unsigned long)zone_largest->cur_size, zone_largest->count);
3490 }
3491 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
3492 }
3493 } else {
3494 break;
3495 }
3496 }
3497 lock_zone(zone);
3498 assert(zone->zone_valid);
3499
3500 if (set_doing_alloc_with_vm_priv == TRUE) {
3501 zone->doing_alloc_with_vm_priv = FALSE;
3502 } else {
3503 zone->doing_alloc_without_vm_priv = FALSE;
3504 }
3505
3506 if (zone->waiting) {
3507 zone->waiting = FALSE;
3508 zone_wakeup(zone);
3509 }
3510 clear_thread_rwlock_boost();
3511
3512 addr = try_alloc_from_zone(zone, tag, &check_poison);
3513 if (addr == 0 &&
3514 retval == KERN_RESOURCE_SHORTAGE) {
3515 if (nopagewait == TRUE) {
3516 break; /* out of the main while loop */
3517 }
3518 unlock_zone(zone);
3519
3520 VM_PAGE_WAIT();
3521 lock_zone(zone);
3522 assert(zone->zone_valid);
3523 }
3524 }
3525 if (addr == 0) {
3526 addr = try_alloc_from_zone(zone, tag, &check_poison);
3527 }
3528 }
3529
3530 #if CONFIG_ZLEAKS
3531 /* Zone leak detection:
3532 * If we're sampling this allocation, add it to the zleaks hash table.
3533 */
3534 if (addr && zleak_tracedepth > 0) {
3535 /* Sampling can fail if another sample is happening at the same time in a different zone. */
3536 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
3537 /* If it failed, roll back the counter so we sample the next allocation instead. */
3538 zone->zleak_capture = zleak_sample_factor;
3539 }
3540 }
3541 #endif /* CONFIG_ZLEAKS */
3542
3543
3544 if ((addr == 0) && (!canblock || nopagewait) && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
3545 zone->async_pending = TRUE;
3546 unlock_zone(zone);
3547 thread_call_enter(&call_async_alloc);
3548 lock_zone(zone);
3549 assert(zone->zone_valid);
3550 addr = try_alloc_from_zone(zone, tag, &check_poison);
3551 }
3552
3553 #if VM_MAX_TAG_ZONES
3554 if (__improbable(zone->tags) && addr) {
3555 if (reqsize) {
3556 reqsize = zone->elem_size - reqsize;
3557 }
3558 vm_tag_update_zone_size(tag, zone->tag_zone_index, zone->elem_size, reqsize);
3559 }
3560 #endif /* VM_MAX_TAG_ZONES */
3561
3562 unlock_zone(zone);
3563
3564 if (__improbable(DO_LOGGING(zone) && addr)) {
3565 btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved);
3566 }
3567
3568 zalloc_poison_element(check_poison, zone, addr);
3569
3570 if (addr) {
3571 #if DEBUG || DEVELOPMENT
3572 if (__improbable(leak_scan_debug_flag && !(zone->elem_size & (sizeof(uintptr_t) - 1)))) {
3573 unsigned int count, idx;
3574 /* Fill element, from tail, with backtrace in reverse order */
3575 if (numsaved == 0) {
3576 numsaved = backtrace(zbt, MAX_ZTRACE_DEPTH);
3577 }
3578 count = (unsigned int)(zone->elem_size / sizeof(uintptr_t));
3579 if (count >= numsaved) {
3580 count = numsaved - 1;
3581 }
3582 for (idx = 0; idx < count; idx++) {
3583 ((uintptr_t *)addr)[count - 1 - idx] = zbt[idx + 1];
3584 }
3585 }
3586 #endif /* DEBUG || DEVELOPMENT */
3587 }
3588
3589 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
3590
3591
3592 #if KASAN_ZALLOC
3593 addr = kasan_fixup_allocated_element_address(zone, addr);
3594 #endif
3595
3596 DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
3597
3598 return (void *)addr;
3599 }
3600
3601 void *
3602 zalloc(zone_t zone)
3603 {
3604 return zalloc_internal(zone, TRUE, FALSE, 0, VM_KERN_MEMORY_NONE);
3605 }
3606
3607 void *
3608 zalloc_noblock(zone_t zone)
3609 {
3610 return zalloc_internal(zone, FALSE, FALSE, 0, VM_KERN_MEMORY_NONE);
3611 }
3612
3613 void *
3614 zalloc_nopagewait(zone_t zone)
3615 {
3616 return zalloc_internal(zone, TRUE, TRUE, 0, VM_KERN_MEMORY_NONE);
3617 }
3618
3619 void *
3620 zalloc_canblock_tag(zone_t zone, boolean_t canblock, vm_size_t reqsize, vm_tag_t tag)
3621 {
3622 return zalloc_internal(zone, canblock, FALSE, reqsize, tag);
3623 }
3624
3625 void *
3626 zalloc_canblock(zone_t zone, boolean_t canblock)
3627 {
3628 return zalloc_internal(zone, canblock, FALSE, 0, VM_KERN_MEMORY_NONE);
3629 }
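/*
 * Informal summary of the wrappers above (behavior follows directly from the
 * zalloc_internal() arguments they pass):
 *	zalloc()              - may block, and may wait for free pages (VM_PAGE_WAIT)
 *	zalloc_noblock()      - canblock == FALSE: will not block waiting for zone expansion
 *	zalloc_nopagewait()   - may block, but bails out rather than calling VM_PAGE_WAIT()
 *	zalloc_canblock()     - caller chooses the blocking behavior
 *	zalloc_canblock_tag() - as above, plus a vm_tag and requested size for
 *	                        tagged-zone accounting (VM_MAX_TAG_ZONES)
 */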
3630
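/*
 * zalloc_attempt()/zfree_direct() (descriptive note): minimal entry points
 * that move a single element directly between the caller and the zone's
 * freelists, applying only element poisoning; they skip the logging,
 * throttling, zone expansion and per-CPU cache paths taken by
 * zalloc_internal() and zfree().
 */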
3631 void *
3632 zalloc_attempt(zone_t zone)
3633 {
3634 boolean_t check_poison = FALSE;
3635 vm_offset_t addr = try_alloc_from_zone(zone, VM_KERN_MEMORY_NONE, &check_poison);
3636 zalloc_poison_element(check_poison, zone, addr);
3637 return (void *)addr;
3638 }
3639
3640 void
3641 zfree_direct(zone_t zone, vm_offset_t elem)
3642 {
3643 boolean_t poison = zfree_poison_element(zone, elem);
3644 free_to_zone(zone, elem, poison);
3645 }
3646
3647
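/*
 * zalloc_async() (descriptive note): thread-call handler driven through
 * call_async_alloc (see the thread_call_enter() in zalloc_internal() above).
 * For each zone with async_pending set, it performs a throwaway
 * zalloc_canblock_tag()/zfree() pair; the blocking allocation is what
 * actually expands the zone so that later non-blocking callers can find
 * free elements.
 */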
3648 void
3649 zalloc_async(
3650 __unused thread_call_param_t p0,
3651 __unused thread_call_param_t p1)
3652 {
3653 zone_t current_z = NULL;
3654 unsigned int max_zones, i;
3655 void *elt = NULL;
3656 boolean_t pending = FALSE;
3657
3658 simple_lock(&all_zones_lock, &zone_locks_grp);
3659 max_zones = num_zones;
3660 simple_unlock(&all_zones_lock);
3661 for (i = 0; i < max_zones; i++) {
3662 current_z = &(zone_array[i]);
3663
3664 if (current_z->no_callout == TRUE) {
3665 /* async_pending will never be set */
3666 continue;
3667 }
3668
3669 lock_zone(current_z);
3670 if (current_z->zone_valid && current_z->async_pending == TRUE) {
3671 current_z->async_pending = FALSE;
3672 pending = TRUE;
3673 }
3674 unlock_zone(current_z);
3675
3676 if (pending == TRUE) {
3677 elt = zalloc_canblock_tag(current_z, TRUE, 0, VM_KERN_MEMORY_OSFMK);
3678 zfree(current_z, elt);
3679 pending = FALSE;
3680 }
3681 }
3682 }
3683
3684 /*
3685 * zget returns an element from the specified zone, or NULL immediately
3686 * if none is free; it does not block to expand the zone or wait for free pages.
3687 */
3688 void *
3689 zget(
3690 zone_t zone)
3691 {
3692 return zalloc_internal(zone, FALSE, TRUE, 0, VM_KERN_MEMORY_NONE);
3693 }
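/*
 * Usage sketch (illustrative only; "widget_zone" and struct widget are
 * hypothetical and not defined in this file):
 *
 *	struct widget *w = (struct widget *) zget(widget_zone);
 *	if (w == NULL) {
 *		return KERN_RESOURCE_SHORTAGE;	// hypothetical error path: zget() will not wait
 *	}
 *	// ... use *w ...
 *	zfree(widget_zone, w);
 */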
3694
3695 /* Keep this FALSE by default: large-memory machines run orders of magnitude
3696 * slower in debug mode when it is TRUE. Use the debugger to enable if needed. */
3697 /* static */ boolean_t zone_check = FALSE;
3698
3699 static void
3700 zone_check_freelist(zone_t zone, vm_offset_t elem)
3701 {
3702 struct zone_free_element *this;
3703 struct zone_page_metadata *thispage;
3704
3705 if (zone->allows_foreign) {
3706 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
3707 !queue_end(&zone->pages.any_free_foreign, &(thispage->pages));
3708 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3709 for (this = page_metadata_get_freelist(thispage);
3710 this != NULL;
3711 this = this->next) {
3712 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) {
3713 panic("zone_check_freelist");
3714 }
3715 }
3716 }
3717 }
3718 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
3719 !queue_end(&zone->pages.all_free, &(thispage->pages));
3720 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3721 for (this = page_metadata_get_freelist(thispage);
3722 this != NULL;
3723 this = this->next) {
3724 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) {
3725 panic("zone_check_freelist");
3726 }
3727 }
3728 }
3729 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
3730 !queue_end(&zone->pages.intermediate, &(thispage->pages));
3731 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3732 for (this = page_metadata_get_freelist(thispage);
3733 this != NULL;
3734 this = this->next) {
3735 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) {
3736 panic("zone_check_freelist");
3737 }
3738 }
3739 }
3740 }
3741
3742 boolean_t
3743 zfree_poison_element(zone_t zone, vm_offset_t elem)
3744 {
3745 boolean_t poison = FALSE;
3746 if (zp_factor != 0 || zp_tiny_zone_limit != 0) {
3747 /*
3748 * Poison the memory before it ends up on the freelist to catch
3749 * use-after-free and use of uninitialized memory
3750 *
3751 * Always poison tiny zones' elements (limit is 0 if -no-zp is set)
3752 * Also poison larger elements periodically
3753 */
3754
3755 vm_offset_t inner_size = zone->elem_size;
3756
3757 uint32_t sample_factor = zp_factor + (((uint32_t)inner_size) >> zp_scale);
3758
3759 if (inner_size <= zp_tiny_zone_limit) {
3760 poison = TRUE;
3761 } else if (zp_factor != 0 && sample_counter(&zone->zp_count, sample_factor) == TRUE) {
3762 poison = TRUE;
3763 }
3764
3765 if (__improbable(poison)) {
3766 /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
3767 /* Poison everything but primary and backup */
3768 vm_offset_t *element_cursor = ((vm_offset_t *) elem) + 1;
3769 vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *)elem);
3770
3771 for (; element_cursor < backup; element_cursor++) {
3772 *element_cursor = ZP_POISON;
3773 }
3774 }
3775 }
3776 return poison;
3777 }
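/*
 * Worked example for the sampling above (illustrative; the zp_factor and
 * zp_scale values are assumed, typical boot-args-tunable defaults, not read
 * from this build): with zp_factor = 16 and zp_scale = 4, a 256-byte element
 * gives sample_factor = 16 + (256 >> 4) = 32, so roughly one in every 32
 * frees of that zone has its body (everything except the primary and backup
 * freelist pointers) filled with ZP_POISON. Elements no larger than
 * zp_tiny_zone_limit are poisoned on every free.
 */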
3778 void
3779 (zfree)(
3780 zone_t zone,
3781 void *addr)
3782 {
3783 vm_offset_t elem = (vm_offset_t) addr;
3784 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */
3785 unsigned int numsaved = 0;
3786 boolean_t gzfreed = FALSE;
3787 boolean_t poison = FALSE;
3788 #if VM_MAX_TAG_ZONES
3789 vm_tag_t tag;
3790 #endif /* VM_MAX_TAG_ZONES */
3791
3792 assert(zone != ZONE_NULL);
3793 DTRACE_VM2(zfree, zone_t, zone, void*, addr);
3794 #if KASAN_ZALLOC
3795 if (kasan_quarantine_freed_element(&zone, &addr)) {
3796 return;
3797 }
3798 elem = (vm_offset_t)addr;
3799 #endif
3800
3801 /*
3802 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
3803 */
3804
3805 if (__improbable(DO_LOGGING(zone) && corruption_debug_flag)) {
3806 numsaved = OSBacktrace((void *)zbt, MAX_ZTRACE_DEPTH);
3807 }
3808
3809 #if MACH_ASSERT
3810 /* Basic sanity checks */
3811 if (zone == ZONE_NULL || elem == (vm_offset_t)0) {
3812 panic("zfree: NULL");
3813 }
3814 #endif
3815
3816 #if CONFIG_GZALLOC
3817 gzfreed = gzalloc_free(zone, addr);
3818 #endif
3819
3820 if (!gzfreed) {
3821 struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
3822 if (zone != PAGE_METADATA_GET_ZONE(page_meta)) {
3823 panic("Element %p from zone %s caught being freed to wrong zone %s\n", addr, PAGE_METADATA_GET_ZONE(page_meta)->zone_name, zone->zone_name);
3824 }
3825 }
3826
3827 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
3828
3829 if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign &&
3830 !from_zone_map(elem, zone->elem_size))) {
3831 panic("zfree: non-allocated memory in collectable zone!");
3832 }
3833
3834 if (!gzfreed) {
3835 poison = zfree_poison_element(zone, elem);
3836 }
3837
3838 /*
3839 * See if we're doing logging on this zone. There are two styles of logging used depending on
3840 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
3841 */
3842
3843 if (__improbable(DO_LOGGING(zone))) {
3844 if (corruption_debug_flag) {
3845 /*
3846 * We're logging to catch a corruption. Add a record of this zfree operation
3847 * to log.
3848 */
3849 btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_FREE, (void **)zbt, numsaved);
3850 } else {
3851 /*
3852 * We're logging to catch a leak. Remove any record we might have for this
3853 * element since it's being freed. Note that we may not find it if the buffer
3854 * overflowed and that's OK. Since the log is of a limited size, old records
3855 * get overwritten if there are more zallocs than zfrees.
3856 */
3857 btlog_remove_entries_for_element(zone->zlog_btlog, (void *)addr);
3858 }
3859 }
3860
3861 #if CONFIG_ZCACHE
3862 if (zone_caching_enabled(zone)) {
3863 int __assert_only ret = zcache_free_to_cpu_cache(zone, addr);
3864 assert(ret != FALSE);
3865 return;
3866 }
3867 #endif /* CONFIG_ZCACHE */
3868
3869 lock_zone(zone);
3870 assert(zone->zone_valid);
3871
3872 if (zone_check) {
3873 zone_check_freelist(zone, elem);
3874 }
3875
3876 if (__probable(!gzfreed)) {
3877 #if VM_MAX_TAG_ZONES
3878 if (__improbable(zone->tags)) {
3879 tag = (ZTAG(zone, elem)[0] >> 1);
3880 // set the tag with bit 0 (b0) clear so the block remains in use
3881 ZTAG(zone, elem)[0] = 0xFFFE;
3882 }
3883 #endif /* VM_MAX_TAG_ZONES */
3884 free_to_zone(zone, elem, poison);
3885 }
3886
3887 if (__improbable(zone->count < 0)) {
3888 panic("zfree: zone count underflow in zone %s while freeing element %p, possible cause: double frees or freeing memory that did not come from this zone",
3889 zone->zone_name, addr);
3890 }
3891
3892 #if CONFIG_ZLEAKS
3893 /*
3894 * Zone leak detection: un-track the allocation
3895 */
3896 if (zone->zleak_on) {
3897 zleak_free(elem, zone->elem_size);
3898 }
3899 #endif /* CONFIG_ZLEAKS */
3900
3901 #if VM_MAX_TAG_ZONES
3902 if (__improbable(zone->tags) && __probable(!gzfreed)) {
3903 vm_tag_update_zone_size(tag, zone->tag_zone_index, -((int64_t)zone->elem_size), 0);
3904 }
3905 #endif /* VM_MAX_TAG_ZONES */
3906
3907 unlock_zone(zone);
3908 }
3909
3910 /* Change a zone's flags.
3911 * This routine must be called immediately after zinit.
3912 */
3913 void
3914 zone_change(
3915 zone_t zone,
3916 unsigned int item,
3917 boolean_t value)
3918 {
3919 assert( zone != ZONE_NULL );
3920 assert( value == TRUE || value == FALSE );
3921
3922 switch (item) {
3923 case Z_NOENCRYPT:
3924 zone->noencrypt = value;
3925 break;
3926 case Z_EXHAUST:
3927 zone->exhaustible = value;
3928 break;
3929 case Z_COLLECT:
3930 zone->collectable = value;
3931 break;
3932 case Z_EXPAND:
3933 zone->expandable = value;
3934 break;
3935 case Z_FOREIGN:
3936 zone->allows_foreign = value;
3937 break;
3938 case Z_CALLERACCT:
3939 zone->caller_acct = value;
3940 break;
3941 case Z_NOCALLOUT:
3942 zone->no_callout = value;
3943 break;
3944 case Z_TAGS_ENABLED:
3945 #if VM_MAX_TAG_ZONES
3946 {
3947 static int tag_zone_index;
3948 zone->tags = TRUE;
3949 zone->tags_inline = (((page_size + zone->elem_size - 1) / zone->elem_size) <= (sizeof(uint32_t) / sizeof(uint16_t)));
3950 zone->tag_zone_index = OSAddAtomic(1, &tag_zone_index);
3951 }
3952 #endif /* VM_MAX_TAG_ZONES */
3953 break;
3954 case Z_GZALLOC_EXEMPT:
3955 zone->gzalloc_exempt = value;
3956 #if CONFIG_GZALLOC
3957 gzalloc_reconfigure(zone);
3958 #endif
3959 break;
3960 case Z_ALIGNMENT_REQUIRED:
3961 zone->alignment_required = value;
3962 #if KASAN_ZALLOC
3963 if (zone->kasan_redzone == KASAN_GUARD_SIZE) {
3964 /* Don't disturb alignment with the redzone for zones with
3965 * specific alignment requirements. */
3966 zone->elem_size -= zone->kasan_redzone * 2;
3967 zone->kasan_redzone = 0;
3968 }
3969 #endif
3970 #if CONFIG_GZALLOC
3971 gzalloc_reconfigure(zone);
3972 #endif
3973 break;
3974 case Z_KASAN_QUARANTINE:
3975 zone->kasan_quarantine = value;
3976 break;
3977 case Z_CACHING_ENABLED:
3978 #if CONFIG_ZCACHE
3979 if (value == TRUE && use_caching) {
3980 if (zcache_ready()) {
3981 zcache_init(zone);
3982 } else {
3983 zone->cpu_cache_enable_when_ready = TRUE;
3984 }
3985 }
3986 #endif
3987 break;
3988 default:
3989 panic("Zone_change: Wrong Item Type!");
3990 /* break; */
3991 }
3992 }
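/*
 * Usage sketch (illustrative; "widget_zone" and struct widget are
 * hypothetical): zone_change() is intended to be called right after zinit(),
 * before the zone is used, e.g.:
 *
 *	widget_zone = zinit(sizeof(struct widget), 64 * sizeof(struct widget),
 *	    sizeof(struct widget), "widgets");
 *	zone_change(widget_zone, Z_NOENCRYPT, TRUE);
 *	zone_change(widget_zone, Z_NOCALLOUT, TRUE);
 */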
3993
3994 /*
3995 * Return the expected number of free elements in the zone.
3996 * This calculation will be incorrect if items are zfree'd that
3997 * were never zalloc'd/zget'd. The correct way to stuff memory
3998 * into a zone is by zcram.
3999 */
4000
4001 integer_t
4002 zone_free_count(zone_t zone)
4003 {
4004 integer_t free_count;
4005
4006 lock_zone(zone);
4007 free_count = zone->countfree;
4008 unlock_zone(zone);
4009
4010 assert(free_count >= 0);
4011
4012 return free_count;
4013 }
4014
4015 /* Drops the elements in the free queue of a zone. Called by zone_gc() on each zone, and when a zone is zdestroy'ed. */
4016 void
4017 drop_free_elements(zone_t z)
4018 {
4019 vm_size_t elt_size, size_freed;
4020 unsigned int total_freed_pages = 0;
4021 uint64_t old_all_free_count;
4022 struct zone_page_metadata *page_meta;
4023 queue_head_t page_meta_head;
4024
4025 lock_zone(z);
4026 if (queue_empty(&z->pages.all_free)) {
4027 unlock_zone(z);
4028 return;
4029 }
4030
4031 /*
4032 * Snatch all of the free elements away from the zone.
4033 */
4034 elt_size = z->elem_size;
4035 old_all_free_count = z->count_all_free_pages;
4036 queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages);
4037 queue_init(&z->pages.all_free);
4038 z->count_all_free_pages = 0;
4039 unlock_zone(z);
4040
4041 /* Walk the pages we snatched to total up the size (and thus the count) of the free elements they hold */
4042 size_freed = 0;
4043 queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) {
4044 assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */
4045 size_freed += elt_size * page_meta->free_count;
4046 }
4047
4048 /* Update the zone size and free element count */
4049 lock_zone(z);
4050 z->cur_size -= size_freed;
4051 z->countfree -= size_freed / elt_size;
4052 unlock_zone(z);
4053
4054 while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) {
4055 vm_address_t free_page_address;
4056 /* Free the pages for metadata and account for them */
4057 free_page_address = get_zone_page(page_meta);
4058 ZONE_PAGE_COUNT_DECR(z, page_meta->page_count);
4059 total_freed_pages += page_meta->page_count;
4060 old_all_free_count -= page_meta->page_count;
4061 #if KASAN_ZALLOC
4062 kasan_poison_range(free_page_address, page_meta->page_count * PAGE_SIZE, ASAN_VALID);
4063 #endif
4064 #if VM_MAX_TAG_ZONES
4065 if (z->tags) {
4066 ztMemoryRemove(z, free_page_address, (page_meta->page_count * PAGE_SIZE));
4067 }
4068 #endif /* VM_MAX_TAG_ZONES */
4069 kmem_free(zone_map, free_page_address, (page_meta->page_count * PAGE_SIZE));
4070 if (current_thread()->options & TH_OPT_ZONE_GC) {
4071 thread_yield_to_preemption();
4072 }
4073 }
4074
4075 /* We freed all the pages from the all_free list for this zone */
4076 assert(old_all_free_count == 0);
4077
4078 if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) {
4079 kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed / elt_size, total_freed_pages);
4080 }
4081 }
4082
4083 /* Zone garbage collection
4084 *
4085 * zone_gc will walk through all the free elements in all the
4086 * zones that are marked collectable looking for reclaimable
4087 * pages. zone_gc is called by consider_zone_gc when the system
4088 * begins to run out of memory.
4089 *
4090 * We should ensure that zone_gc never blocks.
4091 */
4092 void
4093 zone_gc(boolean_t consider_jetsams)
4094 {
4095 unsigned int max_zones;
4096 zone_t z;
4097 unsigned int i;
4098
4099 if (consider_jetsams) {
4100 kill_process_in_largest_zone();
4101 /*
4102 * If we do end up jetsamming something, we need to do a zone_gc so that
4103 * we can reclaim free zone elements and update the zone map size.
4104 * Fall through.
4105 */
4106 }
4107
4108 lck_mtx_lock(&zone_gc_lock);
4109
4110 current_thread()->options |= TH_OPT_ZONE_GC;
4111
4112 simple_lock(&all_zones_lock, &zone_locks_grp);
4113 max_zones = num_zones;
4114 simple_unlock(&all_zones_lock);
4115
4116 if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) {
4117 kprintf("zone_gc() starting...\n");
4118 }
4119
4120 for (i = 0; i < max_zones; i++) {
4121 z = &(zone_array[i]);
4122 assert(z != ZONE_NULL);
4123
4124 if (!z->collectable) {
4125 continue;
4126 }
4127 #if CONFIG_ZCACHE
4128 if (zone_caching_enabled(z)) {
4129 zcache_drain_depot(z);
4130 }
4131 #endif /* CONFIG_ZCACHE */
4132 if (queue_empty(&z->pages.all_free)) {
4133 continue;
4134 }
4135
4136 drop_free_elements(z);
4137 }
4138
4139 current_thread()->options &= ~TH_OPT_ZONE_GC;
4140
4141 lck_mtx_unlock(&zone_gc_lock);
4142 }
4143
4144 extern vm_offset_t kmapoff_kaddr;
4145 extern unsigned int kmapoff_pgcnt;
4146
4147 /*
4148 * consider_zone_gc:
4149 *
4150 * Called by the pageout daemon when the system needs more free pages.
4151 */
4152
4153 void
4154 consider_zone_gc(boolean_t consider_jetsams)
4155 {
4156 if (kmapoff_kaddr != 0) {
4157 /*
4158 * One-time reclaim of kernel_map resources we allocated in
4159 * early boot.
4160 */
4161 (void) vm_deallocate(kernel_map,
4162 kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64);
4163 kmapoff_kaddr = 0;
4164 }
4165
4166 if (zone_gc_allowed) {
4167 zone_gc(consider_jetsams);
4168 }
4169 }
4170
4171 /*
4172 * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls
4173 * requesting zone information.
4174 * Frees unused pages towards the end of the region, and zeroes out unused
4175 * space on the last page.
4176 */
4177 vm_map_copy_t
4178 create_vm_map_copy(
4179 vm_offset_t start_addr,
4180 vm_size_t total_size,
4181 vm_size_t used_size)
4182 {
4183 kern_return_t kr;
4184 vm_offset_t end_addr;
4185 vm_size_t free_size;
4186 vm_map_copy_t copy;
4187
4188 if (used_size != total_size) {
4189 end_addr = start_addr + used_size;
4190 free_size = total_size - (round_page(end_addr) - start_addr);
4191
4192 if (free_size >= PAGE_SIZE) {
4193 kmem_free(ipc_kernel_map,
4194 round_page(end_addr), free_size);
4195 }
4196 bzero((char *) end_addr, round_page(end_addr) - end_addr);
4197 }
4198
4199 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)start_addr,
4200 (vm_map_size_t)used_size, TRUE, &copy);
4201 assert(kr == KERN_SUCCESS);
4202
4203 return copy;
4204 }
4205
4206 boolean_t
4207 get_zone_info(
4208 zone_t z,
4209 mach_zone_name_t *zn,
4210 mach_zone_info_t *zi)
4211 {
4212 struct zone zcopy;
4213
4214 assert(z != ZONE_NULL);
4215 lock_zone(z);
4216 if (!z->zone_valid) {
4217 unlock_zone(z);
4218 return FALSE;
4219 }
4220 zcopy = *z;
4221 unlock_zone(z);
4222
4223 if (zn != NULL) {
4224 /* assuming here the name data is static */
4225 (void) __nosan_strlcpy(zn->mzn_name, zcopy.zone_name,
4226 strlen(zcopy.zone_name) + 1);
4227 }
4228
4229 if (zi != NULL) {
4230 zi->mzi_count = (uint64_t)zcopy.count;
4231 zi->mzi_cur_size = ptoa_64(zcopy.page_count);
4232 zi->mzi_max_size = (uint64_t)zcopy.max_size;
4233 zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
4234 zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
4235 zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size;
4236 zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible;
4237 zi->mzi_collectable = 0;
4238 if (zcopy.collectable) {
4239 SET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable, ((uint64_t)zcopy.count_all_free_pages * PAGE_SIZE));
4240 SET_MZI_COLLECTABLE_FLAG(zi->mzi_collectable, TRUE);
4241 }
4242 }
4243
4244 return TRUE;
4245 }
4246
4247 kern_return_t
4248 task_zone_info(
4249 __unused task_t task,
4250 __unused mach_zone_name_array_t *namesp,
4251 __unused mach_msg_type_number_t *namesCntp,
4252 __unused task_zone_info_array_t *infop,
4253 __unused mach_msg_type_number_t *infoCntp)
4254 {
4255 return KERN_FAILURE;
4256 }
4257
4258 kern_return_t
4259 mach_zone_info(
4260 host_priv_t host,
4261 mach_zone_name_array_t *namesp,
4262 mach_msg_type_number_t *namesCntp,
4263 mach_zone_info_array_t *infop,
4264 mach_msg_type_number_t *infoCntp)
4265 {
4266 return mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL);
4267 }
4268
4269
4270 kern_return_t
4271 mach_memory_info(
4272 host_priv_t host,
4273 mach_zone_name_array_t *namesp,
4274 mach_msg_type_number_t *namesCntp,
4275 mach_zone_info_array_t *infop,
4276 mach_msg_type_number_t *infoCntp,
4277 mach_memory_info_array_t *memoryInfop,
4278 mach_msg_type_number_t *memoryInfoCntp)
4279 {
4280 mach_zone_name_t *names;
4281 vm_offset_t names_addr;
4282 vm_size_t names_size;
4283
4284 mach_zone_info_t *info;
4285 vm_offset_t info_addr;
4286 vm_size_t info_size;
4287
4288 mach_memory_info_t *memory_info;
4289 vm_offset_t memory_info_addr;
4290 vm_size_t memory_info_size;
4291 vm_size_t memory_info_vmsize;
4292 unsigned int num_info;
4293
4294 unsigned int max_zones, used_zones, i;
4295 mach_zone_name_t *zn;
4296 mach_zone_info_t *zi;
4297 kern_return_t kr;
4298
4299 uint64_t zones_collectable_bytes = 0;
4300
4301 if (host == HOST_NULL) {
4302 return KERN_INVALID_HOST;
4303 }
4304 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
4305 if (!PE_i_can_has_debugger(NULL)) {
4306 return KERN_INVALID_HOST;
4307 }
4308 #endif
4309
4310 /*
4311 * We assume that zones aren't freed once allocated.
4312 * We won't pick up any zones that are allocated later.
4313 */
4314
4315 simple_lock(&all_zones_lock, &zone_locks_grp);
4316 max_zones = (unsigned int)(num_zones);
4317 simple_unlock(&all_zones_lock);
4318
4319 names_size = round_page(max_zones * sizeof *names);
4320 kr = kmem_alloc_pageable(ipc_kernel_map,
4321 &names_addr, names_size, VM_KERN_MEMORY_IPC);
4322 if (kr != KERN_SUCCESS) {
4323 return kr;
4324 }
4325 names = (mach_zone_name_t *) names_addr;
4326
4327 info_size = round_page(max_zones * sizeof *info);
4328 kr = kmem_alloc_pageable(ipc_kernel_map,
4329 &info_addr, info_size, VM_KERN_MEMORY_IPC);
4330 if (kr != KERN_SUCCESS) {
4331 kmem_free(ipc_kernel_map,
4332 names_addr, names_size);
4333 return kr;
4334 }
4335 info = (mach_zone_info_t *) info_addr;
4336
4337 zn = &names[0];
4338 zi = &info[0];
4339
4340 used_zones = max_zones;
4341 for (i = 0; i < max_zones; i++) {
4342 if (!get_zone_info(&(zone_array[i]), zn, zi)) {
4343 used_zones--;
4344 continue;
4345 }
4346 zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable);
4347 zn++;
4348 zi++;
4349 }
4350
4351 *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, used_zones * sizeof *names);
4352 *namesCntp = used_zones;
4353
4354 *infop = (mach_zone_info_t *) create_vm_map_copy(info_addr, info_size, used_zones * sizeof *info);
4355 *infoCntp = used_zones;
4356
4357 num_info = 0;
4358 memory_info_addr = 0;
4359
4360 if (memoryInfop && memoryInfoCntp) {
4361 vm_map_copy_t copy;
4362 num_info = vm_page_diagnose_estimate();
4363 memory_info_size = num_info * sizeof(*memory_info);
4364 memory_info_vmsize = round_page(memory_info_size);
4365 kr = kmem_alloc_pageable(ipc_kernel_map,
4366 &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC);
4367 if (kr != KERN_SUCCESS) {
4368 return kr;
4369 }
4370
4371 kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize,
4372 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
4373 assert(kr == KERN_SUCCESS);
4374
4375 memory_info = (mach_memory_info_t *) memory_info_addr;
4376 vm_page_diagnose(memory_info, num_info, zones_collectable_bytes);
4377
4378 kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE);
4379 assert(kr == KERN_SUCCESS);
4380
4381 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
4382 (vm_map_size_t)memory_info_size, TRUE, &copy);
4383 assert(kr == KERN_SUCCESS);
4384
4385 *memoryInfop = (mach_memory_info_t *) copy;
4386 *memoryInfoCntp = num_info;
4387 }
4388
4389 return KERN_SUCCESS;
4390 }
4391
4392 kern_return_t
4393 mach_zone_info_for_zone(
4394 host_priv_t host,
4395 mach_zone_name_t name,
4396 mach_zone_info_t *infop)
4397 {
4398 unsigned int max_zones, i;
4399 zone_t zone_ptr;
4400
4401 if (host == HOST_NULL) {
4402 return KERN_INVALID_HOST;
4403 }
4404 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
4405 if (!PE_i_can_has_debugger(NULL)) {
4406 return KERN_INVALID_HOST;
4407 }
4408 #endif
4409
4410 if (infop == NULL) {
4411 return KERN_INVALID_ARGUMENT;
4412 }
4413
4414 simple_lock(&all_zones_lock, &zone_locks_grp);
4415 max_zones = (unsigned int)(num_zones);
4416 simple_unlock(&all_zones_lock);
4417
4418 zone_ptr = ZONE_NULL;
4419 for (i = 0; i < max_zones; i++) {
4420 zone_t z = &(zone_array[i]);
4421 assert(z != ZONE_NULL);
4422
4423 /* Find the requested zone by name */
4424 if (track_this_zone(z->zone_name, name.mzn_name)) {
4425 zone_ptr = z;
4426 break;
4427 }
4428 }
4429
4430 /* No zones found with the requested zone name */
4431 if (zone_ptr == ZONE_NULL) {
4432 return KERN_INVALID_ARGUMENT;
4433 }
4434
4435 if (get_zone_info(zone_ptr, NULL, infop)) {
4436 return KERN_SUCCESS;
4437 }
4438 return KERN_FAILURE;
4439 }
4440
4441 kern_return_t
4442 mach_zone_info_for_largest_zone(
4443 host_priv_t host,
4444 mach_zone_name_t *namep,
4445 mach_zone_info_t *infop)
4446 {
4447 if (host == HOST_NULL) {
4448 return KERN_INVALID_HOST;
4449 }
4450 #if CONFIG_DEBUGGER_FOR_ZONE_INFO
4451 if (!PE_i_can_has_debugger(NULL)) {
4452 return KERN_INVALID_HOST;
4453 }
4454 #endif
4455
4456 if (namep == NULL || infop == NULL) {
4457 return KERN_INVALID_ARGUMENT;
4458 }
4459
4460 if (get_zone_info(zone_find_largest(), namep, infop)) {
4461 return KERN_SUCCESS;
4462 }
4463 return KERN_FAILURE;
4464 }
4465
4466 uint64_t
4467 get_zones_collectable_bytes(void)
4468 {
4469 unsigned int i, max_zones;
4470 uint64_t zones_collectable_bytes = 0;
4471 mach_zone_info_t zi;
4472
4473 simple_lock(&all_zones_lock, &zone_locks_grp);
4474 max_zones = (unsigned int)(num_zones);
4475 simple_unlock(&all_zones_lock);
4476
4477 for (i = 0; i < max_zones; i++) {
4478 if (get_zone_info(&(zone_array[i]), NULL, &zi)) {
4479 zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi.mzi_collectable);
4480 }
4481 }
4482
4483 return zones_collectable_bytes;
4484 }
4485
4486 kern_return_t
4487 mach_zone_get_zlog_zones(
4488 host_priv_t host,
4489 mach_zone_name_array_t *namesp,
4490 mach_msg_type_number_t *namesCntp)
4491 {
4492 #if DEBUG || DEVELOPMENT
4493 unsigned int max_zones, logged_zones, i;
4494 kern_return_t kr;
4495 zone_t zone_ptr;
4496 mach_zone_name_t *names;
4497 vm_offset_t names_addr;
4498 vm_size_t names_size;
4499
4500 if (host == HOST_NULL) {
4501 return KERN_INVALID_HOST;
4502 }
4503
4504 if (namesp == NULL || namesCntp == NULL) {
4505 return KERN_INVALID_ARGUMENT;
4506 }
4507
4508 simple_lock(&all_zones_lock, &zone_locks_grp);
4509 max_zones = (unsigned int)(num_zones);
4510 simple_unlock(&all_zones_lock);
4511
4512 names_size = round_page(max_zones * sizeof *names);
4513 kr = kmem_alloc_pageable(ipc_kernel_map,
4514 &names_addr, names_size, VM_KERN_MEMORY_IPC);
4515 if (kr != KERN_SUCCESS) {
4516 return kr;
4517 }
4518 names = (mach_zone_name_t *) names_addr;
4519
4520 zone_ptr = ZONE_NULL;
4521 logged_zones = 0;
4522 for (i = 0; i < max_zones; i++) {
4523 zone_t z = &(zone_array[i]);
4524 assert(z != ZONE_NULL);
4525
4526 /* Copy out the zone name if zone logging is enabled */
4527 if (z->zlog_btlog) {
4528 get_zone_info(z, &names[logged_zones], NULL);
4529 logged_zones++;
4530 }
4531 }
4532
4533 *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, logged_zones * sizeof *names);
4534 *namesCntp = logged_zones;
4535
4536 return KERN_SUCCESS;
4537
4538 #else /* DEBUG || DEVELOPMENT */
4539 #pragma unused(host, namesp, namesCntp)
4540 return KERN_FAILURE;
4541 #endif /* DEBUG || DEVELOPMENT */
4542 }
4543
4544 kern_return_t
4545 mach_zone_get_btlog_records(
4546 host_priv_t host,
4547 mach_zone_name_t name,
4548 zone_btrecord_array_t *recsp,
4549 mach_msg_type_number_t *recsCntp)
4550 {
4551 #if DEBUG || DEVELOPMENT
4552 unsigned int max_zones, i, numrecs = 0;
4553 zone_btrecord_t *recs;
4554 kern_return_t kr;
4555 zone_t zone_ptr;
4556 vm_offset_t recs_addr;
4557 vm_size_t recs_size;
4558
4559 if (host == HOST_NULL) {
4560 return KERN_INVALID_HOST;
4561 }
4562
4563 if (recsp == NULL || recsCntp == NULL) {
4564 return KERN_INVALID_ARGUMENT;
4565 }
4566
4567 simple_lock(&all_zones_lock, &zone_locks_grp);
4568 max_zones = (unsigned int)(num_zones);
4569 simple_unlock(&all_zones_lock);
4570
4571 zone_ptr = ZONE_NULL;
4572 for (i = 0; i < max_zones; i++) {
4573 zone_t z = &(zone_array[i]);
4574 assert(z != ZONE_NULL);
4575
4576 /* Find the requested zone by name */
4577 if (track_this_zone(z->zone_name, name.mzn_name)) {
4578 zone_ptr = z;
4579 break;
4580 }
4581 }
4582
4583 /* No zones found with the requested zone name */
4584 if (zone_ptr == ZONE_NULL) {
4585 return KERN_INVALID_ARGUMENT;
4586 }
4587
4588 /* Logging not turned on for the requested zone */
4589 if (!DO_LOGGING(zone_ptr)) {
4590 return KERN_FAILURE;
4591 }
4592
4593 /* Allocate memory for btlog records */
4594 numrecs = (unsigned int)(get_btlog_records_count(zone_ptr->zlog_btlog));
4595 recs_size = round_page(numrecs * sizeof *recs);
4596
4597 kr = kmem_alloc_pageable(ipc_kernel_map, &recs_addr, recs_size, VM_KERN_MEMORY_IPC);
4598 if (kr != KERN_SUCCESS) {
4599 return kr;
4600 }
4601
4602 /*
4603 * We will call get_btlog_records() below which populates this region while holding a spinlock
4604 * (the btlog lock). So these pages need to be wired.
4605 */
4606 kr = vm_map_wire_kernel(ipc_kernel_map, recs_addr, recs_addr + recs_size,
4607 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
4608 assert(kr == KERN_SUCCESS);
4609
4610 recs = (zone_btrecord_t *)recs_addr;
4611 get_btlog_records(zone_ptr->zlog_btlog, recs, &numrecs);
4612
4613 kr = vm_map_unwire(ipc_kernel_map, recs_addr, recs_addr + recs_size, FALSE);
4614 assert(kr == KERN_SUCCESS);
4615
4616 *recsp = (zone_btrecord_t *) create_vm_map_copy(recs_addr, recs_size, numrecs * sizeof *recs);
4617 *recsCntp = numrecs;
4618
4619 return KERN_SUCCESS;
4620
4621 #else /* DEBUG || DEVELOPMENT */
4622 #pragma unused(host, name, recsp, recsCntp)
4623 return KERN_FAILURE;
4624 #endif /* DEBUG || DEVELOPMENT */
4625 }
4626
4627
4628 #if DEBUG || DEVELOPMENT
4629
4630 kern_return_t
4631 mach_memory_info_check(void)
4632 {
4633 mach_memory_info_t * memory_info;
4634 mach_memory_info_t * info;
4635 zone_t zone;
4636 unsigned int idx, num_info, max_zones;
4637 vm_offset_t memory_info_addr;
4638 kern_return_t kr;
4639 size_t memory_info_size, memory_info_vmsize;
4640 uint64_t top_wired, zonestotal, total;
4641
4642 num_info = vm_page_diagnose_estimate();
4643 memory_info_size = num_info * sizeof(*memory_info);
4644 memory_info_vmsize = round_page(memory_info_size);
4645 kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG);
4646 assert(kr == KERN_SUCCESS);
4647
4648 memory_info = (mach_memory_info_t *) memory_info_addr;
4649 vm_page_diagnose(memory_info, num_info, 0);
4650
4651 simple_lock(&all_zones_lock, &zone_locks_grp);
4652 max_zones = num_zones;
4653 simple_unlock(&all_zones_lock);
4654
4655 top_wired = total = zonestotal = 0;
4656 for (idx = 0; idx < max_zones; idx++) {
4657 zone = &(zone_array[idx]);
4658 assert(zone != ZONE_NULL);
4659 lock_zone(zone);
4660 zonestotal += ptoa_64(zone->page_count);
4661 unlock_zone(zone);
4662 }
4663 for (idx = 0; idx < num_info; idx++) {
4664 info = &memory_info[idx];
4665 if (!info->size) {
4666 continue;
4667 }
4668 if (VM_KERN_COUNT_WIRED == info->site) {
4669 top_wired = info->size;
4670 }
4671 if (VM_KERN_SITE_HIDE & info->flags) {
4672 continue;
4673 }
4674 if (!(VM_KERN_SITE_WIRED & info->flags)) {
4675 continue;
4676 }
4677 total += info->size;
4678 }
4679 total += zonestotal;
4680
4681 printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n", total, top_wired, zonestotal, top_wired - total);
4682
4683 kmem_free(kernel_map, memory_info_addr, memory_info_vmsize);
4684
4685 return kr;
4686 }
4687
4688 extern boolean_t(*volatile consider_buffer_cache_collect)(int);
4689
4690 #endif /* DEBUG || DEVELOPMENT */
4691
4692 kern_return_t
4693 mach_zone_force_gc(
4694 host_t host)
4695 {
4696 if (host == HOST_NULL) {
4697 return KERN_INVALID_HOST;
4698 }
4699
4700 #if DEBUG || DEVELOPMENT
4701 /* Callout to buffer cache GC to drop elements in the apfs zones */
4702 if (consider_buffer_cache_collect != NULL) {
4703 (void)(*consider_buffer_cache_collect)(0);
4704 }
4705 consider_zone_gc(FALSE);
4706 #endif /* DEBUG || DEVELOPMENT */
4707 return KERN_SUCCESS;
4708 }
4709
4710 extern unsigned int stack_total;
4711 extern unsigned long long stack_allocs;
4712
4713 #if defined(__i386__) || defined (__x86_64__)
4714 extern unsigned int inuse_ptepages_count;
4715 extern long long alloc_ptepages_count;
4716 #endif
4717
4718 zone_t
4719 zone_find_largest(void)
4720 {
4721 unsigned int i;
4722 unsigned int max_zones;
4723 zone_t the_zone;
4724 zone_t zone_largest;
4725
4726 simple_lock(&all_zones_lock, &zone_locks_grp);
4727 max_zones = num_zones;
4728 simple_unlock(&all_zones_lock);
4729
4730 zone_largest = &(zone_array[0]);
4731 for (i = 0; i < max_zones; i++) {
4732 the_zone = &(zone_array[i]);
4733 if (the_zone->cur_size > zone_largest->cur_size) {
4734 zone_largest = the_zone;
4735 }
4736 }
4737 return zone_largest;
4738 }
4739
4740 #if ZONE_DEBUG
4741
4742 /* should we care about locks here? */
4743
4744 #define zone_in_use(z) ( z->count || z->free_elements \
4745 || !queue_empty(&z->pages.all_free) \
4746 || !queue_empty(&z->pages.intermediate) \
4747 || (z->allows_foreign && !queue_empty(&z->pages.any_free_foreign)))
4748
4749
4750 #endif /* ZONE_DEBUG */
4751
4752
4753 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4754
4755 #if DEBUG || DEVELOPMENT
4756
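/*
 * zone_copy_all_allocations_inqueue() (descriptive note): for every page of
 * metadata on 'queue', record each possible element slot on that page into
 * 'elems', then walk the page's freelist and strike the free slots back out
 * of the array. Freelist 'next' pointers are stored XORed with
 * zp_nopoison_cookie, hence the un-obfuscation when following them. Returns
 * the array cursor advanced past the in-use elements that were recorded.
 */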
4757 static uintptr_t *
4758 zone_copy_all_allocations_inqueue(zone_t z, queue_head_t * queue, uintptr_t * elems)
4759 {
4760 struct zone_page_metadata *page_meta;
4761 vm_offset_t free, elements;
4762 vm_offset_t idx, numElements, freeCount, bytesAvail, metaSize;
4763
4764 queue_iterate(queue, page_meta, struct zone_page_metadata *, pages)
4765 {
4766 elements = get_zone_page(page_meta);
4767 bytesAvail = ptoa(page_meta->page_count);
4768 freeCount = 0;
4769 if (z->allows_foreign && !from_zone_map(elements, z->elem_size)) {
4770 metaSize = (sizeof(struct zone_page_metadata) + ZONE_ELEMENT_ALIGNMENT - 1) & ~(ZONE_ELEMENT_ALIGNMENT - 1);
4771 bytesAvail -= metaSize;
4772 elements += metaSize;
4773 }
4774 numElements = bytesAvail / z->elem_size;
4775 // construct array of all possible elements
4776 for (idx = 0; idx < numElements; idx++) {
4777 elems[idx] = INSTANCE_PUT(elements + idx * z->elem_size);
4778 }
4779 // remove from the array all free elements
4780 free = (vm_offset_t)page_metadata_get_freelist(page_meta);
4781 while (free) {
4782 // find idx of free element
4783 for (idx = 0; (idx < numElements) && (elems[idx] != INSTANCE_PUT(free)); idx++) {
4784 }
4785 assert(idx < numElements);
4786 // remove it
4787 bcopy(&elems[idx + 1], &elems[idx], (numElements - (idx + 1)) * sizeof(elems[0]));
4788 numElements--;
4789 freeCount++;
4790 // next free element
4791 vm_offset_t *primary = (vm_offset_t *) free;
4792 free = *primary ^ zp_nopoison_cookie;
4793 }
4794 elems += numElements;
4795 }
4796
4797 return elems;
4798 }
4799
4800 kern_return_t
4801 zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * refCon)
4802 {
4803 uintptr_t zbt[MAX_ZTRACE_DEPTH];
4804 zone_t zone;
4805 uintptr_t * array;
4806 uintptr_t * next;
4807 uintptr_t element, bt;
4808 uint32_t idx, count, found;
4809 uint32_t btidx, btcount, nobtcount, btfound;
4810 uint32_t elemSize;
4811 uint64_t maxElems;
4812 unsigned int max_zones;
4813 kern_return_t kr;
4814
4815 simple_lock(&all_zones_lock, &zone_locks_grp);
4816 max_zones = num_zones;
4817 simple_unlock(&all_zones_lock);
4818
4819 for (idx = 0; idx < max_zones; idx++) {
4820 if (!strncmp(zoneName, zone_array[idx].zone_name, nameLen)) {
4821 break;
4822 }
4823 }
4824 if (idx >= max_zones) {
4825 return KERN_INVALID_NAME;
4826 }
4827 zone = &zone_array[idx];
4828
4829 elemSize = (uint32_t) zone->elem_size;
4830 maxElems = ptoa(zone->page_count) / elemSize;
4831
4832 if ((zone->alloc_size % elemSize)
4833 && !leak_scan_debug_flag) {
4834 return KERN_INVALID_CAPABILITY;
4835 }
4836
4837 kr = kmem_alloc_kobject(kernel_map, (vm_offset_t *) &array,
4838 maxElems * sizeof(uintptr_t), VM_KERN_MEMORY_DIAG);
4839 if (KERN_SUCCESS != kr) {
4840 return kr;
4841 }
4842
4843 lock_zone(zone);
4844
4845 next = array;
4846 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.any_free_foreign, next);
4847 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.intermediate, next);
4848 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.all_used, next);
4849 count = (uint32_t)(next - array);
4850
4851 unlock_zone(zone);
4852
4853 zone_leaks_scan(array, count, (uint32_t)zone->elem_size, &found);
4854 assert(found <= count);
4855
4856 for (idx = 0; idx < count; idx++) {
4857 element = array[idx];
4858 if (kInstanceFlagReferenced & element) {
4859 continue;
4860 }
4861 element = INSTANCE_PUT(element) & ~kInstanceFlags;
4862 }
4863
4864 if (zone->zlog_btlog && !corruption_debug_flag) {
4865 // btlog_copy_backtraces_for_elements will set kInstanceFlagReferenced on elements it found
4866 btlog_copy_backtraces_for_elements(zone->zlog_btlog, array, &count, elemSize, proc, refCon);
4867 }
4868
4869 for (nobtcount = idx = 0; idx < count; idx++) {
4870 element = array[idx];
4871 if (!element) {
4872 continue;
4873 }
4874 if (kInstanceFlagReferenced & element) {
4875 continue;
4876 }
4877 element = INSTANCE_PUT(element) & ~kInstanceFlags;
4878
4879 // see if we can find any backtrace left in the element
4880 btcount = (typeof(btcount))(zone->elem_size / sizeof(uintptr_t));
4881 if (btcount >= MAX_ZTRACE_DEPTH) {
4882 btcount = MAX_ZTRACE_DEPTH - 1;
4883 }
4884 for (btfound = btidx = 0; btidx < btcount; btidx++) {
4885 bt = ((uintptr_t *)element)[btcount - 1 - btidx];
4886 if (!VM_KERNEL_IS_SLID(bt)) {
4887 break;
4888 }
4889 zbt[btfound++] = bt;
4890 }
4891 if (btfound) {
4892 (*proc)(refCon, 1, elemSize, &zbt[0], btfound);
4893 } else {
4894 nobtcount++;
4895 }
4896 }
4897 if (nobtcount) {
4898 // fake backtrace when we found nothing
4899 zbt[0] = (uintptr_t) &zalloc;
4900 (*proc)(refCon, nobtcount, elemSize, &zbt[0], 1);
4901 }
4902
4903 kmem_free(kernel_map, (vm_offset_t) array, maxElems * sizeof(uintptr_t));
4904
4905 return KERN_SUCCESS;
4906 }
4907
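/*
 * kdp_is_in_zone() (descriptive note): debugger-side helper that reports
 * whether 'addr' is an element of the zone named 'zone_name', by resolving
 * the address to its owning zone via zone_element_size() and comparing names.
 */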
4908 boolean_t
4909 kdp_is_in_zone(void *addr, const char *zone_name)
4910 {
4911 zone_t z;
4912 return zone_element_size(addr, &z) && !strcmp(z->zone_name, zone_name);
4913 }
4914
4915 boolean_t
4916 run_zone_test(void)
4917 {
4918 unsigned int i = 0, max_iter = 5;
4919 void * test_ptr;
4920 zone_t test_zone;
4921
4922 simple_lock(&zone_test_lock, &zone_locks_grp);
4923 if (!zone_test_running) {
4924 zone_test_running = TRUE;
4925 } else {
4926 simple_unlock(&zone_test_lock);
4927 printf("run_zone_test: Test already running.\n");
4928 return FALSE;
4929 }
4930 simple_unlock(&zone_test_lock);
4931
4932 printf("run_zone_test: Testing zinit(), zalloc(), zfree() and zdestroy() on zone \"test_zone_sysctl\"\n");
4933
4934 /* zinit() and zdestroy() a zone with the same name a bunch of times, verify that we get back the same zone each time */
4935 do {
4936 test_zone = zinit(sizeof(uint64_t), 100 * sizeof(uint64_t), sizeof(uint64_t), "test_zone_sysctl");
4937 if (test_zone == NULL) {
4938 printf("run_zone_test: zinit() failed\n");
4939 return FALSE;
4940 }
4941
4942 #if KASAN_ZALLOC
4943 if (test_zone_ptr == NULL && zone_free_count(test_zone) != 0) {
4944 #else
4945 if (zone_free_count(test_zone) != 0) {
4946 #endif
4947 printf("run_zone_test: free count is not zero\n");
4948 return FALSE;
4949 }
4950
4951 if (test_zone_ptr == NULL) {
4952 /* Stash the zone pointer returned on the first zinit */
4953 printf("run_zone_test: zone created for the first time\n");
4954 test_zone_ptr = test_zone;
4955 } else if (test_zone != test_zone_ptr) {
4956 printf("run_zone_test: old zone pointer and new zone pointer don't match\n");
4957 return FALSE;
4958 }
4959
4960 test_ptr = zalloc(test_zone);
4961 if (test_ptr == NULL) {
4962 printf("run_zone_test: zalloc() failed\n");
4963 return FALSE;
4964 }
4965 zfree(test_zone, test_ptr);
4966
4967 zdestroy(test_zone);
4968 i++;
4969
4970 printf("run_zone_test: Iteration %d successful\n", i);
4971 } while (i < max_iter);
4972
4973 printf("run_zone_test: Test passed\n");
4974
4975 simple_lock(&zone_test_lock, &zone_locks_grp);
4976 zone_test_running = FALSE;
4977 simple_unlock(&zone_test_lock);
4978
4979 return TRUE;
4980 }
4981
4982 #endif /* DEBUG || DEVELOPMENT */