/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#ifndef _KERN_ZALLOC_INTERNAL_H_
#define _KERN_ZALLOC_INTERNAL_H_

#include <kern/zalloc.h>
#include <kern/locks.h>
#include <kern/btlog.h>
#include <kern/simple_lock.h>

#include <os/atomic_private.h>
#include <sys/queue.h>

#if KASAN
#include <san/kasan.h>
#include <kern/spl.h>
#endif /* KASAN */

/*!
 * @file <kern/zalloc_internal.h>
 *
 * @abstract
 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
 * kalloc subsystems.
 */

__BEGIN_DECLS

#pragma GCC visibility push(hidden)

#if CONFIG_GZALLOC
typedef struct gzalloc_data {
	uint32_t     gzfc_index;
	vm_offset_t *gzfc;
} gzalloc_data_t;
#endif

/*
 * A zone is a collection of fixed-size blocks for which there
 * is fast allocation/deallocation access. Kernel routines can
 * use zones to manage data structures dynamically, creating a zone
 * for each type of data structure to be managed.
 */

/*!
 * @typedef zone_pva_t
 *
 * @brief
 * Type used to point to a page virtual address in the zone allocator.
 *
 * @description
 * - Valid pages have the top bit set.
 * - 0 represents the "NULL" page.
 * - Non-zero values with the top bit cleared do not represent any valid page;
 *   the zone freelists use this space to encode "queue" addresses.
 */
typedef struct zone_packed_virtual_address {
	uint32_t packed_address;
} zone_pva_t;
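
/*
 * Illustrative sketch (hypothetical helpers, not part of this header):
 * classifying a zone_pva_t according to the encoding described above.
 */
#if 0 /* example only */
static inline bool
zone_pva_is_page(zone_pva_t pva)
{
	/* valid pages have the top bit set */
	return (pva.packed_address & 0x80000000u) != 0;
}

static inline bool
zone_pva_is_null(zone_pva_t pva)
{
	/* 0 encodes the "NULL" page */
	return pva.packed_address == 0;
}

static inline bool
zone_pva_is_queue(zone_pva_t pva)
{
	/* non-zero with the top bit clear: freelist "queue" encoding */
	return pva.packed_address != 0 &&
	    (pva.packed_address & 0x80000000u) == 0;
}
#endif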

/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
	uint64_t    zs_mem_allocated;
	uint64_t    zs_mem_freed;
	uint32_t    zs_poison_seqno;    /* counter for poisoning every N frees */
	uint32_t    zs_alloc_rr;        /* allocation round-robin bias */
};

STAILQ_HEAD(zone_depot, zone_magazine);

struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL otherwise.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;
	const char         *z_name;
	struct zone_view   *z_views;

	struct thread      *z_expander;
	struct zone_cache *__zpercpu z_pcpu_cache;

	uint16_t            z_chunk_pages;  /* size used for more memory in pages  */
	uint16_t            z_chunk_elems;  /* count of allocations per chunk      */
	uint16_t            z_elems_rsv;    /* maintain a free reserve of elements */
	uint16_t            z_elem_size;    /* size of an element                  */

	uint64_t
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1, /* zone is (being) destroyed */
	    z_async_refilling  :1, /* asynchronous allocation pending? */
	    z_replenish_wait   :1, /* someone is waiting on the replenish thread */
	    z_expanding_wait   :1, /* is thread waiting for expansion? */
	    z_expander_vm_priv :1, /* a vm privileged thread is expanding */

	/*
	 * Security sensitive configuration bits
	 */
	    z_allows_foreign   :1, /* allow non-zalloc space */
	    z_destructible     :1, /* zone can be zdestroy()ed */
	    kalloc_heap        :2, /* zone_kheap_id_t when part of a kalloc heap */
	    z_noencrypt        :1, /* do not encrypt pages when hibernating */
	    z_submap_idx       :2, /* a Z_SUBMAP_IDX_* value */
	    z_va_sequester     :1, /* page sequester: no VA reuse with other zones */
	    z_free_zeroes      :1, /* clear memory of elements on free and assert on alloc */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1, /* the zone is percpu */
	    z_permanent        :1, /* the zone allocations are permanent */
	    z_replenishes      :1, /* uses the async replenish mechanism for VM */
	    z_nocaching        :1, /* disallow zone caching for this zone */
	    collectable        :1, /* garbage collect empty pages */
	    exhaustible        :1, /* merely return if empty? */
	    expandable         :1, /* expand zone (with message)? */
	    no_callout         :1,

	    _reserved          :26,

	/*
	 * Debugging features
	 */
	    alignment_required :1, /* element alignment needs to be preserved */
	    gzalloc_tracked    :1, /* this zone is tracked by gzalloc */
	    gzalloc_exempt     :1, /* this zone doesn't participate with gzalloc */
	    kasan_fakestacks   :1,
	    kasan_noquarantine :1, /* whether to use the kasan quarantine */
	    tag_zone_index     :7,
	    tags               :1,
	    tags_inline        :1,
	    zleak_on           :1, /* Are we collecting allocation information? */
	    zone_logging       :1; /* Enable zone logging for this zone. */

	/*
	 * often mutated fields
	 */

	lck_spin_t          z_lock;
	struct zone_depot   z_recirc;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 *
	 * Note: Due to the index packing in page metadata,
	 *       these pointers can't be at the beginning of the zone struct.
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_contention_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
	 *
	 * z_contention_cur:
	 *   count of recorded contentions that will be folded into
	 *   z_contention_wma at the next period.
	 *
	 * z_recirc_cur:
	 *   number of magazines in the recirculation depot.
	 *
	 * z_elems_free:
	 *   number of free elements in the zone.
	 *
	 * z_elems_{min,max}:
	 *   tracks the low/high watermark of z_elems_free for the current
	 *   weighted moving average period.
	 *
	 * z_elems_free_wss:
	 *   weighted moving average of the (z_elems_free_max - z_elems_free_min)
	 *   amplitude, which is used by the GC for trim operations.
	 *
	 * z_elems_avail:
	 *   total number of elements in the zone.
	 */
#define Z_CONTENTION_WMA_UNIT (1u << 8)
	uint32_t            z_contention_wma;
	uint32_t            z_contention_cur;
	uint32_t            z_recirc_cur;
	uint32_t            z_elems_free_max;
	uint32_t            z_elems_free_wss;
	uint32_t            z_elems_free_min;
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */

#if CONFIG_ZLEAKS
	uint32_t            zleak_capture;  /* per-zone counter for capturing every N allocations */
#endif
#if CONFIG_GZALLOC
	gzalloc_data_t      gz;
#endif
#if KASAN_ZALLOC
	uint32_t            z_kasan_redzone;
	spl_t               z_kasan_spl;
#endif
#if DEBUG || DEVELOPMENT || CONFIG_ZLEAKS
	/* zone logging structure to hold stacks and element references to those stacks. */
	btlog_t            *zlog_btlog;
#endif
};


__options_decl(zone_security_options_t, uint64_t, {
	/*
	 * Zsecurity option to enable sequestering VA of zones
	 */
	ZSECURITY_OPTIONS_SEQUESTER             = 0x00000001,
	/*
	 * Zsecurity option to enable creating separate kalloc zones for
	 * bags of bytes
	 */
	ZSECURITY_OPTIONS_SUBMAP_USER_DATA      = 0x00000004,
	/*
	 * Zsecurity option to enable sequestering of kalloc zones used by
	 * kexts (KHEAP_KEXT heap)
	 */
	ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC = 0x00000008,
	/*
	 * Zsecurity option to enforce strict free of IOKit objects to the
	 * zone or heap they were allocated from.
	 */
	ZSECURITY_OPTIONS_STRICT_IOKIT_FREE     = 0x00000010,
});

#define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
#define KALLOC_DLUT_SIZE    (2048 / KALLOC_MINALIGN)

struct kheap_zones {
	struct kalloc_zone_cfg  *cfg;
	struct kalloc_heap      *views;
	zone_kheap_id_t          heap_id;
	uint16_t                 max_k_zone;
	uint8_t                  dlut[KALLOC_DLUT_SIZE]; /* table of indices into k_zone[] */
	uint8_t                  k_zindex_start;
	/* If there's no hit in the DLUT, then start searching from k_zindex_start. */
	zone_t                  *k_zone;
};

extern zone_security_options_t zsecurity_options;
extern zone_id_t        _Atomic num_zones;
extern uint32_t         zone_view_count;
extern struct zone      zone_array[];
extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
extern bool             panic_include_zprint;
#if CONFIG_ZLEAKS
extern bool             panic_include_ztrace;
extern struct ztrace   *top_ztrace;
#endif
extern mach_memory_info_t *panic_kext_memory_info;
extern vm_size_t        panic_kext_memory_size;
extern unsigned int     zone_map_jetsam_limit;

#define zone_index_foreach(i) \
	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
	    i < num_zones_##i; i++)

#define zone_foreach(z) \
	for (zone_t z = &zone_array[1], \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z; z++)
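
/*
 * Example usage of zone_foreach() (illustrative sketch, example only):
 * walking every zone registered so far and printing initialized ones.
 */
#if 0 /* example only */
static void
log_zone_names(void)
{
	zone_foreach(z) {
		if (z->z_self == z) { /* skip zones that aren't fully initialized */
			printf("zone: %s\n", z->z_name);
		}
	}
}
#endif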

struct zone_map_range {
	vm_offset_t min_address;
	vm_offset_t max_address;
} __attribute__((aligned(2 * sizeof(vm_offset_t))));

__pure2
static inline vm_offset_t
zone_elem_size(zone_t zone)
{
	return zone->z_elem_size;
}

static inline uint32_t
zone_count_allocated(zone_t zone)
{
	return zone->z_elems_avail - zone->z_elems_free;
}

static inline vm_size_t
zone_scale_for_percpu(zone_t zone, vm_size_t size)
{
	if (zone->z_percpu) {
		size *= zpercpu_count();
	}
	return size;
}

static inline vm_size_t
zone_size_wired(zone_t zone)
{
	/*
	 * this either requires the zone lock to be held,
	 * or should be used for statistics purposes only.
	 */
	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
	return zone_scale_for_percpu(zone, size);
}

static inline vm_size_t
zone_size_free(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone->z_elems_free);
}

static inline vm_size_t
zone_size_allocated(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
}

static inline vm_size_t
zone_size_wasted(zone_t zone)
{
	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
}
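
/*
 * How the accessors above relate (derived from their definitions;
 * everything is scaled by zpercpu_count() for per-cpu zones):
 *
 *     allocated elements = z_elems_avail - z_elems_free
 *     size_allocated     = z_elem_size * allocated elements
 *     size_free          = z_elem_size * z_elems_free
 *     size_wasted        = size_wired - z_elem_size * z_elems_avail
 *
 * i.e. "wasted" is the wired memory that backs no element at all
 * (per-chunk slack), not the memory held by free elements.
 */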

/*
 * Backs the sysctl kern.zones_collectable_bytes, used by memory_maintenance
 * to check whether a userspace reboot is needed. The only other way to query
 * this information is via mach_memory_info(), which is unavailable on
 * release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);

/*!
 * @enum zone_gc_level_t
 *
 * @const ZONE_GC_TRIM
 * Request a trimming GC: it will trim allocations in excess
 * of the working set size estimate only.
 *
 * @const ZONE_GC_DRAIN
 * Request a draining GC: this is an aggressive mode that will
 * cause all caches to be drained and all free pages returned to the system.
 *
 * @const ZONE_GC_JETSAM
 * Request to consider a jetsam, and then fall back to @c ZONE_GC_TRIM or
 * @c ZONE_GC_DRAIN depending on the state of the zone map.
 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
 * request a @c ZONE_GC_JETSAM level.
 */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
	ZONE_GC_TRIM,
	ZONE_GC_DRAIN,
	ZONE_GC_JETSAM,
});

/*!
 * @function zone_gc
 *
 * @brief
 * Reduces memory used by zones by trimming caches and freelists.
 *
 * @discussion
 * @c zone_gc() is called:
 * - by the pageout daemon when the system needs more free pages.
 * - by the VM when contiguous page allocation requests get stuck
 *   (see vm_page_find_contiguous()).
 *
 * @param level     The zone GC level requested.
 */
extern void     zone_gc(zone_gc_level_t level);

extern void     zone_gc_trim(void);
extern void     zone_gc_drain(void);

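/*
 * Plausible sketch (an assumption based on the declarations above, not
 * quoted from zalloc.c): zone_gc_trim() and zone_gc_drain() read as thin
 * wrappers selecting a zone_gc_level_t for zone_gc().
 */
#if 0 /* example only */
void
zone_gc_trim(void)
{
	zone_gc(ZONE_GC_TRIM);
}

void
zone_gc_drain(void)
{
	zone_gc(ZONE_GC_DRAIN);
}
#endif
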
#define ZONE_WSS_UPDATE_PERIOD  10
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone.
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void     compute_zone_working_set_size(void *);

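/*
 * Sketch of the averaging described above (the assumed shape of the
 * update, shown for clarity only): with a 75% weight on history, each
 * period folds the newly observed amplitude into the average as
 *
 *     wss_new = (3 * wss_old + observed) / 4
 *
 * so a sample's influence decays by a factor of 3/4 every
 * ZONE_WSS_UPDATE_PERIOD seconds, which leaves roughly the last
 * minute as the dominating contribution.
 */
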
/* Debug logging for zone-map-exhaustion jetsams. */
extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/* Bootstrap zone module (create zone zone) */
extern void     zone_bootstrap(void);

/*!
 * @function zone_foreign_mem_init
 *
 * @brief
 * Steals memory from the pmap (prior to initialization of zalloc)
 * for the special VM zones that allow foreign memory, and records
 * the range so as to facilitate range checking in zfree.
 */
__startup_func
extern vm_offset_t zone_foreign_mem_init(
	vm_size_t       size);

/*!
 * @function zone_get_foreign_alloc_size
 *
 * @brief
 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
 * of the allocation granule for the zone with the given creation flags and
 * element size.
 */
__startup_func
extern vm_size_t zone_get_foreign_alloc_size(
	const char          *name __unused,
	vm_size_t            elem_size,
	zone_create_flags_t  flags,
	uint16_t             min_pages);

/*!
 * @function zone_cram_foreign
 *
 * @brief
 * Cram memory allocated with @c zone_foreign_mem_init() into a zone.
 *
 * @param zone          The zone to cram memory into.
 * @param newmem        The base address for the memory to cram.
 * @param size          The size of the memory to cram into the zone.
 */
__startup_func
extern void     zone_cram_foreign(
	zone_t          zone,
	vm_offset_t     newmem,
	vm_size_t       size);

extern bool     zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);

extern void     zone_map_sizes(
	vm_map_size_t  *psize,
	vm_map_size_t  *pfree,
	vm_map_size_t  *plargest_free);

extern bool
zone_map_nearing_exhaustion(void);

#if defined(__LP64__)
#define ZONE_POISON 0xdeadbeefdeadbeef
#else
#define ZONE_POISON 0xdeadbeef
#endif

static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
}
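
/*
 * Illustrative sketch (example only): a VM tag travels in the high bits
 * of zalloc_flags_t, and zalloc_flags_get_tag() recovers it. The encoding
 * below simply mirrors the decoding above; it is an assumption, not a
 * quote of the allocation path.
 */
#if 0 /* example only */
	zalloc_flags_t flags = Z_WAITOK |
	    (((zalloc_flags_t)VM_KERN_MEMORY_KALLOC << Z_VM_TAG_SHIFT) &
	    Z_VM_TAG_MASK);
	vm_tag_t tag = zalloc_flags_get_tag(flags);
	assert(tag == VM_KERN_MEMORY_KALLOC);
#endif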

extern void    *zalloc_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	zalloc_flags_t  flags);

extern void     zfree_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	void           *addr);

/*!
 * @function zone_replenish_configure
 *
 * @brief
 * Used by zones backing the VM to maintain a reserve of free elements.
 *
 * @discussion
 * This function should not be used by anything other than the VM.
 */
extern void     zone_replenish_configure(
	zone_t          zone);

extern vm_size_t zone_element_size(
	void           *addr,
	zone_t         *z);

/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require: it checks whether a given
 * pointer belongs to the specified zone. It should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone          the zone the address needs to belong to.
 * @param addr          the element address to check.
 */
extern bool     zone_owns(
	zone_t          zone,
	void           *addr);

/*
 * Structure for keeping track of a backtrace, used for leak detection.
 * This is in the .h file because it is used during panic, see kern/debug.c.
 * A non-zero size indicates that the trace is in use.
 */
struct ztrace {
	vm_size_t   zt_size;                    /* How much memory are all the allocations referring to this trace taking up? */
	uint32_t    zt_depth;                   /* depth of stack (0 to MAX_ZTRACE_DEPTH) */
	void       *zt_stack[MAX_ZTRACE_DEPTH]; /* series of return addresses from OSBacktrace */
	uint32_t    zt_collisions;              /* How many times did a different stack land here while it was occupied? */
	uint32_t    zt_hit_count;               /* for determining effectiveness of hash function */
};

#ifndef VM_MAX_TAG_ZONES
#error MAX_TAG_ZONES
#endif
#if VM_MAX_TAG_ZONES

extern uint32_t zone_index_from_tag_index(
	uint32_t        tag_zone_index,
	vm_size_t      *elem_size);

#endif /* VM_MAX_TAG_ZONES */

static inline void
zone_lock(zone_t zone)
{
#if KASAN_ZALLOC
	spl_t s = 0;
	/*
	 * kasan fakestack zones can be exercised at raised interrupt level,
	 * so take the spin lock with spl raised and remember the old level.
	 */
	if (zone->kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_ZALLOC */
	lck_spin_lock(&zone->z_lock);
#if KASAN_ZALLOC
	zone->z_kasan_spl = s;
#endif /* KASAN_ZALLOC */
}

static inline void
zone_unlock(zone_t zone)
{
#if KASAN_ZALLOC
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_ZALLOC */
	lck_spin_unlock(&zone->z_lock);
#if KASAN_ZALLOC
	if (zone->kasan_fakestacks) {
		splx(s); /* restore the level saved by zone_lock() */
	}
#endif /* KASAN_ZALLOC */
}
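
/*
 * Typical pattern (illustrative, example only): the often-mutated fields
 * of struct zone are read or written under z_lock.
 */
#if 0 /* example only */
	zone_lock(zone);
	uint32_t free_now = zone->z_elems_free;
	zone_unlock(zone);
#endif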

#if CONFIG_GZALLOC
void     gzalloc_init(vm_size_t);
void     gzalloc_zone_init(zone_t);
void     gzalloc_empty_free_cache(zone_t);
boolean_t gzalloc_enabled(void);

vm_offset_t gzalloc_alloc(zone_t, zone_stats_t zstats, zalloc_flags_t flags);
void     gzalloc_free(zone_t, zone_stats_t zstats, void *);
boolean_t gzalloc_element_size(void *, zone_t *, vm_size_t *);
#endif /* CONFIG_GZALLOC */

#define MAX_ZONE_NAME   32      /* max length of a zone name we can take from the boot-args */
int track_this_zone(const char *zonename, const char *logname);

#if DEBUG || DEVELOPMENT
extern boolean_t run_zone_test(void);
extern void     zone_gc_replenish_test(void);
extern void     zone_alloc_replenish_test(void);
extern vm_size_t zone_element_info(void *addr, vm_tag_t *ptag);
extern bool     zalloc_disable_copyio_check;
#else
#define zalloc_disable_copyio_check false
#endif /* DEBUG || DEVELOPMENT */

#pragma GCC visibility pop

__END_DECLS

#endif /* _KERN_ZALLOC_INTERNAL_H_ */