#include <kern/locks.h>
#include <kern/btlog.h>
#include <kern/simple_lock.h>
-#include <kern/zcache_internal.h>
#include <os/atomic_private.h>
+#include <sys/queue.h>
#if KASAN
-#include <sys/queue.h>
#include <san/kasan.h>
-/*
- * Set to 0 to debug poisoning and ZC_ZFREE_CLEARMEM validation under kasan.
- * Otherwise they are double-duty with what kasan already does.
- */
-#define ZALLOC_ENABLE_POISONING 0
-#else /* !KASAN */
-#define ZALLOC_ENABLE_POISONING 1
+#include <kern/spl.h>
-#endif /* !KASAN */
+#endif /* KASAN */
-#if DEBUG || DEVELOPMENT
-#define ZALLOC_DETAILED_STATS 1
-#else
-#define ZALLOC_DETAILED_STATS 0
-#endif
-
/*!
* @file <kern/zalloc_internal.h>
*
struct zone_stats {
uint64_t zs_mem_allocated;
uint64_t zs_mem_freed;
-#if ZALLOC_DETAILED_STATS
- uint64_t zs_mem_wasted;
-#endif /* ZALLOC_DETAILED_STATS */
+ uint32_t zs_poison_seqno; /* counter for poisoning every N frees */
+ uint32_t zs_alloc_rr; /* allocation round-robin bias */
};
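+
+/*
+ * Editorial sketch (not verbatim xnu): zs_poison_seqno presumably
+ * gates poisoning to every Nth free, along the lines of
+ *
+ *     if (zstats->zs_poison_seqno++ % N == 0) {
+ *         fill the freed element with ZONE_POISON;
+ *     }
+ *
+ * for some hypothetical poisoning period N.
+ */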
+STAILQ_HEAD(zone_depot, zone_magazine);
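+/*
+ * For reference, STAILQ_HEAD() from <sys/queue.h> expands to a
+ * singly-linked tail queue head:
+ *
+ *     struct zone_depot {
+ *         struct zone_magazine  *stqh_first;   (first magazine)
+ *         struct zone_magazine **stqh_last;    (address of last next pointer)
+ *     };
+ */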
+
struct zone {
/*
* Readonly / rarely written fields
zone_stats_t z_stats;
const char *z_name;
struct zone_view *z_views;
-#ifdef CONFIG_ZCACHE
- struct zone_cache zcache;
-#endif /* CONFIG_ZCACHE */
- uint16_t alloc_pages; /* size used for more memory in pages */
- uint16_t z_elem_size; /* size of an element */
- uint16_t pcpu_elem_size;
- uint16_t prio_refill_count; /* if !=0 , refill to this count */
- uint32_t page_count_max; /* how large can this zone grow */
+ struct thread *z_expander;
+ struct zone_cache *__zpercpu z_pcpu_cache;
- uint32_t page_count_hwm; /* page_count high watermark */
- uint32_t page_count; /* number of pages used by this zone */
- uint32_t countavail; /* Number of elements available */
+ uint16_t z_chunk_pages; /* size of an allocation chunk, in pages */
+ uint16_t z_chunk_elems; /* count of allocations per chunk */
+ uint16_t z_elems_rsv; /* maintain a free reserve of elements */
+ uint16_t z_elem_size; /* size of an element */
uint64_t
/*
* Lifecycle state (Mutable after creation)
*/
- destroyed :1, /* zone is (being) destroyed */
- expanding_no_vm_priv:1, /* zone expanding via a non-vm_privileged thread */
- expanding_vm_priv :1, /* zone expanding via a vm_privileged thread */
- async_pending :1, /* asynchronous allocation pending? */
- waiting :1, /* is thread waiting for expansion? */
- zone_replenishing :1,
+ z_destroyed :1, /* zone is (being) destroyed */
+ z_async_refilling :1, /* asynchronous allocation pending? */
+ z_replenish_wait :1, /* someone is waiting on the replenish thread */
+ z_expanding_wait :1, /* is thread waiting for expansion? */
+ z_expander_vm_priv :1, /* a vm privileged thread is expanding */
/*
* Security sensitive configuration bits
*/
- allows_foreign :1, /* allow non-zalloc space */
- destructible :1, /* zone can be zdestroy()ed */
+ z_allows_foreign :1, /* allow non-zalloc space */
+ z_destructible :1, /* zone can be zdestroy()ed */
kalloc_heap :2, /* zone_kheap_id_t when part of a kalloc heap */
- noencrypt :1, /* do not encrypt pages when hibernating */
- submap_idx :2, /* a Z_SUBMAP_IDX_* value */
- va_sequester :1, /* page sequester: no VA reuse with other zones */
- zfree_clear_mem :1, /* clear memory of elements on free and assert on alloc */
+ z_noencrypt :1, /* do not encrypt pages when hibernating */
+ z_submap_idx :2, /* a Z_SUBMAP_IDX_* value */
+ z_va_sequester :1, /* page sequester: no VA reuse with other zones */
+ z_free_zeroes :1, /* clear memory of elements on free and assert on alloc */
/*
* Behavior configuration bits
*/
+ z_percpu :1, /* the zone is percpu */
+ z_permanent :1, /* the zone allocations are permanent */
+ z_replenishes :1, /* uses the async replenish mechanism for VM */
+ z_nocaching :1, /* disallow zone caching for this zone */
collectable :1, /* garbage collect empty pages */
- cpu_cache_enabled :1,
- permanent :1, /* the zone allocations are permanent */
exhaustible :1, /* merely return if empty? */
expandable :1, /* expand zone (with message)? */
no_callout :1,
- percpu :1, /* the zone is percpu */
_reserved :26,
* often mutated fields
*/
- decl_simple_lock_data(, lock);
+ lck_spin_t z_lock;
+ struct zone_depot z_recirc;
+
+ /*
+ * Page accounting (wired / VA)
+ *
+ * Those numbers are unscaled for z_percpu zones
+ * (zone_scale_for_percpu() needs to be used to find the true value).
+ */
+ uint32_t z_wired_max; /* how large can this zone grow */
+ uint32_t z_wired_hwm; /* z_wired_cur high watermark */
+ uint32_t z_wired_cur; /* number of pages used by this zone */
+ uint32_t z_wired_empty; /* pages collectable by GC */
+ uint32_t z_va_cur; /* amount of VA used by this zone */
/*
* list of metadata structs, which maintain per-page free element lists
* Note: Due to the index packing in page metadata,
* these pointers can't be at the beginning of the zone struct.
*/
- zone_pva_t pages_any_free_foreign; /* foreign pages crammed into zone */
- zone_pva_t pages_all_used_foreign;
- zone_pva_t pages_all_free;
- zone_pva_t pages_intermediate;
- zone_pva_t pages_all_used;
- zone_pva_t pages_sequester; /* sequestered pages - allocated VA with no populated pages */
-
- uint32_t zp_count; /* counter for poisoning every N frees */
- uint32_t countfree; /* Number of free elements */
- uint32_t allfree_page_count; /* Number of pages collectable by GC */
- uint32_t sequester_page_count;
+ zone_pva_t z_pageq_empty; /* populated, completely empty pages */
+ zone_pva_t z_pageq_partial; /* populated, partially filled pages */
+ zone_pva_t z_pageq_full; /* populated, completely full pages */
+ zone_pva_t z_pageq_va; /* non-populated VA pages */
+
+ /*
+ * Zone statistics
+ *
+ * z_contention_wma:
+ * weighted moving average of the number of contentions per second,
+ * in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
+ *
+ * z_contention_cur:
+ * count of recorded contentions that will be fused in z_contention_wma
+ * at the next period.
+ *
+ * z_recirc_cur:
+ * number of magazines in the recirculation depot.
+ *
+ * z_elems_free:
+ * number of free elements in the zone.
+ *
+ * z_elems_{min,max}:
+ * tracks the low/high watermark of z_elems_free for the current
+ * weighted moving average period.
+ *
+ * z_elems_free_wss:
+ * weighted moving average of the (z_elems_free_max - z_elems_free_min)
+ * amplitude, which is used by the GC for trim operations.
+ *
+ * z_elems_avail:
+ * number of elements in the zone (at all).
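+ *
+ * Editorial sketch: given the 75% EMA weighting documented at
+ * compute_zone_working_set_size() below, the per-period updates
+ * plausibly take the form (assumed, not verbatim xnu):
+ *
+ *     z_contention_wma = (3 * z_contention_wma +
+ *         Z_CONTENTION_WMA_UNIT * z_contention_cur / period) / 4;
+ *     z_elems_free_wss = (3 * z_elems_free_wss +
+ *         (z_elems_free_max - z_elems_free_min)) / 4;
+ *
+ * where period is the update period in seconds
+ * (see ZONE_WSS_UPDATE_PERIOD below).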
+ */
+#define Z_CONTENTION_WMA_UNIT (1u << 8)
+ uint32_t z_contention_wma;
+ uint32_t z_contention_cur;
+ uint32_t z_recirc_cur;
+ uint32_t z_elems_free_max;
+ uint32_t z_elems_free_wss;
+ uint32_t z_elems_free_min;
+ uint32_t z_elems_free; /* Number of free elements */
+ uint32_t z_elems_avail; /* Number of elements available */
#if CONFIG_ZLEAKS
uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */
gzalloc_data_t gz;
#endif
#if KASAN_ZALLOC
- vm_size_t kasan_redzone;
+ uint32_t z_kasan_redzone;
+ spl_t z_kasan_spl;
#endif
#if DEBUG || DEVELOPMENT || CONFIG_ZLEAKS
/* zone logging structure to hold stacks and element references to those stacks. */
};
extern zone_security_options_t zsecurity_options;
-extern uint32_t _Atomic num_zones;
+extern zone_id_t _Atomic num_zones;
extern uint32_t zone_view_count;
extern struct zone zone_array[];
-extern lck_grp_t zone_locks_grp;
extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
+extern bool panic_include_zprint;
+#if CONFIG_ZLEAKS
+extern bool panic_include_ztrace;
+extern struct ztrace *top_ztrace;
+#endif
+extern mach_memory_info_t *panic_kext_memory_info;
+extern vm_size_t panic_kext_memory_size;
+extern unsigned int zone_map_jetsam_limit;
#define zone_index_foreach(i) \
- for (uint32_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
+ for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
i < num_zones_##i; i++)
+#define zone_foreach(z) \
+ for (zone_t z = &zone_array[1], \
+ last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
+ z < last_zone_##z; z++)
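+
+/*
+ * Example (editorial, not from xnu): both iterators start at index 1,
+ * skipping the unused zone_array[0] slot, e.g.:
+ *
+ *     zone_foreach(z) {
+ *         printf("%s: %u free\n", z->z_name, z->z_elems_free);
+ *     }
+ */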
+
+struct zone_map_range {
+ vm_offset_t min_address;
+ vm_offset_t max_address;
+} __attribute__((aligned(2 * sizeof(vm_offset_t))));
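+/*
+ * Editorial note: a containment test over such a range is presumably
+ * of the half-open form
+ *
+ *     r->min_address <= addr && addr < r->max_address
+ *
+ * and the alignment attribute keeps the pair within one aligned
+ * double-word so it can be read or written as a unit.
+ */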
+
__pure2
static inline vm_offset_t
zone_elem_size(zone_t zone)
static inline uint32_t
zone_count_allocated(zone_t zone)
{
- return zone->countavail - zone->countfree;
+ return zone->z_elems_avail - zone->z_elems_free;
+}
+
+static inline vm_size_t
+zone_scale_for_percpu(zone_t zone, vm_size_t size)
+{
+ if (zone->z_percpu) {
+ size *= zpercpu_count();
+ }
+ return size;
}
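+
+/*
+ * Example (editorial): for a z_percpu zone on a system where
+ * zpercpu_count() == 8, a 64-byte element really consumes
+ * zone_scale_for_percpu(zone, 64) == 512 bytes of wired memory;
+ * the zone_size_* helpers below apply this scaling uniformly.
+ */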
static inline vm_size_t
* this either requires the zone lock,
* or is to be used for statistics purposes only.
*/
- return ptoa(os_atomic_load(&zone->page_count, relaxed));
+ vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
+ return zone_scale_for_percpu(zone, size);
}
static inline vm_size_t
zone_size_free(zone_t zone)
{
- return (vm_size_t)zone->pcpu_elem_size * zone->countfree;
+ return zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone->z_elems_free);
}
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
- return (vm_size_t)zone->pcpu_elem_size * zone_count_allocated(zone);
+ return zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
}
static inline vm_size_t
zone_size_wasted(zone_t zone)
{
- return zone_size_wired(zone) -
- (vm_size_t)zone->pcpu_elem_size * zone->countavail;
+ return zone_size_wired(zone) - zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
}
/*
*/
extern uint64_t get_zones_collectable_bytes(void);
-/*
- * zone_gc also checks if the zone maps are getting close to full and triggers
- * jetsams if needed, provided consider_jetsams is set to TRUE.
+/*!
+ * @enum zone_gc_level_t
+ *
+ * @const ZONE_GC_TRIM
+ * Request a trimming GC: it will trim allocations in excess
+ * of the working set size estimate only.
+ *
+ * @const ZONE_GC_DRAIN
+ * Request a draining GC: this is an aggressive mode that will
+ * cause all caches to be drained and all free pages returned to the system.
+ *
+ * @const ZONE_GC_JETSAM
+ * Request to consider a jetsam, and then fall back to @c ZONE_GC_TRIM or
+ * @c ZONE_GC_DRAIN depending on the state of the zone map.
+ * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
+ * request a @c ZONE_GC_JETSAM level.
+ */
+__enum_closed_decl(zone_gc_level_t, uint32_t, {
+ ZONE_GC_TRIM,
+ ZONE_GC_DRAIN,
+ ZONE_GC_JETSAM,
+});
+
+/*!
+ * @function zone_gc
+ *
+ * @brief
+ * Reduces memory used by zones by trimming caches and freelists.
*
- * To avoid deadlocks, we only pass a value of TRUE from within the
- * vm_pageout_garbage_collect thread.
+ * @discussion
+ * @c zone_gc() is called:
+ * - by the pageout daemon when the system needs more free pages.
+ * - by the VM when contiguous page allocation requests get stuck
+ * (see vm_page_find_contiguous()).
+ *
+ * @param level The zone GC level requested.
+ */
+extern void zone_gc(zone_gc_level_t level);
+
+extern void zone_gc_trim(void);
+extern void zone_gc_drain(void);
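+
+/*
+ * Editorial note: zone_gc_trim() and zone_gc_drain() are presumably
+ * convenience wrappers for zone_gc(ZONE_GC_TRIM) and
+ * zone_gc(ZONE_GC_DRAIN); per the discussion above, only
+ * vm_pageout_garbage_collect() should ever use ZONE_GC_JETSAM.
+ */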
+
+#define ZONE_WSS_UPDATE_PERIOD 10
+/*!
+ * @function compute_zone_working_set_size
+ *
+ * @brief
+ * Recomputes the working set size for every zone
+ *
+ * @discussion
+ * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
+ * computing an exponential moving average with a weight of 75%,
+ * so that the history of the last minute is the dominating factor.
*/
-extern void zone_gc(boolean_t consider_jetsams);
-extern void consider_zone_gc(boolean_t consider_jetsams);
+extern void compute_zone_working_set_size(void *);
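+
+/*
+ * Editorial note: with a weight of 3/4 on the previous average and a
+ * 10 second period, data older than a minute (6 periods back) retains
+ * a factor of (3/4)^6, about 18%, which is why the last minute is the
+ * dominating factor.
+ */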
/* Debug logging for zone-map-exhaustion jetsams. */
extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
/* Bootstrap zone module (create zone zone) */
extern void zone_bootstrap(void);
-/*
+/*!
+ * @function zone_foreign_mem_init
+ *
+ * @brief
* Steal memory from pmap (prior to initialization of zalloc)
* for the special vm zones that allow foreign memory and store
- * the range so as to facilitate range checking in zfree/zcram.
+ * the range so as to facilitate range checking in zfree.
*/
__startup_func
-extern vm_offset_t zone_foreign_mem_init(vm_size_t size);
+extern vm_offset_t zone_foreign_mem_init(
+ vm_size_t size);
-/*
- * Returns size (greater than min_pages) that is a multiple
- * of the allocation granule for the zone.
+/*!
+ * @function zone_get_foreign_alloc_size
+ *
+ * @brief
+ * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
+ * of the allocation granule for the zone with the given creation flags and
+ * element size.
*/
__startup_func
extern vm_size_t zone_get_foreign_alloc_size(
zone_create_flags_t flags,
uint16_t min_pages);
+/*!
+ * @function zone_cram_foreign
+ *
+ * @brief
+ * Cram memory allocated with @c zone_foreign_mem_init() into a zone.
+ *
+ * @param zone The zone to cram memory into.
+ * @param newmem The base address for the memory to cram.
+ * @param size The size of the memory to cram into the zone.
+ */
+__startup_func
+extern void zone_cram_foreign(
+ zone_t zone,
+ vm_offset_t newmem,
+ vm_size_t size);
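+
+/*
+ * Example (editorial): the expected startup sequence for a foreign
+ * zone is presumably
+ *
+ *     vm_offset_t mem = zone_foreign_mem_init(size);
+ *     ...
+ *     zone_cram_foreign(zone, mem, size);
+ *
+ * with size computed via zone_get_foreign_alloc_size().
+ */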
+
extern bool zone_maps_owned(
vm_address_t addr,
vm_size_t size);
vm_map_size_t *pfree,
vm_map_size_t *plargest_free);
-extern boolean_t
-is_zone_map_nearing_exhaustion(void);
+extern bool
+zone_map_nearing_exhaustion(void);
#if defined(__LP64__)
#define ZONE_POISON 0xdeadbeefdeadbeef
#else
#define ZONE_POISON 0xdeadbeef
#endif
-/*
- * Used by zalloc_direct_locked() and zcache to mark elements that have been
- * cleared or poisoned and need to be checked.
- */
-#define ZALLOC_ELEMENT_NEEDS_VALIDATION ((vm_offset_t)1)
-
static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
extern void *zalloc_ext(
zone_t zone,
zone_stats_t zstats,
- zalloc_flags_t flags,
- vm_size_t wasted);
+ zalloc_flags_t flags);
extern void zfree_ext(
zone_t zone,
zone_stats_t zstats,
void *addr);
-/* free an element with no regard for gzalloc, zleaks, or kasan*/
-extern void zfree_direct_locked(
- zone_t zone,
- vm_offset_t elem,
- bool poison);
-
-/*
- * attempts to allocate an element with no regard for gzalloc, zleaks, or kasan
- * returns an address possibly tagged with ZALLOC_ELEMENT_NEEDS_VALIDATION.
+/*!
+ * @function zone_replenish_configure
+ *
+ * @brief
+ * Used by zones backing the VM to maintain a reserve of free elements.
+ *
+ * @discussion
+ * This function should not be used by anyone other than the VM.
*/
-extern vm_offset_t zalloc_direct_locked(
- zone_t zone,
- zalloc_flags_t flags,
- vm_size_t waste);
-
-extern uint32_t zone_poison_count_init(
- zone_t zone);
-
-extern bool zfree_clear_or_poison(
- zone_t zone,
- uint32_t *zp_count,
- vm_address_t addr);
-
-extern void zone_clear_freelist_pointers(
- zone_t zone,
- vm_offset_t addr);
-
-#if ZALLOC_ENABLE_POISONING
-extern void zalloc_validate_element(
- zone_t zone,
- vm_offset_t addr,
- vm_size_t size,
- bool validate);
-#endif
-
-extern void zone_allocated_element_validate(
- zone_t zone,
- vm_offset_t addr);
-
-extern void zone_prio_refill_configure(
+extern void zone_replenish_configure(
zone_t zone);
extern vm_size_t zone_element_size(
#endif /* VM_MAX_TAG_ZONES */
-#define lock_zone(zone) simple_lock(&(zone)->lock, &zone_locks_grp)
-#define unlock_zone(zone) simple_unlock(&(zone)->lock)
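+/*
+ * Editorial note: under KASAN_ZALLOC, zones used for kasan fakestacks
+ * can presumably be taken from interrupt context, so zone_lock()
+ * raises spl with splsched() before acquiring the spinlock (stashing
+ * the previous level in z_kasan_spl) to avoid a same-CPU interrupt
+ * deadlocking on z_lock; zone_unlock() restores it with splx().
+ */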
+static inline void
+zone_lock(zone_t zone)
+{
+#if KASAN_ZALLOC
+ spl_t s = 0;
+ if (zone->kasan_fakestacks) {
+ s = splsched();
+ }
+#endif /* KASAN_ZALLOC */
+ lck_spin_lock(&zone->z_lock);
+#if KASAN_ZALLOC
+ zone->z_kasan_spl = s;
+#endif /* KASAN_ZALLOC */
+}
+
+static inline void
+zone_unlock(zone_t zone)
+{
+#if KASAN_ZALLOC
+ spl_t s = zone->z_kasan_spl;
+ zone->z_kasan_spl = 0;
+#endif /* KASAN_ZALLOC */
+ lck_spin_unlock(&zone->z_lock);
+#if KASAN_ZALLOC
+ if (zone->kasan_fakestacks) {
+ splx(s);
+ }
+#endif /* KASAN_ZALLOC */
+}
#if CONFIG_GZALLOC
void gzalloc_init(vm_size_t);