X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/008676633c2ad2c325837c2b64915f7ded690a8f..cc8bc92ae4a8e9f1a1ab61bf83d34ad8150b3405:/osfmk/kern/zalloc.c diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index be40d8260..699dc3c74 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -73,6 +73,7 @@ #include #include +#include #include #include #include @@ -100,6 +101,8 @@ #include #include +#include + /* * ZONE_ALIAS_ADDR (deprecated) */ @@ -208,6 +211,9 @@ vm_size_t zp_tiny_zone_limit = 0; uintptr_t zp_poisoned_cookie = 0; uintptr_t zp_nopoison_cookie = 0; +#if VM_MAX_TAG_ZONES +boolean_t zone_tagging_on; +#endif /* VM_MAX_TAG_ZONES */ /* * initialize zone poisoning @@ -315,7 +321,7 @@ zp_init(void) /* * These macros are used to keep track of the number * of pages being used by the zone currently. The - * z->page_count is protected by the zone lock. + * z->page_count is not protected by the zone lock. */ #define ZONE_PAGE_COUNT_INCR(z, count) \ { \ @@ -356,15 +362,28 @@ struct zone_free_element { }; /* - * Protects num_zones and zone_array + * Protects zone_array, num_zones, num_zones_in_use, and zone_empty_bitmap */ decl_simple_lock_data(, all_zones_lock) +unsigned int num_zones_in_use; unsigned int num_zones; -#define MAX_ZONES 256 +#define MAX_ZONES 288 struct zone zone_array[MAX_ZONES]; -#define MULTIPAGE_METADATA_MAGIC (0xff) +/* Used to keep track of empty slots in the zone_array */ +bitmap_t zone_empty_bitmap[BITMAP_LEN(MAX_ZONES)]; + +#if DEBUG || DEVELOPMENT +/* + * Used for sysctl kern.run_zone_test which is not thread-safe. Ensure only one thread goes through at a time. + * Or we can end up with multiple test zones (if a second zinit() comes through before zdestroy()), which could lead us to + * run out of zones. + */ +decl_simple_lock_data(, zone_test_lock) +static boolean_t zone_test_running = FALSE; +static zone_t test_zone_ptr = NULL; +#endif /* DEBUG || DEVELOPMENT */ #define PAGE_METADATA_GET_ZINDEX(page_meta) \ (page_meta->zindex) @@ -397,12 +416,10 @@ struct zone_page_metadata { /* * For the first page in the allocation chunk, this represents the total number of free elements in * the chunk. 
- * For all other pages, it represents the number of free elements on that page (used - * for garbage collection of zones with large multipage allocation size) */ uint16_t free_count; - uint8_t zindex; /* Zone index within the zone_array */ - uint8_t page_count; /* Count of pages within the allocation chunk */ + unsigned zindex : ZINDEX_BITS; /* Zone index within the zone_array */ + unsigned page_count : PAGECOUNT_BITS; /* Count of pages within the allocation chunk */ }; /* Macro to get page index (within zone_map) of page containing element */ @@ -428,6 +445,9 @@ struct zone_page_metadata { /* Magic value to indicate empty element free list */ #define PAGE_METADATA_EMPTY_FREELIST ((uint32_t)(~0)) +boolean_t is_zone_map_nearing_exhaustion(void); +extern void vm_pageout_garbage_collect(int collect); + static inline void * page_metadata_get_freelist(struct zone_page_metadata *page_meta) { @@ -498,11 +518,14 @@ zone_populate_metadata_page(struct zone_page_metadata *page_meta) /* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */ lck_mtx_lock(&zone_metadata_region_lck); if (0 == pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) { - kernel_memory_populate(zone_map, + kern_return_t __unused ret = kernel_memory_populate(zone_map, page_metadata_begin, PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_OSFMK); + + /* should not fail with the given arguments */ + assert(ret == KERN_SUCCESS); } lck_mtx_unlock(&zone_metadata_region_lck); } @@ -535,7 +558,7 @@ get_zone_page_metadata(struct zone_free_element *element, boolean_t init) page_meta = (struct zone_page_metadata *)(trunc_page((vm_offset_t)element)); } if (init) - bzero((char *)page_meta, sizeof(struct zone_page_metadata)); + __nosan_bzero((char *)page_meta, sizeof(struct zone_page_metadata)); return ((PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC) ? 
page_meta : page_metadata_get_realmeta(page_meta)); } @@ -549,6 +572,424 @@ get_zone_page(struct zone_page_metadata *page_meta) return (vm_offset_t)(trunc_page(page_meta)); } +/* + * ZTAGS + */ + +#if VM_MAX_TAG_ZONES + +// for zones with tagging enabled: + +// calculate a pointer to the tag base entry, +// holding either a uint32_t the first tag offset for a page in the zone map, +// or two uint16_t tags if the page can only hold one or two elements + +#define ZTAGBASE(zone, element) \ + (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)]) + +// pointer to the tag for an element +#define ZTAG(zone, element) \ + ({ \ + vm_tag_t * result; \ + if ((zone)->tags_inline) { \ + result = (vm_tag_t *) ZTAGBASE((zone), (element)); \ + if ((page_mask & element) >= (zone)->elem_size) result++; \ + } else { \ + result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \ + } \ + result; \ + }) + + +static vm_offset_t zone_tagbase_min; +static vm_offset_t zone_tagbase_max; +static vm_offset_t zone_tagbase_map_size; +static vm_map_t zone_tagbase_map; + +static vm_offset_t zone_tags_min; +static vm_offset_t zone_tags_max; +static vm_offset_t zone_tags_map_size; +static vm_map_t zone_tags_map; + +// simple heap allocator for allocating the tags for new memory + +decl_lck_mtx_data(,ztLock) /* heap lock */ +enum +{ + ztFreeIndexCount = 8, + ztFreeIndexMax = (ztFreeIndexCount - 1), + ztTagsPerBlock = 4 +}; + +struct ztBlock +{ +#if __LITTLE_ENDIAN__ + uint64_t free:1, + next:21, + prev:21, + size:21; +#else +// ztBlock needs free bit least significant +#error !__LITTLE_ENDIAN__ +#endif +}; +typedef struct ztBlock ztBlock; + +static ztBlock * ztBlocks; +static uint32_t ztBlocksCount; +static uint32_t ztBlocksFree; + +static uint32_t +ztLog2up(uint32_t size) +{ + if (1 == size) size = 0; + else size = 32 - __builtin_clz(size - 1); + return (size); +} + +static uint32_t +ztLog2down(uint32_t size) +{ + size = 31 - __builtin_clz(size); + return (size); +} + +static void +ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags) +{ + vm_map_offset_t addr = (vm_map_offset_t) address; + vm_map_offset_t page, end; + + page = trunc_page(addr); + end = round_page(addr + size); + + for (; page < end; page += page_size) + { + if (!pmap_find_phys(kernel_pmap, page)) + { + kern_return_t __unused + ret = kernel_memory_populate(map, page, PAGE_SIZE, + KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG); + assert(ret == KERN_SUCCESS); + } + } +} + +static boolean_t +ztPresent(const void * address, size_t size) +{ + vm_map_offset_t addr = (vm_map_offset_t) address; + vm_map_offset_t page, end; + boolean_t result; + + page = trunc_page(addr); + end = round_page(addr + size); + for (result = TRUE; (page < end); page += page_size) + { + result = pmap_find_phys(kernel_pmap, page); + if (!result) break; + } + return (result); +} + + +void __unused +ztDump(boolean_t sanity); +void __unused +ztDump(boolean_t sanity) +{ + uint32_t q, cq, p; + + for (q = 0; q <= ztFreeIndexMax; q++) + { + p = q; + do + { + if (sanity) + { + cq = ztLog2down(ztBlocks[p].size); + if (cq > ztFreeIndexMax) cq = ztFreeIndexMax; + if (!ztBlocks[p].free + || ((p != q) && (q != cq)) + || (ztBlocks[ztBlocks[p].next].prev != p) + || (ztBlocks[ztBlocks[p].prev].next != p)) + { + kprintf("zterror at %d", p); + ztDump(FALSE); + kprintf("zterror at %d", p); + assert(FALSE); + } + continue; + } + kprintf("zt[%03d]%c %d, %d, %d\n", + p, ztBlocks[p].free ? 
'F' : 'A', + ztBlocks[p].next, ztBlocks[p].prev, + ztBlocks[p].size); + p = ztBlocks[p].next; + if (p == q) break; + } + while (p != q); + if (!sanity) printf("\n"); + } + if (!sanity) printf("-----------------------\n"); +} + + + +#define ZTBDEQ(idx) \ + ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \ + ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev; + +static void +ztFree(zone_t zone __unused, uint32_t index, uint32_t count) +{ + uint32_t q, w, p, size, merge; + + assert(count); + ztBlocksFree += count; + + // merge with preceding + merge = (index + count); + if ((merge < ztBlocksCount) + && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge])) + && ztBlocks[merge].free) + { + ZTBDEQ(merge); + count += ztBlocks[merge].size; + } + + // merge with following + merge = (index - 1); + if ((merge > ztFreeIndexMax) + && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge])) + && ztBlocks[merge].free) + { + size = ztBlocks[merge].size; + count += size; + index -= size; + ZTBDEQ(index); + } + + q = ztLog2down(count); + if (q > ztFreeIndexMax) q = ztFreeIndexMax; + w = q; + // queue in order of size + while (TRUE) + { + p = ztBlocks[w].next; + if (p == q) break; + if (ztBlocks[p].size >= count) break; + w = p; + } + ztBlocks[p].prev = index; + ztBlocks[w].next = index; + + // fault in first + ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0); + + // mark first & last with free flag and size + ztBlocks[index].free = TRUE; + ztBlocks[index].size = count; + ztBlocks[index].prev = w; + ztBlocks[index].next = p; + if (count > 1) + { + index += (count - 1); + // fault in last + ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0); + ztBlocks[index].free = TRUE; + ztBlocks[index].size = count; + } +} + +static uint32_t +ztAlloc(zone_t zone, uint32_t count) +{ + uint32_t q, w, p, leftover; + + assert(count); + + q = ztLog2up(count); + if (q > ztFreeIndexMax) q = ztFreeIndexMax; + do + { + w = q; + while (TRUE) + { + p = ztBlocks[w].next; + if (p == q) break; + if (ztBlocks[p].size >= count) + { + // dequeue, mark both ends allocated + ztBlocks[w].next = ztBlocks[p].next; + ztBlocks[ztBlocks[p].next].prev = w; + ztBlocks[p].free = FALSE; + ztBlocksFree -= ztBlocks[p].size; + if (ztBlocks[p].size > 1) ztBlocks[p + ztBlocks[p].size - 1].free = FALSE; + + // fault all the allocation + ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0); + // mark last as allocated + if (count > 1) ztBlocks[p + count - 1].free = FALSE; + // free remainder + leftover = ztBlocks[p].size - count; + if (leftover) ztFree(zone, p + ztBlocks[p].size - leftover, leftover); + + return (p); + } + w = p; + } + q++; + } + while (q <= ztFreeIndexMax); + + return (-1U); +} + +static void +ztInit(vm_size_t max_zonemap_size, lck_grp_t * group) +{ + kern_return_t ret; + vm_map_kernel_flags_t vmk_flags; + uint32_t idx; + + lck_mtx_init(&ztLock, group, LCK_ATTR_NULL); + + // allocate submaps VM_KERN_MEMORY_DIAG + + zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t); + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; + ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size, + FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG, + &zone_tagbase_map); + + if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed"); + zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size); + + zone_tags_map_size = 2048*1024 * sizeof(vm_tag_t); + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent 
= TRUE; + ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size, + FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG, + &zone_tags_map); + + if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed"); + zone_tags_max = zone_tags_min + round_page(zone_tags_map_size); + + ztBlocks = (ztBlock *) zone_tags_min; + ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock)); + + // initialize the qheads + lck_mtx_lock(&ztLock); + + ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0); + for (idx = 0; idx < ztFreeIndexCount; idx++) + { + ztBlocks[idx].free = TRUE; + ztBlocks[idx].next = idx; + ztBlocks[idx].prev = idx; + ztBlocks[idx].size = 0; + } + // free remaining space + ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount); + + lck_mtx_unlock(&ztLock); +} + +static void +ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size) +{ + uint32_t * tagbase; + uint32_t count, block, blocks, idx; + size_t pages; + + pages = atop(size); + tagbase = ZTAGBASE(zone, mem); + + lck_mtx_lock(&ztLock); + + // fault tagbase + ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0); + + if (!zone->tags_inline) + { + // allocate tags + count = (uint32_t)(size / zone->elem_size); + blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock); + block = ztAlloc(zone, blocks); + if (-1U == block) ztDump(false); + assert(-1U != block); + } + + lck_mtx_unlock(&ztLock); + + if (!zone->tags_inline) + { + // set tag base for each page + block *= ztTagsPerBlock; + for (idx = 0; idx < pages; idx++) + { + tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size); + } + } +} + +static void +ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size) +{ + uint32_t * tagbase; + uint32_t count, block, blocks, idx; + size_t pages; + + // set tag base for each page + pages = atop(size); + tagbase = ZTAGBASE(zone, mem); + block = tagbase[0]; + for (idx = 0; idx < pages; idx++) + { + tagbase[idx] = 0xFFFFFFFF; + } + + lck_mtx_lock(&ztLock); + if (!zone->tags_inline) + { + count = (uint32_t)(size / zone->elem_size); + blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock); + assert(block != 0xFFFFFFFF); + block /= ztTagsPerBlock; + ztFree(NULL /* zone is unlocked */, block, blocks); + } + + lck_mtx_unlock(&ztLock); +} + +uint32_t +zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size) +{ + zone_t z; + uint32_t idx; + + simple_lock(&all_zones_lock); + + for (idx = 0; idx < num_zones; idx++) + { + z = &(zone_array[idx]); + if (!z->tags) continue; + if (tag_zone_index != z->tag_zone_index) continue; + *elem_size = z->elem_size; + break; + } + + simple_unlock(&all_zones_lock); + + if (idx == num_zones) idx = -1U; + + return (idx); +} + +#endif /* VM_MAX_TAG_ZONES */ + /* Routine to get the size of a zone allocated address. If the address doesnt belong to the * zone_map, returns 0. 
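
The tag-block heap above keeps ztFreeIndexCount segregated free lists indexed by block size: ztFree() files a run of blocks under floor(log2(size)) clamped to ztFreeIndexMax, while ztAlloc() starts scanning at ceil(log2(count)) and walks upward until it finds a block large enough. A minimal standalone sketch of just the bucket selection (userspace C, not kernel code; zt_log2up, zt_log2down and ZT_FREE_INDEX_MAX are local names mirroring ztLog2up, ztLog2down and ztFreeIndexMax above):

#include <assert.h>
#include <stdint.h>

#define ZT_FREE_INDEX_MAX 7		/* mirrors ztFreeIndexCount - 1 above */

static uint32_t
zt_log2up(uint32_t size)
{
	return (size == 1) ? 0 : 32 - __builtin_clz(size - 1);
}

static uint32_t
zt_log2down(uint32_t size)
{
	return 31 - __builtin_clz(size);
}

int
main(void)
{
	/* ztAlloc() for a 5-block run starts scanning the queue at index 3. */
	uint32_t q = zt_log2up(5);
	if (q > ZT_FREE_INDEX_MAX) q = ZT_FREE_INDEX_MAX;
	assert(q == 3);

	/* ztFree() files a 5-block run under index floor(log2(5)) == 2. */
	q = zt_log2down(5);
	if (q > ZT_FREE_INDEX_MAX) q = ZT_FREE_INDEX_MAX;
	assert(q == 2);

	/* Very large runs are clamped onto the last queue. */
	q = zt_log2down(4096);
	if (q > ZT_FREE_INDEX_MAX) q = ZT_FREE_INDEX_MAX;
	assert(q == ZT_FREE_INDEX_MAX);
	return 0;
}
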
*/ @@ -575,6 +1016,35 @@ zone_element_size(void *addr, zone_t *z) } } +#if DEBUG || DEVELOPMENT + +vm_size_t +zone_element_info(void *addr, vm_tag_t * ptag) +{ + vm_size_t size = 0; + vm_tag_t tag = VM_KERN_MEMORY_NONE; + struct zone * src_zone; + + if (from_zone_map(addr, sizeof(void *))) { + struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE); + src_zone = PAGE_METADATA_GET_ZONE(page_meta); +#if VM_MAX_TAG_ZONES + if (__improbable(src_zone->tags)) { + tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1); + } +#endif /* VM_MAX_TAG_ZONES */ + size = src_zone->elem_size; + } else { +#if CONFIG_GZALLOC + gzalloc_element_size(addr, NULL, &size); +#endif /* CONFIG_GZALLOC */ + } + *ptag = tag; + return size; +} + +#endif /* DEBUG || DEVELOPMENT */ + /* * Zone checking helper function. * A pointer that satisfies these conditions is OK to be a freelist next pointer @@ -693,7 +1163,7 @@ backup_ptr_mismatch_panic(zone_t zone, /* The backup is definitely the corrupted one */ if (sane_primary && !sane_backup) zone_element_was_modified_panic(zone, element, backup, - (primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)), + (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)), zone->elem_size - sizeof(vm_offset_t)); /* @@ -703,10 +1173,10 @@ backup_ptr_mismatch_panic(zone_t zone, * primary pointer has been overwritten with a sane but incorrect address. */ if (sane_primary && sane_backup) - zone_element_was_modified_panic(zone, element, primary, likely_backup, 0); + zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0); /* Neither are sane, so just guess. */ - zone_element_was_modified_panic(zone, element, primary, likely_backup, 0); + zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0); } /* @@ -772,6 +1242,10 @@ free_to_zone(zone_t zone, } zone->count--; zone->countfree++; + +#if KASAN_ZALLOC + kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED); +#endif } @@ -782,6 +1256,7 @@ free_to_zone(zone_t zone, */ static inline vm_offset_t try_alloc_from_zone(zone_t zone, + vm_tag_t tag __unused, boolean_t* check_poison) { vm_offset_t element; @@ -875,6 +1350,18 @@ try_alloc_from_zone(zone_t zone, zone->count++; zone->sum_count++; +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags)) { + // set the tag with b0 clear so the block remains inuse + ZTAG(zone, element)[0] = (tag << 1); + } +#endif /* VM_MAX_TAG_ZONES */ + + +#if KASAN_ZALLOC + kasan_poison_range(element, zone->elem_size, ASAN_VALID); +#endif + return element; } @@ -887,8 +1374,6 @@ try_alloc_from_zone(zone_t zone, */ #define ZINFO_SLOTS MAX_ZONES /* for now */ -void zone_display_zprint(void); - zone_t zone_find_largest(void); /* @@ -917,7 +1402,7 @@ static thread_call_data_t call_async_alloc; #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) #define zone_sleep(zone) \ - (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT); + (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT); /* * The zone_locks_grp allows for collecting lock statistics. 
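
The backup_ptr_mismatch_panic() fixes in this hunk operate on the freelist hardening scheme: a free element stores its next pointer twice, in the clear at the start of the element and XORed with a secret cookie at the end (one cookie for poisoned elements, another for unpoisoned ones), so a single overwritten word can be detected and attributed. A minimal standalone sketch of the cross-check, assuming that layout (the cookie and address values below are invented for the example):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static const uintptr_t zp_nopoison_cookie = 0x6b0ef1c5;	/* illustrative only */
static const uintptr_t zp_poisoned_cookie = 0x3d9c44b8;	/* illustrative only */

static bool
freelist_consistent(uintptr_t primary, uintptr_t backup, bool poisoned)
{
	uintptr_t cookie = poisoned ? zp_poisoned_cookie : zp_nopoison_cookie;
	return primary == (backup ^ cookie);
}

int
main(void)
{
	uintptr_t next    = 0x12345680;			/* hypothetical next-element address */
	uintptr_t primary = next;
	uintptr_t backup  = next ^ zp_nopoison_cookie;

	assert(freelist_consistent(primary, backup, false));

	primary ^= 0x40;	/* simulate a corrupted primary pointer */
	assert(!freelist_consistent(primary, backup, false));
	return 0;
}
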
@@ -950,7 +1435,7 @@ lck_mtx_ext_t zone_gc_lck_ext; boolean_t zone_gc_allowed = TRUE; boolean_t panic_include_zprint = FALSE; -vm_offset_t panic_kext_memory_info = 0; +mach_memory_info_t *panic_kext_memory_info = NULL; vm_size_t panic_kext_memory_size = 0; #define ZALLOC_DEBUG_ZONEGC 0x00000001 @@ -990,13 +1475,11 @@ uint32_t zalloc_debug = 0; static boolean_t log_records_init = FALSE; static int log_records; /* size of the log, expressed in number of records */ -#define MAX_NUM_ZONES_ALLOWED_LOGGING 5 /* Maximum 5 zones can be logged at once */ +#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */ static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING; static int num_zones_logged = 0; -#define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */ - static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */ /* Log allocations and frees to help debug a zone element corruption */ @@ -1044,8 +1527,8 @@ boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-ar * match a space in the zone name. */ -static int -log_this_zone(const char *zonename, const char *logname) +int +track_this_zone(const char *zonename, const char *logname) { int len; const char *zc = zonename; @@ -1181,6 +1664,15 @@ zleak_init(vm_size_t max_zonemap_size) zleak_global_tracking_threshold = max_zonemap_size / 2; zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8; +#if CONFIG_EMBEDDED + if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) { + zleak_enable_flag = TRUE; + printf("zone leak detection enabled\n"); + } else { + zleak_enable_flag = FALSE; + printf("zone leak detection disabled\n"); + } +#else /* CONFIG_EMBEDDED */ /* -zleakoff (flag to disable zone leak monitor) */ if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) { zleak_enable_flag = FALSE; @@ -1189,6 +1681,7 @@ zleak_init(vm_size_t max_zonemap_size) zleak_enable_flag = TRUE; printf("zone leak detection enabled\n"); } +#endif /* CONFIG_EMBEDDED */ /* zfactor=XXXX (override how often to sample the zone allocator) */ if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) { @@ -1549,11 +2042,36 @@ hashaddr(uintptr_t pt, uint32_t max_size) #define ZONE_MAX_ALLOC_SIZE (32 * 1024) #define ZONE_ALLOC_FRAG_PERCENT(alloc_size, ele_size) (((alloc_size % ele_size) * 100) / alloc_size) +/* Used to manage copying in of new zone names */ +static vm_offset_t zone_names_start; +static vm_offset_t zone_names_next; + +static vm_size_t +compute_element_size(vm_size_t requested_size) +{ + vm_size_t element_size = requested_size; + + /* Zone elements must fit both a next pointer and a backup pointer */ + vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2; + if (element_size < minimum_element_size) + element_size = minimum_element_size; + + /* + * Round element size to a multiple of sizeof(pointer) + * This also enforces that allocations will be aligned on pointer boundaries + */ + element_size = ((element_size-1) + sizeof(vm_offset_t)) - + ((element_size-1) % sizeof(vm_offset_t)); + + return element_size; +} + /* * zinit initializes a new zone. The zone data structures themselves * are stored in a zone, which is initially a static structure that * is initialized by zone_init. 
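
compute_element_size() factors out the sizing rule that zinit() used to apply inline: every element must be able to hold the primary and backup freelist pointers, and the size is rounded up to a multiple of sizeof(vm_offset_t) so allocations stay pointer-aligned. A standalone sketch of the same arithmetic (userspace C; vm_offset_t is modeled as uintptr_t here):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t vm_offset_t;	/* stand-in for the kernel type */

static size_t
compute_element_size(size_t requested_size)
{
	size_t element_size = requested_size;
	size_t minimum_element_size = sizeof(vm_offset_t) * 2;	/* next + backup pointer */

	if (element_size < minimum_element_size)
		element_size = minimum_element_size;

	/* Round up to a multiple of sizeof(vm_offset_t). */
	element_size = ((element_size - 1) + sizeof(vm_offset_t)) -
	    ((element_size - 1) % sizeof(vm_offset_t));

	return element_size;
}

int
main(void)
{
	assert(compute_element_size(1) == 2 * sizeof(vm_offset_t));
	assert(compute_element_size(17) % sizeof(vm_offset_t) == 0);
	assert(compute_element_size(24) == 24);	/* already a pointer multiple */
	return 0;
}
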
*/ + zone_t zinit( vm_size_t size, /* the size of an element */ @@ -1561,40 +2079,100 @@ zinit( vm_size_t alloc, /* allocation size */ const char *name) /* a name for the zone */ { - zone_t z; + zone_t z; + + size = compute_element_size(size); simple_lock(&all_zones_lock); + assert(num_zones < MAX_ZONES); + assert(num_zones_in_use <= num_zones); + + /* If possible, find a previously zdestroy'ed zone in the zone_array that we can reuse instead of initializing a new zone. */ + for (int index = bitmap_first(zone_empty_bitmap, MAX_ZONES); + index >= 0 && index < (int)num_zones; + index = bitmap_next(zone_empty_bitmap, index)) { + z = &(zone_array[index]); + + /* + * If the zone name and the element size are the same, we can just reuse the old zone struct. + * Otherwise hand out a new zone from the zone_array. + */ + if (!strcmp(z->zone_name, name)) { + vm_size_t old_size = z->elem_size; +#if KASAN_ZALLOC + old_size -= z->kasan_redzone * 2; +#endif + if (old_size == size) { + /* Clear the empty bit for this zone, increment num_zones_in_use, and mark the zone as valid again. */ + bitmap_clear(zone_empty_bitmap, index); + num_zones_in_use++; + z->zone_valid = TRUE; + + /* All other state is already set up since the zone was previously in use. Return early. */ + simple_unlock(&all_zones_lock); + return (z); + } + } + } + + /* If we're here, it means we didn't find a zone above that we could simply reuse. Set up a new zone. */ + + /* Clear the empty bit for the new zone */ + bitmap_clear(zone_empty_bitmap, num_zones); + z = &(zone_array[num_zones]); z->index = num_zones; - num_zones++; - simple_unlock(&all_zones_lock); - /* Zone elements must fit both a next pointer and a backup pointer */ - vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2; - if (size < minimum_element_size) - size = minimum_element_size; + num_zones++; + num_zones_in_use++; /* - * Round element size to a multiple of sizeof(pointer) - * This also enforces that allocations will be aligned on pointer boundaries + * Initialize the zone lock here before dropping the all_zones_lock. Otherwise we could race with + * zalloc_async() and try to grab the zone lock before it has been initialized, causing a panic. */ - size = ((size-1) + sizeof(vm_offset_t)) - - ((size-1) % sizeof(vm_offset_t)); + lock_zone_init(z); + + simple_unlock(&all_zones_lock); - if (alloc == 0) - alloc = PAGE_SIZE; +#if KASAN_ZALLOC + /* Expand the zone allocation size to include the redzones. For page-multiple + * zones add a full guard page because they likely require alignment. kalloc + * and fakestack handles its own KASan state, so ignore those zones. 
*/ + /* XXX: remove this when zinit_with_options() is a thing */ + const char *kalloc_name = "kalloc."; + const char *fakestack_name = "fakestack."; + if (strncmp(name, kalloc_name, strlen(kalloc_name)) == 0) { + z->kasan_redzone = 0; + } else if (strncmp(name, fakestack_name, strlen(fakestack_name)) == 0) { + z->kasan_redzone = 0; + } else { + if ((size % PAGE_SIZE) != 0) { + z->kasan_redzone = KASAN_GUARD_SIZE; + } else { + z->kasan_redzone = PAGE_SIZE; + } + max = (max / size) * (size + z->kasan_redzone * 2); + size += z->kasan_redzone * 2; + } +#endif - alloc = round_page(alloc); - max = round_page(max); + max = round_page(max); vm_size_t best_alloc = PAGE_SIZE; - vm_size_t alloc_size; - for (alloc_size = (2 * PAGE_SIZE); alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) { - if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, size) < ZONE_ALLOC_FRAG_PERCENT(best_alloc, size)) { - best_alloc = alloc_size; + + if ((size % PAGE_SIZE) == 0) { + /* zero fragmentation by definition */ + best_alloc = size; + } else { + vm_size_t alloc_size; + for (alloc_size = (2 * PAGE_SIZE); alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) { + if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, size) < ZONE_ALLOC_FRAG_PERCENT(best_alloc, size)) { + best_alloc = alloc_size; + } } } + alloc = best_alloc; if (max && (max < alloc)) max = alloc; @@ -1609,7 +2187,6 @@ zinit( z->max_size = max; z->elem_size = size; z->alloc_size = alloc; - z->zone_name = name; z->count = 0; z->countfree = 0; z->count_all_free_pages = 0; @@ -1632,13 +2209,41 @@ zinit( z->prio_refill_watermark = 0; z->zone_replenish_thread = NULL; z->zp_count = 0; + z->kasan_quarantine = TRUE; + z->zone_valid = TRUE; #if CONFIG_ZLEAKS z->zleak_capture = 0; z->zleak_on = FALSE; #endif /* CONFIG_ZLEAKS */ - lock_zone_init(z); + /* + * If the VM is ready to handle kmem_alloc requests, copy the zone name passed in. + * + * Else simply maintain a pointer to the name string. The only zones we'll actually have + * to do this for would be the VM-related zones that are created very early on before any + * kexts can be loaded (unloaded). So we should be fine with just a pointer in this case. + */ + if (kmem_alloc_ready) { + size_t len = MIN(strlen(name)+1, MACH_ZONE_NAME_MAX_LEN); + + if (zone_names_start == 0 || ((zone_names_next - zone_names_start) + len) > PAGE_SIZE) { + printf("zalloc: allocating memory for zone names buffer\n"); + kern_return_t retval = kmem_alloc_kobject(kernel_map, &zone_names_start, + PAGE_SIZE, VM_KERN_MEMORY_OSFMK); + if (retval != KERN_SUCCESS) { + panic("zalloc: zone_names memory allocation failed"); + } + bzero((char *)zone_names_start, PAGE_SIZE); + zone_names_next = zone_names_start; + } + + strlcpy((char *)zone_names_next, name, len); + z->zone_name = (char *)zone_names_next; + zone_names_next += len; + } else { + z->zone_name = name; + } /* * Check for and set up zone leak detection if requested via boot-args. We recognized two @@ -1662,11 +2267,13 @@ zinit( snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d", i); if (PE_parse_boot_argn(zlog_name, zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) { - if (log_this_zone(z->zone_name, zone_name_to_log)) { - z->zone_logging = TRUE; - zone_logging_enabled = TRUE; - num_zones_logged++; - break; + if (track_this_zone(z->zone_name, zone_name_to_log)) { + if (z->zone_valid) { + z->zone_logging = TRUE; + zone_logging_enabled = TRUE; + num_zones_logged++; + break; + } } } i++; @@ -1679,10 +2286,12 @@ zinit( * boot-args. 
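
By this point in zinit() the chunk size is no longer taken from the caller: page-multiple element sizes get a chunk of exactly one element (zero fragmentation by definition), and everything else is scored against candidates from two pages up to ZONE_MAX_ALLOC_SIZE using ZONE_ALLOC_FRAG_PERCENT, keeping the least-fragmented candidate. A minimal standalone sketch of that selection, assuming a 4 KB page size (pick_alloc_size is a name invented for the sketch):

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE           4096u		/* assumed for the sketch */
#define ZONE_MAX_ALLOC_SIZE (32u * 1024)
#define ZONE_ALLOC_FRAG_PERCENT(alloc_size, ele_size) \
	((((alloc_size) % (ele_size)) * 100) / (alloc_size))

static size_t
pick_alloc_size(size_t elem_size)
{
	size_t best_alloc = PAGE_SIZE;

	if ((elem_size % PAGE_SIZE) == 0)
		return elem_size;	/* zero fragmentation by definition */

	for (size_t alloc_size = 2 * PAGE_SIZE;
	    alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) {
		if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, elem_size) <
		    ZONE_ALLOC_FRAG_PERCENT(best_alloc, elem_size))
			best_alloc = alloc_size;
	}
	return best_alloc;
}

int
main(void)
{
	/* A 1536-byte element packs exactly into a 12 KB chunk (8 elements). */
	assert(pick_alloc_size(1536) == 12 * 1024);
	/* Page-multiple elements get a single-element chunk. */
	assert(pick_alloc_size(2 * PAGE_SIZE) == 2 * PAGE_SIZE);
	return 0;
}
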
*/ if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) { - if (log_this_zone(z->zone_name, zone_name_to_log)) { + if (track_this_zone(z->zone_name, zone_name_to_log)) { + if (z->zone_valid) { z->zone_logging = TRUE; zone_logging_enabled = TRUE; num_zones_logged++; + } } } } @@ -1724,6 +2333,10 @@ zinit( curr_zone = &(zone_array[zone_idx]); + if (!curr_zone->zone_valid) { + continue; + } + /* * We work with the zone unlocked here because we could end up needing the zone lock to * enable logging for this zone e.g. need a VM object to allocate memory to enable logging for the @@ -1751,6 +2364,7 @@ zinit( #if CONFIG_GZALLOC gzalloc_zone_init(z); #endif + return(z); } unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated, zone_replenish_throttle_count; @@ -1769,6 +2383,7 @@ zone_replenish_thread(zone_t z) for (;;) { lock_zone(z); + assert(z->zone_valid); z->zone_replenishing = TRUE; assert(z->prio_refill_watermark != 0); while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) { @@ -1789,6 +2404,11 @@ zone_replenish_thread(zone_t z) if (z->noencrypt) zflags |= KMA_NOENCRYPT; + /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ + if (is_zone_map_nearing_exhaustion()) { + thread_wakeup((event_t) &vm_pageout_garbage_collect); + } + kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE); if (kr == KERN_SUCCESS) { @@ -1806,6 +2426,7 @@ zone_replenish_thread(zone_t z) } lock_zone(z); + assert(z->zone_valid); zone_replenish_loops++; } @@ -1837,18 +2458,83 @@ zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) { thread_deallocate(z->zone_replenish_thread); } -/* Initialize the metadata for an allocation chunk */ -static inline void -zcram_metadata_init(vm_offset_t newmem, vm_size_t size, struct zone_page_metadata *chunk_metadata) +void +zdestroy(zone_t z) { - struct zone_page_metadata *page_metadata; + unsigned int zindex; - /* The first page is the real metadata for this allocation chunk. We mark the others as fake metadata */ - size -= PAGE_SIZE; - newmem += PAGE_SIZE; + assert(z != NULL); - for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) { - page_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE); + lock_zone(z); + assert(z->zone_valid); + + /* Assert that the zone does not have any allocations in flight */ + assert(z->doing_alloc_without_vm_priv == FALSE); + assert(z->doing_alloc_with_vm_priv == FALSE); + assert(z->async_pending == FALSE); + assert(z->waiting == FALSE); + assert(z->async_prio_refill == FALSE); + +#if !KASAN_ZALLOC + /* + * Unset the valid bit. We'll hit an assert failure on further operations on this zone, until zinit() is called again. + * Leave the zone valid for KASan as we will see zfree's on quarantined free elements even after the zone is destroyed. + */ + z->zone_valid = FALSE; +#endif + unlock_zone(z); + + /* Dump all the free elements */ + drop_free_elements(z); + +#if CONFIG_GZALLOC + /* If the zone is gzalloc managed dump all the elements in the free cache */ + gzalloc_empty_free_cache(z); +#endif + + lock_zone(z); + +#if !KASAN_ZALLOC + /* Assert that all counts are zero */ + assert(z->count == 0); + assert(z->countfree == 0); + assert(z->cur_size == 0); + assert(z->page_count == 0); + assert(z->count_all_free_pages == 0); + + /* Assert that all queues except the foreign queue are empty. 
The zone allocator doesn't know how to free up foreign memory. */ + assert(queue_empty(&z->pages.all_used)); + assert(queue_empty(&z->pages.intermediate)); + assert(queue_empty(&z->pages.all_free)); +#endif + + zindex = z->index; + + unlock_zone(z); + + simple_lock(&all_zones_lock); + + assert(!bitmap_test(zone_empty_bitmap, zindex)); + /* Mark the zone as empty in the bitmap */ + bitmap_set(zone_empty_bitmap, zindex); + num_zones_in_use--; + assert(num_zones_in_use > 0); + + simple_unlock(&all_zones_lock); +} + +/* Initialize the metadata for an allocation chunk */ +static inline void +zcram_metadata_init(vm_offset_t newmem, vm_size_t size, struct zone_page_metadata *chunk_metadata) +{ + struct zone_page_metadata *page_metadata; + + /* The first page is the real metadata for this allocation chunk. We mark the others as fake metadata */ + size -= PAGE_SIZE; + newmem += PAGE_SIZE; + + for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) { + page_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE); assert(page_metadata != chunk_metadata); PAGE_METADATA_SET_ZINDEX(page_metadata, MULTIPAGE_METADATA_MAGIC); page_metadata_set_realmeta(page_metadata, chunk_metadata); @@ -1913,13 +2599,14 @@ random_free_to_zone( vm_size_t elem_size; int index; + assert(element_count <= ZONE_CHUNK_MAXELEMENTS); elem_size = zone->elem_size; last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size); for (index = 0; index < element_count; index++) { assert(first_element_offset <= last_element_offset); if ( #if DEBUG || DEVELOPMENT - leak_scan_debug_flag || + leak_scan_debug_flag || __improbable(zone->tags) || #endif /* DEBUG || DEVELOPMENT */ random_bool_gen(entropy_buffer, index, MAX_ENTROPY_PER_ZCRAM)) { element_addr = newmem + first_element_offset; @@ -1957,7 +2644,7 @@ zcram( elem_size = zone->elem_size; - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, VM_KERNEL_ADDRPERM(zone), size, 0, 0, 0); + KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, zone->index, size); if (from_zone_map(newmem, size)) from_zm = TRUE; @@ -1995,11 +2682,20 @@ zcram( page_metadata_set_freelist(chunk_metadata, 0); PAGE_METADATA_SET_ZINDEX(chunk_metadata, zone->index); chunk_metadata->free_count = 0; - chunk_metadata->page_count = (size / PAGE_SIZE); + assert((size / PAGE_SIZE) <= ZONE_CHUNK_MAXPAGES); + chunk_metadata->page_count = (unsigned)(size / PAGE_SIZE); zcram_metadata_init(newmem, size, chunk_metadata); +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags)) { + assert(from_zm); + ztMemoryAdd(zone, newmem, size); + } +#endif /* VM_MAX_TAG_ZONES */ + lock_zone(zone); + assert(zone->zone_valid); enqueue_tail(&zone->pages.all_used, &(chunk_metadata->pages)); if (!from_zm) { @@ -2024,42 +2720,48 @@ zcram( } unlock_zone(zone); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, VM_KERNEL_ADDRPERM(zone), 0, 0, 0, 0); + KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, zone->index); } /* * Fill a zone with enough memory to contain at least nelem elements. - * Memory is obtained with kmem_alloc_kobject from the kernel_map. * Return the number of elements actually put into the zone, which may * be more than the caller asked for since the memory allocation is - * rounded up to a full page. + * rounded up to the next zone allocation size. 
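
zdestroy() above and the matching lookup in zinit() form a slot-reuse protocol: a destroyed zone sets its index in zone_empty_bitmap, and a later zinit() with the same name and element size clears the bit and revives the slot instead of consuming a new entry in zone_array. A much-simplified standalone model of that protocol (a plain uint64_t stands in for the kernel's bitmap_t, and mini_zone, mini_zinit and mini_zdestroy are names invented for the example):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define MAX_SLOTS 64

struct mini_zone {
	const char *name;
	size_t      elem_size;
};

static struct mini_zone slots[MAX_SLOTS];
static unsigned int     slots_used;
static uint64_t         empty_bitmap;	/* bit set => slot may be reused */

static int
mini_zinit(const char *name, size_t elem_size)
{
	/* Prefer a previously destroyed slot with a matching name and size. */
	for (unsigned int i = 0; i < slots_used; i++) {
		if ((empty_bitmap & (1ULL << i)) &&
		    strcmp(slots[i].name, name) == 0 &&
		    slots[i].elem_size == elem_size) {
			empty_bitmap &= ~(1ULL << i);
			return (int)i;
		}
	}
	/* Otherwise hand out a fresh slot. */
	assert(slots_used < MAX_SLOTS);
	slots[slots_used] = (struct mini_zone){ name, elem_size };
	return (int)slots_used++;
}

static void
mini_zdestroy(int idx)
{
	empty_bitmap |= 1ULL << (unsigned int)idx;
}

int
main(void)
{
	int a = mini_zinit("test.zone", 64);
	mini_zdestroy(a);
	int b = mini_zinit("test.zone", 64);	/* revives the same slot */
	assert(a == b && slots_used == 1);
	return 0;
}
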
*/ int zfill( zone_t zone, int nelem) { - kern_return_t kr; - vm_size_t size; + kern_return_t kr; vm_offset_t memory; - int nalloc; - assert(nelem > 0); - if (nelem <= 0) - return 0; - size = nelem * zone->elem_size; - size = round_page(size); - kr = kmem_alloc_kobject(kernel_map, &memory, size, VM_KERN_MEMORY_ZONE); - if (kr != KERN_SUCCESS) + vm_size_t alloc_size = zone->alloc_size; + vm_size_t elem_per_alloc = alloc_size / zone->elem_size; + vm_size_t nalloc = (nelem + elem_per_alloc - 1) / elem_per_alloc; + + /* Don't mix-and-match zfill with foreign memory */ + assert(!zone->allows_foreign); + + /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ + if (is_zone_map_nearing_exhaustion()) { + thread_wakeup((event_t) &vm_pageout_garbage_collect); + } + + kr = kernel_memory_allocate(zone_map, &memory, nalloc * alloc_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_ZONE); + if (kr != KERN_SUCCESS) { + printf("%s: kernel_memory_allocate() of %lu bytes failed\n", + __func__, (unsigned long)(nalloc * alloc_size)); return 0; + } - zone_change(zone, Z_FOREIGN, TRUE); - zcram(zone, memory, size); - nalloc = (int)(size / zone->elem_size); - assert(nalloc >= nelem); + for (vm_size_t i = 0; i < nalloc; i++) { + zcram(zone, memory + i * alloc_size, alloc_size); + } - return nalloc; + return (int)(nalloc * elem_per_alloc); } /* @@ -2091,6 +2793,12 @@ zone_bootstrap(void) } #if DEBUG || DEVELOPMENT +#if VM_MAX_TAG_ZONES + /* enable tags for zones that ask for */ + if (PE_parse_boot_argn("-zt", temp_buf, sizeof(temp_buf))) { + zone_tagging_on = TRUE; + } +#endif /* VM_MAX_TAG_ZONES */ /* disable element location randomization in a page */ if (PE_parse_boot_argn("-zl", temp_buf, sizeof(temp_buf))) { leak_scan_debug_flag = TRUE; @@ -2099,7 +2807,16 @@ zone_bootstrap(void) simple_lock_init(&all_zones_lock, 0); + num_zones_in_use = 0; num_zones = 0; + /* Mark all zones as empty */ + bitmap_full(zone_empty_bitmap, BITMAP_LEN(MAX_ZONES)); + zone_names_next = zone_names_start = 0; + +#if DEBUG || DEVELOPMENT + simple_lock_init(&zone_test_lock, 0); +#endif /* DEBUG || DEVELOPMENT */ + thread_call_setup(&call_async_alloc, zalloc_async, NULL); /* initializing global lock group for zones */ @@ -2110,6 +2827,101 @@ zone_bootstrap(void) lck_mtx_init_ext(&zone_metadata_region_lck, &zone_metadata_region_lck_ext, &zone_locks_grp, &zone_metadata_lock_attr); } +/* + * We're being very conservative here and picking a value of 95%. We might need to lower this if + * we find that we're not catching the problem and are still hitting zone map exhaustion panics. + */ +#define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95 + +/* + * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit. + * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default. + */ +unsigned int zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT; + +/* + * Returns pid of the task with the largest number of VM map entries. + */ +extern pid_t find_largest_process_vm_map_entries(void); + +/* + * Callout to jetsam. If pid is -1, we wake up the memorystatus thread to do asynchronous kills. + * For any other pid we try to kill that process synchronously. 
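
The reworked zfill() above rounds the request up to whole zone chunks (zone->alloc_size) and crams each chunk separately, so the zone can receive more elements than were asked for. A small standalone sketch of just the sizing arithmetic (plan_zfill and struct fill_plan are names made up for the example; the element and chunk sizes are illustrative):

#include <assert.h>
#include <stddef.h>

struct fill_plan {
	size_t chunks;		/* nalloc: number of alloc_size chunks to cram */
	size_t elements;	/* elements actually added to the zone */
};

static struct fill_plan
plan_zfill(size_t nelem, size_t elem_size, size_t alloc_size)
{
	size_t elem_per_alloc = alloc_size / elem_size;
	size_t nalloc = (nelem + elem_per_alloc - 1) / elem_per_alloc;	/* round up */

	return (struct fill_plan){ .chunks = nalloc,
	    .elements = nalloc * elem_per_alloc };
}

int
main(void)
{
	/* Asking for 100 elements of 1536 bytes with a 12 KB chunk
	 * (8 elements per chunk) crams 13 chunks and yields 104 elements. */
	struct fill_plan p = plan_zfill(100, 1536, 12 * 1024);
	assert(p.chunks == 13 && p.elements == 104);
	return 0;
}
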
+ */ +boolean_t memorystatus_kill_on_zone_map_exhaustion(pid_t pid); + +void get_zone_map_size(uint64_t *current_size, uint64_t *capacity) +{ + *current_size = zone_map->size; + *capacity = vm_map_max(zone_map) - vm_map_min(zone_map); +} + +void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size) +{ + zone_t largest_zone = zone_find_largest(); + strlcpy(zone_name, largest_zone->zone_name, zone_name_len); + *zone_size = largest_zone->cur_size; +} + +boolean_t is_zone_map_nearing_exhaustion(void) +{ + uint64_t size = zone_map->size; + uint64_t capacity = vm_map_max(zone_map) - vm_map_min(zone_map); + if (size > ((capacity * zone_map_jetsam_limit) / 100)) { + return TRUE; + } + return FALSE; +} + +extern zone_t vm_map_entry_zone; +extern zone_t vm_object_zone; + +#define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98 + +/* + * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread + * to walk through the jetsam priority bands and kill processes. + */ +static void kill_process_in_largest_zone(void) +{ + pid_t pid = -1; + zone_t largest_zone = zone_find_largest(); + + printf("zone_map_exhaustion: Zone map size %lld, capacity %lld [jetsam limit %d%%]\n", (uint64_t)zone_map->size, + (uint64_t)(vm_map_max(zone_map) - vm_map_min(zone_map)), zone_map_jetsam_limit); + printf("zone_map_exhaustion: Largest zone %s, size %lu\n", largest_zone->zone_name, (uintptr_t)largest_zone->cur_size); + + /* + * We want to make sure we don't call this function from userspace. Or we could end up trying to synchronously kill the process + * whose context we're in, causing the system to hang. + */ + assert(current_task() == kernel_task); + + /* + * If vm_object_zone is the largest, check to see if the number of elements in vm_map_entry_zone is comparable. If so, consider + * vm_map_entry_zone as the largest. This lets us target a specific process to jetsam to quickly recover from the zone map bloat. + */ + if (largest_zone == vm_object_zone) { + int vm_object_zone_count = vm_object_zone->count; + int vm_map_entry_zone_count = vm_map_entry_zone->count; + /* Is the VM map entries zone count >= 98% of the VM objects zone count? */ + if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) { + largest_zone = vm_map_entry_zone; + printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n", (uintptr_t)largest_zone->cur_size); + } + } + + /* TODO: Extend this to check for the largest process in other zones as well. */ + if (largest_zone == vm_map_entry_zone) { + pid = find_largest_process_vm_map_entries(); + } else { + printf("zone_map_exhaustion: Nothing to do for the largest zone [%s]. Waking up memorystatus thread.\n", largest_zone->zone_name); + } + if (!memorystatus_kill_on_zone_map_exhaustion(pid)) { + printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid); + } +} + /* Global initialization of Zone Allocator. * Runs after zone_bootstrap. 
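
The exhaustion check added above is a simple percentage threshold: jetsam is considered once the zone map's current size exceeds zone_map_jetsam_limit percent of its capacity (95 by default, tunable with the "zone_map_jetsam_limit" boot-arg). A minimal standalone sketch of the test in is_zone_map_nearing_exhaustion() (the map size and capacity values below are invented for the example):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static unsigned int zone_map_jetsam_limit = 95;	/* ZONE_MAP_JETSAM_LIMIT_DEFAULT */

static bool
map_nearing_exhaustion(uint64_t size, uint64_t capacity)
{
	return size > (capacity * zone_map_jetsam_limit) / 100;
}

int
main(void)
{
	uint64_t capacity = 1536ULL << 20;	/* a hypothetical 1.5 GB zone map */

	assert(!map_nearing_exhaustion(1400ULL << 20, capacity));	/* ~91% full */
	assert(map_nearing_exhaustion(1500ULL << 20, capacity));	/* ~98% full */
	return 0;
}
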
*/ @@ -2122,9 +2934,16 @@ zone_init( vm_offset_t zone_max; vm_offset_t zone_metadata_space; unsigned int zone_pages; + vm_map_kernel_flags_t vmk_flags; + +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) ztInit(max_zonemap_size, &zone_locks_grp); +#endif + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, - FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE), + FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_ZONE, &zone_map); if (retval != KERN_SUCCESS) @@ -2171,6 +2990,15 @@ zone_init( */ zleak_init(max_zonemap_size); #endif /* CONFIG_ZLEAKS */ + +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) vm_allocation_zones_init(); +#endif + + int jetsam_limit_temp = 0; + if (PE_parse_boot_argn("zone_map_jetsam_limit", &jetsam_limit_temp, sizeof (jetsam_limit_temp)) && + jetsam_limit_temp > 0 && jetsam_limit_temp <= 100) + zone_map_jetsam_limit = jetsam_limit_temp; } extern volatile SInt32 kfree_nop_count; @@ -2178,6 +3006,8 @@ extern volatile SInt32 kfree_nop_count; #pragma mark - #pragma mark zalloc_canblock +extern boolean_t early_boot_complete; + /* * zalloc returns an element from the specified zone. */ @@ -2185,16 +3015,19 @@ static void * zalloc_internal( zone_t zone, boolean_t canblock, - boolean_t nopagewait) + boolean_t nopagewait, + vm_size_t +#if !VM_MAX_TAG_ZONES + __unused +#endif + reqsize, + vm_tag_t tag) { vm_offset_t addr = 0; kern_return_t retval; uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */ int numsaved = 0; boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE; -#if CONFIG_GZALLOC - boolean_t did_gzalloc = FALSE; -#endif thread_t thr = current_thread(); boolean_t check_poison = FALSE; boolean_t set_doing_alloc_with_vm_priv = FALSE; @@ -2203,13 +3036,27 @@ zalloc_internal( uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ #endif /* CONFIG_ZLEAKS */ +#if KASAN + /* + * KASan uses zalloc() for fakestack, which can be called anywhere. However, + * we make sure these calls can never block. + */ + boolean_t irq_safe = FALSE; + const char *fakestack_name = "fakestack."; + if (strncmp(zone->zone_name, fakestack_name, strlen(fakestack_name)) == 0) { + irq_safe = TRUE; + } +#elif MACH_ASSERT + /* In every other case, zalloc() from interrupt context is unsafe. */ + const boolean_t irq_safe = FALSE; +#endif + assert(zone != ZONE_NULL); + assert(irq_safe || ml_get_interrupts_enabled() || ml_is_quiescing() || debug_mode_active() || !early_boot_complete); #if CONFIG_GZALLOC addr = gzalloc_alloc(zone, canblock); - did_gzalloc = (addr != 0); #endif - /* * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. 
*/ @@ -2230,21 +3077,33 @@ zalloc_internal( } #endif /* CONFIG_ZLEAKS */ +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags)) vm_tag_will_update_zone(tag, zone->tag_zone_index); +#endif /* VM_MAX_TAG_ZONES */ + lock_zone(zone); + assert(zone->zone_valid); if (zone->async_prio_refill && zone->zone_replenish_thread) { - do { - vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size)); - vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size; - zone_replenish_wakeup = (zfreec < zrefillwm); - zone_alloc_throttle = (zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0); + vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size)); + vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size; + zone_replenish_wakeup = (zfreec < zrefillwm); + zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0)); + do { if (zone_replenish_wakeup) { zone_replenish_wakeups_initiated++; /* Signal the potentially waiting * refill thread. */ thread_wakeup(&zone->zone_replenish_thread); + + /* We don't want to wait around for zone_replenish_thread to bump up the free count + * if we're in zone_gc(). This keeps us from deadlocking with zone_replenish_thread. + */ + if (thr->options & TH_OPT_ZONE_GC) + break; + unlock_zone(zone); /* Scheduling latencies etc. may prevent * the refill thread from keeping up @@ -2258,13 +3117,27 @@ zalloc_internal( thread_block(THREAD_CONTINUE_NULL); } lock_zone(zone); + assert(zone->zone_valid); } + + zfreec = (zone->cur_size - (zone->count * zone->elem_size)); + zrefillwm = zone->prio_refill_watermark * zone->elem_size; + zone_replenish_wakeup = (zfreec < zrefillwm); + zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0)); + } while (zone_alloc_throttle == TRUE); } if (__probable(addr == 0)) - addr = try_alloc_from_zone(zone, &check_poison); + addr = try_alloc_from_zone(zone, tag, &check_poison); + /* If we're here because of zone_gc(), we didn't wait for zone_replenish_thread to finish. + * So we need to ensure that we did successfully grab an element. And we only need to assert + * this for zones that have a replenish thread configured (in this case, the Reserved VM map + * entries zone). 
+ */ + if (thr->options & TH_OPT_ZONE_GC && zone->async_prio_refill) + assert(addr != 0); while ((addr == 0) && canblock) { /* @@ -2350,6 +3223,11 @@ zalloc_internal( if (zone->noencrypt) zflags |= KMA_NOENCRYPT; + /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ + if (is_zone_map_nearing_exhaustion()) { + thread_wakeup((event_t) &vm_pageout_garbage_collect); + } + retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE); if (retval == KERN_SUCCESS) { #if CONFIG_ZLEAKS @@ -2376,11 +3254,6 @@ zalloc_internal( } else if (retval != KERN_RESOURCE_SHORTAGE) { retry++; - if (retry == 2) { - zone_gc(); - printf("zalloc did gc\n"); - zone_display_zprint(); - } if (retry == 3) { panic_include_zprint = TRUE; #if CONFIG_ZLEAKS @@ -2402,6 +3275,7 @@ zalloc_internal( } } lock_zone(zone); + assert(zone->zone_valid); if (set_doing_alloc_with_vm_priv == TRUE) zone->doing_alloc_with_vm_priv = FALSE; @@ -2414,7 +3288,7 @@ zalloc_internal( } clear_thread_rwlock_boost(); - addr = try_alloc_from_zone(zone, &check_poison); + addr = try_alloc_from_zone(zone, tag, &check_poison); if (addr == 0 && retval == KERN_RESOURCE_SHORTAGE) { if (nopagewait == TRUE) @@ -2423,10 +3297,11 @@ zalloc_internal( VM_PAGE_WAIT(); lock_zone(zone); + assert(zone->zone_valid); } } if (addr == 0) - addr = try_alloc_from_zone(zone, &check_poison); + addr = try_alloc_from_zone(zone, tag, &check_poison); } #if CONFIG_ZLEAKS @@ -2448,13 +3323,21 @@ zalloc_internal( unlock_zone(zone); thread_call_enter(&call_async_alloc); lock_zone(zone); - addr = try_alloc_from_zone(zone, &check_poison); + assert(zone->zone_valid); + addr = try_alloc_from_zone(zone, tag, &check_poison); } - vm_offset_t inner_size = zone->elem_size; +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags) && addr) { + if (reqsize) reqsize = zone->elem_size - reqsize; + vm_tag_update_zone_size(tag, zone->tag_zone_index, zone->elem_size, reqsize); + } +#endif /* VM_MAX_TAG_ZONES */ unlock_zone(zone); + vm_offset_t inner_size = zone->elem_size; + if (__improbable(DO_LOGGING(zone) && addr)) { btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved); } @@ -2497,32 +3380,46 @@ zalloc_internal( } TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); + +#if KASAN_ZALLOC + /* Fixup the return address to skip the redzone */ + if (zone->kasan_redzone) { + addr = kasan_alloc(addr, zone->elem_size, + zone->elem_size - 2 * zone->kasan_redzone, zone->kasan_redzone); + } +#endif + return((void *)addr); } - void * zalloc(zone_t zone) { - return (zalloc_internal(zone, TRUE, FALSE)); + return (zalloc_internal(zone, TRUE, FALSE, 0, VM_KERN_MEMORY_NONE)); } void * zalloc_noblock(zone_t zone) { - return (zalloc_internal(zone, FALSE, FALSE)); + return (zalloc_internal(zone, FALSE, FALSE, 0, VM_KERN_MEMORY_NONE)); } void * zalloc_nopagewait(zone_t zone) { - return (zalloc_internal(zone, TRUE, TRUE)); + return (zalloc_internal(zone, TRUE, TRUE, 0, VM_KERN_MEMORY_NONE)); +} + +void * +zalloc_canblock_tag(zone_t zone, boolean_t canblock, vm_size_t reqsize, vm_tag_t tag) +{ + return (zalloc_internal(zone, canblock, FALSE, reqsize, tag)); } void * zalloc_canblock(zone_t zone, boolean_t canblock) { - return (zalloc_internal(zone, canblock, FALSE)); + return (zalloc_internal(zone, canblock, FALSE, 0, VM_KERN_MEMORY_NONE)); } @@ -2541,15 +3438,21 @@ zalloc_async( simple_unlock(&all_zones_lock); for (i = 0; i < max_zones; i++) { current_z = &(zone_array[i]); + + if 
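
The refill loop above now recomputes its two conditions after each pass: the replenish thread is woken once the zone's free space drops below the watermark, and callers without TH_OPT_VMPRIV are throttled once free space drops below half the watermark, or unconditionally once the zone is empty. A standalone sketch of that decision, assuming byte-denominated inputs (check_refill and struct refill_decision are names invented for the example):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct refill_decision {
	bool wake_replenish;	/* signal zone_replenish_thread */
	bool throttle_caller;	/* block until the refill catches up */
};

static struct refill_decision
check_refill(size_t free_bytes, size_t watermark_bytes, bool vm_privileged)
{
	struct refill_decision d;

	d.wake_replenish = free_bytes < watermark_bytes;
	d.throttle_caller = ((free_bytes < watermark_bytes / 2) && !vm_privileged)
	    || (free_bytes == 0);
	return d;
}

int
main(void)
{
	/* Below the watermark but above half of it: wake, don't throttle. */
	struct refill_decision d = check_refill(3000, 4096, false);
	assert(d.wake_replenish && !d.throttle_caller);

	/* Under half the watermark: ordinary callers are throttled, while a
	 * VM-privileged thread is only throttled once the zone is empty. */
	assert(check_refill(1000, 4096, false).throttle_caller);
	assert(!check_refill(1000, 4096, true).throttle_caller);
	assert(check_refill(0, 4096, true).throttle_caller);
	return 0;
}
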
(current_z->no_callout == TRUE) { + /* async_pending will never be set */ + continue; + } + lock_zone(current_z); - if (current_z->async_pending == TRUE) { + if (current_z->zone_valid && current_z->async_pending == TRUE) { current_z->async_pending = FALSE; pending = TRUE; } unlock_zone(current_z); if (pending == TRUE) { - elt = zalloc_canblock(current_z, TRUE); + elt = zalloc_canblock_tag(current_z, TRUE, 0, VM_KERN_MEMORY_OSFMK); zfree(current_z, elt); pending = FALSE; } @@ -2564,7 +3467,7 @@ void * zget( zone_t zone) { - return zalloc_internal(zone, FALSE, TRUE); + return zalloc_internal(zone, FALSE, TRUE, 0, VM_KERN_MEMORY_NONE); } /* Keep this FALSE by default. Large memory machine run orders of magnitude @@ -2620,9 +3523,33 @@ zfree( int numsaved = 0; boolean_t gzfreed = FALSE; boolean_t poison = FALSE; +#if VM_MAX_TAG_ZONES + vm_tag_t tag; +#endif /* VM_MAX_TAG_ZONES */ assert(zone != ZONE_NULL); +#if KASAN_ZALLOC + /* + * Resize back to the real allocation size and hand off to the KASan + * quarantine. `addr` may then point to a different allocation. + */ + vm_size_t usersz = zone->elem_size - 2 * zone->kasan_redzone; + vm_size_t sz = usersz; + if (addr && zone->kasan_redzone) { + kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC); + addr = (void *)kasan_dealloc((vm_address_t)addr, &sz); + assert(sz == zone->elem_size); + } + if (addr && zone->kasan_quarantine) { + kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, &zone, usersz, true); + if (!addr) { + return; + } + } + elem = (vm_offset_t)addr; +#endif + /* * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. */ @@ -2708,13 +3635,22 @@ zfree( } lock_zone(zone); + assert(zone->zone_valid); if (zone_check) { zone_check_freelist(zone, elem); } - if (__probable(!gzfreed)) + if (__probable(!gzfreed)) { +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags)) { + tag = (ZTAG(zone, elem)[0] >> 1); + // set the tag with b0 clear so the block remains inuse + ZTAG(zone, elem)[0] = 0xFFFE; + } +#endif /* VM_MAX_TAG_ZONES */ free_to_zone(zone, elem, poison); + } #if MACH_ASSERT if (zone->count < 0) @@ -2732,10 +3668,15 @@ zfree( } #endif /* CONFIG_ZLEAKS */ +#if VM_MAX_TAG_ZONES + if (__improbable(zone->tags) && __probable(!gzfreed)) { + vm_tag_update_zone_size(tag, zone->tag_zone_index, -((int64_t)zone->elem_size), 0); + } +#endif /* VM_MAX_TAG_ZONES */ + unlock_zone(zone); } - /* Change a zone's flags. * This routine must be called immediately after zinit. */ @@ -2770,6 +3711,16 @@ zone_change( case Z_NOCALLOUT: zone->no_callout = value; break; + case Z_TAGS_ENABLED: +#if VM_MAX_TAG_ZONES + { + static int tag_zone_index; + zone->tags = TRUE; + zone->tags_inline = (((page_size + zone->elem_size - 1) / zone->elem_size) <= (sizeof(uint32_t) / sizeof(uint16_t))); + zone->tag_zone_index = OSAddAtomic(1, &tag_zone_index); + } +#endif /* VM_MAX_TAG_ZONES */ + break; case Z_GZALLOC_EXEMPT: zone->gzalloc_exempt = value; #if CONFIG_GZALLOC @@ -2778,10 +3729,21 @@ zone_change( break; case Z_ALIGNMENT_REQUIRED: zone->alignment_required = value; +#if KASAN_ZALLOC + if (zone->kasan_redzone == KASAN_GUARD_SIZE) { + /* Don't disturb alignment with the redzone for zones with + * specific alignment requirements. 
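
The Z_TAGS_ENABLED case above chooses between inline and out-of-line tags: the 32-bit per-page tag-base slot can hold two 16-bit tags directly, so tags stay inline only when a page holds at most two elements of the zone; larger element counts index into the external tag array instead. A standalone sketch of that test, assuming a 4 KB page size (tags_inline here is a free function invented for the example):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint16_t vm_tag_t;	/* matches the width consumed by ZTAG() */

static bool
tags_inline(size_t page_size, size_t elem_size)
{
	size_t elems_per_page = (page_size + elem_size - 1) / elem_size;

	/* A uint32_t tag-base slot fits two vm_tag_t values. */
	return elems_per_page <= sizeof(uint32_t) / sizeof(vm_tag_t);
}

int
main(void)
{
	assert(tags_inline(4096, 4096));	/* one element per page: inline */
	assert(tags_inline(4096, 2048));	/* two elements: still inline */
	assert(!tags_inline(4096, 512));	/* eight elements: external tag array */
	return 0;
}
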
*/ + zone->elem_size -= zone->kasan_redzone * 2; + zone->kasan_redzone = 0; + } +#endif #if CONFIG_GZALLOC gzalloc_reconfigure(zone); #endif break; + case Z_KASAN_QUARANTINE: + zone->kasan_quarantine = value; + break; default: panic("Zone_change: Wrong Item Type!"); /* break; */ @@ -2809,26 +3771,100 @@ zone_free_count(zone_t zone) return(free_count); } +/* Drops the elements in the free queue of a zone. Called by zone_gc() on each zone, and when a zone is zdestroy'ed. */ +void +drop_free_elements(zone_t z) +{ + vm_size_t elt_size, size_freed; + int total_freed_pages = 0; + uint64_t old_all_free_count; + struct zone_page_metadata *page_meta; + queue_head_t page_meta_head; + + lock_zone(z); + if (queue_empty(&z->pages.all_free)) { + unlock_zone(z); + return; + } + + /* + * Snatch all of the free elements away from the zone. + */ + elt_size = z->elem_size; + old_all_free_count = z->count_all_free_pages; + queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages); + queue_init(&z->pages.all_free); + z->count_all_free_pages = 0; + unlock_zone(z); + + /* Iterate through all elements to find out size and count of elements we snatched */ + size_freed = 0; + queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) { + assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */ + size_freed += elt_size * page_meta->free_count; + } + + /* Update the zone size and free element count */ + lock_zone(z); + z->cur_size -= size_freed; + z->countfree -= size_freed/elt_size; + unlock_zone(z); + + while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) { + vm_address_t free_page_address; + /* Free the pages for metadata and account for them */ + free_page_address = get_zone_page(page_meta); + ZONE_PAGE_COUNT_DECR(z, page_meta->page_count); + total_freed_pages += page_meta->page_count; + old_all_free_count -= page_meta->page_count; +#if KASAN_ZALLOC + kasan_poison_range(free_page_address, page_meta->page_count * PAGE_SIZE, ASAN_VALID); +#endif +#if VM_MAX_TAG_ZONES + if (z->tags) ztMemoryRemove(z, free_page_address, (page_meta->page_count * PAGE_SIZE)); +#endif /* VM_MAX_TAG_ZONES */ + kmem_free(zone_map, free_page_address, (page_meta->page_count * PAGE_SIZE)); + if (current_thread()->options & TH_OPT_ZONE_GC) { + thread_yield_to_preemption(); + } + } + + /* We freed all the pages from the all_free list for this zone */ + assert(old_all_free_count == 0); + + if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) + kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages); +} + /* Zone garbage collection * * zone_gc will walk through all the free elements in all the * zones that are marked collectable looking for reclaimable * pages. zone_gc is called by consider_zone_gc when the system * begins to run out of memory. + * + * We should ensure that zone_gc never blocks. */ -extern zone_t vm_map_entry_reserved_zone; -uint64_t zone_gc_bailed = 0; - void -zone_gc(void) +zone_gc(boolean_t consider_jetsams) { unsigned int max_zones; zone_t z; unsigned int i; - zone_t zres = vm_map_entry_reserved_zone; + + if (consider_jetsams) { + kill_process_in_largest_zone(); + /* + * If we do end up jetsamming something, we need to do a zone_gc so that + * we can reclaim free zone elements and update the zone map size. + * Fall through. 
+ */ + } lck_mtx_lock(&zone_gc_lock); + current_thread()->options |= TH_OPT_ZONE_GC; + simple_lock(&all_zones_lock); max_zones = num_zones; simple_unlock(&all_zones_lock); @@ -2838,102 +3874,21 @@ zone_gc(void) for (i = 0; i < max_zones; i++) { z = &(zone_array[i]); - vm_size_t elt_size, size_freed; - int total_freed_pages = 0; - struct zone_page_metadata *page_meta; - queue_head_t page_meta_head; - assert(z != ZONE_NULL); - if (!z->collectable) - continue; - - if (queue_empty(&z->pages.all_free)) { + if (!z->collectable) { continue; } - /* - * Since kmem_free() might use VM entries from the reserved VM entries zone, we should bail from zone_gc() if we - * are below the critical threshold for that zone. Otherwise, there could be a deadlock between the zone_gc - * thread and the zone_replenish thread for the VM entries zone on the zone_map lock. - */ - if (zres->zone_replenishing) { - zone_gc_bailed++; - break; - } - - lock_zone(z); - elt_size = z->elem_size; - if (queue_empty(&z->pages.all_free)) { - unlock_zone(z); continue; } - - /* - * Snatch all of the free elements away from the zone. - */ - uint64_t old_all_free_count = z->count_all_free_pages; - queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages); - queue_init(&z->pages.all_free); - z->count_all_free_pages = 0; - unlock_zone(z); - - /* Iterate through all elements to find out size and count of elements we snatched */ - size_freed = 0; - queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) { - assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */ - size_freed += elt_size * page_meta->free_count; - } - - /* Update the zone size and free element count */ - lock_zone(z); - z->cur_size -= size_freed; - z->countfree -= size_freed/elt_size; - unlock_zone(z); - - while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) { - vm_address_t free_page_address; - if (zres->zone_replenishing) - break; - /* Free the pages for metadata and account for them */ - free_page_address = get_zone_page(page_meta); - ZONE_PAGE_COUNT_DECR(z, page_meta->page_count); - total_freed_pages += page_meta->page_count; - old_all_free_count -= page_meta->page_count; - size_freed -= (elt_size * page_meta->free_count); - kmem_free(zone_map, free_page_address, (page_meta->page_count * PAGE_SIZE)); - thread_yield_to_preemption(); - } - if (page_meta != NULL) { - /* - * We bailed because the VM entry reserved zone is replenishing. Put the remaining - * metadata objects back on the all_free list and bail. 
- */ - queue_entry_t qe; - enqueue_head(&page_meta_head, &(page_meta->pages)); - zone_gc_bailed++; - - lock_zone(z); - qe_foreach_safe(qe, &page_meta_head) { - re_queue_tail(&z->pages.all_free, qe); - } - z->count_all_free_pages += (int)old_all_free_count; - z->cur_size += size_freed; - z->countfree += size_freed/elt_size; - unlock_zone(z); - if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) - kprintf("zone_gc() bailed due to VM entry zone replenishing (zone_gc_bailed: %lld)\n", zone_gc_bailed); - break; - } - /* We freed all the pages from the all_free list for this zone */ - assert(old_all_free_count == 0); - - if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) - kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages); + drop_free_elements(z); } + current_thread()->options &= ~TH_OPT_ZONE_GC; + lck_mtx_unlock(&zone_gc_lock); } @@ -2947,7 +3902,7 @@ extern unsigned int kmapoff_pgcnt; */ void -consider_zone_gc(void) +consider_zone_gc(boolean_t consider_jetsams) { if (kmapoff_kaddr != 0) { /* @@ -2960,7 +3915,7 @@ consider_zone_gc(void) } if (zone_gc_allowed) - zone_gc(); + zone_gc(consider_jetsams); } kern_return_t @@ -2986,17 +3941,6 @@ mach_zone_info( } -kern_return_t -host_zone_info( - host_priv_t host, - zone_name_array_t *namesp, - mach_msg_type_number_t *namesCntp, - zone_info_array_t *infop, - mach_msg_type_number_t *infoCntp) -{ - return (mach_memory_info(host, (mach_zone_name_array_t *)namesp, namesCntp, (mach_zone_info_array_t *)infop, infoCntp, NULL, NULL)); -} - kern_return_t mach_memory_info( host_priv_t host, @@ -3019,9 +3963,9 @@ mach_memory_info( vm_offset_t memory_info_addr; vm_size_t memory_info_size; vm_size_t memory_info_vmsize; - unsigned int num_sites; + unsigned int num_info; - unsigned int max_zones, i; + unsigned int max_zones, used_zones, i; zone_t z; mach_zone_name_t *zn; mach_zone_info_t *zi; @@ -3067,17 +4011,23 @@ mach_memory_info( zn = &names[0]; zi = &info[0]; + used_zones = max_zones; for (i = 0; i < max_zones; i++) { struct zone zcopy; z = &(zone_array[i]); assert(z != ZONE_NULL); lock_zone(z); + if (!z->zone_valid) { + unlock_zone(z); + used_zones--; + continue; + } zcopy = *z; unlock_zone(z); /* assuming here the name data is static */ - (void) strncpy(zn->mzn_name, zcopy.zone_name, + (void) __nosan_strncpy(zn->mzn_name, zcopy.zone_name, sizeof zn->mzn_name); zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; @@ -3094,7 +4044,7 @@ mach_memory_info( zi++; } - used = max_zones * sizeof *names; + used = used_zones * sizeof *names; if (used != names_size) bzero((char *) (names_addr + used), names_size - used); @@ -3103,9 +4053,9 @@ mach_memory_info( assert(kr == KERN_SUCCESS); *namesp = (mach_zone_name_t *) copy; - *namesCntp = max_zones; + *namesCntp = used_zones; - used = max_zones * sizeof *info; + used = used_zones * sizeof *info; if (used != info_size) bzero((char *) (info_addr + used), info_size - used); @@ -3115,15 +4065,15 @@ mach_memory_info( assert(kr == KERN_SUCCESS); *infop = (mach_zone_info_t *) copy; - *infoCntp = max_zones; + *infoCntp = used_zones; - num_sites = 0; + num_info = 0; memory_info_addr = 0; if (memoryInfop && memoryInfoCntp) { - num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT; - memory_info_size = num_sites * sizeof(*info); + num_info = vm_page_diagnose_estimate(); + memory_info_size = num_info * sizeof(*memory_info); memory_info_vmsize = round_page(memory_info_size); kr = kmem_alloc_pageable(ipc_kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC); @@ 
-3135,12 +4085,12 @@ mach_memory_info( return kr; } - kr = vm_map_wire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, - VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC), FALSE); + kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, + VM_PROT_READ|VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE); assert(kr == KERN_SUCCESS); memory_info = (mach_memory_info_t *) memory_info_addr; - vm_page_diagnose(memory_info, num_sites, zones_collectable_bytes); + vm_page_diagnose(memory_info, num_info, zones_collectable_bytes); kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE); assert(kr == KERN_SUCCESS); @@ -3150,22 +4100,101 @@ mach_memory_info( assert(kr == KERN_SUCCESS); *memoryInfop = (mach_memory_info_t *) copy; - *memoryInfoCntp = num_sites; + *memoryInfoCntp = num_info; } return KERN_SUCCESS; } +uint64_t +get_zones_collectable_bytes(void) +{ + zone_t z; + unsigned int i, max_zones; + uint64_t zones_collectable_bytes = 0; + + simple_lock(&all_zones_lock); + max_zones = (unsigned int)(num_zones); + simple_unlock(&all_zones_lock); + + for (i = 0; i < max_zones; i++) { + z = &(zone_array[i]); + assert(z != ZONE_NULL); + + lock_zone(z); + zones_collectable_bytes += ((uint64_t)z->count_all_free_pages * PAGE_SIZE); + unlock_zone(z); + } + + return zones_collectable_bytes; +} + +#if DEBUG || DEVELOPMENT + +kern_return_t +mach_memory_info_check(void) +{ + mach_memory_info_t * memory_info; + mach_memory_info_t * info; + zone_t zone; + unsigned int idx, num_info, max_zones; + vm_offset_t memory_info_addr; + kern_return_t kr; + size_t memory_info_size, memory_info_vmsize; + uint64_t top_wired, zonestotal, total; + + num_info = vm_page_diagnose_estimate(); + memory_info_size = num_info * sizeof(*memory_info); + memory_info_vmsize = round_page(memory_info_size); + kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG); + assert (kr == KERN_SUCCESS); + + memory_info = (mach_memory_info_t *) memory_info_addr; + vm_page_diagnose(memory_info, num_info, 0); + + simple_lock(&all_zones_lock); + max_zones = num_zones; + simple_unlock(&all_zones_lock); + + top_wired = total = zonestotal = 0; + for (idx = 0; idx < max_zones; idx++) + { + zone = &(zone_array[idx]); + assert(zone != ZONE_NULL); + lock_zone(zone); + zonestotal += ptoa_64(zone->page_count); + unlock_zone(zone); + } + for (idx = 0; idx < num_info; idx++) + { + info = &memory_info[idx]; + if (!info->size) continue; + if (VM_KERN_COUNT_WIRED == info->site) top_wired = info->size; + if (VM_KERN_SITE_HIDE & info->flags) continue; + if (!(VM_KERN_SITE_WIRED & info->flags)) continue; + total += info->size; + } + total += zonestotal; + + printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n", total, top_wired, zonestotal, top_wired - total); + + kmem_free(kernel_map, memory_info_addr, memory_info_vmsize); + + return (kr); +} + +#endif /* DEBUG || DEVELOPMENT */ + kern_return_t mach_zone_force_gc( host_t host) { - if (host == HOST_NULL) return KERN_INVALID_HOST; - consider_zone_gc(); - +#if DEBUG || DEVELOPMENT + consider_zone_gc(FALSE); +#endif /* DEBUG || DEVELOPMENT */ return (KERN_SUCCESS); } @@ -3177,26 +4206,6 @@ extern unsigned int inuse_ptepages_count; extern long long alloc_ptepages_count; #endif -void zone_display_zprint() -{ - unsigned int i; - zone_t the_zone; - - for (i = 0; i < num_zones; i++) { - the_zone = &(zone_array[i]); - if(the_zone->cur_size > (1024*1024)) { - 
printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size); - } - } - printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total)); - -#if defined(__i386__) || defined (__x86_64__) - printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count)); -#endif - - printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total); -} - zone_t zone_find_largest(void) { @@ -3293,13 +4302,18 @@ zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * uint32_t btidx, btcount, nobtcount, btfound; uint32_t elemSize; uint64_t maxElems; - kern_return_t kr; + unsigned int max_zones; + kern_return_t kr; - for (idx = 0; idx < num_zones; idx++) + simple_lock(&all_zones_lock); + max_zones = num_zones; + simple_unlock(&all_zones_lock); + + for (idx = 0; idx < max_zones; idx++) { if (!strncmp(zoneName, zone_array[idx].zone_name, nameLen)) break; } - if (idx >= num_zones) return (KERN_INVALID_NAME); + if (idx >= max_zones) return (KERN_INVALID_NAME); zone = &zone_array[idx]; elemSize = (uint32_t) zone->elem_size; @@ -3369,41 +4383,78 @@ zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * return (KERN_SUCCESS); } -void -kern_wired_diagnose(void) +boolean_t +kdp_is_in_zone(void *addr, const char *zone_name) { - unsigned int count = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT; - mach_memory_info_t info[count]; - unsigned int idx; - uint64_t total_zone, total_wired, top_wired, osfmk_wired; + zone_t z; + return (zone_element_size(addr, &z) && !strcmp(z->zone_name, zone_name)); +} - if (KERN_SUCCESS != vm_page_diagnose(info, count, 0)) return; +boolean_t +run_zone_test(void) +{ + int i = 0, max_iter = 5; + void * test_ptr; + zone_t test_zone; - total_zone = total_wired = top_wired = osfmk_wired = 0; - for (idx = 0; idx < num_zones; idx++) - { - total_zone += ptoa_64(zone_array[idx].page_count); - } - total_wired = total_zone; + simple_lock(&zone_test_lock); + if (!zone_test_running) { + zone_test_running = TRUE; + } else { + simple_unlock(&zone_test_lock); + printf("run_zone_test: Test already running.\n"); + return FALSE; + } + simple_unlock(&zone_test_lock); - for (idx = 0; idx < count; idx++) - { - if (VM_KERN_COUNT_WIRED == info[idx].site) top_wired = info[idx].size; - if (VM_KERN_MEMORY_OSFMK == info[idx].site) osfmk_wired = info[idx].size; - if (VM_KERN_SITE_HIDE & info[idx].flags) continue; - if (!(VM_KERN_SITE_WIRED & info[idx].flags)) continue; - total_wired += info[idx].size; - } + printf("run_zone_test: Testing zinit(), zalloc(), zfree() and zdestroy() on zone \"test_zone_sysctl\"\n"); - printf("top 0x%qx, total 0x%qx, zone 0x%qx, osfmk 0x%qx\n", - top_wired, total_wired, total_zone, osfmk_wired); -} + /* zinit() and zdestroy() a zone with the same name a bunch of times, verify that we get back the same zone each time */ + do { + test_zone = zinit(sizeof(uint64_t), 100 * sizeof(uint64_t), sizeof(uint64_t), "test_zone_sysctl"); + if (test_zone == NULL) { + printf("run_zone_test: zinit() failed\n"); + return FALSE; + } -boolean_t -kdp_is_in_zone(void *addr, const char *zone_name) -{ - zone_t z; - return (zone_element_size(addr, &z) && !strcmp(z->zone_name, zone_name)); +#if KASAN_ZALLOC + if (test_zone_ptr == NULL && zone_free_count(test_zone) != 0) { +#else + if (zone_free_count(test_zone) != 0) { +#endif + printf("run_zone_test: free count is not zero\n"); + return FALSE; + } + + if (test_zone_ptr == NULL) { + /* Stash the zone pointer returned on the fist zinit */ + printf("run_zone_test: zone created for the first 
time\n"); + test_zone_ptr = test_zone; + } else if (test_zone != test_zone_ptr) { + printf("run_zone_test: old zone pointer and new zone pointer don't match\n"); + return FALSE; + } + + test_ptr = zalloc(test_zone); + if (test_ptr == NULL) { + printf("run_zone_test: zalloc() failed\n"); + return FALSE; + } + zfree(test_zone, test_ptr); + + zdestroy(test_zone); + i++; + + printf("run_zone_test: Iteration %d successful\n", i); + } while (i < max_iter); + + printf("run_zone_test: Test passed\n"); + + simple_lock(&zone_test_lock); + zone_test_running = FALSE; + simple_unlock(&zone_test_lock); + + return TRUE; } #endif /* DEBUG || DEVELOPMENT */