+
+#if VM_MAX_TAG_ZONES
+
// for zones with tagging enabled:

// ZTAGBASE(zone, element): pointer to the tag base entry for the page
// containing 'element'.  The tag base array has one uint32_t slot per
// zone-map page, holding either the first tag offset for that page in the
// out-of-line tag area, or two uint16_t tags inline if the page can only
// hold one or two elements.

#define ZTAGBASE(zone, element) \
    (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)])
+
// ZTAG(zone, element): pointer to the vm_tag_t for an element.
//
// For tags_inline zones the tag lives directly in the page's ZTAGBASE slot
// (the page's second element uses the second inline half).  Otherwise the
// ZTAGBASE slot holds the page's first index into the out-of-line tag array
// at zone_tags_min, and the element's tag is at that base plus the element's
// index within its page.
//
// Fix: 'element' is now parenthesized in the inline-tag comparison; the
// previous expansion '(page_mask & element)' would bind incorrectly for a
// compound argument expression (macro-hygiene, CERT PRE01-C).
#define ZTAG(zone, element)                                 \
    ({                                                      \
	vm_tag_t * result;                                  \
	if ((zone)->tags_inline) {                          \
	    result = (vm_tag_t *) ZTAGBASE((zone), (element));  \
	    if ((page_mask & (element)) >= (zone)->elem_size) { \
		result++;                                   \
	    }                                               \
	} else {                                            \
	    result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \
	}                                                   \
	result;                                             \
    })
+
+
// Bounds and submap for the per-page tag base array (one uint32_t slot per
// zone-map page; see ZTAGBASE).
static vm_offset_t zone_tagbase_min;
static vm_offset_t zone_tagbase_max;
static vm_offset_t zone_tagbase_map_size;
static vm_map_t zone_tagbase_map;

// Bounds and submap for out-of-line tag storage used by zones whose tags
// are not inline (see ZTAG); the ztBlock heap below is overlaid on it.
static vm_offset_t zone_tags_min;
static vm_offset_t zone_tags_max;
static vm_offset_t zone_tags_map_size;
static vm_map_t zone_tags_map;
+
+// simple heap allocator for allocating the tags for new memory
+
decl_lck_mtx_data(, ztLock); /* heap lock */
enum{
	ztFreeIndexCount = 8,                       // free-list qheads live at ztBlocks[0..7]
	ztFreeIndexMax = (ztFreeIndexCount - 1),    // largest size-class index
	ztTagsPerBlock = 4                          // tag entries per heap allocation unit
};
+
// Tag-heap bookkeeping record.  A free run of 'size' entries is described
// by matching records at its first and last index (so a neighbor can find
// the run's head from its tail); free runs are chained through next/prev
// on circular lists rooted at the size-class qheads ztBlocks[0..7].
struct ztBlock {
#if __LITTLE_ENDIAN__
	uint64_t free:1,
	    next:21,
	    prev:21,
	    size:21;
#else
// ztBlock needs free bit least significant
#error !__LITTLE_ENDIAN__
#endif
};
typedef struct ztBlock ztBlock;
+
static ztBlock * ztBlocks;       // heap records, overlaid on zone_tags_min (see ztInit)
static uint32_t ztBlocksCount;   // capacity of ztBlocks[]
static uint32_t ztBlocksFree;    // total entries currently on free lists
+
/*
 * Ceiling log2: smallest q such that (1 << q) >= size.
 * ztLog2up(1) == 0; an input of 0 yields 32 (clz of all-ones is 0).
 */
static uint32_t
ztLog2up(uint32_t size)
{
	if (size == 1) {
		return 0;
	}
	return 32 - (uint32_t)__builtin_clz(size - 1);
}
+
/*
 * Floor log2: largest q such that (1 << q) <= size.
 * Caller must not pass 0 (__builtin_clz(0) is undefined).
 */
static uint32_t
ztLog2down(uint32_t size)
{
	return 31 - (uint32_t)__builtin_clz(size);
}
+
+static void
+ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+
+ for (; page < end; page += page_size) {
+ if (!pmap_find_phys(kernel_pmap, page)) {
+ kern_return_t __unused
+ ret = kernel_memory_populate(map, page, PAGE_SIZE,
+ KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
+ assert(ret == KERN_SUCCESS);
+ }
+ }
+}
+
+static boolean_t
+ztPresent(const void * address, size_t size)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+ boolean_t result;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+ for (result = TRUE; (page < end); page += page_size) {
+ result = pmap_find_phys(kernel_pmap, page);
+ if (!result) {
+ break;
+ }
+ }
+ return result;
+}
+
+
/*
 * Debug dump / sanity check of the tag heap free lists.
 *
 * sanity == FALSE: print every record on each size-class list.
 * sanity == TRUE:  check free-list invariants (record marked free, queued
 * under a plausible size class, next/prev links mutually consistent); on
 * failure, dump the whole heap and assert.
 *
 * NOTE(review): in the sanity branch, 'continue' re-tests (p != q) without
 * advancing p, so the do/while exits after inspecting only the qhead
 * itself rather than walking the chain — confirm this is intentional.
 */
void __unused
ztDump(boolean_t sanity);
void __unused
ztDump(boolean_t sanity)
{
	uint32_t q, cq, p;

	for (q = 0; q <= ztFreeIndexMax; q++) {
		p = q;
		do{
			if (sanity) {
				// size class the record's size would hash to
				cq = ztLog2down(ztBlocks[p].size);
				if (cq > ztFreeIndexMax) {
					cq = ztFreeIndexMax;
				}
				if (!ztBlocks[p].free
				    || ((p != q) && (q != cq))
				    || (ztBlocks[ztBlocks[p].next].prev != p)
				    || (ztBlocks[ztBlocks[p].prev].next != p)) {
					kprintf("zterror at %d", p);
					ztDump(FALSE);
					kprintf("zterror at %d", p);
					assert(FALSE);
				}
				continue;
			}
			kprintf("zt[%03d]%c %d, %d, %d\n",
			    p, ztBlocks[p].free ? 'F' : 'A',
			    ztBlocks[p].next, ztBlocks[p].prev,
			    ztBlocks[p].size);
			p = ztBlocks[p].next;
			if (p == q) {
				break;
			}
		}while (p != q);
		if (!sanity) {
			printf("\n");
		}
	}
	if (!sanity) {
		printf("-----------------------\n");
	}
}
+
+
+
/*
 * Unlink record 'idx' from its free list by splicing its neighbors
 * together (idx's own links, free bit and size are left untouched).
 *
 * Fix: wrapped in do/while(0) so the two statements expand safely as a
 * single statement (e.g. under an unbraced 'if') — CERT PRE10-C.  The
 * existing call sites ('ZTBDEQ(x);') are unaffected.
 */
#define ZTBDEQ(idx)                                                     \
	do {                                                            \
	        ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
	        ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev; \
	} while (0)
+
/*
 * Return the run [index, index + count) to the tag heap.
 *
 * Coalesces with any free run immediately before or after, then inserts
 * the result on the free list for its size class, which is kept sorted by
 * ascending size.  'zone' is unused (present for symmetry with ztAlloc).
 * Caller holds ztLock.
 */
static void
ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
{
	uint32_t q, w, p, size, merge;

	assert(count);
	ztBlocksFree += count;

	// coalesce with the free run that immediately FOLLOWS, if present
	merge = (index + count);
	if ((merge < ztBlocksCount)
	    && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
	    && ztBlocks[merge].free) {
		ZTBDEQ(merge);
		count += ztBlocks[merge].size;
	}

	// coalesce with the free run that immediately PRECEDES, if present
	// (ztBlocks[index - 1] is that run's tail record; it mirrors the
	// run's size, which locates the run's head)
	merge = (index - 1);
	if ((merge > ztFreeIndexMax)
	    && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
	    && ztBlocks[merge].free) {
		size = ztBlocks[merge].size;
		count += size;
		index -= size;
		ZTBDEQ(index);
	}

	// pick the size class for the coalesced run
	q = ztLog2down(count);
	if (q > ztFreeIndexMax) {
		q = ztFreeIndexMax;
	}
	w = q;
	// queue in order of size: advance w until the next run is >= count
	while (TRUE) {
		p = ztBlocks[w].next;
		if (p == q) {
			break;
		}
		if (ztBlocks[p].size >= count) {
			break;
		}
		w = p;
	}
	// link the run between w and p
	ztBlocks[p].prev = index;
	ztBlocks[w].next = index;

	// fault in first
	ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);

	// mark first & last with free flag and size
	ztBlocks[index].free = TRUE;
	ztBlocks[index].size = count;
	ztBlocks[index].prev = w;
	ztBlocks[index].next = p;
	if (count > 1) {
		index += (count - 1);
		// fault in last
		ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
		ztBlocks[index].free = TRUE;
		ztBlocks[index].size = count;
	}
}
+
/*
 * Allocate 'count' contiguous entries from the tag heap.
 *
 * First-fit search starting at the size class for 'count', escalating to
 * larger classes; any unused tail of the chosen run is freed back to the
 * heap.  Returns the starting index, or -1U if no run is large enough.
 * Caller holds ztLock.
 */
static uint32_t
ztAlloc(zone_t zone, uint32_t count)
{
	uint32_t q, w, p, leftover;

	assert(count);

	q = ztLog2up(count);
	if (q > ztFreeIndexMax) {
		q = ztFreeIndexMax;
	}
	do{
		w = q;
		while (TRUE) {
			p = ztBlocks[w].next;
			if (p == q) {
				// exhausted this class, try a larger one
				break;
			}
			if (ztBlocks[p].size >= count) {
				// dequeue, mark both ends allocated
				ztBlocks[w].next = ztBlocks[p].next;
				ztBlocks[ztBlocks[p].next].prev = w;
				ztBlocks[p].free = FALSE;
				ztBlocksFree -= ztBlocks[p].size;
				if (ztBlocks[p].size > 1) {
					ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
				}

				// fault all the allocation
				ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
				// mark last as allocated
				if (count > 1) {
					ztBlocks[p + count - 1].free = FALSE;
				}
				// free remainder (p + size - leftover == p + count)
				leftover = ztBlocks[p].size - count;
				if (leftover) {
					ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
				}

				return p;
			}
			w = p;
		}
		q++;
	}while (q <= ztFreeIndexMax);

	return -1U;
}
+
/*
 * One-time setup of the zone tagging heap.
 *
 * Creates two permanent kernel submaps sized from the maximum zone map
 * size: one backing the per-page tag base array, one backing out-of-line
 * tag storage.  The ztBlock heap is overlaid on the latter; its qheads
 * are initialized and all remaining space is seeded as one free run.
 */
static void
ztInit(vm_size_t max_zonemap_size, lck_grp_t * group)
{
	kern_return_t ret;
	vm_map_kernel_flags_t vmk_flags;
	uint32_t idx;

	lck_mtx_init(&ztLock, group, LCK_ATTR_NULL);

	// allocate submaps VM_KERN_MEMORY_DIAG

	// one uint32_t tag-base slot per zone-map page
	zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
	    &zone_tagbase_map);

	if (ret != KERN_SUCCESS) {
		panic("zone_init: kmem_suballoc failed");
	}
	zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);

	// fixed-size out-of-line tag area (2M tag entries)
	zone_tags_map_size = 2048 * 1024 * sizeof(vm_tag_t);
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
	    &zone_tags_map);

	if (ret != KERN_SUCCESS) {
		panic("zone_init: kmem_suballoc failed");
	}
	zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);

	ztBlocks = (ztBlock *) zone_tags_min;
	ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));

	// initialize the qheads
	lck_mtx_lock(&ztLock);

	ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
	for (idx = 0; idx < ztFreeIndexCount; idx++) {
		ztBlocks[idx].free = TRUE;
		ztBlocks[idx].next = idx;
		ztBlocks[idx].prev = idx;
		ztBlocks[idx].size = 0;
	}
	// free remaining space
	ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);

	lck_mtx_unlock(&ztLock);
}
+
/*
 * Prepare tag storage for memory newly added to a tagged zone.
 *
 * Faults in the tag base slots covering [mem, mem + size); for zones
 * whose tags are not inline, also allocates one tag per element (rounded
 * up to ztTagsPerBlock) from the tag heap and records each page's first
 * tag index in its tag base slot.
 */
static void
ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
{
	uint32_t * tagbase;
	uint32_t count, block, blocks, idx;
	size_t pages;

	pages = atop(size);
	tagbase = ZTAGBASE(zone, mem);

	lck_mtx_lock(&ztLock);

	// fault tagbase
	ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);

	if (!zone->tags_inline) {
		// allocate tags
		count = (uint32_t)(size / zone->elem_size);
		blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
		block = ztAlloc(zone, blocks);
		if (-1U == block) {
			ztDump(false);
		}
		assert(-1U != block);
	}

	lck_mtx_unlock(&ztLock);

	if (!zone->tags_inline) {
		// set tag base for each page
		block *= ztTagsPerBlock;
		for (idx = 0; idx < pages; idx++) {
			// index of the first element starting on or after this page
			tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size);
		}
	}
}
+
/*
 * Release tag storage for memory leaving a tagged zone.
 *
 * Poisons each page's tag base slot with 0xFFFFFFFF; for zones whose tags
 * are not inline, returns the range's tag block to the tag heap.
 */
static void
ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
{
	uint32_t * tagbase;
	uint32_t count, block, blocks, idx;
	size_t pages;

	// set tag base for each page
	pages = atop(size);
	tagbase = ZTAGBASE(zone, mem);
	// remember the first page's tag index before poisoning the slots
	block = tagbase[0];
	for (idx = 0; idx < pages; idx++) {
		tagbase[idx] = 0xFFFFFFFF;
	}

	lck_mtx_lock(&ztLock);
	if (!zone->tags_inline) {
		count = (uint32_t)(size / zone->elem_size);
		blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
		assert(block != 0xFFFFFFFF);
		block /= ztTagsPerBlock;
		ztFree(NULL /* zone is unlocked */, block, blocks);
	}

	lck_mtx_unlock(&ztLock);
}
+
/*
 * Map a tag zone index back to its index in zone_array, returning the
 * zone's element size through 'elem_size'.  Returns -1U when no tagged
 * zone matches (in which case '*elem_size' is left unmodified).
 */
uint32_t
zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
{
	zone_t z;
	uint32_t idx;

	simple_lock(&all_zones_lock, &zone_locks_grp);

	for (idx = 0; idx < num_zones; idx++) {
		z = &(zone_array[idx]);
		if (!z->tags) {
			// only tagged zones carry a tag_zone_index
			continue;
		}
		if (tag_zone_index != z->tag_zone_index) {
			continue;
		}
		*elem_size = z->elem_size;
		break;
	}

	simple_unlock(&all_zones_lock);

	if (idx == num_zones) {
		idx = -1U;
	}

	return idx;
}
+
+#endif /* VM_MAX_TAG_ZONES */
+
/*
 * Routine to get the size of a zone allocated address.  If the address
 * doesn't belong to the zone_map (and, under CONFIG_GZALLOC, is not a
 * gzalloc element either), returns 0.  On success the owning zone is
 * optionally returned through 'z'.
 */
vm_size_t
zone_element_size(void *addr, zone_t *z)
{
	struct zone *src_zone;
	if (from_zone_map(addr, sizeof(void *))) {
		struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
		src_zone = PAGE_METADATA_GET_ZONE(page_meta);
		if (z) {
			*z = src_zone;
		}
		return src_zone->elem_size;
	} else {
#if CONFIG_GZALLOC
		vm_size_t gzsize;
		if (gzalloc_element_size(addr, z, &gzsize)) {
			return gzsize;
		}
#endif /* CONFIG_GZALLOC */

		return 0;
	}
}
+
+#if DEBUG || DEVELOPMENT
+
/*
 * Debug/development variant of zone_element_size() that also reports the
 * VM tag recorded for the element.  '*ptag' is VM_KERN_MEMORY_NONE when
 * the zone is untagged or the address is not a zone-map element.
 */
vm_size_t
zone_element_info(void *addr, vm_tag_t * ptag)
{
	vm_size_t size = 0;
	vm_tag_t tag = VM_KERN_MEMORY_NONE;
	struct zone * src_zone;

	if (from_zone_map(addr, sizeof(void *))) {
		struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
		src_zone = PAGE_METADATA_GET_ZONE(page_meta);
#if VM_MAX_TAG_ZONES
		if (__improbable(src_zone->tags)) {
			// tags are stored shifted left by one (see try_alloc_from_zone)
			tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
		}
#endif /* VM_MAX_TAG_ZONES */
		size = src_zone->elem_size;
	} else {
#if CONFIG_GZALLOC
		gzalloc_element_size(addr, NULL, &size);
#endif /* CONFIG_GZALLOC */
	}
	*ptag = tag;
	return size;
}
+
+#endif /* DEBUG || DEVELOPMENT */
+
+/*
+ * Zone checking helper function.
+ * A pointer that satisfies these conditions is OK to be a freelist next pointer
+ * A pointer that doesn't satisfy these conditions indicates corruption
+ */
+static inline boolean_t
+is_sane_zone_ptr(zone_t zone,
+ vm_offset_t addr,
+ size_t obj_size)
+{
+ /* Must be aligned to pointer boundary */
+ if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0)) {
+ return FALSE;
+ }
+
+ /* Must be a kernel address */
+ if (__improbable(!pmap_kernel_va(addr))) {
+ return FALSE;
+ }
+
+ /* Must be from zone map if the zone only uses memory from the zone_map */
+ /*
+ * TODO: Remove the zone->collectable check when every
+ * zone using foreign memory is properly tagged with allows_foreign
+ */
+ if (zone->collectable && !zone->allows_foreign) {
+ /* check if addr is from zone map */
+ if (addr >= zone_map_min_address &&
+ (addr + obj_size - 1) < zone_map_max_address) {
+ return TRUE;
+ }
+
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static inline boolean_t
+is_sane_zone_page_metadata(zone_t zone,
+ vm_offset_t page_meta)
+{
+ /* NULL page metadata structures are invalid */
+ if (page_meta == 0) {
+ return FALSE;
+ }
+ return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata));
+}
+
+static inline boolean_t
+is_sane_zone_element(zone_t zone,
+ vm_offset_t addr)
+{
+ /* NULL is OK because it indicates the tail of the list */
+ if (addr == 0) {
+ return TRUE;
+ }
+ return is_sane_zone_ptr(zone, addr, zone->elem_size);
+}
+
/*
 * Someone wrote to freed memory: panic with diagnostics.
 * 'expected'/'found' are the pristine and observed words at byte 'offset'
 * within the freed element; both poisoning cookies are included in the
 * message to help classify the corruption.
 */
__dead2
static inline void
zone_element_was_modified_panic(zone_t zone,
    vm_offset_t element,
    vm_offset_t found,
    vm_offset_t expected,
    vm_offset_t offset)
{
	panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p",
	    zone->zone_name,
	    (void *) expected,
	    (void *) found,
	    (void *) (expected ^ found),
	    (uint32_t) offset,
	    (uint32_t) zone->elem_size,
	    (void *) element,
	    (void *) zp_nopoison_cookie,
	    (void *) zp_poisoned_cookie);
}
+
+/*
+ * The primary and backup pointers don't match.
+ * Determine which one was likely the corrupted pointer, find out what it
+ * probably should have been, and panic.
+ */
+__dead2
+static void
+backup_ptr_mismatch_panic(zone_t zone,
+ vm_offset_t element,
+ vm_offset_t primary,
+ vm_offset_t backup)
+{
+ vm_offset_t likely_backup;
+ vm_offset_t likely_primary;
+
+ likely_primary = primary ^ zp_nopoison_cookie;
+ boolean_t sane_backup;
+ boolean_t sane_primary = is_sane_zone_element(zone, likely_primary);
+ boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE;
+
+#if defined(__LP64__)
+ /* We can inspect the tag in the upper bits for additional confirmation */
+ if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) {
+ element_was_poisoned = TRUE;
+ } else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) {
+ element_was_poisoned = FALSE;
+ }
+#endif
+
+ if (element_was_poisoned) {
+ likely_backup = backup ^ zp_poisoned_cookie;
+ sane_backup = is_sane_zone_element(zone, likely_backup);
+ } else {
+ likely_backup = backup ^ zp_nopoison_cookie;
+ sane_backup = is_sane_zone_element(zone, likely_backup);
+ }
+
+ /* The primary is definitely the corrupted one */
+ if (!sane_primary && sane_backup) {
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+ }
+
+ /* The backup is definitely the corrupted one */
+ if (sane_primary && !sane_backup) {
+ zone_element_was_modified_panic(zone, element, backup,
+ (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
+ zone->elem_size - sizeof(vm_offset_t));
+ }
+
+ /*
+ * Not sure which is the corrupted one.
+ * It's less likely that the backup pointer was overwritten with
+ * ( (sane address) ^ (valid cookie) ), so we'll guess that the
+ * primary pointer has been overwritten with a sane but incorrect address.
+ */
+ if (sane_primary && sane_backup) {
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+ }
+
+ /* Neither are sane, so just guess. */
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+}
+
+/*
+ * Adds the element to the head of the zone's free list
+ * Keeps a backup next-pointer at the end of the element
+ */
+static inline void
+free_to_zone(zone_t zone,
+ vm_offset_t element,
+ boolean_t poison)
+{
+ vm_offset_t old_head;
+ struct zone_page_metadata *page_meta;
+
+ vm_offset_t *primary = (vm_offset_t *) element;
+ vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
+
+ page_meta = get_zone_page_metadata((struct zone_free_element *)element, FALSE);
+ assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
+ old_head = (vm_offset_t)page_metadata_get_freelist(page_meta);
+
+ if (__improbable(!is_sane_zone_element(zone, old_head))) {
+ panic("zfree: invalid head pointer %p for freelist of zone %s\n",
+ (void *) old_head, zone->zone_name);
+ }
+
+ if (__improbable(!is_sane_zone_element(zone, element))) {
+ panic("zfree: freeing invalid pointer %p to zone %s\n",
+ (void *) element, zone->zone_name);
+ }
+
+ if (__improbable(old_head == element)) {
+ panic("zfree: double free of %p to zone %s\n",
+ (void *) element, zone->zone_name);
+ }
+ /*
+ * Always write a redundant next pointer
+ * So that it is more difficult to forge, xor it with a random cookie
+ * A poisoned element is indicated by using zp_poisoned_cookie
+ * instead of zp_nopoison_cookie
+ */
+
+ *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);
+
+ /*
+ * Insert this element at the head of the free list. We also xor the
+ * primary pointer with the zp_nopoison_cookie to make sure a free
+ * element does not provide the location of the next free element directly.
+ */
+ *primary = old_head ^ zp_nopoison_cookie;
+ page_metadata_set_freelist(page_meta, (struct zone_free_element *)element);
+ page_meta->free_count++;
+ if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) {
+ if (page_meta->free_count == 1) {
+ /* first foreign element freed on page, move from all_used */
+ re_queue_tail(&zone->pages.any_free_foreign, &(page_meta->pages));
+ } else {
+ /* no other list transitions */
+ }
+ } else if (page_meta->free_count == get_metadata_alloc_count(page_meta)) {
+ /* whether the page was on the intermediate or all_used, queue, move it to free */
+ re_queue_tail(&zone->pages.all_free, &(page_meta->pages));
+ zone->count_all_free_pages += page_meta->page_count;
+ } else if (page_meta->free_count == 1) {
+ /* first free element on page, move from all_used */
+ re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
+ }
+ zone->count--;
+ zone->countfree++;
+
+#if KASAN_ZALLOC
+ kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED);
+#endif
+}
+
+
+/*
+ * Removes an element from the zone's free list, returning 0 if the free list is empty.
+ * Verifies that the next-pointer and backup next-pointer are intact,
+ * and verifies that a poisoned element hasn't been modified.
+ */
+static inline vm_offset_t
+try_alloc_from_zone(zone_t zone,
+ vm_tag_t tag __unused,
+ boolean_t* check_poison)
+{
+ vm_offset_t element;
+ struct zone_page_metadata *page_meta;
+
+ *check_poison = FALSE;
+
+ /* if zone is empty, bail */
+ if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign)) {
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
+ } else if (!queue_empty(&zone->pages.intermediate)) {
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
+ } else if (!queue_empty(&zone->pages.all_free)) {
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
+ assert(zone->count_all_free_pages >= page_meta->page_count);
+ zone->count_all_free_pages -= page_meta->page_count;
+ } else {
+ return 0;
+ }
+ /* Check if page_meta passes is_sane_zone_element */
+ if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta))) {
+ panic("zalloc: invalid metadata structure %p for freelist of zone %s\n",
+ (void *) page_meta, zone->zone_name);
+ }
+ assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
+ element = (vm_offset_t)page_metadata_get_freelist(page_meta);
+
+ if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size))) {
+ panic("zfree: invalid head pointer %p for freelist of zone %s\n",
+ (void *) element, zone->zone_name);
+ }
+
+ vm_offset_t *primary = (vm_offset_t *) element;
+ vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
+
+ /*
+ * Since the primary next pointer is xor'ed with zp_nopoison_cookie
+ * for obfuscation, retrieve the original value back
+ */
+ vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
+ vm_offset_t next_element_primary = *primary;
+ vm_offset_t next_element_backup = *backup;
+
+ /*
+ * backup_ptr_mismatch_panic will determine what next_element
+ * should have been, and print it appropriately
+ */
+ if (__improbable(!is_sane_zone_element(zone, next_element))) {
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+ }
+
+ /* Check the backup pointer for the regular cookie */
+ if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) {
+ /* Check for the poisoned cookie instead */
+ if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie))) {
+ /* Neither cookie is valid, corruption has occurred */
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+ }
+
+ /*
+ * Element was marked as poisoned, so check its integrity before using it.
+ */
+ *check_poison = TRUE;
+ }
+
+ /* Make sure the page_meta is at the correct offset from the start of page */
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE))) {
+ panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n",
+ page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE));
+ }
+
+ /* Make sure next_element belongs to the same page as page_meta */
+ if (next_element) {
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE))) {
+ panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n",
+ (void *)next_element, (void *)element, zone->zone_name);
+ }
+ }
+
+ /* Remove this element from the free list */
+ page_metadata_set_freelist(page_meta, (struct zone_free_element *)next_element);
+ page_meta->free_count--;
+
+ if (page_meta->free_count == 0) {
+ /* move to all used */
+ re_queue_tail(&zone->pages.all_used, &(page_meta->pages));
+ } else {
+ if (!zone->allows_foreign || from_zone_map(element, zone->elem_size)) {
+ if (get_metadata_alloc_count(page_meta) == page_meta->free_count + 1) {
+ /* remove from free, move to intermediate */
+ re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
+ }
+ }
+ }
+ zone->countfree--;
+ zone->count++;
+ zone->sum_count++;
+
+#if VM_MAX_TAG_ZONES
+ if (__improbable(zone->tags)) {
+ // set the tag with b0 clear so the block remains inuse
+ ZTAG(zone, element)[0] = (tag << 1);
+ }
+#endif /* VM_MAX_TAG_ZONES */
+
+
+#if KASAN_ZALLOC
+ kasan_poison_range(element, zone->elem_size, ASAN_VALID);
+#endif
+
+ return element;
+}
+
+/*
+ * End of zone poisoning
+ */
+
+/*
+ * Zone info options
+ */
+#define ZINFO_SLOTS MAX_ZONES /* for now */
+
+zone_t zone_find_largest(void);
+
+/*
+ * Async allocation of zones
+ * This mechanism allows for bootstrapping an empty zone which is setup with
+ * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
+ * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
+ * This will prime the zone for the next use.
+ *
+ * Currently the thread_callout function (zalloc_async) will loop through all zones
+ * looking for any zone with async_pending set and do the work for it.
+ *
+ * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
+ * then zalloc_noblock to an empty zone may succeed.
+ */
+void zalloc_async(
+ thread_call_param_t p0,
+ thread_call_param_t p1);
+
+static thread_call_data_t call_async_alloc;
+
+/*
+ * Align elements that use the zone page list to 32 byte boundaries.
+ */
+#define ZONE_ELEMENT_ALIGNMENT 32
+
// Wake all threads sleeping on this zone's event.
#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
// Sleep on the zone's event, dropping the zone mutex while waiting.
#define zone_sleep(zone) \
	(void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT);


// Initialize a zone's mutex using its embedded lock extension/attributes.
#define lock_zone_init(zone) \
MACRO_BEGIN \
	lck_attr_setdefault(&(zone)->lock_attr); \
	lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
	    &zone_locks_grp, &(zone)->lock_attr); \
MACRO_END

// Non-blocking attempt to take the zone lock (wrapper around lck_mtx_try_lock_spin).
#define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock)
+
+/*
+ * Exclude more than one concurrent garbage collection
+ */
+decl_lck_mtx_data(, zone_gc_lock);
+
+lck_attr_t zone_gc_lck_attr;
+lck_grp_t zone_gc_lck_grp;
+lck_grp_attr_t zone_gc_lck_grp_attr;
+lck_mtx_ext_t zone_gc_lck_ext;
+
+boolean_t zone_gc_allowed = TRUE;
+boolean_t panic_include_zprint = FALSE;
+
+mach_memory_info_t *panic_kext_memory_info = NULL;
+vm_size_t panic_kext_memory_size = 0;
+
+#define ZALLOC_DEBUG_ZONEGC 0x00000001
+#define ZALLOC_DEBUG_ZCRAM 0x00000002
+
+#if DEBUG || DEVELOPMENT
+static uint32_t zalloc_debug = 0;
+#endif
+
+/*
+ * Zone leak debugging code
+ *
+ * When enabled, this code keeps a log to track allocations to a particular zone that have not
+ * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
+ * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
+ * off by default.
+ *
+ * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
+ * is the name of the zone you wish to log.
+ *
+ * This code only tracks one zone, so you need to identify which one is leaking first.
+ * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
+ * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
+ * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
+ * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
+ * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
+ * See the help in the kgmacros for usage info.
+ *
+ *
+ * Zone corruption logging
+ *
+ * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
+ * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
+ * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
+ * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
+ * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
+ * corrupted to examine its history. This should lead to the source of the corruption.
+ */
+
+static boolean_t log_records_init = FALSE;
+static int log_records; /* size of the log, expressed in number of records */
+
+#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */
+
+static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
+static int num_zones_logged = 0;
+
+static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
+
+/* Log allocations and frees to help debug a zone element corruption */
+boolean_t corruption_debug_flag = DEBUG; /* enabled by "-zc" boot-arg */
+/* Making pointer scanning leaks detection possible for all zones */
+
+#if DEBUG || DEVELOPMENT
+boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-arg */
+#endif /* DEBUG || DEVELOPMENT */
+
+
+/*
+ * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
+ * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this
+ * is the number of stacks suspected of leaking, we don't need many records.
+ */
+
+#if defined(__LP64__)
+#define ZRECORDS_MAX 2560 /* Max records allowed in the log */
+#else
+#define ZRECORDS_MAX 1536 /* Max records allowed in the log */
+#endif
+#define ZRECORDS_DEFAULT 1024 /* default records in log if zrecs is not specificed in boot-args */
+
+/*
+ * Each record in the log contains a pointer to the zone element it refers to,
+ * and a small array to hold the pc's from the stack trace. A
+ * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
+ * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
+ * If the log fills, old records are replaced as if it were a circular buffer.
+ */
+
+
+/*
+ * Decide if we want to log this zone by doing a string compare between a zone name and the name
+ * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
+ * possible to include spaces in strings passed in via the boot-args, a period in the logname will
+ * match a space in the zone name.
+ */
+
+int
+track_this_zone(const char *zonename, const char *logname)
+{
+ unsigned int len;
+ const char *zc = zonename;
+ const char *lc = logname;
+
+ /*
+ * Compare the strings. We bound the compare by MAX_ZONE_NAME.
+ */
+
+ for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
+ /*
+ * If the current characters don't match, check for a space in
+ * in the zone name and a corresponding period in the log name.
+ * If that's not there, then the strings don't match.
+ */
+
+ if (*zc != *lc && !(*zc == ' ' && *lc == '.')) {
+ break;
+ }
+
+ /*
+ * The strings are equal so far. If we're at the end, then it's a match.
+ */
+
+ if (*zc == '\0') {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
+ * the buffer for the records has been allocated.
+ */
+
+#define DO_LOGGING(z) (z->zone_logging == TRUE && z->zlog_btlog)
+
+extern boolean_t kmem_alloc_ready;
+
+#if CONFIG_ZLEAKS
+#pragma mark -
+#pragma mark Zone Leak Detection
+
+/*
+ * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
+ * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
+ * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
+ * and stop tracking it if it was being tracked.
+ *
+ * We track the allocations in the zallocations hash table, which stores the address that was returned from
+ * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
+ * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
+ * backtraces - we don't store them more than once.
+ *
+ * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
+ * a large amount of virtual space.
+ */
+#define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
+#define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
+#define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
+#define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
+uint32_t zleak_state = 0; /* State of collection, as above */
+
+boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
+vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
+vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
+unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */
+
+/*
+ * Counters for allocation statistics.
+ */
+
+/* Times two active records want to occupy the same spot */
+unsigned int z_alloc_collisions = 0;
+unsigned int z_trace_collisions = 0;
+
+/* Times a new record lands on a spot previously occupied by a freed allocation */
+unsigned int z_alloc_overwrites = 0;
+unsigned int z_trace_overwrites = 0;
+
+/* Times a new alloc or trace is put into the hash table */
+unsigned int z_alloc_recorded = 0;
+unsigned int z_trace_recorded = 0;
+
+/* Times zleak_log returned false due to not being able to acquire the lock */
+unsigned int z_total_conflicts = 0;
+
+
+#pragma mark struct zallocation
+/*
+ * Structure for keeping track of an allocation
+ * An allocation bucket is in use if its element is not NULL
+ */
+struct zallocation {
+ uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
+ vm_size_t za_size; /* how much memory did this allocation take up? */
+ uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
+ /* TODO: #if this out */
+ uint32_t za_hit_count; /* for determining effectiveness of hash function */
+};
+
+/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
+uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
+uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
+
+vm_size_t zleak_max_zonemap_size;
+
+/* Hashmaps of allocations and their corresponding traces */
+static struct zallocation* zallocations;
+static struct ztrace* ztraces;
+
+/* not static so that panic can see this, see kern/debug.c */
+struct ztrace* top_ztrace;
+
+/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
+static lck_spin_t zleak_lock;
+static lck_attr_t zleak_lock_attr;
+static lck_grp_t zleak_lock_grp;
+static lck_grp_attr_t zleak_lock_grp_attr;
+
+/*
+ * Initializes the zone leak monitor. Called from zone_init()
+ */
+static void
+zleak_init(vm_size_t max_zonemap_size)
+{
+ char scratch_buf[16];
+ boolean_t zleak_enable_flag = FALSE;