+ if (++new_count >= factor) {
+ rolled_over = TRUE;
+ new_count = 0;
+ } else {
+ rolled_over = FALSE;
+ }
+
+ } while (!OSCompareAndSwap(old_count, new_count, count_p));
+
+ return rolled_over;
+}
+
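+/* Fill pattern written over freed elements when they are poisoned */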
+#if defined(__LP64__)
+#define ZP_POISON 0xdeadbeefdeadbeef
+#else
+#define ZP_POISON 0xdeadbeef
+#endif
+
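+/* Defaults: poison roughly one out of every ZP_DEFAULT_SAMPLING_FACTOR frees;
+ * zp_scale (ZP_DEFAULT_SCALE_FACTOR) controls how zone size scales that factor. */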
+#define ZP_DEFAULT_SAMPLING_FACTOR 16
+#define ZP_DEFAULT_SCALE_FACTOR 4
+
+/*
+ * A zp_factor of 0 indicates zone poisoning is disabled;
+ * however, we still poison zones smaller than zp_tiny_zone_limit (a cacheline).
+ * Passing the -no-zp boot-arg disables even this behavior.
+ * In all cases, we record and check the integrity of a backup pointer.
+ */
+
+/* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */
+uint32_t zp_factor = 0;
+
+/* set by zp-scale=N boot arg, scales zp_factor by zone size */
+uint32_t zp_scale = 0;
+
+/* set in zp_init, zero indicates -no-zp boot-arg */
+vm_size_t zp_tiny_zone_limit = 0;
+
+/* initialized to a per-boot random value in zp_init */
+uintptr_t zp_poisoned_cookie = 0;
+uintptr_t zp_nopoison_cookie = 0;
+
+#if VM_MAX_TAG_ZONES
+boolean_t zone_tagging_on;
+#endif /* VM_MAX_TAG_ZONES */
+
+/*
+ * initialize zone poisoning
+ * called from zone_bootstrap before any allocations are made from zalloc
+ */
+static inline void
+zp_init(void)
+{
+ char temp_buf[16];
+
+ /*
+ * Initialize backup pointer random cookie for poisoned elements
+ * Try not to call early_random() back to back; it may return
+ * the same value if mach_absolute_time doesn't have sufficient time
+ * to tick over between calls. <rdar://problem/11597395>
+ * (This is only a problem on embedded devices)
+ */
+ zp_poisoned_cookie = (uintptr_t) early_random();
+
+ /*
+ * Always poison zones smaller than a cacheline,
+ * because it's pretty close to free
+ */
+ ml_cpu_info_t cpu_info;
+ ml_cpu_get_info(&cpu_info);
+ zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size;
+
+ zp_factor = ZP_DEFAULT_SAMPLING_FACTOR;
+ zp_scale = ZP_DEFAULT_SCALE_FACTOR;
+
+ //TODO: Bigger permutation?
+ /*
+ * Permute the default factor +/- 1 to make it less predictable
+ * This adds or subtracts ~4 poisoned objects per 1000 frees.
+ */
+ if (zp_factor != 0) {
+ uint32_t rand_bits = early_random() & 0x3;
+
+ if (rand_bits == 0x1)
+ zp_factor += 1;
+ else if (rand_bits == 0x2)
+ zp_factor -= 1;
+ /* if 0x0 or 0x3, leave it alone */
+ }
+
+ /* -zp: enable poisoning for every alloc and free */
+ if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
+ zp_factor = 1;
+ }
+
+ /* -no-zp: disable poisoning completely even for tiny zones */
+ if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
+ zp_factor = 0;
+ zp_tiny_zone_limit = 0;
+ printf("Zone poisoning disabled\n");
+ }
+
+ /* zp-factor=XXXX: override how often to poison freed zone elements */
+ if (PE_parse_boot_argn("zp-factor", &zp_factor, sizeof(zp_factor))) {
+ printf("Zone poisoning factor override: %u\n", zp_factor);
+ }
+
+ /* zp-scale=XXXX: override how much zone size scales zp-factor by */
+ if (PE_parse_boot_argn("zp-scale", &zp_scale, sizeof(zp_scale))) {
+ printf("Zone poisoning scale factor override: %u\n", zp_scale);
+ }
+
+ /* Initialize backup pointer random cookie for unpoisoned elements */
+ zp_nopoison_cookie = (uintptr_t) early_random();
+
+#if MACH_ASSERT
+ if (zp_poisoned_cookie == zp_nopoison_cookie)
+ panic("early_random() is broken: %p and %p are not random\n",
+ (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie);
+#endif
+
+ /*
+ * Use the last bit in the backup pointer to hint poisoning state
+ * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so
+ * the low bits are zero.
+ */
+ zp_poisoned_cookie |= (uintptr_t)0x1ULL;
+ zp_nopoison_cookie &= ~((uintptr_t)0x1ULL);
+
+#if defined(__LP64__)
+ /*
+ * Make backup pointers more obvious in GDB for 64-bit
+ * by making 0xFFFFFF... ^ cookie = 0xFACADE...
+ * (0xFACADE = 0xFFFFFF ^ 0x053521)
+ * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011)
+ * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked
+ * by the sanity check, so it's OK for that part of the cookie to be predictable.
+ *
+ * TODO: Use #defines, xors, and shifts
+ */
+
+ zp_poisoned_cookie &= 0x000000FFFFFFFFFF;
+ zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */
+
+ zp_nopoison_cookie &= 0x000000FFFFFFFFFF;
+ zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */
+#endif
+}
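+
+/*
+ * Net effect of zp_init on LP64: a backup pointer stored for a poisoned element
+ * (a kernel freelist pointer XOR zp_poisoned_cookie) reads 0xFACADE... in a debugger
+ * with its low bit set, while an unpoisoned element reads 0xC0FFEE... with the low
+ * bit clear. backup_ptr_mismatch_panic relies on both properties below.
+ */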
+
+/*
+ * These macros keep track of the number of pages
+ * currently in use by the zone. Note that z->page_count
+ * is not protected by the zone lock.
+ */
+#define ZONE_PAGE_COUNT_INCR(z, count) \
+{ \
+ OSAddAtomic64(count, &(z->page_count)); \
+}
+
+#define ZONE_PAGE_COUNT_DECR(z, count) \
+{ \
+ OSAddAtomic64(-count, &(z->page_count)); \
+}
+
+vm_map_t zone_map = VM_MAP_NULL;
+
+/* for is_sane_zone_element and garbage collection */
+
+vm_offset_t zone_map_min_address = 0; /* initialized in zone_init */
+vm_offset_t zone_map_max_address = 0;
+
+/* Globals for random boolean generator for elements in free list */
+#define MAX_ENTROPY_PER_ZCRAM 4
+#define RANDOM_BOOL_GEN_SEED_COUNT 4
+static unsigned int bool_gen_seed[RANDOM_BOOL_GEN_SEED_COUNT];
+static unsigned int bool_gen_global = 0;
+decl_simple_lock_data(, bool_gen_lock)
+
+/* VM region for all metadata structures */
+vm_offset_t zone_metadata_region_min = 0;
+vm_offset_t zone_metadata_region_max = 0;
+decl_lck_mtx_data(static ,zone_metadata_region_lck)
+lck_attr_t zone_metadata_lock_attr;
+lck_mtx_ext_t zone_metadata_region_lck_ext;
+
+/* Helpful for walking through a zone's free element list. */
+struct zone_free_element {
+ struct zone_free_element *next;
+ /* ... */
+ /* void *backup_ptr; */
+};
+
+/*
+ * Protects zone_array, num_zones, num_zones_in_use, and zone_empty_bitmap
+ */
+decl_simple_lock_data(, all_zones_lock)
+unsigned int num_zones_in_use;
+unsigned int num_zones;
+
+#define MAX_ZONES 288
+struct zone zone_array[MAX_ZONES];
+
+/* Used to keep track of empty slots in the zone_array */
+bitmap_t zone_empty_bitmap[BITMAP_LEN(MAX_ZONES)];
+
+#if DEBUG || DEVELOPMENT
+/*
+ * Used for the sysctl kern.run_zone_test, which is not thread-safe. Ensure only one thread
+ * goes through at a time; otherwise we can end up with multiple test zones (if a second
+ * zinit() comes through before zdestroy()), which could make us run out of zones.
+ */
+decl_simple_lock_data(, zone_test_lock)
+static boolean_t zone_test_running = FALSE;
+static zone_t test_zone_ptr = NULL;
+#endif /* DEBUG || DEVELOPMENT */
+
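+/* Accessors for the zone index stored in a page's metadata (struct zone_page_metadata below) */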
+#define PAGE_METADATA_GET_ZINDEX(page_meta) \
+ (page_meta->zindex)
+
+#define PAGE_METADATA_GET_ZONE(page_meta) \
+ (&(zone_array[page_meta->zindex]))
+
+#define PAGE_METADATA_SET_ZINDEX(page_meta, index) \
+ page_meta->zindex = (index);
+
+struct zone_page_metadata {
+ queue_chain_t pages; /* linkage pointer for metadata lists */
+
+ /* Union for maintaining start of element free list and real metadata (for multipage allocations) */
+ union {
+ /*
+ * The start of the freelist can be maintained as a 32-bit offset instead of a pointer because
+ * the free elements would be at max ZONE_MAX_ALLOC_SIZE bytes away from the metadata. Offset
+ * from start of the allocation chunk to free element list head.
+ */
+ uint32_t freelist_offset;
+ /*
+ * This field is used to look up the real metadata for multipage allocations, where we mark the
+ * metadata for all pages except the first as "fake" metadata using MULTIPAGE_METADATA_MAGIC.
+ * Offset from this fake metadata back to the real metadata of the allocation chunk
+ * (a negative offset, stored as a positive magnitude and subtracted).
+ */
+ uint32_t real_metadata_offset;
+ };
+
+ /*
+ * For the first page in the allocation chunk, this represents the total number of free elements in
+ * the chunk.
+ */
+ uint16_t free_count;
+ unsigned zindex : ZINDEX_BITS; /* Zone index within the zone_array */
+ unsigned page_count : PAGECOUNT_BITS; /* Count of pages within the allocation chunk */
+};
+
+/* Macro to get page index (within zone_map) of page containing element */
+#define PAGE_INDEX_FOR_ELEMENT(element) \
+ (((vm_offset_t)trunc_page(element) - zone_map_min_address) / PAGE_SIZE)
+
+/* Macro to get metadata structure given a page index in zone_map */
+#define PAGE_METADATA_FOR_PAGE_INDEX(index) \
+ (zone_metadata_region_min + ((index) * sizeof(struct zone_page_metadata)))
+
+/* Macro to get index (within zone_map) for given metadata */
+#define PAGE_INDEX_FOR_METADATA(page_meta) \
+ (((vm_offset_t)page_meta - zone_metadata_region_min) / sizeof(struct zone_page_metadata))
+
+/* Macro to get page for given page index in zone_map */
+#define PAGE_FOR_PAGE_INDEX(index) \
+ (zone_map_min_address + (PAGE_SIZE * (index)))
+
+/* Macro to get the actual metadata for a given address */
+#define PAGE_METADATA_FOR_ELEMENT(element) \
+ (struct zone_page_metadata *)(PAGE_METADATA_FOR_PAGE_INDEX(PAGE_INDEX_FOR_ELEMENT(element)))
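+
+/*
+ * PAGE_METADATA_FOR_ELEMENT composes the macros above: truncate the element to its
+ * page, convert that page to an index within zone_map, then index the metadata
+ * region by that page index.
+ */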
+
+/* Magic value to indicate empty element free list */
+#define PAGE_METADATA_EMPTY_FREELIST ((uint32_t)(~0))
+
+boolean_t is_zone_map_nearing_exhaustion(void);
+extern void vm_pageout_garbage_collect(int collect);
+
+static inline void *
+page_metadata_get_freelist(struct zone_page_metadata *page_meta)
+{
+ assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
+ if (page_meta->freelist_offset == PAGE_METADATA_EMPTY_FREELIST)
+ return NULL;
+ else {
+ if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
+ return (void *)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)) + page_meta->freelist_offset);
+ else
+ return (void *)((vm_offset_t)page_meta + page_meta->freelist_offset);
+ }
+}
+
+static inline void
+page_metadata_set_freelist(struct zone_page_metadata *page_meta, void *addr)
+{
+ assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
+ if (addr == NULL)
+ page_meta->freelist_offset = PAGE_METADATA_EMPTY_FREELIST;
+ else {
+ if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
+ page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
+ else
+ page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - (vm_offset_t)page_meta);
+ }
+}
+
+static inline struct zone_page_metadata *
+page_metadata_get_realmeta(struct zone_page_metadata *page_meta)
+{
+ assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
+ return (struct zone_page_metadata *)((vm_offset_t)page_meta - page_meta->real_metadata_offset);
+}
+
+static inline void
+page_metadata_set_realmeta(struct zone_page_metadata *page_meta, struct zone_page_metadata *real_meta)
+{
+ assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
+ assert(PAGE_METADATA_GET_ZINDEX(real_meta) != MULTIPAGE_METADATA_MAGIC);
+ assert((vm_offset_t)page_meta > (vm_offset_t)real_meta);
+ vm_offset_t offset = (vm_offset_t)page_meta - (vm_offset_t)real_meta;
+ assert(offset <= UINT32_MAX);
+ page_meta->real_metadata_offset = (uint32_t)offset;
+}
+
+/* The backup pointer is stored in the last pointer-sized location in an element. */
+static inline vm_offset_t *
+get_backup_ptr(vm_size_t elem_size,
+ vm_offset_t *element)
+{
+ return (vm_offset_t *) ((vm_offset_t)element + elem_size - sizeof(vm_offset_t));
+}
+
+/*
+ * Routine to populate the page(s) backing a metadata structure in the zone_metadata_region.
+ * Must be called without the zone lock held as it might potentially block.
+ */
+static inline void
+zone_populate_metadata_page(struct zone_page_metadata *page_meta)
+{
+ vm_offset_t page_metadata_begin = trunc_page(page_meta);
+ vm_offset_t page_metadata_end = trunc_page((vm_offset_t)page_meta + sizeof(struct zone_page_metadata) - 1);
+
+ for (; page_metadata_begin <= page_metadata_end; page_metadata_begin += PAGE_SIZE) {
+ if (pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin))
+ continue;
+ /* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */
+ lck_mtx_lock(&zone_metadata_region_lck);
+ if (0 == pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) {
+ kern_return_t __unused ret = kernel_memory_populate(zone_map,
+ page_metadata_begin,
+ PAGE_SIZE,
+ KMA_KOBJECT,
+ VM_KERN_MEMORY_OSFMK);
+
+ /* should not fail with the given arguments */
+ assert(ret == KERN_SUCCESS);
+ }
+ lck_mtx_unlock(&zone_metadata_region_lck);
+ }
+ return;
+}
+
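+/* Number of elements that fit in the allocation chunk described by page_meta */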
+static inline uint16_t
+get_metadata_alloc_count(struct zone_page_metadata *page_meta)
+{
+ assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
+ struct zone *z = PAGE_METADATA_GET_ZONE(page_meta);
+ return ((page_meta->page_count * PAGE_SIZE) / z->elem_size);
+}
+
+/*
+ * Routine to lookup metadata for any given address.
+ * If init is marked as TRUE, this should be called without holding the zone lock
+ * since the initialization might block.
+ */
+static inline struct zone_page_metadata *
+get_zone_page_metadata(struct zone_free_element *element, boolean_t init)
+{
+ struct zone_page_metadata *page_meta = 0;
+
+ if (from_zone_map(element, sizeof(struct zone_free_element))) {
+ page_meta = (struct zone_page_metadata *)(PAGE_METADATA_FOR_ELEMENT(element));
+ if (init)
+ zone_populate_metadata_page(page_meta);
+ } else {
+ page_meta = (struct zone_page_metadata *)(trunc_page((vm_offset_t)element));
+ }
+ if (init)
+ __nosan_bzero((char *)page_meta, sizeof(struct zone_page_metadata));
+ return ((PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC) ? page_meta : page_metadata_get_realmeta(page_meta));
+}
+
+/* Routine to get the page for a given metadata */
+static inline vm_offset_t
+get_zone_page(struct zone_page_metadata *page_meta)
+{
+ if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
+ return (vm_offset_t)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
+ else
+ return (vm_offset_t)(trunc_page(page_meta));
+}
+
+/*
+ * ZTAGS
+ */
+
+#if VM_MAX_TAG_ZONES
+
+// for zones with tagging enabled:
+
+// calculate a pointer to the tag base entry,
+// holding either a uint32_t (the offset of the first tag for a page in the zone map),
+// or two uint16_t tags stored inline when the page can only hold one or two elements
+
+#define ZTAGBASE(zone, element) \
+ (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)])
+
+// pointer to the tag for an element
+#define ZTAG(zone, element) \
+ ({ \
+ vm_tag_t * result; \
+ if ((zone)->tags_inline) { \
+ result = (vm_tag_t *) ZTAGBASE((zone), (element)); \
+ if ((page_mask & element) >= (zone)->elem_size) result++; \
+ } else { \
+ result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \
+ } \
+ result; \
+ })
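+
+// For example: with tags_inline, the one or two vm_tag_t values for a page are stored
+// directly in its ZTAGBASE slot (ZTAG picks the second when the element's offset within
+// the page is at least elem_size). Otherwise ZTAGBASE[0] is the index of the page's
+// first tag in the zone_tags map, and ZTAG adds the element's ordinal within the page.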
+
+
+static vm_offset_t zone_tagbase_min;
+static vm_offset_t zone_tagbase_max;
+static vm_offset_t zone_tagbase_map_size;
+static vm_map_t zone_tagbase_map;
+
+static vm_offset_t zone_tags_min;
+static vm_offset_t zone_tags_max;
+static vm_offset_t zone_tags_map_size;
+static vm_map_t zone_tags_map;
+
+// simple heap allocator for allocating the tags for new memory
+
+decl_lck_mtx_data(,ztLock) /* heap lock */
+enum
+{
+ ztFreeIndexCount = 8,
+ ztFreeIndexMax = (ztFreeIndexCount - 1),
+ ztTagsPerBlock = 4
+};
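+
+// The first ztFreeIndexCount entries of ztBlocks serve as free-queue heads, one per
+// power-of-two size class; tags are allocated in runs of ztTagsPerBlock.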
+
+struct ztBlock
+{
+#if __LITTLE_ENDIAN__
+ uint64_t free:1,
+ next:21,
+ prev:21,
+ size:21;
+#else
+// ztBlock needs free bit least significant
+#error !__LITTLE_ENDIAN__
+#endif
+};
+typedef struct ztBlock ztBlock;
+
+static ztBlock * ztBlocks;
+static uint32_t ztBlocksCount;
+static uint32_t ztBlocksFree;
+
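+// returns ceil(log2(size)), with ztLog2up(1) == 0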
+static uint32_t
+ztLog2up(uint32_t size)
+{
+ if (1 == size) size = 0;
+ else size = 32 - __builtin_clz(size - 1);
+ return (size);
+}
+
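+// returns floor(log2(size))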
+static uint32_t
+ztLog2down(uint32_t size)
+{
+ size = 31 - __builtin_clz(size);
+ return (size);
+}
+
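+// Populate any unmapped pages backing [address, address + size) in the given submap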
+static void
+ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+
+ for (; page < end; page += page_size)
+ {
+ if (!pmap_find_phys(kernel_pmap, page))
+ {
+ kern_return_t __unused
+ ret = kernel_memory_populate(map, page, PAGE_SIZE,
+ KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
+ assert(ret == KERN_SUCCESS);
+ }
+ }
+}
+
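+// TRUE if every page backing [address, address + size) is currently populated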
+static boolean_t
+ztPresent(const void * address, size_t size)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+ boolean_t result;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+ for (result = TRUE; (page < end); page += page_size)
+ {
+ result = pmap_find_phys(kernel_pmap, page);
+ if (!result) break;
+ }
+ return (result);
+}
+
+
+void __unused
+ztDump(boolean_t sanity);
+void __unused
+ztDump(boolean_t sanity)
+{
+ uint32_t q, cq, p;
+
+ for (q = 0; q <= ztFreeIndexMax; q++)
+ {
+ p = q;
+ do
+ {
+ if (sanity)
+ {
+ cq = ztLog2down(ztBlocks[p].size);
+ if (cq > ztFreeIndexMax) cq = ztFreeIndexMax;
+ if (!ztBlocks[p].free
+ || ((p != q) && (q != cq))
+ || (ztBlocks[ztBlocks[p].next].prev != p)
+ || (ztBlocks[ztBlocks[p].prev].next != p))
+ {
+ kprintf("zterror at %d", p);
+ ztDump(FALSE);
+ kprintf("zterror at %d", p);
+ assert(FALSE);
+ }
+ continue;
+ }
+ kprintf("zt[%03d]%c %d, %d, %d\n",
+ p, ztBlocks[p].free ? 'F' : 'A',
+ ztBlocks[p].next, ztBlocks[p].prev,
+ ztBlocks[p].size);
+ p = ztBlocks[p].next;
+ if (p == q) break;
+ }
+ while (p != q);
+ if (!sanity) printf("\n");
+ }
+ if (!sanity) printf("-----------------------\n");
+}
+
+
+
+#define ZTBDEQ(idx) \
+ ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
+ ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;
+
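+// Return a run of 'count' blocks starting at 'index' to the tag heap: coalesce with any
+// free neighbors, mark both ends of the run free with its size, and queue it on the
+// size-ordered free list for its log2 bucket.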
+static void
+ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
+{
+ uint32_t q, w, p, size, merge;
+
+ assert(count);
+ ztBlocksFree += count;
+
+ // merge with the following block (at index + count)
+ merge = (index + count);
+ if ((merge < ztBlocksCount)
+ && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
+ && ztBlocks[merge].free)
+ {
+ ZTBDEQ(merge);
+ count += ztBlocks[merge].size;
+ }
+
+ // merge with the preceding block (ending at index - 1)
+ merge = (index - 1);
+ if ((merge > ztFreeIndexMax)
+ && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
+ && ztBlocks[merge].free)
+ {
+ size = ztBlocks[merge].size;
+ count += size;
+ index -= size;
+ ZTBDEQ(index);
+ }
+
+ q = ztLog2down(count);
+ if (q > ztFreeIndexMax) q = ztFreeIndexMax;
+ w = q;
+ // queue in order of size
+ while (TRUE)
+ {
+ p = ztBlocks[w].next;
+ if (p == q) break;
+ if (ztBlocks[p].size >= count) break;
+ w = p;
+ }
+ ztBlocks[p].prev = index;
+ ztBlocks[w].next = index;
+
+ // fault in first
+ ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
+
+ // mark first & last with free flag and size
+ ztBlocks[index].free = TRUE;
+ ztBlocks[index].size = count;
+ ztBlocks[index].prev = w;
+ ztBlocks[index].next = p;
+ if (count > 1)
+ {
+ index += (count - 1);
+ // fault in last
+ ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
+ ztBlocks[index].free = TRUE;
+ ztBlocks[index].size = count;
+ }
+}
+
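+// Allocate a run of 'count' tag blocks: starting with the free queue whose bucket is
+// ceil(log2(count)), scan upward for a free run of at least 'count' blocks, split off
+// and re-free any leftover, and return the starting block index, or -1U on failure.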
+static uint32_t
+ztAlloc(zone_t zone, uint32_t count)
+{
+ uint32_t q, w, p, leftover;
+
+ assert(count);
+
+ q = ztLog2up(count);
+ if (q > ztFreeIndexMax) q = ztFreeIndexMax;
+ do
+ {
+ w = q;
+ while (TRUE)
+ {
+ p = ztBlocks[w].next;
+ if (p == q) break;
+ if (ztBlocks[p].size >= count)
+ {
+ // dequeue, mark both ends allocated
+ ztBlocks[w].next = ztBlocks[p].next;
+ ztBlocks[ztBlocks[p].next].prev = w;
+ ztBlocks[p].free = FALSE;
+ ztBlocksFree -= ztBlocks[p].size;
+ if (ztBlocks[p].size > 1) ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
+
+ // fault all the allocation
+ ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
+ // mark last as allocated
+ if (count > 1) ztBlocks[p + count - 1].free = FALSE;
+ // free remainder
+ leftover = ztBlocks[p].size - count;
+ if (leftover) ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
+
+ return (p);
+ }
+ w = p;
+ }
+ q++;
+ }
+ while (q <= ztFreeIndexMax);
+
+ return (-1U);
+}
+
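+// Create the tagbase and tags submaps and seed the tag heap: the first ztFreeIndexCount
+// blocks become the free-queue heads and the remainder of the tags map is given to ztFree.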
+static void
+ztInit(vm_size_t max_zonemap_size, lck_grp_t * group)
+{
+ kern_return_t ret;
+ vm_map_kernel_flags_t vmk_flags;
+ uint32_t idx;
+
+ lck_mtx_init(&ztLock, group, LCK_ATTR_NULL);
+
+ // allocate submaps VM_KERN_MEMORY_DIAG
+
+ zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
+ vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
+ FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
+ &zone_tagbase_map);
+
+ if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed");
+ zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);
+
+ zone_tags_map_size = 2048*1024 * sizeof(vm_tag_t);
+ vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
+ FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
+ &zone_tags_map);
+
+ if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed");
+ zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);
+
+ ztBlocks = (ztBlock *) zone_tags_min;
+ ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));
+
+ // initialize the qheads
+ lck_mtx_lock(&ztLock);
+
+ ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
+ for (idx = 0; idx < ztFreeIndexCount; idx++)
+ {
+ ztBlocks[idx].free = TRUE;
+ ztBlocks[idx].next = idx;
+ ztBlocks[idx].prev = idx;
+ ztBlocks[idx].size = 0;
+ }
+ // free remaining space
+ ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);
+
+ lck_mtx_unlock(&ztLock);
+}
+
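+// Called when 'size' bytes at 'mem' are added to a tagged zone: fault in the per-page
+// tagbase entries and, for zones with out-of-line tags, allocate tag storage and record
+// each page's starting tag index in its tagbase entry.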
+static void
+ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
+{
+ uint32_t * tagbase;
+ uint32_t count, block, blocks, idx;
+ size_t pages;
+
+ pages = atop(size);
+ tagbase = ZTAGBASE(zone, mem);
+
+ lck_mtx_lock(&ztLock);
+
+ // fault tagbase
+ ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);
+
+ if (!zone->tags_inline)
+ {
+ // allocate tags
+ count = (uint32_t)(size / zone->elem_size);
+ blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
+ block = ztAlloc(zone, blocks);
+ if (-1U == block) ztDump(false);
+ assert(-1U != block);
+ }
+
+ lck_mtx_unlock(&ztLock);
+
+ if (!zone->tags_inline)
+ {
+ // set tag base for each page
+ block *= ztTagsPerBlock;
+ for (idx = 0; idx < pages; idx++)
+ {
+ tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size);
+ }
+ }
+}
+
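+// Called when memory is removed from a tagged zone: reset the per-page tagbase entries
+// to 0xFFFFFFFF and return any out-of-line tag storage to the tag heap.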
+static void
+ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
+{
+ uint32_t * tagbase;
+ uint32_t count, block, blocks, idx;
+ size_t pages;
+
+ // set tag base for each page
+ pages = atop(size);
+ tagbase = ZTAGBASE(zone, mem);
+ block = tagbase[0];
+ for (idx = 0; idx < pages; idx++)
+ {
+ tagbase[idx] = 0xFFFFFFFF;
+ }
+
+ lck_mtx_lock(&ztLock);
+ if (!zone->tags_inline)
+ {
+ count = (uint32_t)(size / zone->elem_size);
+ blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
+ assert(block != 0xFFFFFFFF);
+ block /= ztTagsPerBlock;
+ ztFree(NULL /* zone is unlocked */, block, blocks);
+ }
+
+ lck_mtx_unlock(&ztLock);
+}
+
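+// Map a tag_zone_index back to its index in zone_array, reporting the zone's element
+// size through 'elem_size'; returns -1U if no tagged zone matches.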
+uint32_t
+zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
+{
+ zone_t z;
+ uint32_t idx;
+
+ simple_lock(&all_zones_lock);
+
+ for (idx = 0; idx < num_zones; idx++)
+ {
+ z = &(zone_array[idx]);
+ if (!z->tags) continue;
+ if (tag_zone_index != z->tag_zone_index) continue;
+ *elem_size = z->elem_size;
+ break;
+ }
+
+ simple_unlock(&all_zones_lock);
+
+ if (idx == num_zones) idx = -1U;
+
+ return (idx);
+}
+
+#endif /* VM_MAX_TAG_ZONES */
+
+/* Routine to get the size of a zone-allocated address. If the address doesn't belong to the
+ * zone_map, returns 0.
+ */
+vm_size_t
+zone_element_size(void *addr, zone_t *z)
+{
+ struct zone *src_zone;
+ if (from_zone_map(addr, sizeof(void *))) {
+ struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
+ src_zone = PAGE_METADATA_GET_ZONE(page_meta);
+ if (z) {
+ *z = src_zone;
+ }
+ return (src_zone->elem_size);
+ } else {
+#if CONFIG_GZALLOC
+ vm_size_t gzsize;
+ if (gzalloc_element_size(addr, z, &gzsize)) {
+ return gzsize;
+ }
+#endif /* CONFIG_GZALLOC */
+
+ return 0;
+ }
+}
+
+#if DEBUG || DEVELOPMENT
+
+vm_size_t
+zone_element_info(void *addr, vm_tag_t * ptag)
+{
+ vm_size_t size = 0;
+ vm_tag_t tag = VM_KERN_MEMORY_NONE;
+ struct zone * src_zone;
+
+ if (from_zone_map(addr, sizeof(void *))) {
+ struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
+ src_zone = PAGE_METADATA_GET_ZONE(page_meta);
+#if VM_MAX_TAG_ZONES
+ if (__improbable(src_zone->tags)) {
+ tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
+ }
+#endif /* VM_MAX_TAG_ZONES */
+ size = src_zone->elem_size;
+ } else {
+#if CONFIG_GZALLOC
+ gzalloc_element_size(addr, NULL, &size);
+#endif /* CONFIG_GZALLOC */
+ }
+ *ptag = tag;
+ return size;
+}
+
+#endif /* DEBUG || DEVELOPMENT */
+
+/*
+ * Zone checking helper function.
+ * A pointer that satisfies these conditions is OK to be a freelist next pointer.
+ * A pointer that doesn't satisfy these conditions indicates corruption.
+ */
+static inline boolean_t
+is_sane_zone_ptr(zone_t zone,
+ vm_offset_t addr,
+ size_t obj_size)
+{
+ /* Must be aligned to pointer boundary */
+ if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0))
+ return FALSE;
+
+ /* Must be a kernel address */
+ if (__improbable(!pmap_kernel_va(addr)))
+ return FALSE;
+
+ /* Must be from zone map if the zone only uses memory from the zone_map */
+ /*
+ * TODO: Remove the zone->collectable check when every
+ * zone using foreign memory is properly tagged with allows_foreign
+ */
+ if (zone->collectable && !zone->allows_foreign) {
+ /* check if addr is from zone map */
+ if (addr >= zone_map_min_address &&
+ (addr + obj_size - 1) < zone_map_max_address )
+ return TRUE;
+
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static inline boolean_t
+is_sane_zone_page_metadata(zone_t zone,
+ vm_offset_t page_meta)
+{
+ /* NULL page metadata structures are invalid */
+ if (page_meta == 0)
+ return FALSE;
+ return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata));
+}
+
+static inline boolean_t
+is_sane_zone_element(zone_t zone,
+ vm_offset_t addr)
+{
+ /* NULL is OK because it indicates the tail of the list */
+ if (addr == 0)
+ return TRUE;
+ return is_sane_zone_ptr(zone, addr, zone->elem_size);
+}
+
+/* Someone wrote to freed memory. */
+static inline void /* noreturn */
+zone_element_was_modified_panic(zone_t zone,
+ vm_offset_t element,
+ vm_offset_t found,
+ vm_offset_t expected,
+ vm_offset_t offset)
+{
+ panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p",
+ zone->zone_name,
+ (void *) expected,
+ (void *) found,
+ (void *) (expected ^ found),
+ (uint32_t) offset,
+ (uint32_t) zone->elem_size,
+ (void *) element,
+ (void *) zp_nopoison_cookie,
+ (void *) zp_poisoned_cookie);
+}
+
+/*
+ * The primary and backup pointers don't match.
+ * Determine which one was likely the corrupted pointer, find out what it
+ * probably should have been, and panic.
+ * I would like to mark this as noreturn, but panic() isn't marked noreturn.
+ */
+static void /* noreturn */
+backup_ptr_mismatch_panic(zone_t zone,
+ vm_offset_t element,
+ vm_offset_t primary,
+ vm_offset_t backup)
+{
+ vm_offset_t likely_backup;
+ vm_offset_t likely_primary;
+
+ likely_primary = primary ^ zp_nopoison_cookie;
+ boolean_t sane_backup;
+ boolean_t sane_primary = is_sane_zone_element(zone, likely_primary);
+ boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE;
+
+#if defined(__LP64__)
+ /* We can inspect the tag in the upper bits for additional confirmation */
+ if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000)
+ element_was_poisoned = TRUE;
+ else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000)
+ element_was_poisoned = FALSE;
+#endif
+
+ if (element_was_poisoned) {
+ likely_backup = backup ^ zp_poisoned_cookie;
+ sane_backup = is_sane_zone_element(zone, likely_backup);
+ } else {
+ likely_backup = backup ^ zp_nopoison_cookie;
+ sane_backup = is_sane_zone_element(zone, likely_backup);
+ }
+
+ /* The primary is definitely the corrupted one */
+ if (!sane_primary && sane_backup)
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+
+ /* The backup is definitely the corrupted one */
+ if (sane_primary && !sane_backup)
+ zone_element_was_modified_panic(zone, element, backup,
+ (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
+ zone->elem_size - sizeof(vm_offset_t));
+
+ /*
+ * Not sure which is the corrupted one.
+ * It's less likely that the backup pointer was overwritten with
+ * ( (sane address) ^ (valid cookie) ), so we'll guess that the
+ * primary pointer has been overwritten with a sane but incorrect address.
+ */
+ if (sane_primary && sane_backup)
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+
+ /* Neither are sane, so just guess. */
+ zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+}
+
+/*
+ * Adds the element to the head of the zone's free list
+ * Keeps a backup next-pointer at the end of the element
+ */
+static inline void
+free_to_zone(zone_t zone,
+ vm_offset_t element,
+ boolean_t poison)
+{
+ vm_offset_t old_head;
+ struct zone_page_metadata *page_meta;
+
+ vm_offset_t *primary = (vm_offset_t *) element;
+ vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
+
+ page_meta = get_zone_page_metadata((struct zone_free_element *)element, FALSE);
+ assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
+ old_head = (vm_offset_t)page_metadata_get_freelist(page_meta);
+
+#if MACH_ASSERT
+ if (__improbable(!is_sane_zone_element(zone, old_head)))
+ panic("zfree: invalid head pointer %p for freelist of zone %s\n",
+ (void *) old_head, zone->zone_name);
+#endif
+
+ if (__improbable(!is_sane_zone_element(zone, element)))
+ panic("zfree: freeing invalid pointer %p to zone %s\n",
+ (void *) element, zone->zone_name);
+
+ /*
+ * Always write a redundant next pointer
+ * So that it is more difficult to forge, xor it with a random cookie
+ * A poisoned element is indicated by using zp_poisoned_cookie
+ * instead of zp_nopoison_cookie
+ */
+
+ *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);
+
+ /*
+ * Insert this element at the head of the free list. We also xor the
+ * primary pointer with the zp_nopoison_cookie to make sure a free
+ * element does not provide the location of the next free element directly.
+ */
+ *primary = old_head ^ zp_nopoison_cookie;
+ page_metadata_set_freelist(page_meta, (struct zone_free_element *)element);
+ page_meta->free_count++;
+ if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) {
+ if (page_meta->free_count == 1) {
+ /* first foreign element freed on page, move from all_used */
+ re_queue_tail(&zone->pages.any_free_foreign, &(page_meta->pages));
+ } else {
+ /* no other list transitions */
+ }
+ } else if (page_meta->free_count == get_metadata_alloc_count(page_meta)) {
+ /* whether the page was on the intermediate or all_used queue, move it to free */
+ re_queue_tail(&zone->pages.all_free, &(page_meta->pages));
+ zone->count_all_free_pages += page_meta->page_count;
+ } else if (page_meta->free_count == 1) {
+ /* first free element on page, move from all_used */
+ re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
+ }
+ zone->count--;
+ zone->countfree++;
+
+#if KASAN_ZALLOC
+ kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED);
+#endif
+}
+
+
+/*
+ * Removes an element from the zone's free list, returning 0 if the free list is empty.
+ * Verifies that the next-pointer and backup next-pointer are intact,
+ * and verifies that a poisoned element hasn't been modified.
+ */
+static inline vm_offset_t
+try_alloc_from_zone(zone_t zone,
+ vm_tag_t tag __unused,
+ boolean_t* check_poison)
+{
+ vm_offset_t element;
+ struct zone_page_metadata *page_meta;
+
+ *check_poison = FALSE;
+
+ /* if zone is empty, bail */
+ if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign))
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
+ else if (!queue_empty(&zone->pages.intermediate))
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
+ else if (!queue_empty(&zone->pages.all_free)) {
+ page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
+ assert(zone->count_all_free_pages >= page_meta->page_count);
+ zone->count_all_free_pages -= page_meta->page_count;
+ } else {
+ return 0;
+ }
+ /* Check if page_meta passes is_sane_zone_page_metadata */
+ if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta)))
+ panic("zalloc: invalid metadata structure %p for freelist of zone %s\n",
+ (void *) page_meta, zone->zone_name);
+ assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
+ element = (vm_offset_t)page_metadata_get_freelist(page_meta);
+
+ if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size)))
+ panic("zfree: invalid head pointer %p for freelist of zone %s\n",
+ (void *) element, zone->zone_name);
+
+ vm_offset_t *primary = (vm_offset_t *) element;
+ vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
+
+ /*
+ * Since the primary next pointer is xor'ed with zp_nopoison_cookie
+ * for obfuscation, retrieve the original value back
+ */
+ vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
+ vm_offset_t next_element_primary = *primary;
+ vm_offset_t next_element_backup = *backup;
+
+ /*
+ * backup_ptr_mismatch_panic will determine what next_element
+ * should have been, and print it appropriately
+ */
+ if (__improbable(!is_sane_zone_element(zone, next_element)))
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+
+ /* Check the backup pointer for the regular cookie */
+ if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) {
+
+ /* Check for the poisoned cookie instead */
+ if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie)))
+ /* Neither cookie is valid, corruption has occurred */
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+
+ /*
+ * Element was marked as poisoned, so check its integrity before using it.
+ */
+ *check_poison = TRUE;
+ }
+
+ /* Make sure the page_meta is at the correct offset from the start of page */
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE)))
+ panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n",
+ page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE));
+
+ /* Make sure next_element belongs to the same page as page_meta */
+ if (next_element) {
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE)))
+ panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n",
+ (void *)next_element, (void *)element, zone->zone_name);
+ }