+ /* Check if page_meta passes is_sane_zone_element */
+ if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta)))
+ panic("zalloc: invalid metadata structure %p for freelist of zone %s\n",
+ (void *) page_meta, zone->zone_name);
+ assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
+ element = (vm_offset_t)page_metadata_get_freelist(page_meta);
+
+ if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size)))
+ panic("zfree: invalid head pointer %p for freelist of zone %s\n",
+ (void *) element, zone->zone_name);
+
+ vm_offset_t *primary = (vm_offset_t *) element;
+ vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);
+
+ /*
+ * Since the primary next pointer is xor'ed with zp_nopoison_cookie
+ * for obfuscation, retrieve the original value back
+ */
+ vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
+ vm_offset_t next_element_primary = *primary;
+ vm_offset_t next_element_backup = *backup;
+
+ /*
+ * backup_ptr_mismatch_panic will determine what next_element
+ * should have been, and print it appropriately
+ */
+ if (__improbable(!is_sane_zone_element(zone, next_element)))
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+
+ /* Check the backup pointer for the regular cookie */
+ if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) {
+
+ /* Check for the poisoned cookie instead */
+ if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie)))
+ /* Neither cookie is valid, corruption has occurred */
+ backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);
+
+ /*
+ * Element was marked as poisoned, so check its integrity before using it.
+ */
+ *check_poison = TRUE;
+ }
+
+ /* Make sure the page_meta is at the correct offset from the start of page */
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE)))
+ panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n",
+ page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE));
+
+ /* Make sure next_element belongs to the same page as page_meta */
+ if (next_element) {
+ if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE)))
+ panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n",
+ (void *)next_element, (void *)element, zone->zone_name);
+ }
+
+ /* Remove this element from the free list */
+ page_metadata_set_freelist(page_meta, (struct zone_free_element *)next_element);
+ page_meta->free_count--;
+
+ if (page_meta->free_count == 0) {
+ /* move to all used */
+ re_queue_tail(&zone->pages.all_used, &(page_meta->pages));
+ } else {
+ if (!zone->allows_foreign || from_zone_map(element, zone->elem_size)) {
+ if (get_metadata_alloc_count(page_meta) == page_meta->free_count + 1) {
+ /* remove from free, move to intermediate */
+ re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
+ }
+ }
+ }
+ zone->countfree--;
+ zone->count++;
+ zone->sum_count++;
+
+#if VM_MAX_TAG_ZONES
+ if (__improbable(zone->tags)) {
+ // set the tag with b0 clear so the block remains inuse
+ ZTAG(zone, element)[0] = (tag << 1);
+ }
+#endif /* VM_MAX_TAG_ZONES */
+
+
+#if KASAN_ZALLOC
+ kasan_poison_range(element, zone->elem_size, ASAN_VALID);
+#endif
+
+ return element;
+}
+
+/*
+ * End of zone poisoning
+ */
+
+/*
+ * Zone info options
+ */
+/* Number of zone-info slots; currently just an alias for MAX_ZONES. */
+#define ZINFO_SLOTS MAX_ZONES /* for now */
+
+/* Forward declaration; the definition is not in this part of the file. */
+zone_t zone_find_largest(void);
+
+/*
+ * Async allocation of zones
+ * This mechanism allows for bootstrapping an empty zone which is set up with
+ * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
+ * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
+ * This will prime the zone for the next use.
+ *
+ * Currently the thread_callout function (zalloc_async) will loop through all zones
+ * looking for any zone with async_pending set and do the work for it.
+ *
+ * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
+ * then zalloc_noblock to an empty zone may succeed.
+ */
+/* Thread-call entry point for the async zone priming; takes the two generic thread_call parameters. */
+void zalloc_async(
+ thread_call_param_t p0,
+ thread_call_param_t p1);
+
+/* Storage for a thread call -- presumably the one that runs zalloc_async; confirm where it is set up. */
+static thread_call_data_t call_async_alloc;
+
+/*
+ * Align elements that use the zone page list to 32 byte boundaries.
+ */
+#define ZONE_ELEMENT_ALIGNMENT 32
+
+/* Post a wakeup on the zone's event; the zone pointer itself is used as the event. */
+#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
+/*
+ * Sleep on the zone's event through lck_mtx_sleep() on the zone lock
+ * (LCK_SLEEP_SPIN_ALWAYS, THREAD_UNINT); the return value is discarded.
+ * NOTE(review): the expansion already ends in ';', so `zone_sleep(z);`
+ * produces an extra empty statement and the macro cannot be used as the
+ * body of an unbraced if/else. Check the call sites before changing it.
+ */
+#define zone_sleep(zone) \
+ (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT);
+
+/*
+ * The zone_locks_grp allows for collecting lock statistics.
+ * All locks are associated to this group in zinit.
+ * Look at tools/lockstat for debugging lock contention.
+ */
+
+/* Lock group (and its attributes) that lock_zone_init registers every zone lock with. */
+lck_grp_t zone_locks_grp;
+lck_grp_attr_t zone_locks_grp_attr;
+
+/*
+ * Initialize a zone's mutex: reset the zone's lock attributes to their
+ * defaults, then initialize the lock with its extended storage as a
+ * member of zone_locks_grp.
+ */
+#define lock_zone_init(zone) \
+MACRO_BEGIN \
+ lck_attr_setdefault(&(zone)->lock_attr); \
+ lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
+ &zone_locks_grp, &(zone)->lock_attr); \
+MACRO_END
+
+/*
+ * Attempt to take the zone's mutex with lck_mtx_try_lock_spin(); evaluates
+ * to that call's result. The argument is parenthesized so that any pointer
+ * expression (not just a plain identifier) expands to the intended
+ * `&(expr)->lock`.
+ */
+#define lock_try_zone(zone) lck_mtx_try_lock_spin(&(zone)->lock)
+
+/*
+ * Exclude more than one concurrent garbage collection
+ */
+decl_lck_mtx_data(, zone_gc_lock)
+
+/* Attribute, group, group-attribute and extended storage objects; by their names they belong to zone_gc_lock -- confirm at the init site. */
+lck_attr_t zone_gc_lck_attr;
+lck_grp_t zone_gc_lck_grp;
+lck_grp_attr_t zone_gc_lck_grp_attr;
+lck_mtx_ext_t zone_gc_lck_ext;
+
+/* Global switches, both non-static; their consumers are outside this part of the file. */
+boolean_t zone_gc_allowed = TRUE;
+boolean_t panic_include_zprint = FALSE;
+
+/* Pointer/size pair, initially empty; presumably filled in for panic reporting -- TODO confirm. */
+mach_memory_info_t *panic_kext_memory_info = NULL;
+vm_size_t panic_kext_memory_size = 0;
+
+/* Bit flags for zalloc_debug (no flag is set by default). */
+#define ZALLOC_DEBUG_ZONEGC 0x00000001
+#define ZALLOC_DEBUG_ZCRAM 0x00000002
+uint32_t zalloc_debug = 0;
+
+/*
+ * Zone leak debugging code
+ *
+ * When enabled, this code keeps a log to track allocations to a particular zone that have not
+ * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
+ * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
+ * off by default.
+ *
+ * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
+ * is the name of the zone you wish to log.
+ *
+ * This code only tracks one zone, so you need to identify which one is leaking first.
+ * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
+ * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
+ * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
+ * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
+ * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
+ * See the help in the kgmacros for usage info.
+ *
+ *
+ * Zone corruption logging
+ *
+ * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
+ * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
+ * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
+ * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
+ * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
+ * corrupted to examine its history. This should lead to the source of the corruption.
+ */
+
+/* Starts FALSE; presumably set once log_records has been initialized -- confirm at the use site. */
+static boolean_t log_records_init = FALSE;
+static int log_records; /* size of the log, expressed in number of records */
+
+#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */
+
+/* Limit on, and running count of, zones being logged; the limit defaults to the maximum above. */
+static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
+static int num_zones_logged = 0;
+
+static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
+
+/* Log allocations and frees to help debug a zone element corruption */
+boolean_t corruption_debug_flag = FALSE; /* enabled by "-zc" boot-arg */
+/* Enable pointer-scanning leak detection for all zones */
+
+#if DEBUG || DEVELOPMENT
+boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-arg */
+#endif /* DEBUG || DEVELOPMENT */
+
+
+/*
+ * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
+ * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this
+ * is the number of stacks suspected of leaking, we don't need many records.
+ */
+
+/* The cap on log records is larger on LP64 builds than on other builds. */
+#if defined(__LP64__)
+#define ZRECORDS_MAX 2560 /* Max records allowed in the log */
+#else
+#define ZRECORDS_MAX 1536 /* Max records allowed in the log */
+#endif
+#define ZRECORDS_DEFAULT 1024 /* default records in log if zrecs is not specified in boot-args */
+
+/*
+ * Each record in the log contains a pointer to the zone element it refers to,
+ * and a small array to hold the pc's from the stack trace. A
+ * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
+ * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
+ * If the log fills, old records are replaced as if it were a circular buffer.
+ */
+
+
+/*
+ * Opcodes for the btlog operation field:
+ */
+
+#define ZOP_ALLOC 1 /* presumably tags a record made on zalloc() -- confirm at the btlog call sites */
+#define ZOP_FREE 0 /* presumably tags a record made on zfree() -- confirm at the btlog call sites */
+
+/*
+ * Report whether a zone should be logged by comparing its name against the
+ * name requested for logging.
+ *
+ * zonename: name of the zone being considered.
+ * logname:  name requested for logging; boot-args cannot carry spaces, so a
+ *           '.' here is accepted where the zone name has a ' '.
+ *
+ * Returns TRUE when the names match up to and including the zone name's
+ * terminating NUL, FALSE otherwise. At most MAX_ZONE_NAME characters are
+ * examined; if no terminator is reached within that bound the result is
+ * FALSE.
+ */
+
+int
+track_this_zone(const char *zonename, const char *logname)
+{
+	int idx;
+
+	for (idx = 0; idx < MAX_ZONE_NAME; idx++) {
+		const char zone_ch = zonename[idx];
+		const char log_ch = logname[idx];
+
+		/*
+		 * A difference is only tolerated when it is a space in the
+		 * zone name standing opposite a period in the log name.
+		 */
+		if (zone_ch != log_ch) {
+			if (zone_ch != ' ' || log_ch != '.')
+				return FALSE;
+		}
+
+		/* Everything matched and the zone name has ended: success. */
+		if (zone_ch == '\0')
+			return TRUE;
+	}
+
+	/* Ran out of the MAX_ZONE_NAME bound without seeing a terminator. */
+	return FALSE;
+}
+
+
+/*
+ * Test if we want to log this zalloc/zfree event. We log if the zone has
+ * zone_logging set to TRUE and its zlog_btlog pointer is non-NULL, i.e. this
+ * is a zone we're interested in and the buffer for its records exists.
+ *
+ * The argument is parenthesized so that any pointer expression, not only a
+ * plain identifier, expands correctly.
+ */
+
+#define DO_LOGGING(z) ((z)->zone_logging == TRUE && (z)->zlog_btlog)
+
+extern boolean_t kmem_alloc_ready;
+
+#if CONFIG_ZLEAKS
+#pragma mark -
+#pragma mark Zone Leak Detection
+
+/*
+ * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
+ * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
+ * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
+ * and stop tracking it if it was being tracked.
+ *
+ * We track the allocations in the zallocations hash table, which stores the address that was returned from
+ * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
+ * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
+ * backtraces - we don't store them more than once.
+ *
+ * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
+ * a large amount of virtual space.
+ */
+/* Bit flags that are OR-ed together in zleak_state. */
+#define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
+#define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
+#define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
+#define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
+uint32_t zleak_state = 0; /* State of collection, as above */
+
+boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
+/*
+ * zleak_init() sets the global threshold to half of the maximum zone map
+ * size it is given, and the per-zone threshold to one eighth of the global one.
+ */
+vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
+vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
+/* Can be overridden with the "zfactor" boot-arg (see zleak_init). */
+unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */
+
+/*
+ * Counters for allocation statistics.
+ */
+
+/* Times two active records want to occupy the same spot */
+unsigned int z_alloc_collisions = 0;
+unsigned int z_trace_collisions = 0;
+
+/* Times a new record lands on a spot previously occupied by a freed allocation */
+unsigned int z_alloc_overwrites = 0;
+unsigned int z_trace_overwrites = 0;
+
+/* Times a new alloc or trace is put into the hash table */
+unsigned int z_alloc_recorded = 0;
+unsigned int z_trace_recorded = 0;
+
+/*
+ * Times zleak_log returned false due to not being able to acquire the lock.
+ * It is bumped with a plain ++ on the path where lck_spin_try_lock() failed,
+ * i.e. without zleak_lock held, so treat the value as approximate.
+ */
+unsigned int z_total_conflicts = 0;
+
+
+#pragma mark struct zallocation
+/*
+ * Structure for keeping track of an allocation
+ * An allocation bucket is in use if its element is not NULL
+ */
+/* One bucket of the zallocations hash table; field order is left untouched. */
+struct zallocation {
+ uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, 0 (NULL) if bucket unused */
+ vm_size_t za_size; /* how much memory did this allocation take up? */
+ uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
+ /* TODO: #if this out */
+ uint32_t za_hit_count; /* for determining effectiveness of hash function; incremented by zleak_log each time it selects this bucket */
+};
+
+/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
+/* Bucket counts; zleak_init lets the "zleak-allocs" / "zleak-traces" boot-args override them. */
+uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
+uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
+
+/* Copy of the max_zonemap_size argument given to zleak_init. */
+vm_size_t zleak_max_zonemap_size;
+
+/* Hashmaps of allocations and their corresponding traces */
+/* Both stay NULL until zleak_activate allocates, zeroes and installs them. */
+static struct zallocation* zallocations;
+static struct ztrace* ztraces;
+
+/* not static so that panic can see this, see kern/debug.c */
+/* zleak_activate points this at &ztraces[0] when it installs the tables. */
+struct ztrace* top_ztrace;
+
+/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
+/* The spin lock, its group and both attribute objects are initialized in zleak_init. */
+static lck_spin_t zleak_lock;
+static lck_attr_t zleak_lock_attr;
+static lck_grp_t zleak_lock_grp;
+static lck_grp_attr_t zleak_lock_grp_attr;
+
+/*
+ * Initializes the zone leak monitor. Called from zone_init()
+ *
+ * max_zonemap_size: maximum size of the zone map. It is recorded in
+ *                   zleak_max_zonemap_size; the global tracking threshold is
+ *                   set to half of it and the per-zone threshold to one
+ *                   eighth of the global threshold.
+ *
+ * Boot-args consulted:
+ *   -zleakon  (CONFIG_EMBEDDED)   enable detection, which is otherwise off
+ *   -zleakoff (other builds)      disable detection, which is otherwise on
+ *   zfactor=N                     override zleak_sample_factor
+ *   zleak-allocs=N                override zleak_alloc_buckets
+ *   zleak-traces=N                override zleak_trace_buckets
+ *
+ * The bucket counts must be powers of two (the hash masks bits instead of
+ * taking a modulus). An override that is zero or not a power of two is
+ * rejected and the compiled-in default is restored rather than being used.
+ *
+ * Also initializes zleak_lock, and sets zleak_state to ZLEAK_STATE_ENABLED
+ * when detection is enabled. No tables are allocated here; that is done by
+ * zleak_activate().
+ */
+static void
+zleak_init(vm_size_t max_zonemap_size)
+{
+	char		scratch_buf[16];
+	boolean_t	zleak_enable_flag = FALSE;
+
+	zleak_max_zonemap_size = max_zonemap_size;
+	zleak_global_tracking_threshold = max_zonemap_size / 2;
+	zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
+
+#if CONFIG_EMBEDDED
+	/* -zleakon (flag to enable zone leak monitor) */
+	if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
+		zleak_enable_flag = TRUE;
+		printf("zone leak detection enabled\n");
+	} else {
+		zleak_enable_flag = FALSE;
+		printf("zone leak detection disabled\n");
+	}
+#else /* CONFIG_EMBEDDED */
+	/* -zleakoff (flag to disable zone leak monitor) */
+	if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
+		zleak_enable_flag = FALSE;
+		printf("zone leak detection disabled\n");
+	} else {
+		zleak_enable_flag = TRUE;
+		printf("zone leak detection enabled\n");
+	}
+#endif /* CONFIG_EMBEDDED */
+
+	/* zfactor=XXXX (override how often to sample the zone allocator) */
+	if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
+		printf("Zone leak factor override: %u\n", zleak_sample_factor);
+	}
+
+	/* zleak-allocs=XXXX (override number of buckets in zallocations) */
+	if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
+		printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets);
+		/* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
+		if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets - 1))) {
+			/* Keep the table size valid instead of running with a bad one. */
+			zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
+			printf("Override isn't a power of two, using default: %u\n", zleak_alloc_buckets);
+		}
+	}
+
+	/* zleak-traces=XXXX (override number of buckets in ztraces) */
+	if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
+		printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets);
+		/* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
+		if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets - 1))) {
+			/* Keep the table size valid instead of running with a bad one. */
+			zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
+			printf("Override isn't a power of two, using default: %u\n", zleak_trace_buckets);
+		}
+	}
+
+	/* allocate the zleak_lock */
+	lck_grp_attr_setdefault(&zleak_lock_grp_attr);
+	lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
+	lck_attr_setdefault(&zleak_lock_attr);
+	lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
+
+	if (zleak_enable_flag) {
+		zleak_state = ZLEAK_STATE_ENABLED;
+	}
+}
+
+#if CONFIG_ZLEAKS
+
+/*
+ * Support for the kern.zleak.active sysctl: collapses the zleak_state
+ * bit field into a single integer.
+ *
+ * Returns -1 if ZLEAK_STATE_FAILED is set, otherwise 1 if
+ * ZLEAK_STATE_ACTIVE is set, otherwise 0.
+ */
+int
+get_zleak_state(void)
+{
+	int simplified = 0;
+
+	/* A failed activation takes precedence over everything else. */
+	if (zleak_state & ZLEAK_STATE_FAILED) {
+		simplified = -1;
+	} else if (zleak_state & ZLEAK_STATE_ACTIVE) {
+		simplified = 1;
+	}
+
+	return simplified;
+}
+
+#endif
+
+
+/*
+ * Allocate the zleak hash tables and switch the detector to the active state.
+ *
+ * Returns KERN_SUCCESS when the tables were installed, and also when there
+ * is nothing to do because zleak_state already has ACTIVE, ACTIVATING or
+ * FAILED set. If either table cannot be allocated, ZLEAK_STATE_FAILED is
+ * set, whatever was allocated is released, and the error from
+ * kmem_alloc_kobject() is returned.
+ *
+ * The table addresses are received directly as vm_offset_t (the type
+ * kmem_alloc_kobject() writes through) instead of through a cast of a
+ * void * variable's address.
+ */
+kern_return_t
+zleak_activate(void)
+{
+	const uint32_t	busy_mask = ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED;
+	kern_return_t	retval;
+	vm_size_t	z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
+	vm_size_t	z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
+	vm_offset_t	allocations_addr = 0;
+	vm_offset_t	traces_addr = 0;
+
+	/* Only one thread attempts to activate at a time: cheap unlocked check first */
+	if (zleak_state & busy_mask) {
+		return KERN_SUCCESS;
+	}
+
+	/* Re-check under the lock, then indicate that we're doing the setup */
+	lck_spin_lock(&zleak_lock);
+	if (zleak_state & busy_mask) {
+		lck_spin_unlock(&zleak_lock);
+		return KERN_SUCCESS;
+	}
+
+	zleak_state |= ZLEAK_STATE_ACTIVATING;
+	lck_spin_unlock(&zleak_lock);
+
+	/* Allocate and zero tables (the spin lock is not held across the allocations) */
+	retval = kmem_alloc_kobject(kernel_map, &allocations_addr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
+	if (retval != KERN_SUCCESS) {
+		goto fail;
+	}
+
+	retval = kmem_alloc_kobject(kernel_map, &traces_addr, z_trace_size, VM_KERN_MEMORY_OSFMK);
+	if (retval != KERN_SUCCESS) {
+		goto fail;
+	}
+
+	bzero((void *)allocations_addr, z_alloc_size);
+	bzero((void *)traces_addr, z_trace_size);
+
+	/* Everything's set. Install tables, mark active. */
+	zallocations = (struct zallocation *)allocations_addr;
+	ztraces = (struct ztrace *)traces_addr;
+
+	/*
+	 * Initialize the top_ztrace to the first entry in ztraces,
+	 * so we don't have to check for null in zleak_log
+	 */
+	top_ztrace = &ztraces[0];
+
+	/*
+	 * Note that we do need a barrier between installing
+	 * the tables and setting the active flag, because the zfree()
+	 * path accesses the table without a lock if we're active.
+	 */
+	lck_spin_lock(&zleak_lock);
+	zleak_state |= ZLEAK_STATE_ACTIVE;
+	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+	lck_spin_unlock(&zleak_lock);
+
+	return KERN_SUCCESS;
+
+fail:
+	/*
+	 * If we fail to allocate memory, don't further tax
+	 * the system by trying again.
+	 */
+	lck_spin_lock(&zleak_lock);
+	zleak_state |= ZLEAK_STATE_FAILED;
+	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+	lck_spin_unlock(&zleak_lock);
+
+	/* Release whichever table was obtained before the failure. */
+	if (allocations_addr != 0) {
+		kmem_free(kernel_map, allocations_addr, z_alloc_size);
+	}
+
+	if (traces_addr != 0) {
+		kmem_free(kernel_map, traces_addr, z_trace_size);
+	}
+
+	return retval;
+}
+
+/*
+ * TODO: What about allocations that never get deallocated,
+ * especially ones with unique backtraces? Should we wait to record
+ * until after boot has completed?
+ * (How many persistent zallocs are there?)
+ */
+
+/*
+ * This function records the allocation in the allocations table,
+ * and stores the associated backtrace in the traces table
+ * (or just increments the refcount if the trace is already recorded)
+ * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
+ * the associated trace's refcount is decremented.
+ * If the trace slot is in use, it returns.
+ * The refcount is incremented by the amount of memory the allocation consumes.
+ * The return value indicates whether to try again next time.
+ */
+static boolean_t
+zleak_log(uintptr_t* bt,
+ uintptr_t addr,
+ uint32_t depth,
+ vm_size_t allocation_size)
+{
+ /* Quit if there's someone else modifying the hash tables */
+ if (!lck_spin_try_lock(&zleak_lock)) {
+ z_total_conflicts++;
+ return FALSE;
+ }
+
+ struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+
+ uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
+ struct ztrace* trace = &ztraces[trace_index];
+
+ allocation->za_hit_count++;