+ kern_return_t kr;
+ vm_size_t size;
+ vm_offset_t memory;
+ int nalloc;
+
+ assert(nelem > 0);
+ if (nelem <= 0)
+ return 0;
+ size = nelem * zone->elem_size;
+ size = round_page(size);
+ kr = kmem_alloc_kobject(kernel_map, &memory, size, VM_KERN_MEMORY_ZONE);
+ if (kr != KERN_SUCCESS)
+ return 0;
+
+ zone_change(zone, Z_FOREIGN, TRUE);
+ zcram(zone, memory, size);
+ nalloc = (int)(size / zone->elem_size);
+ assert(nalloc >= nelem);
+
+ return nalloc;
+}
+
+/*
+ * Initialize the "zone of zones" which uses fixed memory allocated
+ * earlier in memory initialization. zone_bootstrap is called
+ * before zone_init.
+ */
+void
+zone_bootstrap(void)
+{
+ char temp_buf[16];
+
+ if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
+ zinfo_per_task = TRUE;
+ }
+
+ if (!PE_parse_boot_argn("zalloc_debug", &zalloc_debug, sizeof(zalloc_debug)))
+ zalloc_debug = 0;
+
+ /* Set up zone element poisoning */
+ zp_init();
+
+ /* Should zlog be used to debug zone corruption instead of leaks? */
+ if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
+ corruption_debug_flag = TRUE;
+ }
+
+ /*
+ * Check for and set up zone leak detection if requested via boot-args. We recognize two
+ * boot-args:
+ *
+ * zlog=<zone_to_log>
+ * zrecs=<num_records_in_log>
+ *
+ * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
+ * control the size of the log. If zrecs is not specified, a default value is used.
+ */
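+
+ /*
+ * For example, a hypothetical boot-args setting such as
+ *
+ *	zlog=kalloc.256 zrecs=8000
+ *
+ * would request alloc/free logging for a zone named "kalloc.256" (the name
+ * and count here are illustrative only), keeping up to 8000 records, subject
+ * to the ZRECORDS_MAX clamp applied below.
+ */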
+
+ if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
+ if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
+
+ /*
+ * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
+ * This prevents accidentally hogging too much kernel memory and making the system
+ * unusable.
+ */
+
+ log_records = MIN(ZRECORDS_MAX, log_records);
+
+ } else {
+ log_records = ZRECORDS_DEFAULT;
+ }
+ }
+
+ simple_lock_init(&all_zones_lock, 0);
+
+ first_zone = ZONE_NULL;
+ last_zone = &first_zone;
+ num_zones = 0;
+ thread_call_setup(&call_async_alloc, zalloc_async, NULL);
+
+ /* assertion: nobody else called zinit before us */
+ assert(zone_zone == ZONE_NULL);
+
+ /* initialize the global lock group for zones */
+ lck_grp_attr_setdefault(&zone_locks_grp_attr);
+ lck_grp_init(&zone_locks_grp, "zone_locks", &zone_locks_grp_attr);
+
+ zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
+ sizeof(struct zone), "zones");
+ zone_change(zone_zone, Z_COLLECT, FALSE);
+ zone_change(zone_zone, Z_CALLERACCT, FALSE);
+ zone_change(zone_zone, Z_NOENCRYPT, TRUE);
+
+ zcram(zone_zone, zdata, zdata_size);
+ VM_PAGE_MOVE_STOLEN(atop_64(zdata_size));
+
+ /* initialize fake zones and zone info if tracking by task */
+ if (zinfo_per_task) {
+ vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
+ unsigned int i;
+
+ for (i = 0; i < num_fake_zones; i++)
+ fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
+ zinfo_zone = zinit(zisize, zisize * CONFIG_TASK_MAX,
+ zisize, "per task zinfo");
+ zone_change(zinfo_zone, Z_CALLERACCT, FALSE);
+ }
+}
+
+void
+zinfo_task_init(task_t task)
+{
+ if (zinfo_per_task) {
+ task->tkm_zinfo = zalloc(zinfo_zone);
+ memset(task->tkm_zinfo, 0, sizeof(zinfo_usage_store_t) * ZINFO_SLOTS);
+ } else {
+ task->tkm_zinfo = NULL;
+ }
+}
+
+void
+zinfo_task_free(task_t task)
+{
+ assert(task != kernel_task);
+ if (task->tkm_zinfo != NULL) {
+ zfree(zinfo_zone, task->tkm_zinfo);
+ task->tkm_zinfo = NULL;
+ }
+}
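+
+ /*
+ * Usage sketch (hypothetical caller): task setup and teardown are expected
+ * to pair these calls, e.g.
+ *
+ *	zinfo_task_init(new_task);	allocates new_task->tkm_zinfo when zinfo_per_task
+ *	...
+ *	zinfo_task_free(new_task);	zfree()s and clears new_task->tkm_zinfo
+ */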
+
+/* Global initialization of Zone Allocator.
+ * Runs after zone_bootstrap.
+ */
+void
+zone_init(
+ vm_size_t max_zonemap_size)
+{
+ kern_return_t retval;
+ vm_offset_t zone_min;
+ vm_offset_t zone_max;
+
+ retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
+ FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
+ &zone_map);
+
+ if (retval != KERN_SUCCESS)
+ panic("zone_init: kmem_suballoc failed");
+ zone_max = zone_min + round_page(max_zonemap_size);
+#if CONFIG_GZALLOC
+ gzalloc_init(max_zonemap_size);
+#endif
+ /*
+ * Set up garbage collection information:
+ */
+ zone_map_min_address = zone_min;
+ zone_map_max_address = zone_max;
+
+#if defined(__LP64__)
+ /*
+ * Ensure that any vm_page_t that gets created from
+ * the vm_page zone can be packed properly (see vm_page.h
+ * for the packing requirements).
+ */
+ if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_min_address)) != (vm_page_t)zone_map_min_address)
+ panic("VM_PAGE_PACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address);
+
+ if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_max_address)) != (vm_page_t)zone_map_max_address)
+ panic("VM_PAGE_PACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);
+#endif
+
+ zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
+ zone_page_table_used_size = sizeof(zone_page_table);
+
+ zone_page_table_second_level_size = 1;
+ zone_page_table_second_level_shift_amount = 0;
+
+ /*
+ * Find the power of 2 for the second level that allows
+ * the first level to fit in ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE
+ * slots.
+ */
+ while ((zone_page_table_first_level_slot(zone_pages-1)) >= ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE) {
+ zone_page_table_second_level_size <<= 1;
+ zone_page_table_second_level_shift_amount++;
+ }
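+
+ /*
+ * Worked example (hypothetical sizes, and assuming
+ * zone_page_table_first_level_slot(i) is
+ * i >> zone_page_table_second_level_shift_amount): with zone_pages == 1<<20
+ * and ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE == 1<<10, the loop stops at a shift
+ * of 10 (a 1024-entry second level), since (zone_pages - 1) >> 10 == 1023
+ * fits within the first level.
+ */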
+
+ lck_grp_attr_setdefault(&zone_gc_lck_grp_attr);
+ lck_grp_init(&zone_gc_lck_grp, "zone_gc", &zone_gc_lck_grp_attr);
+ lck_attr_setdefault(&zone_gc_lck_attr);
+ lck_mtx_init_ext(&zone_gc_lock, &zone_gc_lck_ext, &zone_gc_lck_grp, &zone_gc_lck_attr);
+
+#if CONFIG_ZLEAKS
+ /*
+ * Initialize the zone leak monitor
+ */
+ zleak_init(max_zonemap_size);
+#endif /* CONFIG_ZLEAKS */
+}
+
+void
+zone_page_table_expand(zone_page_index_t pindex)
+{
+ unsigned int first_index;
+ struct zone_page_table_entry * volatile * first_level_ptr;
+
+ assert(pindex < zone_pages);
+
+ first_index = zone_page_table_first_level_slot(pindex);
+ first_level_ptr = &zone_page_table[first_index];
+
+ if (*first_level_ptr == NULL) {
+ /*
+ * We verified that the old first-level slot was NULL,
+ * so attempt to populate it.
+ */
+
+ vm_offset_t second_level_array = 0;
+ vm_size_t second_level_size = round_page(zone_page_table_second_level_size * sizeof(struct zone_page_table_entry));
+ zone_page_index_t i;
+ struct zone_page_table_entry *entry_array;
+
+ if (kmem_alloc_kobject(zone_map, &second_level_array,
+ second_level_size, VM_KERN_MEMORY_OSFMK) != KERN_SUCCESS) {
+ panic("zone_page_table_expand");
+ }
+ zone_map_table_page_count += (second_level_size / PAGE_SIZE);
+
+ /*
+ * zone_gc() may scan the "zone_page_table" directly,
+ * so make sure all slots have a valid unused state.
+ */
+ entry_array = (struct zone_page_table_entry *)second_level_array;
+ for (i=0; i < zone_page_table_second_level_size; i++) {
+ entry_array[i].alloc_count = ZONE_PAGE_UNUSED;
+ entry_array[i].collect_count = 0;
+ }
+
+ if (OSCompareAndSwapPtr(NULL, entry_array, first_level_ptr)) {
+ /* Old slot was NULL, replaced with expanded level */
+ OSAddAtomicLong(second_level_size, &zone_page_table_used_size);
+ } else {
+ /* Old slot was not NULL, someone else expanded first */
+ kmem_free(zone_map, second_level_array, second_level_size);
+ zone_map_table_page_count -= (second_level_size / PAGE_SIZE);
+ }
+ } else {
+ /* Old slot was not NULL; it has already been expanded */
+ }
+}
+
+struct zone_page_table_entry *
+zone_page_table_lookup(zone_page_index_t pindex)
+{
+ unsigned int first_index = zone_page_table_first_level_slot(pindex);
+ struct zone_page_table_entry *second_level = zone_page_table[first_index];
+
+ if (second_level) {
+ return &second_level[zone_page_table_second_level_slot(pindex)];
+ }
+
+ return NULL;
+}
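+
+ /*
+ * Illustrative lookup (hypothetical configuration, assuming a second-level
+ * shift of 10 so the second-level slot is the low 10 bits of the index):
+ * pindex 0x1234 would resolve to first-level slot 0x1234 >> 10 == 4 and
+ * second-level slot 0x1234 & 0x3ff == 0x234, i.e. the entry at
+ * zone_page_table[4][0x234] once that second level has been populated.
+ */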
+
+extern volatile SInt32 kfree_nop_count;
+
+#pragma mark -
+#pragma mark zalloc_canblock
+
+/*
+ * zalloc returns an element from the specified zone.
+ */
+static void *
+zalloc_internal(
+ zone_t zone,
+ boolean_t canblock,
+ boolean_t nopagewait)
+{
+ vm_offset_t addr = 0;
+ kern_return_t retval;
+ uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
+ int numsaved = 0;
+ boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE;
+#if CONFIG_GZALLOC || ZONE_DEBUG
+ boolean_t did_gzalloc = FALSE;
+#endif
+ thread_t thr = current_thread();
+ boolean_t check_poison = FALSE;
+ boolean_t set_doing_alloc_with_vm_priv = FALSE;
+
+#if CONFIG_ZLEAKS
+ uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
+#endif /* CONFIG_ZLEAKS */
+
+ assert(zone != ZONE_NULL);
+
+#if CONFIG_GZALLOC
+ addr = gzalloc_alloc(zone, canblock);
+ did_gzalloc = (addr != 0);
+#endif
+
+ /*
+ * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
+ */
+ if (__improbable(DO_LOGGING(zone)))
+ numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);
+
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: capture a backtrace every zleak_sample_factor
+ * allocations in this zone.
+ */
+ if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) {
+ /* Avoid backtracing twice if zone logging is on */
+ if (numsaved == 0)
+ zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
+ else
+ zleak_tracedepth = numsaved;
+ }
+#endif /* CONFIG_ZLEAKS */
+
+ lock_zone(zone);
+
+ if (zone->async_prio_refill && zone->zone_replenish_thread) {
+ do {
+ vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size));
+ vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size;
+ zone_replenish_wakeup = (zfreec < zrefillwm);
+ zone_alloc_throttle = (zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0);
+
+ if (zone_replenish_wakeup) {
+ zone_replenish_wakeups_initiated++;
+ unlock_zone(zone);
+ /* Signal the potentially waiting
+ * refill thread.
+ */
+ thread_wakeup(&zone->zone_replenish_thread);
+
+ /* Scheduling latencies etc. may prevent
+ * the refill thread from keeping up
+ * with demand. Throttle consumers
+ * when we fall below half the
+ * watermark, unless VM privileged
+ */
+ if (zone_alloc_throttle) {
+ zone_replenish_throttle_count++;
+ assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC);
+ thread_block(THREAD_CONTINUE_NULL);
+ }
+ lock_zone(zone);
+ }
+ } while (zone_alloc_throttle == TRUE);
+ }
+
+ if (__probable(addr == 0))
+ addr = try_alloc_from_zone(zone, &check_poison);
+
+
+ while ((addr == 0) && canblock) {
+ /*
+ * zone is empty, try to expand it
+ *
+ * Note that we now allow up to 2 threads (1 vm_privileged and 1 non-vm_privileged)
+ * to expand the zone concurrently... this is necessary so that vm_privileged
+ * threads running critical code needed to continue compressing/swapping
+ * pages (i.e. making new free pages) do not stall behind non-vm_privileged threads
+ * waiting to acquire free pages when the vm_page_free_count is below the
+ * vm_page_free_reserved limit.
+ */
+ if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) &&
+ (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) {
+ /*
+ * This is a non-vm_privileged thread and a non-vm_privileged or
+ * a vm_privileged thread is already expanding the zone...
+ * OR
+ * this is a vm_privileged thread and a vm_privileged thread is
+ * already expanding the zone...
+ *
+ * In either case wait for a thread to finish, then try again.
+ */
+ zone->waiting = TRUE;
+ zone_sleep(zone);
+ } else if (zone->doing_gc) {
+ /*
+ * zone_gc() is running. Since we need an element
+ * from the free list that is currently being
+ * collected, set the waiting bit and
+ * wait for the GC process to finish
+ * before trying again
+ */
+ zone->waiting = TRUE;
+ zone_sleep(zone);
+ } else {
+ vm_offset_t space;
+ vm_size_t alloc_size;
+ int retry = 0;
+
+ if ((zone->cur_size + zone->elem_size) >
+ zone->max_size) {
+ if (zone->exhaustible)
+ break;
+ if (zone->expandable) {
+ /*
+ * We're willing to overflow certain
+ * zones, but not without complaining.
+ *
+ * This is best used in conjunction
+ * with the collectable flag. What we
+ * want is an assurance we can get the
+ * memory back, assuming there's no
+ * leak.
+ */
+ zone->max_size += (zone->max_size >> 1);
+ } else {
+ unlock_zone(zone);
+
+ panic_include_zprint = TRUE;
+#if CONFIG_ZLEAKS
+ if (zleak_state & ZLEAK_STATE_ACTIVE)
+ panic_include_ztrace = TRUE;
+#endif /* CONFIG_ZLEAKS */
+ panic("zalloc: zone \"%s\" empty.", zone->zone_name);
+ }
+ }
+ if ((thr->options & TH_OPT_VMPRIV)) {
+ zone->doing_alloc_with_vm_priv = TRUE;
+ set_doing_alloc_with_vm_priv = TRUE;
+ } else {
+ zone->doing_alloc_without_vm_priv = TRUE;
+ }
+ unlock_zone(zone);
+
+ for (;;) {
+ int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
+
+ if (vm_pool_low() || retry >= 1)
+ alloc_size =
+ round_page(zone->elem_size);
+ else
+ alloc_size = zone->alloc_size;
+
+ if (zone->noencrypt)
+ zflags |= KMA_NOENCRYPT;
+
+ retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
+ if (retval == KERN_SUCCESS) {
+#if ZONE_ALIAS_ADDR
+ if (alloc_size == PAGE_SIZE)
+ space = zone_alias_addr(space);
+#endif
+
+#if CONFIG_ZLEAKS
+ if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
+ if (zone_map->size >= zleak_global_tracking_threshold) {
+ kern_return_t kr;
+
+ kr = zleak_activate();
+ if (kr != KERN_SUCCESS) {
+ printf("Failed to activate live zone leak debugging (%d).\n", kr);
+ }
+ }
+ }
+
+ if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
+ if (zone->cur_size > zleak_per_zone_tracking_threshold) {
+ zone->zleak_on = TRUE;
+ }
+ }
+#endif /* CONFIG_ZLEAKS */
+ zcram(zone, space, alloc_size);
+
+ break;
+ } else if (retval != KERN_RESOURCE_SHORTAGE) {
+ retry++;
+
+ if (retry == 2) {
+ zone_gc(TRUE);
+ printf("zalloc did gc\n");
+ zone_display_zprint();
+ }
+ if (retry == 3) {
+ panic_include_zprint = TRUE;
+#if CONFIG_ZLEAKS
+ if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
+ panic_include_ztrace = TRUE;
+ }
+#endif /* CONFIG_ZLEAKS */
+ if (retval == KERN_NO_SPACE) {
+ zone_t zone_largest = zone_find_largest();
+ panic("zalloc: zone map exhausted while allocating from zone %s, likely due to memory leak in zone %s (%lu total bytes, %d elements allocated)",
+ zone->zone_name, zone_largest->zone_name,
+ (unsigned long)zone_largest->cur_size, zone_largest->count);
+
+ }
+ panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
+ }
+ } else {
+ break;
+ }
+ }
+ lock_zone(zone);
+
+ if (set_doing_alloc_with_vm_priv == TRUE)
+ zone->doing_alloc_with_vm_priv = FALSE;
+ else
+ zone->doing_alloc_without_vm_priv = FALSE;
+
+ if (zone->waiting) {
+ zone->waiting = FALSE;
+ zone_wakeup(zone);
+ }
+ addr = try_alloc_from_zone(zone, &check_poison);
+ if (addr == 0 &&
+ retval == KERN_RESOURCE_SHORTAGE) {
+ if (nopagewait == TRUE)
+ break; /* out of the main while loop */
+ unlock_zone(zone);
+
+ VM_PAGE_WAIT();
+ lock_zone(zone);
+ }
+ }
+ if (addr == 0)
+ addr = try_alloc_from_zone(zone, &check_poison);
+ }
+
+#if CONFIG_ZLEAKS
+ /* Zone leak detection:
+ * If we're sampling this allocation, add it to the zleaks hash table.
+ */
+ if (addr && zleak_tracedepth > 0) {
+ /* Sampling can fail if another sample is happening at the same time in a different zone. */
+ if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
+ /* If it failed, roll back the counter so we sample the next allocation instead. */
+ zone->zleak_capture = zleak_sample_factor;
+ }
+ }
+#endif /* CONFIG_ZLEAKS */
+
+
+ if ((addr == 0) && (!canblock || nopagewait) && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
+ zone->async_pending = TRUE;
+ unlock_zone(zone);
+ thread_call_enter(&call_async_alloc);
+ lock_zone(zone);
+ addr = try_alloc_from_zone(zone, &check_poison);
+ }
+
+ /*
+ * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is
+ * suspected, so this code rarely executes. We need to do this while still holding the zone lock
+ * since it protects the various log related data structures.
+ */
+
+ if (__improbable(DO_LOGGING(zone) && addr)) {
+ btlog_add_entry(zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved);
+ }
+
+ vm_offset_t inner_size = zone->elem_size;
+
+#if ZONE_DEBUG
+ if (!did_gzalloc && addr && zone_debug_enabled(zone)) {
+ enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
+ addr += ZONE_DEBUG_OFFSET;
+ inner_size -= ZONE_DEBUG_OFFSET;
+ }
+#endif
+
+ unlock_zone(zone);
+
+ if (__improbable(check_poison && addr)) {
+ vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1;
+ vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr);
+
+ for ( ; element_cursor < backup ; element_cursor++)
+ if (__improbable(*element_cursor != ZP_POISON))
+ zone_element_was_modified_panic(zone,
+ addr,
+ *element_cursor,
+ ZP_POISON,
+ ((vm_offset_t)element_cursor) - addr);
+ }
+
+ if (addr) {
+ /*
+ * Clear out the old next pointer and backup to avoid leaking the cookie
+ * and so that only values on the freelist have a valid cookie
+ */
+
+ vm_offset_t *primary = (vm_offset_t *) addr;
+ vm_offset_t *backup = get_backup_ptr(inner_size, primary);
+
+ *primary = ZP_POISON;
+ *backup = ZP_POISON;
+ }
+
+ TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
+
+ if (addr) {
+ task_t task;
+ zinfo_usage_t zinfo;
+ vm_size_t sz = zone->elem_size;
+
+ if (zone->caller_acct)
+ ledger_credit(thr->t_ledger, task_ledgers.tkm_private, sz);
+ else
+ ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, sz);
+
+ if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+ OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].alloc);
+ }
+ return((void *)addr);
+}
+
+
+void *
+zalloc(zone_t zone)
+{
+ return (zalloc_internal(zone, TRUE, FALSE));
+}
+
+void *
+zalloc_noblock(zone_t zone)
+{
+ return (zalloc_internal(zone, FALSE, FALSE));
+}
+
+void *
+zalloc_nopagewait(zone_t zone)
+{
+ return (zalloc_internal(zone, TRUE, TRUE));
+}
+
+void *
+zalloc_canblock(zone_t zone, boolean_t canblock)
+{
+ return (zalloc_internal(zone, canblock, FALSE));
+}
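+
+ /*
+ * Summary of the entry points above, expressed as calls to
+ * zalloc_internal(zone, canblock, nopagewait):
+ *
+ *	zalloc(z)		canblock=TRUE,  nopagewait=FALSE
+ *	zalloc_noblock(z)	canblock=FALSE, nopagewait=FALSE
+ *	zalloc_nopagewait(z)	canblock=TRUE,  nopagewait=TRUE
+ *	zalloc_canblock(z, cb)	canblock=cb,    nopagewait=FALSE
+ */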
+
+
+void
+zalloc_async(
+ __unused thread_call_param_t p0,
+ __unused thread_call_param_t p1)
+{
+ zone_t current_z = NULL, head_z;
+ unsigned int max_zones, i;
+ void *elt = NULL;
+ boolean_t pending = FALSE;
+
+ simple_lock(&all_zones_lock);
+ head_z = first_zone;
+ max_zones = num_zones;
+ simple_unlock(&all_zones_lock);
+ current_z = head_z;
+ for (i = 0; i < max_zones; i++) {
+ lock_zone(current_z);
+ if (current_z->async_pending == TRUE) {
+ current_z->async_pending = FALSE;
+ pending = TRUE;
+ }
+ unlock_zone(current_z);
+
+ if (pending == TRUE) {
+ elt = zalloc_canblock(current_z, TRUE);
+ zfree(current_z, elt);
+ pending = FALSE;
+ }
+ /*
+ * This is based on the assumption that zones never get
+ * freed once allocated and linked.
+ * Hence a read outside of the lock is OK.
+ */
+ current_z = current_z->next_zone;
+ }
+}
+
+/*
+ * zget returns an element from the specified zone
+ * and returns NULL immediately if no element is available.
+ *
+ * This form should be used when you cannot block (like when
+ * processing an interrupt).
+ *
+ * XXX: It seems like only vm_page_grab_fictitious_common uses this, and its
+ * friend vm_page_more_fictitious can block, so it doesn't seem like
+ * this is used for interrupts any more....
+ */
+void *
+zget(
+ register zone_t zone)
+{
+ vm_offset_t addr;
+ boolean_t check_poison = FALSE;
+
+#if CONFIG_ZLEAKS
+ uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used for zone leak detection */
+ uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
+#endif /* CONFIG_ZLEAKS */
+
+ assert( zone != ZONE_NULL );
+
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: capture a backtrace
+ */
+ if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) {
+ zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
+ }
+#endif /* CONFIG_ZLEAKS */
+
+ if (!lock_try_zone(zone))
+ return NULL;
+
+ addr = try_alloc_from_zone(zone, &check_poison);
+
+ vm_offset_t inner_size = zone->elem_size;
+
+#if ZONE_DEBUG
+ if (addr && zone_debug_enabled(zone)) {
+ enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
+ addr += ZONE_DEBUG_OFFSET;
+ inner_size -= ZONE_DEBUG_OFFSET;
+ }
+#endif /* ZONE_DEBUG */
+
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: record the allocation
+ */
+ if (zone->zleak_on && zleak_tracedepth > 0 && addr) {
+ /* Sampling can fail if another sample is happening at the same time in a different zone. */
+ if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
+ /* If it failed, roll back the counter so we sample the next allocation instead. */
+ zone->zleak_capture = zleak_sample_factor;
+ }
+ }
+#endif /* CONFIG_ZLEAKS */
+
+ unlock_zone(zone);
+
+ if (__improbable(check_poison && addr)) {
+ vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1;
+ vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr);
+
+ for ( ; element_cursor < backup ; element_cursor++)
+ if (__improbable(*element_cursor != ZP_POISON))
+ zone_element_was_modified_panic(zone,
+ addr,
+ *element_cursor,
+ ZP_POISON,
+ ((vm_offset_t)element_cursor) - addr);
+ }
+
+ if (addr) {
+ /*
+ * Clear out the old next pointer and backup to avoid leaking the cookie
+ * and so that only values on the freelist have a valid cookie
+ */
+ vm_offset_t *primary = (vm_offset_t *) addr;
+ vm_offset_t *backup = get_backup_ptr(inner_size, primary);
+
+ *primary = ZP_POISON;
+ *backup = ZP_POISON;
+ }
+
+ return((void *) addr);
+}
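+
+ /*
+ * Caller sketch (hypothetical): since zget() neither blocks nor expands the
+ * zone, callers must be prepared for failure, e.g.
+ *
+ *	elem = zget(some_zone);
+ *	if (elem == NULL)
+ *		return KERN_RESOURCE_SHORTAGE;	or some other caller-specific fallback
+ */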
+
+ /* Keep this FALSE by default. Large memory machines run orders of magnitude
+ slower in debug mode when TRUE. Use the debugger to enable if needed */
+/* static */ boolean_t zone_check = FALSE;
+
+static void zone_check_freelist(zone_t zone, vm_offset_t elem)
+{
+ struct zone_free_element *this;
+ struct zone_page_metadata *thispage;
+
+ if (zone->use_page_list) {
+ if (zone->allows_foreign) {
+ for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
+ !queue_end(&zone->pages.any_free_foreign, (queue_entry_t)thispage);
+ thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) {
+ for (this = thispage->elements;
+ this != NULL;
+ this = this->next) {
+ if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
+ panic("zone_check_freelist");
+ }
+ }
+ }
+ for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
+ !queue_end(&zone->pages.all_free, (queue_entry_t)thispage);
+ thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) {
+ for (this = thispage->elements;
+ this != NULL;
+ this = this->next) {
+ if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
+ panic("zone_check_freelist");
+ }
+ }
+ for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
+ !queue_end(&zone->pages.intermediate, (queue_entry_t)thispage);
+ thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) {
+ for (this = thispage->elements;
+ this != NULL;
+ this = this->next) {
+ if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
+ panic("zone_check_freelist");
+ }
+ }
+ } else {
+ for (this = zone->free_elements;
+ this != NULL;
+ this = this->next) {
+ if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
+ panic("zone_check_freelist");
+ }
+ }
+}
+
+static zone_t zone_last_bogus_zone = ZONE_NULL;
+static vm_offset_t zone_last_bogus_elem = 0;
+
+void
+zfree(
+ register zone_t zone,
+ void *addr)
+{
+ vm_offset_t elem = (vm_offset_t) addr;
+ uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */
+ int numsaved = 0;
+ boolean_t gzfreed = FALSE;
+ boolean_t poison = FALSE;
+
+ assert(zone != ZONE_NULL);
+
+#if 1
+ if (zone->use_page_list) {
+ struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr);
+ if (zone != page_meta->zone) {
+ /*
+ * Something bad has happened. Someone tried to zfree a pointer but the metadata says it is from
+ * a different zone (or maybe it's from a zone that doesn't use page free lists at all). We can repair
+ * some cases of this, if:
+ * 1) The specified zone had use_page_list, and the true zone also has use_page_list set. In that case
+ * we can swap the zone_t
+ * 2) The specified zone had use_page_list, but the true zone does not. In this case page_meta is garbage,
+ * and dereferencing page_meta->zone might panic.
+ * To distinguish the two, we enumerate the zone list to match it up.
+ * We do not handle the case where an incorrect zone is passed that does not have use_page_list set,
+ * even if the true zone did have this set.
+ */
+ zone_t fixed_zone = NULL;
+ int fixed_i, max_zones;
+
+ simple_lock(&all_zones_lock);
+ max_zones = num_zones;
+ fixed_zone = first_zone;
+ simple_unlock(&all_zones_lock);
+
+ for (fixed_i=0; fixed_i < max_zones; fixed_i++, fixed_zone = fixed_zone->next_zone) {
+ if (fixed_zone == page_meta->zone && fixed_zone->use_page_list) {
+ /* we can fix this */
+ printf("Fixing incorrect zfree from zone %s to zone %s\n", zone->zone_name, fixed_zone->zone_name);
+ zone = fixed_zone;
+ break;
+ }
+ }
+ }
+ }
+#endif
+
+ /*
+ * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
+ */
+
+ if (__improbable(DO_LOGGING(zone) && corruption_debug_flag))
+ numsaved = OSBacktrace((void *)zbt, MAX_ZTRACE_DEPTH);
+
+#if MACH_ASSERT
+ /* Basic sanity checks */
+ if (zone == ZONE_NULL || elem == (vm_offset_t)0)
+ panic("zfree: NULL");
+ /* zone_gc assumes zones are never freed */
+ if (zone == zone_zone)
+ panic("zfree: freeing to zone_zone breaks zone_gc!");
+#endif
+
+#if CONFIG_GZALLOC
+ gzfreed = gzalloc_free(zone, addr);
+#endif
+
+ TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
+
+ if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign &&
+ !from_zone_map(elem, zone->elem_size))) {
+#if MACH_ASSERT
+ panic("zfree: non-allocated memory in collectable zone!");
+#endif
+ zone_last_bogus_zone = zone;
+ zone_last_bogus_elem = elem;
+ return;
+ }
+
+ if ((zp_factor != 0 || zp_tiny_zone_limit != 0) && !gzfreed) {
+ /*
+ * Poison the memory before it ends up on the freelist to catch
+ * use-after-free and use of uninitialized memory
+ *
+ * Always poison tiny zones' elements (limit is 0 if -no-zp is set)
+ * Also poison larger elements periodically
+ */
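+
+ /*
+ * Worked example (hypothetical tuning values): with zp_tiny_zone_limit == 32,
+ * zp_factor == 16 and zp_scale == 12, a 16-byte element is always poisoned,
+ * while a 4096-byte element gets sample_factor == 16 + (4096 >> 12) == 17,
+ * i.e. roughly one poisoned free out of every 17 (via sample_counter below).
+ */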
+
+ vm_offset_t inner_size = zone->elem_size;
+
+#if ZONE_DEBUG
+ if (!gzfreed && zone_debug_enabled(zone)) {
+ inner_size -= ZONE_DEBUG_OFFSET;
+ }
+#endif
+ uint32_t sample_factor = zp_factor + (((uint32_t)inner_size) >> zp_scale);
+
+ if (inner_size <= zp_tiny_zone_limit)
+ poison = TRUE;
+ else if (zp_factor != 0 && sample_counter(&zone->zp_count, sample_factor) == TRUE)
+ poison = TRUE;
+
+ if (__improbable(poison)) {
+
+ /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
+ /* Poison everything but primary and backup */
+ vm_offset_t *element_cursor = ((vm_offset_t *) elem) + 1;
+ vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *)elem);
+
+ for ( ; element_cursor < backup; element_cursor++)
+ *element_cursor = ZP_POISON;
+ }
+ }
+
+ lock_zone(zone);
+
+ /*
+ * See if we're doing logging on this zone. There are two styles of logging used depending on
+ * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
+ */
+
+ if (__improbable(DO_LOGGING(zone))) {
+ if (corruption_debug_flag) {
+ /*
+ * We're logging to catch a corruption. Add a record of this zfree operation
+ * to log.
+ */
+ btlog_add_entry(zlog_btlog, (void *)addr, ZOP_FREE, (void **)zbt, numsaved);
+ } else {
+ /*
+ * We're logging to catch a leak. Remove any record we might have for this
+ * element since it's being freed. Note that we may not find it if the buffer
+ * overflowed and that's OK. Since the log is of a limited size, old records
+ * get overwritten if there are more zallocs than zfrees.
+ */
+ btlog_remove_entries_for_element(zlog_btlog, (void *)addr);
+ }
+ }
+
+#if ZONE_DEBUG
+ if (!gzfreed && zone_debug_enabled(zone)) {
+ queue_t tmp_elem;
+
+ elem -= ZONE_DEBUG_OFFSET;
+ if (zone_check) {
+ /* check the zone's consistency */
+
+ for (tmp_elem = queue_first(&zone->active_zones);
+ !queue_end(tmp_elem, &zone->active_zones);
+ tmp_elem = queue_next(tmp_elem))
+ if (elem == (vm_offset_t)tmp_elem)
+ break;
+ if (elem != (vm_offset_t)tmp_elem)
+ panic("zfree()ing element from wrong zone");
+ }
+ remqueue((queue_t) elem);
+ }
+#endif /* ZONE_DEBUG */
+ if (zone_check) {
+ zone_check_freelist(zone, elem);
+ }
+
+ if (__probable(!gzfreed))
+ free_to_zone(zone, elem, poison);
+
+#if MACH_ASSERT
+ if (zone->count < 0)
+ panic("zfree: zone count underflow in zone %s while freeing element %p, possible cause: double frees or freeing memory that did not come from this zone",
+ zone->zone_name, addr);
+#endif
+
+
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: un-track the allocation
+ */
+ if (zone->zleak_on) {
+ zleak_free(elem, zone->elem_size);
+ }
+#endif /* CONFIG_ZLEAKS */
+
+ /*
+ * If elements are at least one page in size and memory is low,
+ * request that zone garbage collection run the next
+ * time the pageout thread runs.
+ */
+ if (zone->elem_size >= PAGE_SIZE &&
+ vm_pool_low()){
+ zone_gc_forced = TRUE;
+ }
+ unlock_zone(zone);
+
+ {
+ thread_t thr = current_thread();
+ task_t task;
+ zinfo_usage_t zinfo;
+ vm_size_t sz = zone->elem_size;
+
+ if (zone->caller_acct)
+ ledger_debit(thr->t_ledger, task_ledgers.tkm_private, sz);
+ else
+ ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, sz);
+
+ if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+ OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].free);
+ }
+}
+
+
+/* Change a zone's flags.
+ * This routine must be called immediately after zinit.
+ */
+void
+zone_change(
+ zone_t zone,
+ unsigned int item,
+ boolean_t value)
+{
+ assert( zone != ZONE_NULL );
+ assert( value == TRUE || value == FALSE );
+
+ switch(item){
+ case Z_NOENCRYPT:
+ zone->noencrypt = value;
+ break;
+ case Z_EXHAUST:
+ zone->exhaustible = value;
+ break;
+ case Z_COLLECT:
+ zone->collectable = value;
+ break;
+ case Z_EXPAND:
+ zone->expandable = value;
+ break;
+ case Z_FOREIGN:
+ zone->allows_foreign = value;
+ break;
+ case Z_CALLERACCT:
+ zone->caller_acct = value;
+ break;
+ case Z_NOCALLOUT:
+ zone->no_callout = value;
+ break;
+ case Z_GZALLOC_EXEMPT:
+ zone->gzalloc_exempt = value;
+#if CONFIG_GZALLOC
+ gzalloc_reconfigure(zone);
+#endif
+ break;
+ case Z_ALIGNMENT_REQUIRED:
+ zone->alignment_required = value;
+ /*
+ * Disable the page list optimization here to provide
+ * more of an alignment guarantee. This prevents
+ * the alignment from being modified by the metadata stored
+ * at the beginning of the page.
+ */
+ zone->use_page_list = FALSE;
+#if ZONE_DEBUG
+ zone_debug_disable(zone);
+#endif
+#if CONFIG_GZALLOC
+ gzalloc_reconfigure(zone);
+#endif
+ break;
+ default:
+ panic("Zone_change: Wrong Item Type!");
+ /* break; */
+ }
+}
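+
+ /*
+ * Usage sketch (hypothetical zone): flags are applied immediately after
+ * zinit(), mirroring what zone_bootstrap() does for the "zones" zone above:
+ *
+ *	my_zone = zinit(sizeof(struct my_elem), 1024 * sizeof(struct my_elem),
+ *			PAGE_SIZE, "my.zone");
+ *	zone_change(my_zone, Z_CALLERACCT, FALSE);
+ *	zone_change(my_zone, Z_NOENCRYPT, TRUE);
+ */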
+
+/*
+ * Return the expected number of free elements in the zone.
+ * This calculation will be incorrect if items are zfree'd that
+ * were never zalloc'd/zget'd. The correct way to stuff memory
+ * into a zone is by zcram.
+ */
+
+integer_t
+zone_free_count(zone_t zone)
+{
+ integer_t free_count;
+
+ lock_zone(zone);
+ free_count = zone->countfree;
+ unlock_zone(zone);
+
+ assert(free_count >= 0);
+
+ return(free_count);
+}
+
+/*
+ * Zone garbage collection subroutines
+ */
+
+boolean_t
+zone_page_collectable(
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ struct zone_page_table_entry *zp;
+ zone_page_index_t i, j;
+
+#if ZONE_ALIAS_ADDR
+ addr = zone_virtual_addr(addr);
+#endif
+#if MACH_ASSERT
+ if (!from_zone_map(addr, size))
+ panic("zone_page_collectable");
+#endif
+
+ i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+ j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+ for (; i <= j; i++) {
+ zp = zone_page_table_lookup(i);
+ if (zp->collect_count == zp->alloc_count)
+ return (TRUE);
+ }
+
+ return (FALSE);
+}
+
+void
+zone_page_keep(
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ struct zone_page_table_entry *zp;
+ zone_page_index_t i, j;
+
+#if ZONE_ALIAS_ADDR
+ addr = zone_virtual_addr(addr);
+#endif
+#if MACH_ASSERT
+ if (!from_zone_map(addr, size))
+ panic("zone_page_keep");
+#endif
+
+ i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+ j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+ for (; i <= j; i++) {
+ zp = zone_page_table_lookup(i);
+ zp->collect_count = 0;
+ }
+}
+
+void
+zone_page_collect(
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ struct zone_page_table_entry *zp;
+ zone_page_index_t i, j;
+
+#if ZONE_ALIAS_ADDR
+ addr = zone_virtual_addr(addr);
+#endif
+#if MACH_ASSERT
+ if (!from_zone_map(addr, size))
+ panic("zone_page_collect");
+#endif
+
+ i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+ j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+ for (; i <= j; i++) {
+ zp = zone_page_table_lookup(i);
+ ++zp->collect_count;
+ }
+}
+
+void
+zone_page_init(
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ struct zone_page_table_entry *zp;
+ zone_page_index_t i, j;
+
+#if ZONE_ALIAS_ADDR
+ addr = zone_virtual_addr(addr);
+#endif
+#if MACH_ASSERT
+ if (!from_zone_map(addr, size))
+ panic("zone_page_init");
+#endif
+
+ i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+ j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+ for (; i <= j; i++) {
+ /* make sure entry exists before marking unused */
+ zone_page_table_expand(i);
+
+ zp = zone_page_table_lookup(i);
+ assert(zp);
+ zp->alloc_count = ZONE_PAGE_UNUSED;
+ zp->collect_count = 0;
+ }
+}
+
+void
+zone_page_alloc(
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ struct zone_page_table_entry *zp;
+ zone_page_index_t i, j;