/*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*/
#include <stdint.h>
+#include <ptrauth.h>
#include <debug.h>
#include <mach_pagemap.h>
#include <mach/sdt.h>
#include <kern/kern_types.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
-#include <kern/xpr.h>
#include <kern/kalloc.h>
+#include <kern/zalloc_internal.h>
#include <kern/policy_internal.h>
#include <kern/thread_group.h>
extern unsigned int memorystatus_suspended_count;
extern vm_pressure_level_t memorystatus_vm_pressure_level;
+extern lck_mtx_t memorystatus_jetsam_fg_band_lock;
+extern uint32_t memorystatus_jetsam_fg_band_waiters;
+
void vm_pressure_response(void);
extern void consider_vm_pressure_events(void);
#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
#endif /* VM_PRESSURE_EVENTS */
+thread_t vm_pageout_scan_thread = THREAD_NULL;
+boolean_t vps_dynamic_priority_enabled = FALSE;
#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif
#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
*/
#ifndef VM_PAGE_FREE_TARGET
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_FREE_TARGET */
*/
#ifndef VM_PAGE_FREE_MIN
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_FREE_MIN */
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_RESERVED_LIMIT 100
#define VM_PAGE_FREE_MIN_LIMIT 1500
#define VM_PAGE_FREE_TARGET_LIMIT 2000
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_RESERVED_LIMIT 1700
#define VM_PAGE_FREE_MIN_LIMIT 3500
#define VM_PAGE_FREE_TARGET_LIMIT 4000
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
/*
* When vm_page_free_count falls below vm_page_free_reserved,
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef VM_PAGE_REACTIVATE_LIMIT
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
-void vm_tests(void); /* forward */
+boolean_t vm_pageout_running = FALSE;
+
+uint32_t vm_page_upl_tainted = 0;
+uint32_t vm_page_iopl_tainted = 0;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static boolean_t vm_pageout_waiter = FALSE;
-static boolean_t vm_pageout_running = FALSE;
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
#if DEVELOPMENT || DEBUG
struct vm_pageout_state vm_pageout_state;
struct vm_config vm_config;
-struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
-struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
+struct vm_pageout_queue vm_pageout_queue_internal VM_PAGE_PACKED_ALIGNED;
+struct vm_pageout_queue vm_pageout_queue_external VM_PAGE_PACKED_ALIGNED;
int vm_upl_wait_for_pages = 0;
vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
int vm_debug_events = 0;
-lck_grp_t vm_pageout_lck_grp;
+LCK_GRP_DECLARE(vm_pageout_lck_grp, "vm_pageout");
#if CONFIG_MEMORYSTATUS
extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
#endif
+#if __AMP__
+int vm_compressor_ebound = 1;
+int vm_pgo_pbound = 0;
+extern void thread_bind_cluster_type(thread_t, char, bool);
+#endif /* __AMP__ */
/*
if (m->vmp_dirty) {
vm_page_unwire(m, TRUE); /* reactivates */
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
PAGE_WAKEUP_DONE(m);
} else {
vm_page_free(m); /* clears busy, etc. */
assert(!m->vmp_cleaning);
#endif
- XPR(XPR_VM_PAGEOUT,
- "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
- VM_PAGE_OBJECT(m), m->vmp_offset, m,
- new_m, new_offset);
-
pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
/*
vm_object_offset_t paging_offset;
memory_object_t pager;
- XPR(XPR_VM_PAGEOUT,
- "vm_pageout_initialize_page, page 0x%X\n",
- m, 0, 0, 0, 0);
-
assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
object = VM_PAGE_OBJECT(m);
vm_object_t object = VM_PAGE_OBJECT(m);
struct vm_pageout_queue *q;
-
- XPR(XPR_VM_PAGEOUT,
- "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
- object, m->vmp_offset, m, 0, 0);
-
VM_PAGE_CHECK(m);
LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
vm_object_lock_assert_exclusive(object);
void
update_vm_info(void)
{
- uint64_t tmp;
+ unsigned long tmp;
+ uint64_t tmp64;
vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
last.vm_pageout_considered_page = tmp;
- tmp = vm_pageout_vminfo.vm_pageout_compressions;
- vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
- last.vm_pageout_compressions = tmp;
+ tmp64 = vm_pageout_vminfo.vm_pageout_compressions;
+ vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp64 - last.vm_pageout_compressions);
+ last.vm_pageout_compressions = tmp64;
tmp = vm_pageout_vminfo.vm_compressor_failed;
vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
last.vm_compressor_failed = tmp;
- tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
- vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
- last.vm_compressor_pages_grabbed = tmp;
+ tmp64 = vm_pageout_vminfo.vm_compressor_pages_grabbed;
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp64 - last.vm_compressor_pages_grabbed);
+ last.vm_compressor_pages_grabbed = tmp64;
tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
last.vm_phantom_cache_added_ghost = tmp;
- tmp = get_pages_grabbed_count();
- vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
- last_vm_page_pages_grabbed = tmp;
+ tmp64 = counter_load(&vm_page_grab_count);
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed);
+ last_vm_page_pages_grabbed = tmp64;
tmp = vm_pageout_vminfo.vm_page_pages_freed;
vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
extern boolean_t hibernation_vmqueues_inspection;
-void
-vm_page_balance_inactive(int max_to_move)
-{
- vm_page_t m;
+/*
+ * Return values for functions called by vm_pageout_scan
+ * that control its flow.
+ *
+ * PROCEED -- vm_pageout_scan will keep making forward progress.
+ * DONE_RETURN -- page demand satisfied, work is done -> vm_pageout_scan returns.
+ * NEXT_ITERATION -- restart the 'for' loop in vm_pageout_scan aka continue.
+ */
- LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#define VM_PAGEOUT_SCAN_PROCEED (0)
+#define VM_PAGEOUT_SCAN_DONE_RETURN (1)
+#define VM_PAGEOUT_SCAN_NEXT_ITERATION (2)
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it moves overflow secluded pages (one-at-a-time) to the
+ * batched 'local' free Q or active Q.
+ *
+ * local_freeq -- head of the caller's batched free list; a secluded
+ *	page that has no owning object is pushed onto it via vmp_snext.
+ * local_freed -- incremented by one for each page pushed onto
+ *	*local_freeq.
+ *
+ * At most one page is moved per call, and only while
+ * vm_page_secluded_count exceeds vm_page_secluded_target. A page that
+ * still belongs to an object is put on the active queue instead.
+ * Without CONFIG_SECLUDED_MEMORY this function does nothing.
+ */
+static void
+vps_deal_with_secluded_page_overflow(vm_page_t *local_freeq, int *local_freed)
+{
+#if CONFIG_SECLUDED_MEMORY
+ /*
+ * Deal with secluded_q overflow.
+ */
+ if (vm_page_secluded_count > vm_page_secluded_target) {
+ vm_page_t secluded_page;
- if (hibernation_vmqueues_inspection == TRUE) {
/*
- * It is likely that the hibernation code path is
- * dealing with these very queues as we are about
- * to move pages around in/from them and completely
- * change the linkage of the pages.
- *
- * And so we skip the rebalancing of these queues.
+ * SECLUDED_AGING_BEFORE_ACTIVE:
+ * Excess secluded pages go to the active queue and
+ * will later go to the inactive queue.
*/
- return;
+ assert((vm_page_secluded_count_free +
+ vm_page_secluded_count_inuse) ==
+ vm_page_secluded_count);
+ secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
+ assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
+
+ vm_page_queues_remove(secluded_page, FALSE);
+ assert(!secluded_page->vmp_fictitious);
+ assert(!VM_PAGE_WIRED(secluded_page));
+
+ if (secluded_page->vmp_object == 0) {
+ /* transfer to free queue */
+ assert(secluded_page->vmp_busy);
+ secluded_page->vmp_snext = *local_freeq;
+ *local_freeq = secluded_page;
+ *local_freed += 1;
+ } else {
+ /* transfer to head of active queue */
+ vm_page_enqueue_active(secluded_page, FALSE);
+ secluded_page = VM_PAGE_NULL;
+ }
}
- vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count +
- vm_page_speculative_count);
+#else /* CONFIG_SECLUDED_MEMORY */
- while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
- VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
-
- m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
+#pragma unused(local_freeq)
+#pragma unused(local_freed)
- assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
- assert(!m->vmp_laundry);
- assert(VM_PAGE_OBJECT(m) != kernel_object);
- assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
+ return;
- DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
+#endif /* CONFIG_SECLUDED_MEMORY */
+}
- /*
- * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
- *
- * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
- * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
- * new reference happens. If no futher references happen on the page after that remote TLB flushes
- * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
- * by pageout_scan, which is just fine since the last reference would have happened quite far
- * in the past (TLB caches don't hang around for very long), and of course could just as easily
- * have happened before we moved the page
- */
- if (m->vmp_pmapped == TRUE) {
- pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
- }
+/*
+ * This function is called only from vm_pageout_scan and
+ * it initializes the loop targets for vm_pageout_scan().
+ *
+ * It sets vm_page_anonymous_min to 1/20th of vm_page_inactive_target,
+ * clamps vm_pageout_state.vm_page_speculative_percentage to the range
+ * [1, 50], and recomputes vm_pageout_state.vm_page_speculative_target
+ * from vm_page_active_count + vm_page_inactive_count.
+ */
+static void
+vps_init_page_targets(void)
+{
+ /*
+ * LD TODO: Other page targets should be calculated here too.
+ */
+ vm_page_anonymous_min = vm_page_inactive_target / 20;
- /*
- * The page might be absent or busy,
- * but vm_page_deactivate can handle that.
- * FALSE indicates that we don't want a H/W clear reference
- */
- vm_page_deactivate_internal(m, FALSE);
+ if (vm_pageout_state.vm_page_speculative_percentage > 50) {
+ vm_pageout_state.vm_page_speculative_percentage = 50;
+ } else if (vm_pageout_state.vm_page_speculative_percentage <= 0) {
+ vm_pageout_state.vm_page_speculative_percentage = 1;
}
-}
+ vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count);
+}
/*
- * vm_pageout_scan does the dirty work for the pageout daemon.
- * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
- * held and vm_page_free_wanted == 0.
+ * This function is called only from vm_pageout_scan and
+ * it purges a single VM object at-a-time and will either
+ * make vm_pageout_scan() restart the loop or keep moving forward.
+ *
+ * A purge is attempted when available_for_purge is non-zero, or when
+ * VM_PRESSURE_EVENTS is configured and the purge setting selected by
+ * memorystatus_vm_pressure_level (memorystatus_purge_on_warning /
+ * _urgent / _critical) is non-zero.
+ *
+ * Returns VM_PAGEOUT_SCAN_NEXT_ITERATION when
+ * vm_purgeable_object_purge_one() reports that an object was purged,
+ * and VM_PAGEOUT_SCAN_PROCEED when no purge was attempted or the
+ * attempt purged nothing.
*/
-void
-vm_pageout_scan(void)
+static int
+vps_purge_object(void)
{
- unsigned int loop_count = 0;
- unsigned int inactive_burst_count = 0;
- unsigned int reactivated_this_call;
- unsigned int reactivate_limit;
- vm_page_t local_freeq = NULL;
- int local_freed = 0;
- int delayed_unlock;
- int delayed_unlock_limit = 0;
- int refmod_state = 0;
- int vm_pageout_deadlock_target = 0;
- struct vm_pageout_queue *iq;
- struct vm_pageout_queue *eq;
- struct vm_speculative_age_q *sq;
- struct flow_control flow_control = { 0, { 0, 0 } };
- boolean_t inactive_throttled = FALSE;
- mach_timespec_t ts;
- unsigned int msecs = 0;
- vm_object_t object = NULL;
- uint32_t inactive_reclaim_run;
- boolean_t exceeded_burst_throttle;
- boolean_t grab_anonymous = FALSE;
- boolean_t force_anonymous = FALSE;
- boolean_t force_speculative_aging = FALSE;
- int anons_grabbed = 0;
- int page_prev_q_state = 0;
-#if CONFIG_BACKGROUND_QUEUE
- boolean_t page_from_bg_q = FALSE;
-#endif
- int cache_evict_throttle = 0;
- uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
- uint32_t inactive_external_count;
- int force_purge = 0;
- int divisor;
-#define DELAY_SPECULATIVE_AGE 1000
- int delay_speculative_age = 0;
- vm_object_t m_object = VM_OBJECT_NULL;
+ int force_purge;
+
+ assert(available_for_purge >= 0);
+ force_purge = 0; /* no force-purging */
#if VM_PRESSURE_EVENTS
vm_pressure_level_t pressure_level;
-#endif /* VM_PRESSURE_EVENTS */
-
- VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
- vm_pageout_vminfo.vm_pageout_freed_speculative,
- vm_pageout_state.vm_pageout_inactive_clean,
- vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
- vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
- flow_control.state = FCS_IDLE;
- iq = &vm_pageout_queue_internal;
- eq = &vm_pageout_queue_external;
- sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+ pressure_level = memorystatus_vm_pressure_level;
+ if (pressure_level > kVMPressureNormal) {
+ if (pressure_level >= kVMPressureCritical) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_critical;
+ } else if (pressure_level >= kVMPressureUrgent) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
+ } else if (pressure_level >= kVMPressureWarning) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_warning;
+ }
+ }
+#endif /* VM_PRESSURE_EVENTS */
- XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
+ if (available_for_purge || force_purge) {
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
- /* Ask the pmap layer to return any pages it no longer needs. */
- uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
+ if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
+ VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
- vm_page_lock_queues();
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ }
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
+ }
- vm_page_wire_count -= pmap_wired_pages_freed;
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
- delayed_unlock = 1;
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will try to age the next speculative Q if the oldest
+ * one is empty.
+ *
+ * force_speculative_aging -- when TRUE, the selected aging bin is aged
+ *	even if vm_page_speculative_count does not exceed its target,
+ *	without consulting the bin's timestamp.
+ *
+ * Returns VM_PAGEOUT_SCAN_NEXT_ITERATION when every aging bin was
+ * scanned and found empty (vm_page_speculative_count is reset to 0 if
+ * the aged queue is empty as well; DEVELOPMENT || DEBUG kernels panic
+ * in that case), and VM_PAGEOUT_SCAN_PROCEED otherwise.
+ *
+ * After a failed "fully aged" timestamp check, the check is skipped on
+ * subsequent calls until delay_speculative_age has counted up to
+ * DELAY_SPECULATIVE_AGE and been reset to 0.
+ */
+static int
+vps_age_speculative_queue(boolean_t force_speculative_aging)
+{
+#define DELAY_SPECULATIVE_AGE 1000
/*
- * Calculate the max number of referenced pages on the inactive
- * queue that we will reactivate.
+ * try to pull pages from the aging bins...
+ * see vm_page.h for an explanation of how
+ * this mechanism works
*/
- reactivated_this_call = 0;
- reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
- vm_page_inactive_count);
- inactive_reclaim_run = 0;
+ boolean_t can_steal = FALSE;
+ int num_scanned_queues;
+ static int delay_speculative_age = 0; /* depends on the # of times we go through the main pageout_scan loop. */
+ mach_timespec_t ts;
+ struct vm_speculative_age_q *aq;
+ struct vm_speculative_age_q *sq;
- vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
- /*
- * We must limit the rate at which we send pages to the pagers
- * so that we don't tie up too many pages in the I/O queues.
- * We implement a throttling mechanism using the laundry count
- * to limit the number of pages outstanding to the default
- * and external pagers. We can bypass the throttles and look
- * for clean pages if the pageout queues don't drain in a timely
- * fashion since this may indicate that the pageout paths are
- * stalled waiting for memory, which only we can provide.
- */
+ aq = &vm_page_queue_speculative[speculative_steal_index];
-Restart:
+ num_scanned_queues = 0;
+ while (vm_page_queue_empty(&aq->age_q) &&
+ num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
+ speculative_steal_index++;
- assert(object == NULL);
- assert(delayed_unlock != 0);
+ if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
+ speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+ }
- vm_page_anonymous_min = vm_page_inactive_target / 20;
+ aq = &vm_page_queue_speculative[speculative_steal_index];
+ }
- if (vm_pageout_state.vm_page_speculative_percentage > 50) {
- vm_pageout_state.vm_page_speculative_percentage = 50;
- } else if (vm_pageout_state.vm_page_speculative_percentage <= 0) {
- vm_pageout_state.vm_page_speculative_percentage = 1;
+ if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
+ /*
+ * XXX We've scanned all the speculative
+ * queues but still haven't found one
+ * that is not empty, even though
+ * vm_page_speculative_count is not 0.
+ */
+ if (!vm_page_queue_empty(&sq->age_q)) {
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ }
+#if DEVELOPMENT || DEBUG
+ panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
+#endif
+ /* readjust... */
+ vm_page_speculative_count = 0;
+ /* ... and continue */
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
}
- vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
+ if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) {
+ can_steal = TRUE;
+ } else {
+ if (!delay_speculative_age) {
+ mach_timespec_t ts_fully_aged;
- for (;;) {
- vm_page_t m;
+ ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
+ ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
+ * 1000 * NSEC_PER_USEC;
- DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
+ ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
- if (vm_upl_wait_for_pages < 0) {
- vm_upl_wait_for_pages = 0;
+ clock_sec_t sec;
+ clock_nsec_t nsec;
+ clock_get_system_nanotime(&sec, &nsec);
+ ts.tv_sec = (unsigned int) sec;
+ ts.tv_nsec = nsec;
+
+ if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) {
+ can_steal = TRUE;
+ } else {
+ delay_speculative_age++;
+ }
+ } else {
+ delay_speculative_age++;
+ if (delay_speculative_age == DELAY_SPECULATIVE_AGE) {
+ delay_speculative_age = 0;
+ }
}
+ }
+ if (can_steal == TRUE) {
+ vm_page_speculate_ageit(aq);
+ }
- delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
- if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) {
- delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
- }
+/*
+ * This function is called only from vm_pageout_scan and
+ * it evicts a single VM object from the cache.
+ *
+ * object_to_unlock -- if it points at a non-NULL object, that object
+ *	is unlocked and the pointer cleared before eviction is attempted.
+ *
+ * Eviction is attempted only when the aged speculative queue is empty
+ * and cache_evict_throttle is 0. Returns VM_PAGEOUT_SCAN_NEXT_ITERATION
+ * when vm_object_cache_evict() evicted at least one page. Otherwise the
+ * throttle is set to 1000, so the attempt is skipped on later calls
+ * while it counts back down, and VM_PAGEOUT_SCAN_PROCEED is returned.
+ */
+static inline int
+vps_object_cache_evict(vm_object_t *object_to_unlock)
+{
+ static int cache_evict_throttle = 0;
+ struct vm_speculative_age_q *sq;
-#if CONFIG_SECLUDED_MEMORY
- /*
- * Deal with secluded_q overflow.
- */
- if (vm_page_secluded_count > vm_page_secluded_target) {
- vm_page_t secluded_page;
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
- /*
- * SECLUDED_AGING_BEFORE_ACTIVE:
- * Excess secluded pages go to the active queue and
- * will later go to the inactive queue.
- */
- assert((vm_page_secluded_count_free +
- vm_page_secluded_count_inuse) ==
- vm_page_secluded_count);
- secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
- assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
-
- vm_page_queues_remove(secluded_page, FALSE);
- assert(!secluded_page->vmp_fictitious);
- assert(!VM_PAGE_WIRED(secluded_page));
-
- if (secluded_page->vmp_object == 0) {
- /* transfer to free queue */
- assert(secluded_page->vmp_busy);
- secluded_page->vmp_snext = local_freeq;
- local_freeq = secluded_page;
- local_freed++;
- } else {
- /* transfer to head of active queue */
- vm_page_enqueue_active(secluded_page, FALSE);
- secluded_page = VM_PAGE_NULL;
- }
- }
-#endif /* CONFIG_SECLUDED_MEMORY */
+ if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
+ int pages_evicted;
- assert(delayed_unlock);
+ if (*object_to_unlock != NULL) {
+ vm_object_unlock(*object_to_unlock);
+ *object_to_unlock = NULL;
+ }
+ KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
- /*
- * maintain our balance
- */
- vm_page_balance_inactive(1);
+ pages_evicted = vm_object_cache_evict(100, 10);
+ KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
- /**********************************************************************
- * above this point we're playing with the active and secluded queues
- * below this point we're playing with the throttling mechanisms
- * and the inactive queue
- **********************************************************************/
+ if (pages_evicted) {
+ vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
- if (vm_page_free_count + local_freed >= vm_page_free_target) {
- vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+ VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
+ vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
+ memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
- vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
- VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
/*
- * make sure the pageout I/O threads are running
- * throttled in case there are still requests
- * in the laundry... since we have met our targets
- * we don't need the laundry to be cleaned in a timely
- * fashion... so let's avoid interfering with foreground
- * activity
+ * we just freed up to 100 pages,
+ * so go back to the top of the main loop
+ * and re-evaluate the memory situation
*/
- vm_pageout_adjust_eq_iothrottle(eq, TRUE);
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ } else {
+ cache_evict_throttle = 1000;
+ }
+ }
+ if (cache_evict_throttle) {
+ cache_evict_throttle--;
+ }
- lck_mtx_lock(&vm_page_queue_free_lock);
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
- if ((vm_page_free_count >= vm_page_free_target) &&
- (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
- /*
- * done - we have met our target *and*
- * there is no one waiting for a page.
- */
-return_from_scan:
- assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
-
- VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
- vm_pageout_state.vm_pageout_inactive,
- vm_pageout_state.vm_pageout_inactive_used, 0, 0);
- VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
- vm_pageout_vminfo.vm_pageout_freed_speculative,
- vm_pageout_state.vm_pageout_inactive_clean,
- vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
- vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
- return;
- }
- lck_mtx_unlock(&vm_page_queue_free_lock);
- }
+/*
+ * This function is called only from vm_pageout_scan and
+ * it calculates the filecache min. that needs to be maintained
+ * as we start to steal pages.
+ *
+ * vm_pageout_state.vm_page_filecache_min is set to
+ * (AVAILABLE_NON_COMPRESSED_MEMORY * 10) / vm_page_filecache_min_divisor.
+ * It is forced to 0 when the divisor is 0 (which also avoids the
+ * division), when the compressor is low on space (CONFIG_JETSAM) or
+ * out of space (otherwise), and when vm_page_free_count has dropped
+ * below a quarter of vm_page_free_reserved.
+ */
+static void
+vps_calculate_filecache_min(void)
+{
+ int divisor = vm_pageout_state.vm_page_filecache_min_divisor;
+#if CONFIG_JETSAM
+ /*
+ * don't let the filecache_min fall below 15% of available memory
+ * on systems with an active compressor that isn't nearing its
+ * limits w/r to accepting new data
+ *
+ * on systems w/o the compressor/swapper, the filecache is always
+ * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
+ * since most (if not all) of the anonymous pages are in the
+ * throttled queue (which isn't counted as available) which
+ * effectively disables this filter
+ */
+ if (vm_compressor_low_on_space() || divisor == 0) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ } else {
+ vm_pageout_state.vm_page_filecache_min =
+ ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
+ }
+#else
+ if (vm_compressor_out_of_space() || divisor == 0) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ } else {
/*
- * Before anything, we check if we have any ripe volatile
- * objects around. If so, try to purge the first object.
- * If the purge fails, fall through to reclaim a page instead.
- * If the purge succeeds, go back to the top and reevalute
- * the new memory situation.
+ * don't let the filecache_min fall below the specified critical level
*/
+ vm_pageout_state.vm_page_filecache_min =
+ ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
+ }
+#endif
+ if (vm_page_free_count < (vm_page_free_reserved / 4)) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ }
+}
- assert(available_for_purge >= 0);
- force_purge = 0; /* no force-purging */
-
-#if VM_PRESSURE_EVENTS
- pressure_level = memorystatus_vm_pressure_level;
-
- if (pressure_level > kVMPressureNormal) {
- if (pressure_level >= kVMPressureCritical) {
- force_purge = vm_pageout_state.memorystatus_purge_on_critical;
- } else if (pressure_level >= kVMPressureUrgent) {
- force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
- } else if (pressure_level >= kVMPressureWarning) {
- force_purge = vm_pageout_state.memorystatus_purge_on_warning;
- }
- }
-#endif /* VM_PRESSURE_EVENTS */
-
- if (available_for_purge || force_purge) {
- if (object != NULL) {
- vm_object_unlock(object);
- object = NULL;
- }
-
- memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
-
- VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
- if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
- VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
- VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
- memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
- continue;
- }
- VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
- memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
- }
+/*
+ * This function is called only from vm_pageout_scan and
+ * it updates the flow control time to detect if VM pageoutscan
+ * isn't making progress.
+ *
+ * flow_control -- its ts field is set to the current system time plus
+ *	vm_pageout_state.vm_pageout_deadlock_wait (split here as a
+ *	millisecond count into seconds and nanoseconds), and its state
+ *	is set to FCS_DELAYED.
+ *
+ * Also increments
+ * vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal.
+ */
+static void
+vps_flow_control_reset_deadlock_timer(struct flow_control *flow_control)
+{
+ mach_timespec_t ts;
+ clock_sec_t sec;
+ clock_nsec_t nsec;
- if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
- /*
- * try to pull pages from the aging bins...
- * see vm_page.h for an explanation of how
- * this mechanism works
- */
- struct vm_speculative_age_q *aq;
- boolean_t can_steal = FALSE;
- int num_scanned_queues;
+ ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
+ ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
+ clock_get_system_nanotime(&sec, &nsec);
+ flow_control->ts.tv_sec = (unsigned int) sec;
+ flow_control->ts.tv_nsec = nsec;
+ ADD_MACH_TIMESPEC(&flow_control->ts, &ts);
- aq = &vm_page_queue_speculative[speculative_steal_index];
+ flow_control->state = FCS_DELAYED;
- num_scanned_queues = 0;
- while (vm_page_queue_empty(&aq->age_q) &&
- num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
- speculative_steal_index++;
+ vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
+}
- if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
- speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
- }
+/*
+ * This function is called only from vm_pageout_scan and
+ * it is the flow control logic of VM pageout scan which
+ * controls if it should block and for how long.
+ * Any blocking of vm_pageout_scan happens ONLY in this function.
+ */
+static int
+vps_flow_control(struct flow_control *flow_control, int *anons_grabbed, vm_object_t *object, int *delayed_unlock,
+ vm_page_t *local_freeq, int *local_freed, int *vm_pageout_deadlock_target, unsigned int inactive_burst_count)
+{
+ boolean_t exceeded_burst_throttle = FALSE;
+ unsigned int msecs = 0;
+ uint32_t inactive_external_count;
+ mach_timespec_t ts;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct vm_speculative_age_q *sq;
- aq = &vm_page_queue_speculative[speculative_steal_index];
- }
+ iq = &vm_pageout_queue_internal;
+ eq = &vm_pageout_queue_external;
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
- if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
+ /*
+ * Sometimes we have to pause:
+ * 1) No inactive pages - nothing to do.
+ * 2) Loop control - no acceptable pages found on the inactive queue
+ * within the last vm_pageout_burst_inactive_throttle iterations
+ * 3) Flow control - default pageout queue is full
+ */
+ if (vm_page_queue_empty(&vm_page_queue_inactive) &&
+ vm_page_queue_empty(&vm_page_queue_anonymous) &&
+ vm_page_queue_empty(&vm_page_queue_cleaned) &&
+ vm_page_queue_empty(&sq->age_q)) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
+ msecs = vm_pageout_state.vm_pageout_empty_wait;
+ } else if (inactive_burst_count >=
+ MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
+ (vm_page_inactive_count +
+ vm_page_speculative_count))) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
+ msecs = vm_pageout_state.vm_pageout_burst_wait;
+
+ exceeded_burst_throttle = TRUE;
+ } else if (VM_PAGE_Q_THROTTLED(iq) &&
+ VM_DYNAMIC_PAGING_ENABLED()) {
+ clock_sec_t sec;
+ clock_nsec_t nsec;
+
+ switch (flow_control->state) {
+ case FCS_IDLE:
+ if ((vm_page_free_count + *local_freed) < vm_page_free_target &&
+ vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
/*
- * XXX We've scanned all the speculative
- * queues but still haven't found one
- * that is not empty, even though
- * vm_page_speculative_count is not 0.
+ * since the compressor is running independently of vm_pageout_scan
+ * let's not wait for it just yet... as long as we have a healthy supply
+ * of filecache pages to work with, let's keep stealing those.
*/
- if (!vm_page_queue_empty(&sq->age_q)) {
- continue;
+ inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
+
+ if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
+ (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
+ *anons_grabbed = ANONS_GRABBED_LIMIT;
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
+ return VM_PAGEOUT_SCAN_PROCEED;
}
-#if DEVELOPMENT || DEBUG
- panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
-#endif
- /* readjust... */
- vm_page_speculative_count = 0;
- /* ... and continue */
- continue;
}
- if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) {
- can_steal = TRUE;
- } else {
- if (!delay_speculative_age) {
- mach_timespec_t ts_fully_aged;
+ vps_flow_control_reset_deadlock_timer(flow_control);
+ msecs = vm_pageout_state.vm_pageout_deadlock_wait;
- ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
- ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
- * 1000 * NSEC_PER_USEC;
+ break;
- ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
+ case FCS_DELAYED:
+ clock_get_system_nanotime(&sec, &nsec);
+ ts.tv_sec = (unsigned int) sec;
+ ts.tv_nsec = nsec;
- clock_sec_t sec;
- clock_nsec_t nsec;
- clock_get_system_nanotime(&sec, &nsec);
- ts.tv_sec = (unsigned int) sec;
- ts.tv_nsec = nsec;
+ if (CMP_MACH_TIMESPEC(&ts, &flow_control->ts) >= 0) {
+ /*
+ * the pageout thread for the default pager is potentially
+ * deadlocked since the
+ * default pager queue has been throttled for more than the
+ * allowable time... we need to move some clean pages or dirty
+ * pages belonging to the external pagers if they aren't throttled
+ * vm_page_free_wanted represents the number of threads currently
+ * blocked waiting for pages... we'll move one page for each of
+ * these plus a fixed amount to break the logjam... once we're done
+ * moving this number of pages, we'll re-enter the FCS_DELAYED state
+ * with a new timeout target since we have no way of knowing
+ * whether we've broken the deadlock except through observation
+ * of the queue associated with the default pager... we need to
+ * stop moving pages and allow the system to run to see what
+ * state it settles into.
+ */
- if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) {
- can_steal = TRUE;
- } else {
- delay_speculative_age++;
- }
- } else {
- delay_speculative_age++;
- if (delay_speculative_age == DELAY_SPECULATIVE_AGE) {
- delay_speculative_age = 0;
- }
- }
+ *vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
+ vm_page_free_wanted + vm_page_free_wanted_privileged;
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
+ flow_control->state = FCS_DEADLOCK_DETECTED;
+ thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ return VM_PAGEOUT_SCAN_PROCEED;
}
- if (can_steal == TRUE) {
- vm_page_speculate_ageit(aq);
- }
- }
- force_speculative_aging = FALSE;
-
- if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
- int pages_evicted;
+ /*
+ * just resniff instead of trying
+ * to compute a new delay time... we're going to be
+ * awakened immediately upon a laundry completion,
+ * so we won't wait any longer than necessary
+ */
+ msecs = vm_pageout_state.vm_pageout_idle_wait;
+ break;
- if (object != NULL) {
- vm_object_unlock(object);
- object = NULL;
+ case FCS_DEADLOCK_DETECTED:
+ if (*vm_pageout_deadlock_target) {
+ return VM_PAGEOUT_SCAN_PROCEED;
}
- KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
- pages_evicted = vm_object_cache_evict(100, 10);
+ vps_flow_control_reset_deadlock_timer(flow_control);
+ msecs = vm_pageout_state.vm_pageout_deadlock_wait;
- KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
+ break;
+ }
+ } else {
+ /*
+ * No need to pause...
+ */
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
- if (pages_evicted) {
- vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
+ vm_pageout_scan_wants_object = VM_OBJECT_NULL;
- VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
- vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
- memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
+ vm_pageout_prepare_to_block(object, delayed_unlock, local_freeq, local_freed,
+ VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
- /*
- * we just freed up to 100 pages,
- * so go back to the top of the main loop
- * and re-evaulate the memory situation
- */
- continue;
- } else {
- cache_evict_throttle = 1000;
- }
- }
- if (cache_evict_throttle) {
- cache_evict_throttle--;
- }
+ if (vm_page_free_count >= vm_page_free_target) {
+ /*
+ * we're here because
+ * 1) someone else freed up some pages while we had
+ * the queues unlocked above
+ * and we've hit one of the 3 conditions that
+ * cause us to pause the pageout scan thread
+ *
+ * since we already have enough free pages,
+ * let's avoid stalling and return normally
+ *
+ * before we return, make sure the pageout I/O threads
+ * are running throttled in case there are still requests
+ * in the laundry... since we have enough free pages
+ * we don't need the laundry to be cleaned in a timely
+ * fashion... so let's avoid interfering with foreground
+ * activity
+ *
+ * we don't want to hold vm_page_queue_free_lock when
+ * calling vm_pageout_adjust_eq_iothrottle (since it
+ * may cause other locks to be taken), we do the initial
+ * check outside of the lock. Once we take the lock,
+ * we recheck the condition since it may have changed.
+ * if it has, no problem, we will make the threads
+ * non-throttled before actually blocking
+ */
+ vm_pageout_adjust_eq_iothrottle(eq, TRUE);
+ }
+ lck_mtx_lock(&vm_page_queue_free_lock);
- divisor = vm_pageout_state.vm_page_filecache_min_divisor;
+ if (vm_page_free_count >= vm_page_free_target &&
+ (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
+ return VM_PAGEOUT_SCAN_DONE_RETURN;
+ }
+ lck_mtx_unlock(&vm_page_queue_free_lock);
-#if CONFIG_JETSAM
+ if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
/*
- * don't let the filecache_min fall below 15% of available memory
- * on systems with an active compressor that isn't nearing its
- * limits w/r to accepting new data
+ * we're most likely about to block due to one of
+ * the 3 conditions that cause vm_pageout_scan to
+ * not be able to make forward progress w/r
+ * to providing new pages to the free queue,
+ * so unthrottle the I/O threads in case we
+ * have laundry to be cleaned... it needs
+ * to be completed ASAP.
*
- * on systems w/o the compressor/swapper, the filecache is always
- * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
- * since most (if not all) of the anonymous pages are in the
- * throttled queue (which isn't counted as available) which
- * effectively disables this filter
+ * even if we don't block, we want the io threads
+ * running unthrottled since the sum of free +
+ * clean pages is still under our free target
+ */
+ vm_pageout_adjust_eq_iothrottle(eq, FALSE);
+ }
+ if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
+ /*
+ * if we get here we're below our free target and
+ * we're stalling due to a full laundry queue or
+ * we don't have any inactive pages other than
+ * those in the clean queue...
+ * however, we have pages on the clean queue that
+ * can be moved to the free queue, so let's not
+ * stall the pageout scan
*/
- if (vm_compressor_low_on_space() || divisor == 0) {
- vm_pageout_state.vm_page_filecache_min = 0;
+ flow_control->state = FCS_IDLE;
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+ if (flow_control->state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
+ flow_control->state = FCS_IDLE;
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+
+ VM_CHECK_MEMORYSTATUS;
+
+ if (flow_control->state != FCS_IDLE) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
+ }
+
+ iq->pgo_throttled = TRUE;
+ assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
+
+ vm_page_unlock_queues();
+
+ assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
+
+ VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
+ iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+ memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
+ iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+ memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
+
+ vm_page_lock_queues();
+
+ iq->pgo_throttled = FALSE;
+
+ vps_init_page_targets();
+
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will find and return the most appropriate page to be
+ * reclaimed.
+ *
+ * Queues are consulted in the following order and the first page
+ * found wins:
+ *   1) the first page of the cleaned queue
+ *   2) the first page of the aged speculative queue (passed over when
+ *      that page is dirty and force_anonymous is TRUE)
+ *   3) the first page of the background queue, only when
+ *      CONFIG_BACKGROUND_QUEUE is built in, background mode is not
+ *      disabled and vm_page_background_count exceeds its target
+ *   4) the first page of the inactive queue or of the anonymous
+ *      queue, depending on *grab_anonymous and *anons_grabbed
+ *
+ * victim_page           - out: the page that was chosen, or NULL when
+ *                         nothing was chosen
+ * anons_grabbed         - in/out: compared against ANONS_GRABBED_LIMIT
+ *                         in step 4; reset to 0 when an inactive-queue
+ *                         page is taken or when anonymous pages are
+ *                         requested because the file cache is low;
+ *                         incremented when an anonymous page is taken
+ * grab_anonymous        - in/out: rewritten whenever the choice falls
+ *                         through to step 4
+ * force_anonymous       - in: when TRUE, steers the choice away from
+ *                         dirty speculative pages and from external
+ *                         background pages, and forces *grab_anonymous
+ * is_page_from_bg_q     - out: cleared on entry, set to TRUE only when
+ *                         the page is taken from the background queue
+ * reactivated_this_call - in/out: incremented for each inactive-queue
+ *                         page examined while the pageable external
+ *                         count is below vm_page_filecache_min and the
+ *                         anonymous queue is not empty; such a page is
+ *                         reactivated unless the new count is a
+ *                         multiple of 100, in which case it is stolen
+ *
+ * Returns VM_PAGEOUT_SCAN_NEXT_ITERATION (with *victim_page == NULL)
+ * when the inactive-queue page was reactivated to protect the file
+ * cache; returns VM_PAGEOUT_SCAN_PROCEED otherwise, in which case
+ * *victim_page may still be NULL if no queue offered a page.
+ *
+ * NOTE(review): the local m_object is never assigned anything but
+ * VM_OBJECT_NULL in this function, yet the DEVELOPMENT/DEBUG block in
+ * the reactivation path reads m_object->internal.  That block is
+ * guarded by *is_page_from_bg_q == TRUE, which appears unreachable
+ * there because the flag is only set right before jumping to
+ * found_page -- confirm and consider removing the block.
+ */
+static int
+vps_choose_victim_page(vm_page_t *victim_page, int *anons_grabbed, boolean_t *grab_anonymous, boolean_t force_anonymous,
+ boolean_t *is_page_from_bg_q, unsigned int *reactivated_this_call)
+{
+ vm_page_t m = NULL;
+ vm_object_t m_object = VM_OBJECT_NULL;
+ uint32_t inactive_external_count;
+ struct vm_speculative_age_q *sq;
+ struct vm_pageout_queue *iq;
+ int retval = VM_PAGEOUT_SCAN_PROCEED;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+ iq = &vm_pageout_queue_internal;
+
+ *is_page_from_bg_q = FALSE;
+
+ m = NULL;
+ m_object = VM_OBJECT_NULL;
+
+ if (VM_DYNAMIC_PAGING_ENABLED()) {
+ assert(vm_page_throttled_count == 0);
+ assert(vm_page_queue_empty(&vm_page_queue_throttled));
+ }
+
+ /*
+ * Try for a clean-queue inactive page.
+ * These are pages that vm_pageout_scan tried to steal earlier, but
+ * were dirty and had to be cleaned. Pick them up now that they are clean.
+ */
+ if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
+
+ goto found_page;
+ }
+
+ /*
+ * The next most eligible pages are ones we paged in speculatively,
+ * but which have not yet been touched and have been aged out.
+ */
+ if (!vm_page_queue_empty(&sq->age_q)) {
+ m = (vm_page_t) vm_page_queue_first(&sq->age_q);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
+
+ if (!m->vmp_dirty || force_anonymous == FALSE) {
+ goto found_page;
} else {
- vm_pageout_state.vm_page_filecache_min =
- ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
+ m = NULL;
}
-#else
- if (vm_compressor_out_of_space() || divisor == 0) {
- vm_pageout_state.vm_page_filecache_min = 0;
- } else {
+ }
+
+#if CONFIG_BACKGROUND_QUEUE
+ if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
+ vm_object_t bg_m_object = NULL;
+
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
+
+ bg_m_object = VM_PAGE_OBJECT(m);
+
+ if (!VM_PAGE_PAGEABLE(m)) {
/*
- * don't let the filecache_min fall below the specified critical level
+ * This page is on the background queue
+ * but not on a pageable queue. This is
+ * likely a transient state and whoever
+ * took it out of its pageable queue
+ * will likely put it back on a pageable
+ * queue soon but we can't deal with it
+ * at this point, so let's ignore this
+ * page.
*/
- vm_pageout_state.vm_page_filecache_min =
- ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
- }
-#endif
- if (vm_page_free_count < (vm_page_free_reserved / 4)) {
- vm_pageout_state.vm_page_filecache_min = 0;
+ } else if (force_anonymous == FALSE || bg_m_object->internal) {
+ if (bg_m_object->internal &&
+ (VM_PAGE_Q_THROTTLED(iq) ||
+ vm_compressor_out_of_space() == TRUE ||
+ vm_page_free_count < (vm_page_free_reserved / 4))) {
+ vm_pageout_skipped_bq_internal++;
+ } else {
+ *is_page_from_bg_q = TRUE;
+
+ if (bg_m_object->internal) {
+ vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
+ } else {
+ vm_pageout_vminfo.vm_pageout_considered_bq_external++;
+ }
+ goto found_page;
+ }
}
+ }
+#endif /* CONFIG_BACKGROUND_QUEUE */
- exceeded_burst_throttle = FALSE;
- /*
- * Sometimes we have to pause:
- * 1) No inactive pages - nothing to do.
- * 2) Loop control - no acceptable pages found on the inactive queue
- * within the last vm_pageout_burst_inactive_throttle iterations
- * 3) Flow control - default pageout queue is full
- */
- if (vm_page_queue_empty(&vm_page_queue_inactive) &&
- vm_page_queue_empty(&vm_page_queue_anonymous) &&
- vm_page_queue_empty(&vm_page_queue_cleaned) &&
- vm_page_queue_empty(&sq->age_q)) {
- VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
- msecs = vm_pageout_state.vm_pageout_empty_wait;
- goto vm_pageout_scan_delay;
- } else if (inactive_burst_count >=
- MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
- (vm_page_inactive_count +
- vm_page_speculative_count))) {
- VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
- msecs = vm_pageout_state.vm_pageout_burst_wait;
-
- exceeded_burst_throttle = TRUE;
- goto vm_pageout_scan_delay;
- } else if (VM_PAGE_Q_THROTTLED(iq) &&
- VM_DYNAMIC_PAGING_ENABLED()) {
- clock_sec_t sec;
- clock_nsec_t nsec;
+ inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
- switch (flow_control.state) {
- case FCS_IDLE:
- if ((vm_page_free_count + local_freed) < vm_page_free_target &&
- vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
- /*
- * since the compressor is running independently of vm_pageout_scan
- * let's not wait for it just yet... as long as we have a healthy supply
- * of filecache pages to work with, let's keep stealing those.
- */
- inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
+ if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
+ (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
+ *grab_anonymous = TRUE;
+ *anons_grabbed = 0;
- if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
- (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
- anons_grabbed = ANONS_GRABBED_LIMIT;
- VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
- goto consider_inactive;
- }
- }
-reset_deadlock_timer:
- ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
- ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
- clock_get_system_nanotime(&sec, &nsec);
- flow_control.ts.tv_sec = (unsigned int) sec;
- flow_control.ts.tv_nsec = nsec;
- ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
-
- flow_control.state = FCS_DELAYED;
- msecs = vm_pageout_state.vm_pageout_deadlock_wait;
-
- vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
- break;
+ vm_pageout_vminfo.vm_pageout_skipped_external++;
+ goto want_anonymous;
+ }
+ *grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
- case FCS_DELAYED:
- clock_get_system_nanotime(&sec, &nsec);
- ts.tv_sec = (unsigned int) sec;
- ts.tv_nsec = nsec;
+#if CONFIG_JETSAM
+ /* If the file-backed pool has accumulated
+ * significantly more pages than the jetsam
+ * threshold, prefer to reclaim those
+ * inline to minimise compute overhead of reclaiming
+ * anonymous pages.
+ * This calculation does not account for the CPU local
+ * external page queues, as those are expected to be
+ * much smaller relative to the global pools.
+ */
- if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
- /*
- * the pageout thread for the default pager is potentially
- * deadlocked since the
- * default pager queue has been throttled for more than the
- * allowable time... we need to move some clean pages or dirty
- * pages belonging to the external pagers if they aren't throttled
- * vm_page_free_wanted represents the number of threads currently
- * blocked waiting for pages... we'll move one page for each of
- * these plus a fixed amount to break the logjam... once we're done
- * moving this number of pages, we'll re-enter the FSC_DELAYED state
- * with a new timeout target since we have no way of knowing
- * whether we've broken the deadlock except through observation
- * of the queue associated with the default pager... we need to
- * stop moving pages and allow the system to run to see what
- * state it settles into.
- */
- vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
- vm_page_free_wanted + vm_page_free_wanted_privileged;
- VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
- flow_control.state = FCS_DEADLOCK_DETECTED;
- thread_wakeup((event_t) &vm_pageout_garbage_collect);
- goto consider_inactive;
- }
- /*
- * just resniff instead of trying
- * to compute a new delay time... we're going to be
- * awakened immediately upon a laundry completion,
- * so we won't wait any longer than necessary
- */
- msecs = vm_pageout_state.vm_pageout_idle_wait;
- break;
+ struct vm_pageout_queue *eq = &vm_pageout_queue_external;
- case FCS_DEADLOCK_DETECTED:
- if (vm_pageout_deadlock_target) {
- goto consider_inactive;
- }
- goto reset_deadlock_timer;
+ if (*grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
+ if (vm_page_pageable_external_count >
+ vm_pageout_state.vm_page_filecache_min) {
+ if ((vm_page_pageable_external_count *
+ vm_pageout_memorystatus_fb_factor_dr) >
+ (memorystatus_available_pages_critical *
+ vm_pageout_memorystatus_fb_factor_nr)) {
+ *grab_anonymous = FALSE;
+
+ VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
}
-vm_pageout_scan_delay:
- vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+ }
+ if (*grab_anonymous) {
+ VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
+ }
+ }
+#endif /* CONFIG_JETSAM */
- vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
- VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
+want_anonymous:
+ if (*grab_anonymous == FALSE || *anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ if (!vm_page_queue_empty(&vm_page_queue_inactive)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
- if (vm_page_free_count >= vm_page_free_target) {
- /*
- * we're here because
- * 1) someone else freed up some pages while we had
- * the queues unlocked above
- * and we've hit one of the 3 conditions that
- * cause us to pause the pageout scan thread
- *
- * since we already have enough free pages,
- * let's avoid stalling and return normally
- *
- * before we return, make sure the pageout I/O threads
- * are running throttled in case there are still requests
- * in the laundry... since we have enough free pages
- * we don't need the laundry to be cleaned in a timely
- * fashion... so let's avoid interfering with foreground
- * activity
- *
- * we don't want to hold vm_page_queue_free_lock when
- * calling vm_pageout_adjust_eq_iothrottle (since it
- * may cause other locks to be taken), we do the intitial
- * check outside of the lock. Once we take the lock,
- * we recheck the condition since it may have changed.
- * if it has, no problem, we will make the threads
- * non-throttled before actually blocking
- */
- vm_pageout_adjust_eq_iothrottle(eq, TRUE);
- }
- lck_mtx_lock(&vm_page_queue_free_lock);
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
+ *anons_grabbed = 0;
- if (vm_page_free_count >= vm_page_free_target &&
- (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
- goto return_from_scan;
- }
- lck_mtx_unlock(&vm_page_queue_free_lock);
+ if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
+ if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ if ((++(*reactivated_this_call) % 100)) {
+ vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
- if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
- /*
- * we're most likely about to block due to one of
- * the 3 conditions that cause vm_pageout_scan to
- * not be able to make forward progress w/r
- * to providing new pages to the free queue,
- * so unthrottle the I/O threads in case we
- * have laundry to be cleaned... it needs
- * to be completed ASAP.
- *
- * even if we don't block, we want the io threads
- * running unthrottled since the sum of free +
- * clean pages is still under our free target
- */
- vm_pageout_adjust_eq_iothrottle(eq, FALSE);
- }
- if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
- /*
- * if we get here we're below our free target and
- * we're stalling due to a full laundry queue or
- * we don't have any inactive pages other then
- * those in the clean queue...
- * however, we have pages on the clean queue that
- * can be moved to the free queue, so let's not
- * stall the pageout scan
- */
- flow_control.state = FCS_IDLE;
- goto consider_inactive;
- }
- if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
- flow_control.state = FCS_IDLE;
- goto consider_inactive;
- }
+ vm_page_activate(m);
+ counter_inc(&vm_statistics_reactivations);
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+ if (*is_page_from_bg_q == TRUE) {
+ if (m_object->internal) {
+ vm_pageout_rejected_bq_internal++;
+ } else {
+ vm_pageout_rejected_bq_external++;
+ }
+ }
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+ vm_pageout_state.vm_pageout_inactive_used++;
- VM_CHECK_MEMORYSTATUS;
+ m = NULL;
+ retval = VM_PAGEOUT_SCAN_NEXT_ITERATION;
+
+ goto found_page;
+ }
- if (flow_control.state != FCS_IDLE) {
- VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
+ /*
+ * steal 1 of the file backed pages even if
+ * we are under the limit that has been set
+ * for a healthy filecache
+ */
+ }
}
+ goto found_page;
+ }
+ }
+ if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
- iq->pgo_throttled = TRUE;
- assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
+ *anons_grabbed += 1;
- counter(c_vm_pageout_scan_block++);
+ goto found_page;
+ }
- vm_page_unlock_queues();
+ m = NULL;
+
+found_page:
+ *victim_page = m;
+
+ return retval;
+}
- assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
+/*
+ * Helper for vm_pageout_scan: gives back a page that could not be
+ * reclaimed.
+ *
+ * A page whose previous queue state was VM_PAGE_ON_SPECULATIVE_Q is
+ * passed to vm_page_enqueue_inactive(); a page from any other queue
+ * is passed to vm_page_activate().
+ *
+ * On DEVELOPMENT/DEBUG kernels built with CONFIG_BACKGROUND_QUEUE, a
+ * page that had been picked from the background queue additionally
+ * bumps the matching "rejected" counter, chosen by whether its object
+ * is internal or external.  In other configurations page_from_bg_q
+ * is unused.
+ */
+static void
+vps_requeue_page(vm_page_t m, int page_prev_q_state, __unused boolean_t page_from_bg_q)
+{
+	if (page_prev_q_state != VM_PAGE_ON_SPECULATIVE_Q) {
+		vm_page_activate(m);
+	} else {
+		vm_page_enqueue_inactive(m, FALSE);
+	}
+
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+	vm_object_t owner = VM_PAGE_OBJECT(m);
+
+	/* only pages that came off the background queue are accounted for */
+	if (page_from_bg_q != TRUE) {
+		return;
+	}
+
+	if (owner->internal) {
+		vm_pageout_rejected_bq_internal++;
+	} else {
+		vm_pageout_rejected_bq_external++;
+	}
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will try to grab the victim page's VM object (m_object)
+ * which differs from the previous victim page's object (object).
+ *
+ * m                 - the candidate page
+ * m_object          - the VM object this function tries to lock
+ * object            - in/out: if non-NULL on entry it is unlocked and
+ *                     cleared; it is set to m_object when the try-lock
+ *                     succeeds and is left NULL when it fails
+ * page_prev_q_state - the queue state the page had before it was
+ *                     selected; forwarded to vps_requeue_page when the
+ *                     try-lock fails
+ * avoid_anon_pages  - when set, the first page of the inactive queue is
+ *                     preferred over the first page of the anonymous
+ *                     queue when recording the next object of interest
+ * page_from_bg_q    - forwarded to vps_requeue_page when the try-lock
+ *                     fails
+ *
+ * Returns VM_PAGEOUT_SCAN_PROCEED when the try-lock succeeded, after
+ * clearing vm_pageout_scan_wants_object.  When it failed, the page's
+ * reference state is cleared, vm_pageout_scan_wants_object is pointed
+ * at the object of the next page we expect to look at (if any), the
+ * page is handed to vps_requeue_page and
+ * VM_PAGEOUT_SCAN_NEXT_ITERATION is returned.
+ */
+static int
+vps_switch_object(vm_page_t m, vm_object_t m_object, vm_object_t *object, int page_prev_q_state, boolean_t avoid_anon_pages, boolean_t page_from_bg_q)
+{
+ struct vm_speculative_age_q *sq;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ /*
+ * the object associated with candidate page is
+ * different from the one we were just working
+ * with... dump the lock if we still own it
+ */
+ if (*object != NULL) {
+ vm_object_unlock(*object);
+ *object = NULL;
+ }
+ /*
+ * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run... otherwise, we're likely to trip over this
+ * object in the same state as we work our way through
+ * the queue... clumps of pages associated with the same
+ * object are fairly typical on the inactive and active queues
+ */
+ if (!vm_object_lock_try_scan(m_object)) {
+ vm_page_t m_want = NULL;
+
+ vm_pageout_vminfo.vm_pageout_inactive_nolock++;
+
+ if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
+ VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
+ }
+
+ pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
+
+ m->vmp_reference = FALSE;
+
+ if (!m_object->object_is_shared_cache) {
+ /*
+ * don't apply this optimization if this is the shared cache
+ * object, it's too easy to get rid of very hot and important
+ * pages...
+ * m->vmp_object must be stable since we hold the page queues lock...
+ * we can update the scan_collisions field sans the object lock
+ * since it is a separate field and this is the only spot that does
+ * a read-modify-write operation and it is never executed concurrently...
+ * we can asynchronously set this field to 0 when creating a UPL, so it
+ * is possible for the value to be a bit non-deterministic, but that's ok
+ * since it's only used as a hint
+ */
+ m_object->scan_collisions = 1;
+ }
+ if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
+ } else if (!vm_page_queue_empty(&sq->age_q)) {
+ m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
+ } else if ((avoid_anon_pages || vm_page_queue_empty(&vm_page_queue_anonymous)) &&
+ !vm_page_queue_empty(&vm_page_queue_inactive)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
+ } else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
+ }
+
+ /*
+ * this is the next object we're going to be interested in
+ * try to make sure its available after the mutex_pause
+ * returns control
+ */
+ if (m_want) {
+ vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
+ }
+
+ vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
+
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ } else {
+ *object = m_object;
+ vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+ }
+
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it notices that pageout scan may be rendered ineffective
+ * due to a FS deadlock and will jetsam a process if possible.
+ * If jetsam isn't supported, it'll move the page to the active
+ * queue to try and get some different pages pushed onwards so
+ * we can try to get out of this scenario.
+ *
+ * m      - the page being given up on; for an external object it must
+ *          currently be off all queues (asserted) and is entered
+ *          directly on the active queue
+ * object - in/out: *object is dereferenced unconditionally, so it must
+ *          be non-NULL on entry; it is set to VM_OBJECT_NULL when this
+ *          function drops the object lock on the jetsam path
+ * vm_pageout_inactive_external_forced_reactivate_limit
+ *        - in/out, CONFIG_MEMORYSTATUS && CONFIG_JETSAM only:
+ *          decremented for every external page handled here; when it
+ *          reaches zero it is reset to vm_page_active_count +
+ *          vm_page_inactive_count and a jetsam kill is requested
+ * delayed_unlock
+ *        - out, CONFIG_MEMORYSTATUS && CONFIG_JETSAM only: set to 1
+ *          after the page queues lock has been dropped and retaken
+ *          around the jetsam request
+ * force_anonymous
+ *        - out, only when jetsam is NOT configured: set to TRUE for an
+ *          external object
+ * is_page_from_bg_q
+ *        - in: only consulted for the DEVELOPMENT/DEBUG background
+ *          queue counters in the internal-object branch
+ *
+ * For an internal object the page is simply passed to
+ * vm_page_activate() and the reactivation statistics are bumped.
+ *
+ * NOTE(review): the limit is a uint32_t, so the "<= 0" test below can
+ * only be true for == 0; if this were ever entered with the limit
+ * already at 0 the decrement would wrap instead of triggering --
+ * confirm the caller always seeds it with a non-zero value.
+ * NOTE(review): assert(cur_object) on the jetsam path comes after
+ * cur_object has already been dereferenced.
+ * NOTE(review): in the internal-object branch the
+ * vm_pageout_rejected_bq_external arm looks unreachable, since that
+ * branch is only entered when cur_object->internal is not FALSE.
+ */
+static void
+vps_deal_with_throttled_queues(vm_page_t m, vm_object_t *object, uint32_t *vm_pageout_inactive_external_forced_reactivate_limit,
+ int *delayed_unlock, boolean_t *force_anonymous, __unused boolean_t is_page_from_bg_q)
+{
+ struct vm_pageout_queue *eq;
+ vm_object_t cur_object = VM_OBJECT_NULL;
+
+ cur_object = *object;
+
+ eq = &vm_pageout_queue_external;
+
+ if (cur_object->internal == FALSE) {
+ /*
+ * we need to break up the following potential deadlock case...
+ * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
+ * b) The thread doing the writing is waiting for pages while holding the truncate lock
+ * c) Most of the pages in the inactive queue belong to this file.
+ *
+ * we are potentially in this deadlock because...
+ * a) the external pageout queue is throttled
+ * b) we're done with the active queue and moved on to the inactive queue
+ * c) we've got a dirty external page
+ *
+ * since we don't know the reason for the external pageout queue being throttled we
+ * must suspect that we are deadlocked, so move the current page onto the active queue
+ * in an effort to cause a page from the active queue to 'age' to the inactive queue
+ *
+ * if we don't have jetsam configured (i.e. we have a dynamic pager), set
+ * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
+ * pool the next time we select a victim page... if we can make enough new free pages,
+ * the deadlock will break, the external pageout queue will empty and it will no longer
+ * be throttled
+ *
+ * if we have jetsam configured, keep a count of the pages reactivated this way so
+ * that we can try to find clean pages in the active/inactive queues before
+ * deciding to jetsam a process
+ */
+ vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
+
+ vm_page_check_pageable_safe(m);
+ assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
+ vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq);
+ m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
+ vm_page_active_count++;
+ vm_page_pageable_external_count++;
+
+ vm_pageout_adjust_eq_iothrottle(eq, FALSE);
+
+#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
+
+#pragma unused(force_anonymous)
+
+ *vm_pageout_inactive_external_forced_reactivate_limit -= 1;
+
+ if (*vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
+ *vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+ /*
+ * Possible deadlock scenario so request jetsam action
+ */
+
+ assert(cur_object);
+ vm_object_unlock(cur_object);
+
+ cur_object = VM_OBJECT_NULL;
+
+ /*
+ * VM pageout scan needs to know we have dropped this lock and so set the
+ * object variable we got passed in to NULL.
+ */
+ *object = VM_OBJECT_NULL;
+
+ vm_page_unlock_queues();
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
+ vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
+
+ /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
+ if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
+ VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
+ }
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
+ vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
+
+ vm_page_lock_queues();
+ *delayed_unlock = 1;
+ }
+#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+
+#pragma unused(vm_pageout_inactive_external_forced_reactivate_limit)
+#pragma unused(delayed_unlock)
+
+ *force_anonymous = TRUE;
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+ } else {
+ vm_page_activate(m);
+ counter_inc(&vm_statistics_reactivations);
+
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+ if (is_page_from_bg_q == TRUE) {
+ if (cur_object->internal) {
+ vm_pageout_rejected_bq_internal++;
+ } else {
+ vm_pageout_rejected_bq_external++;
+ }
+ }
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+
+ vm_pageout_state.vm_pageout_inactive_used++;
+ }
+}
+
+
+/*
+ * Recompute vm_page_inactive_target from the current active, inactive
+ * and speculative counts, then deactivate pages taken from the front
+ * of the active queue until either max_to_move pages have been
+ * handled or the inactive + speculative count is no longer below that
+ * target.
+ *
+ * The page queues lock must be held by the caller (asserted).  The
+ * function does nothing while hibernation is inspecting or cleaning
+ * the page queues.
+ */
+void
+vm_page_balance_inactive(int max_to_move)
+{
+	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
+	if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
+		/*
+		 * The hibernation code path is probably walking these
+		 * same queues right now; moving pages between them would
+		 * rewrite the page linkage underneath it, so leave the
+		 * queues alone and skip rebalancing.
+		 */
+		return;
+	}
+
+	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+	    vm_page_inactive_count +
+	    vm_page_speculative_count);
+
+	for (;;) {
+		vm_page_t head;
+
+		/* budget exhausted? (the counter is consumed even on the last check) */
+		if (max_to_move-- == 0) {
+			break;
+		}
+		/* enough inactive + speculative pages already? */
+		if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) {
+			break;
+		}
+
+		VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
+
+		head = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
+
+		assert(head->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
+		assert(!head->vmp_laundry);
+		assert(VM_PAGE_OBJECT(head) != kernel_object);
+		assert(VM_PAGE_GET_PHYS_PAGE(head) != vm_page_guard_addr);
+
+		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
+
+		/*
+		 * No pmap_flush_context is passed, so no TLB flushing (local or
+		 * remote) is done here.
+		 *
+		 * That is acceptable: the worst case is that a remote processor
+		 * still caches this mapping in its TLB and a new reference is not
+		 * reflected in the PTE.  If no further references happen after
+		 * that remote TLB entry is flushed, pageout_scan will later see a
+		 * clean, non-referenced page when it pulls it off the inactive
+		 * queue, which is fine: the last reference was far in the past
+		 * (TLB entries are short-lived) and could just as well have
+		 * happened before the page was moved.
+		 */
+		if (head->vmp_pmapped == TRUE) {
+			pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(head), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
+		}
+
+		/*
+		 * The page may be absent or busy; vm_page_deactivate copes
+		 * with both.  FALSE means: do not clear the H/W reference.
+		 */
+		vm_page_deactivate_internal(head, FALSE);
+	}
+}
+
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
+ * held and vm_page_free_wanted == 0.
+ */
+void
+vm_pageout_scan(void)
+{
+ unsigned int loop_count = 0;
+ unsigned int inactive_burst_count = 0;
+ unsigned int reactivated_this_call;
+ unsigned int reactivate_limit;
+ vm_page_t local_freeq = NULL;
+ int local_freed = 0;
+ int delayed_unlock;
+ int delayed_unlock_limit = 0;
+ int refmod_state = 0;
+ int vm_pageout_deadlock_target = 0;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct vm_speculative_age_q *sq;
+ struct flow_control flow_control = { .state = 0, .ts = { .tv_sec = 0, .tv_nsec = 0 } };
+ boolean_t inactive_throttled = FALSE;
+ vm_object_t object = NULL;
+ uint32_t inactive_reclaim_run;
+ boolean_t grab_anonymous = FALSE;
+ boolean_t force_anonymous = FALSE;
+ boolean_t force_speculative_aging = FALSE;
+ int anons_grabbed = 0;
+ int page_prev_q_state = 0;
+ boolean_t page_from_bg_q = FALSE;
+ uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
+ vm_object_t m_object = VM_OBJECT_NULL;
+ int retval = 0;
+ boolean_t lock_yield_check = FALSE;
+
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
+ vm_pageout_vminfo.vm_pageout_freed_speculative,
+ vm_pageout_state.vm_pageout_inactive_clean,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
+
+ flow_control.state = FCS_IDLE;
+ iq = &vm_pageout_queue_internal;
+ eq = &vm_pageout_queue_external;
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ /* Ask the pmap layer to return any pages it no longer needs. */
+ uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
+
+ vm_page_lock_queues();
+
+ vm_page_wire_count -= pmap_wired_pages_freed;
+
+ delayed_unlock = 1;
+
+ /*
+ * Calculate the max number of referenced pages on the inactive
+ * queue that we will reactivate.
+ */
+ reactivated_this_call = 0;
+ reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
+ vm_page_inactive_count);
+ inactive_reclaim_run = 0;
+
+ vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+
+ /*
+ * We must limit the rate at which we send pages to the pagers
+ * so that we don't tie up too many pages in the I/O queues.
+ * We implement a throttling mechanism using the laundry count
+ * to limit the number of pages outstanding to the default
+ * and external pagers. We can bypass the throttles and look
+ * for clean pages if the pageout queues don't drain in a timely
+ * fashion since this may indicate that the pageout paths are
+ * stalled waiting for memory, which only we can provide.
+ */
+
+ vps_init_page_targets();
+ assert(object == NULL);
+ assert(delayed_unlock != 0);
+
+ for (;;) {
+ vm_page_t m;
+
+ DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
+
+ if (lock_yield_check) {
+ lock_yield_check = FALSE;
+
+ if (delayed_unlock++ > delayed_unlock_limit) {
+ int freed = local_freed;
+
+ vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
+ VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
+ if (freed == 0) {
+ lck_mtx_yield(&vm_page_queue_lock);
+ }
+ } else if (vm_pageout_scan_wants_object) {
+ vm_page_unlock_queues();
+ mutex_pause(0);
+ vm_page_lock_queues();
+ }
+ }
+
+ if (vm_upl_wait_for_pages < 0) {
+ vm_upl_wait_for_pages = 0;
+ }
+
+ delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
+
+ if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) {
+ delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
+ }
+
+ vps_deal_with_secluded_page_overflow(&local_freeq, &local_freed);
+
+ assert(delayed_unlock);
+
+ /*
+ * maintain our balance
+ */
+ vm_page_balance_inactive(1);
+
+
+ /**********************************************************************
+ * above this point we're playing with the active and secluded queues
+ * below this point we're playing with the throttling mechanisms
+ * and the inactive queue
+ **********************************************************************/
+
+ if (vm_page_free_count + local_freed >= vm_page_free_target) {
+ vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+
+ vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
+ VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
+ /*
+ * make sure the pageout I/O threads are running
+ * throttled in case there are still requests
+ * in the laundry... since we have met our targets
+ * we don't need the laundry to be cleaned in a timely
+ * fashion... so let's avoid interfering with foreground
+ * activity
+ */
+ vm_pageout_adjust_eq_iothrottle(eq, TRUE);
+
+ lck_mtx_lock(&vm_page_queue_free_lock);
+
+ if ((vm_page_free_count >= vm_page_free_target) &&
+ (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
+ /*
+ * done - we have met our target *and*
+ * there is no one waiting for a page.
+ */
+return_from_scan:
+ assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
+ vm_pageout_state.vm_pageout_inactive,
+ vm_pageout_state.vm_pageout_inactive_used, 0, 0);
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
+ vm_pageout_vminfo.vm_pageout_freed_speculative,
+ vm_pageout_state.vm_pageout_inactive_clean,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
+
+ return;
+ }
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+ }
+
+ /*
+ * Before anything, we check if we have any ripe volatile
+ * objects around. If so, try to purge the first object.
+ * If the purge fails, fall through to reclaim a page instead.
+ * If the purge succeeds, go back to the top and reevaluate
+ * the new memory situation.
+ */
+ retval = vps_purge_object();
+
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
+ /*
+ * Success
+ */
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
+ }
+
+ lock_yield_check = FALSE;
+ continue;
+ }
+
+ /*
+ * If our 'aged' queue is empty and we have some speculative pages
+ * in the other queues, let's go through and see if we need to age
+ * them.
+ *
+ * If we succeeded in aging a speculative Q or just that everything
+ * looks normal w.r.t queue age and queue counts, we keep going onward.
+ *
+ * If, for some reason, we seem to have a mismatch between the spec.
+ * page count and the page queues, we reset those variables and
+ * restart the loop (LD TODO: Track this better?).
+ */
+ if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
+ retval = vps_age_speculative_queue(force_speculative_aging);
+
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
+ lock_yield_check = FALSE;
+ continue;
+ }
+ }
+ force_speculative_aging = FALSE;
- VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
- iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
- memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
+ /*
+ * Check to see if we need to evict objects from the cache.
+ *
+ * Note: 'object' here doesn't have anything to do with
+ * the eviction part. We just need to make sure we have dropped
+ * any object lock we might be holding if we need to go down
+ * into the eviction logic.
+ */
+ retval = vps_object_cache_evict(&object);
- thread_block(THREAD_CONTINUE_NULL);
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
+ lock_yield_check = FALSE;
+ continue;
+ }
- VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
- iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
- memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
- vm_page_lock_queues();
+ /*
+ * Calculate our filecache_min that will affect the loop
+ * going forward.
+ */
+ vps_calculate_filecache_min();
- iq->pgo_throttled = FALSE;
+ /*
+ * LD TODO: Use a structure to hold all state variables for a single
+ * vm_pageout_scan iteration and pass that structure to this function instead.
+ */
+ retval = vps_flow_control(&flow_control, &anons_grabbed, &object,
+ &delayed_unlock, &local_freeq, &local_freed,
+ &vm_pageout_deadlock_target, inactive_burst_count);
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
if (loop_count >= vm_page_inactive_count) {
loop_count = 0;
}
+
inactive_burst_count = 0;
- goto Restart;
- /*NOTREACHED*/
- }
+ assert(object == NULL);
+ assert(delayed_unlock != 0);
+ lock_yield_check = FALSE;
+ continue;
+ } else if (retval == VM_PAGEOUT_SCAN_DONE_RETURN) {
+ goto return_from_scan;
+ }
flow_control.state = FCS_IDLE;
-consider_inactive:
+
vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
vm_pageout_inactive_external_forced_reactivate_limit);
loop_count++;
/*
* Choose a victim.
*/
- while (1) {
-#if CONFIG_BACKGROUND_QUEUE
- page_from_bg_q = FALSE;
-#endif /* CONFIG_BACKGROUND_QUEUE */
-
- m = NULL;
- m_object = VM_OBJECT_NULL;
-
- if (VM_DYNAMIC_PAGING_ENABLED()) {
- assert(vm_page_throttled_count == 0);
- assert(vm_page_queue_empty(&vm_page_queue_throttled));
- }
-
- /*
- * Try for a clean-queue inactive page.
- * These are pages that vm_pageout_scan tried to steal earlier, but
- * were dirty and had to be cleaned. Pick them up now that they are clean.
- */
- if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
- m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
-
- assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
-
- break;
- }
- /*
- * The next most eligible pages are ones we paged in speculatively,
- * but which have not yet been touched and have been aged out.
- */
- if (!vm_page_queue_empty(&sq->age_q)) {
- m = (vm_page_t) vm_page_queue_first(&sq->age_q);
-
- assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
-
- if (!m->vmp_dirty || force_anonymous == FALSE) {
- break;
- } else {
- m = NULL;
- }
- }
-
-#if CONFIG_BACKGROUND_QUEUE
- if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
- vm_object_t bg_m_object = NULL;
-
- m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
-
- bg_m_object = VM_PAGE_OBJECT(m);
-
- if (!VM_PAGE_PAGEABLE(m)) {
- /*
- * This page is on the background queue
- * but not on a pageable queue. This is
- * likely a transient state and whoever
- * took it out of its pageable queue
- * will likely put it back on a pageable
- * queue soon but we can't deal with it
- * at this point, so let's ignore this
- * page.
- */
- } else if (force_anonymous == FALSE || bg_m_object->internal) {
- if (bg_m_object->internal &&
- (VM_PAGE_Q_THROTTLED(iq) ||
- vm_compressor_out_of_space() == TRUE ||
- vm_page_free_count < (vm_page_free_reserved / 4))) {
- vm_pageout_skipped_bq_internal++;
- } else {
- page_from_bg_q = TRUE;
-
- if (bg_m_object->internal) {
- vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
- } else {
- vm_pageout_vminfo.vm_pageout_considered_bq_external++;
- }
- break;
- }
- }
- }
-#endif
- inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
-
- if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
- (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
- grab_anonymous = TRUE;
- anons_grabbed = 0;
-
- vm_pageout_vminfo.vm_pageout_skipped_external++;
- goto want_anonymous;
- }
- grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
+ m = NULL;
+ retval = vps_choose_victim_page(&m, &anons_grabbed, &grab_anonymous, force_anonymous, &page_from_bg_q, &reactivated_this_call);
-#if CONFIG_JETSAM
- /* If the file-backed pool has accumulated
- * significantly more pages than the jetsam
- * threshold, prefer to reclaim those
- * inline to minimise compute overhead of reclaiming
- * anonymous pages.
- * This calculation does not account for the CPU local
- * external page queues, as those are expected to be
- * much smaller relative to the global pools.
- */
- if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
- if (vm_page_pageable_external_count >
- vm_pageout_state.vm_page_filecache_min) {
- if ((vm_page_pageable_external_count *
- vm_pageout_memorystatus_fb_factor_dr) >
- (memorystatus_available_pages_critical *
- vm_pageout_memorystatus_fb_factor_nr)) {
- grab_anonymous = FALSE;
-
- VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
- }
- }
- if (grab_anonymous) {
- VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
- }
- }
-#endif /* CONFIG_JETSAM */
+ if (m == NULL) {
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
+ inactive_burst_count = 0;
-want_anonymous:
- if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
- if (!vm_page_queue_empty(&vm_page_queue_inactive)) {
- m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
-
- assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
- anons_grabbed = 0;
-
- if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
- if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
- if ((++reactivated_this_call % 100)) {
- vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
- goto must_activate_page;
- }
- /*
- * steal 1% of the file backed pages even if
- * we are under the limit that has been set
- * for a healthy filecache
- */
- }
- }
- break;
+ if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
+ VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
}
- }
- if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
- m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
- assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
- anons_grabbed++;
-
- break;
+ lock_yield_check = TRUE;
+ continue;
}
/*
VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
if (!vm_page_queue_empty(&sq->age_q)) {
- goto done_with_inactivepage;
+ lock_yield_check = TRUE;
+ continue;
}
if (vm_page_speculative_count) {
force_speculative_aging = TRUE;
- goto done_with_inactivepage;
+ lock_yield_check = TRUE;
+ continue;
}
panic("vm_pageout: no victim");
/* NOTREACHED */
}
+
assert(VM_PAGE_PAGEABLE(m));
m_object = VM_PAGE_OBJECT(m);
force_anonymous = FALSE;
* already got the lock
*/
if (m_object != object) {
+ boolean_t avoid_anon_pages = (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT);
+
/*
- * the object associated with candidate page is
- * different from the one we were just working
- * with... dump the lock if we still own it
- */
- if (object != NULL) {
- vm_object_unlock(object);
- object = NULL;
- }
- /*
- * Try to lock object; since we've alread got the
- * page queues lock, we can only 'try' for this one.
- * if the 'try' fails, we need to do a mutex_pause
- * to allow the owner of the object lock a chance to
- * run... otherwise, we're likely to trip over this
- * object in the same state as we work our way through
- * the queue... clumps of pages associated with the same
- * object are fairly typical on the inactive and active queues
+ * vps_switch_object() will always drop the 'object' lock first
+ * and then try to acquire the 'm_object' lock. So 'object' has to point to
+ * either 'm_object' or NULL.
*/
- if (!vm_object_lock_try_scan(m_object)) {
- vm_page_t m_want = NULL;
-
- vm_pageout_vminfo.vm_pageout_inactive_nolock++;
-
- if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
- VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
- }
-
- pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
-
- m->vmp_reference = FALSE;
-
- if (!m_object->object_is_shared_cache) {
- /*
- * don't apply this optimization if this is the shared cache
- * object, it's too easy to get rid of very hot and important
- * pages...
- * m->vmp_object must be stable since we hold the page queues lock...
- * we can update the scan_collisions field sans the object lock
- * since it is a separate field and this is the only spot that does
- * a read-modify-write operation and it is never executed concurrently...
- * we can asynchronously set this field to 0 when creating a UPL, so it
- * is possible for the value to be a bit non-determistic, but that's ok
- * since it's only used as a hint
- */
- m_object->scan_collisions = 1;
- }
- if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
- m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
- } else if (!vm_page_queue_empty(&sq->age_q)) {
- m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
- } else if ((grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT ||
- vm_page_queue_empty(&vm_page_queue_anonymous)) &&
- !vm_page_queue_empty(&vm_page_queue_inactive)) {
- m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
- } else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
- m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
- }
-
- /*
- * this is the next object we're going to be interested in
- * try to make sure its available after the mutex_pause
- * returns control
- */
- if (m_want) {
- vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
- }
+ retval = vps_switch_object(m, m_object, &object, page_prev_q_state, avoid_anon_pages, page_from_bg_q);
- goto requeue_page;
+ if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
+ lock_yield_check = TRUE;
+ continue;
}
- object = m_object;
- vm_pageout_scan_wants_object = VM_OBJECT_NULL;
}
assert(m_object == object);
assert(VM_PAGE_OBJECT(m) == m_object);
if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
}
-requeue_page:
- if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
- vm_page_enqueue_inactive(m, FALSE);
- } else {
- vm_page_activate(m);
- }
-#if CONFIG_BACKGROUND_QUEUE
-#if DEVELOPMENT || DEBUG
- if (page_from_bg_q == TRUE) {
- if (m_object->internal) {
- vm_pageout_rejected_bq_internal++;
- } else {
- vm_pageout_rejected_bq_external++;
- }
- }
-#endif
-#endif
- goto done_with_inactivepage;
+
+ vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
+
+ lock_yield_check = TRUE;
+ continue;
}
/*
* just leave it off the paging queues
*/
if (m->vmp_free_when_done || m->vmp_cleaning) {
- goto done_with_inactivepage;
+ lock_yield_check = TRUE;
+ continue;
}
}
inactive_burst_count = 0;
- goto done_with_inactivepage;
+
+ lock_yield_check = TRUE;
+ continue;
}
if (object->copy == VM_OBJECT_NULL) {
/*
/* deal with a rogue "reusable" page */
VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
}
- divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
- if (divisor == 0) {
+ if (vm_pageout_state.vm_page_xpmapped_min_divisor == 0) {
vm_pageout_state.vm_page_xpmapped_min = 0;
} else {
- vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / divisor;
+ vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / vm_pageout_state.vm_page_xpmapped_min_divisor;
}
if (!m->vmp_no_cache &&
-#if CONFIG_BACKGROUND_QUEUE
page_from_bg_q == FALSE &&
-#endif
(m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
(vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
/*
vm_page_deactivate(m);
VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
} else {
-must_activate_page:
/*
* The page was/is being used, so put back on active list.
*/
vm_page_activate(m);
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
inactive_burst_count = 0;
}
#if CONFIG_BACKGROUND_QUEUE
vm_pageout_rejected_bq_external++;
}
}
-#endif
-#endif
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+
if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
}
vm_pageout_state.vm_pageout_inactive_used++;
- goto done_with_inactivepage;
+ lock_yield_check = TRUE;
+ continue;
}
/*
* Make sure we call pmap_get_refmod() if it
}
}
- XPR(XPR_VM_PAGEOUT,
- "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
- object, m->vmp_offset, m, 0, 0);
-
/*
* we've got a candidate page to steal...
*
VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
inactive_burst_count = 0;
- goto done_with_inactivepage;
+
+ lock_yield_check = TRUE;
+ continue;
}
if (inactive_throttled == TRUE) {
- if (object->internal == FALSE) {
- /*
- * we need to break up the following potential deadlock case...
- * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
- * b) The thread doing the writing is waiting for pages while holding the truncate lock
- * c) Most of the pages in the inactive queue belong to this file.
- *
- * we are potentially in this deadlock because...
- * a) the external pageout queue is throttled
- * b) we're done with the active queue and moved on to the inactive queue
- * c) we've got a dirty external page
- *
- * since we don't know the reason for the external pageout queue being throttled we
- * must suspect that we are deadlocked, so move the current page onto the active queue
- * in an effort to cause a page from the active queue to 'age' to the inactive queue
- *
- * if we don't have jetsam configured (i.e. we have a dynamic pager), set
- * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
- * pool the next time we select a victim page... if we can make enough new free pages,
- * the deadlock will break, the external pageout queue will empty and it will no longer
- * be throttled
- *
- * if we have jetsam configured, keep a count of the pages reactivated this way so
- * that we can try to find clean pages in the active/inactive queues before
- * deciding to jetsam a process
- */
- vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
-
- vm_page_check_pageable_safe(m);
- assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
- vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq);
- m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
- vm_page_active_count++;
- vm_page_pageable_external_count++;
-
- vm_pageout_adjust_eq_iothrottle(eq, FALSE);
-
-#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
- vm_pageout_inactive_external_forced_reactivate_limit--;
-
- if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
- vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
- /*
- * Possible deadlock scenario so request jetsam action
- */
- assert(object);
- vm_object_unlock(object);
- object = VM_OBJECT_NULL;
- vm_page_unlock_queues();
+ vps_deal_with_throttled_queues(m, &object, &vm_pageout_inactive_external_forced_reactivate_limit,
+ &delayed_unlock, &force_anonymous, page_from_bg_q);
- VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
- vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
-
- /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
- if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
- VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
- }
-
- VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
- vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
+ inactive_burst_count = 0;
- vm_page_lock_queues();
- delayed_unlock = 1;
- }
-#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
- force_anonymous = TRUE;
-#endif
- inactive_burst_count = 0;
- goto done_with_inactivepage;
- } else {
- goto must_activate_page;
+ if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
+ VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
}
+
+ lock_yield_check = TRUE;
+ continue;
}
/*
vm_pageout_cluster(m);
inactive_burst_count = 0;
-done_with_inactivepage:
-
- if (delayed_unlock++ > delayed_unlock_limit) {
- int freed = local_freed;
-
- vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
- VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
- if (freed == 0) {
- lck_mtx_yield(&vm_page_queue_lock);
- }
- } else if (vm_pageout_scan_wants_object) {
- vm_page_unlock_queues();
- mutex_pause(0);
- vm_page_lock_queues();
- }
/*
* back to top of pageout scan loop
*/
DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);
-#if !CONFIG_EMBEDDED
lck_mtx_lock(&vm_page_queue_free_lock);
vm_pageout_running = TRUE;
lck_mtx_unlock(&vm_page_queue_free_lock);
-#endif /* CONFIG_EMBEDDED */
vm_pageout_scan();
/*
assert(vm_page_free_wanted_privileged == 0);
assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
-#if !CONFIG_EMBEDDED
vm_pageout_running = FALSE;
+#if XNU_TARGET_OS_OSX
if (vm_pageout_waiter) {
vm_pageout_waiter = FALSE;
thread_wakeup((event_t)&vm_pageout_waiter);
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
lck_mtx_unlock(&vm_page_queue_free_lock);
vm_page_unlock_queues();
- counter(c_vm_pageout_block++);
thread_block((thread_continue_t)vm_pageout_continue);
/*NOTREACHED*/
}
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
kern_return_t
vm_pageout_wait(uint64_t deadline)
{
return kr;
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
static void
KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
q = cq->q;
+#if __AMP__
+ if (vm_compressor_ebound && (vm_pageout_state.vm_compressor_thread_count > 1)) {
+ local_batch_size = (q->pgo_maxlaundry >> 3);
+ local_batch_size = MAX(local_batch_size, 16);
+ } else {
+ local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
+ }
+#else
local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
+#endif
#if RECORD_THE_COMPRESSED_DATA
if (q->pgo_laundry) {
vm_object_owner_compressed_update(object,
+1);
}
- VM_STAT_INCR(compressions);
+ counter_inc(&vm_statistics_compressions);
if (m->vmp_tabled) {
vm_page_remove(m, TRUE);
proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
- eq->pgo_lowpriority = req_lowpriority;
-
vm_page_lock_queues();
+ eq->pgo_lowpriority = req_lowpriority;
}
}
thread_vm_bind_group_add();
}
+#if CONFIG_THREAD_GROUPS
+ thread_group_vm_add();
+#endif /* CONFIG_THREAD_GROUPS */
+
+#if __AMP__
+ if (vm_compressor_ebound) {
+ /*
+ * Use the soft bound option for vm_compressor to allow it to run on
+ * P-cores if E-cluster is unavailable.
+ */
+ thread_bind_cluster_type(self, 'E', true);
+ }
+#endif /* __AMP__ */
thread_set_thread_name(current_thread(), "VM_compressor");
#if DEVELOPMENT || DEBUG
kern_return_t
vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
{
- if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
+ if (OSCompareAndSwapPtr(NULL, ptrauth_nop_cast(void *, func), (void * volatile *) &consider_buffer_cache_collect)) {
return KERN_SUCCESS;
} else {
return KERN_FAILURE; /* Already set */
return;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
available_memory = (uint64_t) memorystatus_available_pages;
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
total_pages = (unsigned int) atop_64(max_mem);
#if CONFIG_SECLUDED_MEMORY
}
#endif /* VM_PRESSURE_EVENTS */
+/*
+ * Function called by a kernel thread to either get the current pressure level or
+ * wait until memory pressure changes from a given level.
+ *
+ * When VM_PRESSURE_EVENTS is not configured this always returns KERN_FAILURE.
+ *
+ * pressure_level is an in/out parameter and must not be NULL
+ * (KERN_INVALID_ARGUMENT is returned otherwise).
+ *
+ * If *pressure_level is kVMPressureJetsam on entry, wait_for_pressure must be
+ * set (KERN_INVALID_ARGUMENT otherwise).  The caller then waits interruptibly
+ * on the memorystatus_jetsam_fg_band_waiters event; the waiter count is
+ * incremented under memorystatus_jetsam_fg_band_lock only when assert_wait()
+ * reports THREAD_WAITING.  KERN_ABORTED is returned unless the wait ends with
+ * THREAD_AWAKENED, in which case *pressure_level is set to kVMPressureJetsam
+ * and KERN_SUCCESS is returned.
+ *
+ * For any other value of *pressure_level: if wait_for_pressure is TRUE, wait
+ * interruptibly on vm_pageout_state.vm_pressure_changed for as long as the
+ * sampled memorystatus_vm_pressure_level equals *pressure_level, returning
+ * KERN_ABORTED if a wait is interrupted.  On the way out *pressure_level is
+ * overwritten with the last sampled level and KERN_SUCCESS is returned.
+ */
kern_return_t
mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level)
{
-#if CONFIG_EMBEDDED
-
- return KERN_FAILURE;
-
-#elif !VM_PRESSURE_EVENTS
+#if !VM_PRESSURE_EVENTS
return KERN_FAILURE;
#else /* VM_PRESSURE_EVENTS */
- kern_return_t kr = KERN_SUCCESS;
+ wait_result_t wr = 0;
+ vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
- if (pressure_level != NULL) {
- vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
+ if (pressure_level == NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
- if (wait_for_pressure == TRUE) {
- wait_result_t wr = 0;
+ if (*pressure_level == kVMPressureJetsam) {
+ if (!wait_for_pressure) {
+ return KERN_INVALID_ARGUMENT;
+ }
- while (old_level == *pressure_level) {
- wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
- THREAD_INTERRUPTIBLE);
- if (wr == THREAD_WAITING) {
- wr = thread_block(THREAD_CONTINUE_NULL);
- }
- if (wr == THREAD_INTERRUPTED) {
- return KERN_ABORTED;
- }
- if (wr == THREAD_AWAKENED) {
- old_level = memorystatus_vm_pressure_level;
+ lck_mtx_lock(&memorystatus_jetsam_fg_band_lock);
+ wr = assert_wait((event_t)&memorystatus_jetsam_fg_band_waiters,
+ THREAD_INTERRUPTIBLE);
+ if (wr == THREAD_WAITING) {
+ ++memorystatus_jetsam_fg_band_waiters;
+ lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
+ wr = thread_block(THREAD_CONTINUE_NULL);
+ } else {
+ lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
+ }
+ if (wr != THREAD_AWAKENED) {
+ return KERN_ABORTED;
+ }
+ *pressure_level = kVMPressureJetsam;
+ return KERN_SUCCESS;
+ }
- if (old_level != *pressure_level) {
- break;
- }
- }
+ if (wait_for_pressure == TRUE) {
+ while (old_level == *pressure_level) {
+ wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
+ THREAD_INTERRUPTIBLE);
+ if (wr == THREAD_WAITING) {
+ wr = thread_block(THREAD_CONTINUE_NULL);
+ }
+ if (wr == THREAD_INTERRUPTED) {
+ return KERN_ABORTED;
}
- }
- *pressure_level = old_level;
- kr = KERN_SUCCESS;
- } else {
- kr = KERN_INVALID_ARGUMENT;
+ if (wr == THREAD_AWAKENED) {
+ old_level = memorystatus_vm_pressure_level;
+ }
+ }
}
- return kr;
+ *pressure_level = old_level;
+ return KERN_SUCCESS;
#endif /* VM_PRESSURE_EVENTS */
}
*
* 2. The jetsam path might need to allocate zone memory itself. We could try
* using the non-blocking variant of zalloc for this path, but we can still
- * end up trying to do a kernel_memory_allocate when the zone_map is almost
+ * end up trying to do a kernel_memory_allocate when the zone maps are almost
* full.
*/
-extern boolean_t is_zone_map_nearing_exhaustion(void);
-
void
vm_pageout_garbage_collect(int collect)
{
if (collect) {
- if (is_zone_map_nearing_exhaustion()) {
+ if (zone_map_nearing_exhaustion()) {
/*
* Woken up by the zone allocator for zone-map-exhaustion jetsams.
*
* ok; if memory pressure persists, the thread will simply be woken
* up again.
*/
- consider_zone_gc(TRUE);
+ zone_gc(ZONE_GC_JETSAM);
} else {
/* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
boolean_t buf_large_zfree = FALSE;
}
if (first_try == TRUE || buf_large_zfree == TRUE) {
/*
- * consider_zone_gc should be last, because the other operations
+ * zone_gc should be last, because the other operations
* might return memory to zones.
*/
- consider_zone_gc(FALSE);
+ zone_gc(ZONE_GC_TRIM);
}
first_try = FALSE;
} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
void
-vm_set_restrictions()
+vm_set_restrictions(unsigned int num_cpus) /* sets vm_pageout_state.vm_restricted_to_single_processor: boot-arg first, otherwise derived from num_cpus */
{
- host_basic_info_data_t hinfo;
- mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
-
-#define BSD_HOST 1
- host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
+ int vm_restricted_to_single_processor = 0; /* filled in by PE_parse_boot_argn() below; stays 0 otherwise */
- assert(hinfo.max_cpus > 0);
-
- if (hinfo.max_cpus <= 3) {
- /*
- * on systems with a limited number of CPUS, bind the
- * 4 major threads that can free memory and that tend to use
- * a fair bit of CPU under pressured conditions to a single processor.
- * This insures that these threads don't hog all of the available CPUs
- * (important for camera launch), while allowing them to run independently
- * w/r to locks... the 4 threads are
- * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
- * vm_compressor_swap_trigger_thread (minor and major compactions),
- * memorystatus_thread (jetsams).
- *
- * the first time the thread is run, it is responsible for checking the
- * state of vm_restricted_to_single_processor, and if TRUE it calls
- * thread_bind_master... someday this should be replaced with a group
- * scheduling mechanism and KPI.
- */
- vm_pageout_state.vm_restricted_to_single_processor = TRUE;
+ if (PE_parse_boot_argn("vm_restricted_to_single_processor", &vm_restricted_to_single_processor, sizeof(vm_restricted_to_single_processor))) { /* a successful parse takes precedence over the CPU-count heuristic */
+ kprintf("Overriding vm_restricted_to_single_processor to %d\n", vm_restricted_to_single_processor);
+ vm_pageout_state.vm_restricted_to_single_processor = (vm_restricted_to_single_processor ? TRUE : FALSE); /* any non-zero value enables the restriction */
} else {
- vm_pageout_state.vm_restricted_to_single_processor = FALSE;
+ assert(num_cpus > 0);
+ /* no boot-arg override: restrict only when there are 3 or fewer CPUs */
+ if (num_cpus <= 3) {
+ /*
+ * on systems with a limited number of CPUs, bind the
+ * 4 major threads that can free memory and that tend to use
+ * a fair bit of CPU under pressured conditions to a single processor.
+ * This ensures that these threads don't hog all of the available CPUs
+ * (important for camera launch), while allowing them to run independently
+ * with respect to locks... the 4 threads are
+ * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
+ * vm_compressor_swap_trigger_thread (minor and major compactions),
+ * memorystatus_thread (jetsams).
+ *
+ * the first time the thread is run, it is responsible for checking the
+ * state of vm_restricted_to_single_processor, and if TRUE it calls
+ * thread_bind_master... someday this should be replaced with a group
+ * scheduling mechanism and KPI.
+ */
+ vm_pageout_state.vm_restricted_to_single_processor = TRUE;
+ } else {
+ vm_pageout_state.vm_restricted_to_single_processor = FALSE;
+ }
}
}
*/
s = splsched();
+ vm_pageout_scan_thread = self;
+
+#if CONFIG_VPS_DYNAMIC_PRIO
+
+ int vps_dynprio_bootarg = 0;
+
+ if (PE_parse_boot_argn("vps_dynamic_priority_enabled", &vps_dynprio_bootarg, sizeof(vps_dynprio_bootarg))) {
+ vps_dynamic_priority_enabled = (vps_dynprio_bootarg ? TRUE : FALSE);
+ kprintf("Overriding vps_dynamic_priority_enabled to %d\n", vps_dynamic_priority_enabled);
+ } else {
+ if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
+ vps_dynamic_priority_enabled = TRUE;
+ } else {
+ vps_dynamic_priority_enabled = FALSE;
+ }
+ }
+
+ if (vps_dynamic_priority_enabled) {
+ sched_set_kernel_thread_priority(self, MAXPRI_THROTTLE);
+ thread_set_eager_preempt(self);
+ } else {
+ sched_set_kernel_thread_priority(self, BASEPRI_VM);
+ }
+
+#else /* CONFIG_VPS_DYNAMIC_PRIO */
+
+ vps_dynamic_priority_enabled = FALSE;
+ sched_set_kernel_thread_priority(self, BASEPRI_VM);
+
+#endif /* CONFIG_VPS_DYNAMIC_PRIO */
+
thread_lock(self);
self->options |= TH_OPT_VMPRIV;
- sched_set_thread_base_priority(self, BASEPRI_VM);
thread_unlock(self);
if (!self->reserved_stack) {
self->reserved_stack = self->kernel_stack;
}
- if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
- thread_vm_bind_group_add();
+ if (vm_pageout_state.vm_restricted_to_single_processor == TRUE &&
+ vps_dynamic_priority_enabled == FALSE) {
+ thread_vm_bind_group_add();
+ }
+
+
+#if CONFIG_THREAD_GROUPS
+ thread_group_vm_add();
+#endif /* CONFIG_THREAD_GROUPS */
+
+#if __AMP__
+ PE_parse_boot_argn("vmpgo_pcluster", &vm_pgo_pbound, sizeof(vm_pgo_pbound));
+ if (vm_pgo_pbound) {
+ /*
+ * Use the soft bound option for vm pageout to allow it to run on
+ * E-cores if P-cluster is unavailable.
+ */
+ thread_bind_cluster_type(self, 'P', true);
}
+#endif /* __AMP__ */
splx(s);
if (result != KERN_SUCCESS) {
panic("vm_pageout_iothread_external: create failed");
}
-
+ thread_set_thread_name(vm_pageout_state.vm_pageout_external_iothread, "VM_pageout_external_iothread");
thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
- result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
+ result = kernel_thread_create((thread_continue_t)vm_pageout_garbage_collect, NULL,
BASEPRI_DEFAULT,
&thread);
if (result != KERN_SUCCESS) {
panic("vm_pageout_garbage_collect: create failed");
}
+ thread_set_thread_name(thread, "VM_pageout_garbage_collect");
+ if (thread->reserved_stack == 0) {
+ assert(thread->kernel_stack);
+ thread->reserved_stack = thread->kernel_stack;
+ }
+
+ thread_mtx_lock(thread);
+ thread_start(thread);
+ thread_mtx_unlock(thread);
thread_deallocate(thread);
switch (vm_compressor_mode) {
case VM_PAGER_DEFAULT:
printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
+ OS_FALLTHROUGH;
case VM_PAGER_COMPRESSOR_WITH_SWAP:
vm_config.compressor_is_present = TRUE;
case VM_PAGER_FREEZER_DEFAULT:
printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
+ OS_FALLTHROUGH;
case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
vm_config.compressor_is_present = TRUE;
vm_object_tracking_init();
#endif /* VM_OBJECT_TRACKING */
- vm_tests();
-
vm_pageout_continue();
/*
vm_pageout_internal_start(void)
{
kern_return_t result;
- int i;
host_basic_info_data_t hinfo;
+ vm_offset_t buf, bufsize;
assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
assert(hinfo.max_cpus > 0);
- lck_grp_init(&vm_pageout_lck_grp, "vm_pageout", LCK_GRP_ATTR_NULL);
-
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
vm_pageout_state.vm_compressor_thread_count = 1;
-#else
+#else /* !XNU_TARGET_OS_OSX */
if (hinfo.max_cpus > 4) {
vm_pageout_state.vm_compressor_thread_count = 2;
} else {
vm_pageout_state.vm_compressor_thread_count = 1;
}
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
sizeof(vm_pageout_state.vm_compressor_thread_count));
+#if __AMP__
+ PE_parse_boot_argn("vmcomp_ecluster", &vm_compressor_ebound, sizeof(vm_compressor_ebound));
+ if (vm_compressor_ebound) {
+ vm_pageout_state.vm_compressor_thread_count = 2;
+ }
+#endif
if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus) {
vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
}
vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
}
- vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
+ vm_pageout_queue_internal.pgo_maxlaundry =
+ (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
+
+ PE_parse_boot_argn("vmpgoi_maxlaundry",
+ &vm_pageout_queue_internal.pgo_maxlaundry,
+ sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
- PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
+ bufsize = COMPRESSOR_SCRATCH_BUF_SIZE;
+ if (kernel_memory_allocate(kernel_map, &buf,
+ bufsize * vm_pageout_state.vm_compressor_thread_count,
+ 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR)) {
+ panic("vm_pageout_internal_start: Unable to allocate %zd bytes",
+ (size_t)(bufsize * vm_pageout_state.vm_compressor_thread_count));
+ }
- for (i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
+ for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
ciq[i].id = i;
ciq[i].q = &vm_pageout_queue_internal;
ciq[i].current_chead = NULL;
- ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
+ ciq[i].scratch_buf = (char *)(buf + i * bufsize);
- result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
- BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
+ result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal,
+ (void *)&ciq[i], BASEPRI_VM,
+ &vm_pageout_state.vm_pageout_internal_iothread);
if (result == KERN_SUCCESS) {
thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
int upl_flags = 0;
vm_size_t upl_size = sizeof(struct upl);
+ assert(page_aligned(size));
+
size = round_page_32(size);
if (type & UPL_CREATE_LITE) {
upl->flags = upl_flags | flags;
upl->kaddr = (vm_offset_t)0;
- upl->size = 0;
+ upl->u_offset = 0;
+ upl->u_size = 0;
upl->map_object = NULL;
upl->ref_count = 1;
upl->ext_ref_count = 0;
int page_field_size; /* bit field in word size buf */
int size;
+// DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object);
+
if (upl->ext_ref_count) {
panic("upl(%p) ext_ref_count", upl);
}
#endif /* CONFIG_IOSCHED */
#if CONFIG_IOSCHED || UPL_DEBUG
- if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
+ if (((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) &&
+ !(upl->flags & UPL_VECTOR)) {
vm_object_t object;
if (upl->flags & UPL_SHADOWED) {
if (upl->flags & UPL_DEVICE_MEMORY) {
size = PAGE_SIZE;
} else {
- size = upl->size;
+ size = upl_adjusted_size(upl, PAGE_MASK);
}
page_field_size = 0;
return FALSE;
}
+#define MIN_DELAYED_WORK_CTX_ALLOCATED (16) /* element count handed to zone_fill_initially() below */
+#define MAX_DELAYED_WORK_CTX_ALLOCATED (512) /* limit handed to zone_set_exhaustible() below */
+/*
+ * vm_page_delayed_work_ctx_needed is incremented by
+ * vm_page_delayed_work_get_ctx() every time it cannot obtain a context
+ * from dw_ctx_zone.  dw_ctx_zone itself is assigned in
+ * vm_page_delayed_work_init_ctx().
+ */
+int vm_page_delayed_work_ctx_needed = 0;
+SECURITY_READ_ONLY_LATE(zone_t) dw_ctx_zone;
+/*
+ * vm_page_delayed_work_init_ctx:
+ *
+ * Create the "delayed-work-ctx" zone, whose elements are the size of
+ * struct vm_page_delayed_work_ctx, and store it in dw_ctx_zone.
+ */
+void
+vm_page_delayed_work_init_ctx(void)
+{
+ size_t elem_size = sizeof(struct vm_page_delayed_work_ctx);
+ /*
+ * The setup block calls zone_set_exhaustible() on the new zone with
+ * MAX_DELAYED_WORK_CTX_ALLOCATED -- presumably an upper bound on the
+ * number of elements; confirm against the zone allocator's contract.
+ */
+ dw_ctx_zone = zone_create_ext("delayed-work-ctx", elem_size,
+ ZC_NOGC, ZONE_ID_ANY, ^(zone_t z) {
+ zone_set_exhaustible(z, MAX_DELAYED_WORK_CTX_ALLOCATED);
+ });
+ /* presumably pre-populates the zone with MIN_DELAYED_WORK_CTX_ALLOCATED elements -- TODO confirm */
+ zone_fill_initially(dw_ctx_zone, MIN_DELAYED_WORK_CTX_ALLOCATED);
+}
+
+/*
+ * vm_page_delayed_work_get_ctx:
+ *
+ * Try to take a delayed-work context from dw_ctx_zone using
+ * zalloc_noblock().
+ *
+ * On success the calling thread is recorded in the context's
+ * delayed_owner field and the context's dwp member is returned.
+ * On failure vm_page_delayed_work_ctx_needed is incremented and NULL
+ * is returned; the callers visible in this file then fall back to a
+ * single on-stack entry with a delayed-work limit of 1.
+ */
+struct vm_page_delayed_work*
+vm_page_delayed_work_get_ctx(void)
+{
+	struct vm_page_delayed_work_ctx *ctx;
+
+	ctx = (struct vm_page_delayed_work_ctx *)zalloc_noblock(dw_ctx_zone);
+	if (ctx == NULL) {
+		/* No context was handed back: record the miss and report failure. */
+		vm_page_delayed_work_ctx_needed++;
+		return NULL;
+	}
+
+	ctx->delayed_owner = current_thread();
+	return ctx->dwp;
+}
+
+/*
+ * vm_page_delayed_work_finish_ctx:
+ *
+ * Give a delayed-work context back to dw_ctx_zone.
+ *
+ * dwp is reinterpreted as a pointer to the enclosing
+ * struct vm_page_delayed_work_ctx, so it is expected to be a value
+ * previously returned by vm_page_delayed_work_get_ctx().
+ * NOTE(review): the cast assumes the dwp member sits at the very start
+ * of struct vm_page_delayed_work_ctx -- the layout is not visible
+ * here; confirm against the structure definition.
+ */
+void
+vm_page_delayed_work_finish_ctx(struct vm_page_delayed_work* dwp)
+{
+	struct vm_page_delayed_work_ctx *owner_ctx = (struct vm_page_delayed_work_ctx *)dwp;
+
+	/* Clear the ownership field before the element goes back to the zone. */
+	owner_ctx->delayed_owner = NULL;
+	zfree(dw_ctx_zone, owner_ctx);
+}
/*
* Routine: vm_object_upl_request
int refmod_state = 0;
wpl_array_t lite_list = NULL;
vm_object_t last_copy_object;
- struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
- struct vm_page_delayed_work *dwp;
+ struct vm_page_delayed_work dw_array;
+ struct vm_page_delayed_work *dwp, *dwp_start;
+ bool dwp_finish_ctx = TRUE;
int dw_count;
int dw_limit;
int io_tracking_flag = 0;
task_t task = current_task();
#endif /* DEVELOPMENT || DEBUG */
+ dwp_start = dwp = NULL;
+
if (cntrl_flags & ~UPL_VALID_FLAGS) {
/*
* For forward compatibility's sake,
panic("vm_object_upl_request: contiguous object specified\n");
}
+ assertf(page_aligned(offset) && page_aligned(size),
+ "offset 0x%llx size 0x%x",
+ offset, size);
+
VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
+ dw_count = 0;
+ dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dwp_start = vm_page_delayed_work_get_ctx();
+ if (dwp_start == NULL) {
+ dwp_start = &dw_array;
+ dw_limit = 1;
+ dwp_finish_ctx = FALSE;
+ }
+
+ dwp = dwp_start;
+
if (size > MAX_UPL_SIZE_BYTES) {
size = MAX_UPL_SIZE_BYTES;
}
upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
upl->map_object->vo_shadow_offset = offset;
upl->map_object->wimg_bits = object->wimg_bits;
+ assertf(page_aligned(upl->map_object->vo_shadow_offset),
+ "object %p shadow_offset 0x%llx",
+ upl->map_object, upl->map_object->vo_shadow_offset);
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ alias_page = vm_page_grab_fictitious(TRUE);
upl->flags |= UPL_SHADOWED;
}
/*
* we can lock in the paging_offset once paging_in_progress is set
*/
- upl->size = size;
- upl->offset = offset + object->paging_offset;
+ upl->u_size = size;
+ upl->u_offset = offset + object->paging_offset;
#if CONFIG_IOSCHED || UPL_DEBUG
if (object->io_tracking || upl_debug_enabled) {
dst_offset = offset;
size_in_pages = size / PAGE_SIZE;
- dwp = &dw_array[0];
- dw_count = 0;
- dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
-
if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) {
object->scan_collisions = 0;
if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
boolean_t isSSD = FALSE;
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
isSSD = TRUE;
-#else
+#else /* !XNU_TARGET_OS_OSX */
vnode_pager_get_isSSD(object->pager, &isSSD);
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
vm_object_unlock(object);
OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
vm_object_unlock(object);
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ alias_page = vm_page_grab_fictitious(TRUE);
vm_object_lock(object);
}
if (cntrl_flags & UPL_COPYOUT_FROM) {
pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
- lite_list[pg_num >> 5] |= 1 << (pg_num & 31);
+ lite_list[pg_num >> 5] |= 1U << (pg_num & 31);
if (hw_dirty) {
if (pmap_flushes_delayed == FALSE) {
dst_page->vmp_clustered = TRUE;
if (!(cntrl_flags & UPL_FILE_IO)) {
- VM_STAT_INCR(pageins);
+ counter_inc(&vm_statistics_pageins);
}
}
}
pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
- lite_list[pg_num >> 5] |= 1 << (pg_num & 31);
+ lite_list[pg_num >> 5] |= 1U << (pg_num & 31);
if (hw_dirty) {
pmap_clear_modify(phys_page);
upl->flags &= ~UPL_CLEAR_DIRTY;
upl->flags |= UPL_SET_DIRTY;
dirty = TRUE;
- upl->flags |= UPL_SET_DIRTY;
+ /*
+ * Page belonging to a code-signed object is about to
+ * be written. Mark it tainted and disconnect it from
+ * all pmaps so processes have to fault it back in and
+ * deal with the tainted bit.
+ */
+ if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
+ dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
+ vm_page_upl_tainted++;
+ if (dst_page->vmp_pmapped) {
+ refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
+ if (refmod_state & VM_MEM_REFERENCED) {
+ dst_page->vmp_reference = TRUE;
+ }
+ }
+ }
} else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
/*
* clean in place for read implies
try_next_page:
if (dwp->dw_mask) {
if (dwp->dw_mask & DW_vm_page_activate) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
}
VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
if (dw_count >= dw_limit) {
- vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
- dwp = &dw_array[0];
+ dwp = dwp_start;
dw_count = 0;
}
}
xfer_size -= PAGE_SIZE;
}
if (dw_count) {
- vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
+ dwp = dwp_start;
+ dw_count = 0;
}
if (alias_page != NULL) {
}
#endif /* DEVELOPMENT || DEBUG */
+ if (dwp_start && dwp_finish_ctx) {
+ vm_page_delayed_work_finish_ctx(dwp_start);
+ dwp_start = dwp = NULL;
+ }
+
return KERN_SUCCESS;
}
return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
}
-#if CONFIG_EMBEDDED
int cs_executable_create_upl = 0;
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
-#endif /* CONFIG_EMBEDDED */
kern_return_t
vm_map_create_upl(
vm_map_offset_t local_offset;
vm_map_offset_t local_start;
kern_return_t ret;
+ vm_map_address_t original_offset;
+ vm_map_size_t original_size, adjusted_size;
+ vm_map_offset_t local_entry_start;
+ vm_object_offset_t local_entry_offset;
+ vm_object_offset_t offset_in_mapped_page;
+ boolean_t release_map = FALSE;
+
+start_with_map:
- assert(page_aligned(offset));
+ original_offset = offset;
+ original_size = *upl_size;
+ adjusted_size = original_size;
caller_flags = *flags;
* For forward compatibility's sake,
* reject any unknown flag.
*/
- return KERN_INVALID_VALUE;
+ ret = KERN_INVALID_VALUE;
+ goto done;
}
force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
if (upl == NULL) {
- return KERN_INVALID_ARGUMENT;
+ ret = KERN_INVALID_ARGUMENT;
+ goto done;
}
REDISCOVER_ENTRY:
if (!vm_map_lookup_entry(map, offset, &entry)) {
vm_map_unlock_read(map);
- return KERN_FAILURE;
+ ret = KERN_FAILURE;
+ goto done;
}
- if ((entry->vme_end - offset) < *upl_size) {
- *upl_size = (upl_size_t) (entry->vme_end - offset);
- assert(*upl_size == entry->vme_end - offset);
+ local_entry_start = entry->vme_start;
+ local_entry_offset = VME_OFFSET(entry);
+
+ if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
+ DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%x flags 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, *upl_size, *flags);
+ }
+
+ if (entry->vme_end - original_offset < adjusted_size) {
+ adjusted_size = entry->vme_end - original_offset;
+ assert(adjusted_size > 0);
+ *upl_size = (upl_size_t) adjusted_size;
+ assert(*upl_size == adjusted_size);
}
if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
}
}
vm_map_unlock_read(map);
- return KERN_SUCCESS;
+ ret = KERN_SUCCESS;
+ goto done;
+ }
+
+ offset_in_mapped_page = 0;
+ if (VM_MAP_PAGE_SIZE(map) < PAGE_SIZE) {
+ offset = vm_map_trunc_page(original_offset, VM_MAP_PAGE_MASK(map));
+ *upl_size = (upl_size_t)
+ (vm_map_round_page(original_offset + adjusted_size,
+ VM_MAP_PAGE_MASK(map))
+ - offset);
+
+ offset_in_mapped_page = original_offset - offset;
+ assert(offset_in_mapped_page < VM_MAP_PAGE_SIZE(map));
+
+ DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%llx flags 0x%llx -> offset 0x%llx adjusted_size 0x%llx *upl_size 0x%x offset_in_mapped_page 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)original_offset, (uint64_t)original_size, *flags, (uint64_t)offset, (uint64_t)adjusted_size, *upl_size, offset_in_mapped_page);
}
if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
VME_OBJECT_SET(entry,
vm_object_allocate((vm_size_t)
- (entry->vme_end -
- entry->vme_start)));
+ vm_object_round_page((entry->vme_end - entry->vme_start))));
VME_OFFSET_SET(entry, 0);
assert(entry->use_pmap);
!entry->is_sub_map &&
!(entry->protection & VM_PROT_WRITE)) {
vm_map_unlock_read(map);
- return KERN_PROTECTION_FAILURE;
+ ret = KERN_PROTECTION_FAILURE;
+ goto done;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
if (map->pmap != kernel_pmap &&
(caller_flags & UPL_COPYOUT_FROM) &&
(entry->protection & VM_PROT_EXECUTE) &&
*/
vm_map_unlock_read(map);
+ entry = VM_MAP_ENTRY_NULL;
/* allocate kernel buffer */
ksize = round_page(*upl_size);
kaddr = 0;
tag);
if (ret == KERN_SUCCESS) {
/* copyin the user data */
- assert(page_aligned(offset));
ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
}
if (ret == KERN_SUCCESS) {
ksize - *upl_size);
}
/* create the UPL from the kernel buffer */
- ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
- upl, page_list, count, flags, tag);
+ vm_object_offset_t offset_in_object;
+ vm_object_offset_t offset_in_object_page;
+
+ offset_in_object = offset - local_entry_start + local_entry_offset;
+ offset_in_object_page = offset_in_object - vm_object_trunc_page(offset_in_object);
+ assert(offset_in_object_page < PAGE_SIZE);
+ assert(offset_in_object_page + offset_in_mapped_page < PAGE_SIZE);
+ *upl_size -= offset_in_object_page + offset_in_mapped_page;
+ ret = vm_map_create_upl(kernel_map,
+ (vm_map_address_t)(kaddr + offset_in_object_page + offset_in_mapped_page),
+ upl_size, upl, page_list, count, flags, tag);
}
if (kaddr != 0) {
/* free the kernel buffer */
upl_size_t, *upl_size,
kern_return_t, ret);
#endif /* DEVELOPMENT || DEBUG */
- return ret;
+ goto done;
}
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
local_object = VME_OBJECT(entry);
assert(local_object != VM_OBJECT_NULL);
map->mapped_in_other_pmaps)
? PMAP_NULL
: map->pmap),
+ VM_MAP_PAGE_SIZE(map),
entry->vme_start,
prot);
&version, &object,
&new_offset, &prot, &wired,
NULL,
- &real_map) != KERN_SUCCESS) {
+ &real_map, NULL) != KERN_SUCCESS) {
if (fault_type == VM_PROT_WRITE) {
vm_counters.create_upl_lookup_failure_write++;
} else {
vm_counters.create_upl_lookup_failure_copy++;
}
vm_map_unlock_read(local_map);
- return KERN_FAILURE;
+ ret = KERN_FAILURE;
+ goto done;
}
- if (real_map != map) {
+ if (real_map != local_map) {
vm_map_unlock(real_map);
}
vm_map_unlock_read(local_map);
submap = VME_SUBMAP(entry);
local_start = entry->vme_start;
- local_offset = VME_OFFSET(entry);
+ local_offset = (vm_map_offset_t)VME_OFFSET(entry);
vm_map_reference(submap);
vm_map_unlock_read(map);
- ret = vm_map_create_upl(submap,
- local_offset + (offset - local_start),
- upl_size, upl, page_list, count, flags, tag);
- vm_map_deallocate(submap);
+ DEBUG4K_UPL("map %p offset 0x%llx (0x%llx) size 0x%x (adjusted 0x%llx original 0x%llx) offset_in_mapped_page 0x%llx submap %p\n", map, (uint64_t)offset, (uint64_t)original_offset, *upl_size, (uint64_t)adjusted_size, (uint64_t)original_size, offset_in_mapped_page, submap);
+ offset += offset_in_mapped_page;
+ *upl_size -= offset_in_mapped_page;
- return ret;
+ if (release_map) {
+ vm_map_deallocate(map);
+ }
+ map = submap;
+ release_map = TRUE;
+ offset = local_offset + (offset - local_start);
+ goto start_with_map;
}
if (sync_cow_data &&
VME_OBJECT(entry)->copy)) {
local_object = VME_OBJECT(entry);
local_start = entry->vme_start;
- local_offset = VME_OFFSET(entry);
+ local_offset = (vm_map_offset_t)VME_OFFSET(entry);
vm_object_reference(local_object);
vm_map_unlock_read(map);
if (force_data_sync) {
local_object = VME_OBJECT(entry);
local_start = entry->vme_start;
- local_offset = VME_OFFSET(entry);
+ local_offset = (vm_map_offset_t)VME_OFFSET(entry);
vm_object_reference(local_object);
vm_map_unlock_read(map);
}
local_object = VME_OBJECT(entry);
- local_offset = VME_OFFSET(entry);
+ local_offset = (vm_map_offset_t)VME_OFFSET(entry);
local_start = entry->vme_start;
-#if CONFIG_EMBEDDED
/*
* Wiring will copy the pages to the shadow object.
* The shadow object will not be code-signed so
uint64_t, (uint64_t)entry->vme_end);
cs_executable_create_upl++;
}
-#endif /* CONFIG_EMBEDDED */
vm_object_lock(local_object);
vm_map_unlock_read(map);
+ offset += offset_in_mapped_page;
+ assert(*upl_size > offset_in_mapped_page);
+ *upl_size -= offset_in_mapped_page;
+
ret = vm_object_iopl_request(local_object,
((vm_object_offset_t)
((offset - local_start) + local_offset)),
tag);
vm_object_deallocate(local_object);
+done:
+ if (release_map) {
+ vm_map_deallocate(map);
+ }
+
return ret;
}
return KERN_INVALID_ARGUMENT;
}
+ DEBUG4K_UPL("map %p upl %p flags 0x%x object %p offset 0x%llx size 0x%x \n", map, upl, upl->flags, upl->map_object, upl->u_offset, upl->u_size);
+ assert(map == kernel_map);
+
if ((isVectorUPL = vector_upl_is_valid(upl))) {
int mapped = 0, valid_upls = 0;
vector_upl = upl;
}
}
- kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
+ if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
+ panic("TODO4K: vector UPL not implemented");
+ }
+
+ kr = kmem_suballoc(map, &vector_upl_dst_addr,
+ vector_upl->u_size,
+ FALSE,
VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
&vector_upl_submap);
if (kr != KERN_SUCCESS) {
return KERN_FAILURE;
}
}
+
+ size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
+
if ((!(upl->flags & UPL_SHADOWED)) &&
((upl->flags & UPL_HAS_BUSY) ||
!((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
if (upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
((((uintptr_t)upl) + sizeof(struct upl))
- + ((upl->size / PAGE_SIZE) * sizeof(upl_page_info_t)));
+ + ((size / PAGE_SIZE) * sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
}
object = upl->map_object;
- upl->map_object = vm_object_allocate(upl->size);
+ upl->map_object = vm_object_allocate(vm_object_round_page(size));
vm_object_lock(upl->map_object);
upl->map_object->pageout = TRUE;
upl->map_object->can_persist = FALSE;
upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
- upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
+ upl->map_object->vo_shadow_offset = upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset;
+ assertf(page_aligned(upl->map_object->vo_shadow_offset),
+ "object %p shadow_offset 0x%llx",
+ upl->map_object,
+ (uint64_t)upl->map_object->vo_shadow_offset);
upl->map_object->wimg_bits = object->wimg_bits;
+ assertf(page_aligned(upl->map_object->vo_shadow_offset),
+ "object %p shadow_offset 0x%llx",
+ upl->map_object, upl->map_object->vo_shadow_offset);
offset = upl->map_object->vo_shadow_offset;
new_offset = 0;
- size = upl->size;
+ size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
upl->flags |= UPL_SHADOWED;
pg_num = (unsigned int) (new_offset / PAGE_SIZE);
assert(pg_num == new_offset / PAGE_SIZE);
- if (lite_list[pg_num >> 5] & (1 << (pg_num & 31))) {
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ if (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) {
+ alias_page = vm_page_grab_fictitious(TRUE);
vm_object_lock(object);
if (upl->flags & UPL_SHADOWED) {
offset = 0;
} else {
- offset = upl->offset - upl->map_object->paging_offset;
+ offset = upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)) - upl->map_object->paging_offset;
}
- size = upl->size;
+ size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
vm_object_reference(upl->map_object);
goto process_upl_to_enter;
}
+ if (!isVectorUPL) {
+ vm_map_offset_t addr_adjustment;
+
+ addr_adjustment = (vm_map_offset_t)(upl->u_offset - upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)));
+ if (addr_adjustment) {
+ assert(VM_MAP_PAGE_MASK(map) != PAGE_MASK);
+ DEBUG4K_UPL("dst_addr 0x%llx (+ 0x%llx) -> 0x%llx\n", (uint64_t)*dst_addr, (uint64_t)addr_adjustment, (uint64_t)(*dst_addr + addr_adjustment));
+ *dst_addr += addr_adjustment;
+ }
+ }
+
upl_unlock(upl);
return KERN_SUCCESS;
vm_offset_t v_upl_submap_dst_addr;
vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
- vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
+ vm_map_remove(map, v_upl_submap_dst_addr,
+ v_upl_submap_dst_addr + vector_upl->u_size,
+ VM_MAP_REMOVE_NO_FLAGS);
vm_map_deallocate(v_upl_submap);
upl_unlock(vector_upl);
return KERN_SUCCESS;
if (upl->flags & UPL_PAGE_LIST_MAPPED) {
addr = upl->kaddr;
- size = upl->size;
+ size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
assert(upl->ref_count > 1);
upl->ref_count--; /* removing mapping ref */
mach_msg_type_number_t count,
boolean_t *empty)
{
- upl_size_t xfer_size, subupl_size = size;
+ upl_size_t xfer_size, subupl_size;
vm_object_t shadow_object;
vm_object_t object;
vm_object_t m_object;
int occupied;
int clear_refmod = 0;
int pgpgout_count = 0;
- struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
- struct vm_page_delayed_work *dwp;
+ struct vm_page_delayed_work dw_array;
+ struct vm_page_delayed_work *dwp, *dwp_start;
+ bool dwp_finish_ctx = TRUE;
int dw_count;
int dw_limit;
int isVectorUPL = 0;
int unwired_count = 0;
int local_queue_count = 0;
vm_page_t first_local, last_local;
+ vm_object_offset_t obj_start, obj_end, obj_offset;
+ kern_return_t kr = KERN_SUCCESS;
+
+// DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p offset 0x%llx size 0x%llx flags 0x%x\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object, (uint64_t)offset, (uint64_t)size, flags);
+
+ dwp_start = dwp = NULL;
+ subupl_size = size;
*empty = FALSE;
if (upl == UPL_NULL) {
return KERN_INVALID_ARGUMENT;
}
+ dw_count = 0;
+ dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dwp_start = vm_page_delayed_work_get_ctx();
+ if (dwp_start == NULL) {
+ dwp_start = &dw_array;
+ dw_limit = 1;
+ dwp_finish_ctx = FALSE;
+ }
+
+ dwp = dwp_start;
+
if (count == 0) {
page_list = NULL;
}
offset = subupl_offset;
if (size == 0) {
upl_unlock(vector_upl);
- return KERN_SUCCESS;
+ kr = KERN_SUCCESS;
+ goto done;
}
upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
if (upl == NULL) {
upl_unlock(vector_upl);
- return KERN_FAILURE;
+ kr = KERN_FAILURE;
+ goto done;
}
page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
subupl_size -= size;
#endif
if (upl->flags & UPL_DEVICE_MEMORY) {
xfer_size = 0;
- } else if ((offset + size) <= upl->size) {
+ } else if ((offset + size) <= upl_adjusted_size(upl, PAGE_MASK)) {
xfer_size = size;
} else {
if (!isVectorUPL) {
} else {
upl_unlock(vector_upl);
}
- return KERN_FAILURE;
+ DEBUG4K_ERROR("upl %p (u_offset 0x%llx u_size 0x%x) offset 0x%x size 0x%x\n", upl, upl->u_offset, upl->u_size, offset, size);
+ kr = KERN_FAILURE;
+ goto done;
}
if (upl->flags & UPL_SET_DIRTY) {
flags |= UPL_COMMIT_SET_DIRTY;
if (upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
- + ((upl->size / PAGE_SIZE) * sizeof(upl_page_info_t)));
+ + ((upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE) * sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
}
entry = offset / PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
- assert(!(target_offset & PAGE_MASK));
- assert(!(xfer_size & PAGE_MASK));
-
if (upl->flags & UPL_KERNEL_OBJECT) {
vm_object_lock_shared(shadow_object);
} else {
should_be_throttled = TRUE;
}
- dwp = &dw_array[0];
- dw_count = 0;
- dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
-
if ((upl->flags & UPL_IO_WIRE) &&
!(flags & UPL_COMMIT_FREE_ABSENT) &&
!isVectorUPL &&
first_local = VM_PAGE_NULL;
last_local = VM_PAGE_NULL;
- while (xfer_size) {
+ obj_start = target_offset + upl->u_offset - shadow_object->paging_offset;
+ obj_end = obj_start + xfer_size;
+ obj_start = vm_object_trunc_page(obj_start);
+ obj_end = vm_object_round_page(obj_end);
+ for (obj_offset = obj_start;
+ obj_offset < obj_end;
+ obj_offset += PAGE_SIZE) {
vm_page_t t, m;
dwp->dw_mask = 0;
pg_num = (unsigned int) (target_offset / PAGE_SIZE);
assert(pg_num == target_offset / PAGE_SIZE);
- if (lite_list[pg_num >> 5] & (1 << (pg_num & 31))) {
- lite_list[pg_num >> 5] &= ~(1 << (pg_num & 31));
+ if (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) {
+ lite_list[pg_num >> 5] &= ~(1U << (pg_num & 31));
if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) {
- m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
+ m = vm_page_lookup(shadow_object, obj_offset);
}
} else {
m = NULL;
* Set the code signing bits according to
* what the UPL says they should be.
*/
- m->vmp_cs_validated = page_list[entry].cs_validated;
- m->vmp_cs_tainted = page_list[entry].cs_tainted;
- m->vmp_cs_nx = page_list[entry].cs_nx;
+ m->vmp_cs_validated |= page_list[entry].cs_validated;
+ m->vmp_cs_tainted |= page_list[entry].cs_tainted;
+ m->vmp_cs_nx |= page_list[entry].cs_nx;
}
if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) {
m->vmp_written_by_kernel = TRUE;
m->vmp_dirty = FALSE;
if (!(flags & UPL_COMMIT_CS_VALIDATED) &&
- m->vmp_cs_validated && !m->vmp_cs_tainted) {
+ m->vmp_cs_validated &&
+ m->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
/*
* CODE SIGNING:
* This page is no longer dirty
* so it will need to be
* re-validated.
*/
- m->vmp_cs_validated = FALSE;
+ m->vmp_cs_validated = VMP_CS_ALL_FALSE;
VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
}
if (!(flags & UPL_COMMIT_CS_VALIDATED) &&
- m->vmp_cs_validated && !m->vmp_cs_tainted) {
+ m->vmp_cs_validated &&
+ m->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
/*
* CODE SIGNING:
* This page is no longer dirty
* so it will need to be
* re-validated.
*/
- m->vmp_cs_validated = FALSE;
+ m->vmp_cs_validated = VMP_CS_ALL_FALSE;
VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
if (m->vmp_free_when_done) {
/*
* With the clean queue enabled, UPL_PAGEOUT should
- * no longer set the pageout bit. It's pages now go
+ * no longer set the pageout bit. Its pages now go
* to the clean queue.
+ *
+ * We don't use the cleaned Q anymore and so this
+ * assert isn't correct. The code for the clean Q
+ * still exists and might be used in the future. If we
+ * go back to the cleaned Q, we will re-enable this
+ * assert.
+ *
+ * assert(!(upl->flags & UPL_PAGEOUT));
*/
- assert(!(flags & UPL_PAGEOUT));
assert(!m_object->internal);
m->vmp_free_when_done = FALSE;
dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
if (upl->flags & UPL_PAGEOUT) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
}
} else {
if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
pgpgout_count++;
- VM_STAT_INCR(pageouts);
+ counter_inc(&vm_statistics_pageouts);
DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
dwp->dw_mask |= DW_enqueue_cleaned;
VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
if (dw_count >= dw_limit) {
- vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
- dwp = &dw_array[0];
+ dwp = dwp_start;
dw_count = 0;
}
} else {
}
}
if (dw_count) {
- vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
+ dwp = dwp_start;
+ dw_count = 0;
}
if (fast_path_possible) {
occupied = 0;
if (!fast_path_full_commit) {
- pg_num = upl->size / PAGE_SIZE;
+ pg_num = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE;
pg_num = (pg_num + 31) >> 5;
for (i = 0; i < pg_num; i++) {
DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
}
- return KERN_SUCCESS;
+ kr = KERN_SUCCESS;
+done:
+ if (dwp_start && dwp_finish_ctx) {
+ vm_page_delayed_work_finish_ctx(dwp_start);
+ dwp_start = dwp = NULL;
+ }
+
+ return kr;
}
kern_return_t
boolean_t *empty)
{
upl_page_info_t *user_page_list = NULL;
- upl_size_t xfer_size, subupl_size = size;
+ upl_size_t xfer_size, subupl_size;
vm_object_t shadow_object;
vm_object_t object;
vm_object_offset_t target_offset;
int entry;
wpl_array_t lite_list;
int occupied;
- struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
- struct vm_page_delayed_work *dwp;
+ struct vm_page_delayed_work dw_array;
+ struct vm_page_delayed_work *dwp, *dwp_start;
+ bool dwp_finish_ctx = TRUE;
int dw_count;
int dw_limit;
int isVectorUPL = 0;
upl_t vector_upl = NULL;
+ vm_object_offset_t obj_start, obj_end, obj_offset;
+ kern_return_t kr = KERN_SUCCESS;
+
+// DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p offset 0x%llx size 0x%llx error 0x%x\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object, (uint64_t)offset, (uint64_t)size, error);
+ dwp_start = dwp = NULL;
+
+ subupl_size = size;
*empty = FALSE;
if (upl == UPL_NULL) {
return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
}
+ dw_count = 0;
+ dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dwp_start = vm_page_delayed_work_get_ctx();
+ if (dwp_start == NULL) {
+ dwp_start = &dw_array;
+ dw_limit = 1;
+ dwp_finish_ctx = FALSE;
+ }
+
+ dwp = dwp_start;
+
if ((isVectorUPL = vector_upl_is_valid(upl))) {
vector_upl = upl;
upl_lock(vector_upl);
offset = subupl_offset;
if (size == 0) {
upl_unlock(vector_upl);
- return KERN_SUCCESS;
+ kr = KERN_SUCCESS;
+ goto done;
}
upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
if (upl == NULL) {
upl_unlock(vector_upl);
- return KERN_FAILURE;
+ kr = KERN_FAILURE;
+ goto done;
}
subupl_size -= size;
subupl_offset += size;
#endif
if (upl->flags & UPL_DEVICE_MEMORY) {
xfer_size = 0;
- } else if ((offset + size) <= upl->size) {
+ } else if ((offset + size) <= upl_adjusted_size(upl, PAGE_MASK)) {
xfer_size = size;
} else {
if (!isVectorUPL) {
} else {
upl_unlock(vector_upl);
}
-
- return KERN_FAILURE;
+ DEBUG4K_ERROR("upl %p (u_offset 0x%llx u_size 0x%x) offset 0x%x size 0x%x\n", upl, upl->u_offset, upl->u_size, offset, size);
+ kr = KERN_FAILURE;
+ goto done;
}
if (upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
((((uintptr_t)upl) + sizeof(struct upl))
- + ((upl->size / PAGE_SIZE) * sizeof(upl_page_info_t)));
+ + ((upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE) * sizeof(upl_page_info_t)));
user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
} else {
entry = offset / PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
- assert(!(target_offset & PAGE_MASK));
- assert(!(xfer_size & PAGE_MASK));
-
if (upl->flags & UPL_KERNEL_OBJECT) {
vm_object_lock_shared(shadow_object);
} else {
vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
}
- dwp = &dw_array[0];
- dw_count = 0;
- dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
-
if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) {
panic("upl_abort_range: kernel_object being DUMPED");
}
- while (xfer_size) {
+ obj_start = target_offset + upl->u_offset - shadow_object->paging_offset;
+ obj_end = obj_start + xfer_size;
+ obj_start = vm_object_trunc_page(obj_start);
+ obj_end = vm_object_round_page(obj_end);
+ for (obj_offset = obj_start;
+ obj_offset < obj_end;
+ obj_offset += PAGE_SIZE) {
vm_page_t t, m;
unsigned int pg_num;
boolean_t needed;
m = VM_PAGE_NULL;
if (upl->flags & UPL_LITE) {
- if (lite_list[pg_num >> 5] & (1 << (pg_num & 31))) {
- lite_list[pg_num >> 5] &= ~(1 << (pg_num & 31));
+ if (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) {
+ lite_list[pg_num >> 5] &= ~(1U << (pg_num & 31));
if (!(upl->flags & UPL_KERNEL_OBJECT)) {
- m = vm_page_lookup(shadow_object, target_offset +
- (upl->offset - shadow_object->paging_offset));
+ m = vm_page_lookup(shadow_object, obj_offset);
}
}
}
VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
if (dw_count >= dw_limit) {
- vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
- dwp = &dw_array[0];
+ dwp = dwp_start;
dw_count = 0;
}
} else {
}
}
if (dw_count) {
- vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
+ dwp = dwp_start;
+ dw_count = 0;
}
occupied = 1;
int pg_num;
int i;
- pg_num = upl->size / PAGE_SIZE;
+ pg_num = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE;
pg_num = (pg_num + 31) >> 5;
occupied = 0;
goto process_upl_to_abort;
}
- return KERN_SUCCESS;
+ kr = KERN_SUCCESS;
+
+done:
+ if (dwp_start && dwp_finish_ctx) {
+ vm_page_delayed_work_finish_ctx(dwp_start);
+ dwp_start = dwp = NULL;
+ }
+
+ return kr;
}
return KERN_INVALID_ARGUMENT;
}
- return upl_abort_range(upl, 0, upl->size, error, &empty);
+ return upl_abort_range(upl, 0, upl->u_size, error, &empty);
}
return KERN_INVALID_ARGUMENT;
}
- return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
+ return upl_commit_range(upl, 0, upl->u_size, 0,
+ page_list, count, &empty);
}
object, object->purgable);
}
- size = upl->size;
+ size = upl_adjusted_size(upl, PAGE_MASK);
vm_object_lock(object);
VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) {
nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
} else {
- offset = 0 + upl->offset - object->paging_offset;
+ offset = (vm_offset_t)(upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset);
}
while (size) {
}
entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
assert(entry >= 0 && entry < object->resident_page_count);
- lite_list[entry >> 5] |= 1 << (entry & 31);
+ lite_list[entry >> 5] |= 1U << (entry & 31);
phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
- lite_list[entry >> 5] |= 1 << (entry & 31);
+ lite_list[entry >> 5] |= 1U << (entry & 31);
phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
kern_return_t ret;
vm_prot_t prot;
struct vm_object_fault_info fault_info = {};
- struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
- struct vm_page_delayed_work *dwp;
+ struct vm_page_delayed_work dw_array;
+ struct vm_page_delayed_work *dwp, *dwp_start;
+ bool dwp_finish_ctx = TRUE;
int dw_count;
int dw_limit;
int dw_index;
task_t task = current_task();
#endif /* DEVELOPMENT || DEBUG */
+ dwp_start = dwp = NULL;
+
+ vm_object_offset_t original_offset = offset;
+ upl_size_t original_size = size;
+
+// DEBUG4K_UPL("object %p offset 0x%llx size 0x%llx cntrl_flags 0x%llx\n", object, (uint64_t)offset, (uint64_t)size, cntrl_flags);
+
+ size = (upl_size_t)(vm_object_round_page(offset + size) - vm_object_trunc_page(offset));
+ offset = vm_object_trunc_page(offset);
+ if (size != original_size || offset != original_offset) {
+ DEBUG4K_IOKIT("flags 0x%llx object %p offset 0x%llx size 0x%x -> offset 0x%llx size 0x%x\n", cntrl_flags, object, original_offset, original_size, offset, size);
+ }
+
if (cntrl_flags & ~UPL_VALID_FLAGS) {
/*
* For forward compatibility's sake,
panic("vm_object_iopl_request: external object with non-zero paging offset\n");
}
+
VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
#if CONFIG_IOSCHED || UPL_DEBUG
psize = PAGE_SIZE;
} else {
psize = size;
+
+ dw_count = 0;
+ dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dwp_start = vm_page_delayed_work_get_ctx();
+ if (dwp_start == NULL) {
+ dwp_start = &dw_array;
+ dw_limit = 1;
+ dwp_finish_ctx = FALSE;
+ }
+
+ dwp = dwp_start;
}
if (cntrl_flags & UPL_SET_INTERNAL) {
}
upl->map_object = object;
- upl->size = size;
+ upl->u_offset = original_offset;
+ upl->u_size = original_size;
size_in_pages = size / PAGE_SIZE;
/*
* paging in progress also protects the paging_offset
*/
- upl->offset = offset + object->paging_offset;
+ upl->u_offset = original_offset + object->paging_offset;
if (cntrl_flags & UPL_BLOCK_ACCESS) {
/*
}
#if CONFIG_IOSCHED || UPL_DEBUG
- if (upl->flags & UPL_TRACKED_BY_OBJECT) {
+ if ((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
vm_object_activity_begin(object);
queue_enter(&object->uplq, upl, upl_t, uplq);
}
xfer_size = size;
dst_offset = offset;
- dw_count = 0;
if (fast_path_full_req) {
if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE) {
fault_info.interruptible = interruptible;
fault_info.batch_pmap_op = TRUE;
- dwp = &dw_array[0];
- dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
-
while (xfer_size) {
vm_fault_return_t result;
VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
- /* fall thru */
+ OS_FALLTHROUGH;
case VM_FAULT_INTERRUPTED:
error_code = MACH_SEND_INTERRUPTED;
+ OS_FALLTHROUGH;
case VM_FAULT_MEMORY_ERROR:
memory_error:
ret = (error_code ? error_code: KERN_MEMORY_ERROR);
if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
SET_PAGE_DIRTY(dst_page, TRUE);
+ /*
+ * Page belonging to a code-signed object is about to
+ * be written. Mark it tainted and disconnect it from
+ * all pmaps so processes have to fault it back in and
+ * deal with the tainted bit.
+ */
+ if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
+ dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
+ vm_page_iopl_tainted++;
+ if (dst_page->vmp_pmapped) {
+ int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
+ if (refmod & VM_MEM_REFERENCED) {
+ dst_page->vmp_reference = TRUE;
+ }
+ }
+ }
}
if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
pmap_sync_page_attributes_phys(phys_page);
upl->flags |= UPL_HAS_BUSY;
}
- lite_list[entry >> 5] |= 1 << (entry & 31);
+ lite_list[entry >> 5] |= 1U << (entry & 31);
if (phys_page > upl->highest_page) {
upl->highest_page = phys_page;
VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
if (dw_count >= dw_limit) {
- vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
- dwp = &dw_array[0];
+ dwp = dwp_start;
dw_count = 0;
}
}
assert(entry == size_in_pages);
if (dw_count) {
- vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
+ vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
+ dwp = dwp_start;
+ dw_count = 0;
}
finish:
if (user_page_list && set_cache_attr_needed == TRUE) {
* can't be accessed without causing a page fault.
*/
vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
- PMAP_NULL, 0, VM_PROT_NONE);
+ PMAP_NULL,
+ PAGE_SIZE,
+ 0, VM_PROT_NONE);
assert(!object->blocked_access);
object->blocked_access = TRUE;
}
ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
}
#endif /* DEVELOPMENT || DEBUG */
+
+ if (dwp_start && dwp_finish_ctx) {
+ vm_page_delayed_work_finish_ctx(dwp_start);
+ dwp_start = dwp = NULL;
+ }
+
return KERN_SUCCESS;
return_err:
need_unwire = TRUE;
if (dw_count) {
- if (dw_array[dw_index].dw_m == dst_page) {
+ if ((dwp_start)[dw_index].dw_m == dst_page) {
/*
* still in the deferred work list
* which means we haven't yet called
vm_page_unlock_queues();
if (need_unwire == TRUE) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
}
}
#if UPL_DEBUG
ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
}
#endif /* DEVELOPMENT || DEBUG */
+
+ if (dwp_start && dwp_finish_ctx) {
+ vm_page_delayed_work_finish_ctx(dwp_start);
+ dwp_start = dwp = NULL;
+ }
return ret;
}
object1 = upl1->map_object;
object2 = upl2->map_object;
- if (upl1->offset != 0 || upl2->offset != 0 ||
- upl1->size != upl2->size) {
+ if (upl1->u_offset != 0 || upl2->u_offset != 0 ||
+ upl1->u_size != upl2->u_size) {
/*
* We deal only with full objects, not subsets.
* That's because we exchange the entire backing store info
* Tranpose the VM objects' backing store.
*/
retval = vm_object_transpose(object1, object2,
- (vm_object_size_t) upl1->size);
+ upl_adjusted_size(upl1, PAGE_MASK));
if (retval == KERN_SUCCESS) {
/*
vm_object_lock(object1);
vm_object_lock(object2);
}
- if (upl1->flags & UPL_TRACKED_BY_OBJECT) {
+ if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
queue_remove(&object1->uplq, upl1, upl_t, uplq);
}
- if (upl2->flags & UPL_TRACKED_BY_OBJECT) {
+ if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
queue_remove(&object2->uplq, upl2, upl_t, uplq);
}
#endif
upl2->map_object = object1;
#if CONFIG_IOSCHED || UPL_DEBUG
- if (upl1->flags & UPL_TRACKED_BY_OBJECT) {
+ if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
queue_enter(&object2->uplq, upl1, upl_t, uplq);
}
- if (upl2->flags & UPL_TRACKED_BY_OBJECT) {
+ if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
queue_enter(&object1->uplq, upl2, upl_t, uplq);
}
if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
return;
}
- size_in_pages = upl->size / PAGE_SIZE;
+ size_in_pages = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE;
user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
* virtaul address space each time we need to work with
* a physical page.
*/
-decl_simple_lock_data(, vm_paging_lock)
+SIMPLE_LOCK_DECLARE(vm_paging_lock, 0);
#define VM_PAGING_NUM_PAGES 64
vm_map_offset_t vm_paging_base_address = 0;
boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
unsigned long vm_paging_objects_mapped_slow = 0;
unsigned long vm_paging_pages_mapped_slow = 0;
+__startup_func
void
vm_paging_map_init(void)
{
if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
/* use permanent 1-to-1 kernel mapping of physical memory ? */
-#if __x86_64__
- *address = (vm_map_offset_t)
- PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
- PAGE_SHIFT);
- *need_unmap = FALSE;
- return KERN_SUCCESS;
-#elif __arm__ || __arm64__
*address = (vm_map_offset_t)
phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
*need_unmap = FALSE;
return KERN_SUCCESS;
-#else
-#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
-#endif
assert(page->vmp_busy);
/*
upl = upl_create(0, UPL_VECTOR, 0);
upl->vector_upl = vector_upl;
- upl->offset = upl_offset;
+ upl->u_offset = upl_offset;
vector_upl->size = 0;
vector_upl->offset = upl_offset;
vector_upl->invalid_upls = 0;
subupl->vector_upl = (void*)vector_upl;
vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
vector_upl->size += io_size;
- upl->size += io_size;
+ upl->u_size += io_size;
} else {
uint32_t i = 0, invalid_upls = 0;
for (i = 0; i < vector_upl->num_upls; i++) {
}
vector_upl->upl_elems[i] = NULL;
- invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
+ invalid_upls = os_atomic_inc(&(vector_upl)->invalid_upls,
+ relaxed);
if (invalid_upls == vector_upl->num_upls) {
return TRUE;
} else {
vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info) * (vector_upl->size / PAGE_SIZE));
for (i = 0; i < vector_upl->num_upls; i++) {
- cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size / PAGE_SIZE;
+ cur_upl_pagelist_size = sizeof(struct upl_page_info) * upl_adjusted_size(vector_upl->upl_elems[i], PAGE_MASK) / PAGE_SIZE;
bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
pagelist_size += cur_upl_pagelist_size;
if (vector_upl->upl_elems[i]->highest_page > upl->highest_page) {
upl_get_size(
upl_t upl)
{
- return upl->size;
+ return upl_adjusted_size(upl, PAGE_MASK);
+}
+
+/*
+ * upl_adjusted_size:
+ *
+ * Returns the length of the UPL's range after adjusting both ends with
+ * "pgmask": the start is trunc_page_mask_64(upl->u_offset, pgmask) and
+ * the end is round_page_mask_64(upl->u_offset + upl->u_size, pgmask).
+ * Going by the helpers' names this widens [u_offset, u_offset + u_size)
+ * to the page boundaries selected by pgmask; the helpers are defined
+ * elsewhere -- confirm.
+ *
+ * upl:    UPL whose u_offset / u_size fields are read (not modified).
+ * pgmask: mask handed to the trunc/round helpers; callers in this file
+ *         pass PAGE_MASK or VM_MAP_PAGE_MASK(map).
+ *
+ * NOTE(review): the vm_object_offset_t difference is cast down to
+ * upl_size_t on return -- confirm the adjusted span always fits.
+ */
+upl_size_t
+upl_adjusted_size(
+ upl_t upl,
+ vm_map_offset_t pgmask)
+{
+ vm_object_offset_t start_offset, end_offset;
+
+ start_offset = trunc_page_mask_64(upl->u_offset, pgmask);
+ end_offset = round_page_mask_64(upl->u_offset + upl->u_size, pgmask);
+
+ return (upl_size_t)(end_offset - start_offset);
+}
+
+/*
+ * upl_adjusted_offset:
+ *
+ * Returns trunc_page_mask_64(upl->u_offset, pgmask), i.e. the UPL's
+ * stored offset passed through the truncation helper for the page mask
+ * supplied by the caller (presumably rounding it down to that page
+ * boundary; the helper is defined elsewhere -- confirm).
+ *
+ * upl:    UPL whose u_offset field is read (not modified).
+ * pgmask: mask handed to trunc_page_mask_64().
+ */
+vm_object_offset_t
+upl_adjusted_offset(
+ upl_t upl,
+ vm_map_offset_t pgmask)
+{
+ return trunc_page_mask_64(upl->u_offset, pgmask);
+}
+
+/*
+ * upl_get_data_offset:
+ *
+ * Returns the difference between the UPL's stored u_offset and that
+ * same offset adjusted with PAGE_MASK via upl_adjusted_offset(), i.e.
+ * how far u_offset lies past the PAGE_MASK-adjusted start of the UPL.
+ * The result is 0 when the two values are equal.
+ *
+ * upl: UPL whose u_offset field is read (not modified).
+ */
+vm_object_offset_t
+upl_get_data_offset(
+ upl_t upl)
+{
+ return upl->u_offset - upl_adjusted_offset(upl, PAGE_MASK);
 }
upl_t
}
}
#endif /* VM_PRESSURE_EVENTS */
-
-
-
-#define VM_TEST_COLLAPSE_COMPRESSOR 0
-#define VM_TEST_WIRE_AND_EXTRACT 0
-#define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
-#if __arm64__
-#define VM_TEST_KERNEL_OBJECT_FAULT 0
-#endif /* __arm64__ */
-#define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
-
-#if VM_TEST_COLLAPSE_COMPRESSOR
-extern boolean_t vm_object_collapse_compressor_allowed;
-#include <IOKit/IOLib.h>
-static void
-vm_test_collapse_compressor(void)
-{
- vm_object_size_t backing_size, top_size;
- vm_object_t backing_object, top_object;
- vm_map_offset_t backing_offset, top_offset;
- unsigned char *backing_address, *top_address;
- kern_return_t kr;
-
- printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
-
- /* create backing object */
- backing_size = 15 * PAGE_SIZE;
- backing_object = vm_object_allocate(backing_size);
- assert(backing_object != VM_OBJECT_NULL);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
- backing_object);
- /* map backing object */
- backing_offset = 0;
- kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
- VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
- backing_object, 0, FALSE,
- VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
- assert(kr == KERN_SUCCESS);
- backing_address = (unsigned char *) backing_offset;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "mapped backing object %p at 0x%llx\n",
- backing_object, (uint64_t) backing_offset);
- /* populate with pages to be compressed in backing object */
- backing_address[0x1 * PAGE_SIZE] = 0xB1;
- backing_address[0x4 * PAGE_SIZE] = 0xB4;
- backing_address[0x7 * PAGE_SIZE] = 0xB7;
- backing_address[0xa * PAGE_SIZE] = 0xBA;
- backing_address[0xd * PAGE_SIZE] = 0xBD;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "populated pages to be compressed in "
- "backing_object %p\n", backing_object);
- /* compress backing object */
- vm_object_pageout(backing_object);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
- backing_object);
- /* wait for all the pages to be gone */
- while (*(volatile int *)&backing_object->resident_page_count != 0) {
- IODelay(10);
- }
- printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
- backing_object);
- /* populate with pages to be resident in backing object */
- backing_address[0x0 * PAGE_SIZE] = 0xB0;
- backing_address[0x3 * PAGE_SIZE] = 0xB3;
- backing_address[0x6 * PAGE_SIZE] = 0xB6;
- backing_address[0x9 * PAGE_SIZE] = 0xB9;
- backing_address[0xc * PAGE_SIZE] = 0xBC;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "populated pages to be resident in "
- "backing_object %p\n", backing_object);
- /* leave the other pages absent */
- /* mess with the paging_offset of the backing_object */
- assert(backing_object->paging_offset == 0);
- backing_object->paging_offset = 0x3000;
-
- /* create top object */
- top_size = 9 * PAGE_SIZE;
- top_object = vm_object_allocate(top_size);
- assert(top_object != VM_OBJECT_NULL);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
- top_object);
- /* map top object */
- top_offset = 0;
- kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
- VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
- top_object, 0, FALSE,
- VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
- assert(kr == KERN_SUCCESS);
- top_address = (unsigned char *) top_offset;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "mapped top object %p at 0x%llx\n",
- top_object, (uint64_t) top_offset);
- /* populate with pages to be compressed in top object */
- top_address[0x3 * PAGE_SIZE] = 0xA3;
- top_address[0x4 * PAGE_SIZE] = 0xA4;
- top_address[0x5 * PAGE_SIZE] = 0xA5;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "populated pages to be compressed in "
- "top_object %p\n", top_object);
- /* compress top object */
- vm_object_pageout(top_object);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
- top_object);
- /* wait for all the pages to be gone */
- while (top_object->resident_page_count != 0) {
- IODelay(10);
- }
- printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
- top_object);
- /* populate with pages to be resident in top object */
- top_address[0x0 * PAGE_SIZE] = 0xA0;
- top_address[0x1 * PAGE_SIZE] = 0xA1;
- top_address[0x2 * PAGE_SIZE] = 0xA2;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "populated pages to be resident in "
- "top_object %p\n", top_object);
- /* leave the other pages absent */
-
- /* link the 2 objects */
- vm_object_reference(backing_object);
- top_object->shadow = backing_object;
- top_object->vo_shadow_offset = 0x3000;
- printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
- top_object, backing_object);
-
- /* unmap backing object */
- vm_map_remove(kernel_map,
- backing_offset,
- backing_offset + backing_size,
- VM_MAP_REMOVE_NO_FLAGS);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "unmapped backing_object %p [0x%llx:0x%llx]\n",
- backing_object,
- (uint64_t) backing_offset,
- (uint64_t) (backing_offset + backing_size));
-
- /* collapse */
- printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
- vm_object_lock(top_object);
- vm_object_collapse(top_object, 0, FALSE);
- vm_object_unlock(top_object);
- printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
-
- /* did it work? */
- if (top_object->shadow != VM_OBJECT_NULL) {
- printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
- printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
- if (vm_object_collapse_compressor_allowed) {
- panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
- }
- } else {
- /* check the contents of the mapping */
- unsigned char expect[9] =
- { 0xA0, 0xA1, 0xA2, /* resident in top */
- 0xA3, 0xA4, 0xA5, /* compressed in top */
- 0xB9, /* resident in backing + shadow_offset */
- 0xBD, /* compressed in backing + shadow_offset + paging_offset */
- 0x00 }; /* absent in both */
- unsigned char actual[9];
- unsigned int i, errors;
-
- errors = 0;
- for (i = 0; i < sizeof(actual); i++) {
- actual[i] = (unsigned char) top_address[i * PAGE_SIZE];
- if (actual[i] != expect[i]) {
- errors++;
- }
- }
- printf("VM_TEST_COLLAPSE_COMPRESSOR: "
- "actual [%x %x %x %x %x %x %x %x %x] "
- "expect [%x %x %x %x %x %x %x %x %x] "
- "%d errors\n",
- actual[0], actual[1], actual[2], actual[3],
- actual[4], actual[5], actual[6], actual[7],
- actual[8],
- expect[0], expect[1], expect[2], expect[3],
- expect[4], expect[5], expect[6], expect[7],
- expect[8],
- errors);
- if (errors) {
- panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
- } else {
- printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
- }
- }
-}
-#else /* VM_TEST_COLLAPSE_COMPRESSOR */
-#define vm_test_collapse_compressor()
-#endif /* VM_TEST_COLLAPSE_COMPRESSOR */
-
-#if VM_TEST_WIRE_AND_EXTRACT
-extern ledger_template_t task_ledger_template;
-#include <mach/mach_vm.h>
-extern ppnum_t vm_map_get_phys_page(vm_map_t map,
- vm_offset_t offset);
-static void
-vm_test_wire_and_extract(void)
-{
- ledger_t ledger;
- vm_map_t user_map, wire_map;
- mach_vm_address_t user_addr, wire_addr;
- mach_vm_size_t user_size, wire_size;
- mach_vm_offset_t cur_offset;
- vm_prot_t cur_prot, max_prot;
- ppnum_t user_ppnum, wire_ppnum;
- kern_return_t kr;
-
- ledger = ledger_instantiate(task_ledger_template,
- LEDGER_CREATE_ACTIVE_ENTRIES);
- user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
- 0x100000000ULL,
- 0x200000000ULL,
- TRUE);
- wire_map = vm_map_create(NULL,
- 0x100000000ULL,
- 0x200000000ULL,
- TRUE);
- user_addr = 0;
- user_size = 0x10000;
- kr = mach_vm_allocate(user_map,
- &user_addr,
- user_size,
- VM_FLAGS_ANYWHERE);
- assert(kr == KERN_SUCCESS);
- wire_addr = 0;
- wire_size = user_size;
- kr = mach_vm_remap(wire_map,
- &wire_addr,
- wire_size,
- 0,
- VM_FLAGS_ANYWHERE,
- user_map,
- user_addr,
- FALSE,
- &cur_prot,
- &max_prot,
- VM_INHERIT_NONE);
- assert(kr == KERN_SUCCESS);
- for (cur_offset = 0;
- cur_offset < wire_size;
- cur_offset += PAGE_SIZE) {
- kr = vm_map_wire_and_extract(wire_map,
- wire_addr + cur_offset,
- VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
- TRUE,
- &wire_ppnum);
- assert(kr == KERN_SUCCESS);
- user_ppnum = vm_map_get_phys_page(user_map,
- user_addr + cur_offset);
- printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
- "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
- kr,
- user_map, user_addr + cur_offset, user_ppnum,
- wire_map, wire_addr + cur_offset, wire_ppnum);
- if (kr != KERN_SUCCESS ||
- wire_ppnum == 0 ||
- wire_ppnum != user_ppnum) {
- panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
- }
- }
- cur_offset -= PAGE_SIZE;
- kr = vm_map_wire_and_extract(wire_map,
- wire_addr + cur_offset,
- VM_PROT_DEFAULT,
- TRUE,
- &wire_ppnum);
- assert(kr == KERN_SUCCESS);
- printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
- "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
- kr,
- user_map, user_addr + cur_offset, user_ppnum,
- wire_map, wire_addr + cur_offset, wire_ppnum);
- if (kr != KERN_SUCCESS ||
- wire_ppnum == 0 ||
- wire_ppnum != user_ppnum) {
- panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
- }
-
- printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
-}
-#else /* VM_TEST_WIRE_AND_EXTRACT */
-#define vm_test_wire_and_extract()
-#endif /* VM_TEST_WIRE_AND_EXTRACT */
-
-#if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
-static void
-vm_test_page_wire_overflow_panic(void)
-{
- vm_object_t object;
- vm_page_t page;
-
- printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
-
- object = vm_object_allocate(PAGE_SIZE);
- vm_object_lock(object);
- page = vm_page_alloc(object, 0x0);
- vm_page_lock_queues();
- do {
- vm_page_wire(page, 1, FALSE);
- } while (page->wire_count != 0);
- vm_page_unlock_queues();
- vm_object_unlock(object);
- panic("FBDP(%p,%p): wire_count overflow not detected\n",
- object, page);
-}
-#else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
-#define vm_test_page_wire_overflow_panic()
-#endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
-
-#if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
-extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
-static void
-vm_test_kernel_object_fault(void)
-{
- kern_return_t kr;
- vm_offset_t stack;
- uintptr_t frameb[2];
- int ret;
-
- kr = kernel_memory_allocate(kernel_map, &stack,
- kernel_stack_size + (2 * PAGE_SIZE),
- 0,
- (KMA_KSTACK | KMA_KOBJECT |
- KMA_GUARD_FIRST | KMA_GUARD_LAST),
- VM_KERN_MEMORY_STACK);
- if (kr != KERN_SUCCESS) {
- panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
- }
- ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
- if (ret != 0) {
- printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
- } else {
- printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
- }
- vm_map_remove(kernel_map,
- stack,
- stack + kernel_stack_size + (2 * PAGE_SIZE),
- VM_MAP_REMOVE_KUNWIRE);
- stack = 0;
-}
-#else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
-#define vm_test_kernel_object_fault()
-#endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
-
-#if VM_TEST_DEVICE_PAGER_TRANSPOSE
-static void
-vm_test_device_pager_transpose(void)
-{
- memory_object_t device_pager;
- vm_object_t anon_object, device_object;
- vm_size_t size;
- vm_map_offset_t device_mapping;
- kern_return_t kr;
-
- size = 3 * PAGE_SIZE;
- anon_object = vm_object_allocate(size);
- assert(anon_object != VM_OBJECT_NULL);
- device_pager = device_pager_setup(NULL, 0, size, 0);
- assert(device_pager != NULL);
- device_object = memory_object_to_vm_object(device_pager);
- assert(device_object != VM_OBJECT_NULL);
-#if 0
- /*
- * Can't actually map this, since another thread might do a
- * vm_map_enter() that gets coalesced into this object, which
- * would cause the test to fail.
- */
- vm_map_offset_t anon_mapping = 0;
- kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
- VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
- anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
- VM_INHERIT_DEFAULT);
- assert(kr == KERN_SUCCESS);
-#endif
- device_mapping = 0;
- kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
- VM_FLAGS_ANYWHERE,
- VM_MAP_KERNEL_FLAGS_NONE,
- VM_KERN_MEMORY_NONE,
- (void *)device_pager, 0, FALSE,
- VM_PROT_DEFAULT, VM_PROT_ALL,
- VM_INHERIT_DEFAULT);
- assert(kr == KERN_SUCCESS);
- memory_object_deallocate(device_pager);
-
- vm_object_lock(anon_object);
- vm_object_activity_begin(anon_object);
- anon_object->blocked_access = TRUE;
- vm_object_unlock(anon_object);
- vm_object_lock(device_object);
- vm_object_activity_begin(device_object);
- device_object->blocked_access = TRUE;
- vm_object_unlock(device_object);
-
- assert(anon_object->ref_count == 1);
- assert(!anon_object->named);
- assert(device_object->ref_count == 2);
- assert(device_object->named);
-
- kr = vm_object_transpose(device_object, anon_object, size);
- assert(kr == KERN_SUCCESS);
-
- vm_object_lock(anon_object);
- vm_object_activity_end(anon_object);
- anon_object->blocked_access = FALSE;
- vm_object_unlock(anon_object);
- vm_object_lock(device_object);
- vm_object_activity_end(device_object);
- device_object->blocked_access = FALSE;
- vm_object_unlock(device_object);
-
- assert(anon_object->ref_count == 2);
- assert(anon_object->named);
-#if 0
- kr = vm_deallocate(kernel_map, anon_mapping, size);
- assert(kr == KERN_SUCCESS);
-#endif
- assert(device_object->ref_count == 1);
- assert(!device_object->named);
- kr = vm_deallocate(kernel_map, device_mapping, size);
- assert(kr == KERN_SUCCESS);
-
- printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
-}
-#else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
-#define vm_test_device_pager_transpose()
-#endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
-
-void
-vm_tests(void)
-{
- vm_test_collapse_compressor();
- vm_test_wire_and_extract();
- vm_test_page_wire_overflow_panic();
- vm_test_kernel_object_fault();
- vm_test_device_pager_transpose();
-}