+#endif
+
+
+static void
+vm_pageout_page_queue(vm_page_queue_head_t *, int);
+
+/*
+ * flag (protected by the page queues lock) used to make sure
+ * there is only a single sweep going on at a time
+ */
+boolean_t vm_pageout_anonymous_pages_active = FALSE;
+
+
+void
+vm_pageout_anonymous_pages(void)
+{
+ if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
+ vm_page_lock_queues();
+
+ if (vm_pageout_anonymous_pages_active == TRUE) {
+ vm_page_unlock_queues();
+ return;
+ }
+ vm_pageout_anonymous_pages_active = TRUE;
+ vm_page_unlock_queues();
+
+ vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
+ vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
+ vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
+
+ if (VM_CONFIG_SWAP_IS_PRESENT) {
+ vm_consider_swapping();
+ }
+
+ vm_page_lock_queues();
+ vm_pageout_anonymous_pages_active = FALSE;
+ vm_page_unlock_queues();
+ }
+}
+
+
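+/*
+ * sweep up to 'qcount' pages from the given queue looking for
+ * anonymous (internal) pages that can be handed to the compressor:
+ * clean, unreferenced pages are freed outright, dirty/precious
+ * pages are disconnected from their pmaps and passed to
+ * vm_pageout_cluster(), and anything we can't process is
+ * requeued on 'q'
+ */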
+void
+vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
+{
+ vm_page_t m;
+ vm_object_t t_object = NULL;
+ vm_object_t l_object = NULL;
+ vm_object_t m_object = NULL;
+ int delayed_unlock = 0;
+ int try_failed_count = 0;
+ int refmod_state;
+ int pmap_options;
+ struct vm_pageout_queue *iq;
+ ppnum_t phys_page;
+
+
+ iq = &vm_pageout_queue_internal;
+
+ vm_page_lock_queues();
+
+ while (qcount && !vm_page_queue_empty(q)) {
+ LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
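+		/*
+		 * the internal (compressor) pageout queue is backed up...
+		 * drop the object lock if we hold one and wait for the
+		 * laundry to drain before looking at any more pages
+		 */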
+ if (VM_PAGE_Q_THROTTLED(iq)) {
+ if (l_object != NULL) {
+ vm_object_unlock(l_object);
+ l_object = NULL;
+ }
+ iq->pgo_draining = TRUE;
+
+ assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
+ vm_page_unlock_queues();
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ vm_page_lock_queues();
+ delayed_unlock = 0;
+ continue;
+ }
+ m = (vm_page_t) vm_page_queue_first(q);
+ m_object = VM_PAGE_OBJECT(m);
+
+ /*
+ * check to see if we currently are working
+ * with the same object... if so, we've
+ * already got the lock
+ */
+ if (m_object != l_object) {
+ if (!m_object->internal) {
+ goto reenter_pg_on_q;
+ }
+
+ /*
+ * the object associated with candidate page is
+ * different from the one we were just working
+ * with... dump the lock if we still own it
+ */
+ if (l_object != NULL) {
+ vm_object_unlock(l_object);
+ l_object = NULL;
+ }
+ if (m_object != t_object) {
+ try_failed_count = 0;
+ }
+
+ /*
+			 * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run...
+ */
+ if (!vm_object_lock_try_scan(m_object)) {
+ if (try_failed_count > 20) {
+ goto reenter_pg_on_q;
+ }
+ vm_page_unlock_queues();
+ mutex_pause(try_failed_count++);
+ vm_page_lock_queues();
+ delayed_unlock = 0;
+
+ t_object = m_object;
+ continue;
+ }
+ l_object = m_object;
+ }
+ if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
+ /*
+ * page is not to be cleaned
+			 * put it back on its queue
+ */
+ goto reenter_pg_on_q;
+ }
+ phys_page = VM_PAGE_GET_PHYS_PAGE(m);
+
+ if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
+ refmod_state = pmap_get_refmod(phys_page);
+
+ if (refmod_state & VM_MEM_REFERENCED) {
+ m->vmp_reference = TRUE;
+ }
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(m, FALSE);
+ }
+ }
+ if (m->vmp_reference == TRUE) {
+ m->vmp_reference = FALSE;
+ pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
+ goto reenter_pg_on_q;
+ }
+ if (m->vmp_pmapped == TRUE) {
+ if (m->vmp_dirty || m->vmp_precious) {
+ pmap_options = PMAP_OPTIONS_COMPRESSOR;
+ } else {
+ pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+ }
+ refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(m, FALSE);
+ }
+ }
+
+ if (!m->vmp_dirty && !m->vmp_precious) {
+ vm_page_unlock_queues();
+ VM_PAGE_FREE(m);
+ vm_page_lock_queues();
+ delayed_unlock = 0;
+
+ goto next_pg;
+ }
+ if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
+ if (!m_object->pager_initialized) {
+ vm_page_unlock_queues();
+
+ vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
+
+ if (!m_object->pager_initialized) {
+ vm_object_compressor_pager_create(m_object);
+ }
+
+ vm_page_lock_queues();
+ delayed_unlock = 0;
+ }
+ if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
+ goto reenter_pg_on_q;
+ }
+ /*
+ * vm_object_compressor_pager_create will drop the object lock
+ * which means 'm' may no longer be valid to use
+ */
+ continue;
+ }
+ /*
+ * we've already factored out pages in the laundry which
+ * means this page can't be on the pageout queue so it's
+ * safe to do the vm_page_queues_remove
+ */
+ vm_page_queues_remove(m, TRUE);
+
+ LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
+ vm_pageout_cluster(m);
+
+ goto next_pg;
+
+reenter_pg_on_q:
+ vm_page_queue_remove(q, m, vmp_pageq);
+ vm_page_queue_enter(q, m, vmp_pageq);
+next_pg:
+ qcount--;
+ try_failed_count = 0;
+
+ if (delayed_unlock++ > 128) {
+ if (l_object != NULL) {
+ vm_object_unlock(l_object);
+ l_object = NULL;
+ }
+ lck_mtx_yield(&vm_page_queue_lock);
+ delayed_unlock = 0;
+ }
+ }
+ if (l_object != NULL) {
+ vm_object_unlock(l_object);
+ l_object = NULL;
+ }
+ vm_page_unlock_queues();
+}
+
+
+
+/*
+ * function in BSD to apply I/O throttle to the pageout thread
+ */
+extern void vm_pageout_io_throttle(void);
+
+#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
+ MACRO_BEGIN \
+ /* \
+ * If a "reusable" page somehow made it back into \
+ * the active queue, it's been re-used and is not \
+ * quite re-usable. \
+ * If the VM object was "all_reusable", consider it \
+ * as "all re-used" instead of converting it to \
+ * "partially re-used", which could be expensive. \
+ */ \
+ assert(VM_PAGE_OBJECT((m)) == (obj)); \
+ if ((m)->vmp_reusable || \
+ (obj)->all_reusable) { \
+ vm_object_reuse_pages((obj), \
+ (m)->vmp_offset, \
+ (m)->vmp_offset + PAGE_SIZE_64, \
+ FALSE); \
+ } \
+ MACRO_END
+
+
+#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
+#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
+
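+/*
+ * states for the flow control machinery used by vm_pageout_scan
+ * (see vps_flow_control below):
+ *   FCS_IDLE              - no flow control in effect
+ *   FCS_DELAYED           - the internal pageout queue is throttled and a
+ *                           deadline has been set to detect a deadlock
+ *   FCS_DEADLOCK_DETECTED - the deadline passed while still throttled, so
+ *                           relief pages are being moved to break the logjam
+ */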
+#define FCS_IDLE 0
+#define FCS_DELAYED 1
+#define FCS_DEADLOCK_DETECTED 2
+
+struct flow_control {
+ int state;
+ mach_timespec_t ts;
+};
+
+
+#if CONFIG_BACKGROUND_QUEUE
+uint64_t vm_pageout_rejected_bq_internal = 0;
+uint64_t vm_pageout_rejected_bq_external = 0;
+uint64_t vm_pageout_skipped_bq_internal = 0;
+#endif
+
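+/*
+ * max number of anonymous pages vps_choose_victim_page() will grab
+ * in a row before it goes back to considering file-backed pages
+ */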
+#define ANONS_GRABBED_LIMIT 2
+
+
+#if 0
+static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
+#endif
+static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
+
+#define VM_PAGEOUT_PB_NO_ACTION 0
+#define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
+#define VM_PAGEOUT_PB_THREAD_YIELD 2
+
+
+#if 0
+static void
+vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
+{
+ if (*local_freeq) {
+ vm_page_unlock_queues();
+
+ VM_DEBUG_CONSTANT_EVENT(
+ vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+ vm_page_free_count, 0, 0, 1);
+
+ vm_page_free_list(*local_freeq, TRUE);
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+ vm_page_free_count, *local_freed, 0, 1);
+
+ *local_freeq = NULL;
+ *local_freed = 0;
+
+ vm_page_lock_queues();
+ } else {
+ lck_mtx_yield(&vm_page_queue_lock);
+ }
+ *delayed_unlock = 1;
+}
+#endif
+
+
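+/*
+ * common 'prepare to block' path for vm_pageout_scan:
+ * drops the page queues lock, releases the object lock (if held)
+ * and any locally batched free pages, performs the requested
+ * action (wake the compactor/swapper, yield, or nothing) and
+ * then re-takes the page queues lock
+ */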
+static void
+vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
+ vm_page_t *local_freeq, int *local_freed, int action)
+{
+ vm_page_unlock_queues();
+
+ if (*object != NULL) {
+ vm_object_unlock(*object);
+ *object = NULL;
+ }
+ if (*local_freeq) {
+ vm_page_free_list(*local_freeq, TRUE);
+
+ *local_freeq = NULL;
+ *local_freed = 0;
+ }
+ *delayed_unlock = 1;
+
+ switch (action) {
+ case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
+ vm_consider_waking_compactor_swapper();
+ break;
+ case VM_PAGEOUT_PB_THREAD_YIELD:
+ thread_yield_internal(1);
+ break;
+ case VM_PAGEOUT_PB_NO_ACTION:
+ default:
+ break;
+ }
+ vm_page_lock_queues();
+}
+
+
+static struct vm_pageout_vminfo last;
+
+uint64_t last_vm_page_pages_grabbed = 0;
+
+extern uint32_t c_segment_pages_compressed;
+
+extern uint64_t shared_region_pager_reclaimed;
+extern struct memory_object_pager_ops shared_region_pager_ops;
+
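+/*
+ * snapshot the current VM page counts into
+ * vm_pageout_stats[vm_pageout_stat_now], convert the monotonically
+ * increasing vm_pageout_vminfo counters into per-interval deltas
+ * (the 'last' copy holds the previous values), emit the
+ * VM_INFO1..VM_INFO9 tracepoints and then update the memory
+ * pressure bookkeeping via record_memory_pressure()
+ */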
+void
+update_vm_info(void)
+{
+ unsigned long tmp;
+ uint64_t tmp64;
+
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
+
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
+
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
+
+
+ tmp = vm_pageout_vminfo.vm_pageout_considered_page;
+ vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
+ last.vm_pageout_considered_page = tmp;
+
+ tmp64 = vm_pageout_vminfo.vm_pageout_compressions;
+ vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp64 - last.vm_pageout_compressions);
+ last.vm_pageout_compressions = tmp64;
+
+ tmp = vm_pageout_vminfo.vm_compressor_failed;
+ vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
+ last.vm_compressor_failed = tmp;
+
+ tmp64 = vm_pageout_vminfo.vm_compressor_pages_grabbed;
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp64 - last.vm_compressor_pages_grabbed);
+ last.vm_compressor_pages_grabbed = tmp64;
+
+ tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
+ vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
+ last.vm_phantom_cache_found_ghost = tmp;
+
+ tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
+ vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
+ last.vm_phantom_cache_added_ghost = tmp;
+
+ tmp64 = get_pages_grabbed_count();
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed);
+ last_vm_page_pages_grabbed = tmp64;
+
+ tmp = vm_pageout_vminfo.vm_page_pages_freed;
+ vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
+ last.vm_page_pages_freed = tmp;
+
+
+ if (vm_pageout_stats[vm_pageout_stat_now].considered) {
+ tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
+ vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
+ last.vm_pageout_pages_evicted = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
+ vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
+ last.vm_pageout_pages_purged = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
+ vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
+ last.vm_pageout_freed_speculative = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_freed_external;
+ vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
+ last.vm_pageout_freed_external = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
+ vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
+ last.vm_pageout_inactive_referenced = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
+ vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
+ last.vm_pageout_scan_inactive_throttled_external = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
+ vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
+ last.vm_pageout_inactive_dirty_external = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
+ vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
+ last.vm_pageout_freed_cleaned = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
+ vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
+ last.vm_pageout_inactive_nolock = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
+ vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
+ last.vm_pageout_scan_inactive_throttled_internal = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
+ vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
+ last.vm_pageout_skipped_external = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
+ vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
+ last.vm_pageout_reactivation_limit_exceeded = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
+ vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
+ last.vm_pageout_inactive_force_reclaim = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
+ vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
+ last.vm_pageout_freed_internal = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
+ vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
+ last.vm_pageout_considered_bq_internal = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
+ vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
+ last.vm_pageout_considered_bq_external = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
+ vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
+ last.vm_pageout_filecache_min_reactivated = tmp;
+
+ tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
+ vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
+ last.vm_pageout_inactive_dirty_internal = tmp;
+ }
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
+ 0,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
+ vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
+ 0);
+
+ if (vm_pageout_stats[vm_pageout_stat_now].considered ||
+ vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
+ vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].considered,
+ vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
+ vm_pageout_stats[vm_pageout_stat_now].freed_external,
+ vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
+ vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
+ vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
+ vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
+ vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
+ vm_pageout_stats[vm_pageout_stat_now].skipped_external,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
+ vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
+ vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
+ vm_pageout_stats[vm_pageout_stat_now].freed_internal,
+ 0);
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
+ vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
+ vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
+ vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
+ 0);
+ }
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE,
+ vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
+ vm_pageout_stats[vm_pageout_stat_now].pages_freed,
+ vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
+ vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
+ 0);
+
+ record_memory_pressure();
+}
+
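+/*
+ * vm_page_balance_inactive() skips rebalancing while this (or
+ * hibernate_cleaning_in_progress) is set, since the hibernation
+ * code may be walking these same queues
+ */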
+extern boolean_t hibernation_vmqueues_inspection;
+
+/*
+ * Return values for functions called by vm_pageout_scan
+ * that control its flow.
+ *
+ * PROCEED -- vm_pageout_scan will keep making forward progress.
+ * DONE_RETURN -- page demand satisfied, work is done -> vm_pageout_scan returns.
+ * NEXT_ITERATION -- restart the 'for' loop in vm_pageout_scan aka continue.
+ */
+
+#define VM_PAGEOUT_SCAN_PROCEED (0)
+#define VM_PAGEOUT_SCAN_DONE_RETURN (1)
+#define VM_PAGEOUT_SCAN_NEXT_ITERATION (2)
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it moves overflow secluded pages (one at a time) to the
+ * batched 'local' free Q or active Q.
+ */
+static void
+vps_deal_with_secluded_page_overflow(vm_page_t *local_freeq, int *local_freed)
+{
+#if CONFIG_SECLUDED_MEMORY
+ /*
+ * Deal with secluded_q overflow.
+ */
+ if (vm_page_secluded_count > vm_page_secluded_target) {
+ vm_page_t secluded_page;
+
+ /*
+ * SECLUDED_AGING_BEFORE_ACTIVE:
+ * Excess secluded pages go to the active queue and
+ * will later go to the inactive queue.
+ */
+ assert((vm_page_secluded_count_free +
+ vm_page_secluded_count_inuse) ==
+ vm_page_secluded_count);
+ secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
+ assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
+
+ vm_page_queues_remove(secluded_page, FALSE);
+ assert(!secluded_page->vmp_fictitious);
+ assert(!VM_PAGE_WIRED(secluded_page));
+
+ if (secluded_page->vmp_object == 0) {
+ /* transfer to free queue */
+ assert(secluded_page->vmp_busy);
+ secluded_page->vmp_snext = *local_freeq;
+ *local_freeq = secluded_page;
+ *local_freed += 1;
+ } else {
+ /* transfer to head of active queue */
+ vm_page_enqueue_active(secluded_page, FALSE);
+ secluded_page = VM_PAGE_NULL;
+ }
+ }
+#else /* CONFIG_SECLUDED_MEMORY */
+
+#pragma unused(local_freeq)
+#pragma unused(local_freed)
+
+ return;
+
+#endif /* CONFIG_SECLUDED_MEMORY */
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it initializes the loop targets for vm_pageout_scan().
+ */
+static void
+vps_init_page_targets(void)
+{
+ /*
+ * LD TODO: Other page targets should be calculated here too.
+ */
+ vm_page_anonymous_min = vm_page_inactive_target / 20;
+
+ if (vm_pageout_state.vm_page_speculative_percentage > 50) {
+ vm_pageout_state.vm_page_speculative_percentage = 50;
+ } else if (vm_pageout_state.vm_page_speculative_percentage <= 0) {
+ vm_pageout_state.vm_page_speculative_percentage = 1;
+ }
+
+ vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count);
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it purges a single VM object at a time and will either
+ * make vm_pageout_scan() restart the loop or keep moving forward.
+ */
+static int
+vps_purge_object(void)
+{
+ int force_purge;
+
+ assert(available_for_purge >= 0);
+ force_purge = 0; /* no force-purging */
+
+#if VM_PRESSURE_EVENTS
+ vm_pressure_level_t pressure_level;
+
+ pressure_level = memorystatus_vm_pressure_level;
+
+ if (pressure_level > kVMPressureNormal) {
+ if (pressure_level >= kVMPressureCritical) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_critical;
+ } else if (pressure_level >= kVMPressureUrgent) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
+ } else if (pressure_level >= kVMPressureWarning) {
+ force_purge = vm_pageout_state.memorystatus_purge_on_warning;
+ }
+ }
+#endif /* VM_PRESSURE_EVENTS */
+
+ if (available_for_purge || force_purge) {
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
+
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
+ if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
+ VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
+
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ }
+ VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
+ memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
+ }
+
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will try to age the next speculative Q if the oldest
+ * one is empty.
+ */
+static int
+vps_age_speculative_queue(boolean_t force_speculative_aging)
+{
+#define DELAY_SPECULATIVE_AGE 1000
+
+ /*
+ * try to pull pages from the aging bins...
+ * see vm_page.h for an explanation of how
+ * this mechanism works
+ */
+ boolean_t can_steal = FALSE;
+ int num_scanned_queues;
+	static int delay_speculative_age = 0; /* depends on the # of times we go through the main pageout_scan loop. */
+ mach_timespec_t ts;
+ struct vm_speculative_age_q *aq;
+ struct vm_speculative_age_q *sq;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ aq = &vm_page_queue_speculative[speculative_steal_index];
+
+ num_scanned_queues = 0;
+ while (vm_page_queue_empty(&aq->age_q) &&
+ num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
+ speculative_steal_index++;
+
+ if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
+ speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+ }
+
+ aq = &vm_page_queue_speculative[speculative_steal_index];
+ }
+
+ if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
+ /*
+ * XXX We've scanned all the speculative
+ * queues but still haven't found one
+ * that is not empty, even though
+ * vm_page_speculative_count is not 0.
+ */
+ if (!vm_page_queue_empty(&sq->age_q)) {
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ }
+#if DEVELOPMENT || DEBUG
+ panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
+#endif
+ /* readjust... */
+ vm_page_speculative_count = 0;
+ /* ... and continue */
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ }
+
+ if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) {
+ can_steal = TRUE;
+ } else {
+ if (!delay_speculative_age) {
+ mach_timespec_t ts_fully_aged;
+
+ ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
+ ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
+ * 1000 * NSEC_PER_USEC;
+
+ ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
+
+ clock_sec_t sec;
+ clock_nsec_t nsec;
+ clock_get_system_nanotime(&sec, &nsec);
+ ts.tv_sec = (unsigned int) sec;
+ ts.tv_nsec = nsec;
+
+ if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) {
+ can_steal = TRUE;
+ } else {
+ delay_speculative_age++;
+ }
+ } else {
+ delay_speculative_age++;
+ if (delay_speculative_age == DELAY_SPECULATIVE_AGE) {
+ delay_speculative_age = 0;
+ }
+ }
+ }
+ if (can_steal == TRUE) {
+ vm_page_speculate_ageit(aq);
+ }
+
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it evicts a single VM object from the cache.
+ */
+static inline int
+vps_object_cache_evict(vm_object_t *object_to_unlock)
+{
+ static int cache_evict_throttle = 0;
+ struct vm_speculative_age_q *sq;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
+ int pages_evicted;
+
+ if (*object_to_unlock != NULL) {
+ vm_object_unlock(*object_to_unlock);
+ *object_to_unlock = NULL;
+ }
+ KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+ pages_evicted = vm_object_cache_evict(100, 10);
+
+ KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
+
+ if (pages_evicted) {
+ vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
+
+ VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
+ vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
+ memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
+
+ /*
+ * we just freed up to 100 pages,
+ * so go back to the top of the main loop
+			 * and re-evaluate the memory situation
+ */
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ } else {
+ cache_evict_throttle = 1000;
+ }
+ }
+ if (cache_evict_throttle) {
+ cache_evict_throttle--;
+ }
+
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
+
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it calculates the filecache min. that needs to be maintained
+ * as we start to steal pages.
+ */
+static void
+vps_calculate_filecache_min(void)
+{
+ int divisor = vm_pageout_state.vm_page_filecache_min_divisor;
+
+#if CONFIG_JETSAM
+ /*
+ * don't let the filecache_min fall below 15% of available memory
+ * on systems with an active compressor that isn't nearing its
+ * limits w/r to accepting new data
+ *
+ * on systems w/o the compressor/swapper, the filecache is always
+ * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
+ * since most (if not all) of the anonymous pages are in the
+ * throttled queue (which isn't counted as available) which
+ * effectively disables this filter
+ */
+ if (vm_compressor_low_on_space() || divisor == 0) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ } else {
+ vm_pageout_state.vm_page_filecache_min =
+ ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
+ }
+#else
+ if (vm_compressor_out_of_space() || divisor == 0) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ } else {
+ /*
+ * don't let the filecache_min fall below the specified critical level
+ */
+ vm_pageout_state.vm_page_filecache_min =
+ ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
+ }
+#endif
+ if (vm_page_free_count < (vm_page_free_reserved / 4)) {
+ vm_pageout_state.vm_page_filecache_min = 0;
+ }
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it updates the flow control time to detect if VM pageoutscan
+ * isn't making progress.
+ */
+static void
+vps_flow_control_reset_deadlock_timer(struct flow_control *flow_control)
+{
+ mach_timespec_t ts;
+ clock_sec_t sec;
+ clock_nsec_t nsec;
+
+ ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
+ ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
+ clock_get_system_nanotime(&sec, &nsec);
+ flow_control->ts.tv_sec = (unsigned int) sec;
+ flow_control->ts.tv_nsec = nsec;
+ ADD_MACH_TIMESPEC(&flow_control->ts, &ts);
+
+ flow_control->state = FCS_DELAYED;
+
+ vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it is the flow control logic of VM pageout scan which
+ * controls if it should block and for how long.
+ * Any blocking of vm_pageout_scan happens ONLY in this function.
+ */
+static int
+vps_flow_control(struct flow_control *flow_control, int *anons_grabbed, vm_object_t *object, int *delayed_unlock,
+ vm_page_t *local_freeq, int *local_freed, int *vm_pageout_deadlock_target, unsigned int inactive_burst_count)
+{
+ boolean_t exceeded_burst_throttle = FALSE;
+ unsigned int msecs = 0;
+ uint32_t inactive_external_count;
+ mach_timespec_t ts;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct vm_speculative_age_q *sq;
+
+ iq = &vm_pageout_queue_internal;
+ eq = &vm_pageout_queue_external;
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ /*
+ * Sometimes we have to pause:
+ * 1) No inactive pages - nothing to do.
+ * 2) Loop control - no acceptable pages found on the inactive queue
+ * within the last vm_pageout_burst_inactive_throttle iterations
+ * 3) Flow control - default pageout queue is full
+ */
+ if (vm_page_queue_empty(&vm_page_queue_inactive) &&
+ vm_page_queue_empty(&vm_page_queue_anonymous) &&
+ vm_page_queue_empty(&vm_page_queue_cleaned) &&
+ vm_page_queue_empty(&sq->age_q)) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
+ msecs = vm_pageout_state.vm_pageout_empty_wait;
+ } else if (inactive_burst_count >=
+ MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
+ (vm_page_inactive_count +
+ vm_page_speculative_count))) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
+ msecs = vm_pageout_state.vm_pageout_burst_wait;
+
+ exceeded_burst_throttle = TRUE;
+ } else if (VM_PAGE_Q_THROTTLED(iq) &&
+ VM_DYNAMIC_PAGING_ENABLED()) {
+ clock_sec_t sec;
+ clock_nsec_t nsec;
+
+ switch (flow_control->state) {
+ case FCS_IDLE:
+ if ((vm_page_free_count + *local_freed) < vm_page_free_target &&
+ vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
+ /*
+ * since the compressor is running independently of vm_pageout_scan
+ * let's not wait for it just yet... as long as we have a healthy supply
+ * of filecache pages to work with, let's keep stealing those.
+ */
+ inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
+
+ if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
+ (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
+ *anons_grabbed = ANONS_GRABBED_LIMIT;
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+ }
+
+ vps_flow_control_reset_deadlock_timer(flow_control);
+ msecs = vm_pageout_state.vm_pageout_deadlock_wait;
+
+ break;
+
+ case FCS_DELAYED:
+ clock_get_system_nanotime(&sec, &nsec);
+ ts.tv_sec = (unsigned int) sec;
+ ts.tv_nsec = nsec;
+
+ if (CMP_MACH_TIMESPEC(&ts, &flow_control->ts) >= 0) {
+ /*
+ * the pageout thread for the default pager is potentially
+ * deadlocked since the
+ * default pager queue has been throttled for more than the
+ * allowable time... we need to move some clean pages or dirty
+ * pages belonging to the external pagers if they aren't throttled
+ * vm_page_free_wanted represents the number of threads currently
+ * blocked waiting for pages... we'll move one page for each of
+ * these plus a fixed amount to break the logjam... once we're done
+				 * moving this number of pages, we'll re-enter the FCS_DELAYED state
+ * with a new timeout target since we have no way of knowing
+ * whether we've broken the deadlock except through observation
+ * of the queue associated with the default pager... we need to
+ * stop moving pages and allow the system to run to see what
+ * state it settles into.
+ */
+
+ *vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
+ vm_page_free_wanted + vm_page_free_wanted_privileged;
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
+ flow_control->state = FCS_DEADLOCK_DETECTED;
+ thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+ /*
+ * just resniff instead of trying
+ * to compute a new delay time... we're going to be
+ * awakened immediately upon a laundry completion,
+ * so we won't wait any longer than necessary
+ */
+ msecs = vm_pageout_state.vm_pageout_idle_wait;
+ break;
+
+ case FCS_DEADLOCK_DETECTED:
+ if (*vm_pageout_deadlock_target) {
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+
+ vps_flow_control_reset_deadlock_timer(flow_control);
+ msecs = vm_pageout_state.vm_pageout_deadlock_wait;
+
+ break;
+ }
+ } else {
+ /*
+ * No need to pause...
+ */
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+
+ vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+
+ vm_pageout_prepare_to_block(object, delayed_unlock, local_freeq, local_freed,
+ VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
+
+ if (vm_page_free_count >= vm_page_free_target) {
+ /*
+ * we're here because
+ * 1) someone else freed up some pages while we had
+ * the queues unlocked above
+ * and we've hit one of the 3 conditions that
+ * cause us to pause the pageout scan thread
+ *
+ * since we already have enough free pages,
+ * let's avoid stalling and return normally
+ *
+ * before we return, make sure the pageout I/O threads
+ * are running throttled in case there are still requests
+ * in the laundry... since we have enough free pages
+ * we don't need the laundry to be cleaned in a timely
+ * fashion... so let's avoid interfering with foreground
+ * activity
+ *
+ * we don't want to hold vm_page_queue_free_lock when
+ * calling vm_pageout_adjust_eq_iothrottle (since it
+		 * may cause other locks to be taken), we do the initial
+ * check outside of the lock. Once we take the lock,
+ * we recheck the condition since it may have changed.
+ * if it has, no problem, we will make the threads
+ * non-throttled before actually blocking
+ */
+ vm_pageout_adjust_eq_iothrottle(eq, TRUE);
+ }
+ lck_mtx_lock(&vm_page_queue_free_lock);
+
+ if (vm_page_free_count >= vm_page_free_target &&
+ (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
+ return VM_PAGEOUT_SCAN_DONE_RETURN;
+ }
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+
+ if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
+ /*
+ * we're most likely about to block due to one of
+ * the 3 conditions that cause vm_pageout_scan to
+ * not be able to make forward progress w/r
+ * to providing new pages to the free queue,
+ * so unthrottle the I/O threads in case we
+ * have laundry to be cleaned... it needs
+ * to be completed ASAP.
+ *
+ * even if we don't block, we want the io threads
+ * running unthrottled since the sum of free +
+ * clean pages is still under our free target
+ */
+ vm_pageout_adjust_eq_iothrottle(eq, FALSE);
+ }
+ if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
+ /*
+ * if we get here we're below our free target and
+ * we're stalling due to a full laundry queue or
+		 * we don't have any inactive pages other than
+ * those in the clean queue...
+ * however, we have pages on the clean queue that
+ * can be moved to the free queue, so let's not
+ * stall the pageout scan
+ */
+ flow_control->state = FCS_IDLE;
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+ if (flow_control->state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
+ flow_control->state = FCS_IDLE;
+ return VM_PAGEOUT_SCAN_PROCEED;
+ }
+
+ VM_CHECK_MEMORYSTATUS;
+
+ if (flow_control->state != FCS_IDLE) {
+ VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
+ }
+
+ iq->pgo_throttled = TRUE;
+ assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
+
+ counter(c_vm_pageout_scan_block++);
+
+ vm_page_unlock_queues();
+
+ assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
+
+ VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
+ iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+ memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
+ iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+ memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
+
+ vm_page_lock_queues();
+
+ iq->pgo_throttled = FALSE;
+
+ vps_init_page_targets();
+
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will find and return the most appropriate page to be
+ * reclaimed.
+ */
+static int
+vps_choose_victim_page(vm_page_t *victim_page, int *anons_grabbed, boolean_t *grab_anonymous, boolean_t force_anonymous,
+ boolean_t *is_page_from_bg_q, unsigned int *reactivated_this_call)
+{
+ vm_page_t m = NULL;
+ vm_object_t m_object = VM_OBJECT_NULL;
+ uint32_t inactive_external_count;
+ struct vm_speculative_age_q *sq;
+ struct vm_pageout_queue *iq;
+ int retval = VM_PAGEOUT_SCAN_PROCEED;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+ iq = &vm_pageout_queue_internal;
+
+ *is_page_from_bg_q = FALSE;
+
+ m = NULL;
+ m_object = VM_OBJECT_NULL;
+
+ if (VM_DYNAMIC_PAGING_ENABLED()) {
+ assert(vm_page_throttled_count == 0);
+ assert(vm_page_queue_empty(&vm_page_queue_throttled));
+ }
+
+ /*
+ * Try for a clean-queue inactive page.
+ * These are pages that vm_pageout_scan tried to steal earlier, but
+ * were dirty and had to be cleaned. Pick them up now that they are clean.
+ */
+ if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
+
+ goto found_page;
+ }
+
+ /*
+ * The next most eligible pages are ones we paged in speculatively,
+ * but which have not yet been touched and have been aged out.
+ */
+ if (!vm_page_queue_empty(&sq->age_q)) {
+ m = (vm_page_t) vm_page_queue_first(&sq->age_q);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
+
+ if (!m->vmp_dirty || force_anonymous == FALSE) {
+ goto found_page;
+ } else {
+ m = NULL;
+ }
+ }
+
+#if CONFIG_BACKGROUND_QUEUE
+ if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
+ vm_object_t bg_m_object = NULL;
+
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
+
+ bg_m_object = VM_PAGE_OBJECT(m);
+
+ if (!VM_PAGE_PAGEABLE(m)) {
+ /*
+ * This page is on the background queue
+ * but not on a pageable queue. This is
+ * likely a transient state and whoever
+ * took it out of its pageable queue
+ * will likely put it back on a pageable
+ * queue soon but we can't deal with it
+ * at this point, so let's ignore this
+ * page.
+ */
+ } else if (force_anonymous == FALSE || bg_m_object->internal) {
+ if (bg_m_object->internal &&
+ (VM_PAGE_Q_THROTTLED(iq) ||
+ vm_compressor_out_of_space() == TRUE ||
+ vm_page_free_count < (vm_page_free_reserved / 4))) {
+ vm_pageout_skipped_bq_internal++;
+ } else {
+ *is_page_from_bg_q = TRUE;
+
+ if (bg_m_object->internal) {
+ vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
+ } else {
+ vm_pageout_vminfo.vm_pageout_considered_bq_external++;
+ }
+ goto found_page;
+ }
+ }
+ }
+#endif /* CONFIG_BACKGROUND_QUEUE */
+
+ inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
+
+ if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
+ (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
+ *grab_anonymous = TRUE;
+ *anons_grabbed = 0;
+
+ vm_pageout_vminfo.vm_pageout_skipped_external++;
+ goto want_anonymous;
+ }
+ *grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
+
+#if CONFIG_JETSAM
+ /* If the file-backed pool has accumulated
+ * significantly more pages than the jetsam
+ * threshold, prefer to reclaim those
+ * inline to minimise compute overhead of reclaiming
+ * anonymous pages.
+ * This calculation does not account for the CPU local
+ * external page queues, as those are expected to be
+ * much smaller relative to the global pools.
+ */
+
+ struct vm_pageout_queue *eq = &vm_pageout_queue_external;
+
+ if (*grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
+ if (vm_page_pageable_external_count >
+ vm_pageout_state.vm_page_filecache_min) {
+ if ((vm_page_pageable_external_count *
+ vm_pageout_memorystatus_fb_factor_dr) >
+ (memorystatus_available_pages_critical *
+ vm_pageout_memorystatus_fb_factor_nr)) {
+ *grab_anonymous = FALSE;
+
+ VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
+ }
+ }
+ if (*grab_anonymous) {
+ VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
+ }
+ }
+#endif /* CONFIG_JETSAM */
+
+want_anonymous:
+ if (*grab_anonymous == FALSE || *anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ if (!vm_page_queue_empty(&vm_page_queue_inactive)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
+ *anons_grabbed = 0;
+
+ if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
+ if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ if ((++(*reactivated_this_call) % 100)) {
+ vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
+
+ vm_page_activate(m);
+ VM_STAT_INCR(reactivations);
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+ if (*is_page_from_bg_q == TRUE) {
+ if (m_object->internal) {
+ vm_pageout_rejected_bq_internal++;
+ } else {
+ vm_pageout_rejected_bq_external++;
+ }
+ }
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+ vm_pageout_state.vm_pageout_inactive_used++;
+
+ m = NULL;
+ retval = VM_PAGEOUT_SCAN_NEXT_ITERATION;
+
+ goto found_page;
+ }
+
+ /*
+ * steal 1 of the file backed pages even if
+ * we are under the limit that has been set
+ * for a healthy filecache
+ */
+ }
+ }
+ goto found_page;
+ }
+ }
+ if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
+ *anons_grabbed += 1;
+
+ goto found_page;
+ }
+
+ m = NULL;
+
+found_page:
+ *victim_page = m;
+
+ return retval;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will put a page back on the active/inactive queue
+ * if we can't reclaim it for some reason.
+ */
+static void
+vps_requeue_page(vm_page_t m, int page_prev_q_state, __unused boolean_t page_from_bg_q)
+{
+ if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
+ vm_page_enqueue_inactive(m, FALSE);
+ } else {
+ vm_page_activate(m);
+ }
+
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+ vm_object_t m_object = VM_PAGE_OBJECT(m);
+
+ if (page_from_bg_q == TRUE) {
+ if (m_object->internal) {
+ vm_pageout_rejected_bq_internal++;
+ } else {
+ vm_pageout_rejected_bq_external++;
+ }
+ }
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it will try to grab the victim page's VM object (m_object)
+ * which differs from the previous victim page's object (object).
+ */
+static int
+vps_switch_object(vm_page_t m, vm_object_t m_object, vm_object_t *object, int page_prev_q_state, boolean_t avoid_anon_pages, boolean_t page_from_bg_q)
+{
+ struct vm_speculative_age_q *sq;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ /*
+ * the object associated with candidate page is
+ * different from the one we were just working
+ * with... dump the lock if we still own it
+ */
+ if (*object != NULL) {
+ vm_object_unlock(*object);
+ *object = NULL;
+ }
+ /*
+	 * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run... otherwise, we're likely to trip over this
+ * object in the same state as we work our way through
+ * the queue... clumps of pages associated with the same
+ * object are fairly typical on the inactive and active queues
+ */
+ if (!vm_object_lock_try_scan(m_object)) {
+ vm_page_t m_want = NULL;
+
+ vm_pageout_vminfo.vm_pageout_inactive_nolock++;
+
+ if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
+ VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
+ }
+
+ pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
+
+ m->vmp_reference = FALSE;
+
+ if (!m_object->object_is_shared_cache) {
+ /*
+ * don't apply this optimization if this is the shared cache
+ * object, it's too easy to get rid of very hot and important
+ * pages...
+ * m->vmp_object must be stable since we hold the page queues lock...
+ * we can update the scan_collisions field sans the object lock
+ * since it is a separate field and this is the only spot that does
+ * a read-modify-write operation and it is never executed concurrently...
+ * we can asynchronously set this field to 0 when creating a UPL, so it
+			 * is possible for the value to be a bit non-deterministic, but that's ok
+ * since it's only used as a hint
+ */
+ m_object->scan_collisions = 1;
+ }
+ if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
+ } else if (!vm_page_queue_empty(&sq->age_q)) {
+ m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
+ } else if ((avoid_anon_pages || vm_page_queue_empty(&vm_page_queue_anonymous)) &&
+ !vm_page_queue_empty(&vm_page_queue_inactive)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
+ } else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
+ m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
+ }
+
+ /*
+ * this is the next object we're going to be interested in
+		 * try to make sure it's available after the mutex_pause
+ * returns control
+ */
+ if (m_want) {
+ vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
+ }
+
+ vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
+
+ return VM_PAGEOUT_SCAN_NEXT_ITERATION;
+ } else {
+ *object = m_object;
+ vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+ }
+
+ return VM_PAGEOUT_SCAN_PROCEED;
+}
+
+/*
+ * This function is called only from vm_pageout_scan and
+ * it notices that pageout scan may be rendered ineffective
+ * due to a FS deadlock and will jetsam a process if possible.
+ * If jetsam isn't supported, it'll move the page to the active
+ * queue to try and get some different pages pushed onwards so
+ * we can try to get out of this scenario.
+ */
+static void
+vps_deal_with_throttled_queues(vm_page_t m, vm_object_t *object, uint32_t *vm_pageout_inactive_external_forced_reactivate_limit,
+ int *delayed_unlock, boolean_t *force_anonymous, __unused boolean_t is_page_from_bg_q)
+{
+ struct vm_pageout_queue *eq;
+ vm_object_t cur_object = VM_OBJECT_NULL;
+
+ cur_object = *object;
+
+ eq = &vm_pageout_queue_external;
+
+ if (cur_object->internal == FALSE) {
+ /*
+ * we need to break up the following potential deadlock case...
+ * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
+ * b) The thread doing the writing is waiting for pages while holding the truncate lock
+ * c) Most of the pages in the inactive queue belong to this file.
+ *
+ * we are potentially in this deadlock because...
+ * a) the external pageout queue is throttled
+ * b) we're done with the active queue and moved on to the inactive queue
+ * c) we've got a dirty external page
+ *
+ * since we don't know the reason for the external pageout queue being throttled we
+ * must suspect that we are deadlocked, so move the current page onto the active queue
+ * in an effort to cause a page from the active queue to 'age' to the inactive queue
+ *
+ * if we don't have jetsam configured (i.e. we have a dynamic pager), set
+ * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
+ * pool the next time we select a victim page... if we can make enough new free pages,
+ * the deadlock will break, the external pageout queue will empty and it will no longer
+ * be throttled
+ *
+ * if we have jetsam configured, keep a count of the pages reactivated this way so
+ * that we can try to find clean pages in the active/inactive queues before
+ * deciding to jetsam a process
+ */
+ vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
+
+ vm_page_check_pageable_safe(m);
+ assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
+ vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq);
+ m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
+ vm_page_active_count++;
+ vm_page_pageable_external_count++;
+
+ vm_pageout_adjust_eq_iothrottle(eq, FALSE);
+
+#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
+
+#pragma unused(force_anonymous)
+
+ *vm_pageout_inactive_external_forced_reactivate_limit -= 1;
+
+ if (*vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
+ *vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+ /*
+ * Possible deadlock scenario so request jetsam action
+ */
+
+ assert(cur_object);
+ vm_object_unlock(cur_object);
+
+ cur_object = VM_OBJECT_NULL;
+
+ /*
+ * VM pageout scan needs to know we have dropped this lock and so set the
+ * object variable we got passed in to NULL.
+ */
+ *object = VM_OBJECT_NULL;
+
+ vm_page_unlock_queues();
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
+ vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
+
+ /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
+ if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
+ VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
+ }
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
+ vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
+
+ vm_page_lock_queues();
+ *delayed_unlock = 1;
+ }
+#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+
+#pragma unused(vm_pageout_inactive_external_forced_reactivate_limit)
+#pragma unused(delayed_unlock)
+
+ *force_anonymous = TRUE;
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+ } else {
+ vm_page_activate(m);
+ VM_STAT_INCR(reactivations);
+
+#if CONFIG_BACKGROUND_QUEUE
+#if DEVELOPMENT || DEBUG
+ if (is_page_from_bg_q == TRUE) {
+ if (cur_object->internal) {
+ vm_pageout_rejected_bq_internal++;
+ } else {
+ vm_pageout_rejected_bq_external++;
+ }
+ }
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_BACKGROUND_QUEUE */
+
+ vm_pageout_state.vm_pageout_inactive_used++;
+ }
+}
+
+
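+/*
+ * move pages (up to 'max_to_move' of them) from the head of the
+ * active queue to the inactive queue until vm_page_inactive_target
+ * is met; the reference bit is cleared (without a TLB flush) as
+ * each page is deactivated.  called with the page queues lock held
+ */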
+void
+vm_page_balance_inactive(int max_to_move)
+{
+ vm_page_t m;
+
+ LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
+ if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
+ /*
+ * It is likely that the hibernation code path is
+ * dealing with these very queues as we are about
+ * to move pages around in/from them and completely
+ * change the linkage of the pages.
+ *
+ * And so we skip the rebalancing of these queues.
+ */
+ return;
+ }
+ vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count +
+ vm_page_speculative_count);
+
+ while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
+ VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
+
+ m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
+
+ assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
+ assert(!m->vmp_laundry);
+ assert(VM_PAGE_OBJECT(m) != kernel_object);
+ assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
+
+ DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
+
+ /*
+ * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
+ *
+ * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
+ * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
+		 * new reference happens. If no further references happen on the page after that remote TLB flushes
+ * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
+ * by pageout_scan, which is just fine since the last reference would have happened quite far
+ * in the past (TLB caches don't hang around for very long), and of course could just as easily
+ * have happened before we moved the page
+ */
+ if (m->vmp_pmapped == TRUE) {
+ pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
+ }
+
+ /*
+ * The page might be absent or busy,
+ * but vm_page_deactivate can handle that.
+ * FALSE indicates that we don't want a H/W clear reference
+ */
+ vm_page_deactivate_internal(m, FALSE);
+ }
+}
+
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
+ * held and vm_page_free_wanted == 0.
+ */
+void
+vm_pageout_scan(void)
+{
+ unsigned int loop_count = 0;
+ unsigned int inactive_burst_count = 0;
+ unsigned int reactivated_this_call;
+ unsigned int reactivate_limit;
+ vm_page_t local_freeq = NULL;
+ int local_freed = 0;
+ int delayed_unlock;
+ int delayed_unlock_limit = 0;
+ int refmod_state = 0;
+ int vm_pageout_deadlock_target = 0;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct vm_speculative_age_q *sq;
+ struct flow_control flow_control = { .state = 0, .ts = { .tv_sec = 0, .tv_nsec = 0 } };
+ boolean_t inactive_throttled = FALSE;
+ vm_object_t object = NULL;
+ uint32_t inactive_reclaim_run;
+ boolean_t grab_anonymous = FALSE;
+ boolean_t force_anonymous = FALSE;
+ boolean_t force_speculative_aging = FALSE;
+ int anons_grabbed = 0;
+ int page_prev_q_state = 0;
+ boolean_t page_from_bg_q = FALSE;
+ uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
+ vm_object_t m_object = VM_OBJECT_NULL;
+ int retval = 0;
+ boolean_t lock_yield_check = FALSE;
+
+
+ VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
+ vm_pageout_vminfo.vm_pageout_freed_speculative,
+ vm_pageout_state.vm_pageout_inactive_clean,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
+ vm_pageout_vminfo.vm_pageout_inactive_dirty_external);