+#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128
+#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... */
+ /* we use it to size an array on the stack */
+
+int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW;
+
+#define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024)
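+
+/*
+ * vm_default_behind is the size (in pages) of the trailing window
+ * examined by vm_fault_deactivate_behind for objects with default
+ * behavior; MAX_SEQUENTIAL_RUN bounds object->sequential, which
+ * tracks the length (in bytes) of a detected sequential run.
+ */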
+
+/*
+ * vm_fault_is_sequential
+ *
+ * Determine if sequential access is in progress
+ * in accordance with the behavior specified.
+ * Update state to indicate current access pattern.
+ *
+ * object must have at least the shared lock held
+ */
+static
+void
+vm_fault_is_sequential(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_behavior_t behavior)
+{
+ vm_object_offset_t last_alloc;
+ int sequential;
+ int orig_sequential;
+
+ last_alloc = object->last_alloc;
+ sequential = object->sequential;
+ orig_sequential = sequential;
+
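+ /*
+ * object->sequential is a signed byte count: it grows by PAGE_SIZE
+ * for each page referenced in ascending order and shrinks by PAGE_SIZE
+ * for descending references, so its sign encodes the direction of the
+ * run and its magnitude the length of the run so far.
+ */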
+ switch (behavior) {
+ case VM_BEHAVIOR_RANDOM:
+ /*
+ * reset indicator of sequential behavior
+ */
+ sequential = 0;
+ break;
+
+ case VM_BEHAVIOR_SEQUENTIAL:
+ if (offset && last_alloc == offset - PAGE_SIZE_64) {
+ /*
+ * advance indicator of sequential behavior
+ */
+ if (sequential < MAX_SEQUENTIAL_RUN)
+ sequential += PAGE_SIZE;
+ } else {
+ /*
+ * reset indicator of sequential behavior
+ */
+ sequential = 0;
+ }
+ break;
+
+ case VM_BEHAVIOR_RSEQNTL:
+ if (last_alloc && last_alloc == offset + PAGE_SIZE_64) {
+ /*
+ * advance indicator of sequential behavior
+ */
+ if (sequential > -MAX_SEQUENTIAL_RUN)
+ sequential -= PAGE_SIZE;
+ } else {
+ /*
+ * reset indicator of sequential behavior
+ */
+ sequential = 0;
+ }
+ break;
+
+ case VM_BEHAVIOR_DEFAULT:
+ default:
+ if (offset && last_alloc == (offset - PAGE_SIZE_64)) {
+ /*
+ * advance indicator of sequential behavior
+ */
+ if (sequential < 0)
+ sequential = 0;
+ if (sequential < MAX_SEQUENTIAL_RUN)
+ sequential += PAGE_SIZE;
+
+ } else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) {
+ /*
+ * advance indicator of sequential behavior
+ */
+ if (sequential > 0)
+ sequential = 0;
+ if (sequential > -MAX_SEQUENTIAL_RUN)
+ sequential -= PAGE_SIZE;
+ } else {
+ /*
+ * reset indicator of sequential behavior
+ */
+ sequential = 0;
+ }
+ break;
+ }
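+ /*
+ * the object is only required to be held with the shared lock, so
+ * other faulting threads may be updating this state concurrently...
+ * publish the new run length with a compare-and-swap and back off
+ * if another thread beat us to it.
+ */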
+ if (sequential != orig_sequential) {
+ if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) {
+ /*
+ * if someone else has already updated object->sequential
+ * don't bother trying to update it or object->last_alloc
+ */
+ return;
+ }
+ }
+ /*
+ * I'd like to do this with an OSCompareAndSwap64, but that
+ * doesn't exist for PPC... however, it shouldn't matter
+ * that much... last_alloc is maintained so that we can determine
+ * if a sequential access pattern is taking place... if only
+ * one thread is banging on this object, no problem with the unprotected
+ * update... if 2 or more threads are banging away, we run the risk of
+ * someone seeing a mangled update... however, in the face of multiple
+ * accesses, no sequential access pattern can develop anyway, so we
+ * haven't lost any real info.
+ */
+ object->last_alloc = offset;
+}
+
+
+int vm_page_deactivate_behind_count = 0;
+
+/*
+ * vm_fault_deactivate_behind
+ *
+ * Determine if sequential access is in progress
+ * in accordance with the behavior specified. If
+ * so, compute a potential page to deactivate and
+ * deactivate it.
+ *
+ * object must be locked.
+ *
+ * return TRUE if we actually deactivate a page
+ */
+static
+boolean_t
+vm_fault_deactivate_behind(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_behavior_t behavior)
+{
+ int n;
+ int pages_in_run = 0;
+ int max_pages_in_run = 0;
+ int sequential_run;
+ int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+ vm_object_offset_t run_offset = 0;
+ vm_object_offset_t pg_offset = 0;
+ vm_page_t m;
+ vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER];
+
+ pages_in_run = 0;
+#if TRACEFAULTPAGE
+ dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
+#endif
+
+ if (object == kernel_object || vm_page_deactivate_behind == FALSE) {
+ /*
+ * Do not deactivate pages from the kernel object: they
+ * are not intended to become pageable... or the
+ * deactivate-behind mechanism has been disabled.
+ */
+ return FALSE;
+ }
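+ /*
+ * object->sequential is signed: a negative value indicates the run
+ * is moving through the object in descending order... normalize it
+ * to a positive magnitude and record the direction separately.
+ */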
+ if ((sequential_run = object->sequential)) {
+ if (sequential_run < 0) {
+ sequential_behavior = VM_BEHAVIOR_RSEQNTL;
+ sequential_run = 0 - sequential_run;
+ } else {
+ sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+ }
+ }
+ switch (behavior) {
+ case VM_BEHAVIOR_RANDOM:
+ break;
+ case VM_BEHAVIOR_SEQUENTIAL:
+ if (sequential_run >= (int)PAGE_SIZE) {
+ run_offset = 0 - PAGE_SIZE_64;
+ max_pages_in_run = 1;
+ }
+ break;
+ case VM_BEHAVIOR_RSEQNTL:
+ if (sequential_run >= (int)PAGE_SIZE) {
+ run_offset = PAGE_SIZE_64;
+ max_pages_in_run = 1;
+ }
+ break;
+ case VM_BEHAVIOR_DEFAULT:
+ default:
+ { vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
+
+ /*
+ * determine if the run of sequential accesses has been
+ * long enough on an object with default access behavior
+ * to consider it for deactivation
+ */
+ if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) {
+ /*
+ * the comparisons between offset and behind are done
+ * in this kind of odd fashion in order to prevent wrap around
+ * at the end points
+ */
+ if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) {
+ if (offset >= behind) {
+ run_offset = 0 - behind;
+ pg_offset = PAGE_SIZE_64;
+ max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
+ }
+ } else {
+ if (offset < -behind) {
+ run_offset = behind;
+ pg_offset = 0 - PAGE_SIZE_64;
+ max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
+ }
+ }
+ }
+ break;
+ }
+ }
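+ /*
+ * walk the window computed above (trailing the faulting offset for a
+ * forward run, ahead of it for a reverse run), collect the resident
+ * pages that are safe to deactivate and clear their reference bits
+ * without forcing a TLB flush.
+ */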
+ for (n = 0; n < max_pages_in_run; n++) {
+ m = vm_page_lookup(object, offset + run_offset + (n * pg_offset));
+
+ if (m && !m->laundry && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) {
+ page_run[pages_in_run++] = m;
+
+ /*
+ * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
+ *
+ * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
+ * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
+ * new reference happens. If no further references happen on the page after that remote TLB is flushed,
+ * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
+ * by pageout_scan, which is just fine since the last reference would have happened quite far
+ * in the past (TLB caches don't hang around for very long), and of course could just as easily
+ * have happened before we did the deactivate_behind.
+ */
+ pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
+ }
+ }
+ if (pages_in_run) {
+ vm_page_lockspin_queues();
+
+ for (n = 0; n < pages_in_run; n++) {
+
+ m = page_run[n];
+
+ vm_page_deactivate_internal(m, FALSE);
+
+ vm_page_deactivate_behind_count++;
+#if TRACEFAULTPAGE
+ dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
+#endif
+ }
+ vm_page_unlock_queues();
+
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
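+/*
+ * vm_page_throttled
+ *
+ * decide whether the current thread should be stalled because it is
+ * creating pages faster than the system can absorb them...
+ * returns 0 if no throttling is called for, otherwise a delay value
+ * (SOFT_THROTTLE_DELAY or HARD_THROTTLE_DELAY) for the caller to
+ * pass to delay().
+ */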
+static int
+vm_page_throttled(void)
+{
+ clock_sec_t elapsed_sec;
+ clock_sec_t tv_sec;
+ clock_usec_t tv_usec;
+
+ thread_t thread = current_thread();
+
+ if (thread->options & TH_OPT_VMPRIV)
+ return (0);
+
+ thread->t_page_creation_count++;
+
+ if (NEED_TO_HARD_THROTTLE_THIS_TASK())
+ return (HARD_THROTTLE_DELAY);
+
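+ /*
+ * consider soft throttling only when the free page count has dropped
+ * below the throttle limit (or the compressor/swapper needs relief)
+ * and this thread has itself created more pages than
+ * vm_page_creation_throttle allows.
+ */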
+ if ((vm_page_free_count < vm_page_throttle_limit || ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && SWAPPER_NEEDS_TO_UNTHROTTLE())) &&
+ thread->t_page_creation_count > vm_page_creation_throttle) {
+
+ clock_get_system_microtime(&tv_sec, &tv_usec);
+
+ elapsed_sec = tv_sec - thread->t_page_creation_time;
+
+ if (elapsed_sec <= 6 || (thread->t_page_creation_count / elapsed_sec) >= (vm_page_creation_throttle / 6)) {
+
+ if (elapsed_sec >= 60) {
+ /*
+ * we'll reset our stats to give a well behaved app
+ * that was unlucky enough to accumulate a bunch of pages
+ * over a long period of time a chance to get out of
+ * the throttled state... we reset the counter and timestamp
+ * so that if it stays under the rate limit for the next second
+ * it will be back in our good graces... if it exceeds it, it
+ * will remain in the throttled state
+ */
+ thread->t_page_creation_time = tv_sec;
+ thread->t_page_creation_count = (vm_page_creation_throttle / 6) * 5;
+ }
+ ++vm_page_throttle_count;
+
+ if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && HARD_THROTTLE_LIMIT_REACHED())
+ return (HARD_THROTTLE_DELAY);
+ else
+ return (SOFT_THROTTLE_DELAY);
+ }
+ thread->t_page_creation_time = tv_sec;
+ thread->t_page_creation_count = 0;
+ }
+ return (0);
+}
+
+
+/*
+ * check for various conditions that would
+ * prevent us from creating a ZF page...
+ * cleanup is based on being called from vm_fault_page
+ *
+ * object must be locked
+ * object == m->object
+ */
+static vm_fault_return_t
+vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state)
+{
+ int throttle_delay;
+
+ if (object->shadow_severed ||
+ VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) {
+ /*
+ * Either:
+ * 1. the shadow chain was severed,
+ * 2. the purgeable object is volatile or empty and is marked
+ * to fault on access while volatile.
+ * Just have to return an error at this point
+ */
+ if (m != VM_PAGE_NULL)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+
+ thread_interrupt_level(interruptible_state);
+
+ return (VM_FAULT_MEMORY_ERROR);
+ }
+ if (vm_backing_store_low) {
+ /*
+ * are we protecting the system from
+ * backing store exhaustion? If so,
+ * sleep unless we are privileged.
+ */
+ if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {
+
+ if (m != VM_PAGE_NULL)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+
+ assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
+
+ thread_block(THREAD_CONTINUE_NULL);
+ thread_interrupt_level(interruptible_state);
+
+ return (VM_FAULT_RETRY);
+ }
+ }
+ if ((throttle_delay = vm_page_throttled())) {
+ /*
+ * we're throttling zero-fills...
+ * treat this as if we couldn't grab a page
+ */
+ if (m != VM_PAGE_NULL)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+
+ VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
+
+ delay(throttle_delay);
+
+ if (current_thread_aborted()) {
+ thread_interrupt_level(interruptible_state);
+ return VM_FAULT_INTERRUPTED;
+ }
+ thread_interrupt_level(interruptible_state);
+
+ return (VM_FAULT_MEMORY_SHORTAGE);
+ }
+ return (VM_FAULT_SUCCESS);
+}
+
+
+/*
+ * do the work to zero fill a page and
+ * inject it into the correct paging queue
+ *
+ * m->object must be locked
+ * page queue lock must NOT be held
+ */
+static int
+vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
+{
+ int my_fault = DBG_ZERO_FILL_FAULT;
+
+ /*
+ * This is a zero-fill page fault...
+ *
+ * Checking the page lock is a waste of
+ * time; this page was absent, so
+ * it can't be page locked by a pager.
+ *
+ * we also consider it undefined
+ * with respect to instruction
+ * execution. i.e. it is the responsibility
+ * of higher layers to call for an instruction
+ * sync after changing the contents and before
+ * sending a program into this area. We
+ * choose this approach for performance
+ */
+ m->pmapped = TRUE;
+
+ m->cs_validated = FALSE;
+ m->cs_tainted = FALSE;
+
+ if (no_zero_fill == TRUE) {
+ my_fault = DBG_NZF_PAGE_FAULT;
+ } else {
+ vm_page_zero_fill(m);
+
+ VM_STAT_INCR(zero_fill_count);
+ DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
+ }
+ assert(!m->laundry);
+ assert(m->object != kernel_object);
+ //assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+
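+ /*
+ * with no default pager configured this zero-filled page has no
+ * backing store it could ever be cleaned to, so move it to the
+ * throttled queue rather than letting pageout_scan repeatedly
+ * consider it on the inactive queue.
+ */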
+ if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
+ (m->object->purgable == VM_PURGABLE_DENY ||
+ m->object->purgable == VM_PURGABLE_NONVOLATILE ||
+ m->object->purgable == VM_PURGABLE_VOLATILE )) {
+
+ vm_page_lockspin_queues();
+
+ if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
+ assert(!VM_PAGE_WIRED(m));
+
+ /*
+ * can't be on the pageout queue since we don't
+ * have a pager to try and clean to
+ */
+ assert(!m->pageout_queue);
+
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
+ m->throttled = TRUE;
+ vm_page_throttled_count++;
+ }
+ vm_page_unlock_queues();
+ }
+ return (my_fault);
+}