+
+
+/*
+ *	vm_object_cluster_size
+ *
+ *	Determine how big a cluster we should issue an I/O for...
+ *
+ *	Inputs:   *start  == offset of the page needed
+ *		  *length == maximum cluster size the pager can handle
+ *	Outputs:  *start  == beginning offset of the cluster
+ *		  *length == length of the cluster to try
+ *
+ *	The original *start will be encompassed by the cluster.
+ *
+ */
+extern int speculative_reads_disabled;
+
+/*
+ * Histograms bumped by vm_object_cluster_size(), indexed by a size
+ * expressed in pages (pre_heat_size / PAGE_SIZE and *length / PAGE_SIZE).
+ * Those sizes are clamped to MAX_UPL_TRANSFER * PAGE_SIZE, so the index
+ * can be any value in 0..MAX_UPL_TRANSFER inclusive; the arrays therefore
+ * need MAX_UPL_TRANSFER + 1 slots to keep the top index in bounds.
+ */
+uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
+uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
+
+/*
+ * Multiplier applied to the pre-heat sizes (and to the "prime the pump"
+ * threshold) used for VM_BEHAVIOR_DEFAULT in vm_object_cluster_size().
+ */
+#define PRE_HEAT_MULTIPLIER 4
+
+/*
+ * Notes on vm_object_cluster_size():
+ *  - *length is always set to at least PAGE_SIZE; if speculative reads are
+ *    disabled, fault_info is NULL or the incoming *length is 0, the routine
+ *    returns right away without touching the object.
+ *  - otherwise the object lock is taken and released inside this routine.
+ *  - pre_heat_scaling[] is bumped with the planned size (in pages) and
+ *    pre_heat_cluster[] with the final *length (in pages).
+ */
+__private_extern__ void
+vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
+		       vm_size_t *length, vm_object_fault_info_t fault_info)
+{
+	vm_size_t		pre_heat_size;
+	vm_size_t		tail_size;
+	vm_size_t		head_size;
+	vm_size_t		max_length;
+	vm_size_t		cluster_size;
+	vm_object_offset_t	object_size;
+	vm_object_offset_t	orig_start;
+	vm_object_offset_t	target_start;
+	vm_object_offset_t	offset;
+	vm_behavior_t		behavior;
+	boolean_t		look_behind = TRUE;
+	boolean_t		look_ahead = TRUE;
+	int			sequential_run;
+	int			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+
+	assert( !(*length & PAGE_MASK));
+	assert( !(*start & PAGE_MASK_64));
+
+	/*
+	 * clamp the caller's limit to MAX_UPL_TRANSFER pages
+	 */
+	if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) )
+		max_length = (MAX_UPL_TRANSFER * PAGE_SIZE);
+	/*
+	 * we'll always return a cluster size of at least
+	 * 1 page, since the original fault must always
+	 * be processed
+	 */
+	*length = PAGE_SIZE;
+
+	if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
+		/*
+		 * no cluster... just fault the page in
+		 */
+		return;
+	}
+	orig_start = *start;
+	target_start = orig_start;
+	cluster_size = round_page_32(fault_info->cluster_size);
+	behavior = fault_info->behavior;
+
+	vm_object_lock(object);
+
+	if (object->internal)
+		object_size = object->size;
+	else if (object->pager != MEMORY_OBJECT_NULL)
+		vnode_pager_get_object_size(object->pager, &object_size);
+	else
+		goto out;	/* pager is gone for this object, nothing more to do */
+
+	object_size = round_page_64(object_size);
+
+	if (orig_start >= object_size) {
+		/*
+		 * fault occurred beyond the EOF...
+		 * we need to punt w/o changing the
+		 * starting offset
+		 */
+		goto out;
+	}
+	if (object->pages_used > object->pages_created) {
+		/*
+		 * must have wrapped our 32 bit counters
+		 * so reset
+		 */
+		object->pages_used = object->pages_created = 0;
+	}
+	/*
+	 * a negative object->sequential selects the reverse-sequential
+	 * behavior; from here on sequential_run holds its magnitude
+	 */
+	if ((sequential_run = object->sequential)) {
+		if (sequential_run < 0) {
+			sequential_behavior = VM_BEHAVIOR_RSEQNTL;
+			sequential_run = 0 - sequential_run;
+		} else {
+			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+		}
+	}
+	switch(behavior) {
+
+	default:
+		/*
+		 * unrecognized behaviors are treated as the default one
+		 */
+		behavior = VM_BEHAVIOR_DEFAULT;
+		/* FALLTHROUGH */
+
+	case VM_BEHAVIOR_DEFAULT:
+		if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
+			goto out;
+
+		if (sequential_run >= (3 * PAGE_SIZE)) {
+			/*
+			 * an established sequential run... only look in
+			 * the direction the run is heading
+			 */
+			pre_heat_size = sequential_run + PAGE_SIZE;
+
+			if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL)
+				look_behind = FALSE;
+			else
+				look_ahead = FALSE;
+		} else {
+			uint32_t	pages_unused;
+
+			if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) {
+				/*
+				 * prime the pump
+				 */
+				pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
+				break;
+			}
+			/*
+			 * the smaller the fraction of created pages that
+			 * went unused, the bigger the cluster we try
+			 */
+			pages_unused = object->pages_created - object->pages_used;
+
+			if (pages_unused < (object->pages_created / 8)) {
+				pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER;
+			} else if (pages_unused < (object->pages_created / 4)) {
+				pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER;
+			} else if (pages_unused < (object->pages_created / 2)) {
+				pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
+			} else {
+				pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER;
+			}
+		}
+		break;
+
+	case VM_BEHAVIOR_RANDOM:
+		if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
+			goto out;
+		break;
+
+	case VM_BEHAVIOR_SEQUENTIAL:
+		if ((pre_heat_size = cluster_size) == 0)
+			pre_heat_size = sequential_run + PAGE_SIZE;
+		look_behind = FALSE;
+
+		break;
+
+	case VM_BEHAVIOR_RSEQNTL:
+		if ((pre_heat_size = cluster_size) == 0)
+			pre_heat_size = sequential_run + PAGE_SIZE;
+		look_ahead = FALSE;
+
+		break;
+
+	}
+	if (pre_heat_size > max_length)
+		pre_heat_size = max_length;
+
+	if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target)
+		pre_heat_size /= 2;
+
+	if (look_ahead == TRUE) {
+		if (look_behind == TRUE) {
+			/*
+			 * build the alignment mask at the width of the
+			 * offset... where vm_size_t is narrower than
+			 * vm_object_offset_t, ~(pre_heat_size - 1) would be
+			 * zero-extended and wipe out the upper bits of
+			 * target_start
+			 */
+			target_start &= ~((vm_object_offset_t)pre_heat_size - 1);
+		}
+
+		/*
+		 * don't plan past the (page rounded) end of the object
+		 */
+		if ((target_start + pre_heat_size) > object_size)
+			pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start));
+
+		tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE;
+	} else {
+		if (pre_heat_size > target_start)
+			pre_heat_size = target_start;
+		tail_size = 0;
+	}
+	pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
+
+	if (pre_heat_size <= PAGE_SIZE)
+		goto out;
+
+	if (look_behind == TRUE) {
+		/*
+		 * take a look at the pages before the original
+		 * faulting offset
+		 */
+		head_size = pre_heat_size - tail_size - PAGE_SIZE;
+
+		for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
+			/*
+			 * don't poke below the lowest offset
+			 */
+			if (offset < fault_info->lo_offset)
+				break;
+			/*
+			 * for external objects and internal objects w/o an existence map
+			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
+			 */
+#if MACH_PAGEMAP
+			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
+				/*
+				 * we know for a fact that the pager can't provide the page
+				 * so don't include it or any pages beyond it in this cluster
+				 */
+				break;
+			}
+#endif
+			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
+				/*
+				 * don't bridge resident pages
+				 */
+				break;
+			}
+			/*
+			 * the cluster now begins at this page
+			 */
+			*start = offset;
+			*length += PAGE_SIZE;
+		}
+	}
+	if (look_ahead == TRUE) {
+		for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
+			/*
+			 * don't poke above the highest offset
+			 */
+			if (offset >= fault_info->hi_offset)
+				break;
+			/*
+			 * for external objects and internal objects w/o an existence map
+			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
+			 */
+#if MACH_PAGEMAP
+			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
+				/*
+				 * we know for a fact that the pager can't provide the page
+				 * so don't include it or any pages beyond it in this cluster
+				 */
+				break;
+			}
+#endif
+			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
+				/*
+				 * don't bridge resident pages
+				 */
+				break;
+			}
+			*length += PAGE_SIZE;
+		}
+	}
+out:
+	pre_heat_cluster[*length / PAGE_SIZE]++;
+
+	vm_object_unlock(object);
+}
+
+
+/*
+ *	vm_object_page_op
+ *
+ *	Inspect and/or change the state of the single page found at
+ *	"offset" in "object".  This is really part of the UPL regimen,
+ *	but it operates directly on the VM object rather than on a UPL.
+ *
+ *	ops:
+ *	  UPL_POP_PHYSICAL  for a phys_contiguous object, hand back
+ *			    (shadow_offset >> 12) in *phys_entry
+ *	  UPL_POP_DUMP	    disconnect the page from any pmap and free it
+ *	  UPL_POP_SET/CLR   set/clear the state bits selected by
+ *			    UPL_POP_{DIRTY,PAGEOUT,PRECIOUS,ABSENT,BUSY}
+ *
+ *	*flags (if non-NULL) receives the page state as it was before
+ *	any SET/CLR was applied.  *phys_entry (if non-NULL) receives the
+ *	physical page number.
+ *
+ *	Returns KERN_INVALID_OBJECT when the object's phys_contiguous
+ *	state doesn't fit the request, KERN_FAILURE when no page is
+ *	resident at "offset", KERN_SUCCESS otherwise.
+ */
+
+kern_return_t
+vm_object_page_op(
+	vm_object_t		object,
+	vm_object_offset_t	offset,
+	int			ops,
+	ppnum_t			*phys_entry,
+	int			*flags)
+{
+	vm_page_t	page;
+	boolean_t	want_set_busy;
+
+	vm_object_lock(object);
+
+	if (ops & UPL_POP_PHYSICAL) {
+		kern_return_t	kr = KERN_INVALID_OBJECT;
+
+		if (object->phys_contiguous) {
+			if (phys_entry)
+				*phys_entry = (ppnum_t)(object->shadow_offset >> 12);
+			kr = KERN_SUCCESS;
+		}
+		vm_object_unlock(object);
+		return kr;
+	}
+	if (object->phys_contiguous) {
+		/* page level operations don't apply to these objects */
+		vm_object_unlock(object);
+		return KERN_INVALID_OBJECT;
+	}
+	want_set_busy = ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY));
+
+	/*
+	 * Find the page.  If the request needs the busy bit (or is
+	 * going to dump the page) and someone else is playing with it,
+	 * sleep and look it up again.
+	 */
+	for (;;) {
+		page = vm_page_lookup(object, offset);
+
+		if (page == VM_PAGE_NULL) {
+			vm_object_unlock(object);
+			return KERN_FAILURE;
+		}
+		if ((page->busy || page->cleaning) &&
+		    (want_set_busy || (ops & UPL_POP_DUMP))) {
+			PAGE_SLEEP(object, page, THREAD_UNINT);
+			continue;
+		}
+		break;
+	}
+
+	if (ops & UPL_POP_DUMP) {
+		if (page->pmapped == TRUE)
+			pmap_disconnect(page->phys_page);
+
+		vm_page_lock_queues();
+		vm_page_free(page);
+		vm_page_unlock_queues();
+
+		vm_object_unlock(object);
+		return KERN_SUCCESS;
+	}
+
+	if (flags) {
+		/*
+		 * report the state of the page as it stands before
+		 * the requested ops are carried out
+		 */
+		int	state = 0;
+
+		if (page->dirty)
+			state |= UPL_POP_DIRTY;
+		if (page->pageout)
+			state |= UPL_POP_PAGEOUT;
+		if (page->precious)
+			state |= UPL_POP_PRECIOUS;
+		if (page->absent)
+			state |= UPL_POP_ABSENT;
+		if (page->busy)
+			state |= UPL_POP_BUSY;
+
+		*flags = state;
+	}
+
+	if (ops & UPL_POP_SET) {
+		/*
+		 * The caller is expected to have set UPL_POP_BUSY either
+		 * in an earlier call or as part of this one.  The assert
+		 * can't catch every violation of that convention (the
+		 * page may be busy on someone else's behalf), but
+		 * repeated violations will trip it sooner or later.
+		 */
+		assert(page->busy || (ops & UPL_POP_BUSY));
+
+		if (ops & UPL_POP_DIRTY)
+			page->dirty = TRUE;
+		if (ops & UPL_POP_PAGEOUT)
+			page->pageout = TRUE;
+		if (ops & UPL_POP_PRECIOUS)
+			page->precious = TRUE;
+		if (ops & UPL_POP_ABSENT)
+			page->absent = TRUE;
+		if (ops & UPL_POP_BUSY)
+			page->busy = TRUE;
+	}
+
+	if (ops & UPL_POP_CLR) {
+		assert(page->busy);
+
+		if (ops & UPL_POP_DIRTY)
+			page->dirty = FALSE;
+		if (ops & UPL_POP_PAGEOUT)
+			page->pageout = FALSE;
+		if (ops & UPL_POP_PRECIOUS)
+			page->precious = FALSE;
+		if (ops & UPL_POP_ABSENT)
+			page->absent = FALSE;
+		if (ops & UPL_POP_BUSY) {
+			page->busy = FALSE;
+			PAGE_WAKEUP(page);
+		}
+	}
+
+	if (page->encrypted) {
+		/*
+		 * ENCRYPTED SWAP:
+		 * The page has to be decrypted before the caller can
+		 * look at its contents, and the caller has to keep it
+		 * "busy" for as long as it does so... otherwise the
+		 * page could get recycled or re-encrypted at any time.
+		 */
+		if (want_set_busy && page->busy) {
+			/*
+			 * stable enough to be accessed by the caller,
+			 * so make sure the contents are in the clear
+			 */
+			vm_page_decrypt(page, 0);
+		} else {
+			/*
+			 * Not held busy for the caller: anything could
+			 * happen to the page before it gets used, so
+			 * don't bother decrypting and don't hand out
+			 * access to it.
+			 */
+			assert(!phys_entry);
+		}
+	}
+
+	if (phys_entry) {
+		/*
+		 * The physical page number only stays valid while the
+		 * page is kept busy.
+		 * ENCRYPTED SWAP: never let the caller get at an
+		 * encrypted page.
+		 */
+		assert(page->busy);
+		assert(!page->encrypted);
+		*phys_entry = page->phys_page;
+	}
+
+	vm_object_unlock(object);
+	return KERN_SUCCESS;
+
+}
+
+/*
+ * vm_object_range_op offers performance enhancement over
+ * vm_object_page_op for page_op functions which do not require page
+ * level state to be returned from the call.  Page_op was created to provide
+ * a low-cost alternative to page manipulation via UPLs when only a single
+ * page was involved.  The range_op call establishes the ability in the _op
+ * family of functions to work on multiple pages where the lack of page level
+ * state handling allows the caller to avoid the overhead of the upl structures.
+ *
+ * The scan walks [offset_beg, offset_end) a page at a time, starting at
+ * offset_beg rounded down to a page boundary:
+ *   UPL_ROP_DUMP	every resident page found is disconnected and freed
+ *   UPL_ROP_ABSENT	(without DUMP) the scan stops at the first resident page
+ *   UPL_ROP_PRESENT	the scan stops at the first non-resident page
+ *
+ * *range (if non-NULL) receives the distance in bytes from offset_beg to
+ * the point where the scan stopped, never less than 0 and never reaching
+ * past offset_end.  It is left untouched when KERN_INVALID_OBJECT is
+ * returned for a phys_contiguous object.
+ */
+
+kern_return_t
+vm_object_range_op(
+	vm_object_t		object,
+	vm_object_offset_t	offset_beg,
+	vm_object_offset_t	offset_end,
+	int			ops,
+	int			*range)
+{
+	vm_object_offset_t	offset;
+	vm_page_t		dst_page;
+
+	/*
+	 * quick exit, checked before the object lock is taken:
+	 * with nothing resident there is nothing to dump and the
+	 * whole range is absent
+	 */
+	if (object->resident_page_count == 0) {
+		if (range) {
+			if (ops & UPL_ROP_PRESENT)
+				*range = 0;
+			else
+				*range = offset_end - offset_beg;
+		}
+		return KERN_SUCCESS;
+	}
+	vm_object_lock(object);
+
+	if (object->phys_contiguous) {
+		vm_object_unlock(object);
+		return KERN_INVALID_OBJECT;
+	}
+
+	offset = offset_beg & ~PAGE_MASK_64;
+
+	while (offset < offset_end) {
+		dst_page = vm_page_lookup(object, offset);
+		if (dst_page != VM_PAGE_NULL) {
+			if (ops & UPL_ROP_DUMP) {
+				if (dst_page->busy || dst_page->cleaning) {
+					/*
+					 * someone else is playing with the
+					 * page, we will have to wait
+					 */
+					PAGE_SLEEP(object, dst_page, THREAD_UNINT);
+					/*
+					 * need to relook the page up since its
+					 * state may have changed while we slept
+					 * it might even belong to a different object
+					 * at this point
+					 */
+					continue;
+				}
+				if (dst_page->pmapped == TRUE)
+					pmap_disconnect(dst_page->phys_page);
+
+				vm_page_lock_queues();
+				vm_page_free(dst_page);
+				vm_page_unlock_queues();
+
+			} else if (ops & UPL_ROP_ABSENT)
+				break;
+		} else if (ops & UPL_ROP_PRESENT)
+			break;
+
+		offset += PAGE_SIZE;
+	}
+	vm_object_unlock(object);
+
+	if (range) {
+		if (offset > offset_end)
+			offset = offset_end;
+		/*
+		 * the scan began at the page boundary at or below
+		 * offset_beg, so if it stopped on that first page
+		 * "offset" can still be below an unaligned offset_beg...
+		 * report an empty range rather than letting the
+		 * subtraction wrap into a negative value
+		 */
+		if (offset > offset_beg)
+			*range = offset - offset_beg;
+		else
+			*range = 0;
+	}
+	return KERN_SUCCESS;
+}
+
+
+/*
+ * Bumped each time one of the vm_object_lock*() wrappers is asked to lock
+ * the object currently recorded in vm_pageout_scan_wants_object.
+ */
+uint32_t scan_object_collision = 0;
+
+/*
+ * vm_object_lock
+ *
+ * Take the object's lock exclusively.  If the object is the one recorded
+ * in vm_pageout_scan_wants_object, count the collision and mutex_pause()
+ * before going after the lock.
+ */
+void
+vm_object_lock(vm_object_t object)
+{
+	boolean_t	scan_wants_it = (vm_pageout_scan_wants_object == object);
+
+	if (scan_wants_it) {
+		scan_object_collision += 1;
+		mutex_pause(2);
+	}
+	lck_rw_lock_exclusive(&object->Lock);
+}
+
+/*
+ * vm_object_lock_try
+ *
+ * Attempt to take the object's lock exclusively and return the result of
+ * lck_rw_try_lock_exclusive().  If the object is the one recorded in
+ * vm_pageout_scan_wants_object, count the collision and mutex_pause()
+ * before making the attempt.
+ */
+boolean_t
+vm_object_lock_try(vm_object_t object)
+{
+	boolean_t	got_lock;
+
+	if (vm_pageout_scan_wants_object == object) {
+		scan_object_collision += 1;
+		mutex_pause(2);
+	}
+	got_lock = lck_rw_try_lock_exclusive(&object->Lock);
+
+	return got_lock;
+}
+
+/*
+ * vm_object_lock_shared
+ *
+ * Take the object's lock shared.  If the object is the one recorded in
+ * vm_pageout_scan_wants_object, count the collision and mutex_pause()
+ * before going after the lock.
+ */
+void
+vm_object_lock_shared(vm_object_t object)
+{
+	boolean_t	scan_wants_it = (vm_pageout_scan_wants_object == object);
+
+	if (scan_wants_it) {
+		scan_object_collision += 1;
+		mutex_pause(2);
+	}
+	lck_rw_lock_shared(&object->Lock);
+}
+
+/*
+ * vm_object_lock_try_shared
+ *
+ * Attempt to take the object's lock shared and return the result of
+ * lck_rw_try_lock_shared().  If the object is the one recorded in
+ * vm_pageout_scan_wants_object, count the collision and mutex_pause()
+ * before making the attempt.
+ */
+boolean_t
+vm_object_lock_try_shared(vm_object_t object)
+{
+	boolean_t	got_lock;
+
+	if (vm_pageout_scan_wants_object == object) {
+		scan_object_collision += 1;
+		mutex_pause(2);
+	}
+	got_lock = lck_rw_try_lock_shared(&object->Lock);
+
+	return got_lock;
+}