+
+/*
+ * vm_object_transpose
+ *
+ * This routine takes two VM objects of the same size and exchanges
+ * their backing store.
+ * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
+ * and UPL_BLOCK_ACCESS if they are referenced anywhere.
+ *
+ * The VM objects must not be locked by caller.
+ */
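+/*
+ * Illustrative caller-side sketch (not part of this change; variable names
+ * and error handling are hypothetical). The quiescing described above would
+ * typically be done with a UPL covering each object before the transpose;
+ * in-tree this is driven through upl_transpose():
+ *
+ *	int flags = UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS;
+ *	unsigned int count = 0;
+ *	upl_t upl1, upl2;
+ *
+ *	vm_object_upl_request(object1, 0, size, &upl1, NULL, &count, flags);
+ *	vm_object_upl_request(object2, 0, size, &upl2, NULL, &count, flags);
+ *	kr = vm_object_transpose(object1, object2, size);
+ *	upl_abort(upl1, 0);	upl_deallocate(upl1);
+ *	upl_abort(upl2, 0);	upl_deallocate(upl2);
+ */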
+unsigned int vm_object_transpose_count = 0;
+kern_return_t
+vm_object_transpose(
+ vm_object_t object1,
+ vm_object_t object2,
+ vm_object_size_t transpose_size)
+{
+ vm_object_t tmp_object;
+ kern_return_t retval;
+ boolean_t object1_locked, object2_locked;
+ vm_page_t page;
+ vm_object_offset_t page_offset;
+ lck_mtx_t *hash_lck;
+ vm_object_hash_entry_t hash_entry;
+
+ tmp_object = VM_OBJECT_NULL;
+ object1_locked = FALSE; object2_locked = FALSE;
+
+ if (object1 == object2 ||
+ object1 == VM_OBJECT_NULL ||
+ object2 == VM_OBJECT_NULL) {
+ /*
+ * If the 2 VM objects are the same, there's
+ * no point in exchanging their backing store.
+ * A NULL object is not a valid argument either.
+ */
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+
+ /*
+ * Since we need to lock both objects at the same time,
+ * make sure we always lock them in the same order to
+ * avoid deadlocks.
+ */
+ if (object1 > object2) {
+ tmp_object = object1;
+ object1 = object2;
+ object2 = tmp_object;
+ }
+
+ /*
+ * Allocate a temporary VM object to hold object1's contents
+ * while we copy object2 to object1.
+ */
+ tmp_object = vm_object_allocate(transpose_size);
+ vm_object_lock(tmp_object);
+ tmp_object->can_persist = FALSE;
+
+
+ /*
+ * Grab control of the 1st VM object.
+ */
+ vm_object_lock(object1);
+ object1_locked = TRUE;
+ if (!object1->alive || object1->terminating ||
+ object1->copy || object1->shadow || object1->shadowed ||
+ object1->purgable != VM_PURGABLE_DENY) {
+ /*
+ * We don't deal with copy or shadow objects (yet).
+ */
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+ /*
+ * We're about to mess with the object's backing store and
+ * taking a "paging_in_progress" reference wouldn't be enough
+ * to prevent any paging activity on this object, so the caller should
+ * have "quiesced" the objects beforehand, via a UPL operation with
+ * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
+ * and UPL_BLOCK_ACCESS (to mark the pages "busy").
+ *
+ * Wait for any paging operation to complete (but only paging, not
+ * other kinds of activity not linked to the pager). After we're
+ * satisfied that there's no more paging in progress, we keep the
+ * object locked, to guarantee that no one tries to access its pager.
+ */
+ vm_object_paging_only_wait(object1, THREAD_UNINT);
+
+ /*
+ * Same as above for the 2nd object...
+ */
+ vm_object_lock(object2);
+ object2_locked = TRUE;
+ if (! object2->alive || object2->terminating ||
+ object2->copy || object2->shadow || object2->shadowed ||
+ object2->purgable != VM_PURGABLE_DENY) {
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+ vm_object_paging_only_wait(object2, THREAD_UNINT);
+
+
+ if (object1->size != object2->size ||
+ object1->size != transpose_size) {
+ /*
+ * If the 2 objects don't have the same size, we can't
+ * exchange their backing stores or one would overflow.
+ * If their size doesn't match the caller's
+ * "transpose_size", we can't do it either because the
+ * transpose operation will affect the entire span of
+ * the objects.
+ */
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+
+
+ /*
+ * Transpose the lists of resident pages.
+ * This also updates the resident_page_count and the memq_hint.
+ */
+ if (object1->phys_contiguous || queue_empty(&object1->memq)) {
+ /*
+ * object1 is physically contiguous or has no resident
+ * pages to move, so just transfer pages from object2 to
+ * object1. No need to go through an intermediate object.
+ */
+ while (!queue_empty(&object2->memq)) {
+ page = (vm_page_t) queue_first(&object2->memq);
+ vm_page_rename(page, object1, page->offset, FALSE);
+ }
+ assert(queue_empty(&object2->memq));
+ } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
+ /*
+ * object2 is physically contiguous or has no resident
+ * pages to move, so just transfer pages from object1 to
+ * object2. No need to go through an intermediate object.
+ */
+ while (!queue_empty(&object1->memq)) {
+ page = (vm_page_t) queue_first(&object1->memq);
+ vm_page_rename(page, object2, page->offset, FALSE);
+ }
+ assert(queue_empty(&object1->memq));
+ } else {
+ /* transfer object1's pages to tmp_object */
+ while (!queue_empty(&object1->memq)) {
+ page = (vm_page_t) queue_first(&object1->memq);
+ page_offset = page->offset;
+ vm_page_remove(page, TRUE);
+ page->offset = page_offset;
+ queue_enter(&tmp_object->memq, page, vm_page_t, listq);
+ }
+ assert(queue_empty(&object1->memq));
+ /* transfer object2's pages to object1 */
+ while (!queue_empty(&object2->memq)) {
+ page = (vm_page_t) queue_first(&object2->memq);
+ vm_page_rename(page, object1, page->offset, FALSE);
+ }
+ assert(queue_empty(&object2->memq));
+ /* transfer tmp_object's pages to object2 */
+ while (!queue_empty(&tmp_object->memq)) {
+ page = (vm_page_t) queue_first(&tmp_object->memq);
+ queue_remove(&tmp_object->memq, page,
+ vm_page_t, listq);
+ vm_page_insert(page, object2, page->offset);
+ }
+ assert(queue_empty(&tmp_object->memq));
+ }
+
+#define __TRANSPOSE_FIELD(field) \
+MACRO_BEGIN \
+ tmp_object->field = object1->field; \
+ object1->field = object2->field; \
+ object2->field = tmp_object->field; \
+MACRO_END
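+
+/*
+ * For example, __TRANSPOSE_FIELD(pager) expands (modulo MACRO_BEGIN/MACRO_END)
+ * to a three-way swap through the temporary object:
+ *
+ *	tmp_object->pager = object1->pager;
+ *	object1->pager = object2->pager;
+ *	object2->pager = tmp_object->pager;
+ */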
+
+ /* "Lock" refers to the object not its contents */
+ /* "size" should be identical */
+ assert(object1->size == object2->size);
+ /* "memq_hint" was updated above when transposing pages */
+ /* "ref_count" refers to the object not its contents */
+#if TASK_SWAPPER
+ /* "res_count" refers to the object not its contents */
+#endif
+ /* "resident_page_count" was updated above when transposing pages */
+ /* "wired_page_count" was updated above when transposing pages */
+ /* "reusable_page_count" was updated above when transposing pages */
+ /* there should be no "copy" */
+ assert(!object1->copy);
+ assert(!object2->copy);
+ /* there should be no "shadow" */
+ assert(!object1->shadow);
+ assert(!object2->shadow);
+ __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
+ __TRANSPOSE_FIELD(pager);
+ __TRANSPOSE_FIELD(paging_offset);
+ __TRANSPOSE_FIELD(pager_control);
+ /* update the memory_objects' pointers back to the VM objects */
+ if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
+ memory_object_control_collapse(object1->pager_control,
+ object1);
+ }
+ if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
+ memory_object_control_collapse(object2->pager_control,
+ object2);
+ }
+ __TRANSPOSE_FIELD(copy_strategy);
+ /* "paging_in_progress" refers to the object not its contents */
+ assert(!object1->paging_in_progress);
+ assert(!object2->paging_in_progress);
+ assert(object1->activity_in_progress);
+ assert(object2->activity_in_progress);
+ /* "all_wanted" refers to the object not its contents */
+ __TRANSPOSE_FIELD(pager_created);
+ __TRANSPOSE_FIELD(pager_initialized);
+ __TRANSPOSE_FIELD(pager_ready);
+ __TRANSPOSE_FIELD(pager_trusted);
+ __TRANSPOSE_FIELD(can_persist);
+ __TRANSPOSE_FIELD(internal);
+ __TRANSPOSE_FIELD(temporary);
+ __TRANSPOSE_FIELD(private);
+ __TRANSPOSE_FIELD(pageout);
+ /* "alive" should be set */
+ assert(object1->alive);
+ assert(object2->alive);
+ /* "purgeable" should be non-purgeable */
+ assert(object1->purgable == VM_PURGABLE_DENY);
+ assert(object2->purgable == VM_PURGABLE_DENY);
+ /* "shadowed" refers to the the object not its contents */
+ __TRANSPOSE_FIELD(silent_overwrite);
+ __TRANSPOSE_FIELD(advisory_pageout);
+ __TRANSPOSE_FIELD(true_share);
+ /* "terminating" should not be set */
+ assert(!object1->terminating);
+ assert(!object2->terminating);
+ __TRANSPOSE_FIELD(named);
+ /* "shadow_severed" refers to the object not its contents */
+ __TRANSPOSE_FIELD(phys_contiguous);
+ __TRANSPOSE_FIELD(nophyscache);
+ /* "cached_list.next" points to transposed object */
+ object1->cached_list.next = (queue_entry_t) object2;
+ object2->cached_list.next = (queue_entry_t) object1;
+ /* "cached_list.prev" should be NULL */
+ assert(object1->cached_list.prev == NULL);
+ assert(object2->cached_list.prev == NULL);
+ /* "msr_q" is linked to the object not its contents */
+ assert(queue_empty(&object1->msr_q));
+ assert(queue_empty(&object2->msr_q));
+ __TRANSPOSE_FIELD(last_alloc);
+ __TRANSPOSE_FIELD(sequential);
+ __TRANSPOSE_FIELD(pages_created);
+ __TRANSPOSE_FIELD(pages_used);
+#if MACH_PAGEMAP
+ __TRANSPOSE_FIELD(existence_map);
+#endif
+ __TRANSPOSE_FIELD(cow_hint);
+#if MACH_ASSERT
+ __TRANSPOSE_FIELD(paging_object);
+#endif
+ __TRANSPOSE_FIELD(wimg_bits);
+ __TRANSPOSE_FIELD(code_signed);
+ if (object1->hashed) {
+ hash_lck = vm_object_hash_lock_spin(object2->pager);
+ hash_entry = vm_object_hash_lookup(object2->pager, FALSE);
+ assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
+ hash_entry->object = object2;
+ vm_object_hash_unlock(hash_lck);
+ }
+ if (object2->hashed) {
+ hash_lck = vm_object_hash_lock_spin(object1->pager);
+ hash_entry = vm_object_hash_lookup(object1->pager, FALSE);
+ assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
+ hash_entry->object = object1;
+ vm_object_hash_unlock(hash_lck);
+ }
+ __TRANSPOSE_FIELD(hashed);
+ object1->transposed = TRUE;
+ object2->transposed = TRUE;
+ __TRANSPOSE_FIELD(mapping_in_progress);
+ __TRANSPOSE_FIELD(volatile_empty);
+ __TRANSPOSE_FIELD(volatile_fault);
+ __TRANSPOSE_FIELD(all_reusable);
+ assert(object1->blocked_access);
+ assert(object2->blocked_access);
+ assert(object1->__object2_unused_bits == 0);
+ assert(object2->__object2_unused_bits == 0);
+#if UPL_DEBUG
+ /* "uplq" refers to the object not its contents (see upl_transpose()) */
+#endif
+ assert(object1->objq.next == NULL);
+ assert(object1->objq.prev == NULL);
+ assert(object2->objq.next == NULL);
+ assert(object2->objq.prev == NULL);
+
+#undef __TRANSPOSE_FIELD
+
+ retval = KERN_SUCCESS;
+
+done:
+ /*
+ * Cleanup.
+ */
+ if (tmp_object != VM_OBJECT_NULL) {
+ vm_object_unlock(tmp_object);
+ /*
+ * Re-initialize the temporary object to avoid
+ * deallocating a real pager.
+ */
+ _vm_object_allocate(transpose_size, tmp_object);
+ vm_object_deallocate(tmp_object);
+ tmp_object = VM_OBJECT_NULL;
+ }
+
+ if (object1_locked) {
+ vm_object_unlock(object1);
+ object1_locked = FALSE;
+ }
+ if (object2_locked) {
+ vm_object_unlock(object2);
+ object2_locked = FALSE;
+ }
+
+ vm_object_transpose_count++;
+
+ return retval;
+}
+
+
+/*
+ * vm_object_cluster_size
+ *
+ * Determine how big a cluster we should issue an I/O for...
+ *
+ * Inputs: *start == offset of page needed
+ * *length == maximum cluster pager can handle
+ * Outputs: *start == beginning offset of cluster
+ * *length == length of cluster to try
+ *
+ * The original *start will be encompassed by the cluster
+ *
+ */
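+/*
+ * Illustrative example (hypothetical numbers, assuming 4KB pages, no resident
+ * pages in the way and no clipping by EOF or the fault_info lo/hi offsets):
+ * for a random-access fault at *start == 0x10000 with a computed pre_heat_size
+ * of 8 pages, the routine centers the cluster on the faulting page and
+ * returns *start == 0xC000 and *length == 0x8000.
+ */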
+extern int speculative_reads_disabled;
+#if CONFIG_EMBEDDED
+unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
+unsigned int preheat_pages_min = 8;
+unsigned int preheat_pages_mult = 4;
+#else
+unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
+unsigned int preheat_pages_min = 8;
+unsigned int preheat_pages_mult = 4;
+#endif
+
+uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
+uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
+
+
+__private_extern__ void
+vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
+ vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming)
+{
+ vm_size_t pre_heat_size;
+ vm_size_t tail_size;
+ vm_size_t head_size;
+ vm_size_t max_length;
+ vm_size_t cluster_size;
+ vm_object_offset_t object_size;
+ vm_object_offset_t orig_start;
+ vm_object_offset_t target_start;
+ vm_object_offset_t offset;
+ vm_behavior_t behavior;
+ boolean_t look_behind = TRUE;
+ boolean_t look_ahead = TRUE;
+ uint32_t throttle_limit;
+ int sequential_run;
+ int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+ unsigned int max_ph_size;
+ unsigned int min_ph_size;
+ unsigned int ph_mult;
+
+ assert( !(*length & PAGE_MASK));
+ assert( !(*start & PAGE_MASK_64));
+
+ if ( (ph_mult = preheat_pages_mult) < 1 )
+ ph_mult = 1;
+ if ( (min_ph_size = preheat_pages_min) < 1 )
+ min_ph_size = 1;
+ if ( (max_ph_size = preheat_pages_max) > MAX_UPL_TRANSFER )
+ max_ph_size = MAX_UPL_TRANSFER;
+
+ if ( (max_length = *length) > (max_ph_size * PAGE_SIZE) )
+ max_length = (max_ph_size * PAGE_SIZE);
+
+ /*
+ * we'll always return a cluster size of at least
+ * 1 page, since the original fault must always
+ * be processed
+ */
+ *length = PAGE_SIZE;
+ *io_streaming = 0;
+
+ if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
+ /*
+ * no cluster... just fault the page in
+ */
+ return;
+ }
+ orig_start = *start;
+ target_start = orig_start;
+ cluster_size = round_page(fault_info->cluster_size);
+ behavior = fault_info->behavior;
+
+ vm_object_lock(object);
+
+ if (object->internal)
+ object_size = object->size;
+ else if (object->pager != MEMORY_OBJECT_NULL)
+ vnode_pager_get_object_size(object->pager, &object_size);
+ else
+ goto out; /* pager is gone for this object, nothing more to do */
+
+ object_size = round_page_64(object_size);
+
+ if (orig_start >= object_size) {
+ /*
+ * fault occurred beyond the EOF...
+ * we need to punt w/o changing the
+ * starting offset
+ */
+ goto out;
+ }
+ if (object->pages_used > object->pages_created) {
+ /*
+ * must have wrapped our 32 bit counters
+ * so reset
+ */
+ object->pages_used = object->pages_created = 0;
+ }
+ if ((sequential_run = object->sequential)) {
+ if (sequential_run < 0) {
+ sequential_behavior = VM_BEHAVIOR_RSEQNTL;
+ sequential_run = 0 - sequential_run;
+ } else {
+ sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
+ }
+
+ }
+ switch(behavior) {
+
+ default:
+ behavior = VM_BEHAVIOR_DEFAULT;
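+ /* fall through to the VM_BEHAVIOR_DEFAULT case */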
+
+ case VM_BEHAVIOR_DEFAULT:
+ if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
+ goto out;
+
+ if (sequential_run >= (3 * PAGE_SIZE)) {
+ pre_heat_size = sequential_run + PAGE_SIZE;
+
+ if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL)
+ look_behind = FALSE;
+ else
+ look_ahead = FALSE;
+
+ *io_streaming = 1;
+ } else {
+
+ if (object->pages_created < 32 * ph_mult) {
+ /*
+ * prime the pump
+ */
+ pre_heat_size = PAGE_SIZE * 8 * ph_mult;
+ break;
+ }
+ /*
+ * Linear growth in PH size: The maximum size is max_length...
+ * this calculation will result in a size that is neither a
+ * power of 2 nor a multiple of PAGE_SIZE... so round
+ * it up to the nearest PAGE_SIZE boundary
+ */
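+ /*
+ * For example (illustrative numbers, 4KB pages): with ph_mult == 4,
+ * max_length == 32 pages and a pages_used/pages_created ratio of 1/8,
+ * the formula below yields 16 pages (65536 bytes) of pre-heat before
+ * the rounding step.
+ */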
+ pre_heat_size = (ph_mult * (max_length * object->pages_used) / object->pages_created);
+
+ if (pre_heat_size < PAGE_SIZE * min_ph_size)
+ pre_heat_size = PAGE_SIZE * min_ph_size;
+ else
+ pre_heat_size = round_page(pre_heat_size);
+ }
+ break;
+
+ case VM_BEHAVIOR_RANDOM:
+ if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
+ goto out;
+ break;
+
+ case VM_BEHAVIOR_SEQUENTIAL:
+ if ((pre_heat_size = cluster_size) == 0)
+ pre_heat_size = sequential_run + PAGE_SIZE;
+ look_behind = FALSE;
+ *io_streaming = 1;
+
+ break;
+
+ case VM_BEHAVIOR_RSEQNTL:
+ if ((pre_heat_size = cluster_size) == 0)
+ pre_heat_size = sequential_run + PAGE_SIZE;
+ look_ahead = FALSE;
+ *io_streaming = 1;
+
+ break;
+
+ }
+ throttle_limit = (uint32_t) max_length;
+ assert(throttle_limit == max_length);
+
+ if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) {
+ if (max_length > throttle_limit)
+ max_length = throttle_limit;
+ }
+ if (pre_heat_size > max_length)
+ pre_heat_size = max_length;
+
+ if (behavior == VM_BEHAVIOR_DEFAULT) {
+ if (vm_page_free_count < vm_page_throttle_limit)
+ pre_heat_size = trunc_page(pre_heat_size / 8);
+ else if (vm_page_free_count < vm_page_free_target)
+ pre_heat_size = trunc_page(pre_heat_size / 2);
+
+ if (pre_heat_size <= PAGE_SIZE)
+ goto out;
+ }
+ if (look_ahead == TRUE) {
+ if (look_behind == TRUE) {
+ /*
+ * if we get here it's due to a random access...
+ * so we want to center the original fault address
+ * within the cluster we will issue... make sure
+ * to calculate 'head_size' as a multiple of PAGE_SIZE...
+ * 'pre_heat_size' is a multiple of PAGE_SIZE but not
+ * necessarily an even number of pages so we need to truncate
+ * the result to a PAGE_SIZE boundary
+ */
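+ /*
+ * e.g. (illustrative): with pre_heat_size == 7 pages, head_size comes
+ * out to 3 pages, leaving the faulting page plus a 3 page tail.
+ */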
+ head_size = trunc_page(pre_heat_size / 2);
+
+ if (target_start > head_size)
+ target_start -= head_size;
+ else
+ target_start = 0;
+
+ /*
+ * 'target_start' at this point represents the beginning offset
+ * of the cluster we are considering... 'orig_start' will be in
+ * the center of this cluster if we didn't have to clip the start
+ * due to running into the start of the file
+ */
+ }
+ if ((target_start + pre_heat_size) > object_size)
+ pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start));
+ /*
+ * at this point calculate the number of pages beyond the original fault
+ * address that we want to consider... this is guaranteed not to extend beyond
+ * the current EOF...
+ */
+ assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
+ tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
+ } else {
+ if (pre_heat_size > target_start)
+ pre_heat_size = (vm_size_t) target_start; /* XXX: 32-bit vs 64-bit ? Joe ? */
+ tail_size = 0;
+ }
+ assert( !(target_start & PAGE_MASK_64));
+ assert( !(pre_heat_size & PAGE_MASK));
+
+ pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
+
+ if (pre_heat_size <= PAGE_SIZE)
+ goto out;
+
+ if (look_behind == TRUE) {
+ /*
+ * take a look at the pages before the original
+ * faulting offset... recalculate this in case
+ * we had to clip 'pre_heat_size' above to keep
+ * from running past the EOF.
+ */
+ head_size = pre_heat_size - tail_size - PAGE_SIZE;
+
+ for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
+ /*
+ * don't poke below the lowest offset
+ */
+ if (offset < fault_info->lo_offset)
+ break;
+ /*
+ * for external objects and internal objects w/o an existence map,
+ * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
+ */
+#if MACH_PAGEMAP
+ if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
+ /*
+ * we know for a fact that the pager can't provide the page
+ * so don't include it or any pages beyond it in this cluster
+ */
+ break;
+ }
+#endif
+ if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
+ /*
+ * don't bridge resident pages
+ */
+ break;
+ }
+ *start = offset;
+ *length += PAGE_SIZE;
+ }
+ }
+ if (look_ahead == TRUE) {
+ for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
+ /*
+ * don't poke above the highest offset
+ */
+ if (offset >= fault_info->hi_offset)
+ break;
+ assert(offset < object_size);
+
+ /*
+ * for external objects and internal objects w/o an existence map,
+ * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
+ */
+#if MACH_PAGEMAP
+ if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
+ /*
+ * we know for a fact that the pager can't provide the page
+ * so don't include it or any pages beyond it in this cluster
+ */
+ break;
+ }
+#endif
+ if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
+ /*
+ * don't bridge resident pages
+ */
+ break;
+ }
+ *length += PAGE_SIZE;
+ }
+ }
+out:
+ if (*length > max_length)
+ *length = max_length;
+
+ pre_heat_cluster[*length / PAGE_SIZE]++;
+
+ vm_object_unlock(object);
+}
+
+
+/*
+ * Allow manipulation of individual page state. This is actually part of
+ * the UPL regimen but takes place on the VM object rather than on a UPL
+ */
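+/*
+ * Illustrative (hypothetical) usage sketch, not taken from an in-tree caller:
+ * mark a resident page busy, read its physical page number, then release it.
+ * If the page is not resident, KERN_FAILURE is returned instead.
+ *
+ *	ppnum_t phys;
+ *	int flags;
+ *
+ *	vm_object_page_op(object, offset, UPL_POP_SET | UPL_POP_BUSY, &phys, &flags);
+ *	... use phys while the page stays busy ...
+ *	vm_object_page_op(object, offset, UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
+ */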
+
+kern_return_t
+vm_object_page_op(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ int ops,
+ ppnum_t *phys_entry,
+ int *flags)
+{
+ vm_page_t dst_page;
+
+ vm_object_lock(object);
+
+ if(ops & UPL_POP_PHYSICAL) {
+ if(object->phys_contiguous) {
+ if (phys_entry) {
+ *phys_entry = (ppnum_t)
+ (object->shadow_offset >> PAGE_SHIFT);
+ }
+ vm_object_unlock(object);
+ return KERN_SUCCESS;
+ } else {
+ vm_object_unlock(object);
+ return KERN_INVALID_OBJECT;
+ }
+ }
+ if(object->phys_contiguous) {
+ vm_object_unlock(object);
+ return KERN_INVALID_OBJECT;
+ }
+
+ while(TRUE) {
+ if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
+ vm_object_unlock(object);
+ return KERN_FAILURE;
+ }
+
+ /* Sync up on getting the busy bit */
+ if((dst_page->busy || dst_page->cleaning) &&
+ (((ops & UPL_POP_SET) &&
+ (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
+ /* someone else is playing with the page, we will */
+ /* have to wait */
+ PAGE_SLEEP(object, dst_page, THREAD_UNINT);
+ continue;
+ }
+
+ if (ops & UPL_POP_DUMP) {
+ if (dst_page->pmapped == TRUE)
+ pmap_disconnect(dst_page->phys_page);
+
+ VM_PAGE_FREE(dst_page);
+ break;
+ }
+
+ if (flags) {
+ *flags = 0;
+
+ /* Get the condition of flags before requested ops */
+ /* are undertaken */
+
+ if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
+ if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
+ if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
+ if(dst_page->absent) *flags |= UPL_POP_ABSENT;
+ if(dst_page->busy) *flags |= UPL_POP_BUSY;
+ }
+
+ /* The caller should have made a call either contingent with */
+ /* or prior to this call to set UPL_POP_BUSY */
+ if(ops & UPL_POP_SET) {
+ /* The protection granted with this assert will */
+ /* not be complete. If the caller violates the */
+ /* convention and attempts to change page state */
+ /* without first setting busy we may not see it */
+ /* because the page may already be busy. However */
+ /* if such violations occur we will assert sooner */
+ /* or later. */
+ assert(dst_page->busy || (ops & UPL_POP_BUSY));
+ if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
+ if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
+ if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
+ if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
+ if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
+ }
+
+ if(ops & UPL_POP_CLR) {
+ assert(dst_page->busy);
+ if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
+ if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
+ if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
+ if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
+ if (ops & UPL_POP_BUSY) {
+ dst_page->busy = FALSE;
+ PAGE_WAKEUP(dst_page);
+ }
+ }
+
+ if (dst_page->encrypted) {
+ /*
+ * ENCRYPTED SWAP:
+ * We need to decrypt this encrypted page before the
+ * caller can access its contents.
+ * But if the caller really wants to access the page's
+ * contents, they have to keep the page "busy".
+ * Otherwise, the page could get recycled or re-encrypted
+ * at any time.
+ */
+ if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
+ dst_page->busy) {
+ /*
+ * The page is stable enough to be accessed by
+ * the caller, so make sure its contents are
+ * not encrypted.
+ */
+ vm_page_decrypt(dst_page, 0);
+ } else {
+ /*
+ * The page is not busy, so don't bother
+ * decrypting it, since anything could
+ * happen to it between now and when the
+ * caller wants to access it.
+ * We should not give the caller access
+ * to this page.
+ */
+ assert(!phys_entry);
+ }
+ }
+
+ if (phys_entry) {
+ /*
+ * The physical page number will remain valid
+ * only if the page is kept busy.
+ * ENCRYPTED SWAP: make sure we don't let the
+ * caller access an encrypted page.
+ */
+ assert(dst_page->busy);
+ assert(!dst_page->encrypted);
+ *phys_entry = dst_page->phys_page;
+ }
+
+ break;
+ }
+
+ vm_object_unlock(object);
+ return KERN_SUCCESS;
+
+}
+
+/*
+ * vm_object_range_op offers a performance enhancement over
+ * vm_object_page_op for page_op operations that do not require
+ * per-page state to be returned from the call. Page_op was created to
+ * provide a low-cost alternative to page manipulation via UPLs when only
+ * a single page was involved. The range_op call extends the _op family
+ * of functions to work on multiple pages, where the absence of per-page
+ * state handling lets the caller avoid the overhead of the upl structures.
+ */
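+/*
+ * Illustrative (hypothetical) usage: discard every resident page in a range,
+ * getting back how many bytes of the range were covered:
+ *
+ *	uint32_t covered;
+ *
+ *	kr = vm_object_range_op(object, start, end, UPL_ROP_DUMP, &covered);
+ */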
+
+kern_return_t
+vm_object_range_op(
+ vm_object_t object,
+ vm_object_offset_t offset_beg,
+ vm_object_offset_t offset_end,
+ int ops,
+ uint32_t *range)
+{
+ vm_object_offset_t offset;
+ vm_page_t dst_page;
+
+ if (offset_end - offset_beg > (uint32_t) -1) {
+ /* range is too big and would overflow "*range" */
+ return KERN_INVALID_ARGUMENT;
+ }
+ if (object->resident_page_count == 0) {
+ if (range) {
+ if (ops & UPL_ROP_PRESENT) {
+ *range = 0;
+ } else {
+ *range = (uint32_t) (offset_end - offset_beg);
+ assert(*range == (offset_end - offset_beg));
+ }
+ }
+ return KERN_SUCCESS;
+ }
+ vm_object_lock(object);
+
+ if (object->phys_contiguous) {
+ vm_object_unlock(object);
+ return KERN_INVALID_OBJECT;
+ }
+
+ offset = offset_beg & ~PAGE_MASK_64;
+
+ while (offset < offset_end) {
+ dst_page = vm_page_lookup(object, offset);
+ if (dst_page != VM_PAGE_NULL) {
+ if (ops & UPL_ROP_DUMP) {
+ if (dst_page->busy || dst_page->cleaning) {
+ /*
+ * someone else is playing with the
+ * page, we will have to wait
+ */
+ PAGE_SLEEP(object, dst_page, THREAD_UNINT);
+ /*
+ * need to look the page up again since its
+ * state may have changed while we slept
+ * it might even belong to a different object
+ * at this point
+ */
+ continue;
+ }
+ if (dst_page->pmapped == TRUE)
+ pmap_disconnect(dst_page->phys_page);
+
+ VM_PAGE_FREE(dst_page);
+
+ } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
+ break;
+ } else if (ops & UPL_ROP_PRESENT)
+ break;
+
+ offset += PAGE_SIZE;
+ }
+ vm_object_unlock(object);
+
+ if (range) {
+ if (offset > offset_end)
+ offset = offset_end;
+ if(offset > offset_beg) {
+ *range = (uint32_t) (offset - offset_beg);
+ assert(*range == (offset - offset_beg));
+ } else {
+ *range = 0;
+ }
+ }
+ return KERN_SUCCESS;
+}
+
+
+uint32_t scan_object_collision = 0;
+
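+/*
+ * Lock wrappers: if the pageout scan thread has advertised interest in this
+ * object (vm_pageout_scan_wants_object), count the collision and back off
+ * briefly via mutex_pause() before contending for the lock, so the scan
+ * thread gets a chance to win the race.
+ */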
+void
+vm_object_lock(vm_object_t object)
+{
+ if (object == vm_pageout_scan_wants_object) {
+ scan_object_collision++;
+ mutex_pause(2);
+ }
+ lck_rw_lock_exclusive(&object->Lock);
+}
+
+boolean_t
+vm_object_lock_avoid(vm_object_t object)
+{
+ if (object == vm_pageout_scan_wants_object) {
+ scan_object_collision++;
+ return TRUE;
+ }
+ return FALSE;
+}
+
+boolean_t
+_vm_object_lock_try(vm_object_t object)
+{
+ return (lck_rw_try_lock_exclusive(&object->Lock));
+}
+
+boolean_t
+vm_object_lock_try(vm_object_t object)
+{
+ // called from hibernate path so check before blocking
+ if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled()) {
+ mutex_pause(2);
+ }
+ return _vm_object_lock_try(object);
+}
+void
+vm_object_lock_shared(vm_object_t object)
+{
+ if (vm_object_lock_avoid(object)) {
+ mutex_pause(2);
+ }
+ lck_rw_lock_shared(&object->Lock);
+}
+
+boolean_t
+vm_object_lock_try_shared(vm_object_t object)
+{
+ if (vm_object_lock_avoid(object)) {
+ mutex_pause(2);
+ }
+ return (lck_rw_try_lock_shared(&object->Lock));
+}