+
+/*
+ * pager_map_to_phys_contiguous:
+ *
+ * Point a pager's VM object directly at a range of memory (the pager may be
+ * associated with a non-device vnode).
+ *
+ *	object		memory object control naming the pager's VM object
+ *	offset		offset handed to vm_object_populate_with_private()
+ *	base_vaddr	kernel virtual address of the start of the range; the
+ *			range is currently expected to be physically contiguous
+ *	size		size of the range
+ *
+ * Returns KERN_FAILURE when base_vaddr has no physical page in the kernel
+ * pmap or when the control has no VM object; otherwise returns the result of
+ * vm_object_populate_with_private().
+ */
+kern_return_t pager_map_to_phys_contiguous(
+	memory_object_control_t	object,
+	memory_object_offset_t	offset,
+	addr64_t		base_vaddr,
+	vm_size_t		size)
+{
+	ppnum_t		first_page;
+	vm_object_t	vmobj;
+	boolean_t	saved_private;
+	kern_return_t	kr;
+
+	first_page = pmap_find_phys(kernel_pmap, base_vaddr);
+	if (first_page == 0)
+		return KERN_FAILURE;
+
+	vmobj = memory_object_control_to_vm_object(object);
+	if (!vmobj)
+		return KERN_FAILURE;
+
+	/*
+	 * The object is flagged private for the populate call; the previous
+	 * value of the flag is put back only if populating fails.
+	 */
+	saved_private = vmobj->private;
+	vmobj->private = TRUE;
+
+	kr = vm_object_populate_with_private(vmobj, offset, first_page, size);
+	if (kr != KERN_SUCCESS)
+		vmobj->private = saved_private;
+
+	return kr;
+}
+
+/*
+ * Incremented each time a locking routine in this file finds that the object
+ * it was asked to lock is the one recorded in vm_pageout_scan_wants_object.
+ */
+uint32_t scan_object_collision = 0;
+
+/*
+ * Acquire the object's lock exclusively.  When the object is the one
+ * recorded in vm_pageout_scan_wants_object, record the collision and
+ * pause before trying to take the lock.
+ */
+void
+vm_object_lock(vm_object_t object)
+{
+	const boolean_t	scan_wants_it = (object == vm_pageout_scan_wants_object);
+
+	if (scan_wants_it) {
+		scan_object_collision += 1;
+		mutex_pause(2);
+	}
+
+	lck_rw_lock_exclusive(&object->Lock);
+}
+
+/*
+ * Returns TRUE (and counts a collision) when "object" is the object recorded
+ * in vm_pageout_scan_wants_object; returns FALSE otherwise.
+ */
+boolean_t
+vm_object_lock_avoid(vm_object_t object)
+{
+	if (object != vm_pageout_scan_wants_object)
+		return FALSE;
+
+	scan_object_collision += 1;
+	return TRUE;
+}
+
+/*
+ * Single attempt to take the object's lock exclusively, with no collision
+ * check.  Returns the result of lck_rw_try_lock_exclusive().
+ */
+boolean_t
+_vm_object_lock_try(vm_object_t object)
+{
+	boolean_t	acquired;
+
+	acquired = lck_rw_try_lock_exclusive(&object->Lock);
+	return acquired;
+}
+
+/*
+ * Try to take the object's lock exclusively.  On a collision with
+ * vm_pageout_scan_wants_object, pause first -- but only when pausing is
+ * allowed in the current context.
+ */
+boolean_t
+vm_object_lock_try(vm_object_t object)
+{
+	/*
+	 * This is called from the hibernate path, so check that interrupts
+	 * are enabled and preemption is not disabled before blocking.
+	 */
+	if (vm_object_lock_avoid(object)) {
+		if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
+			mutex_pause(2);
+	}
+
+	return _vm_object_lock_try(object);
+}
+
+/*
+ * Acquire the object's lock shared, pausing first when
+ * vm_object_lock_avoid() reports a collision.
+ */
+void
+vm_object_lock_shared(vm_object_t object)
+{
+	const boolean_t	should_pause = vm_object_lock_avoid(object);
+
+	if (should_pause)
+		mutex_pause(2);
+
+	lck_rw_lock_shared(&object->Lock);
+}
+
+/*
+ * Try to take the object's lock shared, pausing first when
+ * vm_object_lock_avoid() reports a collision.  Returns the result of
+ * lck_rw_try_lock_shared().
+ */
+boolean_t
+vm_object_lock_try_shared(vm_object_t object)
+{
+	boolean_t	acquired;
+
+	if (vm_object_lock_avoid(object))
+		mutex_pause(2);
+
+	acquired = lck_rw_try_lock_shared(&object->Lock);
+	return acquired;
+}
+
+
+/* Number of times vm_object_change_wimg_mode() has run to completion. */
+unsigned int vm_object_change_wimg_mode_count = 0;
+
+/*
+ * vm_object_change_wimg_mode:
+ *
+ * Apply "wimg_mode" as the cache attribute of every non-fictitious page
+ * on the object's memq and record the mode in the object.
+ *
+ * The object must be locked exclusively by the caller.
+ */
+void
+vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
+{
+	vm_page_t	page;
+
+	vm_object_lock_assert_exclusive(object);
+
+	/* Wait for paging activity on the object before touching its pages. */
+	vm_object_paging_wait(object, THREAD_UNINT);
+
+	queue_iterate(&object->memq, page, vm_page_t, listq) {
+		if (!page->fictitious) {
+			pmap_set_cache_attributes(page->phys_page, wimg_mode);
+		}
+	}
+
+	/* set_cache_attr is on for any mode other than the default. */
+	object->set_cache_attr = (wimg_mode == VM_WIMG_USE_DEFAULT) ? FALSE : TRUE;
+	object->wimg_bits = wimg_mode;
+
+	vm_object_change_wimg_mode_count += 1;
+}
+
+#if CONFIG_FREEZE
+
+/*
+ * vm_object_pack:
+ *
+ * Examine (and, when df_handle is non-NULL, act on) the resident pages of
+ * src_object on behalf of the freezer.  Takes and releases the object lock.
+ *
+ * All four counters are reset to 0 and *shared to FALSE on entry.
+ *
+ *  - Object not alive, or terminating: returns KERN_FAILURE.
+ *  - Volatile purgeable object: *purgeable_count is set to the resident page
+ *    count; if df_handle is non-NULL the object is also taken off its
+ *    purgeable queue and purged.
+ *  - Otherwise, an object with a single reference is handed to
+ *    vm_object_pack_pages(); an object with any other reference count is
+ *    only reported through *shared, and only if it is internal.
+ *
+ * Returns KERN_SUCCESS in every case except the first.
+ */
+kern_return_t vm_object_pack(
+ unsigned int *purgeable_count,
+ unsigned int *wired_count,
+ unsigned int *clean_count,
+ unsigned int *dirty_count,
+ unsigned int dirty_budget,
+ boolean_t *shared,
+ vm_object_t src_object,
+ struct default_freezer_handle *df_handle)
+{
+ kern_return_t kr = KERN_SUCCESS;
+
+ vm_object_lock(src_object);
+
+ *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
+ *shared = FALSE;
+
+ if (!src_object->alive || src_object->terminating){
+ kr = KERN_FAILURE;
+ goto done;
+ }
+
+ if (src_object->purgable == VM_PURGABLE_VOLATILE) {
+ *purgeable_count = src_object->resident_page_count;
+
+ /* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */
+ if (df_handle != NULL) {
+ purgeable_q_t queue;
+ /* object should be on a queue */
+ assert(src_object->objq.next != NULL &&
+ src_object->objq.prev != NULL);
+
+ queue = vm_purgeable_object_remove(src_object);
+ assert(queue);
+ if (src_object->purgeable_when_ripe) {
+ /* The token is deleted with the page queues lock held. */
+ vm_page_lock_queues();
+ vm_purgeable_token_delete_first(queue);
+ vm_page_unlock_queues();
+ }
+
+ vm_object_purge(src_object, 0);
+ assert(src_object->purgable == VM_PURGABLE_EMPTY);
+
+ /*
+ * This object was "volatile" so its pages must have
+ * already been accounted as "volatile": no change
+ * in accounting now that it's "empty".
+ */
+ }
+ goto done;
+ }
+
+ if (src_object->ref_count == 1) {
+ vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle);
+ } else {
+ /* Not the only reference: don't touch the pages, just report sharing. */
+ if (src_object->internal) {
+ *shared = TRUE;
+ }
+ }
+done:
+ vm_object_unlock(src_object);
+
+ return kr;
+}
+
+
+/*
+ * vm_object_pack_pages:
+ *
+ * Walk the pages on src_object's memq, counting them and, when df_handle is
+ * non-NULL, packing dirty pages into the default freezer and freeing clean
+ * ones.  vm_object_pack() calls this with the object lock held.
+ *
+ *	wired_count	incremented for each wired page encountered
+ *	clean_count	incremented for each clean page counted or freed
+ *	dirty_count	incremented for each dirty page counted or packed
+ *	dirty_budget	if non-zero, the walk stops once *dirty_count equals it
+ *	df_handle	NULL means "count only": no page is modified
+ *
+ * Fictitious, busy, absent, unusual and error pages are skipped and not
+ * counted.  The counters are only incremented here, never reset.
+ */
+void
+vm_object_pack_pages(
+ unsigned int *wired_count,
+ unsigned int *clean_count,
+ unsigned int *dirty_count,
+ unsigned int dirty_budget,
+ vm_object_t src_object,
+ struct default_freezer_handle *df_handle)
+{
+ vm_page_t p, next;
+
+ next = (vm_page_t)queue_first(&src_object->memq);
+
+ while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
+ /* Fetch the successor up front: p may be packed or freed below. */
+ p = next;
+ next = (vm_page_t)queue_next(&next->listq);
+
+ /* Finish up if we've hit our pageout limit */
+ if (dirty_budget && (dirty_budget == *dirty_count)) {
+ break;
+ }
+ assert(!p->laundry);
+
+ if (p->fictitious || p->busy )
+ continue;
+
+ if (p->absent || p->unusual || p->error)
+ continue;
+
+ if (VM_PAGE_WIRED(p)) {
+ (*wired_count)++;
+ continue;
+ }
+
+ /* Count-only mode: classify the page without touching it. */
+ if (df_handle == NULL) {
+ if (p->dirty || pmap_is_modified(p->phys_page)) {
+ (*dirty_count)++;
+ } else {
+ (*clean_count)++;
+ }
+ continue;
+ }
+
+ /* A page already being cleaned is flagged for pageout and left alone. */
+ if (p->cleaning) {
+ p->pageout = TRUE;
+ continue;
+ }
+
+ /* Remove any mappings and fold the pmap's modified bit into p->dirty. */
+ if (p->pmapped == TRUE) {
+ int refmod_state;
+ refmod_state = pmap_disconnect(p->phys_page);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(p, FALSE);
+ }
+ }
+
+ if (p->dirty) {
+ default_freezer_pack_page(p, df_handle);
+ (*dirty_count)++;
+ }
+ else {
+ VM_PAGE_FREE(p);
+ (*clean_count)++;
+ }
+ }
+}
+
+
+/*
+ * This routine does the "relocation" of previously
+ * compressed pages belonging to this object that are
+ * residing in a number of compressed segments into
+ * a set of compressed segments dedicated to hold
+ * compressed pages belonging to this object.
+ */
+
+/* Freezer/compressor state used below; defined outside this section. */
+extern void *freezer_chead;
+extern char *freezer_compressor_scratch_buf;
+extern int c_freezer_compression_count;
+extern AbsoluteTime c_freezer_last_yield_ts;
+
+/* Number of compressed pages collected locally before they are passed to vm_page_free_list(). */
+#define MAX_FREE_BATCH 32
+/* Elapsed time (ms) since the last yield after which c_freezer_should_yield() returns 1. */
+#define FREEZER_DUTY_CYCLE_ON_MS 5
+/* Value passed to thread_yield_internal() when the freezer yields. */
+#define FREEZER_DUTY_CYCLE_OFF_MS 5
+
+static int c_freezer_should_yield(void);
+
+
+/*
+ * c_freezer_should_yield:
+ *
+ * Report whether more than FREEZER_DUTY_CYCLE_ON_MS milliseconds have
+ * elapsed since c_freezer_last_yield_ts.
+ *
+ * Returns 1 when the budget has been exceeded (the callers in this file then
+ * yield and refresh c_freezer_last_yield_ts), 0 otherwise.
+ *
+ * The definition carries an explicit (void) parameter list so that it is a
+ * real prototype and matches the forward declaration.
+ */
+static int
+c_freezer_should_yield(void)
+{
+	AbsoluteTime	elapsed;
+	uint64_t	nsecs;
+
+	/* The timestamp is expected to be non-zero by the time we are called. */
+	assert(c_freezer_last_yield_ts);
+
+	/* elapsed = now - time of last yield, then convert to nanoseconds. */
+	clock_get_uptime(&elapsed);
+	SUB_ABSOLUTETIME(&elapsed, &c_freezer_last_yield_ts);
+	absolutetime_to_nanoseconds(elapsed, &nsecs);
+
+	if (nsecs > 1000 * 1000 * FREEZER_DUTY_CYCLE_ON_MS)
+		return (1);
+	return (0);
+}
+
+
+/*
+ * vm_object_compressed_freezer_done:
+ *
+ * Tell the compressor that the freezer has finished filling the segment
+ * chain headed by freezer_chead.
+ *
+ * Defined with an explicit (void) parameter list so the definition is a
+ * proper prototype rather than an old-style declarator.
+ */
+void
+vm_object_compressed_freezer_done(void)
+{
+	vm_compressor_finished_filling(&freezer_chead);
+}
+
+
+/*
+ * vm_object_compressed_freezer_pageout:
+ *
+ * Push the resident pages of "object" into the compressor on behalf of the
+ * freezer, using the segment chain headed by freezer_chead.
+ *
+ *  - If the object has no initialized pager, try to collapse it and create a
+ *    compressor pager; give up silently if there is still no pager.
+ *  - When DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED, first relocate any
+ *    already-compressed pages of this object to freezer_chead.
+ *  - Then process at most as many pages as were resident at the start:
+ *    pages that can't be handled are rotated to the tail of the memq, clean
+ *    non-precious pages are freed, and the rest are compressed with the
+ *    object lock dropped.
+ *
+ * Takes and releases the object lock; may yield the CPU according to
+ * c_freezer_should_yield().
+ */
+void
+vm_object_compressed_freezer_pageout(
+ vm_object_t object)
+{
+ vm_page_t p;
+ vm_page_t local_freeq = NULL;
+ int local_freed = 0;
+ kern_return_t retval = KERN_SUCCESS;
+ int obj_resident_page_count_snapshot = 0;
+
+ assert(object != VM_OBJECT_NULL);
+
+ vm_object_lock(object);
+
+ if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL) {
+
+ if (!object->pager_initialized) {
+
+ vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
+
+ /* Re-check: the state is tested again after the collapse call. */
+ if (!object->pager_initialized)
+ vm_object_compressor_pager_create(object);
+ }
+
+ if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL) {
+ vm_object_unlock(object);
+ return;
+ }
+ }
+
+ if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+ vm_object_offset_t curr_offset = 0;
+
+ /*
+ * Go through the object and make sure that any
+ * previously compressed pages are relocated into
+ * a compressed segment associated with our "freezer_chead".
+ */
+ while (curr_offset < object->vo_size) {
+
+ curr_offset = vm_compressor_pager_next_compressed(object->pager, curr_offset);
+
+ /* -1 is treated as "no further compressed page". */
+ if (curr_offset == (vm_object_offset_t) -1)
+ break;
+
+ retval = vm_compressor_pager_relocate(object->pager, curr_offset, &freezer_chead);
+
+ /* A failed relocation ends this pass; the page loop below still runs. */
+ if (retval != KERN_SUCCESS)
+ break;
+
+ curr_offset += PAGE_SIZE_64;
+ }
+ }
+
+ /*
+ * We can't hold the object lock while heading down into the compressed pager
+ * layer because we might need the kernel map lock down there to allocate new
+ * compressor data structures. And if this same object is mapped in the kernel
+ * and there's a fault on it, then that thread will want the object lock while
+ * holding the kernel map lock.
+ *
+ * Since we are going to drop/grab the object lock repeatedly, we must make sure
+ * we won't be stuck in an infinite loop if the same page(s) keep getting
+ * decompressed. So we grab a snapshot of the number of pages in the object and
+ * we won't process any more than that number of pages.
+ */
+
+ obj_resident_page_count_snapshot = object->resident_page_count;
+
+ /* Paired with the vm_object_activity_end() after the loop. */
+ vm_object_activity_begin(object);
+
+ while ((obj_resident_page_count_snapshot--) && !queue_empty(&object->memq)) {
+
+ /* Always work on the head of the memq; handled pages move to the tail or leave the queue. */
+ p = (vm_page_t)queue_first(&object->memq);
+
+ KERNEL_DEBUG(0xe0430004 | DBG_FUNC_START, object, local_freed, 0, 0, 0);
+
+ vm_page_lockspin_queues();
+
+ if (p->cleaning || p->fictitious || p->busy || p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) {
+ if (p->cleaning)
+ p->pageout = TRUE;
+
+ vm_page_unlock_queues();
+
+ KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 1, 0, 0);
+
+ /* Rotate the skipped page to the tail so the next iteration sees a different page. */
+ queue_remove(&object->memq, p, vm_page_t, listq);
+ queue_enter(&object->memq, p, vm_page_t, listq);
+
+ continue;
+ }
+
+ if (p->pmapped == TRUE) {
+ int refmod_state, pmap_flags;
+
+ if (p->dirty || p->precious) {
+ pmap_flags = PMAP_OPTIONS_COMPRESSOR;
+ } else {
+ pmap_flags = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+ }
+
+ refmod_state = pmap_disconnect_options(p->phys_page, pmap_flags, NULL);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(p, FALSE);
+ }
+ }
+
+ if (p->dirty == FALSE && p->precious == FALSE) {
+ /*
+ * Clean and non-precious page.
+ */
+ vm_page_unlock_queues();
+ VM_PAGE_FREE(p);
+
+ KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 2, 0, 0);
+ continue;
+ }
+
+ if (p->laundry) {
+ p->pageout = FALSE;
+ vm_pageout_steal_laundry(p, TRUE);
+ }
+
+ vm_page_queues_remove(p);
+ vm_page_unlock_queues();
+
+
+ /*
+ * In case the compressor fails to compress this page, we need it at
+ * the back of the object memq so that we don't keep trying to process it.
+ * Make the move here while we have the object lock held.
+ */
+
+ queue_remove(&object->memq, p, vm_page_t, listq);
+ queue_enter(&object->memq, p, vm_page_t, listq);
+
+ /*
+ * Grab an activity_in_progress here for vm_pageout_compress_page() to consume.
+ *
+ * Mark the page busy so no one messes with it while we have the object lock dropped.
+ */
+
+ p->busy = TRUE;
+
+ vm_object_activity_begin(object);
+
+ vm_object_unlock(object);
+
+ /*
+ * The final argument (FALSE) tells vm_pageout_compress_page that we don't hold the object lock and the pager may not be initialized.
+ */
+ if (vm_pageout_compress_page(&freezer_chead, freezer_compressor_scratch_buf, p, FALSE) == KERN_SUCCESS) {
+ /*
+ * page has already been un-tabled from the object via 'vm_page_remove'
+ */
+ /* Chain the page onto the local free list; it is released in batches of MAX_FREE_BATCH. */
+ p->pageq.next = (queue_entry_t)local_freeq;
+ local_freeq = p;
+ local_freed++;
+
+ if (local_freed >= MAX_FREE_BATCH) {
+
+ vm_page_free_list(local_freeq, TRUE);
+
+ local_freeq = NULL;
+ local_freed = 0;
+ }
+ c_freezer_compression_count++;
+ }
+ KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 0, 0, 0);
+
+ /* Only consider yielding when no pages are waiting on the local free list. */
+ if (local_freed == 0 && c_freezer_should_yield()) {
+
+ thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS);
+ clock_get_uptime(&c_freezer_last_yield_ts);
+ }
+
+ vm_object_lock(object);
+ }
+
+ /* Release whatever is left of the last, partial batch. */
+ if (local_freeq) {
+ vm_page_free_list(local_freeq, TRUE);
+
+ local_freeq = NULL;
+ local_freed = 0;
+ }
+
+ vm_object_activity_end(object);
+
+ vm_object_unlock(object);
+
+ if (c_freezer_should_yield()) {
+
+ thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS);
+ clock_get_uptime(&c_freezer_last_yield_ts);
+ }
+}
+
+/*
+ * vm_object_pagein:
+ *
+ * Ask the object's pager to reclaim its data through
+ * memory_object_data_reclaim().  The object is marked blocked_access and a
+ * paging reference is held for the duration of the call, which is made with
+ * the object lock dropped.
+ *
+ * Returns KERN_FAILURE if the pager is not ready or absent, otherwise the
+ * result of memory_object_data_reclaim().
+ */
+kern_return_t
+vm_object_pagein(
+	vm_object_t	object)
+{
+	memory_object_t	backing_pager;
+	kern_return_t	result;
+
+	vm_object_lock(object);
+
+	backing_pager = object->pager;
+	if (backing_pager == MEMORY_OBJECT_NULL || !object->pager_ready) {
+		vm_object_unlock(object);
+		return KERN_FAILURE;
+	}
+
+	/* Let in-flight paging finish, then register our own. */
+	vm_object_paging_wait(object, THREAD_UNINT);
+	vm_object_paging_begin(object);
+	object->blocked_access = TRUE;
+
+	/* The reclaim call is made without the object lock. */
+	vm_object_unlock(object);
+	result = memory_object_data_reclaim(backing_pager, TRUE);
+	vm_object_lock(object);
+
+	object->blocked_access = FALSE;
+	vm_object_paging_end(object);
+	vm_object_unlock(object);
+
+	return result;
+}
+#endif /* CONFIG_FREEZE */
+
+
+/*
+ * vm_object_pageout:
+ *
+ * Push the eligible resident pages of an internal object towards the
+ * internal pageout queue.  Clean, non-precious pages are simply freed;
+ * the others are removed from the page queues and handed to
+ * vm_pageout_cluster().
+ *
+ * Does nothing for objects that are not internal, are terminating or are
+ * not alive, or for which no pager can be set up.  Stops early when
+ * vm_compressor_low_on_space() reports true.  If the internal pageout queue
+ * is throttled, the thread blocks and the scan restarts from the head of
+ * the memq.
+ *
+ * Takes and releases the object lock.
+ */
+void
+vm_object_pageout(
+ vm_object_t object)
+{
+ vm_page_t p, next;
+ struct vm_pageout_queue *iq;
+ boolean_t need_unlock = TRUE;
+
+ iq = &vm_pageout_queue_internal;
+
+ assert(object != VM_OBJECT_NULL );
+ assert(!DEFAULT_PAGER_IS_ACTIVE && !DEFAULT_FREEZER_IS_ACTIVE);
+
+ vm_object_lock(object);
+
+ if (!object->internal ||
+ object->terminating ||
+ !object->alive) {
+ vm_object_unlock(object);
+ return;
+ }
+
+ if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL) {
+
+ if (!object->pager_initialized) {
+
+ vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
+
+ /* Re-check: the state is tested again after the collapse call. */
+ if (!object->pager_initialized)
+ vm_object_compressor_pager_create(object);
+ }
+
+ if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL) {
+ vm_object_unlock(object);
+ return;
+ }
+ }
+
+ReScan:
+ next = (vm_page_t)queue_first(&object->memq);
+
+ while (!queue_end(&object->memq, (queue_entry_t)next)) {
+ /* Fetch the successor up front: p may leave the memq below. */
+ p = next;
+ next = (vm_page_t)queue_next(&next->listq);
+
+ if (!(p->active || p->inactive || p->speculative) ||
+ p->encrypted_cleaning ||
+ p->cleaning ||
+ p->laundry ||
+ p->pageout ||
+ p->busy ||
+ p->absent ||
+ p->error ||
+ p->fictitious ||
+ VM_PAGE_WIRED(p)) {
+ /*
+ * Page is already being cleaned or can't be cleaned.
+ */
+ continue;
+ }
+
+ /* Throw to the pageout queue */
+
+ vm_page_lockspin_queues();
+ need_unlock = TRUE;
+
+ if (vm_compressor_low_on_space()) {
+ vm_page_unlock_queues();
+ break;
+ }
+
+ if (VM_PAGE_Q_THROTTLED(iq)) {
+
+ iq->pgo_draining = TRUE;
+
+ /*
+ * Wait on the "draining" event of the internal queue.  Both
+ * locks are dropped before blocking, so the memq may have
+ * changed by the time we wake up: restart the scan.
+ */
+ assert_wait((event_t) (&iq->pgo_laundry + 1),
+ THREAD_INTERRUPTIBLE);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ vm_object_lock(object);
+ goto ReScan;
+ }
+
+ assert(!p->fictitious);
+ assert(!p->busy);
+ assert(!p->absent);
+ assert(!p->unusual);
+ assert(!p->error);
+ assert(!VM_PAGE_WIRED(p));
+ assert(!p->cleaning);
+
+ if (p->pmapped == TRUE) {
+ int refmod_state;
+ int pmap_options;
+
+ pmap_options = 0;
+ if (COMPRESSED_PAGER_IS_ACTIVE ||
+ DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+ /*
+ * Tell pmap the page should be accounted
+ * for as "compressed" if it's been modified.
+ */
+ pmap_options =
+ PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+ if (p->dirty || p->precious) {
+ /*
+ * We already know it's been modified,
+ * so tell pmap to account for it
+ * as "compressed".
+ */
+ pmap_options = PMAP_OPTIONS_COMPRESSOR;
+ }
+ }
+ refmod_state = pmap_disconnect_options(p->phys_page,
+ pmap_options,
+ NULL);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(p, FALSE);
+ }
+ }
+
+ /* Nothing worth saving: free the page instead of paging it out. */
+ if (!p->dirty && !p->precious) {
+ vm_page_unlock_queues();
+ VM_PAGE_FREE(p);
+ continue;
+ }
+
+ vm_page_queues_remove(p);
+ /*
+ * A non-zero return from vm_pageout_cluster() means we must not
+ * unlock the page queues here -- presumably it has already
+ * dropped that lock itself (TODO confirm against its definition).
+ */
+ if (vm_pageout_cluster(p, TRUE, FALSE, TRUE))
+ need_unlock = FALSE;
+
+ if (need_unlock == TRUE)
+ vm_page_unlock_queues();
+ }
+
+ vm_object_unlock(object);
+}
+
+
+#if CONFIG_IOSCHED
+/*
+ * vm_page_request_reprioritize:
+ *
+ * Queue a request asking the reprioritize thread to re-issue the I/O
+ * covering [blkno, len] on the device vnode behind o's pager at priority
+ * "prio".  Best effort: the request is silently dropped if the device vnode
+ * can't be obtained or no request structure can be allocated without
+ * blocking.
+ */
+void
+vm_page_request_reprioritize(vm_object_t o, uint64_t blkno, uint32_t len, int prio)
+{
+	struct vnode		*dev_vnode = NULL;
+	io_reprioritize_req_t	request;
+
+	if (vnode_pager_get_object_devvp(o->pager, (uintptr_t *)&dev_vnode) != KERN_SUCCESS)
+		return;
+
+	/*
+	 * The object lock is held here, so a blocking allocation could
+	 * deadlock under memory pressure; use the non-blocking zalloc variant
+	 * and give up if it fails.
+	 */
+	request = (io_reprioritize_req_t)zalloc_noblock(io_reprioritize_req_zone);
+	if (request == NULL)
+		return;
+
+	request->devvp = dev_vnode;
+	request->blkno = blkno;
+	request->len = len;
+	request->priority = prio;
+
+	/* Append to the reprioritization list under its lock. */
+	IO_REPRIORITIZE_LIST_LOCK();
+	queue_enter(&io_reprioritize_list, request, io_reprioritize_req_t, io_reprioritize_list);
+	IO_REPRIORITIZE_LIST_UNLOCK();
+
+	/* Kick the thread that services the list. */
+	IO_REPRIO_THREAD_WAKEUP();
+}
+
+/*
+ * vm_decmp_upl_reprioritize:
+ *
+ * Queue reprioritization requests, at priority "prio", for the I/O ranges
+ * recorded in the real I/O UPL (upl->decmp_io_upl) attached to "upl".
+ *
+ * Does nothing unless the UPL has both UPL_TRACKED_BY_OBJECT and
+ * UPL_EXPEDITE_SUPPORTED set.  The UPL lock is only ever try-locked; if it
+ * can't be taken, or the I/O UPL has gone away or changed between the two
+ * locked phases, the function returns without queuing anything.
+ *
+ * The work is split in two locked phases around the allocation of a private
+ * copy of the reprio info, so that no allocation happens with the UPL lock
+ * held.
+ */
+void
+vm_decmp_upl_reprioritize(upl_t upl, int prio)
+{
+ int offset;
+ vm_object_t object;
+ io_reprioritize_req_t req;
+ struct vnode *devvp = NULL;
+ uint64_t blkno;
+ uint32_t len;
+ upl_t io_upl;
+ uint64_t *io_upl_reprio_info;
+ int io_upl_size;
+
+ if ((upl->flags & UPL_TRACKED_BY_OBJECT) == 0 || (upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
+ return;
+
+ /*
+ * We dont want to perform any allocations with the upl lock held since that might
+ * result in a deadlock. If the system is low on memory, the pageout thread would
+ * try to pageout stuff and might wait on this lock. If we are waiting for the memory to
+ * be freed up by the pageout thread, it would be a deadlock.
+ */
+
+
+ /* First step is just to get the size of the upl to find out how big the reprio info is */
+ if(!upl_try_lock(upl))
+ return;
+
+ if (upl->decmp_io_upl == NULL) {
+ /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */
+ upl_unlock(upl);
+ return;
+ }
+
+ io_upl = upl->decmp_io_upl;
+ assert((io_upl->flags & UPL_DECMP_REAL_IO) != 0);
+ io_upl_size = io_upl->size;
+ upl_unlock(upl);
+
+ /* Now perform the allocation */
+ /* One uint64_t of reprio info per page of the I/O UPL. */
+ io_upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * (io_upl_size / PAGE_SIZE));
+ if (io_upl_reprio_info == NULL)
+ return;
+
+ /* Now again take the lock, recheck the state and grab out the required info */
+ if(!upl_try_lock(upl))
+ goto out;
+
+ if (upl->decmp_io_upl == NULL || upl->decmp_io_upl != io_upl) {
+ /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */
+ upl_unlock(upl);
+ goto out;
+ }
+ /* Snapshot the reprio info so it can be walked after the lock is dropped. */
+ memcpy(io_upl_reprio_info, io_upl->upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE));
+
+ /* Get the VM object for this UPL */
+ if (io_upl->flags & UPL_SHADOWED) {
+ object = io_upl->map_object->shadow;
+ } else {
+ object = io_upl->map_object;
+ }
+
+ /* Get the dev vnode ptr for this object */
+ if(!object || !object->pager ||
+ vnode_pager_get_object_devvp(object->pager, (uintptr_t *)&devvp) != KERN_SUCCESS) {
+ upl_unlock(upl);
+ goto out;
+ }
+
+ upl_unlock(upl);
+
+ /* Now we have all the information needed to do the expedite */
+
+ /*
+ * "offset" is a byte offset into the I/O UPL; the info entry for it is
+ * at index offset / PAGE_SIZE.  Each entry packs a block number in its
+ * low bits and a length above UPL_REPRIO_INFO_SHIFT.
+ * NOTE(review): offset is advanced by "len", i.e. len is used as a byte
+ * count here -- confirm the units against where upl_reprio_info is filled.
+ */
+ offset = 0;
+ while (offset < io_upl_size) {
+ blkno = io_upl_reprio_info[(offset / PAGE_SIZE)] & UPL_REPRIO_INFO_MASK;
+ len = (io_upl_reprio_info[(offset / PAGE_SIZE)] >> UPL_REPRIO_INFO_SHIFT) & UPL_REPRIO_INFO_MASK;
+
+ /*
+ * This implementation may cause some spurious expedites due to the
+ * fact that we dont cleanup the blkno & len from the upl_reprio_info
+ * even after the I/O is complete.
+ */
+
+ if (blkno != 0 && len != 0) {
+ /* Create the request for I/O reprioritization */
+ req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone);
+ assert(req != NULL);
+ req->blkno = blkno;
+ req->len = len;
+ req->priority = prio;
+ req->devvp = devvp;
+
+ /* Insert request into the reprioritization list */
+ IO_REPRIORITIZE_LIST_LOCK();
+ queue_enter(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list);
+ IO_REPRIORITIZE_LIST_UNLOCK();
+
+ offset += len;
+ } else {
+ /* No I/O recorded for this page: move on to the next one. */
+ offset += PAGE_SIZE;
+ }
+ }
+
+ /* Wakeup reprioritize thread */
+ IO_REPRIO_THREAD_WAKEUP();
+
+out:
+ /* Reached on success and on every failure after the allocation. */
+ kfree(io_upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE));
+ return;
+}
+
+/*
+ * vm_page_handle_prio_inversion:
+ *
+ * The current thread is about to wait on page "m" of object "o".  Look
+ * through the UPLs queued on the object for one that contains the page,
+ * supports expedite and has a upl_priority value greater than the current
+ * thread's effective I/O policy tier; if one is found, request that its I/O
+ * be reprioritized to the current thread's tier.
+ *
+ * Only the first matching UPL is acted upon.
+ */
+void
+vm_page_handle_prio_inversion(vm_object_t o, vm_page_t m)
+{
+ upl_t upl;
+ upl_page_info_t *pl;
+ unsigned int i, num_pages;
+ int cur_tier;
+
+ cur_tier = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
+
+ /*
+ Scan through all UPLs associated with the object to find the
+ UPL containing the contended page.
+ */
+ queue_iterate(&o->uplq, upl, upl_t, uplq) {
+ /* Skip UPLs that can't be expedited or whose priority value is not above ours. */
+ if (((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) || upl->upl_priority <= cur_tier)
+ continue;
+ pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+ num_pages = (upl->size / PAGE_SIZE);
+
+ /*
+ For each page in the UPL page list, see if it matches the contended
+ page and was issued as a low prio I/O.
+ */
+ for(i=0; i < num_pages; i++) {
+ if(UPL_PAGE_PRESENT(pl,i) && m->phys_page == pl[i].phys_addr) {
+ /* Decompression request with a live I/O UPL: reprioritize via that UPL. */
+ if ((upl->flags & UPL_DECMP_REQ) && upl->decmp_io_upl) {
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl, upl->upl_priority, 0);
+ vm_decmp_upl_reprioritize(upl, cur_tier);
+ break;
+ }
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl->upl_reprio_info[i], upl->upl_priority, 0);
+ /* Only issue a request when both a block number and a length are recorded. */
+ if (UPL_REPRIO_INFO_BLKNO(upl, i) != 0 && UPL_REPRIO_INFO_LEN(upl, i) != 0)
+ vm_page_request_reprioritize(o, UPL_REPRIO_INFO_BLKNO(upl, i), UPL_REPRIO_INFO_LEN(upl, i), cur_tier);
+ break;
+ }
+ }
+ /* Check if we found any hits */
+ /* i stops short of num_pages only when the inner loop hit a break. */
+ if (i != num_pages)
+ break;
+ }
+
+ return;
+}
+
+/*
+ * vm_page_sleep:
+ *
+ * Mark page "m" as wanted and sleep on it via thread_sleep_vm_object().
+ * When the object tracks I/O and the page is busy, being cleaned or wired,
+ * first try to expedite the I/O that is holding the page.
+ *
+ * Returns the wait result of thread_sleep_vm_object().
+ */
+wait_result_t
+vm_page_sleep(vm_object_t o, vm_page_t m, int interruptible)
+{
+	wait_result_t	wres;
+
+	KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_START, o, m, 0, 0, 0);
+
+	if (o->io_tracking) {
+		if ((m->busy == TRUE) || (m->cleaning == TRUE) || VM_PAGE_WIRED(m)) {
+			/*
+			 * The page is held up by an I/O: issue a
+			 * reprioritize request if one is warranted.
+			 */
+			vm_page_handle_prio_inversion(o, m);
+		}
+	}
+
+	m->wanted = TRUE;
+	wres = thread_sleep_vm_object(o, m, interruptible);
+
+	KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_END, o, m, 0, 0, 0);
+
+	return wres;
+}
+
+/*
+ * io_reprioritize_thread:
+ *
+ * Body of the reprioritize thread.  Drains io_reprioritize_list, issuing
+ * vnode_pager_issue_reprioritize_io() for each request and freeing it, then
+ * invokes IO_REPRIO_THREAD_CONTINUATION() once the list is empty.
+ *
+ * The list lock is held only while testing and dequeuing; it is dropped
+ * around the call that issues the I/O.
+ */
+static void
+io_reprioritize_thread(void *param __unused, wait_result_t wr __unused)
+{
+	io_reprioritize_req_t	pending = NULL;
+
+	IO_REPRIORITIZE_LIST_LOCK();
+	while (!queue_empty(&io_reprioritize_list)) {
+		queue_remove_first(&io_reprioritize_list, pending, io_reprioritize_req_t, io_reprioritize_list);
+		IO_REPRIORITIZE_LIST_UNLOCK();
+
+		vnode_pager_issue_reprioritize_io(pending->devvp, pending->blkno, pending->len, pending->priority);
+		zfree(io_reprioritize_req_zone, pending);
+
+		IO_REPRIORITIZE_LIST_LOCK();
+	}
+	IO_REPRIORITIZE_LIST_UNLOCK();
+
+	IO_REPRIO_THREAD_CONTINUATION();
+}
+#endif